{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.0, "eval_steps": 500, "global_step": 5229, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.56022944550669e-10, "logits/chosen": -2.1973071098327637, "logits/rejected": -2.063166379928589, "logps/chosen": -72.67636108398438, "logps/rejected": -70.58360290527344, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.912045889101338e-09, "logits/chosen": -2.196955919265747, "logits/rejected": -1.5970442295074463, "logps/chosen": -89.44160461425781, "logps/rejected": -70.98513793945312, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.01, "learning_rate": 2.8680688336520077e-09, "logits/chosen": -1.994170904159546, "logits/rejected": -2.238757610321045, "logps/chosen": -87.91758728027344, "logps/rejected": -91.01237487792969, "loss": 0.6944, "rewards/accuracies": 0.5, "rewards/chosen": -0.00047340383753180504, "rewards/margins": -0.0036617284640669823, "rewards/rejected": 0.00318832416087389, "step": 3 }, { "epoch": 0.01, "learning_rate": 3.824091778202676e-09, "logits/chosen": -2.062040328979492, "logits/rejected": -2.103986978530884, "logps/chosen": -73.61285400390625, "logps/rejected": -82.38819122314453, "loss": 0.6897, "rewards/accuracies": 0.5, "rewards/chosen": -9.813308133743703e-05, "rewards/margins": 0.003009796142578125, "rewards/rejected": -0.0031079293694347143, "step": 4 }, { "epoch": 0.01, "learning_rate": 4.780114722753346e-09, "logits/chosen": -1.7863361835479736, "logits/rejected": -2.102857828140259, "logps/chosen": -100.2719497680664, "logps/rejected": -102.88688659667969, "loss": 0.6872, "rewards/accuracies": 1.0, "rewards/chosen": -0.008409881964325905, "rewards/margins": 0.013592816889286041, "rewards/rejected": -0.022002698853611946, "step": 5 }, { "epoch": 0.01, "learning_rate": 5.7361376673040155e-09, "logits/chosen": -2.1035733222961426, "logits/rejected": -2.040540933609009, "logps/chosen": -96.39738464355469, "logps/rejected": -93.3469009399414, "loss": 0.6971, "rewards/accuracies": 0.5, "rewards/chosen": 0.0006069180089980364, "rewards/margins": -0.006039239466190338, "rewards/rejected": 0.006646157242357731, "step": 6 }, { "epoch": 0.01, "learning_rate": 6.692160611854685e-09, "logits/chosen": -1.8828966617584229, "logits/rejected": -1.7745822668075562, "logps/chosen": -85.66909790039062, "logps/rejected": -82.95043182373047, "loss": 0.6922, "rewards/accuracies": 0.25, "rewards/chosen": -0.022777941077947617, "rewards/margins": -0.012322044931352139, "rewards/rejected": -0.010455894283950329, "step": 7 }, { "epoch": 0.01, "learning_rate": 7.648183556405352e-09, "logits/chosen": -1.604448676109314, "logits/rejected": -2.1486330032348633, "logps/chosen": -69.46320343017578, "logps/rejected": -88.41170501708984, "loss": 0.6969, "rewards/accuracies": 0.0, "rewards/chosen": -0.007596779614686966, "rewards/margins": -0.04148807376623154, "rewards/rejected": 0.03389129787683487, "step": 8 }, { "epoch": 0.02, "learning_rate": 8.604206500956022e-09, "logits/chosen": -2.0820536613464355, "logits/rejected": -1.9636483192443848, "logps/chosen": -92.87248229980469, "logps/rejected": -101.40841674804688, "loss": 0.6943, "rewards/accuracies": 0.75, "rewards/chosen": 0.009395790286362171, "rewards/margins": 0.003721047192811966, "rewards/rejected": 0.005674743093550205, "step": 9 }, { "epoch": 0.02, "learning_rate": 9.560229445506692e-09, "logits/chosen": -2.0903196334838867, "logits/rejected": -1.85238778591156, "logps/chosen": -78.22772216796875, "logps/rejected": -63.32625961303711, "loss": 0.6976, "rewards/accuracies": 0.5, "rewards/chosen": -0.017000580206513405, "rewards/margins": -0.009409617632627487, "rewards/rejected": -0.00759096210822463, "step": 10 }, { "epoch": 0.02, "learning_rate": 1.0516252390057361e-08, "logits/chosen": -1.8324108123779297, "logits/rejected": -2.0637261867523193, "logps/chosen": -84.39460754394531, "logps/rejected": -89.12655639648438, "loss": 0.693, "rewards/accuracies": 0.75, "rewards/chosen": 0.016580581665039062, "rewards/margins": 0.010043526068329811, "rewards/rejected": 0.006537056062370539, "step": 11 }, { "epoch": 0.02, "learning_rate": 1.1472275334608031e-08, "logits/chosen": -2.0129637718200684, "logits/rejected": -2.051783323287964, "logps/chosen": -82.39252471923828, "logps/rejected": -83.65422821044922, "loss": 0.6972, "rewards/accuracies": 0.0, "rewards/chosen": -0.006260872818529606, "rewards/margins": -0.0403590202331543, "rewards/rejected": 0.034098148345947266, "step": 12 }, { "epoch": 0.02, "learning_rate": 1.24282982791587e-08, "logits/chosen": -1.90733003616333, "logits/rejected": -2.1982622146606445, "logps/chosen": -74.05764770507812, "logps/rejected": -107.33108520507812, "loss": 0.6941, "rewards/accuracies": 0.0, "rewards/chosen": -0.007502841763198376, "rewards/margins": -0.03062906302511692, "rewards/rejected": 0.02312622033059597, "step": 13 }, { "epoch": 0.02, "learning_rate": 1.338432122370937e-08, "logits/chosen": -1.9526876211166382, "logits/rejected": -2.0747411251068115, "logps/chosen": -78.99064636230469, "logps/rejected": -81.9149169921875, "loss": 0.6939, "rewards/accuracies": 0.5, "rewards/chosen": -0.002608871553093195, "rewards/margins": 0.005807303823530674, "rewards/rejected": -0.008416175842285156, "step": 14 }, { "epoch": 0.03, "learning_rate": 1.4340344168260036e-08, "logits/chosen": -2.172732353210449, "logits/rejected": -2.1473331451416016, "logps/chosen": -91.09015655517578, "logps/rejected": -98.52915954589844, "loss": 0.6941, "rewards/accuracies": 0.5, "rewards/chosen": 0.0013477331958711147, "rewards/margins": -0.002383613958954811, "rewards/rejected": 0.003731345757842064, "step": 15 }, { "epoch": 0.03, "learning_rate": 1.5296367112810705e-08, "logits/chosen": -1.9169869422912598, "logits/rejected": -1.9713995456695557, "logps/chosen": -84.72262573242188, "logps/rejected": -86.46129608154297, "loss": 0.696, "rewards/accuracies": 0.5, "rewards/chosen": -0.006387138739228249, "rewards/margins": 0.014386177062988281, "rewards/rejected": -0.02077331580221653, "step": 16 }, { "epoch": 0.03, "learning_rate": 1.6252390057361376e-08, "logits/chosen": -2.062924385070801, "logits/rejected": -1.8603233098983765, "logps/chosen": -103.0205078125, "logps/rejected": -93.50677490234375, "loss": 0.6927, "rewards/accuracies": 0.75, "rewards/chosen": 0.003280448727309704, "rewards/margins": 0.011556816287338734, "rewards/rejected": -0.00827636756002903, "step": 17 }, { "epoch": 0.03, "learning_rate": 1.7208413001912045e-08, "logits/chosen": -2.2211339473724365, "logits/rejected": -1.8958468437194824, "logps/chosen": -87.43402862548828, "logps/rejected": -84.70587158203125, "loss": 0.6953, "rewards/accuracies": 0.0, "rewards/chosen": -0.013893318362534046, "rewards/margins": -0.026289844885468483, "rewards/rejected": 0.012396525591611862, "step": 18 }, { "epoch": 0.03, "learning_rate": 1.8164435946462717e-08, "logits/chosen": -1.862783432006836, "logits/rejected": -1.9488188028335571, "logps/chosen": -69.97599792480469, "logps/rejected": -78.84602355957031, "loss": 0.6926, "rewards/accuracies": 0.75, "rewards/chosen": 0.03161907196044922, "rewards/margins": 0.020583629608154297, "rewards/rejected": 0.011035443283617496, "step": 19 }, { "epoch": 0.03, "learning_rate": 1.9120458891013385e-08, "logits/chosen": -1.8018039464950562, "logits/rejected": -2.257089614868164, "logps/chosen": -65.60667419433594, "logps/rejected": -78.18537902832031, "loss": 0.6951, "rewards/accuracies": 0.25, "rewards/chosen": -0.025450801476836205, "rewards/margins": -0.018368341028690338, "rewards/rejected": -0.007082462310791016, "step": 20 }, { "epoch": 0.04, "learning_rate": 2.007648183556405e-08, "logits/chosen": -2.0484588146209717, "logits/rejected": -1.9410511255264282, "logps/chosen": -95.14643096923828, "logps/rejected": -91.43789672851562, "loss": 0.6945, "rewards/accuracies": 1.0, "rewards/chosen": 0.026809118688106537, "rewards/margins": 0.026485061272978783, "rewards/rejected": 0.0003240583464503288, "step": 21 }, { "epoch": 0.04, "learning_rate": 2.1032504780114722e-08, "logits/chosen": -1.8727211952209473, "logits/rejected": -2.0498557090759277, "logps/chosen": -79.28569030761719, "logps/rejected": -87.12315368652344, "loss": 0.6922, "rewards/accuracies": 0.5, "rewards/chosen": 0.009914589114487171, "rewards/margins": 0.011578941717743874, "rewards/rejected": -0.0016643526032567024, "step": 22 }, { "epoch": 0.04, "learning_rate": 2.198852772466539e-08, "logits/chosen": -2.1930086612701416, "logits/rejected": -2.1740059852600098, "logps/chosen": -100.28849029541016, "logps/rejected": -86.52992248535156, "loss": 0.6924, "rewards/accuracies": 0.75, "rewards/chosen": 0.011839485727250576, "rewards/margins": 0.015399934723973274, "rewards/rejected": -0.00356044783256948, "step": 23 }, { "epoch": 0.04, "learning_rate": 2.2944550669216062e-08, "logits/chosen": -1.9021377563476562, "logits/rejected": -1.9288283586502075, "logps/chosen": -86.39988708496094, "logps/rejected": -90.15104675292969, "loss": 0.6879, "rewards/accuracies": 0.75, "rewards/chosen": 0.010869979858398438, "rewards/margins": 0.03175373375415802, "rewards/rejected": -0.020883753895759583, "step": 24 }, { "epoch": 0.04, "learning_rate": 2.3900573613766727e-08, "logits/chosen": -1.7363539934158325, "logits/rejected": -1.8282155990600586, "logps/chosen": -89.8712387084961, "logps/rejected": -88.73493194580078, "loss": 0.6965, "rewards/accuracies": 0.25, "rewards/chosen": -0.004950332920998335, "rewards/margins": -0.0131378173828125, "rewards/rejected": 0.008187484927475452, "step": 25 }, { "epoch": 0.04, "learning_rate": 2.48565965583174e-08, "logits/chosen": -1.7897191047668457, "logits/rejected": -2.129481315612793, "logps/chosen": -86.51022338867188, "logps/rejected": -101.07330322265625, "loss": 0.6947, "rewards/accuracies": 0.75, "rewards/chosen": 0.016119956970214844, "rewards/margins": 0.0334140807390213, "rewards/rejected": -0.01729412004351616, "step": 26 }, { "epoch": 0.05, "learning_rate": 2.5812619502868067e-08, "logits/chosen": -1.856147289276123, "logits/rejected": -2.178858995437622, "logps/chosen": -91.95254516601562, "logps/rejected": -111.70944213867188, "loss": 0.6865, "rewards/accuracies": 0.5, "rewards/chosen": -3.62396240234375e-05, "rewards/margins": -0.0029594418592751026, "rewards/rejected": 0.0029232031665742397, "step": 27 }, { "epoch": 0.05, "learning_rate": 2.676864244741874e-08, "logits/chosen": -2.064474105834961, "logits/rejected": -2.1797127723693848, "logps/chosen": -75.5852279663086, "logps/rejected": -90.88275146484375, "loss": 0.6888, "rewards/accuracies": 0.75, "rewards/chosen": 0.014455223456025124, "rewards/margins": 0.01707477681338787, "rewards/rejected": -0.0026195519603788853, "step": 28 }, { "epoch": 0.05, "learning_rate": 2.7724665391969407e-08, "logits/chosen": -1.7796107530593872, "logits/rejected": -2.023076295852661, "logps/chosen": -83.45785522460938, "logps/rejected": -98.63151550292969, "loss": 0.6927, "rewards/accuracies": 0.5, "rewards/chosen": 0.009675598703324795, "rewards/margins": -0.00416641216725111, "rewards/rejected": 0.013842010870575905, "step": 29 }, { "epoch": 0.05, "learning_rate": 2.8680688336520072e-08, "logits/chosen": -2.1726551055908203, "logits/rejected": -2.0169694423675537, "logps/chosen": -83.13931274414062, "logps/rejected": -69.66505432128906, "loss": 0.6924, "rewards/accuracies": 0.5, "rewards/chosen": 0.005389689933508635, "rewards/margins": -0.007461833767592907, "rewards/rejected": 0.012851523235440254, "step": 30 }, { "epoch": 0.05, "learning_rate": 2.9636711281070744e-08, "logits/chosen": -1.721571683883667, "logits/rejected": -2.194314956665039, "logps/chosen": -80.71511840820312, "logps/rejected": -94.54031372070312, "loss": 0.6954, "rewards/accuracies": 0.75, "rewards/chosen": -0.002954387804493308, "rewards/margins": 0.005715657025575638, "rewards/rejected": -0.008670044131577015, "step": 31 }, { "epoch": 0.06, "learning_rate": 3.059273422562141e-08, "logits/chosen": -1.9206441640853882, "logits/rejected": -2.0048513412475586, "logps/chosen": -95.30135345458984, "logps/rejected": -103.46625518798828, "loss": 0.6917, "rewards/accuracies": 0.0, "rewards/chosen": -0.023640252649784088, "rewards/margins": -0.018777847290039062, "rewards/rejected": -0.004862403962761164, "step": 32 }, { "epoch": 0.06, "learning_rate": 3.154875717017208e-08, "logits/chosen": -1.9335010051727295, "logits/rejected": -2.0241239070892334, "logps/chosen": -68.09516906738281, "logps/rejected": -73.98262023925781, "loss": 0.692, "rewards/accuracies": 0.75, "rewards/chosen": 0.011966800317168236, "rewards/margins": 0.01067447755485773, "rewards/rejected": 0.0012923236936330795, "step": 33 }, { "epoch": 0.06, "learning_rate": 3.250478011472275e-08, "logits/chosen": -2.1245276927948, "logits/rejected": -1.884817123413086, "logps/chosen": -110.49464416503906, "logps/rejected": -91.6221694946289, "loss": 0.6969, "rewards/accuracies": 0.5, "rewards/chosen": 0.008659553714096546, "rewards/margins": -0.005628777667880058, "rewards/rejected": 0.01428833045065403, "step": 34 }, { "epoch": 0.06, "learning_rate": 3.3460803059273424e-08, "logits/chosen": -1.781224250793457, "logits/rejected": -2.03633713722229, "logps/chosen": -84.92391967773438, "logps/rejected": -85.984130859375, "loss": 0.6891, "rewards/accuracies": 1.0, "rewards/chosen": -0.003984546754509211, "rewards/margins": 0.028141498565673828, "rewards/rejected": -0.0321260467171669, "step": 35 }, { "epoch": 0.06, "learning_rate": 3.441682600382409e-08, "logits/chosen": -1.8488883972167969, "logits/rejected": -2.363676071166992, "logps/chosen": -71.33898162841797, "logps/rejected": -96.13390350341797, "loss": 0.687, "rewards/accuracies": 0.25, "rewards/chosen": -0.009374617598950863, "rewards/margins": -0.008650684729218483, "rewards/rejected": -0.0007239347323775291, "step": 36 }, { "epoch": 0.06, "learning_rate": 3.5372848948374755e-08, "logits/chosen": -2.115004062652588, "logits/rejected": -1.967246174812317, "logps/chosen": -80.04134368896484, "logps/rejected": -72.97013092041016, "loss": 0.6942, "rewards/accuracies": 0.75, "rewards/chosen": -0.0061969757080078125, "rewards/margins": 0.005729102995246649, "rewards/rejected": -0.011926079168915749, "step": 37 }, { "epoch": 0.07, "learning_rate": 3.632887189292543e-08, "logits/chosen": -2.122972249984741, "logits/rejected": -2.007885456085205, "logps/chosen": -96.15853118896484, "logps/rejected": -67.67509460449219, "loss": 0.693, "rewards/accuracies": 0.75, "rewards/chosen": 0.007552910130470991, "rewards/margins": 0.003442097455263138, "rewards/rejected": 0.004110813140869141, "step": 38 }, { "epoch": 0.07, "learning_rate": 3.72848948374761e-08, "logits/chosen": -1.9455091953277588, "logits/rejected": -2.1947546005249023, "logps/chosen": -72.90967559814453, "logps/rejected": -89.94277954101562, "loss": 0.6879, "rewards/accuracies": 0.75, "rewards/chosen": 0.0005460737738758326, "rewards/margins": 0.005804442800581455, "rewards/rejected": -0.005258369259536266, "step": 39 }, { "epoch": 0.07, "learning_rate": 3.824091778202677e-08, "logits/chosen": -2.1482479572296143, "logits/rejected": -1.9467953443527222, "logps/chosen": -95.1553726196289, "logps/rejected": -86.8175277709961, "loss": 0.6974, "rewards/accuracies": 0.25, "rewards/chosen": -0.01059188786894083, "rewards/margins": -0.007581518962979317, "rewards/rejected": -0.003010367974638939, "step": 40 }, { "epoch": 0.07, "learning_rate": 3.9196940726577435e-08, "logits/chosen": -2.0594916343688965, "logits/rejected": -1.8669663667678833, "logps/chosen": -98.22380065917969, "logps/rejected": -85.51632690429688, "loss": 0.6933, "rewards/accuracies": 0.75, "rewards/chosen": -0.0036766056437045336, "rewards/margins": 0.00882720947265625, "rewards/rejected": -0.01250381488353014, "step": 41 }, { "epoch": 0.07, "learning_rate": 4.01529636711281e-08, "logits/chosen": -1.8558428287506104, "logits/rejected": -2.1459224224090576, "logps/chosen": -77.48761749267578, "logps/rejected": -82.30140686035156, "loss": 0.6923, "rewards/accuracies": 0.5, "rewards/chosen": -0.011723997071385384, "rewards/margins": -0.0038461703807115555, "rewards/rejected": -0.007877825759351254, "step": 42 }, { "epoch": 0.07, "learning_rate": 4.110898661567878e-08, "logits/chosen": -2.1504931449890137, "logits/rejected": -1.4087207317352295, "logps/chosen": -97.43284606933594, "logps/rejected": -73.0469970703125, "loss": 0.6901, "rewards/accuracies": 0.75, "rewards/chosen": 0.005084037780761719, "rewards/margins": 0.0024257656186819077, "rewards/rejected": 0.002658271696418524, "step": 43 }, { "epoch": 0.08, "learning_rate": 4.2065009560229444e-08, "logits/chosen": -1.870161533355713, "logits/rejected": -2.0222115516662598, "logps/chosen": -88.85574340820312, "logps/rejected": -95.01040649414062, "loss": 0.6883, "rewards/accuracies": 0.75, "rewards/chosen": 0.008381652645766735, "rewards/margins": 0.022541236132383347, "rewards/rejected": -0.014159584417939186, "step": 44 }, { "epoch": 0.08, "learning_rate": 4.3021032504780115e-08, "logits/chosen": -2.0832104682922363, "logits/rejected": -2.1220712661743164, "logps/chosen": -97.86566162109375, "logps/rejected": -95.08859252929688, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": 0.019474031403660774, "rewards/margins": -0.008159255608916283, "rewards/rejected": 0.027633287012577057, "step": 45 }, { "epoch": 0.08, "learning_rate": 4.397705544933078e-08, "logits/chosen": -1.8809243440628052, "logits/rejected": -2.148940324783325, "logps/chosen": -86.05230712890625, "logps/rejected": -98.91381072998047, "loss": 0.6876, "rewards/accuracies": 0.75, "rewards/chosen": -0.0006423951126635075, "rewards/margins": 0.012515068054199219, "rewards/rejected": -0.013157462701201439, "step": 46 }, { "epoch": 0.08, "learning_rate": 4.493307839388145e-08, "logits/chosen": -2.1083595752716064, "logits/rejected": -1.9968719482421875, "logps/chosen": -93.3061294555664, "logps/rejected": -79.9675064086914, "loss": 0.694, "rewards/accuracies": 0.75, "rewards/chosen": 0.021950911730527878, "rewards/margins": 0.017521096393465996, "rewards/rejected": 0.004429817199707031, "step": 47 }, { "epoch": 0.08, "learning_rate": 4.5889101338432124e-08, "logits/chosen": -1.4933611154556274, "logits/rejected": -2.4276492595672607, "logps/chosen": -84.44865417480469, "logps/rejected": -102.53071594238281, "loss": 0.6959, "rewards/accuracies": 0.25, "rewards/chosen": -0.007007217966020107, "rewards/margins": -0.012592029757797718, "rewards/rejected": 0.005584812257438898, "step": 48 }, { "epoch": 0.08, "learning_rate": 4.684512428298279e-08, "logits/chosen": -2.209707736968994, "logits/rejected": -1.7953914403915405, "logps/chosen": -95.1135482788086, "logps/rejected": -84.7956314086914, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": 0.006323433481156826, "rewards/margins": -0.006651114672422409, "rewards/rejected": 0.01297454908490181, "step": 49 }, { "epoch": 0.09, "learning_rate": 4.7801147227533454e-08, "logits/chosen": -1.8521829843521118, "logits/rejected": -2.1232047080993652, "logps/chosen": -66.45122528076172, "logps/rejected": -80.44654846191406, "loss": 0.692, "rewards/accuracies": 0.5, "rewards/chosen": 0.0031079286709427834, "rewards/margins": 0.012627411633729935, "rewards/rejected": -0.009519482031464577, "step": 50 }, { "epoch": 0.09, "learning_rate": 4.8757170172084126e-08, "logits/chosen": -1.8772484064102173, "logits/rejected": -2.2556190490722656, "logps/chosen": -106.3731918334961, "logps/rejected": -96.19992065429688, "loss": 0.687, "rewards/accuracies": 0.75, "rewards/chosen": 0.03895626217126846, "rewards/margins": 0.046126171946525574, "rewards/rejected": -0.007169914431869984, "step": 51 }, { "epoch": 0.09, "learning_rate": 4.97131931166348e-08, "logits/chosen": -1.8665354251861572, "logits/rejected": -2.10444974899292, "logps/chosen": -95.8636703491211, "logps/rejected": -98.71359252929688, "loss": 0.6895, "rewards/accuracies": 0.75, "rewards/chosen": 0.008492088876664639, "rewards/margins": 0.03004284016788006, "rewards/rejected": -0.021550752222537994, "step": 52 }, { "epoch": 0.09, "learning_rate": 5.066921606118547e-08, "logits/chosen": -1.9863166809082031, "logits/rejected": -2.073666572570801, "logps/chosen": -73.28507995605469, "logps/rejected": -75.96986389160156, "loss": 0.6905, "rewards/accuracies": 0.5, "rewards/chosen": 0.014775276184082031, "rewards/margins": 0.013177967630326748, "rewards/rejected": 0.001597309485077858, "step": 53 }, { "epoch": 0.09, "learning_rate": 5.1625239005736134e-08, "logits/chosen": -2.2947163581848145, "logits/rejected": -2.0196595191955566, "logps/chosen": -108.21220397949219, "logps/rejected": -96.63551330566406, "loss": 0.6916, "rewards/accuracies": 0.25, "rewards/chosen": -0.009195137768983841, "rewards/margins": -0.02207336388528347, "rewards/rejected": 0.012878227978944778, "step": 54 }, { "epoch": 0.09, "learning_rate": 5.25812619502868e-08, "logits/chosen": -1.943080186843872, "logits/rejected": -1.7942981719970703, "logps/chosen": -72.21722412109375, "logps/rejected": -67.79021453857422, "loss": 0.6942, "rewards/accuracies": 0.25, "rewards/chosen": -0.026096438989043236, "rewards/margins": -0.03181753307580948, "rewards/rejected": 0.005721092224121094, "step": 55 }, { "epoch": 0.1, "learning_rate": 5.353728489483748e-08, "logits/chosen": -1.8873443603515625, "logits/rejected": -2.0346248149871826, "logps/chosen": -75.26033020019531, "logps/rejected": -82.73161315917969, "loss": 0.6902, "rewards/accuracies": 1.0, "rewards/chosen": 0.005416584201157093, "rewards/margins": 0.019275760278105736, "rewards/rejected": -0.013859177008271217, "step": 56 }, { "epoch": 0.1, "learning_rate": 5.449330783938814e-08, "logits/chosen": -1.8880035877227783, "logits/rejected": -2.1627488136291504, "logps/chosen": -69.87973022460938, "logps/rejected": -77.38755798339844, "loss": 0.6915, "rewards/accuracies": 0.75, "rewards/chosen": -0.016570186242461205, "rewards/margins": 0.0037951464764773846, "rewards/rejected": -0.020365333184599876, "step": 57 }, { "epoch": 0.1, "learning_rate": 5.5449330783938815e-08, "logits/chosen": -2.162766456604004, "logits/rejected": -1.779099702835083, "logps/chosen": -82.07647705078125, "logps/rejected": -81.96194458007812, "loss": 0.6908, "rewards/accuracies": 0.25, "rewards/chosen": -0.00926132220774889, "rewards/margins": -0.023522377014160156, "rewards/rejected": 0.014261053875088692, "step": 58 }, { "epoch": 0.1, "learning_rate": 5.640535372848948e-08, "logits/chosen": -1.8307300806045532, "logits/rejected": -1.9980918169021606, "logps/chosen": -91.0003433227539, "logps/rejected": -82.33019256591797, "loss": 0.6958, "rewards/accuracies": 0.5, "rewards/chosen": -0.004237746354192495, "rewards/margins": 3.719329833984375e-05, "rewards/rejected": -0.004274940583854914, "step": 59 }, { "epoch": 0.1, "learning_rate": 5.7361376673040145e-08, "logits/chosen": -2.0998995304107666, "logits/rejected": -1.683275580406189, "logps/chosen": -92.83942413330078, "logps/rejected": -83.5606918334961, "loss": 0.6976, "rewards/accuracies": 0.5, "rewards/chosen": 0.00039997114799916744, "rewards/margins": -0.019201183691620827, "rewards/rejected": 0.019601155072450638, "step": 60 }, { "epoch": 0.1, "learning_rate": 5.831739961759082e-08, "logits/chosen": -2.0808870792388916, "logits/rejected": -2.0301318168640137, "logps/chosen": -69.93807220458984, "logps/rejected": -75.227783203125, "loss": 0.6828, "rewards/accuracies": 0.75, "rewards/chosen": 0.012164783664047718, "rewards/margins": 0.021799469366669655, "rewards/rejected": -0.009634685702621937, "step": 61 }, { "epoch": 0.11, "learning_rate": 5.927342256214149e-08, "logits/chosen": -2.0852179527282715, "logits/rejected": -2.090135097503662, "logps/chosen": -97.69342041015625, "logps/rejected": -88.2237319946289, "loss": 0.6946, "rewards/accuracies": 0.75, "rewards/chosen": 0.0027275087777525187, "rewards/margins": 0.011728476732969284, "rewards/rejected": -0.009000970050692558, "step": 62 }, { "epoch": 0.11, "learning_rate": 6.022944550669216e-08, "logits/chosen": -1.9971768856048584, "logits/rejected": -2.113734006881714, "logps/chosen": -104.79315948486328, "logps/rejected": -109.17648315429688, "loss": 0.6944, "rewards/accuracies": 0.75, "rewards/chosen": -0.008572387509047985, "rewards/margins": 0.007094956003129482, "rewards/rejected": -0.015667343512177467, "step": 63 }, { "epoch": 0.11, "learning_rate": 6.118546845124282e-08, "logits/chosen": -1.6366215944290161, "logits/rejected": -2.072248935699463, "logps/chosen": -82.47084045410156, "logps/rejected": -98.27986907958984, "loss": 0.6913, "rewards/accuracies": 0.5, "rewards/chosen": -0.0006220817449502647, "rewards/margins": -0.003582858480513096, "rewards/rejected": 0.002960776910185814, "step": 64 }, { "epoch": 0.11, "learning_rate": 6.21414913957935e-08, "logits/chosen": -2.1108782291412354, "logits/rejected": -1.8905881643295288, "logps/chosen": -91.6669921875, "logps/rejected": -75.38764953613281, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": -0.007580948527902365, "rewards/margins": -0.0016424180939793587, "rewards/rejected": -0.005938530899584293, "step": 65 }, { "epoch": 0.11, "learning_rate": 6.309751434034416e-08, "logits/chosen": -2.2708775997161865, "logits/rejected": -2.034946918487549, "logps/chosen": -66.63204956054688, "logps/rejected": -67.05463409423828, "loss": 0.697, "rewards/accuracies": 0.75, "rewards/chosen": 0.007603549864143133, "rewards/margins": 0.006682205013930798, "rewards/rejected": 0.0009213448502123356, "step": 66 }, { "epoch": 0.12, "learning_rate": 6.405353728489483e-08, "logits/chosen": -2.2508692741394043, "logits/rejected": -1.9091815948486328, "logps/chosen": -89.10067749023438, "logps/rejected": -74.66455078125, "loss": 0.6907, "rewards/accuracies": 0.5, "rewards/chosen": -0.0045919413678348064, "rewards/margins": -0.003899573814123869, "rewards/rejected": -0.0006923676119185984, "step": 67 }, { "epoch": 0.12, "learning_rate": 6.50095602294455e-08, "logits/chosen": -1.8949716091156006, "logits/rejected": -1.9303662776947021, "logps/chosen": -75.9842529296875, "logps/rejected": -87.71788787841797, "loss": 0.6919, "rewards/accuracies": 0.25, "rewards/chosen": 0.01770162582397461, "rewards/margins": -0.007425404619425535, "rewards/rejected": 0.025127029046416283, "step": 68 }, { "epoch": 0.12, "learning_rate": 6.596558317399616e-08, "logits/chosen": -2.1218032836914062, "logits/rejected": -1.989557147026062, "logps/chosen": -92.18951416015625, "logps/rejected": -92.56341552734375, "loss": 0.6996, "rewards/accuracies": 0.25, "rewards/chosen": -0.01454258058220148, "rewards/margins": -0.01589660719037056, "rewards/rejected": 0.0013540268409997225, "step": 69 }, { "epoch": 0.12, "learning_rate": 6.692160611854685e-08, "logits/chosen": -2.2218785285949707, "logits/rejected": -1.933761715888977, "logps/chosen": -79.99283599853516, "logps/rejected": -68.86131286621094, "loss": 0.694, "rewards/accuracies": 0.25, "rewards/chosen": -0.007529831491410732, "rewards/margins": -0.02394132688641548, "rewards/rejected": 0.016411494463682175, "step": 70 }, { "epoch": 0.12, "learning_rate": 6.787762906309751e-08, "logits/chosen": -1.9753592014312744, "logits/rejected": -2.0100302696228027, "logps/chosen": -78.76881408691406, "logps/rejected": -86.09394836425781, "loss": 0.6862, "rewards/accuracies": 0.75, "rewards/chosen": 0.004292106255888939, "rewards/margins": 0.014547157101333141, "rewards/rejected": -0.010255050845444202, "step": 71 }, { "epoch": 0.12, "learning_rate": 6.883365200764818e-08, "logits/chosen": -2.058505058288574, "logits/rejected": -2.0855326652526855, "logps/chosen": -88.02342987060547, "logps/rejected": -104.74211120605469, "loss": 0.6932, "rewards/accuracies": 0.0, "rewards/chosen": -0.011709021404385567, "rewards/margins": -0.03680457919836044, "rewards/rejected": 0.025095559656620026, "step": 72 }, { "epoch": 0.13, "learning_rate": 6.978967495219885e-08, "logits/chosen": -1.8977375030517578, "logits/rejected": -2.00032901763916, "logps/chosen": -72.31184387207031, "logps/rejected": -83.7205581665039, "loss": 0.6874, "rewards/accuracies": 0.75, "rewards/chosen": 0.004976082127541304, "rewards/margins": 0.005161284934729338, "rewards/rejected": -0.00018520327284932137, "step": 73 }, { "epoch": 0.13, "learning_rate": 7.074569789674951e-08, "logits/chosen": -2.019408941268921, "logits/rejected": -2.1969990730285645, "logps/chosen": -90.87122344970703, "logps/rejected": -100.66471099853516, "loss": 0.6988, "rewards/accuracies": 0.25, "rewards/chosen": -0.010412026196718216, "rewards/margins": -0.03891925886273384, "rewards/rejected": 0.028507232666015625, "step": 74 }, { "epoch": 0.13, "learning_rate": 7.17017208413002e-08, "logits/chosen": -1.999540090560913, "logits/rejected": -2.0702321529388428, "logps/chosen": -93.35003662109375, "logps/rejected": -113.99481201171875, "loss": 0.692, "rewards/accuracies": 0.25, "rewards/chosen": -0.028576374053955078, "rewards/margins": -0.02386160008609295, "rewards/rejected": -0.004714774899184704, "step": 75 }, { "epoch": 0.13, "learning_rate": 7.265774378585087e-08, "logits/chosen": -2.251020908355713, "logits/rejected": -1.7957446575164795, "logps/chosen": -100.34861755371094, "logps/rejected": -95.8778305053711, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": -0.013412093743681908, "rewards/margins": 0.006436347961425781, "rewards/rejected": -0.019848443567752838, "step": 76 }, { "epoch": 0.13, "learning_rate": 7.361376673040152e-08, "logits/chosen": -1.895275354385376, "logits/rejected": -2.0624117851257324, "logps/chosen": -86.33174133300781, "logps/rejected": -97.32064056396484, "loss": 0.6985, "rewards/accuracies": 0.75, "rewards/chosen": 0.015626192092895508, "rewards/margins": 0.038881637156009674, "rewards/rejected": -0.023255443200469017, "step": 77 }, { "epoch": 0.13, "learning_rate": 7.45697896749522e-08, "logits/chosen": -2.0112130641937256, "logits/rejected": -1.7417092323303223, "logps/chosen": -88.84105682373047, "logps/rejected": -77.52669525146484, "loss": 0.6932, "rewards/accuracies": 0.75, "rewards/chosen": 0.0026439670473337173, "rewards/margins": 0.008667183108627796, "rewards/rejected": -0.006023216526955366, "step": 78 }, { "epoch": 0.14, "learning_rate": 7.552581261950285e-08, "logits/chosen": -1.6780600547790527, "logits/rejected": -2.0981197357177734, "logps/chosen": -78.30056762695312, "logps/rejected": -90.92897033691406, "loss": 0.6924, "rewards/accuracies": 0.5, "rewards/chosen": -0.00499725341796875, "rewards/margins": 0.01568756252527237, "rewards/rejected": -0.02068481408059597, "step": 79 }, { "epoch": 0.14, "learning_rate": 7.648183556405354e-08, "logits/chosen": -1.7849457263946533, "logits/rejected": -2.210995674133301, "logps/chosen": -67.76716613769531, "logps/rejected": -89.24943542480469, "loss": 0.6937, "rewards/accuracies": 0.25, "rewards/chosen": -0.007939242757856846, "rewards/margins": -0.010801602154970169, "rewards/rejected": 0.002862358232960105, "step": 80 }, { "epoch": 0.14, "learning_rate": 7.743785850860421e-08, "logits/chosen": -1.8508048057556152, "logits/rejected": -2.259955644607544, "logps/chosen": -80.18157958984375, "logps/rejected": -79.9675064086914, "loss": 0.6867, "rewards/accuracies": 0.75, "rewards/chosen": -0.0013567921705543995, "rewards/margins": 0.004898357205092907, "rewards/rejected": -0.006255150772631168, "step": 81 }, { "epoch": 0.14, "learning_rate": 7.839388145315487e-08, "logits/chosen": -2.0330569744110107, "logits/rejected": -1.8426048755645752, "logps/chosen": -103.13678741455078, "logps/rejected": -81.00886535644531, "loss": 0.6884, "rewards/accuracies": 0.75, "rewards/chosen": 0.020973777398467064, "rewards/margins": 0.011779308319091797, "rewards/rejected": 0.009194469079375267, "step": 82 }, { "epoch": 0.14, "learning_rate": 7.934990439770554e-08, "logits/chosen": -1.8976035118103027, "logits/rejected": -1.8718129396438599, "logps/chosen": -87.85549926757812, "logps/rejected": -99.32231140136719, "loss": 0.6927, "rewards/accuracies": 1.0, "rewards/chosen": 0.024402428418397903, "rewards/margins": 0.03474884107708931, "rewards/rejected": -0.010346412658691406, "step": 83 }, { "epoch": 0.14, "learning_rate": 8.03059273422562e-08, "logits/chosen": -1.6928048133850098, "logits/rejected": -2.2161009311676025, "logps/chosen": -66.28378295898438, "logps/rejected": -74.25349426269531, "loss": 0.6875, "rewards/accuracies": 0.5, "rewards/chosen": -0.0003774641081690788, "rewards/margins": -0.006328010465949774, "rewards/rejected": 0.005950546357780695, "step": 84 }, { "epoch": 0.15, "learning_rate": 8.126195028680689e-08, "logits/chosen": -1.852860689163208, "logits/rejected": -2.059957981109619, "logps/chosen": -84.96211242675781, "logps/rejected": -95.5157470703125, "loss": 0.6937, "rewards/accuracies": 0.75, "rewards/chosen": 0.013517857529222965, "rewards/margins": 0.007370663806796074, "rewards/rejected": 0.006147194653749466, "step": 85 }, { "epoch": 0.15, "learning_rate": 8.221797323135756e-08, "logits/chosen": -1.9072657823562622, "logits/rejected": -2.0564942359924316, "logps/chosen": -66.64222717285156, "logps/rejected": -81.80054473876953, "loss": 0.6893, "rewards/accuracies": 0.25, "rewards/chosen": -0.0041525838896632195, "rewards/margins": -0.01516561396420002, "rewards/rejected": 0.011013031005859375, "step": 86 }, { "epoch": 0.15, "learning_rate": 8.317399617590822e-08, "logits/chosen": -2.0715067386627197, "logits/rejected": -1.9132726192474365, "logps/chosen": -96.06746673583984, "logps/rejected": -98.55844116210938, "loss": 0.6962, "rewards/accuracies": 0.25, "rewards/chosen": -0.009243871085345745, "rewards/margins": -0.014315510168671608, "rewards/rejected": 0.0050716400146484375, "step": 87 }, { "epoch": 0.15, "learning_rate": 8.413001912045889e-08, "logits/chosen": -1.798439383506775, "logits/rejected": -2.0467638969421387, "logps/chosen": -98.78282165527344, "logps/rejected": -96.81950378417969, "loss": 0.6872, "rewards/accuracies": 1.0, "rewards/chosen": 0.03795566409826279, "rewards/margins": 0.024779319763183594, "rewards/rejected": 0.013176346197724342, "step": 88 }, { "epoch": 0.15, "learning_rate": 8.508604206500955e-08, "logits/chosen": -1.707331657409668, "logits/rejected": -2.0587661266326904, "logps/chosen": -100.00350189208984, "logps/rejected": -103.0768814086914, "loss": 0.6921, "rewards/accuracies": 0.75, "rewards/chosen": 0.0018703462556004524, "rewards/margins": 0.01828746870160103, "rewards/rejected": -0.016417121514678, "step": 89 }, { "epoch": 0.15, "learning_rate": 8.604206500956023e-08, "logits/chosen": -2.189800977706909, "logits/rejected": -2.008476495742798, "logps/chosen": -101.76670837402344, "logps/rejected": -99.65896606445312, "loss": 0.6895, "rewards/accuracies": 1.0, "rewards/chosen": 0.009818267077207565, "rewards/margins": 0.012778853997588158, "rewards/rejected": -0.002960586454719305, "step": 90 }, { "epoch": 0.16, "learning_rate": 8.69980879541109e-08, "logits/chosen": -2.0588490962982178, "logits/rejected": -2.053804397583008, "logps/chosen": -77.49336242675781, "logps/rejected": -80.24893188476562, "loss": 0.691, "rewards/accuracies": 0.75, "rewards/chosen": -0.004797077737748623, "rewards/margins": -0.0020169259514659643, "rewards/rejected": -0.002780151553452015, "step": 91 }, { "epoch": 0.16, "learning_rate": 8.795411089866156e-08, "logits/chosen": -2.1115262508392334, "logits/rejected": -1.9881036281585693, "logps/chosen": -90.370849609375, "logps/rejected": -84.6987075805664, "loss": 0.6935, "rewards/accuracies": 0.75, "rewards/chosen": -0.007176113780587912, "rewards/margins": 0.014744283631443977, "rewards/rejected": -0.021920396015048027, "step": 92 }, { "epoch": 0.16, "learning_rate": 8.891013384321223e-08, "logits/chosen": -1.793448805809021, "logits/rejected": -2.2045488357543945, "logps/chosen": -90.88945770263672, "logps/rejected": -91.27722930908203, "loss": 0.6874, "rewards/accuracies": 0.75, "rewards/chosen": 0.03639660030603409, "rewards/margins": 0.0361144058406353, "rewards/rejected": 0.0002821926027536392, "step": 93 }, { "epoch": 0.16, "learning_rate": 8.98661567877629e-08, "logits/chosen": -2.082095146179199, "logits/rejected": -2.056790351867676, "logps/chosen": -74.50213623046875, "logps/rejected": -84.76109313964844, "loss": 0.6836, "rewards/accuracies": 0.5, "rewards/chosen": 0.007322024554014206, "rewards/margins": 0.01457986794412136, "rewards/rejected": -0.0072578429244458675, "step": 94 }, { "epoch": 0.16, "learning_rate": 9.082217973231358e-08, "logits/chosen": -1.8937642574310303, "logits/rejected": -1.801027536392212, "logps/chosen": -77.54779815673828, "logps/rejected": -85.71172332763672, "loss": 0.6908, "rewards/accuracies": 0.75, "rewards/chosen": 0.007098770700395107, "rewards/margins": 0.0119139663875103, "rewards/rejected": -0.004815197084099054, "step": 95 }, { "epoch": 0.17, "learning_rate": 9.177820267686425e-08, "logits/chosen": -1.8004670143127441, "logits/rejected": -2.2921316623687744, "logps/chosen": -72.50743865966797, "logps/rejected": -94.8070297241211, "loss": 0.6896, "rewards/accuracies": 0.75, "rewards/chosen": 0.0035018925555050373, "rewards/margins": -0.005100155249238014, "rewards/rejected": 0.008602047339081764, "step": 96 }, { "epoch": 0.17, "learning_rate": 9.27342256214149e-08, "logits/chosen": -1.9794337749481201, "logits/rejected": -2.1253180503845215, "logps/chosen": -88.6761703491211, "logps/rejected": -89.59420776367188, "loss": 0.694, "rewards/accuracies": 0.5, "rewards/chosen": -0.037819959223270416, "rewards/margins": -0.02331247366964817, "rewards/rejected": -0.014507484622299671, "step": 97 }, { "epoch": 0.17, "learning_rate": 9.369024856596558e-08, "logits/chosen": -2.0653018951416016, "logits/rejected": -1.972257375717163, "logps/chosen": -96.17794799804688, "logps/rejected": -93.1509017944336, "loss": 0.6928, "rewards/accuracies": 0.25, "rewards/chosen": -0.02135639265179634, "rewards/margins": -0.011679267510771751, "rewards/rejected": -0.00967712327837944, "step": 98 }, { "epoch": 0.17, "learning_rate": 9.464627151051625e-08, "logits/chosen": -1.7898035049438477, "logits/rejected": -2.0992255210876465, "logps/chosen": -75.44658660888672, "logps/rejected": -83.91547393798828, "loss": 0.6869, "rewards/accuracies": 0.5, "rewards/chosen": 0.007830810733139515, "rewards/margins": 4.5204535126686096e-05, "rewards/rejected": 0.007785606198012829, "step": 99 }, { "epoch": 0.17, "learning_rate": 9.560229445506691e-08, "logits/chosen": -2.01501727104187, "logits/rejected": -2.1055569648742676, "logps/chosen": -77.62606048583984, "logps/rejected": -86.22711944580078, "loss": 0.6932, "rewards/accuracies": 0.25, "rewards/chosen": -0.018511008471250534, "rewards/margins": -0.014472580514848232, "rewards/rejected": -0.004038429353386164, "step": 100 }, { "epoch": 0.17, "learning_rate": 9.655831739961759e-08, "logits/chosen": -2.1337757110595703, "logits/rejected": -1.883650302886963, "logps/chosen": -95.52398681640625, "logps/rejected": -86.48275756835938, "loss": 0.6953, "rewards/accuracies": 0.5, "rewards/chosen": -0.005775642581284046, "rewards/margins": 0.0035406125243753195, "rewards/rejected": -0.00931625347584486, "step": 101 }, { "epoch": 0.18, "learning_rate": 9.751434034416825e-08, "logits/chosen": -1.7105845212936401, "logits/rejected": -2.0506558418273926, "logps/chosen": -64.66902160644531, "logps/rejected": -72.6435317993164, "loss": 0.6861, "rewards/accuracies": 0.75, "rewards/chosen": 0.012487554922699928, "rewards/margins": 0.012920999899506569, "rewards/rejected": -0.0004334448021836579, "step": 102 }, { "epoch": 0.18, "learning_rate": 9.847036328871892e-08, "logits/chosen": -1.6967827081680298, "logits/rejected": -2.1134285926818848, "logps/chosen": -81.02777099609375, "logps/rejected": -91.46859741210938, "loss": 0.6953, "rewards/accuracies": 0.5, "rewards/chosen": -0.018747616559267044, "rewards/margins": 0.002442836295813322, "rewards/rejected": -0.021190453320741653, "step": 103 }, { "epoch": 0.18, "learning_rate": 9.94263862332696e-08, "logits/chosen": -2.0928850173950195, "logits/rejected": -2.1403679847717285, "logps/chosen": -80.16477966308594, "logps/rejected": -89.70281982421875, "loss": 0.6916, "rewards/accuracies": 0.5, "rewards/chosen": -0.013430595397949219, "rewards/margins": 0.007893180474638939, "rewards/rejected": -0.021323775872588158, "step": 104 }, { "epoch": 0.18, "learning_rate": 1.0038240917782025e-07, "logits/chosen": -1.904998540878296, "logits/rejected": -2.145491600036621, "logps/chosen": -81.01983642578125, "logps/rejected": -104.33941650390625, "loss": 0.692, "rewards/accuracies": 0.75, "rewards/chosen": 0.007326127029955387, "rewards/margins": -0.003706548362970352, "rewards/rejected": 0.011032676324248314, "step": 105 }, { "epoch": 0.18, "learning_rate": 1.0133843212237094e-07, "logits/chosen": -2.2491495609283447, "logits/rejected": -2.1434881687164307, "logps/chosen": -78.23795318603516, "logps/rejected": -82.8675765991211, "loss": 0.6955, "rewards/accuracies": 0.5, "rewards/chosen": 0.0020238878205418587, "rewards/margins": -0.006517220754176378, "rewards/rejected": 0.008541107177734375, "step": 106 }, { "epoch": 0.18, "learning_rate": 1.022944550669216e-07, "logits/chosen": -1.7723805904388428, "logits/rejected": -2.011291027069092, "logps/chosen": -86.94017028808594, "logps/rejected": -95.7523193359375, "loss": 0.6886, "rewards/accuracies": 1.0, "rewards/chosen": 0.004932594485580921, "rewards/margins": 0.041603945195674896, "rewards/rejected": -0.03667135164141655, "step": 107 }, { "epoch": 0.19, "learning_rate": 1.0325047801147227e-07, "logits/chosen": -1.9822427034378052, "logits/rejected": -2.2398126125335693, "logps/chosen": -71.08271789550781, "logps/rejected": -81.57892608642578, "loss": 0.6898, "rewards/accuracies": 0.25, "rewards/chosen": -0.04971466213464737, "rewards/margins": -0.02704925648868084, "rewards/rejected": -0.02266540564596653, "step": 108 }, { "epoch": 0.19, "learning_rate": 1.0420650095602294e-07, "logits/chosen": -1.866703987121582, "logits/rejected": -2.054093837738037, "logps/chosen": -89.85597229003906, "logps/rejected": -101.01898193359375, "loss": 0.6922, "rewards/accuracies": 0.75, "rewards/chosen": 0.015700722113251686, "rewards/margins": 0.014740753918886185, "rewards/rejected": 0.0009599681943655014, "step": 109 }, { "epoch": 0.19, "learning_rate": 1.051625239005736e-07, "logits/chosen": -2.351750135421753, "logits/rejected": -2.1896538734436035, "logps/chosen": -108.87242126464844, "logps/rejected": -101.06710815429688, "loss": 0.6869, "rewards/accuracies": 0.5, "rewards/chosen": -0.00951328407973051, "rewards/margins": 0.0037703507114201784, "rewards/rejected": -0.01328363362699747, "step": 110 }, { "epoch": 0.19, "learning_rate": 1.0611854684512428e-07, "logits/chosen": -2.140810012817383, "logits/rejected": -2.0565903186798096, "logps/chosen": -102.59649658203125, "logps/rejected": -107.99884033203125, "loss": 0.6897, "rewards/accuracies": 0.75, "rewards/chosen": 0.0042247772216796875, "rewards/margins": 0.011222075670957565, "rewards/rejected": -0.006997299380600452, "step": 111 }, { "epoch": 0.19, "learning_rate": 1.0707456978967496e-07, "logits/chosen": -1.9057955741882324, "logits/rejected": -1.996309757232666, "logps/chosen": -68.713623046875, "logps/rejected": -76.16781616210938, "loss": 0.6881, "rewards/accuracies": 0.75, "rewards/chosen": 0.008276271633803844, "rewards/margins": 0.01805276982486248, "rewards/rejected": -0.009776498191058636, "step": 112 }, { "epoch": 0.19, "learning_rate": 1.0803059273422561e-07, "logits/chosen": -2.085484743118286, "logits/rejected": -2.0367612838745117, "logps/chosen": -89.98314666748047, "logps/rejected": -87.2674560546875, "loss": 0.6915, "rewards/accuracies": 0.5, "rewards/chosen": -0.010871506296098232, "rewards/margins": 0.022335432469844818, "rewards/rejected": -0.033206939697265625, "step": 113 }, { "epoch": 0.2, "learning_rate": 1.0898661567877629e-07, "logits/chosen": -2.005657434463501, "logits/rejected": -2.0419154167175293, "logps/chosen": -104.6557846069336, "logps/rejected": -104.09737396240234, "loss": 0.6903, "rewards/accuracies": 0.5, "rewards/chosen": -0.026770401746034622, "rewards/margins": -0.0014116289094090462, "rewards/rejected": -0.02535877376794815, "step": 114 }, { "epoch": 0.2, "learning_rate": 1.0994263862332694e-07, "logits/chosen": -1.851632833480835, "logits/rejected": -1.9441828727722168, "logps/chosen": -72.67768859863281, "logps/rejected": -86.00860595703125, "loss": 0.6927, "rewards/accuracies": 0.75, "rewards/chosen": 0.004082202911376953, "rewards/margins": 0.029244424775242805, "rewards/rejected": -0.025162221863865852, "step": 115 }, { "epoch": 0.2, "learning_rate": 1.1089866156787763e-07, "logits/chosen": -2.323197364807129, "logits/rejected": -2.1916277408599854, "logps/chosen": -78.31240844726562, "logps/rejected": -80.92047119140625, "loss": 0.6866, "rewards/accuracies": 0.5, "rewards/chosen": -0.016902733594179153, "rewards/margins": 0.013776780106127262, "rewards/rejected": -0.03067951463162899, "step": 116 }, { "epoch": 0.2, "learning_rate": 1.118546845124283e-07, "logits/chosen": -2.097135305404663, "logits/rejected": -1.79258131980896, "logps/chosen": -79.87498474121094, "logps/rejected": -67.79788970947266, "loss": 0.6892, "rewards/accuracies": 0.25, "rewards/chosen": -0.004557228181511164, "rewards/margins": -0.00907058734446764, "rewards/rejected": 0.004513359162956476, "step": 117 }, { "epoch": 0.2, "learning_rate": 1.1281070745697896e-07, "logits/chosen": -2.149089813232422, "logits/rejected": -1.9276138544082642, "logps/chosen": -86.39649200439453, "logps/rejected": -79.61372375488281, "loss": 0.6911, "rewards/accuracies": 0.5, "rewards/chosen": -0.024860573932528496, "rewards/margins": -0.020652199164032936, "rewards/rejected": -0.004208373837172985, "step": 118 }, { "epoch": 0.2, "learning_rate": 1.1376673040152963e-07, "logits/chosen": -1.9146298170089722, "logits/rejected": -2.1184191703796387, "logps/chosen": -84.82962036132812, "logps/rejected": -89.53099060058594, "loss": 0.6878, "rewards/accuracies": 0.5, "rewards/chosen": -0.0289979949593544, "rewards/margins": 0.018861960619688034, "rewards/rejected": -0.047859955579042435, "step": 119 }, { "epoch": 0.21, "learning_rate": 1.1472275334608029e-07, "logits/chosen": -1.9448047876358032, "logits/rejected": -2.247081995010376, "logps/chosen": -72.7546615600586, "logps/rejected": -77.8631591796875, "loss": 0.6937, "rewards/accuracies": 0.5, "rewards/chosen": -0.0009989738464355469, "rewards/margins": 0.008613873273134232, "rewards/rejected": -0.009612847119569778, "step": 120 }, { "epoch": 0.21, "learning_rate": 1.1567877629063097e-07, "logits/chosen": -1.840713381767273, "logits/rejected": -2.1097073554992676, "logps/chosen": -77.39634704589844, "logps/rejected": -79.5125961303711, "loss": 0.6878, "rewards/accuracies": 0.75, "rewards/chosen": -0.011694527231156826, "rewards/margins": 0.04827594384551048, "rewards/rejected": -0.05997047200798988, "step": 121 }, { "epoch": 0.21, "learning_rate": 1.1663479923518165e-07, "logits/chosen": -1.8348169326782227, "logits/rejected": -2.0719995498657227, "logps/chosen": -79.7616958618164, "logps/rejected": -77.21018981933594, "loss": 0.685, "rewards/accuracies": 0.75, "rewards/chosen": 0.004237270914018154, "rewards/margins": -0.004750823602080345, "rewards/rejected": 0.0089880945160985, "step": 122 }, { "epoch": 0.21, "learning_rate": 1.175908221797323e-07, "logits/chosen": -2.0266730785369873, "logits/rejected": -2.1426944732666016, "logps/chosen": -90.95858001708984, "logps/rejected": -101.54270935058594, "loss": 0.686, "rewards/accuracies": 0.75, "rewards/chosen": -0.01539249625056982, "rewards/margins": 0.04183521121740341, "rewards/rejected": -0.05722770839929581, "step": 123 }, { "epoch": 0.21, "learning_rate": 1.1854684512428298e-07, "logits/chosen": -1.9156184196472168, "logits/rejected": -1.8945732116699219, "logps/chosen": -104.69676208496094, "logps/rejected": -89.06524658203125, "loss": 0.684, "rewards/accuracies": 0.75, "rewards/chosen": 0.0053369514644145966, "rewards/margins": 0.013402365148067474, "rewards/rejected": -0.008065413683652878, "step": 124 }, { "epoch": 0.22, "learning_rate": 1.1950286806883364e-07, "logits/chosen": -1.8994051218032837, "logits/rejected": -2.2185535430908203, "logps/chosen": -82.61456298828125, "logps/rejected": -95.51473236083984, "loss": 0.6955, "rewards/accuracies": 0.5, "rewards/chosen": -0.010074043646454811, "rewards/margins": -0.029547695070505142, "rewards/rejected": 0.019473649561405182, "step": 125 }, { "epoch": 0.22, "learning_rate": 1.2045889101338432e-07, "logits/chosen": -2.0512638092041016, "logits/rejected": -2.094785451889038, "logps/chosen": -83.861328125, "logps/rejected": -85.88685607910156, "loss": 0.6875, "rewards/accuracies": 0.5, "rewards/chosen": -0.024892617017030716, "rewards/margins": -0.0012428294867277145, "rewards/rejected": -0.02364978939294815, "step": 126 }, { "epoch": 0.22, "learning_rate": 1.2141491395793498e-07, "logits/chosen": -1.6789567470550537, "logits/rejected": -2.2587924003601074, "logps/chosen": -80.51075744628906, "logps/rejected": -89.31449890136719, "loss": 0.6903, "rewards/accuracies": 0.75, "rewards/chosen": 0.015065574087202549, "rewards/margins": -0.018056489527225494, "rewards/rejected": 0.03312206268310547, "step": 127 }, { "epoch": 0.22, "learning_rate": 1.2237093690248564e-07, "logits/chosen": -1.887876033782959, "logits/rejected": -2.0513367652893066, "logps/chosen": -82.66722106933594, "logps/rejected": -72.19084167480469, "loss": 0.6872, "rewards/accuracies": 0.0, "rewards/chosen": -0.0415155403316021, "rewards/margins": -0.03352327644824982, "rewards/rejected": -0.007992267608642578, "step": 128 }, { "epoch": 0.22, "learning_rate": 1.2332695984703632e-07, "logits/chosen": -1.998863697052002, "logits/rejected": -2.0596237182617188, "logps/chosen": -93.73526000976562, "logps/rejected": -94.39118957519531, "loss": 0.6967, "rewards/accuracies": 0.5, "rewards/chosen": 0.004163932986557484, "rewards/margins": -0.009954835288226604, "rewards/rejected": 0.014118767343461514, "step": 129 }, { "epoch": 0.22, "learning_rate": 1.24282982791587e-07, "logits/chosen": -2.0331943035125732, "logits/rejected": -2.0675535202026367, "logps/chosen": -96.22083282470703, "logps/rejected": -84.53948974609375, "loss": 0.6941, "rewards/accuracies": 0.25, "rewards/chosen": 0.001357079017907381, "rewards/margins": -0.0065652853809297085, "rewards/rejected": 0.00792236439883709, "step": 130 }, { "epoch": 0.23, "learning_rate": 1.2523900573613767e-07, "logits/chosen": -2.0610125064849854, "logits/rejected": -1.8648321628570557, "logps/chosen": -93.57881164550781, "logps/rejected": -86.1031265258789, "loss": 0.6926, "rewards/accuracies": 0.5, "rewards/chosen": 0.00601615896448493, "rewards/margins": -0.006127549335360527, "rewards/rejected": 0.012143706902861595, "step": 131 }, { "epoch": 0.23, "learning_rate": 1.2619502868068832e-07, "logits/chosen": -2.0217723846435547, "logits/rejected": -2.0370912551879883, "logps/chosen": -82.89396667480469, "logps/rejected": -79.90837097167969, "loss": 0.6934, "rewards/accuracies": 0.25, "rewards/chosen": -0.02605600282549858, "rewards/margins": -0.034224797040224075, "rewards/rejected": 0.00816879328340292, "step": 132 }, { "epoch": 0.23, "learning_rate": 1.27151051625239e-07, "logits/chosen": -1.9038512706756592, "logits/rejected": -2.1973910331726074, "logps/chosen": -86.78309631347656, "logps/rejected": -98.04620361328125, "loss": 0.6857, "rewards/accuracies": 1.0, "rewards/chosen": 0.0011821752414107323, "rewards/margins": 0.04322891682386398, "rewards/rejected": -0.04204673692584038, "step": 133 }, { "epoch": 0.23, "learning_rate": 1.2810707456978967e-07, "logits/chosen": -2.0580224990844727, "logits/rejected": -1.8067877292633057, "logps/chosen": -97.42086791992188, "logps/rejected": -83.2459716796875, "loss": 0.6827, "rewards/accuracies": 0.75, "rewards/chosen": 0.03525238111615181, "rewards/margins": 0.06293907761573792, "rewards/rejected": -0.027686692774295807, "step": 134 }, { "epoch": 0.23, "learning_rate": 1.2906309751434033e-07, "logits/chosen": -1.9336568117141724, "logits/rejected": -2.1647772789001465, "logps/chosen": -82.81147766113281, "logps/rejected": -84.9268798828125, "loss": 0.6742, "rewards/accuracies": 0.5, "rewards/chosen": 0.038218118250370026, "rewards/margins": 0.016420364379882812, "rewards/rejected": 0.021797753870487213, "step": 135 }, { "epoch": 0.23, "learning_rate": 1.30019120458891e-07, "logits/chosen": -1.9897410869598389, "logits/rejected": -2.0390942096710205, "logps/chosen": -73.21003723144531, "logps/rejected": -86.53271484375, "loss": 0.6811, "rewards/accuracies": 0.5, "rewards/chosen": -0.01108694076538086, "rewards/margins": -0.005732156336307526, "rewards/rejected": -0.005354786291718483, "step": 136 }, { "epoch": 0.24, "learning_rate": 1.3097514340344167e-07, "logits/chosen": -1.473192572593689, "logits/rejected": -2.0860323905944824, "logps/chosen": -119.82948303222656, "logps/rejected": -106.8944091796875, "loss": 0.6808, "rewards/accuracies": 1.0, "rewards/chosen": 0.0409734770655632, "rewards/margins": 0.09783420711755753, "rewards/rejected": -0.05686073377728462, "step": 137 }, { "epoch": 0.24, "learning_rate": 1.3193116634799233e-07, "logits/chosen": -2.051887035369873, "logits/rejected": -1.9666911363601685, "logps/chosen": -81.25946044921875, "logps/rejected": -83.72895812988281, "loss": 0.6916, "rewards/accuracies": 0.25, "rewards/chosen": -0.06756591796875, "rewards/margins": -0.08306284248828888, "rewards/rejected": 0.015496921725571156, "step": 138 }, { "epoch": 0.24, "learning_rate": 1.3288718929254304e-07, "logits/chosen": -2.1616151332855225, "logits/rejected": -2.1738905906677246, "logps/chosen": -79.03445434570312, "logps/rejected": -84.82156372070312, "loss": 0.6806, "rewards/accuracies": 0.5, "rewards/chosen": 0.011741353198885918, "rewards/margins": 0.013873006217181683, "rewards/rejected": -0.002131653018295765, "step": 139 }, { "epoch": 0.24, "learning_rate": 1.338432122370937e-07, "logits/chosen": -1.9197206497192383, "logits/rejected": -1.960654377937317, "logps/chosen": -72.39884948730469, "logps/rejected": -77.66154479980469, "loss": 0.691, "rewards/accuracies": 0.75, "rewards/chosen": 0.014089488424360752, "rewards/margins": 0.015266893431544304, "rewards/rejected": -0.0011774058220908046, "step": 140 }, { "epoch": 0.24, "learning_rate": 1.3479923518164436e-07, "logits/chosen": -2.061363935470581, "logits/rejected": -2.185084819793701, "logps/chosen": -60.47782516479492, "logps/rejected": -75.48606872558594, "loss": 0.6862, "rewards/accuracies": 1.0, "rewards/chosen": 0.0030567175708711147, "rewards/margins": 0.048383235931396484, "rewards/rejected": -0.04532651975750923, "step": 141 }, { "epoch": 0.24, "learning_rate": 1.3575525812619501e-07, "logits/chosen": -2.11572265625, "logits/rejected": -1.987998604774475, "logps/chosen": -75.93724822998047, "logps/rejected": -85.7891845703125, "loss": 0.6835, "rewards/accuracies": 0.5, "rewards/chosen": 0.03792419657111168, "rewards/margins": 0.005649472586810589, "rewards/rejected": 0.032274723052978516, "step": 142 }, { "epoch": 0.25, "learning_rate": 1.367112810707457e-07, "logits/chosen": -1.9678645133972168, "logits/rejected": -2.125974655151367, "logps/chosen": -86.83773803710938, "logps/rejected": -76.92704772949219, "loss": 0.6836, "rewards/accuracies": 0.75, "rewards/chosen": -0.019086839631199837, "rewards/margins": 0.022608565166592598, "rewards/rejected": -0.04169540852308273, "step": 143 }, { "epoch": 0.25, "learning_rate": 1.3766730401529636e-07, "logits/chosen": -2.026113510131836, "logits/rejected": -2.142690420150757, "logps/chosen": -63.73603057861328, "logps/rejected": -75.86422729492188, "loss": 0.6972, "rewards/accuracies": 0.5, "rewards/chosen": 0.007749463431537151, "rewards/margins": -0.009206676855683327, "rewards/rejected": 0.016956137493252754, "step": 144 }, { "epoch": 0.25, "learning_rate": 1.3862332695984702e-07, "logits/chosen": -1.6465694904327393, "logits/rejected": -2.024968147277832, "logps/chosen": -71.3321762084961, "logps/rejected": -98.19916534423828, "loss": 0.6753, "rewards/accuracies": 0.75, "rewards/chosen": 0.005157568491995335, "rewards/margins": -0.013774774968624115, "rewards/rejected": 0.018932342529296875, "step": 145 }, { "epoch": 0.25, "learning_rate": 1.395793499043977e-07, "logits/chosen": -2.079864263534546, "logits/rejected": -1.8471872806549072, "logps/chosen": -102.06462097167969, "logps/rejected": -75.34146118164062, "loss": 0.7044, "rewards/accuracies": 0.0, "rewards/chosen": -0.07597008347511292, "rewards/margins": -0.05650759115815163, "rewards/rejected": -0.01946248859167099, "step": 146 }, { "epoch": 0.25, "learning_rate": 1.4053537284894836e-07, "logits/chosen": -1.9634253978729248, "logits/rejected": -1.9958703517913818, "logps/chosen": -76.38963317871094, "logps/rejected": -84.43247985839844, "loss": 0.6784, "rewards/accuracies": 1.0, "rewards/chosen": 0.00818023830652237, "rewards/margins": 0.06291981041431427, "rewards/rejected": -0.0547395758330822, "step": 147 }, { "epoch": 0.25, "learning_rate": 1.4149139579349902e-07, "logits/chosen": -2.0604543685913086, "logits/rejected": -2.2111663818359375, "logps/chosen": -86.34759521484375, "logps/rejected": -86.99099731445312, "loss": 0.687, "rewards/accuracies": 0.75, "rewards/chosen": 0.029210472479462624, "rewards/margins": 0.02067279815673828, "rewards/rejected": 0.008537673391401768, "step": 148 }, { "epoch": 0.26, "learning_rate": 1.4244741873804973e-07, "logits/chosen": -1.6996080875396729, "logits/rejected": -2.362722158432007, "logps/chosen": -105.27664184570312, "logps/rejected": -109.97016906738281, "loss": 0.6757, "rewards/accuracies": 0.5, "rewards/chosen": -0.0038002014625817537, "rewards/margins": -0.00629920931532979, "rewards/rejected": 0.0024990083184093237, "step": 149 }, { "epoch": 0.26, "learning_rate": 1.434034416826004e-07, "logits/chosen": -1.5085639953613281, "logits/rejected": -2.1798079013824463, "logps/chosen": -73.30286407470703, "logps/rejected": -102.65129089355469, "loss": 0.6776, "rewards/accuracies": 1.0, "rewards/chosen": 0.003406524658203125, "rewards/margins": 0.06936664879322052, "rewards/rejected": -0.0659601241350174, "step": 150 }, { "epoch": 0.26, "learning_rate": 1.4435946462715105e-07, "logits/chosen": -2.1882522106170654, "logits/rejected": -1.7559213638305664, "logps/chosen": -72.14685821533203, "logps/rejected": -85.45406341552734, "loss": 0.6771, "rewards/accuracies": 1.0, "rewards/chosen": -0.006225872784852982, "rewards/margins": 0.02293873019516468, "rewards/rejected": -0.029164601117372513, "step": 151 }, { "epoch": 0.26, "learning_rate": 1.4531548757170173e-07, "logits/chosen": -1.6647648811340332, "logits/rejected": -2.180492401123047, "logps/chosen": -79.9472427368164, "logps/rejected": -106.456298828125, "loss": 0.6843, "rewards/accuracies": 0.75, "rewards/chosen": 0.020037079229950905, "rewards/margins": 0.06030254438519478, "rewards/rejected": -0.040265463292598724, "step": 152 }, { "epoch": 0.26, "learning_rate": 1.462715105162524e-07, "logits/chosen": -2.247687339782715, "logits/rejected": -1.8695828914642334, "logps/chosen": -77.40975189208984, "logps/rejected": -74.20671844482422, "loss": 0.6833, "rewards/accuracies": 0.25, "rewards/chosen": -0.048043154180049896, "rewards/margins": -0.0645410567522049, "rewards/rejected": 0.0164978988468647, "step": 153 }, { "epoch": 0.27, "learning_rate": 1.4722753346080305e-07, "logits/chosen": -2.149657726287842, "logits/rejected": -2.0334067344665527, "logps/chosen": -77.05068969726562, "logps/rejected": -72.01443481445312, "loss": 0.6771, "rewards/accuracies": 0.25, "rewards/chosen": -0.046113014221191406, "rewards/margins": -0.0038986224681138992, "rewards/rejected": -0.042214393615722656, "step": 154 }, { "epoch": 0.27, "learning_rate": 1.481835564053537e-07, "logits/chosen": -2.0029568672180176, "logits/rejected": -1.812103509902954, "logps/chosen": -87.56387329101562, "logps/rejected": -90.25425720214844, "loss": 0.6735, "rewards/accuracies": 0.75, "rewards/chosen": 0.0697532668709755, "rewards/margins": 0.04719962924718857, "rewards/rejected": 0.022553633898496628, "step": 155 }, { "epoch": 0.27, "learning_rate": 1.491395793499044e-07, "logits/chosen": -1.8339329957962036, "logits/rejected": -2.1894707679748535, "logps/chosen": -92.29591369628906, "logps/rejected": -96.1429672241211, "loss": 0.6774, "rewards/accuracies": 0.75, "rewards/chosen": 0.021735381335020065, "rewards/margins": 0.027240945026278496, "rewards/rejected": -0.005505561828613281, "step": 156 }, { "epoch": 0.27, "learning_rate": 1.5009560229445505e-07, "logits/chosen": -2.097494602203369, "logits/rejected": -2.0686445236206055, "logps/chosen": -80.81083679199219, "logps/rejected": -74.941650390625, "loss": 0.6808, "rewards/accuracies": 0.75, "rewards/chosen": 0.03006305918097496, "rewards/margins": 0.09397716820240021, "rewards/rejected": -0.06391411274671555, "step": 157 }, { "epoch": 0.27, "learning_rate": 1.510516252390057e-07, "logits/chosen": -2.0120325088500977, "logits/rejected": -1.930579423904419, "logps/chosen": -72.5203857421875, "logps/rejected": -80.71542358398438, "loss": 0.6887, "rewards/accuracies": 0.5, "rewards/chosen": 0.009774971753358841, "rewards/margins": -0.013417433947324753, "rewards/rejected": 0.023192405700683594, "step": 158 }, { "epoch": 0.27, "learning_rate": 1.5200764818355642e-07, "logits/chosen": -1.833253026008606, "logits/rejected": -2.090336799621582, "logps/chosen": -64.63916015625, "logps/rejected": -83.2510757446289, "loss": 0.6842, "rewards/accuracies": 0.75, "rewards/chosen": 0.05318107455968857, "rewards/margins": 0.02349109947681427, "rewards/rejected": 0.029689980670809746, "step": 159 }, { "epoch": 0.28, "learning_rate": 1.5296367112810708e-07, "logits/chosen": -1.8794105052947998, "logits/rejected": -2.107675790786743, "logps/chosen": -90.71235656738281, "logps/rejected": -89.65370178222656, "loss": 0.6679, "rewards/accuracies": 0.75, "rewards/chosen": 0.013424682430922985, "rewards/margins": 0.05992593616247177, "rewards/rejected": -0.04650125652551651, "step": 160 }, { "epoch": 0.28, "learning_rate": 1.5391969407265774e-07, "logits/chosen": -2.125720262527466, "logits/rejected": -1.9413738250732422, "logps/chosen": -82.65763854980469, "logps/rejected": -73.42992401123047, "loss": 0.6798, "rewards/accuracies": 0.5, "rewards/chosen": -0.030356502160429955, "rewards/margins": -0.019707394763827324, "rewards/rejected": -0.010649111121892929, "step": 161 }, { "epoch": 0.28, "learning_rate": 1.5487571701720842e-07, "logits/chosen": -1.8284618854522705, "logits/rejected": -1.966083288192749, "logps/chosen": -73.56945037841797, "logps/rejected": -84.1612777709961, "loss": 0.6655, "rewards/accuracies": 1.0, "rewards/chosen": 0.06775932759046555, "rewards/margins": 0.14958296716213226, "rewards/rejected": -0.08182363212108612, "step": 162 }, { "epoch": 0.28, "learning_rate": 1.5583173996175908e-07, "logits/chosen": -2.101426839828491, "logits/rejected": -2.0001561641693115, "logps/chosen": -65.1329345703125, "logps/rejected": -78.58509063720703, "loss": 0.6744, "rewards/accuracies": 0.5, "rewards/chosen": -0.008165359497070312, "rewards/margins": 0.03560343012213707, "rewards/rejected": -0.04376878961920738, "step": 163 }, { "epoch": 0.28, "learning_rate": 1.5678776290630974e-07, "logits/chosen": -1.9888408184051514, "logits/rejected": -2.073159694671631, "logps/chosen": -76.79391479492188, "logps/rejected": -78.6104736328125, "loss": 0.6787, "rewards/accuracies": 0.25, "rewards/chosen": 0.06388969719409943, "rewards/margins": -0.00462083937600255, "rewards/rejected": 0.06851053237915039, "step": 164 }, { "epoch": 0.28, "learning_rate": 1.577437858508604e-07, "logits/chosen": -2.1813488006591797, "logits/rejected": -2.12565279006958, "logps/chosen": -96.11878204345703, "logps/rejected": -77.09060668945312, "loss": 0.675, "rewards/accuracies": 0.25, "rewards/chosen": 0.02959156036376953, "rewards/margins": 0.024843785911798477, "rewards/rejected": 0.004747772589325905, "step": 165 }, { "epoch": 0.29, "learning_rate": 1.5869980879541108e-07, "logits/chosen": -2.1093034744262695, "logits/rejected": -2.2518744468688965, "logps/chosen": -60.775638580322266, "logps/rejected": -66.5081787109375, "loss": 0.6872, "rewards/accuracies": 0.75, "rewards/chosen": 0.04994554817676544, "rewards/margins": 0.01261520478874445, "rewards/rejected": 0.03733034059405327, "step": 166 }, { "epoch": 0.29, "learning_rate": 1.5965583173996174e-07, "logits/chosen": -1.6785200834274292, "logits/rejected": -2.192922592163086, "logps/chosen": -71.26847076416016, "logps/rejected": -108.57334899902344, "loss": 0.675, "rewards/accuracies": 1.0, "rewards/chosen": 0.08569536358118057, "rewards/margins": 0.14268293976783752, "rewards/rejected": -0.056987568736076355, "step": 167 }, { "epoch": 0.29, "learning_rate": 1.606118546845124e-07, "logits/chosen": -2.0372235774993896, "logits/rejected": -2.158473491668701, "logps/chosen": -97.85472106933594, "logps/rejected": -100.99180603027344, "loss": 0.6695, "rewards/accuracies": 0.5, "rewards/chosen": -0.0801902785897255, "rewards/margins": -0.07113132625818253, "rewards/rejected": -0.009058952331542969, "step": 168 }, { "epoch": 0.29, "learning_rate": 1.6156787762906309e-07, "logits/chosen": -2.037719249725342, "logits/rejected": -2.057281494140625, "logps/chosen": -66.14596557617188, "logps/rejected": -72.39103698730469, "loss": 0.6786, "rewards/accuracies": 0.5, "rewards/chosen": 0.01718912087380886, "rewards/margins": 0.016586877405643463, "rewards/rejected": 0.0006022471934556961, "step": 169 }, { "epoch": 0.29, "learning_rate": 1.6252390057361377e-07, "logits/chosen": -2.0109241008758545, "logits/rejected": -2.1032979488372803, "logps/chosen": -94.89728546142578, "logps/rejected": -108.04664611816406, "loss": 0.6788, "rewards/accuracies": 0.75, "rewards/chosen": 0.056072041392326355, "rewards/margins": 0.09376831352710724, "rewards/rejected": -0.037696268409490585, "step": 170 }, { "epoch": 0.29, "learning_rate": 1.6347992351816443e-07, "logits/chosen": -2.193460464477539, "logits/rejected": -1.6476753950119019, "logps/chosen": -96.44576263427734, "logps/rejected": -92.20896911621094, "loss": 0.673, "rewards/accuracies": 0.75, "rewards/chosen": 0.06227397918701172, "rewards/margins": 0.0305507630109787, "rewards/rejected": 0.03172321245074272, "step": 171 }, { "epoch": 0.3, "learning_rate": 1.6443594646271511e-07, "logits/chosen": -1.954669713973999, "logits/rejected": -1.944104790687561, "logps/chosen": -78.14715576171875, "logps/rejected": -85.49861145019531, "loss": 0.6645, "rewards/accuracies": 0.75, "rewards/chosen": 0.026815416291356087, "rewards/margins": 0.07287750393152237, "rewards/rejected": -0.04606208950281143, "step": 172 }, { "epoch": 0.3, "learning_rate": 1.6539196940726577e-07, "logits/chosen": -1.9467296600341797, "logits/rejected": -2.0636816024780273, "logps/chosen": -79.05654907226562, "logps/rejected": -85.17572784423828, "loss": 0.6806, "rewards/accuracies": 0.0, "rewards/chosen": 0.0319635383784771, "rewards/margins": -0.048168472945690155, "rewards/rejected": 0.08013200759887695, "step": 173 }, { "epoch": 0.3, "learning_rate": 1.6634799235181643e-07, "logits/chosen": -2.1246750354766846, "logits/rejected": -2.1404266357421875, "logps/chosen": -70.24715423583984, "logps/rejected": -70.00772094726562, "loss": 0.6697, "rewards/accuracies": 1.0, "rewards/chosen": 0.08219842612743378, "rewards/margins": 0.11397028714418411, "rewards/rejected": -0.03177185356616974, "step": 174 }, { "epoch": 0.3, "learning_rate": 1.6730401529636712e-07, "logits/chosen": -1.7552320957183838, "logits/rejected": -2.0971481800079346, "logps/chosen": -69.39833068847656, "logps/rejected": -80.50474548339844, "loss": 0.6724, "rewards/accuracies": 0.75, "rewards/chosen": -0.005201627034693956, "rewards/margins": 0.009834861382842064, "rewards/rejected": -0.015036487951874733, "step": 175 }, { "epoch": 0.3, "learning_rate": 1.6826003824091777e-07, "logits/chosen": -1.8097730875015259, "logits/rejected": -2.0107240676879883, "logps/chosen": -94.68180847167969, "logps/rejected": -83.18202209472656, "loss": 0.6632, "rewards/accuracies": 0.75, "rewards/chosen": 0.027524184435606003, "rewards/margins": 0.11567115783691406, "rewards/rejected": -0.08814697712659836, "step": 176 }, { "epoch": 0.3, "learning_rate": 1.6921606118546843e-07, "logits/chosen": -1.587547779083252, "logits/rejected": -2.2653989791870117, "logps/chosen": -65.6724853515625, "logps/rejected": -90.29216003417969, "loss": 0.6767, "rewards/accuracies": 0.25, "rewards/chosen": -0.055333565920591354, "rewards/margins": -0.07188878208398819, "rewards/rejected": 0.016555214300751686, "step": 177 }, { "epoch": 0.31, "learning_rate": 1.701720841300191e-07, "logits/chosen": -1.847864031791687, "logits/rejected": -2.1235451698303223, "logps/chosen": -73.68803405761719, "logps/rejected": -93.09024810791016, "loss": 0.6824, "rewards/accuracies": 0.5, "rewards/chosen": 0.10633183270692825, "rewards/margins": 0.054844096302986145, "rewards/rejected": 0.05148772895336151, "step": 178 }, { "epoch": 0.31, "learning_rate": 1.7112810707456978e-07, "logits/chosen": -1.9442909955978394, "logits/rejected": -1.9560751914978027, "logps/chosen": -78.44960021972656, "logps/rejected": -82.65908813476562, "loss": 0.6548, "rewards/accuracies": 0.5, "rewards/chosen": 0.09008923172950745, "rewards/margins": 0.0670686736702919, "rewards/rejected": 0.023020554333925247, "step": 179 }, { "epoch": 0.31, "learning_rate": 1.7208413001912046e-07, "logits/chosen": -1.7315177917480469, "logits/rejected": -2.0197079181671143, "logps/chosen": -77.55375671386719, "logps/rejected": -82.6791000366211, "loss": 0.6657, "rewards/accuracies": 1.0, "rewards/chosen": 0.10804262012243271, "rewards/margins": 0.13484802842140198, "rewards/rejected": -0.026805400848388672, "step": 180 }, { "epoch": 0.31, "learning_rate": 1.7304015296367112e-07, "logits/chosen": -1.7608474493026733, "logits/rejected": -1.9905986785888672, "logps/chosen": -78.10124206542969, "logps/rejected": -91.48824310302734, "loss": 0.6733, "rewards/accuracies": 0.75, "rewards/chosen": 0.0807337760925293, "rewards/margins": 0.06015234440565109, "rewards/rejected": 0.020581435412168503, "step": 181 }, { "epoch": 0.31, "learning_rate": 1.739961759082218e-07, "logits/chosen": -1.811152696609497, "logits/rejected": -2.080037832260132, "logps/chosen": -81.0771255493164, "logps/rejected": -86.25516510009766, "loss": 0.6741, "rewards/accuracies": 0.75, "rewards/chosen": 0.09394387900829315, "rewards/margins": 0.09091272950172424, "rewards/rejected": 0.003031158819794655, "step": 182 }, { "epoch": 0.31, "learning_rate": 1.7495219885277246e-07, "logits/chosen": -1.7531386613845825, "logits/rejected": -2.1469967365264893, "logps/chosen": -80.56843566894531, "logps/rejected": -88.08788299560547, "loss": 0.6681, "rewards/accuracies": 1.0, "rewards/chosen": 0.07740669697523117, "rewards/margins": 0.10836392641067505, "rewards/rejected": -0.03095722384750843, "step": 183 }, { "epoch": 0.32, "learning_rate": 1.7590822179732312e-07, "logits/chosen": -2.056152820587158, "logits/rejected": -2.0004310607910156, "logps/chosen": -95.39852905273438, "logps/rejected": -105.60749816894531, "loss": 0.6741, "rewards/accuracies": 0.5, "rewards/chosen": 0.011477090418338776, "rewards/margins": 0.040204811841249466, "rewards/rejected": -0.02872772328555584, "step": 184 }, { "epoch": 0.32, "learning_rate": 1.768642447418738e-07, "logits/chosen": -2.0511906147003174, "logits/rejected": -2.421954393386841, "logps/chosen": -99.71271514892578, "logps/rejected": -113.00950622558594, "loss": 0.6827, "rewards/accuracies": 0.5, "rewards/chosen": -0.03636055067181587, "rewards/margins": -0.019182398915290833, "rewards/rejected": -0.01717815361917019, "step": 185 }, { "epoch": 0.32, "learning_rate": 1.7782026768642447e-07, "logits/chosen": -2.079601764678955, "logits/rejected": -2.062284231185913, "logps/chosen": -92.79335021972656, "logps/rejected": -89.71634674072266, "loss": 0.6678, "rewards/accuracies": 0.25, "rewards/chosen": -0.052999306470155716, "rewards/margins": -0.07184143364429474, "rewards/rejected": 0.018842125311493874, "step": 186 }, { "epoch": 0.32, "learning_rate": 1.7877629063097512e-07, "logits/chosen": -1.936375617980957, "logits/rejected": -1.9955167770385742, "logps/chosen": -86.71530151367188, "logps/rejected": -83.90714263916016, "loss": 0.6527, "rewards/accuracies": 1.0, "rewards/chosen": 0.0736573189496994, "rewards/margins": 0.0542876273393631, "rewards/rejected": 0.019369695335626602, "step": 187 }, { "epoch": 0.32, "learning_rate": 1.797323135755258e-07, "logits/chosen": -1.8387945890426636, "logits/rejected": -2.0744235515594482, "logps/chosen": -70.45050048828125, "logps/rejected": -85.3461685180664, "loss": 0.6847, "rewards/accuracies": 0.75, "rewards/chosen": 0.054482366889715195, "rewards/margins": 0.014467909932136536, "rewards/rejected": 0.04001445695757866, "step": 188 }, { "epoch": 0.33, "learning_rate": 1.8068833652007647e-07, "logits/chosen": -1.9833062887191772, "logits/rejected": -2.0346946716308594, "logps/chosen": -72.9024887084961, "logps/rejected": -72.0979232788086, "loss": 0.661, "rewards/accuracies": 0.75, "rewards/chosen": 0.05903949961066246, "rewards/margins": 0.10741862654685974, "rewards/rejected": -0.048379138112068176, "step": 189 }, { "epoch": 0.33, "learning_rate": 1.8164435946462715e-07, "logits/chosen": -2.1433496475219727, "logits/rejected": -1.7727655172348022, "logps/chosen": -79.31265258789062, "logps/rejected": -65.89323425292969, "loss": 0.6586, "rewards/accuracies": 0.5, "rewards/chosen": 0.05091075599193573, "rewards/margins": -0.0029837600886821747, "rewards/rejected": 0.0538945198059082, "step": 190 }, { "epoch": 0.33, "learning_rate": 1.826003824091778e-07, "logits/chosen": -2.121910333633423, "logits/rejected": -2.274935245513916, "logps/chosen": -96.62612915039062, "logps/rejected": -97.98896789550781, "loss": 0.6687, "rewards/accuracies": 0.5, "rewards/chosen": -0.05269375070929527, "rewards/margins": 0.09120502322912216, "rewards/rejected": -0.14389877021312714, "step": 191 }, { "epoch": 0.33, "learning_rate": 1.835564053537285e-07, "logits/chosen": -1.9107463359832764, "logits/rejected": -1.9514799118041992, "logps/chosen": -64.98454284667969, "logps/rejected": -85.65718078613281, "loss": 0.6896, "rewards/accuracies": 0.5, "rewards/chosen": 0.06905098259449005, "rewards/margins": 0.07583427429199219, "rewards/rejected": -0.006783295422792435, "step": 192 }, { "epoch": 0.33, "learning_rate": 1.8451242829827915e-07, "logits/chosen": -2.206336498260498, "logits/rejected": -2.007627487182617, "logps/chosen": -109.32470703125, "logps/rejected": -105.25208282470703, "loss": 0.6617, "rewards/accuracies": 0.5, "rewards/chosen": 0.013206098228693008, "rewards/margins": -0.014446640387177467, "rewards/rejected": 0.027652742341160774, "step": 193 }, { "epoch": 0.33, "learning_rate": 1.854684512428298e-07, "logits/chosen": -1.804894208908081, "logits/rejected": -2.0043466091156006, "logps/chosen": -78.26168823242188, "logps/rejected": -89.57063293457031, "loss": 0.6648, "rewards/accuracies": 0.5, "rewards/chosen": 0.02784891240298748, "rewards/margins": 0.02238636091351509, "rewards/rejected": 0.005462550558149815, "step": 194 }, { "epoch": 0.34, "learning_rate": 1.864244741873805e-07, "logits/chosen": -2.0821352005004883, "logits/rejected": -2.0329928398132324, "logps/chosen": -85.58139038085938, "logps/rejected": -84.44700622558594, "loss": 0.6998, "rewards/accuracies": 0.5, "rewards/chosen": 0.1163061261177063, "rewards/margins": 0.07476826012134552, "rewards/rejected": 0.04153785854578018, "step": 195 }, { "epoch": 0.34, "learning_rate": 1.8738049713193116e-07, "logits/chosen": -1.811051607131958, "logits/rejected": -1.8718805313110352, "logps/chosen": -76.22531127929688, "logps/rejected": -96.99215698242188, "loss": 0.6962, "rewards/accuracies": 0.5, "rewards/chosen": -0.04395170509815216, "rewards/margins": -0.036281682550907135, "rewards/rejected": -0.007670022547245026, "step": 196 }, { "epoch": 0.34, "learning_rate": 1.8833652007648181e-07, "logits/chosen": -1.9112403392791748, "logits/rejected": -2.0459389686584473, "logps/chosen": -112.83050537109375, "logps/rejected": -91.16242218017578, "loss": 0.6599, "rewards/accuracies": 0.25, "rewards/chosen": -0.1631181836128235, "rewards/margins": -0.10583782196044922, "rewards/rejected": -0.05728035047650337, "step": 197 }, { "epoch": 0.34, "learning_rate": 1.892925430210325e-07, "logits/chosen": -2.0204176902770996, "logits/rejected": -2.144561767578125, "logps/chosen": -68.50086975097656, "logps/rejected": -77.5340805053711, "loss": 0.6511, "rewards/accuracies": 0.5, "rewards/chosen": 0.07064952701330185, "rewards/margins": 0.11010923236608505, "rewards/rejected": -0.0394597053527832, "step": 198 }, { "epoch": 0.34, "learning_rate": 1.9024856596558316e-07, "logits/chosen": -2.0761868953704834, "logits/rejected": -1.752086877822876, "logps/chosen": -88.53147888183594, "logps/rejected": -85.36076354980469, "loss": 0.6745, "rewards/accuracies": 0.5, "rewards/chosen": -0.00433654710650444, "rewards/margins": 0.014907646924257278, "rewards/rejected": -0.019244195893406868, "step": 199 }, { "epoch": 0.34, "learning_rate": 1.9120458891013382e-07, "logits/chosen": -1.799019455909729, "logits/rejected": -1.9904450178146362, "logps/chosen": -74.13819885253906, "logps/rejected": -84.17031860351562, "loss": 0.6471, "rewards/accuracies": 1.0, "rewards/chosen": 0.1153654158115387, "rewards/margins": 0.11825113743543625, "rewards/rejected": -0.0028857216238975525, "step": 200 }, { "epoch": 0.35, "learning_rate": 1.921606118546845e-07, "logits/chosen": -2.174722671508789, "logits/rejected": -1.4282867908477783, "logps/chosen": -92.97573852539062, "logps/rejected": -67.83287048339844, "loss": 0.6662, "rewards/accuracies": 0.5, "rewards/chosen": 0.07161254435777664, "rewards/margins": 0.05945253372192383, "rewards/rejected": 0.012160016223788261, "step": 201 }, { "epoch": 0.35, "learning_rate": 1.9311663479923519e-07, "logits/chosen": -1.8409643173217773, "logits/rejected": -2.1533396244049072, "logps/chosen": -89.74612426757812, "logps/rejected": -104.1021957397461, "loss": 0.6439, "rewards/accuracies": 0.75, "rewards/chosen": 0.09973106533288956, "rewards/margins": 0.31002044677734375, "rewards/rejected": -0.2102893739938736, "step": 202 }, { "epoch": 0.35, "learning_rate": 1.9407265774378584e-07, "logits/chosen": -2.043947458267212, "logits/rejected": -2.0543510913848877, "logps/chosen": -88.28569030761719, "logps/rejected": -90.34358978271484, "loss": 0.6761, "rewards/accuracies": 0.75, "rewards/chosen": 0.09315109252929688, "rewards/margins": 0.06318693608045578, "rewards/rejected": 0.029964162036776543, "step": 203 }, { "epoch": 0.35, "learning_rate": 1.950286806883365e-07, "logits/chosen": -2.043626546859741, "logits/rejected": -1.9875961542129517, "logps/chosen": -93.68898010253906, "logps/rejected": -96.40731811523438, "loss": 0.699, "rewards/accuracies": 0.5, "rewards/chosen": 0.050043102353811264, "rewards/margins": 0.06509514153003693, "rewards/rejected": -0.015052035450935364, "step": 204 }, { "epoch": 0.35, "learning_rate": 1.959847036328872e-07, "logits/chosen": -1.7600555419921875, "logits/rejected": -2.236328601837158, "logps/chosen": -80.88528442382812, "logps/rejected": -98.71256256103516, "loss": 0.6625, "rewards/accuracies": 0.5, "rewards/chosen": -0.051836106926202774, "rewards/margins": -0.0033642780035734177, "rewards/rejected": -0.048471830785274506, "step": 205 }, { "epoch": 0.35, "learning_rate": 1.9694072657743785e-07, "logits/chosen": -1.9607679843902588, "logits/rejected": -1.926255226135254, "logps/chosen": -91.31061553955078, "logps/rejected": -85.26671600341797, "loss": 0.6781, "rewards/accuracies": 1.0, "rewards/chosen": 0.1263103485107422, "rewards/margins": 0.10280437767505646, "rewards/rejected": 0.02350597269833088, "step": 206 }, { "epoch": 0.36, "learning_rate": 1.978967495219885e-07, "logits/chosen": -2.023005247116089, "logits/rejected": -2.0882344245910645, "logps/chosen": -85.94258117675781, "logps/rejected": -96.73451232910156, "loss": 0.6612, "rewards/accuracies": 0.75, "rewards/chosen": 0.13450735807418823, "rewards/margins": 0.11333847045898438, "rewards/rejected": 0.021168898791074753, "step": 207 }, { "epoch": 0.36, "learning_rate": 1.988527724665392e-07, "logits/chosen": -2.0668540000915527, "logits/rejected": -2.1046364307403564, "logps/chosen": -79.6681900024414, "logps/rejected": -81.6088638305664, "loss": 0.6491, "rewards/accuracies": 0.25, "rewards/chosen": -0.07702207565307617, "rewards/margins": -0.06475534290075302, "rewards/rejected": -0.01226673275232315, "step": 208 }, { "epoch": 0.36, "learning_rate": 1.9980879541108985e-07, "logits/chosen": -1.9002277851104736, "logits/rejected": -2.0934462547302246, "logps/chosen": -83.62168884277344, "logps/rejected": -89.2113265991211, "loss": 0.6573, "rewards/accuracies": 0.75, "rewards/chosen": 0.0633154883980751, "rewards/margins": 0.17756490409374237, "rewards/rejected": -0.11424942314624786, "step": 209 }, { "epoch": 0.36, "learning_rate": 2.007648183556405e-07, "logits/chosen": -2.166749954223633, "logits/rejected": -1.9344794750213623, "logps/chosen": -88.73641967773438, "logps/rejected": -83.30070495605469, "loss": 0.6577, "rewards/accuracies": 1.0, "rewards/chosen": 0.07879000157117844, "rewards/margins": 0.11041631549596786, "rewards/rejected": -0.031626321375370026, "step": 210 }, { "epoch": 0.36, "learning_rate": 2.0172084130019122e-07, "logits/chosen": -2.059098243713379, "logits/rejected": -2.2167208194732666, "logps/chosen": -79.47830200195312, "logps/rejected": -87.41736602783203, "loss": 0.6671, "rewards/accuracies": 0.75, "rewards/chosen": 0.06206941604614258, "rewards/margins": 0.09326934814453125, "rewards/rejected": -0.03119993396103382, "step": 211 }, { "epoch": 0.36, "learning_rate": 2.0267686424474188e-07, "logits/chosen": -2.0595648288726807, "logits/rejected": -2.1297178268432617, "logps/chosen": -90.15618896484375, "logps/rejected": -88.76057434082031, "loss": 0.6853, "rewards/accuracies": 0.25, "rewards/chosen": 0.05678920820355415, "rewards/margins": -0.0178007110953331, "rewards/rejected": 0.07458992302417755, "step": 212 }, { "epoch": 0.37, "learning_rate": 2.0363288718929254e-07, "logits/chosen": -1.9337685108184814, "logits/rejected": -1.81995689868927, "logps/chosen": -113.01835632324219, "logps/rejected": -98.44171142578125, "loss": 0.6638, "rewards/accuracies": 0.5, "rewards/chosen": -0.00965404137969017, "rewards/margins": 0.0455901138484478, "rewards/rejected": -0.05524415895342827, "step": 213 }, { "epoch": 0.37, "learning_rate": 2.045889101338432e-07, "logits/chosen": -1.9147207736968994, "logits/rejected": -2.065981864929199, "logps/chosen": -91.9068374633789, "logps/rejected": -99.21004486083984, "loss": 0.6584, "rewards/accuracies": 0.5, "rewards/chosen": -0.005194857716560364, "rewards/margins": 0.11073380708694458, "rewards/rejected": -0.11592865735292435, "step": 214 }, { "epoch": 0.37, "learning_rate": 2.0554493307839388e-07, "logits/chosen": -1.9055655002593994, "logits/rejected": -2.0654408931732178, "logps/chosen": -84.9306640625, "logps/rejected": -97.33314514160156, "loss": 0.64, "rewards/accuracies": 1.0, "rewards/chosen": 0.09539870917797089, "rewards/margins": 0.17922039330005646, "rewards/rejected": -0.08382168412208557, "step": 215 }, { "epoch": 0.37, "learning_rate": 2.0650095602294454e-07, "logits/chosen": -2.014446258544922, "logits/rejected": -2.1543128490448, "logps/chosen": -76.5035629272461, "logps/rejected": -80.57372283935547, "loss": 0.6512, "rewards/accuracies": 0.75, "rewards/chosen": 0.0900631919503212, "rewards/margins": 0.09660683572292328, "rewards/rejected": -0.006543640047311783, "step": 216 }, { "epoch": 0.37, "learning_rate": 2.074569789674952e-07, "logits/chosen": -2.0034639835357666, "logits/rejected": -2.02146053314209, "logps/chosen": -86.847900390625, "logps/rejected": -95.64535522460938, "loss": 0.6882, "rewards/accuracies": 0.5, "rewards/chosen": 0.0801319107413292, "rewards/margins": -0.0036373119801282883, "rewards/rejected": 0.08376922458410263, "step": 217 }, { "epoch": 0.38, "learning_rate": 2.0841300191204588e-07, "logits/chosen": -1.7862257957458496, "logits/rejected": -2.2540271282196045, "logps/chosen": -77.92982482910156, "logps/rejected": -94.46226501464844, "loss": 0.677, "rewards/accuracies": 0.75, "rewards/chosen": 0.09839478135108948, "rewards/margins": 0.11060868203639984, "rewards/rejected": -0.012213896960020065, "step": 218 }, { "epoch": 0.38, "learning_rate": 2.0936902485659654e-07, "logits/chosen": -2.220534324645996, "logits/rejected": -1.6178879737854004, "logps/chosen": -100.8427505493164, "logps/rejected": -78.19224548339844, "loss": 0.6527, "rewards/accuracies": 0.75, "rewards/chosen": -0.022278212010860443, "rewards/margins": 0.12288819253444672, "rewards/rejected": -0.14516639709472656, "step": 219 }, { "epoch": 0.38, "learning_rate": 2.103250478011472e-07, "logits/chosen": -1.6884443759918213, "logits/rejected": -1.8811360597610474, "logps/chosen": -71.39717102050781, "logps/rejected": -85.39673614501953, "loss": 0.6404, "rewards/accuracies": 0.5, "rewards/chosen": 0.15418872237205505, "rewards/margins": 0.05640773847699165, "rewards/rejected": 0.0977809950709343, "step": 220 }, { "epoch": 0.38, "learning_rate": 2.112810707456979e-07, "logits/chosen": -1.9307749271392822, "logits/rejected": -1.6454700231552124, "logps/chosen": -89.55736541748047, "logps/rejected": -81.92749786376953, "loss": 0.6839, "rewards/accuracies": 0.25, "rewards/chosen": -0.1496797651052475, "rewards/margins": -0.04867057502269745, "rewards/rejected": -0.10100918263196945, "step": 221 }, { "epoch": 0.38, "learning_rate": 2.1223709369024857e-07, "logits/chosen": -2.0397961139678955, "logits/rejected": -2.013455390930176, "logps/chosen": -73.14329528808594, "logps/rejected": -74.50252532958984, "loss": 0.6434, "rewards/accuracies": 1.0, "rewards/chosen": 0.11541824042797089, "rewards/margins": 0.13138943910598755, "rewards/rejected": -0.01597118377685547, "step": 222 }, { "epoch": 0.38, "learning_rate": 2.1319311663479923e-07, "logits/chosen": -1.9797446727752686, "logits/rejected": -2.1222143173217773, "logps/chosen": -65.4858627319336, "logps/rejected": -65.97869873046875, "loss": 0.6297, "rewards/accuracies": 1.0, "rewards/chosen": 0.10346727818250656, "rewards/margins": 0.15404339134693146, "rewards/rejected": -0.050576113164424896, "step": 223 }, { "epoch": 0.39, "learning_rate": 2.141491395793499e-07, "logits/chosen": -2.1398961544036865, "logits/rejected": -2.0581612586975098, "logps/chosen": -72.881591796875, "logps/rejected": -73.88377380371094, "loss": 0.6626, "rewards/accuracies": 1.0, "rewards/chosen": 0.14151154458522797, "rewards/margins": 0.21692484617233276, "rewards/rejected": -0.07541332393884659, "step": 224 }, { "epoch": 0.39, "learning_rate": 2.1510516252390057e-07, "logits/chosen": -2.1958417892456055, "logits/rejected": -2.011812448501587, "logps/chosen": -92.83877563476562, "logps/rejected": -90.71367645263672, "loss": 0.6849, "rewards/accuracies": 0.75, "rewards/chosen": -0.044388577342033386, "rewards/margins": 0.05378836393356323, "rewards/rejected": -0.09817695617675781, "step": 225 }, { "epoch": 0.39, "learning_rate": 2.1606118546845123e-07, "logits/chosen": -2.136631965637207, "logits/rejected": -1.9585291147232056, "logps/chosen": -81.86181640625, "logps/rejected": -72.07275390625, "loss": 0.6612, "rewards/accuracies": 0.75, "rewards/chosen": 0.2170238494873047, "rewards/margins": -0.006719980388879776, "rewards/rejected": 0.22374382615089417, "step": 226 }, { "epoch": 0.39, "learning_rate": 2.170172084130019e-07, "logits/chosen": -1.9394690990447998, "logits/rejected": -2.2785491943359375, "logps/chosen": -101.98294830322266, "logps/rejected": -102.3807373046875, "loss": 0.6604, "rewards/accuracies": 0.5, "rewards/chosen": 0.02253427729010582, "rewards/margins": 0.009570982307195663, "rewards/rejected": 0.012963294982910156, "step": 227 }, { "epoch": 0.39, "learning_rate": 2.1797323135755257e-07, "logits/chosen": -2.008406400680542, "logits/rejected": -1.6862127780914307, "logps/chosen": -89.77078247070312, "logps/rejected": -82.99530029296875, "loss": 0.6568, "rewards/accuracies": 0.5, "rewards/chosen": -0.015956886112689972, "rewards/margins": 0.07581672072410583, "rewards/rejected": -0.0917736068367958, "step": 228 }, { "epoch": 0.39, "learning_rate": 2.1892925430210323e-07, "logits/chosen": -1.9107770919799805, "logits/rejected": -2.112947940826416, "logps/chosen": -78.04499816894531, "logps/rejected": -88.97824096679688, "loss": 0.6547, "rewards/accuracies": 0.75, "rewards/chosen": 0.015944862738251686, "rewards/margins": 0.06212177127599716, "rewards/rejected": -0.046176910400390625, "step": 229 }, { "epoch": 0.4, "learning_rate": 2.198852772466539e-07, "logits/chosen": -2.0918638706207275, "logits/rejected": -1.7061803340911865, "logps/chosen": -111.7435073852539, "logps/rejected": -87.78691101074219, "loss": 0.6851, "rewards/accuracies": 0.5, "rewards/chosen": -0.04182949662208557, "rewards/margins": 0.05823164060711861, "rewards/rejected": -0.10006113350391388, "step": 230 }, { "epoch": 0.4, "learning_rate": 2.208413001912046e-07, "logits/chosen": -2.0224428176879883, "logits/rejected": -2.137007236480713, "logps/chosen": -84.26957702636719, "logps/rejected": -94.31015014648438, "loss": 0.6583, "rewards/accuracies": 1.0, "rewards/chosen": 0.13539285957813263, "rewards/margins": 0.24413079023361206, "rewards/rejected": -0.10873794555664062, "step": 231 }, { "epoch": 0.4, "learning_rate": 2.2179732313575526e-07, "logits/chosen": -1.7755749225616455, "logits/rejected": -1.7525635957717896, "logps/chosen": -83.26101684570312, "logps/rejected": -88.4072036743164, "loss": 0.6226, "rewards/accuracies": 0.75, "rewards/chosen": 0.03236427158117294, "rewards/margins": 0.19139136373996735, "rewards/rejected": -0.1590270847082138, "step": 232 }, { "epoch": 0.4, "learning_rate": 2.2275334608030592e-07, "logits/chosen": -1.9921801090240479, "logits/rejected": -2.070070266723633, "logps/chosen": -86.90048217773438, "logps/rejected": -91.3769302368164, "loss": 0.6343, "rewards/accuracies": 0.75, "rewards/chosen": 0.11577606201171875, "rewards/margins": 0.20621797442436218, "rewards/rejected": -0.09044189751148224, "step": 233 }, { "epoch": 0.4, "learning_rate": 2.237093690248566e-07, "logits/chosen": -2.0460205078125, "logits/rejected": -2.0703794956207275, "logps/chosen": -72.52018737792969, "logps/rejected": -85.90266418457031, "loss": 0.6757, "rewards/accuracies": 0.75, "rewards/chosen": 0.05962495505809784, "rewards/margins": 0.14923696219921112, "rewards/rejected": -0.08961200714111328, "step": 234 }, { "epoch": 0.4, "learning_rate": 2.2466539196940726e-07, "logits/chosen": -1.654861569404602, "logits/rejected": -2.2504069805145264, "logps/chosen": -95.63079833984375, "logps/rejected": -115.58038330078125, "loss": 0.6237, "rewards/accuracies": 1.0, "rewards/chosen": 0.21579037606716156, "rewards/margins": 0.3437250256538391, "rewards/rejected": -0.12793464958667755, "step": 235 }, { "epoch": 0.41, "learning_rate": 2.2562141491395792e-07, "logits/chosen": -2.1923186779022217, "logits/rejected": -2.0201988220214844, "logps/chosen": -82.44168853759766, "logps/rejected": -85.55845642089844, "loss": 0.6189, "rewards/accuracies": 0.75, "rewards/chosen": 0.005915265530347824, "rewards/margins": 0.12213630974292755, "rewards/rejected": -0.11622104793787003, "step": 236 }, { "epoch": 0.41, "learning_rate": 2.265774378585086e-07, "logits/chosen": -2.0622591972351074, "logits/rejected": -1.9163228273391724, "logps/chosen": -71.46786499023438, "logps/rejected": -86.00993347167969, "loss": 0.6452, "rewards/accuracies": 0.75, "rewards/chosen": 0.14477252960205078, "rewards/margins": 0.329362690448761, "rewards/rejected": -0.1845901608467102, "step": 237 }, { "epoch": 0.41, "learning_rate": 2.2753346080305926e-07, "logits/chosen": -2.115575075149536, "logits/rejected": -1.9255173206329346, "logps/chosen": -88.73851013183594, "logps/rejected": -81.70103454589844, "loss": 0.6559, "rewards/accuracies": 0.5, "rewards/chosen": -0.05354861915111542, "rewards/margins": -0.029854964464902878, "rewards/rejected": -0.023693658411502838, "step": 238 }, { "epoch": 0.41, "learning_rate": 2.2848948374760992e-07, "logits/chosen": -2.260450839996338, "logits/rejected": -2.0165324211120605, "logps/chosen": -90.03697967529297, "logps/rejected": -98.43805694580078, "loss": 0.6169, "rewards/accuracies": 0.75, "rewards/chosen": 0.07352772355079651, "rewards/margins": 0.14628563821315765, "rewards/rejected": -0.07275791466236115, "step": 239 }, { "epoch": 0.41, "learning_rate": 2.2944550669216058e-07, "logits/chosen": -2.258876323699951, "logits/rejected": -2.0225508213043213, "logps/chosen": -86.93971252441406, "logps/rejected": -86.1629638671875, "loss": 0.6293, "rewards/accuracies": 0.75, "rewards/chosen": 0.05736389383673668, "rewards/margins": 0.03625297173857689, "rewards/rejected": 0.02111092209815979, "step": 240 }, { "epoch": 0.41, "learning_rate": 2.304015296367113e-07, "logits/chosen": -1.9787054061889648, "logits/rejected": -1.7428383827209473, "logps/chosen": -78.11241912841797, "logps/rejected": -80.04951477050781, "loss": 0.6708, "rewards/accuracies": 1.0, "rewards/chosen": 0.1278984099626541, "rewards/margins": 0.2548372149467468, "rewards/rejected": -0.1269388198852539, "step": 241 }, { "epoch": 0.42, "learning_rate": 2.3135755258126195e-07, "logits/chosen": -1.8615357875823975, "logits/rejected": -2.1003551483154297, "logps/chosen": -75.73294067382812, "logps/rejected": -85.154296875, "loss": 0.6457, "rewards/accuracies": 0.5, "rewards/chosen": 0.06861648708581924, "rewards/margins": 0.01490326039493084, "rewards/rejected": 0.053713224828243256, "step": 242 }, { "epoch": 0.42, "learning_rate": 2.323135755258126e-07, "logits/chosen": -1.994934320449829, "logits/rejected": -1.8839404582977295, "logps/chosen": -69.84376525878906, "logps/rejected": -75.61143493652344, "loss": 0.6332, "rewards/accuracies": 0.5, "rewards/chosen": 0.20715665817260742, "rewards/margins": 0.22424975037574768, "rewards/rejected": -0.017093084752559662, "step": 243 }, { "epoch": 0.42, "learning_rate": 2.332695984703633e-07, "logits/chosen": -1.923697829246521, "logits/rejected": -2.137376546859741, "logps/chosen": -114.98424530029297, "logps/rejected": -129.5244903564453, "loss": 0.6358, "rewards/accuracies": 1.0, "rewards/chosen": 0.08478603512048721, "rewards/margins": 0.1405477523803711, "rewards/rejected": -0.05576172471046448, "step": 244 }, { "epoch": 0.42, "learning_rate": 2.3422562141491395e-07, "logits/chosen": -1.854264259338379, "logits/rejected": -2.218738079071045, "logps/chosen": -85.48158264160156, "logps/rejected": -96.03387451171875, "loss": 0.6451, "rewards/accuracies": 1.0, "rewards/chosen": 0.26288270950317383, "rewards/margins": 0.21674281358718872, "rewards/rejected": 0.046139907091856, "step": 245 }, { "epoch": 0.42, "learning_rate": 2.351816443594646e-07, "logits/chosen": -1.7029896974563599, "logits/rejected": -2.0194575786590576, "logps/chosen": -73.62770080566406, "logps/rejected": -87.56461334228516, "loss": 0.6305, "rewards/accuracies": 0.75, "rewards/chosen": 0.14729471504688263, "rewards/margins": 0.23415249586105347, "rewards/rejected": -0.08685778826475143, "step": 246 }, { "epoch": 0.43, "learning_rate": 2.361376673040153e-07, "logits/chosen": -1.6718281507492065, "logits/rejected": -2.1925125122070312, "logps/chosen": -66.69658660888672, "logps/rejected": -85.02704620361328, "loss": 0.6375, "rewards/accuracies": 0.25, "rewards/chosen": 0.08060703426599503, "rewards/margins": -0.036558523774147034, "rewards/rejected": 0.11716555058956146, "step": 247 }, { "epoch": 0.43, "learning_rate": 2.3709369024856595e-07, "logits/chosen": -1.9503382444381714, "logits/rejected": -2.1199660301208496, "logps/chosen": -74.20629119873047, "logps/rejected": -83.83865356445312, "loss": 0.6185, "rewards/accuracies": 1.0, "rewards/chosen": 0.1523509919643402, "rewards/margins": 0.28055572509765625, "rewards/rejected": -0.12820473313331604, "step": 248 }, { "epoch": 0.43, "learning_rate": 2.380497131931166e-07, "logits/chosen": -2.0385351181030273, "logits/rejected": -1.8754421472549438, "logps/chosen": -112.26296997070312, "logps/rejected": -108.55007934570312, "loss": 0.6368, "rewards/accuracies": 0.25, "rewards/chosen": 0.11553649604320526, "rewards/margins": 0.047640398144721985, "rewards/rejected": 0.06789608299732208, "step": 249 }, { "epoch": 0.43, "learning_rate": 2.3900573613766727e-07, "logits/chosen": -1.8961875438690186, "logits/rejected": -2.0824596881866455, "logps/chosen": -67.18657684326172, "logps/rejected": -73.5388412475586, "loss": 0.6476, "rewards/accuracies": 0.75, "rewards/chosen": 0.2922424077987671, "rewards/margins": 0.2778785526752472, "rewards/rejected": 0.014363858848810196, "step": 250 }, { "epoch": 0.43, "learning_rate": 2.3996175908221796e-07, "logits/chosen": -1.8714354038238525, "logits/rejected": -2.1542563438415527, "logps/chosen": -79.95736694335938, "logps/rejected": -93.22126770019531, "loss": 0.63, "rewards/accuracies": 0.75, "rewards/chosen": 0.2985075116157532, "rewards/margins": 0.20833531022071838, "rewards/rejected": 0.0901721939444542, "step": 251 }, { "epoch": 0.43, "learning_rate": 2.4091778202676864e-07, "logits/chosen": -1.9832403659820557, "logits/rejected": -1.9193902015686035, "logps/chosen": -86.99752044677734, "logps/rejected": -80.88160705566406, "loss": 0.6618, "rewards/accuracies": 0.75, "rewards/chosen": 0.14911222457885742, "rewards/margins": 0.09538964927196503, "rewards/rejected": 0.0537225715816021, "step": 252 }, { "epoch": 0.44, "learning_rate": 2.4187380497131927e-07, "logits/chosen": -2.0435173511505127, "logits/rejected": -1.9910223484039307, "logps/chosen": -72.33782196044922, "logps/rejected": -79.660400390625, "loss": 0.6187, "rewards/accuracies": 0.5, "rewards/chosen": -0.04725828021764755, "rewards/margins": -0.01178760826587677, "rewards/rejected": -0.03547067567706108, "step": 253 }, { "epoch": 0.44, "learning_rate": 2.4282982791586996e-07, "logits/chosen": -2.0874743461608887, "logits/rejected": -2.0613372325897217, "logps/chosen": -104.0281753540039, "logps/rejected": -89.40221405029297, "loss": 0.6429, "rewards/accuracies": 0.75, "rewards/chosen": 0.1652860790491104, "rewards/margins": 0.15087701380252838, "rewards/rejected": 0.01440906897187233, "step": 254 }, { "epoch": 0.44, "learning_rate": 2.4378585086042064e-07, "logits/chosen": -1.9478986263275146, "logits/rejected": -2.199288845062256, "logps/chosen": -81.02596282958984, "logps/rejected": -88.84567260742188, "loss": 0.6735, "rewards/accuracies": 0.5, "rewards/chosen": -0.01648407056927681, "rewards/margins": -0.09410000592470169, "rewards/rejected": 0.07761593163013458, "step": 255 }, { "epoch": 0.44, "learning_rate": 2.447418738049713e-07, "logits/chosen": -1.6250674724578857, "logits/rejected": -1.891884207725525, "logps/chosen": -97.92562866210938, "logps/rejected": -92.16635131835938, "loss": 0.6025, "rewards/accuracies": 0.75, "rewards/chosen": 0.22280216217041016, "rewards/margins": 0.27428770065307617, "rewards/rejected": -0.051485542207956314, "step": 256 }, { "epoch": 0.44, "learning_rate": 2.45697896749522e-07, "logits/chosen": -2.0455470085144043, "logits/rejected": -1.8208316564559937, "logps/chosen": -91.1756591796875, "logps/rejected": -93.41930389404297, "loss": 0.6227, "rewards/accuracies": 0.5, "rewards/chosen": -0.083147332072258, "rewards/margins": 0.08373012393712997, "rewards/rejected": -0.16687747836112976, "step": 257 }, { "epoch": 0.44, "learning_rate": 2.4665391969407264e-07, "logits/chosen": -1.9546318054199219, "logits/rejected": -2.1284613609313965, "logps/chosen": -90.86896514892578, "logps/rejected": -108.9039306640625, "loss": 0.6602, "rewards/accuracies": 0.75, "rewards/chosen": -0.030812550336122513, "rewards/margins": 0.08356915414333344, "rewards/rejected": -0.11438170075416565, "step": 258 }, { "epoch": 0.45, "learning_rate": 2.4760994263862333e-07, "logits/chosen": -1.9081346988677979, "logits/rejected": -2.1096529960632324, "logps/chosen": -88.67839050292969, "logps/rejected": -78.42815399169922, "loss": 0.6591, "rewards/accuracies": 0.75, "rewards/chosen": 0.13961106538772583, "rewards/margins": 0.1961609125137329, "rewards/rejected": -0.056549835950136185, "step": 259 }, { "epoch": 0.45, "learning_rate": 2.48565965583174e-07, "logits/chosen": -1.7179145812988281, "logits/rejected": -1.9407498836517334, "logps/chosen": -94.39994812011719, "logps/rejected": -89.14263153076172, "loss": 0.5964, "rewards/accuracies": 1.0, "rewards/chosen": 0.01361369900405407, "rewards/margins": 0.2755105793476105, "rewards/rejected": -0.26189690828323364, "step": 260 }, { "epoch": 0.45, "learning_rate": 2.4952198852772465e-07, "logits/chosen": -1.941927433013916, "logits/rejected": -2.063417911529541, "logps/chosen": -85.50338745117188, "logps/rejected": -105.60172271728516, "loss": 0.6303, "rewards/accuracies": 1.0, "rewards/chosen": 0.051663972437381744, "rewards/margins": 0.3445817828178406, "rewards/rejected": -0.29291781783103943, "step": 261 }, { "epoch": 0.45, "learning_rate": 2.5047801147227533e-07, "logits/chosen": -2.124156951904297, "logits/rejected": -1.826830267906189, "logps/chosen": -94.88099670410156, "logps/rejected": -106.57850646972656, "loss": 0.6433, "rewards/accuracies": 1.0, "rewards/chosen": 0.31334343552589417, "rewards/margins": 0.5676210522651672, "rewards/rejected": -0.25427761673927307, "step": 262 }, { "epoch": 0.45, "learning_rate": 2.5143403441682596e-07, "logits/chosen": -1.8537001609802246, "logits/rejected": -2.1178104877471924, "logps/chosen": -76.12335205078125, "logps/rejected": -101.96958923339844, "loss": 0.617, "rewards/accuracies": 0.75, "rewards/chosen": 0.019263263791799545, "rewards/margins": 0.25367891788482666, "rewards/rejected": -0.23441562056541443, "step": 263 }, { "epoch": 0.45, "learning_rate": 2.5239005736137665e-07, "logits/chosen": -2.012739658355713, "logits/rejected": -1.9662659168243408, "logps/chosen": -80.74649810791016, "logps/rejected": -85.99250030517578, "loss": 0.605, "rewards/accuracies": 0.75, "rewards/chosen": -0.023932646960020065, "rewards/margins": 0.1398795247077942, "rewards/rejected": -0.16381216049194336, "step": 264 }, { "epoch": 0.46, "learning_rate": 2.5334608030592733e-07, "logits/chosen": -1.768721103668213, "logits/rejected": -1.8991405963897705, "logps/chosen": -80.17694854736328, "logps/rejected": -90.04689025878906, "loss": 0.6314, "rewards/accuracies": 0.5, "rewards/chosen": 0.19903451204299927, "rewards/margins": 0.27060413360595703, "rewards/rejected": -0.07156963646411896, "step": 265 }, { "epoch": 0.46, "learning_rate": 2.54302103250478e-07, "logits/chosen": -2.0548958778381348, "logits/rejected": -1.9545419216156006, "logps/chosen": -75.83049774169922, "logps/rejected": -73.14643859863281, "loss": 0.611, "rewards/accuracies": 1.0, "rewards/chosen": 0.07339592278003693, "rewards/margins": 0.21325598657131195, "rewards/rejected": -0.13986006379127502, "step": 266 }, { "epoch": 0.46, "learning_rate": 2.552581261950287e-07, "logits/chosen": -2.1332154273986816, "logits/rejected": -1.818711757659912, "logps/chosen": -82.5963134765625, "logps/rejected": -87.76451873779297, "loss": 0.6254, "rewards/accuracies": 0.5, "rewards/chosen": -0.010117623955011368, "rewards/margins": -0.06581507623195648, "rewards/rejected": 0.055697448551654816, "step": 267 }, { "epoch": 0.46, "learning_rate": 2.5621414913957934e-07, "logits/chosen": -2.0935962200164795, "logits/rejected": -1.9430387020111084, "logps/chosen": -111.32025909423828, "logps/rejected": -100.22254943847656, "loss": 0.5773, "rewards/accuracies": 1.0, "rewards/chosen": 0.34403613209724426, "rewards/margins": 0.3956791162490845, "rewards/rejected": -0.05164299160242081, "step": 268 }, { "epoch": 0.46, "learning_rate": 2.5717017208413e-07, "logits/chosen": -2.099074125289917, "logits/rejected": -2.029576301574707, "logps/chosen": -75.01412200927734, "logps/rejected": -94.2630615234375, "loss": 0.6115, "rewards/accuracies": 0.75, "rewards/chosen": 0.27428531646728516, "rewards/margins": 0.39985391497612, "rewards/rejected": -0.12556858360767365, "step": 269 }, { "epoch": 0.46, "learning_rate": 2.5812619502868065e-07, "logits/chosen": -1.7685041427612305, "logits/rejected": -2.2341179847717285, "logps/chosen": -80.2627944946289, "logps/rejected": -89.4440689086914, "loss": 0.6375, "rewards/accuracies": 0.75, "rewards/chosen": 0.11262901872396469, "rewards/margins": 0.2493271827697754, "rewards/rejected": -0.1366981565952301, "step": 270 }, { "epoch": 0.47, "learning_rate": 2.5908221797323134e-07, "logits/chosen": -1.8778433799743652, "logits/rejected": -1.8804912567138672, "logps/chosen": -89.76187896728516, "logps/rejected": -83.27854919433594, "loss": 0.6082, "rewards/accuracies": 0.75, "rewards/chosen": 0.3514895439147949, "rewards/margins": 0.20234185457229614, "rewards/rejected": 0.14914768934249878, "step": 271 }, { "epoch": 0.47, "learning_rate": 2.60038240917782e-07, "logits/chosen": -1.8034052848815918, "logits/rejected": -1.9166806936264038, "logps/chosen": -79.69463348388672, "logps/rejected": -80.96076965332031, "loss": 0.6224, "rewards/accuracies": 0.75, "rewards/chosen": 0.12801408767700195, "rewards/margins": 0.2944537103176117, "rewards/rejected": -0.16643962264060974, "step": 272 }, { "epoch": 0.47, "learning_rate": 2.609942638623327e-07, "logits/chosen": -1.9296023845672607, "logits/rejected": -1.7824636697769165, "logps/chosen": -102.91497039794922, "logps/rejected": -98.8305892944336, "loss": 0.6083, "rewards/accuracies": 0.5, "rewards/chosen": 0.2724590599536896, "rewards/margins": 0.15890313684940338, "rewards/rejected": 0.113555908203125, "step": 273 }, { "epoch": 0.47, "learning_rate": 2.6195028680688334e-07, "logits/chosen": -2.1607251167297363, "logits/rejected": -2.0864949226379395, "logps/chosen": -68.25477600097656, "logps/rejected": -92.94677734375, "loss": 0.6717, "rewards/accuracies": 1.0, "rewards/chosen": 0.20889253914356232, "rewards/margins": 0.40145233273506165, "rewards/rejected": -0.19255982339382172, "step": 274 }, { "epoch": 0.47, "learning_rate": 2.62906309751434e-07, "logits/chosen": -2.148855209350586, "logits/rejected": -1.8925683498382568, "logps/chosen": -77.67935943603516, "logps/rejected": -77.86077117919922, "loss": 0.6736, "rewards/accuracies": 0.5, "rewards/chosen": 0.14646244049072266, "rewards/margins": 0.046450138092041016, "rewards/rejected": 0.10001230239868164, "step": 275 }, { "epoch": 0.48, "learning_rate": 2.6386233269598466e-07, "logits/chosen": -2.075352191925049, "logits/rejected": -1.9991751909255981, "logps/chosen": -77.75213623046875, "logps/rejected": -87.32064819335938, "loss": 0.5756, "rewards/accuracies": 0.75, "rewards/chosen": -0.04850311577320099, "rewards/margins": 0.15770205855369568, "rewards/rejected": -0.20620517432689667, "step": 276 }, { "epoch": 0.48, "learning_rate": 2.6481835564053534e-07, "logits/chosen": -2.1476683616638184, "logits/rejected": -2.0008440017700195, "logps/chosen": -88.98123931884766, "logps/rejected": -83.60301208496094, "loss": 0.5908, "rewards/accuracies": 0.75, "rewards/chosen": -0.12534819543361664, "rewards/margins": 0.13490086793899536, "rewards/rejected": -0.2602490484714508, "step": 277 }, { "epoch": 0.48, "learning_rate": 2.657743785850861e-07, "logits/chosen": -1.898353099822998, "logits/rejected": -2.0624165534973145, "logps/chosen": -89.48905181884766, "logps/rejected": -100.37100219726562, "loss": 0.5923, "rewards/accuracies": 0.75, "rewards/chosen": 0.03284207358956337, "rewards/margins": 0.2623398005962372, "rewards/rejected": -0.22949771583080292, "step": 278 }, { "epoch": 0.48, "learning_rate": 2.667304015296367e-07, "logits/chosen": -1.8200291395187378, "logits/rejected": -2.1084375381469727, "logps/chosen": -73.0489501953125, "logps/rejected": -83.49716186523438, "loss": 0.6331, "rewards/accuracies": 0.5, "rewards/chosen": 0.13147860765457153, "rewards/margins": -0.003164101392030716, "rewards/rejected": 0.13464270532131195, "step": 279 }, { "epoch": 0.48, "learning_rate": 2.676864244741874e-07, "logits/chosen": -2.0140209197998047, "logits/rejected": -1.9507107734680176, "logps/chosen": -74.40006256103516, "logps/rejected": -77.86890411376953, "loss": 0.6121, "rewards/accuracies": 0.75, "rewards/chosen": 0.158257395029068, "rewards/margins": 0.11781930178403854, "rewards/rejected": 0.040438082069158554, "step": 280 }, { "epoch": 0.48, "learning_rate": 2.6864244741873803e-07, "logits/chosen": -2.0351321697235107, "logits/rejected": -1.9653476476669312, "logps/chosen": -91.40787506103516, "logps/rejected": -91.04258728027344, "loss": 0.6226, "rewards/accuracies": 0.25, "rewards/chosen": -0.1773841828107834, "rewards/margins": -0.4047532081604004, "rewards/rejected": 0.2273690402507782, "step": 281 }, { "epoch": 0.49, "learning_rate": 2.695984703632887e-07, "logits/chosen": -1.8713961839675903, "logits/rejected": -2.2966065406799316, "logps/chosen": -65.84269714355469, "logps/rejected": -85.22756958007812, "loss": 0.628, "rewards/accuracies": 0.75, "rewards/chosen": 0.257339209318161, "rewards/margins": 0.3491423726081848, "rewards/rejected": -0.0918031707406044, "step": 282 }, { "epoch": 0.49, "learning_rate": 2.7055449330783934e-07, "logits/chosen": -2.0560078620910645, "logits/rejected": -2.1324052810668945, "logps/chosen": -94.81842041015625, "logps/rejected": -87.28052520751953, "loss": 0.6007, "rewards/accuracies": 0.5, "rewards/chosen": -0.02838592603802681, "rewards/margins": 0.23829929530620575, "rewards/rejected": -0.26668521761894226, "step": 283 }, { "epoch": 0.49, "learning_rate": 2.7151051625239003e-07, "logits/chosen": -1.8485939502716064, "logits/rejected": -2.1167521476745605, "logps/chosen": -71.18531799316406, "logps/rejected": -93.4831771850586, "loss": 0.6012, "rewards/accuracies": 0.75, "rewards/chosen": 0.24813736975193024, "rewards/margins": 0.3458481729030609, "rewards/rejected": -0.09771078824996948, "step": 284 }, { "epoch": 0.49, "learning_rate": 2.724665391969407e-07, "logits/chosen": -1.7384397983551025, "logits/rejected": -2.1705493927001953, "logps/chosen": -80.8526840209961, "logps/rejected": -103.35813903808594, "loss": 0.514, "rewards/accuracies": 0.75, "rewards/chosen": 0.4083659052848816, "rewards/margins": 0.41945046186447144, "rewards/rejected": -0.01108456403017044, "step": 285 }, { "epoch": 0.49, "learning_rate": 2.734225621414914e-07, "logits/chosen": -2.0836968421936035, "logits/rejected": -2.1318037509918213, "logps/chosen": -86.23441314697266, "logps/rejected": -77.54132080078125, "loss": 0.6054, "rewards/accuracies": 0.5, "rewards/chosen": -0.27778685092926025, "rewards/margins": -0.3113729953765869, "rewards/rejected": 0.03358611464500427, "step": 286 }, { "epoch": 0.49, "learning_rate": 2.7437858508604203e-07, "logits/chosen": -1.8352317810058594, "logits/rejected": -2.0800905227661133, "logps/chosen": -86.96878051757812, "logps/rejected": -101.90534210205078, "loss": 0.6113, "rewards/accuracies": 0.5, "rewards/chosen": 0.21654605865478516, "rewards/margins": 0.4508243501186371, "rewards/rejected": -0.23427829146385193, "step": 287 }, { "epoch": 0.5, "learning_rate": 2.753346080305927e-07, "logits/chosen": -1.7856591939926147, "logits/rejected": -2.0001091957092285, "logps/chosen": -91.35235595703125, "logps/rejected": -88.71160888671875, "loss": 0.6328, "rewards/accuracies": 0.75, "rewards/chosen": -0.22819627821445465, "rewards/margins": -0.004594933707267046, "rewards/rejected": -0.2236013412475586, "step": 288 }, { "epoch": 0.5, "learning_rate": 2.762906309751434e-07, "logits/chosen": -2.233621597290039, "logits/rejected": -1.9586153030395508, "logps/chosen": -100.61235046386719, "logps/rejected": -98.08549499511719, "loss": 0.5483, "rewards/accuracies": 0.25, "rewards/chosen": -0.2597704827785492, "rewards/margins": -0.12824049592018127, "rewards/rejected": -0.1315300017595291, "step": 289 }, { "epoch": 0.5, "learning_rate": 2.7724665391969403e-07, "logits/chosen": -2.2588677406311035, "logits/rejected": -1.973250389099121, "logps/chosen": -84.7867202758789, "logps/rejected": -82.74980926513672, "loss": 0.5717, "rewards/accuracies": 0.5, "rewards/chosen": 0.4977167248725891, "rewards/margins": 0.715061604976654, "rewards/rejected": -0.21734488010406494, "step": 290 }, { "epoch": 0.5, "learning_rate": 2.7820267686424477e-07, "logits/chosen": -2.0009448528289795, "logits/rejected": -2.0981783866882324, "logps/chosen": -84.49632263183594, "logps/rejected": -76.91552734375, "loss": 0.6207, "rewards/accuracies": 0.5, "rewards/chosen": 0.08655643463134766, "rewards/margins": -0.19461402297019958, "rewards/rejected": 0.28117045760154724, "step": 291 }, { "epoch": 0.5, "learning_rate": 2.791586998087954e-07, "logits/chosen": -1.8949193954467773, "logits/rejected": -2.0187911987304688, "logps/chosen": -105.41613006591797, "logps/rejected": -99.28246307373047, "loss": 0.6265, "rewards/accuracies": 0.75, "rewards/chosen": 0.11777401715517044, "rewards/margins": 0.26492196321487427, "rewards/rejected": -0.14714793860912323, "step": 292 }, { "epoch": 0.5, "learning_rate": 2.801147227533461e-07, "logits/chosen": -2.0558319091796875, "logits/rejected": -2.249372720718384, "logps/chosen": -66.031005859375, "logps/rejected": -80.99681091308594, "loss": 0.5426, "rewards/accuracies": 1.0, "rewards/chosen": 0.13950130343437195, "rewards/margins": 0.6269415616989136, "rewards/rejected": -0.487440288066864, "step": 293 }, { "epoch": 0.51, "learning_rate": 2.810707456978967e-07, "logits/chosen": -1.9534311294555664, "logits/rejected": -1.7508134841918945, "logps/chosen": -106.73246765136719, "logps/rejected": -85.1046142578125, "loss": 0.5727, "rewards/accuracies": 0.5, "rewards/chosen": 0.18515530228614807, "rewards/margins": 0.3438885807991028, "rewards/rejected": -0.1587332785129547, "step": 294 }, { "epoch": 0.51, "learning_rate": 2.820267686424474e-07, "logits/chosen": -1.980957269668579, "logits/rejected": -1.9032623767852783, "logps/chosen": -74.70911407470703, "logps/rejected": -76.36561584472656, "loss": 0.6515, "rewards/accuracies": 0.75, "rewards/chosen": 0.22072581946849823, "rewards/margins": 0.364721417427063, "rewards/rejected": -0.14399558305740356, "step": 295 }, { "epoch": 0.51, "learning_rate": 2.8298279158699804e-07, "logits/chosen": -1.7599586248397827, "logits/rejected": -2.060213088989258, "logps/chosen": -84.0015869140625, "logps/rejected": -96.04927062988281, "loss": 0.6475, "rewards/accuracies": 0.75, "rewards/chosen": -0.29223594069480896, "rewards/margins": 0.10148372501134872, "rewards/rejected": -0.3937196731567383, "step": 296 }, { "epoch": 0.51, "learning_rate": 2.839388145315487e-07, "logits/chosen": -1.68614661693573, "logits/rejected": -2.135000228881836, "logps/chosen": -84.80066680908203, "logps/rejected": -103.99566650390625, "loss": 0.5563, "rewards/accuracies": 1.0, "rewards/chosen": -0.08808517456054688, "rewards/margins": 0.2649519145488739, "rewards/rejected": -0.3530370891094208, "step": 297 }, { "epoch": 0.51, "learning_rate": 2.8489483747609946e-07, "logits/chosen": -2.184455156326294, "logits/rejected": -2.0556247234344482, "logps/chosen": -83.08639526367188, "logps/rejected": -103.6192626953125, "loss": 0.5244, "rewards/accuracies": 1.0, "rewards/chosen": 0.16083049774169922, "rewards/margins": 0.39103299379348755, "rewards/rejected": -0.23020246624946594, "step": 298 }, { "epoch": 0.51, "learning_rate": 2.858508604206501e-07, "logits/chosen": -1.8967337608337402, "logits/rejected": -2.0755765438079834, "logps/chosen": -75.01480865478516, "logps/rejected": -77.35787963867188, "loss": 0.6252, "rewards/accuracies": 0.5, "rewards/chosen": 0.05710611492395401, "rewards/margins": 0.007828041911125183, "rewards/rejected": 0.04927806928753853, "step": 299 }, { "epoch": 0.52, "learning_rate": 2.868068833652008e-07, "logits/chosen": -1.6103403568267822, "logits/rejected": -2.124863862991333, "logps/chosen": -71.61741638183594, "logps/rejected": -94.35206604003906, "loss": 0.5186, "rewards/accuracies": 1.0, "rewards/chosen": 0.6568753719329834, "rewards/margins": 1.2256507873535156, "rewards/rejected": -0.5687754154205322, "step": 300 }, { "epoch": 0.52, "learning_rate": 2.877629063097514e-07, "logits/chosen": -2.099888801574707, "logits/rejected": -1.717930793762207, "logps/chosen": -97.91241455078125, "logps/rejected": -95.2533950805664, "loss": 0.5959, "rewards/accuracies": 1.0, "rewards/chosen": 0.043697550892829895, "rewards/margins": 0.39267367124557495, "rewards/rejected": -0.34897613525390625, "step": 301 }, { "epoch": 0.52, "learning_rate": 2.887189292543021e-07, "logits/chosen": -1.8093550205230713, "logits/rejected": -2.1984221935272217, "logps/chosen": -71.92082977294922, "logps/rejected": -89.95555114746094, "loss": 0.5591, "rewards/accuracies": 0.75, "rewards/chosen": -0.04958409070968628, "rewards/margins": 0.15621040761470795, "rewards/rejected": -0.2057945281267166, "step": 302 }, { "epoch": 0.52, "learning_rate": 2.896749521988527e-07, "logits/chosen": -1.873695731163025, "logits/rejected": -1.8912936449050903, "logps/chosen": -78.83098602294922, "logps/rejected": -86.98140716552734, "loss": 0.5306, "rewards/accuracies": 0.5, "rewards/chosen": -0.1385468691587448, "rewards/margins": 0.2576766908168793, "rewards/rejected": -0.3962235450744629, "step": 303 }, { "epoch": 0.52, "learning_rate": 2.9063097514340346e-07, "logits/chosen": -2.083383560180664, "logits/rejected": -2.0775306224823, "logps/chosen": -75.66841125488281, "logps/rejected": -78.9480209350586, "loss": 0.6897, "rewards/accuracies": 0.5, "rewards/chosen": 0.0029930174350738525, "rewards/margins": 0.37777435779571533, "rewards/rejected": -0.3747813403606415, "step": 304 }, { "epoch": 0.52, "learning_rate": 2.915869980879541e-07, "logits/chosen": -1.7565029859542847, "logits/rejected": -2.181678533554077, "logps/chosen": -76.67152404785156, "logps/rejected": -110.38058471679688, "loss": 0.508, "rewards/accuracies": 1.0, "rewards/chosen": 0.10511721670627594, "rewards/margins": 1.0038609504699707, "rewards/rejected": -0.8987436294555664, "step": 305 }, { "epoch": 0.53, "learning_rate": 2.925430210325048e-07, "logits/chosen": -2.124422311782837, "logits/rejected": -1.8574974536895752, "logps/chosen": -77.8558349609375, "logps/rejected": -87.17546081542969, "loss": 0.7039, "rewards/accuracies": 1.0, "rewards/chosen": 0.2812303304672241, "rewards/margins": 0.33831557631492615, "rewards/rejected": -0.057085223495960236, "step": 306 }, { "epoch": 0.53, "learning_rate": 2.934990439770554e-07, "logits/chosen": -1.9968044757843018, "logits/rejected": -2.147326707839966, "logps/chosen": -53.83660888671875, "logps/rejected": -52.99882888793945, "loss": 0.6274, "rewards/accuracies": 0.5, "rewards/chosen": 0.12977656722068787, "rewards/margins": -0.19357050955295563, "rewards/rejected": 0.3233470916748047, "step": 307 }, { "epoch": 0.53, "learning_rate": 2.944550669216061e-07, "logits/chosen": -2.0337181091308594, "logits/rejected": -1.9729557037353516, "logps/chosen": -75.06807708740234, "logps/rejected": -76.71932983398438, "loss": 0.6264, "rewards/accuracies": 0.5, "rewards/chosen": -0.20850497484207153, "rewards/margins": 0.2730198800563812, "rewards/rejected": -0.48152485489845276, "step": 308 }, { "epoch": 0.53, "learning_rate": 2.954110898661568e-07, "logits/chosen": -1.8492008447647095, "logits/rejected": -1.8590391874313354, "logps/chosen": -102.08244323730469, "logps/rejected": -94.7771224975586, "loss": 0.6017, "rewards/accuracies": 0.75, "rewards/chosen": -0.14934426546096802, "rewards/margins": 0.3904576599597931, "rewards/rejected": -0.5398019552230835, "step": 309 }, { "epoch": 0.53, "learning_rate": 2.963671128107074e-07, "logits/chosen": -1.8194847106933594, "logits/rejected": -2.12575101852417, "logps/chosen": -72.20040130615234, "logps/rejected": -86.30438232421875, "loss": 0.5133, "rewards/accuracies": 0.75, "rewards/chosen": -0.11313458532094955, "rewards/margins": 0.3480141758918762, "rewards/rejected": -0.461148738861084, "step": 310 }, { "epoch": 0.54, "learning_rate": 2.9732313575525815e-07, "logits/chosen": -2.1395225524902344, "logits/rejected": -1.8150995969772339, "logps/chosen": -95.21078491210938, "logps/rejected": -83.30145263671875, "loss": 0.6281, "rewards/accuracies": 0.25, "rewards/chosen": -0.3973596692085266, "rewards/margins": -0.37559717893600464, "rewards/rejected": -0.02176244929432869, "step": 311 }, { "epoch": 0.54, "learning_rate": 2.982791586998088e-07, "logits/chosen": -2.173396348953247, "logits/rejected": -1.9455790519714355, "logps/chosen": -86.28528594970703, "logps/rejected": -89.52232360839844, "loss": 0.5693, "rewards/accuracies": 0.5, "rewards/chosen": -0.15351761877536774, "rewards/margins": 0.23620472848415375, "rewards/rejected": -0.3897223472595215, "step": 312 }, { "epoch": 0.54, "learning_rate": 2.9923518164435947e-07, "logits/chosen": -2.0879764556884766, "logits/rejected": -1.9344336986541748, "logps/chosen": -101.29786682128906, "logps/rejected": -101.45344543457031, "loss": 0.5274, "rewards/accuracies": 1.0, "rewards/chosen": 0.4885980486869812, "rewards/margins": 0.8546653389930725, "rewards/rejected": -0.3660672903060913, "step": 313 }, { "epoch": 0.54, "learning_rate": 3.001912045889101e-07, "logits/chosen": -2.1020290851593018, "logits/rejected": -2.142033338546753, "logps/chosen": -112.97265625, "logps/rejected": -99.53963470458984, "loss": 0.5001, "rewards/accuracies": 0.75, "rewards/chosen": 0.2517315745353699, "rewards/margins": 0.3544546961784363, "rewards/rejected": -0.1027231216430664, "step": 314 }, { "epoch": 0.54, "learning_rate": 3.011472275334608e-07, "logits/chosen": -2.142465591430664, "logits/rejected": -1.8830580711364746, "logps/chosen": -95.20966339111328, "logps/rejected": -93.46554565429688, "loss": 0.5513, "rewards/accuracies": 0.5, "rewards/chosen": -0.14183694124221802, "rewards/margins": 0.46455007791519165, "rewards/rejected": -0.6063870191574097, "step": 315 }, { "epoch": 0.54, "learning_rate": 3.021032504780114e-07, "logits/chosen": -1.870273470878601, "logits/rejected": -2.034313678741455, "logps/chosen": -69.28380584716797, "logps/rejected": -79.92610168457031, "loss": 0.5499, "rewards/accuracies": 0.5, "rewards/chosen": 0.16742858290672302, "rewards/margins": 0.16521766781806946, "rewards/rejected": 0.0022109132260084152, "step": 316 }, { "epoch": 0.55, "learning_rate": 3.030592734225621e-07, "logits/chosen": -1.7100975513458252, "logits/rejected": -2.0779101848602295, "logps/chosen": -84.43829345703125, "logps/rejected": -91.07797241210938, "loss": 0.6883, "rewards/accuracies": 0.5, "rewards/chosen": -0.21060162782669067, "rewards/margins": 0.3880031406879425, "rewards/rejected": -0.5986047983169556, "step": 317 }, { "epoch": 0.55, "learning_rate": 3.0401529636711284e-07, "logits/chosen": -2.0810647010803223, "logits/rejected": -1.700080394744873, "logps/chosen": -99.34054565429688, "logps/rejected": -76.10014343261719, "loss": 0.5915, "rewards/accuracies": 0.25, "rewards/chosen": -0.4276618957519531, "rewards/margins": -0.36459553241729736, "rewards/rejected": -0.06306638568639755, "step": 318 }, { "epoch": 0.55, "learning_rate": 3.049713193116635e-07, "logits/chosen": -2.117095708847046, "logits/rejected": -1.934523344039917, "logps/chosen": -101.79452514648438, "logps/rejected": -82.14076232910156, "loss": 0.706, "rewards/accuracies": 0.75, "rewards/chosen": -0.39765283465385437, "rewards/margins": 0.6554616093635559, "rewards/rejected": -1.053114414215088, "step": 319 }, { "epoch": 0.55, "learning_rate": 3.0592734225621416e-07, "logits/chosen": -2.1867125034332275, "logits/rejected": -1.6309831142425537, "logps/chosen": -94.43438720703125, "logps/rejected": -78.94987487792969, "loss": 0.6453, "rewards/accuracies": 0.25, "rewards/chosen": -0.28554993867874146, "rewards/margins": 0.06523333489894867, "rewards/rejected": -0.35078325867652893, "step": 320 }, { "epoch": 0.55, "learning_rate": 3.068833652007648e-07, "logits/chosen": -1.5436651706695557, "logits/rejected": -2.0356342792510986, "logps/chosen": -65.69554901123047, "logps/rejected": -89.486572265625, "loss": 0.494, "rewards/accuracies": 0.5, "rewards/chosen": 0.23891858756542206, "rewards/margins": 0.45872727036476135, "rewards/rejected": -0.2198086827993393, "step": 321 }, { "epoch": 0.55, "learning_rate": 3.078393881453155e-07, "logits/chosen": -2.2388243675231934, "logits/rejected": -1.927962303161621, "logps/chosen": -100.49943542480469, "logps/rejected": -102.06370544433594, "loss": 0.6016, "rewards/accuracies": 0.5, "rewards/chosen": -0.2825227677822113, "rewards/margins": 0.004995524883270264, "rewards/rejected": -0.28751832246780396, "step": 322 }, { "epoch": 0.56, "learning_rate": 3.087954110898661e-07, "logits/chosen": -1.7580840587615967, "logits/rejected": -2.1134471893310547, "logps/chosen": -64.3009033203125, "logps/rejected": -87.19456481933594, "loss": 0.4578, "rewards/accuracies": 0.75, "rewards/chosen": -0.1779075562953949, "rewards/margins": 0.237160786986351, "rewards/rejected": -0.4150683283805847, "step": 323 }, { "epoch": 0.56, "learning_rate": 3.0975143403441685e-07, "logits/chosen": -1.9603796005249023, "logits/rejected": -2.0254435539245605, "logps/chosen": -79.54496765136719, "logps/rejected": -102.7524642944336, "loss": 0.5963, "rewards/accuracies": 0.75, "rewards/chosen": 0.031278423964977264, "rewards/margins": 0.6930742263793945, "rewards/rejected": -0.6617958545684814, "step": 324 }, { "epoch": 0.56, "learning_rate": 3.107074569789675e-07, "logits/chosen": -2.0005581378936768, "logits/rejected": -1.8674266338348389, "logps/chosen": -85.40232849121094, "logps/rejected": -97.19544219970703, "loss": 0.5916, "rewards/accuracies": 0.5, "rewards/chosen": -0.07971210032701492, "rewards/margins": 0.385396271944046, "rewards/rejected": -0.46510839462280273, "step": 325 }, { "epoch": 0.56, "learning_rate": 3.1166347992351816e-07, "logits/chosen": -1.926552176475525, "logits/rejected": -1.9844764471054077, "logps/chosen": -83.39572143554688, "logps/rejected": -91.38101196289062, "loss": 0.5731, "rewards/accuracies": 1.0, "rewards/chosen": 0.4068673253059387, "rewards/margins": 0.688968300819397, "rewards/rejected": -0.28210097551345825, "step": 326 }, { "epoch": 0.56, "learning_rate": 3.126195028680688e-07, "logits/chosen": -1.7600847482681274, "logits/rejected": -2.2252025604248047, "logps/chosen": -82.97364044189453, "logps/rejected": -96.58613586425781, "loss": 0.5236, "rewards/accuracies": 0.75, "rewards/chosen": 0.06261608004570007, "rewards/margins": 0.7645074129104614, "rewards/rejected": -0.701891303062439, "step": 327 }, { "epoch": 0.56, "learning_rate": 3.135755258126195e-07, "logits/chosen": -1.9400324821472168, "logits/rejected": -2.0737245082855225, "logps/chosen": -90.58009338378906, "logps/rejected": -100.95301818847656, "loss": 0.6153, "rewards/accuracies": 0.5, "rewards/chosen": -0.31317681074142456, "rewards/margins": 0.11255179345607758, "rewards/rejected": -0.42572861909866333, "step": 328 }, { "epoch": 0.57, "learning_rate": 3.1453154875717016e-07, "logits/chosen": -1.7530121803283691, "logits/rejected": -2.067105293273926, "logps/chosen": -76.65299987792969, "logps/rejected": -94.59478759765625, "loss": 0.4826, "rewards/accuracies": 0.75, "rewards/chosen": -0.08859996497631073, "rewards/margins": 0.17344772815704346, "rewards/rejected": -0.262047678232193, "step": 329 }, { "epoch": 0.57, "learning_rate": 3.154875717017208e-07, "logits/chosen": -1.981825828552246, "logits/rejected": -2.124166488647461, "logps/chosen": -59.4567756652832, "logps/rejected": -79.04486846923828, "loss": 0.487, "rewards/accuracies": 0.25, "rewards/chosen": -0.1547083854675293, "rewards/margins": 0.5476846098899841, "rewards/rejected": -0.7023929953575134, "step": 330 }, { "epoch": 0.57, "learning_rate": 3.1644359464627153e-07, "logits/chosen": -1.4186429977416992, "logits/rejected": -2.0838425159454346, "logps/chosen": -86.64715576171875, "logps/rejected": -153.74020385742188, "loss": 0.6448, "rewards/accuracies": 0.5, "rewards/chosen": -0.0866537094116211, "rewards/margins": 0.31638965010643005, "rewards/rejected": -0.40304335951805115, "step": 331 }, { "epoch": 0.57, "learning_rate": 3.1739961759082217e-07, "logits/chosen": -2.206881046295166, "logits/rejected": -1.9970414638519287, "logps/chosen": -87.19760131835938, "logps/rejected": -86.5029296875, "loss": 0.4307, "rewards/accuracies": 1.0, "rewards/chosen": 0.841663122177124, "rewards/margins": 1.2809011936187744, "rewards/rejected": -0.4392380118370056, "step": 332 }, { "epoch": 0.57, "learning_rate": 3.1835564053537285e-07, "logits/chosen": -2.023578643798828, "logits/rejected": -2.175159215927124, "logps/chosen": -99.45663452148438, "logps/rejected": -105.82139587402344, "loss": 0.5571, "rewards/accuracies": 0.75, "rewards/chosen": -0.3375371992588043, "rewards/margins": 0.2094588726758957, "rewards/rejected": -0.5469961166381836, "step": 333 }, { "epoch": 0.57, "learning_rate": 3.193116634799235e-07, "logits/chosen": -2.1841201782226562, "logits/rejected": -1.888443112373352, "logps/chosen": -75.5561294555664, "logps/rejected": -65.83451843261719, "loss": 0.635, "rewards/accuracies": 0.75, "rewards/chosen": 0.3536292314529419, "rewards/margins": 0.2714346945285797, "rewards/rejected": 0.08219452202320099, "step": 334 }, { "epoch": 0.58, "learning_rate": 3.2026768642447417e-07, "logits/chosen": -1.8848237991333008, "logits/rejected": -2.2129688262939453, "logps/chosen": -65.71392822265625, "logps/rejected": -97.72526550292969, "loss": 0.5804, "rewards/accuracies": 1.0, "rewards/chosen": 0.4738055467605591, "rewards/margins": 0.8930181860923767, "rewards/rejected": -0.4192127287387848, "step": 335 }, { "epoch": 0.58, "learning_rate": 3.212237093690248e-07, "logits/chosen": -1.7999740839004517, "logits/rejected": -2.122060775756836, "logps/chosen": -62.80368423461914, "logps/rejected": -96.42321014404297, "loss": 0.5592, "rewards/accuracies": 1.0, "rewards/chosen": -0.09330625832080841, "rewards/margins": 0.7370918393135071, "rewards/rejected": -0.8303980827331543, "step": 336 }, { "epoch": 0.58, "learning_rate": 3.2217973231357554e-07, "logits/chosen": -1.916405439376831, "logits/rejected": -2.175516366958618, "logps/chosen": -92.82579040527344, "logps/rejected": -101.86477661132812, "loss": 0.4968, "rewards/accuracies": 1.0, "rewards/chosen": 0.22943554818630219, "rewards/margins": 0.5393989682197571, "rewards/rejected": -0.3099634051322937, "step": 337 }, { "epoch": 0.58, "learning_rate": 3.2313575525812617e-07, "logits/chosen": -1.9851329326629639, "logits/rejected": -2.006559371948242, "logps/chosen": -111.76080322265625, "logps/rejected": -102.82134246826172, "loss": 0.5918, "rewards/accuracies": 0.75, "rewards/chosen": -0.3173332214355469, "rewards/margins": 0.23344212770462036, "rewards/rejected": -0.5507753491401672, "step": 338 }, { "epoch": 0.58, "learning_rate": 3.2409177820267686e-07, "logits/chosen": -1.9312551021575928, "logits/rejected": -2.2500929832458496, "logps/chosen": -68.03353118896484, "logps/rejected": -78.54720306396484, "loss": 0.6229, "rewards/accuracies": 0.5, "rewards/chosen": 0.06424781680107117, "rewards/margins": 0.014273203909397125, "rewards/rejected": 0.04997463524341583, "step": 339 }, { "epoch": 0.59, "learning_rate": 3.2504780114722754e-07, "logits/chosen": -1.977905511856079, "logits/rejected": -2.1878981590270996, "logps/chosen": -78.98915100097656, "logps/rejected": -100.08831787109375, "loss": 0.598, "rewards/accuracies": 1.0, "rewards/chosen": 0.11065960675477982, "rewards/margins": 0.5167840719223022, "rewards/rejected": -0.4061245024204254, "step": 340 }, { "epoch": 0.59, "learning_rate": 3.2600382409177817e-07, "logits/chosen": -1.904938817024231, "logits/rejected": -2.040189504623413, "logps/chosen": -71.18148040771484, "logps/rejected": -107.6953125, "loss": 0.5778, "rewards/accuracies": 1.0, "rewards/chosen": 0.7998030185699463, "rewards/margins": 1.8272242546081543, "rewards/rejected": -1.027421236038208, "step": 341 }, { "epoch": 0.59, "learning_rate": 3.2695984703632886e-07, "logits/chosen": -1.9566224813461304, "logits/rejected": -2.1682288646698, "logps/chosen": -75.55276489257812, "logps/rejected": -81.59195709228516, "loss": 0.572, "rewards/accuracies": 0.5, "rewards/chosen": 0.18823358416557312, "rewards/margins": 0.506115198135376, "rewards/rejected": -0.31788158416748047, "step": 342 }, { "epoch": 0.59, "learning_rate": 3.279158699808795e-07, "logits/chosen": -2.0488786697387695, "logits/rejected": -1.754557490348816, "logps/chosen": -87.47345733642578, "logps/rejected": -90.32276153564453, "loss": 0.5648, "rewards/accuracies": 0.75, "rewards/chosen": 0.2451750934123993, "rewards/margins": 1.0773768424987793, "rewards/rejected": -0.8322016596794128, "step": 343 }, { "epoch": 0.59, "learning_rate": 3.2887189292543023e-07, "logits/chosen": -2.0078790187835693, "logits/rejected": -2.312429904937744, "logps/chosen": -103.31449127197266, "logps/rejected": -123.96754455566406, "loss": 0.4654, "rewards/accuracies": 0.5, "rewards/chosen": -0.16029739379882812, "rewards/margins": 0.07281780242919922, "rewards/rejected": -0.23311519622802734, "step": 344 }, { "epoch": 0.59, "learning_rate": 3.2982791586998086e-07, "logits/chosen": -2.080733299255371, "logits/rejected": -2.049989700317383, "logps/chosen": -92.69189453125, "logps/rejected": -102.25933837890625, "loss": 0.5142, "rewards/accuracies": 0.75, "rewards/chosen": -0.38294678926467896, "rewards/margins": 0.8240473866462708, "rewards/rejected": -1.2069941759109497, "step": 345 }, { "epoch": 0.6, "learning_rate": 3.3078393881453154e-07, "logits/chosen": -2.1257405281066895, "logits/rejected": -1.9184200763702393, "logps/chosen": -77.6908950805664, "logps/rejected": -68.31731414794922, "loss": 0.4759, "rewards/accuracies": 0.75, "rewards/chosen": -0.14528772234916687, "rewards/margins": 0.25854596495628357, "rewards/rejected": -0.40383368730545044, "step": 346 }, { "epoch": 0.6, "learning_rate": 3.317399617590822e-07, "logits/chosen": -1.9828095436096191, "logits/rejected": -2.1319642066955566, "logps/chosen": -68.69400024414062, "logps/rejected": -87.40052032470703, "loss": 0.4922, "rewards/accuracies": 0.75, "rewards/chosen": -0.05995815619826317, "rewards/margins": 0.3541019558906555, "rewards/rejected": -0.414060115814209, "step": 347 }, { "epoch": 0.6, "learning_rate": 3.3269598470363286e-07, "logits/chosen": -1.94382905960083, "logits/rejected": -2.0376105308532715, "logps/chosen": -90.96432495117188, "logps/rejected": -96.67764282226562, "loss": 0.4923, "rewards/accuracies": 0.75, "rewards/chosen": -0.22672787308692932, "rewards/margins": 0.39173126220703125, "rewards/rejected": -0.6184591054916382, "step": 348 }, { "epoch": 0.6, "learning_rate": 3.336520076481835e-07, "logits/chosen": -1.7760119438171387, "logits/rejected": -2.04541015625, "logps/chosen": -71.89248657226562, "logps/rejected": -94.97349548339844, "loss": 0.4957, "rewards/accuracies": 0.5, "rewards/chosen": -0.2446001023054123, "rewards/margins": 0.10979748517274857, "rewards/rejected": -0.35439759492874146, "step": 349 }, { "epoch": 0.6, "learning_rate": 3.3460803059273423e-07, "logits/chosen": -2.2792739868164062, "logits/rejected": -1.7024227380752563, "logps/chosen": -86.15721130371094, "logps/rejected": -93.30146026611328, "loss": 0.5182, "rewards/accuracies": 0.5, "rewards/chosen": 0.07550440728664398, "rewards/margins": 0.9870800971984863, "rewards/rejected": -0.9115757346153259, "step": 350 }, { "epoch": 0.6, "learning_rate": 3.355640535372849e-07, "logits/chosen": -1.6593173742294312, "logits/rejected": -2.130580425262451, "logps/chosen": -78.21620178222656, "logps/rejected": -101.28565216064453, "loss": 0.5846, "rewards/accuracies": 0.75, "rewards/chosen": -0.4449988603591919, "rewards/margins": 0.2662034034729004, "rewards/rejected": -0.7112022638320923, "step": 351 }, { "epoch": 0.61, "learning_rate": 3.3652007648183555e-07, "logits/chosen": -2.2332603931427, "logits/rejected": -1.908484697341919, "logps/chosen": -100.09076690673828, "logps/rejected": -90.13571166992188, "loss": 0.4768, "rewards/accuracies": 0.75, "rewards/chosen": -0.44074147939682007, "rewards/margins": 0.46830177307128906, "rewards/rejected": -0.9090432524681091, "step": 352 }, { "epoch": 0.61, "learning_rate": 3.3747609942638623e-07, "logits/chosen": -1.8735027313232422, "logits/rejected": -1.7163647413253784, "logps/chosen": -105.8303451538086, "logps/rejected": -99.1122055053711, "loss": 0.497, "rewards/accuracies": 0.5, "rewards/chosen": -0.5895533561706543, "rewards/margins": 0.17446574568748474, "rewards/rejected": -0.7640191316604614, "step": 353 }, { "epoch": 0.61, "learning_rate": 3.3843212237093687e-07, "logits/chosen": -1.958373785018921, "logits/rejected": -1.954774022102356, "logps/chosen": -74.78685760498047, "logps/rejected": -95.73468780517578, "loss": 0.5869, "rewards/accuracies": 0.75, "rewards/chosen": -0.039089977741241455, "rewards/margins": 0.6080716252326965, "rewards/rejected": -0.6471616625785828, "step": 354 }, { "epoch": 0.61, "learning_rate": 3.3938814531548755e-07, "logits/chosen": -2.067993640899658, "logits/rejected": -1.9385552406311035, "logps/chosen": -85.21250915527344, "logps/rejected": -72.60763549804688, "loss": 0.6306, "rewards/accuracies": 0.75, "rewards/chosen": -0.12293292582035065, "rewards/margins": 0.6642767786979675, "rewards/rejected": -0.7872097492218018, "step": 355 }, { "epoch": 0.61, "learning_rate": 3.403441682600382e-07, "logits/chosen": -2.1157498359680176, "logits/rejected": -1.9544456005096436, "logps/chosen": -67.50309753417969, "logps/rejected": -96.28915405273438, "loss": 0.4245, "rewards/accuracies": 1.0, "rewards/chosen": 0.7269476056098938, "rewards/margins": 2.334684371948242, "rewards/rejected": -1.6077367067337036, "step": 356 }, { "epoch": 0.61, "learning_rate": 3.413001912045889e-07, "logits/chosen": -2.063948631286621, "logits/rejected": -1.9301378726959229, "logps/chosen": -81.00011444091797, "logps/rejected": -83.76765441894531, "loss": 0.5561, "rewards/accuracies": 0.75, "rewards/chosen": 0.11920127272605896, "rewards/margins": 0.7916761636734009, "rewards/rejected": -0.6724748611450195, "step": 357 }, { "epoch": 0.62, "learning_rate": 3.4225621414913955e-07, "logits/chosen": -1.8530371189117432, "logits/rejected": -1.9574463367462158, "logps/chosen": -53.39192199707031, "logps/rejected": -70.83479309082031, "loss": 0.5863, "rewards/accuracies": 0.75, "rewards/chosen": -0.22196045517921448, "rewards/margins": 0.5433354377746582, "rewards/rejected": -0.7652958631515503, "step": 358 }, { "epoch": 0.62, "learning_rate": 3.4321223709369024e-07, "logits/chosen": -1.6694676876068115, "logits/rejected": -2.2102766036987305, "logps/chosen": -88.14588928222656, "logps/rejected": -104.17140197753906, "loss": 0.5126, "rewards/accuracies": 0.75, "rewards/chosen": -0.1631685346364975, "rewards/margins": 0.7813066244125366, "rewards/rejected": -0.9444751739501953, "step": 359 }, { "epoch": 0.62, "learning_rate": 3.441682600382409e-07, "logits/chosen": -1.5539451837539673, "logits/rejected": -2.1408305168151855, "logps/chosen": -88.59964752197266, "logps/rejected": -109.12380981445312, "loss": 0.6338, "rewards/accuracies": 0.5, "rewards/chosen": -0.24688512086868286, "rewards/margins": 0.6189810037612915, "rewards/rejected": -0.8658661246299744, "step": 360 }, { "epoch": 0.62, "learning_rate": 3.4512428298279155e-07, "logits/chosen": -2.056530237197876, "logits/rejected": -2.12272310256958, "logps/chosen": -86.81903839111328, "logps/rejected": -99.50833892822266, "loss": 0.6064, "rewards/accuracies": 0.75, "rewards/chosen": 0.10026665031909943, "rewards/margins": 0.3728925585746765, "rewards/rejected": -0.2726259231567383, "step": 361 }, { "epoch": 0.62, "learning_rate": 3.4608030592734224e-07, "logits/chosen": -2.1324703693389893, "logits/rejected": -1.7758831977844238, "logps/chosen": -95.64985656738281, "logps/rejected": -81.95953369140625, "loss": 0.6488, "rewards/accuracies": 0.5, "rewards/chosen": -0.12952899932861328, "rewards/margins": 0.19051378965377808, "rewards/rejected": -0.32004278898239136, "step": 362 }, { "epoch": 0.62, "learning_rate": 3.470363288718929e-07, "logits/chosen": -2.1516382694244385, "logits/rejected": -1.8641952276229858, "logps/chosen": -79.81251525878906, "logps/rejected": -85.25827026367188, "loss": 0.5336, "rewards/accuracies": 0.75, "rewards/chosen": -0.21222266554832458, "rewards/margins": 0.6598236560821533, "rewards/rejected": -0.8720462322235107, "step": 363 }, { "epoch": 0.63, "learning_rate": 3.479923518164436e-07, "logits/chosen": -1.9112627506256104, "logits/rejected": -2.143226146697998, "logps/chosen": -69.95817565917969, "logps/rejected": -82.54468536376953, "loss": 0.605, "rewards/accuracies": 0.5, "rewards/chosen": -0.5450955033302307, "rewards/margins": 0.15173707902431488, "rewards/rejected": -0.6968326568603516, "step": 364 }, { "epoch": 0.63, "learning_rate": 3.4894837476099424e-07, "logits/chosen": -1.9842718839645386, "logits/rejected": -2.1043851375579834, "logps/chosen": -85.98355102539062, "logps/rejected": -77.96389770507812, "loss": 0.5707, "rewards/accuracies": 0.75, "rewards/chosen": -0.20462553203105927, "rewards/margins": 1.1522181034088135, "rewards/rejected": -1.3568435907363892, "step": 365 }, { "epoch": 0.63, "learning_rate": 3.499043977055449e-07, "logits/chosen": -1.7745956182479858, "logits/rejected": -2.1901659965515137, "logps/chosen": -61.408878326416016, "logps/rejected": -102.24456787109375, "loss": 0.4895, "rewards/accuracies": 1.0, "rewards/chosen": 0.8169694542884827, "rewards/margins": 1.897552490234375, "rewards/rejected": -1.080583095550537, "step": 366 }, { "epoch": 0.63, "learning_rate": 3.5086042065009556e-07, "logits/chosen": -1.7972297668457031, "logits/rejected": -2.041029930114746, "logps/chosen": -70.05557250976562, "logps/rejected": -79.07891845703125, "loss": 0.5116, "rewards/accuracies": 1.0, "rewards/chosen": -0.062265150249004364, "rewards/margins": 0.5097801685333252, "rewards/rejected": -0.5720453858375549, "step": 367 }, { "epoch": 0.63, "learning_rate": 3.5181644359464624e-07, "logits/chosen": -2.0323519706726074, "logits/rejected": -1.9476739168167114, "logps/chosen": -81.83737182617188, "logps/rejected": -82.00825500488281, "loss": 0.4178, "rewards/accuracies": 0.75, "rewards/chosen": 0.15202239155769348, "rewards/margins": 0.35196441411972046, "rewards/rejected": -0.19994202256202698, "step": 368 }, { "epoch": 0.64, "learning_rate": 3.527724665391969e-07, "logits/chosen": -1.9624943733215332, "logits/rejected": -2.0964136123657227, "logps/chosen": -74.611083984375, "logps/rejected": -92.75173950195312, "loss": 0.5506, "rewards/accuracies": 0.5, "rewards/chosen": -0.26617422699928284, "rewards/margins": 0.5800784230232239, "rewards/rejected": -0.8462526798248291, "step": 369 }, { "epoch": 0.64, "learning_rate": 3.537284894837476e-07, "logits/chosen": -2.0673694610595703, "logits/rejected": -2.084665298461914, "logps/chosen": -83.05935668945312, "logps/rejected": -79.03254699707031, "loss": 0.5314, "rewards/accuracies": 0.5, "rewards/chosen": -0.5673525333404541, "rewards/margins": 0.047849059104919434, "rewards/rejected": -0.6152015924453735, "step": 370 }, { "epoch": 0.64, "learning_rate": 3.546845124282983e-07, "logits/chosen": -2.0351510047912598, "logits/rejected": -2.012829065322876, "logps/chosen": -95.73204040527344, "logps/rejected": -100.80382537841797, "loss": 0.5507, "rewards/accuracies": 1.0, "rewards/chosen": 0.20420551300048828, "rewards/margins": 1.122999906539917, "rewards/rejected": -0.9187945127487183, "step": 371 }, { "epoch": 0.64, "learning_rate": 3.5564053537284893e-07, "logits/chosen": -1.5694785118103027, "logits/rejected": -1.7751362323760986, "logps/chosen": -91.56226348876953, "logps/rejected": -90.46824645996094, "loss": 0.6001, "rewards/accuracies": 0.75, "rewards/chosen": 0.314847469329834, "rewards/margins": 1.374867558479309, "rewards/rejected": -1.060020089149475, "step": 372 }, { "epoch": 0.64, "learning_rate": 3.565965583173996e-07, "logits/chosen": -2.0285136699676514, "logits/rejected": -2.0616936683654785, "logps/chosen": -84.29702758789062, "logps/rejected": -95.5648193359375, "loss": 0.4078, "rewards/accuracies": 0.75, "rewards/chosen": 0.034748271107673645, "rewards/margins": 0.8526849746704102, "rewards/rejected": -0.8179367780685425, "step": 373 }, { "epoch": 0.64, "learning_rate": 3.5755258126195025e-07, "logits/chosen": -2.061905860900879, "logits/rejected": -2.006558418273926, "logps/chosen": -75.27387237548828, "logps/rejected": -83.98577117919922, "loss": 0.5249, "rewards/accuracies": 0.75, "rewards/chosen": 0.4303106665611267, "rewards/margins": 1.0571081638336182, "rewards/rejected": -0.6267974972724915, "step": 374 }, { "epoch": 0.65, "learning_rate": 3.5850860420650093e-07, "logits/chosen": -2.0092451572418213, "logits/rejected": -2.02738618850708, "logps/chosen": -85.75302124023438, "logps/rejected": -96.62167358398438, "loss": 0.4882, "rewards/accuracies": 1.0, "rewards/chosen": 0.4835953712463379, "rewards/margins": 1.6953866481781006, "rewards/rejected": -1.2117912769317627, "step": 375 }, { "epoch": 0.65, "learning_rate": 3.594646271510516e-07, "logits/chosen": -1.7993345260620117, "logits/rejected": -2.0944719314575195, "logps/chosen": -66.62001037597656, "logps/rejected": -97.54383850097656, "loss": 0.4552, "rewards/accuracies": 1.0, "rewards/chosen": 0.41871386766433716, "rewards/margins": 1.4715732336044312, "rewards/rejected": -1.0528593063354492, "step": 376 }, { "epoch": 0.65, "learning_rate": 3.604206500956023e-07, "logits/chosen": -1.987966537475586, "logits/rejected": -2.151970386505127, "logps/chosen": -74.36163330078125, "logps/rejected": -84.53702545166016, "loss": 0.4758, "rewards/accuracies": 1.0, "rewards/chosen": -0.00346335768699646, "rewards/margins": 0.8343693017959595, "rewards/rejected": -0.8378326892852783, "step": 377 }, { "epoch": 0.65, "learning_rate": 3.6137667304015293e-07, "logits/chosen": -2.1051995754241943, "logits/rejected": -2.1096620559692383, "logps/chosen": -55.39861297607422, "logps/rejected": -85.42576599121094, "loss": 0.4506, "rewards/accuracies": 1.0, "rewards/chosen": 0.3253825306892395, "rewards/margins": 0.8985930681228638, "rewards/rejected": -0.5732105374336243, "step": 378 }, { "epoch": 0.65, "learning_rate": 3.623326959847036e-07, "logits/chosen": -2.090146064758301, "logits/rejected": -1.6158664226531982, "logps/chosen": -93.05650329589844, "logps/rejected": -79.87966918945312, "loss": 0.498, "rewards/accuracies": 0.75, "rewards/chosen": 0.04565992206335068, "rewards/margins": 0.891191303730011, "rewards/rejected": -0.8455313444137573, "step": 379 }, { "epoch": 0.65, "learning_rate": 3.632887189292543e-07, "logits/chosen": -2.2889277935028076, "logits/rejected": -2.136761426925659, "logps/chosen": -80.21802520751953, "logps/rejected": -102.50513458251953, "loss": 0.4018, "rewards/accuracies": 1.0, "rewards/chosen": 0.004210472106933594, "rewards/margins": 1.7506259679794312, "rewards/rejected": -1.7464154958724976, "step": 380 }, { "epoch": 0.66, "learning_rate": 3.6424474187380494e-07, "logits/chosen": -2.0074477195739746, "logits/rejected": -1.9867764711380005, "logps/chosen": -67.17121887207031, "logps/rejected": -71.35968017578125, "loss": 0.6106, "rewards/accuracies": 0.5, "rewards/chosen": 0.32204458117485046, "rewards/margins": 0.9772815704345703, "rewards/rejected": -0.6552370190620422, "step": 381 }, { "epoch": 0.66, "learning_rate": 3.652007648183556e-07, "logits/chosen": -2.0497336387634277, "logits/rejected": -2.063581705093384, "logps/chosen": -67.29097747802734, "logps/rejected": -72.83706665039062, "loss": 0.451, "rewards/accuracies": 1.0, "rewards/chosen": 0.4986443519592285, "rewards/margins": 1.1904717683792114, "rewards/rejected": -0.6918274164199829, "step": 382 }, { "epoch": 0.66, "learning_rate": 3.661567877629063e-07, "logits/chosen": -2.006122589111328, "logits/rejected": -2.1958038806915283, "logps/chosen": -76.68258666992188, "logps/rejected": -74.89225769042969, "loss": 0.6362, "rewards/accuracies": 0.25, "rewards/chosen": -0.2507268786430359, "rewards/margins": 0.07476025819778442, "rewards/rejected": -0.3254871368408203, "step": 383 }, { "epoch": 0.66, "learning_rate": 3.67112810707457e-07, "logits/chosen": -2.0337109565734863, "logits/rejected": -2.2324671745300293, "logps/chosen": -81.2001953125, "logps/rejected": -101.04330444335938, "loss": 0.496, "rewards/accuracies": 0.25, "rewards/chosen": -0.41607019305229187, "rewards/margins": 0.2030171900987625, "rewards/rejected": -0.6190873384475708, "step": 384 }, { "epoch": 0.66, "learning_rate": 3.680688336520076e-07, "logits/chosen": -1.988822102546692, "logits/rejected": -2.0624704360961914, "logps/chosen": -90.22693634033203, "logps/rejected": -107.49404907226562, "loss": 0.4752, "rewards/accuracies": 0.5, "rewards/chosen": 0.38939231634140015, "rewards/margins": 1.103510856628418, "rewards/rejected": -0.7141185998916626, "step": 385 }, { "epoch": 0.66, "learning_rate": 3.690248565965583e-07, "logits/chosen": -2.1240622997283936, "logits/rejected": -1.932265043258667, "logps/chosen": -77.00920104980469, "logps/rejected": -76.74916076660156, "loss": 0.6172, "rewards/accuracies": 0.75, "rewards/chosen": 0.584243893623352, "rewards/margins": 0.6549475789070129, "rewards/rejected": -0.07070371508598328, "step": 386 }, { "epoch": 0.67, "learning_rate": 3.6998087954110894e-07, "logits/chosen": -1.97305166721344, "logits/rejected": -1.6528306007385254, "logps/chosen": -80.56987762451172, "logps/rejected": -87.6678695678711, "loss": 0.5439, "rewards/accuracies": 0.75, "rewards/chosen": -0.0021187737584114075, "rewards/margins": 0.5806781053543091, "rewards/rejected": -0.5827969312667847, "step": 387 }, { "epoch": 0.67, "learning_rate": 3.709369024856596e-07, "logits/chosen": -1.8405628204345703, "logits/rejected": -2.213297128677368, "logps/chosen": -93.98493194580078, "logps/rejected": -133.0418243408203, "loss": 0.6042, "rewards/accuracies": 0.75, "rewards/chosen": 0.1450922042131424, "rewards/margins": 0.8354536294937134, "rewards/rejected": -0.6903613805770874, "step": 388 }, { "epoch": 0.67, "learning_rate": 3.718929254302103e-07, "logits/chosen": -2.1927218437194824, "logits/rejected": -1.634804129600525, "logps/chosen": -104.91355895996094, "logps/rejected": -83.12547302246094, "loss": 0.5741, "rewards/accuracies": 0.5, "rewards/chosen": -0.057104334235191345, "rewards/margins": 0.20408746600151062, "rewards/rejected": -0.2611917555332184, "step": 389 }, { "epoch": 0.67, "learning_rate": 3.72848948374761e-07, "logits/chosen": -1.2404714822769165, "logits/rejected": -2.1003293991088867, "logps/chosen": -62.92586898803711, "logps/rejected": -96.61937713623047, "loss": 0.4445, "rewards/accuracies": 0.75, "rewards/chosen": 0.2267591506242752, "rewards/margins": 0.5100923776626587, "rewards/rejected": -0.2833332419395447, "step": 390 }, { "epoch": 0.67, "learning_rate": 3.738049713193117e-07, "logits/chosen": -2.1247496604919434, "logits/rejected": -1.8596898317337036, "logps/chosen": -96.95081329345703, "logps/rejected": -115.67572021484375, "loss": 0.4422, "rewards/accuracies": 1.0, "rewards/chosen": 0.8190661668777466, "rewards/margins": 1.8968935012817383, "rewards/rejected": -1.0778274536132812, "step": 391 }, { "epoch": 0.67, "learning_rate": 3.747609942638623e-07, "logits/chosen": -2.0927133560180664, "logits/rejected": -2.259138584136963, "logps/chosen": -82.83946990966797, "logps/rejected": -114.65347290039062, "loss": 0.4701, "rewards/accuracies": 1.0, "rewards/chosen": 0.6867069602012634, "rewards/margins": 2.4179861545562744, "rewards/rejected": -1.7312790155410767, "step": 392 }, { "epoch": 0.68, "learning_rate": 3.75717017208413e-07, "logits/chosen": -2.152867555618286, "logits/rejected": -1.9206113815307617, "logps/chosen": -85.52920532226562, "logps/rejected": -83.68478393554688, "loss": 0.5481, "rewards/accuracies": 0.5, "rewards/chosen": 0.16306115686893463, "rewards/margins": 0.5934078693389893, "rewards/rejected": -0.43034669756889343, "step": 393 }, { "epoch": 0.68, "learning_rate": 3.7667304015296363e-07, "logits/chosen": -1.8918251991271973, "logits/rejected": -1.8230535984039307, "logps/chosen": -88.44974517822266, "logps/rejected": -99.20134735107422, "loss": 0.5489, "rewards/accuracies": 0.75, "rewards/chosen": -0.676582396030426, "rewards/margins": 0.9621816873550415, "rewards/rejected": -1.6387641429901123, "step": 394 }, { "epoch": 0.68, "learning_rate": 3.776290630975143e-07, "logits/chosen": -1.8358805179595947, "logits/rejected": -1.5623016357421875, "logps/chosen": -99.38089752197266, "logps/rejected": -91.54934692382812, "loss": 0.5804, "rewards/accuracies": 0.25, "rewards/chosen": -0.35509777069091797, "rewards/margins": 0.20224237442016602, "rewards/rejected": -0.557340145111084, "step": 395 }, { "epoch": 0.68, "learning_rate": 3.78585086042065e-07, "logits/chosen": -1.5555155277252197, "logits/rejected": -2.2361884117126465, "logps/chosen": -68.70211791992188, "logps/rejected": -132.138671875, "loss": 0.598, "rewards/accuracies": 1.0, "rewards/chosen": 0.6866505146026611, "rewards/margins": 2.1358160972595215, "rewards/rejected": -1.4491655826568604, "step": 396 }, { "epoch": 0.68, "learning_rate": 3.795411089866157e-07, "logits/chosen": -2.0016868114471436, "logits/rejected": -1.9078502655029297, "logps/chosen": -90.0107650756836, "logps/rejected": -87.20996856689453, "loss": 0.5217, "rewards/accuracies": 1.0, "rewards/chosen": -0.5560691952705383, "rewards/margins": 0.2947368919849396, "rewards/rejected": -0.8508061170578003, "step": 397 }, { "epoch": 0.69, "learning_rate": 3.804971319311663e-07, "logits/chosen": -1.8147664070129395, "logits/rejected": -2.193586587905884, "logps/chosen": -69.20638275146484, "logps/rejected": -94.00411987304688, "loss": 0.5284, "rewards/accuracies": 0.75, "rewards/chosen": 0.30053824186325073, "rewards/margins": 1.7911425828933716, "rewards/rejected": -1.4906044006347656, "step": 398 }, { "epoch": 0.69, "learning_rate": 3.81453154875717e-07, "logits/chosen": -2.1277525424957275, "logits/rejected": -2.2066996097564697, "logps/chosen": -82.46968841552734, "logps/rejected": -90.44571685791016, "loss": 0.5624, "rewards/accuracies": 0.75, "rewards/chosen": -0.23407095670700073, "rewards/margins": 0.7960740923881531, "rewards/rejected": -1.0301450490951538, "step": 399 }, { "epoch": 0.69, "learning_rate": 3.8240917782026763e-07, "logits/chosen": -1.88511061668396, "logits/rejected": -1.8553645610809326, "logps/chosen": -83.56295776367188, "logps/rejected": -78.34930419921875, "loss": 0.5069, "rewards/accuracies": 0.5, "rewards/chosen": 0.14907056093215942, "rewards/margins": 0.8604952096939087, "rewards/rejected": -0.711424708366394, "step": 400 }, { "epoch": 0.69, "learning_rate": 3.833652007648183e-07, "logits/chosen": -2.0490269660949707, "logits/rejected": -1.9258902072906494, "logps/chosen": -85.45075988769531, "logps/rejected": -74.25480651855469, "loss": 0.3881, "rewards/accuracies": 0.75, "rewards/chosen": 0.31002864241600037, "rewards/margins": 0.7337995767593384, "rewards/rejected": -0.4237709045410156, "step": 401 }, { "epoch": 0.69, "learning_rate": 3.84321223709369e-07, "logits/chosen": -1.9489113092422485, "logits/rejected": -2.2108254432678223, "logps/chosen": -84.43872833251953, "logps/rejected": -87.53781127929688, "loss": 0.5605, "rewards/accuracies": 0.5, "rewards/chosen": 0.26932352781295776, "rewards/margins": 0.6071564555168152, "rewards/rejected": -0.33783286809921265, "step": 402 }, { "epoch": 0.69, "learning_rate": 3.852772466539197e-07, "logits/chosen": -2.099208354949951, "logits/rejected": -2.012924909591675, "logps/chosen": -84.80934143066406, "logps/rejected": -101.74081420898438, "loss": 0.4311, "rewards/accuracies": 1.0, "rewards/chosen": 0.02439597249031067, "rewards/margins": 1.4238396883010864, "rewards/rejected": -1.3994438648223877, "step": 403 }, { "epoch": 0.7, "learning_rate": 3.8623326959847037e-07, "logits/chosen": -2.1325430870056152, "logits/rejected": -1.5952095985412598, "logps/chosen": -85.00932312011719, "logps/rejected": -71.47555541992188, "loss": 0.5053, "rewards/accuracies": 0.75, "rewards/chosen": 0.11436787992715836, "rewards/margins": 0.835562527179718, "rewards/rejected": -0.7211946845054626, "step": 404 }, { "epoch": 0.7, "learning_rate": 3.87189292543021e-07, "logits/chosen": -1.9219385385513306, "logits/rejected": -2.1258327960968018, "logps/chosen": -77.24634552001953, "logps/rejected": -84.593994140625, "loss": 0.5302, "rewards/accuracies": 0.5, "rewards/chosen": -0.15927192568778992, "rewards/margins": 0.27602139115333557, "rewards/rejected": -0.4352933168411255, "step": 405 }, { "epoch": 0.7, "learning_rate": 3.881453154875717e-07, "logits/chosen": -2.00365948677063, "logits/rejected": -2.1549153327941895, "logps/chosen": -89.76176452636719, "logps/rejected": -115.08628845214844, "loss": 0.3394, "rewards/accuracies": 1.0, "rewards/chosen": 0.34346771240234375, "rewards/margins": 2.0554497241973877, "rewards/rejected": -1.711982011795044, "step": 406 }, { "epoch": 0.7, "learning_rate": 3.891013384321223e-07, "logits/chosen": -1.862933874130249, "logits/rejected": -2.24015474319458, "logps/chosen": -72.96148681640625, "logps/rejected": -96.06169891357422, "loss": 0.6594, "rewards/accuracies": 0.25, "rewards/chosen": -0.6041862964630127, "rewards/margins": -0.37242984771728516, "rewards/rejected": -0.23175641894340515, "step": 407 }, { "epoch": 0.7, "learning_rate": 3.90057361376673e-07, "logits/chosen": -1.958115816116333, "logits/rejected": -1.9294579029083252, "logps/chosen": -75.85946655273438, "logps/rejected": -103.18534851074219, "loss": 0.4908, "rewards/accuracies": 0.75, "rewards/chosen": 0.27631017565727234, "rewards/margins": 1.9048069715499878, "rewards/rejected": -1.628496766090393, "step": 408 }, { "epoch": 0.7, "learning_rate": 3.910133843212237e-07, "logits/chosen": -2.1231682300567627, "logits/rejected": -2.3287527561187744, "logps/chosen": -79.00687408447266, "logps/rejected": -111.44580078125, "loss": 0.5125, "rewards/accuracies": 1.0, "rewards/chosen": 0.24071750044822693, "rewards/margins": 2.0038721561431885, "rewards/rejected": -1.7631547451019287, "step": 409 }, { "epoch": 0.71, "learning_rate": 3.919694072657744e-07, "logits/chosen": -2.0843214988708496, "logits/rejected": -1.9043755531311035, "logps/chosen": -81.5389404296875, "logps/rejected": -93.06048583984375, "loss": 0.5115, "rewards/accuracies": 0.75, "rewards/chosen": 0.20330582559108734, "rewards/margins": 0.4235738515853882, "rewards/rejected": -0.22026804089546204, "step": 410 }, { "epoch": 0.71, "learning_rate": 3.9292543021032506e-07, "logits/chosen": -1.8833608627319336, "logits/rejected": -1.9893218278884888, "logps/chosen": -86.58349609375, "logps/rejected": -85.6672134399414, "loss": 0.5062, "rewards/accuracies": 0.75, "rewards/chosen": 0.42509377002716064, "rewards/margins": 0.3894140124320984, "rewards/rejected": 0.035679712891578674, "step": 411 }, { "epoch": 0.71, "learning_rate": 3.938814531548757e-07, "logits/chosen": -2.199256420135498, "logits/rejected": -2.094923973083496, "logps/chosen": -77.32113647460938, "logps/rejected": -75.62203979492188, "loss": 0.5666, "rewards/accuracies": 1.0, "rewards/chosen": -0.1999671757221222, "rewards/margins": 0.7526119947433472, "rewards/rejected": -0.952579140663147, "step": 412 }, { "epoch": 0.71, "learning_rate": 3.948374760994264e-07, "logits/chosen": -1.9330986738204956, "logits/rejected": -1.8056713342666626, "logps/chosen": -76.98432922363281, "logps/rejected": -92.28449249267578, "loss": 0.509, "rewards/accuracies": 0.75, "rewards/chosen": 0.37471750378608704, "rewards/margins": 1.5180143117904663, "rewards/rejected": -1.1432968378067017, "step": 413 }, { "epoch": 0.71, "learning_rate": 3.95793499043977e-07, "logits/chosen": -1.7396366596221924, "logits/rejected": -2.0925610065460205, "logps/chosen": -68.75911712646484, "logps/rejected": -96.8015365600586, "loss": 0.5474, "rewards/accuracies": 1.0, "rewards/chosen": 0.46828246116638184, "rewards/margins": 1.2559915781021118, "rewards/rejected": -0.7877092361450195, "step": 414 }, { "epoch": 0.71, "learning_rate": 3.967495219885277e-07, "logits/chosen": -1.7976939678192139, "logits/rejected": -2.2535014152526855, "logps/chosen": -65.78398132324219, "logps/rejected": -82.50791931152344, "loss": 0.4739, "rewards/accuracies": 0.75, "rewards/chosen": 0.32085534930229187, "rewards/margins": 0.3840891718864441, "rewards/rejected": -0.06323385238647461, "step": 415 }, { "epoch": 0.72, "learning_rate": 3.977055449330784e-07, "logits/chosen": -2.0927579402923584, "logits/rejected": -2.118669271469116, "logps/chosen": -96.2884521484375, "logps/rejected": -105.69564819335938, "loss": 0.5275, "rewards/accuracies": 0.5, "rewards/chosen": -1.0156102180480957, "rewards/margins": 0.911209225654602, "rewards/rejected": -1.9268193244934082, "step": 416 }, { "epoch": 0.72, "learning_rate": 3.9866156787762907e-07, "logits/chosen": -1.8962526321411133, "logits/rejected": -2.0300726890563965, "logps/chosen": -71.49971008300781, "logps/rejected": -89.66744995117188, "loss": 0.6579, "rewards/accuracies": 0.75, "rewards/chosen": -0.25746095180511475, "rewards/margins": 1.11050283908844, "rewards/rejected": -1.3679637908935547, "step": 417 }, { "epoch": 0.72, "learning_rate": 3.996175908221797e-07, "logits/chosen": -1.8933688402175903, "logits/rejected": -2.059349775314331, "logps/chosen": -101.18995666503906, "logps/rejected": -127.56741333007812, "loss": 0.3434, "rewards/accuracies": 0.75, "rewards/chosen": -0.7547554969787598, "rewards/margins": 0.6745134592056274, "rewards/rejected": -1.4292690753936768, "step": 418 }, { "epoch": 0.72, "learning_rate": 4.005736137667304e-07, "logits/chosen": -1.7263929843902588, "logits/rejected": -2.1868162155151367, "logps/chosen": -108.04374694824219, "logps/rejected": -121.62445831298828, "loss": 0.4199, "rewards/accuracies": 1.0, "rewards/chosen": -0.2813466191291809, "rewards/margins": 0.4657605290412903, "rewards/rejected": -0.7471071481704712, "step": 419 }, { "epoch": 0.72, "learning_rate": 4.01529636711281e-07, "logits/chosen": -2.0920331478118896, "logits/rejected": -1.979729175567627, "logps/chosen": -101.22381591796875, "logps/rejected": -97.717041015625, "loss": 0.3809, "rewards/accuracies": 1.0, "rewards/chosen": -0.03431786596775055, "rewards/margins": 1.4270968437194824, "rewards/rejected": -1.4614146947860718, "step": 420 }, { "epoch": 0.72, "learning_rate": 4.024856596558317e-07, "logits/chosen": -1.997711420059204, "logits/rejected": -1.9042139053344727, "logps/chosen": -64.72225952148438, "logps/rejected": -97.0563735961914, "loss": 0.4643, "rewards/accuracies": 1.0, "rewards/chosen": -0.151325523853302, "rewards/margins": 2.7416446208953857, "rewards/rejected": -2.892970323562622, "step": 421 }, { "epoch": 0.73, "learning_rate": 4.0344168260038244e-07, "logits/chosen": -2.1232478618621826, "logits/rejected": -2.0503509044647217, "logps/chosen": -103.99455261230469, "logps/rejected": -92.69593811035156, "loss": 0.579, "rewards/accuracies": 0.25, "rewards/chosen": -1.176287293434143, "rewards/margins": -0.0987296998500824, "rewards/rejected": -1.0775575637817383, "step": 422 }, { "epoch": 0.73, "learning_rate": 4.0439770554493307e-07, "logits/chosen": -2.0645828247070312, "logits/rejected": -1.8563640117645264, "logps/chosen": -66.01776123046875, "logps/rejected": -86.06050109863281, "loss": 0.6477, "rewards/accuracies": 0.75, "rewards/chosen": -0.65426105260849, "rewards/margins": 1.7117856740951538, "rewards/rejected": -2.366046905517578, "step": 423 }, { "epoch": 0.73, "learning_rate": 4.0535372848948375e-07, "logits/chosen": -1.9682904481887817, "logits/rejected": -1.7871575355529785, "logps/chosen": -92.89714050292969, "logps/rejected": -83.46792602539062, "loss": 0.5223, "rewards/accuracies": 1.0, "rewards/chosen": -0.706946611404419, "rewards/margins": 0.23533384501934052, "rewards/rejected": -0.9422804117202759, "step": 424 }, { "epoch": 0.73, "learning_rate": 4.063097514340344e-07, "logits/chosen": -2.039886951446533, "logits/rejected": -2.0080461502075195, "logps/chosen": -100.22743225097656, "logps/rejected": -105.08711242675781, "loss": 0.519, "rewards/accuracies": 0.75, "rewards/chosen": -0.9273182153701782, "rewards/margins": 0.4403482675552368, "rewards/rejected": -1.367666482925415, "step": 425 }, { "epoch": 0.73, "learning_rate": 4.0726577437858507e-07, "logits/chosen": -1.7477102279663086, "logits/rejected": -2.2027807235717773, "logps/chosen": -105.26766967773438, "logps/rejected": -149.78076171875, "loss": 0.4524, "rewards/accuracies": 1.0, "rewards/chosen": -0.9142282009124756, "rewards/margins": 1.3608075380325317, "rewards/rejected": -2.2750356197357178, "step": 426 }, { "epoch": 0.73, "learning_rate": 4.082217973231357e-07, "logits/chosen": -2.231557607650757, "logits/rejected": -1.9937869310379028, "logps/chosen": -98.8597412109375, "logps/rejected": -91.13230895996094, "loss": 0.4757, "rewards/accuracies": 0.5, "rewards/chosen": -1.3854682445526123, "rewards/margins": -0.3308557868003845, "rewards/rejected": -1.0546125173568726, "step": 427 }, { "epoch": 0.74, "learning_rate": 4.091778202676864e-07, "logits/chosen": -2.177767276763916, "logits/rejected": -1.902442455291748, "logps/chosen": -92.49605560302734, "logps/rejected": -89.54228210449219, "loss": 0.6057, "rewards/accuracies": 0.75, "rewards/chosen": -0.053266093134880066, "rewards/margins": 1.278584599494934, "rewards/rejected": -1.3318507671356201, "step": 428 }, { "epoch": 0.74, "learning_rate": 4.1013384321223707e-07, "logits/chosen": -2.184920310974121, "logits/rejected": -2.057225227355957, "logps/chosen": -105.63961791992188, "logps/rejected": -129.60751342773438, "loss": 0.4629, "rewards/accuracies": 1.0, "rewards/chosen": -0.41551631689071655, "rewards/margins": 0.807691216468811, "rewards/rejected": -1.2232074737548828, "step": 429 }, { "epoch": 0.74, "learning_rate": 4.1108986615678776e-07, "logits/chosen": -1.820285439491272, "logits/rejected": -2.0209450721740723, "logps/chosen": -62.75083541870117, "logps/rejected": -107.80796813964844, "loss": 0.3638, "rewards/accuracies": 1.0, "rewards/chosen": 0.3692295253276825, "rewards/margins": 2.8592517375946045, "rewards/rejected": -2.4900221824645996, "step": 430 }, { "epoch": 0.74, "learning_rate": 4.1204588910133844e-07, "logits/chosen": -1.943206548690796, "logits/rejected": -2.0795888900756836, "logps/chosen": -106.69209289550781, "logps/rejected": -118.62907409667969, "loss": 0.3578, "rewards/accuracies": 0.75, "rewards/chosen": -0.6467441916465759, "rewards/margins": 0.8388903141021729, "rewards/rejected": -1.4856345653533936, "step": 431 }, { "epoch": 0.74, "learning_rate": 4.130019120458891e-07, "logits/chosen": -2.213779926300049, "logits/rejected": -2.2004175186157227, "logps/chosen": -99.67860412597656, "logps/rejected": -122.64178466796875, "loss": 0.4442, "rewards/accuracies": 1.0, "rewards/chosen": -0.733655571937561, "rewards/margins": 1.1134674549102783, "rewards/rejected": -1.8471230268478394, "step": 432 }, { "epoch": 0.75, "learning_rate": 4.1395793499043976e-07, "logits/chosen": -2.2160391807556152, "logits/rejected": -2.122184991836548, "logps/chosen": -99.88720703125, "logps/rejected": -92.94425964355469, "loss": 0.4013, "rewards/accuracies": 0.75, "rewards/chosen": -0.11437244713306427, "rewards/margins": 0.5825042724609375, "rewards/rejected": -0.6968767642974854, "step": 433 }, { "epoch": 0.75, "learning_rate": 4.149139579349904e-07, "logits/chosen": -1.973140835762024, "logits/rejected": -2.2388529777526855, "logps/chosen": -96.58981323242188, "logps/rejected": -141.84970092773438, "loss": 0.4428, "rewards/accuracies": 1.0, "rewards/chosen": -0.3178423047065735, "rewards/margins": 2.2872180938720703, "rewards/rejected": -2.60506010055542, "step": 434 }, { "epoch": 0.75, "learning_rate": 4.1586998087954113e-07, "logits/chosen": -1.28702974319458, "logits/rejected": -2.159345865249634, "logps/chosen": -85.59073638916016, "logps/rejected": -135.04493713378906, "loss": 0.4392, "rewards/accuracies": 1.0, "rewards/chosen": -0.1583368331193924, "rewards/margins": 0.511335015296936, "rewards/rejected": -0.6696718335151672, "step": 435 }, { "epoch": 0.75, "learning_rate": 4.1682600382409176e-07, "logits/chosen": -1.9956692457199097, "logits/rejected": -2.023143768310547, "logps/chosen": -98.91839599609375, "logps/rejected": -113.70886993408203, "loss": 0.4429, "rewards/accuracies": 0.75, "rewards/chosen": -0.4581489562988281, "rewards/margins": 1.6282360553741455, "rewards/rejected": -2.0863850116729736, "step": 436 }, { "epoch": 0.75, "learning_rate": 4.1778202676864245e-07, "logits/chosen": -2.0436365604400635, "logits/rejected": -2.194744110107422, "logps/chosen": -82.62097930908203, "logps/rejected": -94.5543441772461, "loss": 0.4334, "rewards/accuracies": 1.0, "rewards/chosen": 0.1131214126944542, "rewards/margins": 2.000152587890625, "rewards/rejected": -1.8870309591293335, "step": 437 }, { "epoch": 0.75, "learning_rate": 4.187380497131931e-07, "logits/chosen": -2.178401470184326, "logits/rejected": -2.090911388397217, "logps/chosen": -110.12999725341797, "logps/rejected": -98.47534942626953, "loss": 0.4363, "rewards/accuracies": 0.5, "rewards/chosen": -1.0122965574264526, "rewards/margins": 0.25412559509277344, "rewards/rejected": -1.2664222717285156, "step": 438 }, { "epoch": 0.76, "learning_rate": 4.1969407265774376e-07, "logits/chosen": -2.048794984817505, "logits/rejected": -2.167720317840576, "logps/chosen": -95.89297485351562, "logps/rejected": -104.74101257324219, "loss": 0.4722, "rewards/accuracies": 0.75, "rewards/chosen": -0.6726093292236328, "rewards/margins": 1.5382239818572998, "rewards/rejected": -2.2108333110809326, "step": 439 }, { "epoch": 0.76, "learning_rate": 4.206500956022944e-07, "logits/chosen": -1.9972327947616577, "logits/rejected": -2.015479564666748, "logps/chosen": -93.08164978027344, "logps/rejected": -151.28001403808594, "loss": 0.3324, "rewards/accuracies": 1.0, "rewards/chosen": -0.416927695274353, "rewards/margins": 3.053546667098999, "rewards/rejected": -3.4704742431640625, "step": 440 }, { "epoch": 0.76, "learning_rate": 4.216061185468451e-07, "logits/chosen": -2.1857693195343018, "logits/rejected": -1.9055683612823486, "logps/chosen": -99.98423767089844, "logps/rejected": -99.67942810058594, "loss": 0.4993, "rewards/accuracies": 0.5, "rewards/chosen": -0.4909875988960266, "rewards/margins": 1.244629979133606, "rewards/rejected": -1.7356176376342773, "step": 441 }, { "epoch": 0.76, "learning_rate": 4.225621414913958e-07, "logits/chosen": -1.9576871395111084, "logits/rejected": -1.743283987045288, "logps/chosen": -92.19873809814453, "logps/rejected": -97.76544952392578, "loss": 0.525, "rewards/accuracies": 0.75, "rewards/chosen": -0.6223770380020142, "rewards/margins": 1.9478238821029663, "rewards/rejected": -2.5702009201049805, "step": 442 }, { "epoch": 0.76, "learning_rate": 4.2351816443594645e-07, "logits/chosen": -2.197498321533203, "logits/rejected": -2.060741901397705, "logps/chosen": -115.80825805664062, "logps/rejected": -99.9216537475586, "loss": 0.7328, "rewards/accuracies": 0.75, "rewards/chosen": -1.1792484521865845, "rewards/margins": 0.30511152744293213, "rewards/rejected": -1.4843599796295166, "step": 443 }, { "epoch": 0.76, "learning_rate": 4.2447418738049714e-07, "logits/chosen": -1.6416321992874146, "logits/rejected": -2.1915106773376465, "logps/chosen": -79.3738784790039, "logps/rejected": -104.60935974121094, "loss": 0.5527, "rewards/accuracies": 0.75, "rewards/chosen": 0.057284727692604065, "rewards/margins": 0.1908218413591385, "rewards/rejected": -0.13353709876537323, "step": 444 }, { "epoch": 0.77, "learning_rate": 4.2543021032504777e-07, "logits/chosen": -2.0758471488952637, "logits/rejected": -1.941046953201294, "logps/chosen": -71.59370422363281, "logps/rejected": -83.5372085571289, "loss": 0.4086, "rewards/accuracies": 0.75, "rewards/chosen": -0.3182465434074402, "rewards/margins": 0.15038557350635529, "rewards/rejected": -0.46863213181495667, "step": 445 }, { "epoch": 0.77, "learning_rate": 4.2638623326959845e-07, "logits/chosen": -2.040626049041748, "logits/rejected": -2.0858683586120605, "logps/chosen": -82.82301330566406, "logps/rejected": -105.62904357910156, "loss": 0.4845, "rewards/accuracies": 0.5, "rewards/chosen": -0.7993770837783813, "rewards/margins": 1.1750547885894775, "rewards/rejected": -1.9744317531585693, "step": 446 }, { "epoch": 0.77, "learning_rate": 4.273422562141491e-07, "logits/chosen": -2.095014810562134, "logits/rejected": -1.8711289167404175, "logps/chosen": -92.97044372558594, "logps/rejected": -102.30496215820312, "loss": 0.419, "rewards/accuracies": 0.75, "rewards/chosen": -0.23058776557445526, "rewards/margins": 1.560053825378418, "rewards/rejected": -1.7906416654586792, "step": 447 }, { "epoch": 0.77, "learning_rate": 4.282982791586998e-07, "logits/chosen": -2.1566789150238037, "logits/rejected": -2.0421791076660156, "logps/chosen": -79.42460632324219, "logps/rejected": -106.57754516601562, "loss": 0.482, "rewards/accuracies": 0.75, "rewards/chosen": 0.47516947984695435, "rewards/margins": 2.0573339462280273, "rewards/rejected": -1.5821646451950073, "step": 448 }, { "epoch": 0.77, "learning_rate": 4.2925430210325045e-07, "logits/chosen": -1.7163150310516357, "logits/rejected": -1.799457311630249, "logps/chosen": -82.57752990722656, "logps/rejected": -90.38603210449219, "loss": 0.587, "rewards/accuracies": 0.5, "rewards/chosen": -0.1078132838010788, "rewards/margins": 0.8525378704071045, "rewards/rejected": -0.9603511691093445, "step": 449 }, { "epoch": 0.77, "learning_rate": 4.3021032504780114e-07, "logits/chosen": -1.8552589416503906, "logits/rejected": -2.101518392562866, "logps/chosen": -80.1647720336914, "logps/rejected": -98.4581527709961, "loss": 0.5001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3213930130004883, "rewards/margins": 1.4074956178665161, "rewards/rejected": -1.7288886308670044, "step": 450 }, { "epoch": 0.78, "learning_rate": 4.3116634799235177e-07, "logits/chosen": -2.1002821922302246, "logits/rejected": -1.9571375846862793, "logps/chosen": -96.18251037597656, "logps/rejected": -94.10652923583984, "loss": 0.5186, "rewards/accuracies": 0.75, "rewards/chosen": -0.38214319944381714, "rewards/margins": 0.9119913578033447, "rewards/rejected": -1.2941346168518066, "step": 451 }, { "epoch": 0.78, "learning_rate": 4.3212237093690246e-07, "logits/chosen": -1.9199323654174805, "logits/rejected": -2.050659656524658, "logps/chosen": -82.71170806884766, "logps/rejected": -82.38214111328125, "loss": 0.5828, "rewards/accuracies": 0.5, "rewards/chosen": -1.2858004570007324, "rewards/margins": 0.23890548944473267, "rewards/rejected": -1.5247058868408203, "step": 452 }, { "epoch": 0.78, "learning_rate": 4.3307839388145314e-07, "logits/chosen": -1.9139939546585083, "logits/rejected": -2.1063807010650635, "logps/chosen": -91.14439392089844, "logps/rejected": -100.2704086303711, "loss": 0.3874, "rewards/accuracies": 1.0, "rewards/chosen": 0.007130414247512817, "rewards/margins": 1.2716600894927979, "rewards/rejected": -1.2645295858383179, "step": 453 }, { "epoch": 0.78, "learning_rate": 4.340344168260038e-07, "logits/chosen": -1.9287214279174805, "logits/rejected": -2.141630172729492, "logps/chosen": -92.84176635742188, "logps/rejected": -105.236328125, "loss": 0.4153, "rewards/accuracies": 1.0, "rewards/chosen": 0.14833098649978638, "rewards/margins": 1.1570922136306763, "rewards/rejected": -1.0087611675262451, "step": 454 }, { "epoch": 0.78, "learning_rate": 4.349904397705545e-07, "logits/chosen": -1.893625259399414, "logits/rejected": -2.060704231262207, "logps/chosen": -88.24596405029297, "logps/rejected": -106.63580322265625, "loss": 0.3371, "rewards/accuracies": 1.0, "rewards/chosen": -0.08817215263843536, "rewards/margins": 1.6904964447021484, "rewards/rejected": -1.7786686420440674, "step": 455 }, { "epoch": 0.78, "learning_rate": 4.3594646271510514e-07, "logits/chosen": -1.9221243858337402, "logits/rejected": -2.0551934242248535, "logps/chosen": -79.08259582519531, "logps/rejected": -118.86749267578125, "loss": 0.4708, "rewards/accuracies": 1.0, "rewards/chosen": -0.1359502673149109, "rewards/margins": 1.8745547533035278, "rewards/rejected": -2.010504961013794, "step": 456 }, { "epoch": 0.79, "learning_rate": 4.3690248565965583e-07, "logits/chosen": -2.0148282051086426, "logits/rejected": -2.1970157623291016, "logps/chosen": -71.40801239013672, "logps/rejected": -88.95671844482422, "loss": 0.4362, "rewards/accuracies": 1.0, "rewards/chosen": 0.23150750994682312, "rewards/margins": 2.649721145629883, "rewards/rejected": -2.418213367462158, "step": 457 }, { "epoch": 0.79, "learning_rate": 4.3785850860420646e-07, "logits/chosen": -1.6732053756713867, "logits/rejected": -2.2636959552764893, "logps/chosen": -65.49894714355469, "logps/rejected": -110.64321899414062, "loss": 0.4966, "rewards/accuracies": 1.0, "rewards/chosen": -0.029141433537006378, "rewards/margins": 3.1077420711517334, "rewards/rejected": -3.1368837356567383, "step": 458 }, { "epoch": 0.79, "learning_rate": 4.3881453154875715e-07, "logits/chosen": -1.8314454555511475, "logits/rejected": -2.1342086791992188, "logps/chosen": -81.9537353515625, "logps/rejected": -109.765380859375, "loss": 0.3768, "rewards/accuracies": 1.0, "rewards/chosen": 0.09020861983299255, "rewards/margins": 1.5635747909545898, "rewards/rejected": -1.473366141319275, "step": 459 }, { "epoch": 0.79, "learning_rate": 4.397705544933078e-07, "logits/chosen": -1.9356434345245361, "logits/rejected": -1.6330760717391968, "logps/chosen": -105.02009582519531, "logps/rejected": -101.28875732421875, "loss": 0.5544, "rewards/accuracies": 0.75, "rewards/chosen": -1.283280611038208, "rewards/margins": 0.6338448524475098, "rewards/rejected": -1.9171254634857178, "step": 460 }, { "epoch": 0.79, "learning_rate": 4.407265774378585e-07, "logits/chosen": -2.1072800159454346, "logits/rejected": -2.0450055599212646, "logps/chosen": -79.18347930908203, "logps/rejected": -98.34024047851562, "loss": 0.5409, "rewards/accuracies": 1.0, "rewards/chosen": -0.4071239233016968, "rewards/margins": 1.0824823379516602, "rewards/rejected": -1.4896061420440674, "step": 461 }, { "epoch": 0.8, "learning_rate": 4.416826003824092e-07, "logits/chosen": -1.6495602130889893, "logits/rejected": -2.231163501739502, "logps/chosen": -86.44140625, "logps/rejected": -114.60507202148438, "loss": 0.5392, "rewards/accuracies": 1.0, "rewards/chosen": -0.08658008277416229, "rewards/margins": 1.2947603464126587, "rewards/rejected": -1.3813403844833374, "step": 462 }, { "epoch": 0.8, "learning_rate": 4.4263862332695983e-07, "logits/chosen": -2.0168044567108154, "logits/rejected": -1.9956128597259521, "logps/chosen": -79.48843383789062, "logps/rejected": -80.74072265625, "loss": 0.4246, "rewards/accuracies": 0.5, "rewards/chosen": -0.13192777335643768, "rewards/margins": 0.08980438113212585, "rewards/rejected": -0.22173213958740234, "step": 463 }, { "epoch": 0.8, "learning_rate": 4.435946462715105e-07, "logits/chosen": -2.077343463897705, "logits/rejected": -2.2353529930114746, "logps/chosen": -76.78817749023438, "logps/rejected": -101.27438354492188, "loss": 0.4256, "rewards/accuracies": 1.0, "rewards/chosen": -0.6054031848907471, "rewards/margins": 1.2888970375061035, "rewards/rejected": -1.8943002223968506, "step": 464 }, { "epoch": 0.8, "learning_rate": 4.4455066921606115e-07, "logits/chosen": -2.2633283138275146, "logits/rejected": -1.7611548900604248, "logps/chosen": -108.36682891845703, "logps/rejected": -103.72915649414062, "loss": 0.3031, "rewards/accuracies": 0.75, "rewards/chosen": -0.17198297381401062, "rewards/margins": 1.7649656534194946, "rewards/rejected": -1.9369486570358276, "step": 465 }, { "epoch": 0.8, "learning_rate": 4.4550669216061183e-07, "logits/chosen": -1.9629030227661133, "logits/rejected": -2.334906578063965, "logps/chosen": -79.77656555175781, "logps/rejected": -116.47685241699219, "loss": 0.4272, "rewards/accuracies": 1.0, "rewards/chosen": 0.48861539363861084, "rewards/margins": 1.8079229593276978, "rewards/rejected": -1.319307565689087, "step": 466 }, { "epoch": 0.8, "learning_rate": 4.4646271510516247e-07, "logits/chosen": -2.113650321960449, "logits/rejected": -1.8392785787582397, "logps/chosen": -86.65052032470703, "logps/rejected": -98.86720275878906, "loss": 0.5694, "rewards/accuracies": 0.75, "rewards/chosen": 0.04362659901380539, "rewards/margins": 1.7324862480163574, "rewards/rejected": -1.6888595819473267, "step": 467 }, { "epoch": 0.81, "learning_rate": 4.474187380497132e-07, "logits/chosen": -1.8977842330932617, "logits/rejected": -2.095889091491699, "logps/chosen": -87.13970947265625, "logps/rejected": -100.94108581542969, "loss": 0.35, "rewards/accuracies": 0.75, "rewards/chosen": -0.45018404722213745, "rewards/margins": 0.7779535055160522, "rewards/rejected": -1.2281376123428345, "step": 468 }, { "epoch": 0.81, "learning_rate": 4.4837476099426384e-07, "logits/chosen": -1.6438469886779785, "logits/rejected": -2.1874327659606934, "logps/chosen": -63.620121002197266, "logps/rejected": -128.3905029296875, "loss": 0.4167, "rewards/accuracies": 1.0, "rewards/chosen": 0.5524696111679077, "rewards/margins": 3.5747158527374268, "rewards/rejected": -3.0222458839416504, "step": 469 }, { "epoch": 0.81, "learning_rate": 4.493307839388145e-07, "logits/chosen": -2.0269227027893066, "logits/rejected": -2.2609782218933105, "logps/chosen": -78.49888610839844, "logps/rejected": -100.23502349853516, "loss": 0.4786, "rewards/accuracies": 1.0, "rewards/chosen": 0.3772563934326172, "rewards/margins": 1.4681098461151123, "rewards/rejected": -1.0908534526824951, "step": 470 }, { "epoch": 0.81, "learning_rate": 4.5028680688336515e-07, "logits/chosen": -2.027249574661255, "logits/rejected": -2.1682446002960205, "logps/chosen": -83.91582489013672, "logps/rejected": -91.42035675048828, "loss": 0.3718, "rewards/accuracies": 0.5, "rewards/chosen": -0.28102684020996094, "rewards/margins": 0.7330994606018066, "rewards/rejected": -1.0141263008117676, "step": 471 }, { "epoch": 0.81, "learning_rate": 4.5124282982791584e-07, "logits/chosen": -1.819584608078003, "logits/rejected": -1.993252158164978, "logps/chosen": -63.4788703918457, "logps/rejected": -108.6789321899414, "loss": 0.4413, "rewards/accuracies": 0.75, "rewards/chosen": 1.0053629875183105, "rewards/margins": 2.685763120651245, "rewards/rejected": -1.6804002523422241, "step": 472 }, { "epoch": 0.81, "learning_rate": 4.521988527724665e-07, "logits/chosen": -1.855224609375, "logits/rejected": -2.007587432861328, "logps/chosen": -68.20806884765625, "logps/rejected": -100.51902770996094, "loss": 0.5843, "rewards/accuracies": 0.75, "rewards/chosen": -0.15050765872001648, "rewards/margins": 2.3860864639282227, "rewards/rejected": -2.5365941524505615, "step": 473 }, { "epoch": 0.82, "learning_rate": 4.531548757170172e-07, "logits/chosen": -2.118403434753418, "logits/rejected": -2.030034303665161, "logps/chosen": -80.5394515991211, "logps/rejected": -109.28509521484375, "loss": 0.3315, "rewards/accuracies": 1.0, "rewards/chosen": 0.20996743440628052, "rewards/margins": 1.9718899726867676, "rewards/rejected": -1.7619224786758423, "step": 474 }, { "epoch": 0.82, "learning_rate": 4.541108986615679e-07, "logits/chosen": -2.2409496307373047, "logits/rejected": -2.0865185260772705, "logps/chosen": -109.92361450195312, "logps/rejected": -108.39999389648438, "loss": 0.3622, "rewards/accuracies": 1.0, "rewards/chosen": -1.3415617942810059, "rewards/margins": 1.1620280742645264, "rewards/rejected": -2.5035898685455322, "step": 475 }, { "epoch": 0.82, "learning_rate": 4.550669216061185e-07, "logits/chosen": -2.036510944366455, "logits/rejected": -1.961535930633545, "logps/chosen": -80.98253631591797, "logps/rejected": -108.57038879394531, "loss": 0.4713, "rewards/accuracies": 0.75, "rewards/chosen": 0.028195764869451523, "rewards/margins": 1.2214632034301758, "rewards/rejected": -1.1932674646377563, "step": 476 }, { "epoch": 0.82, "learning_rate": 4.560229445506692e-07, "logits/chosen": -1.8769524097442627, "logits/rejected": -2.062847375869751, "logps/chosen": -72.43804931640625, "logps/rejected": -98.28524017333984, "loss": 0.5279, "rewards/accuracies": 0.75, "rewards/chosen": 0.06812958419322968, "rewards/margins": 0.9385296106338501, "rewards/rejected": -0.8704001307487488, "step": 477 }, { "epoch": 0.82, "learning_rate": 4.5697896749521984e-07, "logits/chosen": -2.0832252502441406, "logits/rejected": -1.802504062652588, "logps/chosen": -99.86788940429688, "logps/rejected": -82.5252685546875, "loss": 0.377, "rewards/accuracies": 0.75, "rewards/chosen": 0.09678459912538528, "rewards/margins": 1.683565378189087, "rewards/rejected": -1.5867807865142822, "step": 478 }, { "epoch": 0.82, "learning_rate": 4.5793499043977053e-07, "logits/chosen": -1.976064920425415, "logits/rejected": -2.0452709197998047, "logps/chosen": -76.70564270019531, "logps/rejected": -93.40642547607422, "loss": 0.5378, "rewards/accuracies": 0.5, "rewards/chosen": 0.46610087156295776, "rewards/margins": 0.5444414615631104, "rewards/rejected": -0.078340545296669, "step": 479 }, { "epoch": 0.83, "learning_rate": 4.5889101338432116e-07, "logits/chosen": -2.088839292526245, "logits/rejected": -2.241867780685425, "logps/chosen": -53.174346923828125, "logps/rejected": -84.32007598876953, "loss": 0.3644, "rewards/accuracies": 1.0, "rewards/chosen": 0.6930751204490662, "rewards/margins": 1.758188009262085, "rewards/rejected": -1.065112829208374, "step": 480 }, { "epoch": 0.83, "learning_rate": 4.598470363288719e-07, "logits/chosen": -1.7459218502044678, "logits/rejected": -2.159637451171875, "logps/chosen": -72.11885833740234, "logps/rejected": -113.46615600585938, "loss": 0.5927, "rewards/accuracies": 0.75, "rewards/chosen": 0.24704046547412872, "rewards/margins": 1.9517991542816162, "rewards/rejected": -1.704758882522583, "step": 481 }, { "epoch": 0.83, "learning_rate": 4.608030592734226e-07, "logits/chosen": -2.107175350189209, "logits/rejected": -1.783807635307312, "logps/chosen": -106.512939453125, "logps/rejected": -99.51609802246094, "loss": 0.5557, "rewards/accuracies": 0.75, "rewards/chosen": -0.5297170877456665, "rewards/margins": 0.5513079762458801, "rewards/rejected": -1.0810251235961914, "step": 482 }, { "epoch": 0.83, "learning_rate": 4.617590822179732e-07, "logits/chosen": -1.9545989036560059, "logits/rejected": -2.1093814373016357, "logps/chosen": -81.4981460571289, "logps/rejected": -94.41401672363281, "loss": 0.3402, "rewards/accuracies": 0.75, "rewards/chosen": 0.1338636577129364, "rewards/margins": 0.5435816049575806, "rewards/rejected": -0.4097179174423218, "step": 483 }, { "epoch": 0.83, "learning_rate": 4.627151051625239e-07, "logits/chosen": -2.2210941314697266, "logits/rejected": -1.5363261699676514, "logps/chosen": -107.56927490234375, "logps/rejected": -119.02033233642578, "loss": 0.4127, "rewards/accuracies": 1.0, "rewards/chosen": -0.012792244553565979, "rewards/margins": 2.8751401901245117, "rewards/rejected": -2.887932300567627, "step": 484 }, { "epoch": 0.83, "learning_rate": 4.6367112810707453e-07, "logits/chosen": -2.0047993659973145, "logits/rejected": -1.9632649421691895, "logps/chosen": -96.6224594116211, "logps/rejected": -92.4271011352539, "loss": 0.4396, "rewards/accuracies": 0.5, "rewards/chosen": -0.04923057556152344, "rewards/margins": 0.6712000370025635, "rewards/rejected": -0.7204306125640869, "step": 485 }, { "epoch": 0.84, "learning_rate": 4.646271510516252e-07, "logits/chosen": -2.0386266708374023, "logits/rejected": -1.921187400817871, "logps/chosen": -74.36175537109375, "logps/rejected": -91.21369171142578, "loss": 0.4007, "rewards/accuracies": 0.75, "rewards/chosen": 0.7087527513504028, "rewards/margins": 1.400216817855835, "rewards/rejected": -0.6914641261100769, "step": 486 }, { "epoch": 0.84, "learning_rate": 4.6558317399617585e-07, "logits/chosen": -1.792502760887146, "logits/rejected": -1.91475248336792, "logps/chosen": -83.0492172241211, "logps/rejected": -101.16266632080078, "loss": 0.2535, "rewards/accuracies": 0.75, "rewards/chosen": -0.3391004800796509, "rewards/margins": 0.7408661842346191, "rewards/rejected": -1.07996666431427, "step": 487 }, { "epoch": 0.84, "learning_rate": 4.665391969407266e-07, "logits/chosen": -2.1628775596618652, "logits/rejected": -2.111997127532959, "logps/chosen": -86.34194946289062, "logps/rejected": -123.68760681152344, "loss": 0.4584, "rewards/accuracies": 0.75, "rewards/chosen": 0.5159870386123657, "rewards/margins": 2.415639638900757, "rewards/rejected": -1.8996524810791016, "step": 488 }, { "epoch": 0.84, "learning_rate": 4.674952198852772e-07, "logits/chosen": -2.16182541847229, "logits/rejected": -2.1024351119995117, "logps/chosen": -80.06526184082031, "logps/rejected": -89.14448547363281, "loss": 0.4105, "rewards/accuracies": 0.75, "rewards/chosen": 0.14818403124809265, "rewards/margins": 0.5070902109146118, "rewards/rejected": -0.3589061498641968, "step": 489 }, { "epoch": 0.84, "learning_rate": 4.684512428298279e-07, "logits/chosen": -2.171192169189453, "logits/rejected": -1.5932064056396484, "logps/chosen": -81.68072509765625, "logps/rejected": -84.40038299560547, "loss": 0.5297, "rewards/accuracies": 1.0, "rewards/chosen": 0.17467767000198364, "rewards/margins": 1.2723376750946045, "rewards/rejected": -1.0976600646972656, "step": 490 }, { "epoch": 0.85, "learning_rate": 4.6940726577437853e-07, "logits/chosen": -1.9942693710327148, "logits/rejected": -1.849081039428711, "logps/chosen": -86.64482116699219, "logps/rejected": -89.1826400756836, "loss": 0.4545, "rewards/accuracies": 0.75, "rewards/chosen": 0.04124631732702255, "rewards/margins": 1.6044989824295044, "rewards/rejected": -1.5632526874542236, "step": 491 }, { "epoch": 0.85, "learning_rate": 4.703632887189292e-07, "logits/chosen": -1.7466387748718262, "logits/rejected": -2.2451329231262207, "logps/chosen": -59.39234161376953, "logps/rejected": -95.08465576171875, "loss": 0.3172, "rewards/accuracies": 1.0, "rewards/chosen": 0.5035024881362915, "rewards/margins": 2.6229941844940186, "rewards/rejected": -2.1194915771484375, "step": 492 }, { "epoch": 0.85, "learning_rate": 4.713193116634799e-07, "logits/chosen": -2.2227585315704346, "logits/rejected": -1.9833612442016602, "logps/chosen": -104.24762725830078, "logps/rejected": -91.9844970703125, "loss": 0.3804, "rewards/accuracies": 0.5, "rewards/chosen": -0.4685850143432617, "rewards/margins": 0.5834494829177856, "rewards/rejected": -1.052034616470337, "step": 493 }, { "epoch": 0.85, "learning_rate": 4.722753346080306e-07, "logits/chosen": -1.701417088508606, "logits/rejected": -2.025219202041626, "logps/chosen": -68.990966796875, "logps/rejected": -119.13179016113281, "loss": 0.4191, "rewards/accuracies": 1.0, "rewards/chosen": 0.7838778495788574, "rewards/margins": 2.9606993198394775, "rewards/rejected": -2.17682147026062, "step": 494 }, { "epoch": 0.85, "learning_rate": 4.732313575525813e-07, "logits/chosen": -2.030477285385132, "logits/rejected": -2.075101852416992, "logps/chosen": -103.52239990234375, "logps/rejected": -108.41557312011719, "loss": 0.4909, "rewards/accuracies": 0.75, "rewards/chosen": -0.6959629058837891, "rewards/margins": 1.598376750946045, "rewards/rejected": -2.294339656829834, "step": 495 }, { "epoch": 0.85, "learning_rate": 4.741873804971319e-07, "logits/chosen": -1.9535192251205444, "logits/rejected": -1.8884222507476807, "logps/chosen": -97.55509948730469, "logps/rejected": -124.44847869873047, "loss": 0.3187, "rewards/accuracies": 1.0, "rewards/chosen": 0.952404797077179, "rewards/margins": 3.985813617706299, "rewards/rejected": -3.0334088802337646, "step": 496 }, { "epoch": 0.86, "learning_rate": 4.751434034416826e-07, "logits/chosen": -2.013808488845825, "logits/rejected": -1.875051498413086, "logps/chosen": -100.45506286621094, "logps/rejected": -109.26942443847656, "loss": 0.4626, "rewards/accuracies": 0.5, "rewards/chosen": -0.7834661602973938, "rewards/margins": 0.673240065574646, "rewards/rejected": -1.4567062854766846, "step": 497 }, { "epoch": 0.86, "learning_rate": 4.760994263862332e-07, "logits/chosen": -1.8361507654190063, "logits/rejected": -2.1383004188537598, "logps/chosen": -75.01800537109375, "logps/rejected": -98.53700256347656, "loss": 0.4377, "rewards/accuracies": 1.0, "rewards/chosen": -0.8418619632720947, "rewards/margins": 1.2175360918045044, "rewards/rejected": -2.0593981742858887, "step": 498 }, { "epoch": 0.86, "learning_rate": 4.770554493307839e-07, "logits/chosen": -1.7938193082809448, "logits/rejected": -2.098747968673706, "logps/chosen": -94.5177230834961, "logps/rejected": -140.06643676757812, "loss": 0.3737, "rewards/accuracies": 1.0, "rewards/chosen": 0.15301570296287537, "rewards/margins": 1.4778523445129395, "rewards/rejected": -1.3248366117477417, "step": 499 }, { "epoch": 0.86, "learning_rate": 4.780114722753345e-07, "logits/chosen": -2.1792781352996826, "logits/rejected": -1.7944998741149902, "logps/chosen": -79.27053833007812, "logps/rejected": -85.11296844482422, "loss": 0.2305, "rewards/accuracies": 0.75, "rewards/chosen": 0.3206694722175598, "rewards/margins": 1.2935047149658203, "rewards/rejected": -0.972835123538971, "step": 500 }, { "epoch": 0.86, "learning_rate": 4.789674952198852e-07, "logits/chosen": -2.126190423965454, "logits/rejected": -1.7924888134002686, "logps/chosen": -104.59046173095703, "logps/rejected": -100.22987365722656, "loss": 0.4519, "rewards/accuracies": 0.75, "rewards/chosen": -0.6456359624862671, "rewards/margins": 1.3248257637023926, "rewards/rejected": -1.9704618453979492, "step": 501 }, { "epoch": 0.86, "learning_rate": 4.799235181644359e-07, "logits/chosen": -1.8213016986846924, "logits/rejected": -1.847627878189087, "logps/chosen": -89.55986785888672, "logps/rejected": -120.96830749511719, "loss": 0.3386, "rewards/accuracies": 1.0, "rewards/chosen": -0.10872994363307953, "rewards/margins": 3.368687152862549, "rewards/rejected": -3.4774169921875, "step": 502 }, { "epoch": 0.87, "learning_rate": 4.808795411089866e-07, "logits/chosen": -2.042241334915161, "logits/rejected": -2.0908961296081543, "logps/chosen": -93.01039123535156, "logps/rejected": -95.42607116699219, "loss": 0.6206, "rewards/accuracies": 0.25, "rewards/chosen": -1.6099467277526855, "rewards/margins": -0.9380097389221191, "rewards/rejected": -0.6719369292259216, "step": 503 }, { "epoch": 0.87, "learning_rate": 4.818355640535373e-07, "logits/chosen": -1.9883291721343994, "logits/rejected": -1.658750295639038, "logps/chosen": -96.50381469726562, "logps/rejected": -131.70506286621094, "loss": 0.4917, "rewards/accuracies": 1.0, "rewards/chosen": -0.09394264221191406, "rewards/margins": 3.473207950592041, "rewards/rejected": -3.567150592803955, "step": 504 }, { "epoch": 0.87, "learning_rate": 4.82791586998088e-07, "logits/chosen": -1.8732151985168457, "logits/rejected": -2.188380002975464, "logps/chosen": -87.05703735351562, "logps/rejected": -133.926025390625, "loss": 0.496, "rewards/accuracies": 0.75, "rewards/chosen": -0.6496174335479736, "rewards/margins": 2.438687801361084, "rewards/rejected": -3.0883049964904785, "step": 505 }, { "epoch": 0.87, "learning_rate": 4.837476099426385e-07, "logits/chosen": -2.0275838375091553, "logits/rejected": -1.9768937826156616, "logps/chosen": -88.84856414794922, "logps/rejected": -104.53934478759766, "loss": 0.2624, "rewards/accuracies": 0.75, "rewards/chosen": -0.07545891404151917, "rewards/margins": 0.9126782417297363, "rewards/rejected": -0.9881371259689331, "step": 506 }, { "epoch": 0.87, "learning_rate": 4.847036328871893e-07, "logits/chosen": -1.7372264862060547, "logits/rejected": -2.09561824798584, "logps/chosen": -69.96878051757812, "logps/rejected": -102.81889343261719, "loss": 0.4652, "rewards/accuracies": 1.0, "rewards/chosen": 0.2259359508752823, "rewards/margins": 3.290822744369507, "rewards/rejected": -3.0648865699768066, "step": 507 }, { "epoch": 0.87, "learning_rate": 4.856596558317399e-07, "logits/chosen": -2.070023775100708, "logits/rejected": -1.8771553039550781, "logps/chosen": -69.30655670166016, "logps/rejected": -86.54907989501953, "loss": 0.2895, "rewards/accuracies": 0.75, "rewards/chosen": 0.3600481152534485, "rewards/margins": 2.5081064701080322, "rewards/rejected": -2.1480584144592285, "step": 508 }, { "epoch": 0.88, "learning_rate": 4.866156787762906e-07, "logits/chosen": -2.127955675125122, "logits/rejected": -2.0783920288085938, "logps/chosen": -84.33802795410156, "logps/rejected": -104.87149810791016, "loss": 0.4864, "rewards/accuracies": 0.5, "rewards/chosen": -0.8562911152839661, "rewards/margins": 0.2854006886482239, "rewards/rejected": -1.14169180393219, "step": 509 }, { "epoch": 0.88, "learning_rate": 4.875717017208413e-07, "logits/chosen": -1.9890215396881104, "logits/rejected": -2.093502998352051, "logps/chosen": -82.60133361816406, "logps/rejected": -115.39236450195312, "loss": 0.3352, "rewards/accuracies": 1.0, "rewards/chosen": -0.4160412549972534, "rewards/margins": 1.9482604265213013, "rewards/rejected": -2.3643016815185547, "step": 510 }, { "epoch": 0.88, "learning_rate": 4.88527724665392e-07, "logits/chosen": -1.78133225440979, "logits/rejected": -2.0832881927490234, "logps/chosen": -67.5921630859375, "logps/rejected": -90.77629089355469, "loss": 0.4484, "rewards/accuracies": 0.75, "rewards/chosen": -0.019334591925144196, "rewards/margins": 0.8777174353599548, "rewards/rejected": -0.8970520496368408, "step": 511 }, { "epoch": 0.88, "learning_rate": 4.894837476099425e-07, "logits/chosen": -2.1876556873321533, "logits/rejected": -2.0048909187316895, "logps/chosen": -71.6203842163086, "logps/rejected": -84.10336303710938, "loss": 0.4981, "rewards/accuracies": 0.5, "rewards/chosen": -0.6698096990585327, "rewards/margins": 0.37574055790901184, "rewards/rejected": -1.0455502271652222, "step": 512 }, { "epoch": 0.88, "learning_rate": 4.904397705544932e-07, "logits/chosen": -2.016202688217163, "logits/rejected": -2.1924657821655273, "logps/chosen": -73.46800994873047, "logps/rejected": -96.09896850585938, "loss": 0.46, "rewards/accuracies": 0.75, "rewards/chosen": -0.439086377620697, "rewards/margins": 1.181687355041504, "rewards/rejected": -1.6207739114761353, "step": 513 }, { "epoch": 0.88, "learning_rate": 4.91395793499044e-07, "logits/chosen": -1.7832186222076416, "logits/rejected": -2.3352768421173096, "logps/chosen": -80.95560455322266, "logps/rejected": -116.858154296875, "loss": 0.5184, "rewards/accuracies": 0.75, "rewards/chosen": -0.5274121761322021, "rewards/margins": 1.9689289331436157, "rewards/rejected": -2.4963412284851074, "step": 514 }, { "epoch": 0.89, "learning_rate": 4.923518164435946e-07, "logits/chosen": -2.048816680908203, "logits/rejected": -2.1867270469665527, "logps/chosen": -92.13829803466797, "logps/rejected": -116.85456848144531, "loss": 0.4706, "rewards/accuracies": 0.75, "rewards/chosen": -0.18320807814598083, "rewards/margins": 1.9977459907531738, "rewards/rejected": -2.1809542179107666, "step": 515 }, { "epoch": 0.89, "learning_rate": 4.933078393881453e-07, "logits/chosen": -2.075291395187378, "logits/rejected": -1.7716426849365234, "logps/chosen": -90.67626953125, "logps/rejected": -87.95980834960938, "loss": 0.387, "rewards/accuracies": 0.75, "rewards/chosen": 0.004914820194244385, "rewards/margins": 1.7671759128570557, "rewards/rejected": -1.7622612714767456, "step": 516 }, { "epoch": 0.89, "learning_rate": 4.94263862332696e-07, "logits/chosen": -1.9472932815551758, "logits/rejected": -2.0906968116760254, "logps/chosen": -97.48252868652344, "logps/rejected": -110.42720031738281, "loss": 0.4651, "rewards/accuracies": 0.75, "rewards/chosen": -1.0661019086837769, "rewards/margins": 1.0113980770111084, "rewards/rejected": -2.0774998664855957, "step": 517 }, { "epoch": 0.89, "learning_rate": 4.952198852772467e-07, "logits/chosen": -2.030424118041992, "logits/rejected": -2.0882062911987305, "logps/chosen": -103.99085998535156, "logps/rejected": -104.74082946777344, "loss": 0.6388, "rewards/accuracies": 0.25, "rewards/chosen": -1.7146495580673218, "rewards/margins": -0.4814176857471466, "rewards/rejected": -1.2332319021224976, "step": 518 }, { "epoch": 0.89, "learning_rate": 4.961759082217972e-07, "logits/chosen": -1.8104066848754883, "logits/rejected": -1.6216784715652466, "logps/chosen": -95.1793212890625, "logps/rejected": -109.74293518066406, "loss": 0.4193, "rewards/accuracies": 1.0, "rewards/chosen": -0.29385054111480713, "rewards/margins": 2.083038568496704, "rewards/rejected": -2.376889228820801, "step": 519 }, { "epoch": 0.9, "learning_rate": 4.97131931166348e-07, "logits/chosen": -2.290800094604492, "logits/rejected": -2.1119165420532227, "logps/chosen": -86.11351013183594, "logps/rejected": -83.63914489746094, "loss": 0.442, "rewards/accuracies": 0.5, "rewards/chosen": -0.35472697019577026, "rewards/margins": 0.508427619934082, "rewards/rejected": -0.8631545305252075, "step": 520 }, { "epoch": 0.9, "learning_rate": 4.980879541108986e-07, "logits/chosen": -1.9581267833709717, "logits/rejected": -1.9628558158874512, "logps/chosen": -54.60065841674805, "logps/rejected": -52.674903869628906, "loss": 0.4433, "rewards/accuracies": 0.75, "rewards/chosen": -0.10729942470788956, "rewards/margins": 0.7184232473373413, "rewards/rejected": -0.8257226943969727, "step": 521 }, { "epoch": 0.9, "learning_rate": 4.990439770554493e-07, "logits/chosen": -1.9219136238098145, "logits/rejected": -2.1847872734069824, "logps/chosen": -81.87908935546875, "logps/rejected": -117.14825439453125, "loss": 0.5218, "rewards/accuracies": 1.0, "rewards/chosen": 0.44876158237457275, "rewards/margins": 2.8918535709381104, "rewards/rejected": -2.443091869354248, "step": 522 }, { "epoch": 0.9, "learning_rate": 5e-07, "logits/chosen": -2.168200731277466, "logits/rejected": -2.062551736831665, "logps/chosen": -72.93549346923828, "logps/rejected": -100.6035385131836, "loss": 0.5814, "rewards/accuracies": 1.0, "rewards/chosen": 0.19919967651367188, "rewards/margins": 2.75152587890625, "rewards/rejected": -2.552325963973999, "step": 523 }, { "epoch": 0.9, "learning_rate": 4.998937526561836e-07, "logits/chosen": -1.8334537744522095, "logits/rejected": -2.071415424346924, "logps/chosen": -82.38552856445312, "logps/rejected": -109.359375, "loss": 0.4521, "rewards/accuracies": 1.0, "rewards/chosen": 0.2455831617116928, "rewards/margins": 2.110121726989746, "rewards/rejected": -1.8645386695861816, "step": 524 }, { "epoch": 0.9, "learning_rate": 4.997875053123672e-07, "logits/chosen": -2.03682804107666, "logits/rejected": -1.9312798976898193, "logps/chosen": -85.72430419921875, "logps/rejected": -80.18260955810547, "loss": 0.4242, "rewards/accuracies": 0.5, "rewards/chosen": -0.4329044222831726, "rewards/margins": 0.5488168001174927, "rewards/rejected": -0.9817212820053101, "step": 525 }, { "epoch": 0.91, "learning_rate": 4.996812579685508e-07, "logits/chosen": -1.993467926979065, "logits/rejected": -2.215096950531006, "logps/chosen": -96.97535705566406, "logps/rejected": -130.5266876220703, "loss": 0.4974, "rewards/accuracies": 0.75, "rewards/chosen": -0.21193565428256989, "rewards/margins": 2.491032600402832, "rewards/rejected": -2.702968120574951, "step": 526 }, { "epoch": 0.91, "learning_rate": 4.995750106247344e-07, "logits/chosen": -2.134699821472168, "logits/rejected": -2.013460636138916, "logps/chosen": -78.98539733886719, "logps/rejected": -90.28670501708984, "loss": 0.4686, "rewards/accuracies": 0.75, "rewards/chosen": -0.1310095638036728, "rewards/margins": 1.1317346096038818, "rewards/rejected": -1.2627441883087158, "step": 527 }, { "epoch": 0.91, "learning_rate": 4.99468763280918e-07, "logits/chosen": -1.6911413669586182, "logits/rejected": -2.2245867252349854, "logps/chosen": -86.04072570800781, "logps/rejected": -115.04542541503906, "loss": 0.5167, "rewards/accuracies": 0.75, "rewards/chosen": 0.050777237862348557, "rewards/margins": 1.216759443283081, "rewards/rejected": -1.1659822463989258, "step": 528 }, { "epoch": 0.91, "learning_rate": 4.993625159371016e-07, "logits/chosen": -1.9875987768173218, "logits/rejected": -2.1248629093170166, "logps/chosen": -95.28678894042969, "logps/rejected": -95.21996307373047, "loss": 0.5154, "rewards/accuracies": 0.5, "rewards/chosen": 0.044795021414756775, "rewards/margins": 0.7404762506484985, "rewards/rejected": -0.6956812143325806, "step": 529 }, { "epoch": 0.91, "learning_rate": 4.992562685932852e-07, "logits/chosen": -2.047372579574585, "logits/rejected": -2.089491844177246, "logps/chosen": -58.04249572753906, "logps/rejected": -70.97039794921875, "loss": 0.4529, "rewards/accuracies": 0.5, "rewards/chosen": 0.0777578204870224, "rewards/margins": 0.224297434091568, "rewards/rejected": -0.1465395987033844, "step": 530 }, { "epoch": 0.91, "learning_rate": 4.991500212494688e-07, "logits/chosen": -2.1431801319122314, "logits/rejected": -1.7469532489776611, "logps/chosen": -90.65848541259766, "logps/rejected": -96.67198944091797, "loss": 0.7183, "rewards/accuracies": 0.5, "rewards/chosen": -0.01577204465866089, "rewards/margins": -0.05069693922996521, "rewards/rejected": 0.03492487967014313, "step": 531 }, { "epoch": 0.92, "learning_rate": 4.990437739056524e-07, "logits/chosen": -2.065659284591675, "logits/rejected": -2.092231512069702, "logps/chosen": -92.1357421875, "logps/rejected": -121.3798599243164, "loss": 0.4302, "rewards/accuracies": 0.75, "rewards/chosen": 0.6390670537948608, "rewards/margins": 2.654864549636841, "rewards/rejected": -2.0157976150512695, "step": 532 }, { "epoch": 0.92, "learning_rate": 4.989375265618359e-07, "logits/chosen": -2.04850435256958, "logits/rejected": -2.163619041442871, "logps/chosen": -94.91116333007812, "logps/rejected": -94.77406311035156, "loss": 0.3775, "rewards/accuracies": 1.0, "rewards/chosen": 0.49765950441360474, "rewards/margins": 0.8048030734062195, "rewards/rejected": -0.30714359879493713, "step": 533 }, { "epoch": 0.92, "learning_rate": 4.988312792180196e-07, "logits/chosen": -1.8116846084594727, "logits/rejected": -2.033137083053589, "logps/chosen": -91.53192138671875, "logps/rejected": -117.51626586914062, "loss": 0.4499, "rewards/accuracies": 0.75, "rewards/chosen": -0.38662901520729065, "rewards/margins": 0.9726876020431519, "rewards/rejected": -1.3593167066574097, "step": 534 }, { "epoch": 0.92, "learning_rate": 4.987250318742032e-07, "logits/chosen": -1.9659620523452759, "logits/rejected": -2.1380436420440674, "logps/chosen": -78.53692626953125, "logps/rejected": -103.24454498291016, "loss": 0.4016, "rewards/accuracies": 1.0, "rewards/chosen": 0.7326908111572266, "rewards/margins": 2.0839426517486572, "rewards/rejected": -1.3512518405914307, "step": 535 }, { "epoch": 0.92, "learning_rate": 4.986187845303867e-07, "logits/chosen": -2.1147620677948, "logits/rejected": -2.150869131088257, "logps/chosen": -84.50244903564453, "logps/rejected": -115.79826354980469, "loss": 0.4565, "rewards/accuracies": 1.0, "rewards/chosen": 0.11615066230297089, "rewards/margins": 2.205106496810913, "rewards/rejected": -2.088955879211426, "step": 536 }, { "epoch": 0.92, "learning_rate": 4.985125371865704e-07, "logits/chosen": -1.9924557209014893, "logits/rejected": -2.0807149410247803, "logps/chosen": -73.97576904296875, "logps/rejected": -95.89068603515625, "loss": 0.409, "rewards/accuracies": 0.75, "rewards/chosen": 0.22276347875595093, "rewards/margins": 0.9185325503349304, "rewards/rejected": -0.6957691311836243, "step": 537 }, { "epoch": 0.93, "learning_rate": 4.984062898427539e-07, "logits/chosen": -1.986271619796753, "logits/rejected": -2.1107254028320312, "logps/chosen": -88.63723754882812, "logps/rejected": -79.02218627929688, "loss": 0.5575, "rewards/accuracies": 0.5, "rewards/chosen": -0.752366304397583, "rewards/margins": 0.13281351327896118, "rewards/rejected": -0.8851797580718994, "step": 538 }, { "epoch": 0.93, "learning_rate": 4.983000424989375e-07, "logits/chosen": -2.2271385192871094, "logits/rejected": -2.2279088497161865, "logps/chosen": -59.142723083496094, "logps/rejected": -81.94139099121094, "loss": 0.4264, "rewards/accuracies": 1.0, "rewards/chosen": 0.20524878799915314, "rewards/margins": 1.1125417947769165, "rewards/rejected": -0.9072929620742798, "step": 539 }, { "epoch": 0.93, "learning_rate": 4.981937951551212e-07, "logits/chosen": -2.0595295429229736, "logits/rejected": -1.9221463203430176, "logps/chosen": -87.24197387695312, "logps/rejected": -84.34381103515625, "loss": 0.4227, "rewards/accuracies": 0.5, "rewards/chosen": 0.3030775189399719, "rewards/margins": 0.46702367067337036, "rewards/rejected": -0.16394615173339844, "step": 540 }, { "epoch": 0.93, "learning_rate": 4.980875478113047e-07, "logits/chosen": -2.212841749191284, "logits/rejected": -2.1669750213623047, "logps/chosen": -68.89015197753906, "logps/rejected": -68.69105529785156, "loss": 0.4982, "rewards/accuracies": 0.75, "rewards/chosen": 0.04161073639988899, "rewards/margins": 0.651292085647583, "rewards/rejected": -0.609681248664856, "step": 541 }, { "epoch": 0.93, "learning_rate": 4.979813004674883e-07, "logits/chosen": -1.9930825233459473, "logits/rejected": -2.1176700592041016, "logps/chosen": -76.84507751464844, "logps/rejected": -101.07344055175781, "loss": 0.3666, "rewards/accuracies": 1.0, "rewards/chosen": 0.030993837863206863, "rewards/margins": 1.0911730527877808, "rewards/rejected": -1.0601792335510254, "step": 542 }, { "epoch": 0.93, "learning_rate": 4.978750531236718e-07, "logits/chosen": -2.0099048614501953, "logits/rejected": -2.1919283866882324, "logps/chosen": -77.70614624023438, "logps/rejected": -107.27963256835938, "loss": 0.4463, "rewards/accuracies": 0.75, "rewards/chosen": 0.13581635057926178, "rewards/margins": 1.4600342512130737, "rewards/rejected": -1.3242177963256836, "step": 543 }, { "epoch": 0.94, "learning_rate": 4.977688057798554e-07, "logits/chosen": -1.801931381225586, "logits/rejected": -1.9153625965118408, "logps/chosen": -84.15975952148438, "logps/rejected": -106.14797973632812, "loss": 0.4223, "rewards/accuracies": 0.5, "rewards/chosen": 0.682689368724823, "rewards/margins": 1.222021222114563, "rewards/rejected": -0.5393317937850952, "step": 544 }, { "epoch": 0.94, "learning_rate": 4.976625584360392e-07, "logits/chosen": -2.0173001289367676, "logits/rejected": -2.158343553543091, "logps/chosen": -68.85396575927734, "logps/rejected": -99.26518249511719, "loss": 0.4582, "rewards/accuracies": 1.0, "rewards/chosen": 0.4835432171821594, "rewards/margins": 1.9109561443328857, "rewards/rejected": -1.427412986755371, "step": 545 }, { "epoch": 0.94, "learning_rate": 4.975563110922226e-07, "logits/chosen": -1.9845565557479858, "logits/rejected": -1.991443395614624, "logps/chosen": -59.527130126953125, "logps/rejected": -69.99269104003906, "loss": 0.4318, "rewards/accuracies": 1.0, "rewards/chosen": 0.1599416732788086, "rewards/margins": 1.2873493432998657, "rewards/rejected": -1.1274076700210571, "step": 546 }, { "epoch": 0.94, "learning_rate": 4.974500637484062e-07, "logits/chosen": -1.8401721715927124, "logits/rejected": -2.017280101776123, "logps/chosen": -61.25053787231445, "logps/rejected": -54.21210479736328, "loss": 0.5236, "rewards/accuracies": 0.25, "rewards/chosen": 0.1972426027059555, "rewards/margins": -0.5236907005310059, "rewards/rejected": 0.7209332585334778, "step": 547 }, { "epoch": 0.94, "learning_rate": 4.973438164045898e-07, "logits/chosen": -2.0332517623901367, "logits/rejected": -1.8518966436386108, "logps/chosen": -80.81982421875, "logps/rejected": -98.29811096191406, "loss": 0.3935, "rewards/accuracies": 1.0, "rewards/chosen": 0.140171617269516, "rewards/margins": 2.616948366165161, "rewards/rejected": -2.476776599884033, "step": 548 }, { "epoch": 0.94, "learning_rate": 4.972375690607734e-07, "logits/chosen": -1.778872013092041, "logits/rejected": -2.1859402656555176, "logps/chosen": -101.83308410644531, "logps/rejected": -117.29922485351562, "loss": 0.4278, "rewards/accuracies": 1.0, "rewards/chosen": -0.4352472424507141, "rewards/margins": 1.1362648010253906, "rewards/rejected": -1.57151198387146, "step": 549 }, { "epoch": 0.95, "learning_rate": 4.97131321716957e-07, "logits/chosen": -2.2323012351989746, "logits/rejected": -1.9292840957641602, "logps/chosen": -64.7784194946289, "logps/rejected": -80.34086608886719, "loss": 0.3639, "rewards/accuracies": 1.0, "rewards/chosen": 0.24345378577709198, "rewards/margins": 0.5922636389732361, "rewards/rejected": -0.3488098382949829, "step": 550 }, { "epoch": 0.95, "learning_rate": 4.970250743731406e-07, "logits/chosen": -1.9902245998382568, "logits/rejected": -2.3544394969940186, "logps/chosen": -109.06201171875, "logps/rejected": -123.28105926513672, "loss": 0.4018, "rewards/accuracies": 0.75, "rewards/chosen": -0.5651454925537109, "rewards/margins": 0.7005295157432556, "rewards/rejected": -1.2656749486923218, "step": 551 }, { "epoch": 0.95, "learning_rate": 4.969188270293242e-07, "logits/chosen": -2.0358619689941406, "logits/rejected": -2.113546848297119, "logps/chosen": -68.83191680908203, "logps/rejected": -87.98021697998047, "loss": 0.3108, "rewards/accuracies": 1.0, "rewards/chosen": -0.11394837498664856, "rewards/margins": 1.0048538446426392, "rewards/rejected": -1.1188021898269653, "step": 552 }, { "epoch": 0.95, "learning_rate": 4.968125796855078e-07, "logits/chosen": -2.0246243476867676, "logits/rejected": -1.9837579727172852, "logps/chosen": -70.34700012207031, "logps/rejected": -87.46589660644531, "loss": 0.3373, "rewards/accuracies": 0.75, "rewards/chosen": -0.029857128858566284, "rewards/margins": 0.9898002743721008, "rewards/rejected": -1.0196573734283447, "step": 553 }, { "epoch": 0.95, "learning_rate": 4.967063323416914e-07, "logits/chosen": -2.0759968757629395, "logits/rejected": -2.069216728210449, "logps/chosen": -90.57755279541016, "logps/rejected": -124.49999237060547, "loss": 0.4475, "rewards/accuracies": 0.75, "rewards/chosen": -0.2098345011472702, "rewards/margins": 2.8978679180145264, "rewards/rejected": -3.1077022552490234, "step": 554 }, { "epoch": 0.96, "learning_rate": 4.96600084997875e-07, "logits/chosen": -2.0538525581359863, "logits/rejected": -2.0527758598327637, "logps/chosen": -97.36454010009766, "logps/rejected": -123.08499908447266, "loss": 0.4085, "rewards/accuracies": 0.75, "rewards/chosen": -0.009431995451450348, "rewards/margins": 2.378615617752075, "rewards/rejected": -2.388047695159912, "step": 555 }, { "epoch": 0.96, "learning_rate": 4.964938376540586e-07, "logits/chosen": -2.0913052558898926, "logits/rejected": -1.8246430158615112, "logps/chosen": -97.36262512207031, "logps/rejected": -107.05764770507812, "loss": 0.3971, "rewards/accuracies": 1.0, "rewards/chosen": 0.44036543369293213, "rewards/margins": 2.028172016143799, "rewards/rejected": -1.5878065824508667, "step": 556 }, { "epoch": 0.96, "learning_rate": 4.963875903102422e-07, "logits/chosen": -2.2101566791534424, "logits/rejected": -2.2217018604278564, "logps/chosen": -64.57201385498047, "logps/rejected": -76.48979949951172, "loss": 0.3615, "rewards/accuracies": 0.5, "rewards/chosen": -0.11610652506351471, "rewards/margins": 1.0760518312454224, "rewards/rejected": -1.1921584606170654, "step": 557 }, { "epoch": 0.96, "learning_rate": 4.962813429664258e-07, "logits/chosen": -2.239145040512085, "logits/rejected": -1.573937177658081, "logps/chosen": -116.36581420898438, "logps/rejected": -96.07099914550781, "loss": 0.4775, "rewards/accuracies": 1.0, "rewards/chosen": -0.28630122542381287, "rewards/margins": 1.7181936502456665, "rewards/rejected": -2.004495143890381, "step": 558 }, { "epoch": 0.96, "learning_rate": 4.961750956226094e-07, "logits/chosen": -1.659104585647583, "logits/rejected": -2.1633057594299316, "logps/chosen": -80.53489685058594, "logps/rejected": -119.16102600097656, "loss": 0.3273, "rewards/accuracies": 0.75, "rewards/chosen": -0.18676453828811646, "rewards/margins": 2.056943416595459, "rewards/rejected": -2.2437078952789307, "step": 559 }, { "epoch": 0.96, "learning_rate": 4.96068848278793e-07, "logits/chosen": -2.0859155654907227, "logits/rejected": -1.9688754081726074, "logps/chosen": -101.09159088134766, "logps/rejected": -123.93499755859375, "loss": 0.2879, "rewards/accuracies": 0.75, "rewards/chosen": -0.6959778070449829, "rewards/margins": 1.467279076576233, "rewards/rejected": -2.1632566452026367, "step": 560 }, { "epoch": 0.97, "learning_rate": 4.959626009349766e-07, "logits/chosen": -2.0747241973876953, "logits/rejected": -2.142702341079712, "logps/chosen": -91.00504302978516, "logps/rejected": -104.99676513671875, "loss": 0.3482, "rewards/accuracies": 0.75, "rewards/chosen": 0.11178971081972122, "rewards/margins": 2.4401869773864746, "rewards/rejected": -2.328397512435913, "step": 561 }, { "epoch": 0.97, "learning_rate": 4.958563535911602e-07, "logits/chosen": -1.9039642810821533, "logits/rejected": -1.9444323778152466, "logps/chosen": -91.06060028076172, "logps/rejected": -96.11911010742188, "loss": 0.4071, "rewards/accuracies": 0.75, "rewards/chosen": -0.29230883717536926, "rewards/margins": 1.1709110736846924, "rewards/rejected": -1.4632198810577393, "step": 562 }, { "epoch": 0.97, "learning_rate": 4.957501062473438e-07, "logits/chosen": -2.006418466567993, "logits/rejected": -2.108851194381714, "logps/chosen": -72.77388000488281, "logps/rejected": -80.3547134399414, "loss": 0.3965, "rewards/accuracies": 0.5, "rewards/chosen": -0.8731918334960938, "rewards/margins": 0.2674681842327118, "rewards/rejected": -1.1406599283218384, "step": 563 }, { "epoch": 0.97, "learning_rate": 4.956438589035274e-07, "logits/chosen": -2.0656914710998535, "logits/rejected": -2.1389570236206055, "logps/chosen": -106.6336669921875, "logps/rejected": -118.13092041015625, "loss": 0.4904, "rewards/accuracies": 0.5, "rewards/chosen": -1.189738154411316, "rewards/margins": 1.1370131969451904, "rewards/rejected": -2.326751232147217, "step": 564 }, { "epoch": 0.97, "learning_rate": 4.955376115597109e-07, "logits/chosen": -1.918839454650879, "logits/rejected": -1.8755649328231812, "logps/chosen": -103.94694519042969, "logps/rejected": -108.863037109375, "loss": 0.7291, "rewards/accuracies": 0.75, "rewards/chosen": -0.7666841745376587, "rewards/margins": 0.6765029430389404, "rewards/rejected": -1.4431871175765991, "step": 565 }, { "epoch": 0.97, "learning_rate": 4.954313642158946e-07, "logits/chosen": -2.1349198818206787, "logits/rejected": -2.0700342655181885, "logps/chosen": -72.06301879882812, "logps/rejected": -113.452392578125, "loss": 0.529, "rewards/accuracies": 1.0, "rewards/chosen": -0.005971983075141907, "rewards/margins": 3.607391119003296, "rewards/rejected": -3.613363265991211, "step": 566 }, { "epoch": 0.98, "learning_rate": 4.953251168720782e-07, "logits/chosen": -1.861570954322815, "logits/rejected": -1.5204709768295288, "logps/chosen": -96.07875061035156, "logps/rejected": -93.24640655517578, "loss": 0.3924, "rewards/accuracies": 0.25, "rewards/chosen": 0.15919369459152222, "rewards/margins": 0.9414229393005371, "rewards/rejected": -0.7822291851043701, "step": 567 }, { "epoch": 0.98, "learning_rate": 4.952188695282617e-07, "logits/chosen": -1.9365460872650146, "logits/rejected": -1.8871641159057617, "logps/chosen": -92.67182159423828, "logps/rejected": -127.58317565917969, "loss": 0.4153, "rewards/accuracies": 1.0, "rewards/chosen": -0.12546482682228088, "rewards/margins": 2.4664738178253174, "rewards/rejected": -2.5919387340545654, "step": 568 }, { "epoch": 0.98, "learning_rate": 4.951126221844454e-07, "logits/chosen": -1.9533928632736206, "logits/rejected": -1.8872582912445068, "logps/chosen": -83.9326171875, "logps/rejected": -79.66077423095703, "loss": 0.4995, "rewards/accuracies": 0.75, "rewards/chosen": -0.5433512926101685, "rewards/margins": 0.4291127920150757, "rewards/rejected": -0.9724640250205994, "step": 569 }, { "epoch": 0.98, "learning_rate": 4.950063748406289e-07, "logits/chosen": -2.120326519012451, "logits/rejected": -1.996487021446228, "logps/chosen": -90.41253662109375, "logps/rejected": -104.2779541015625, "loss": 0.4506, "rewards/accuracies": 1.0, "rewards/chosen": -0.4476598799228668, "rewards/margins": 2.176584005355835, "rewards/rejected": -2.62424373626709, "step": 570 }, { "epoch": 0.98, "learning_rate": 4.949001274968125e-07, "logits/chosen": -2.2117176055908203, "logits/rejected": -2.029590129852295, "logps/chosen": -115.52532958984375, "logps/rejected": -110.0545425415039, "loss": 0.5457, "rewards/accuracies": 0.5, "rewards/chosen": -0.516742467880249, "rewards/margins": 1.6916112899780273, "rewards/rejected": -2.2083535194396973, "step": 571 }, { "epoch": 0.98, "learning_rate": 4.947938801529962e-07, "logits/chosen": -1.7607694864273071, "logits/rejected": -1.8478314876556396, "logps/chosen": -97.85813903808594, "logps/rejected": -101.0425796508789, "loss": 0.48, "rewards/accuracies": 0.75, "rewards/chosen": -0.6069368720054626, "rewards/margins": 1.2464933395385742, "rewards/rejected": -1.8534302711486816, "step": 572 }, { "epoch": 0.99, "learning_rate": 4.946876328091797e-07, "logits/chosen": -2.021339178085327, "logits/rejected": -2.1942901611328125, "logps/chosen": -86.02312469482422, "logps/rejected": -105.46946716308594, "loss": 0.4274, "rewards/accuracies": 0.5, "rewards/chosen": -1.2795650959014893, "rewards/margins": 0.9312012791633606, "rewards/rejected": -2.210766315460205, "step": 573 }, { "epoch": 0.99, "learning_rate": 4.945813854653633e-07, "logits/chosen": -1.780307650566101, "logits/rejected": -2.1231114864349365, "logps/chosen": -74.8551025390625, "logps/rejected": -97.26152038574219, "loss": 0.4189, "rewards/accuracies": 0.5, "rewards/chosen": -1.1373366117477417, "rewards/margins": 0.5900565385818481, "rewards/rejected": -1.7273931503295898, "step": 574 }, { "epoch": 0.99, "learning_rate": 4.944751381215469e-07, "logits/chosen": -2.043130874633789, "logits/rejected": -2.1383907794952393, "logps/chosen": -112.13601684570312, "logps/rejected": -104.57389068603516, "loss": 0.3664, "rewards/accuracies": 0.25, "rewards/chosen": -1.604437232017517, "rewards/margins": -0.07616329193115234, "rewards/rejected": -1.5282739400863647, "step": 575 }, { "epoch": 0.99, "learning_rate": 4.943688907777305e-07, "logits/chosen": -1.5537071228027344, "logits/rejected": -2.2149224281311035, "logps/chosen": -87.75933837890625, "logps/rejected": -121.33378601074219, "loss": 0.4357, "rewards/accuracies": 0.75, "rewards/chosen": -0.25543442368507385, "rewards/margins": 1.0295817852020264, "rewards/rejected": -1.2850162982940674, "step": 576 }, { "epoch": 0.99, "learning_rate": 4.942626434339141e-07, "logits/chosen": -1.946315050125122, "logits/rejected": -2.132542848587036, "logps/chosen": -108.85401153564453, "logps/rejected": -122.89025115966797, "loss": 0.3682, "rewards/accuracies": 0.75, "rewards/chosen": -0.7594866156578064, "rewards/margins": 1.522078514099121, "rewards/rejected": -2.2815651893615723, "step": 577 }, { "epoch": 0.99, "learning_rate": 4.941563960900977e-07, "logits/chosen": -2.043572187423706, "logits/rejected": -2.089212656021118, "logps/chosen": -94.30043029785156, "logps/rejected": -133.17112731933594, "loss": 0.4344, "rewards/accuracies": 1.0, "rewards/chosen": -0.2846958339214325, "rewards/margins": 2.634949207305908, "rewards/rejected": -2.919644832611084, "step": 578 }, { "epoch": 1.0, "learning_rate": 4.940501487462813e-07, "logits/chosen": -2.1660971641540527, "logits/rejected": -2.112471580505371, "logps/chosen": -97.27824401855469, "logps/rejected": -117.41970825195312, "loss": 0.3025, "rewards/accuracies": 0.75, "rewards/chosen": -0.7667696475982666, "rewards/margins": 1.2673466205596924, "rewards/rejected": -2.034116268157959, "step": 579 }, { "epoch": 1.0, "learning_rate": 4.939439014024649e-07, "logits/chosen": -2.0560154914855957, "logits/rejected": -2.0925145149230957, "logps/chosen": -56.82087707519531, "logps/rejected": -81.85292053222656, "loss": 0.4153, "rewards/accuracies": 0.75, "rewards/chosen": 0.6254573464393616, "rewards/margins": 2.593649387359619, "rewards/rejected": -1.968192219734192, "step": 580 }, { "epoch": 1.0, "learning_rate": 4.938376540586485e-07, "logits/chosen": -1.9848523139953613, "logits/rejected": -1.6520142555236816, "logps/chosen": -65.95477294921875, "logps/rejected": -87.14520263671875, "loss": 0.4452, "rewards/accuracies": 0.75, "rewards/chosen": 0.3182011544704437, "rewards/margins": 3.2039599418640137, "rewards/rejected": -2.885758876800537, "step": 581 }, { "epoch": 1.0, "learning_rate": 4.937314067148321e-07, "logits/chosen": -1.983508825302124, "logits/rejected": -2.0965442657470703, "logps/chosen": -77.61067199707031, "logps/rejected": -94.89698791503906, "loss": 0.4183, "rewards/accuracies": 1.0, "rewards/chosen": -0.3092418611049652, "rewards/margins": 0.7977927923202515, "rewards/rejected": -1.107034683227539, "step": 582 }, { "epoch": 1.0, "learning_rate": 4.936251593710157e-07, "logits/chosen": -1.7966649532318115, "logits/rejected": -2.164715051651001, "logps/chosen": -64.9608383178711, "logps/rejected": -87.56073760986328, "loss": 0.3315, "rewards/accuracies": 1.0, "rewards/chosen": 0.4099993109703064, "rewards/margins": 1.0534882545471191, "rewards/rejected": -0.643488883972168, "step": 583 }, { "epoch": 1.01, "learning_rate": 4.935189120271993e-07, "logits/chosen": -1.8034920692443848, "logits/rejected": -1.9304113388061523, "logps/chosen": -82.52325439453125, "logps/rejected": -98.76582336425781, "loss": 0.5026, "rewards/accuracies": 0.75, "rewards/chosen": -0.3518158793449402, "rewards/margins": 0.9832452535629272, "rewards/rejected": -1.3350611925125122, "step": 584 }, { "epoch": 1.01, "learning_rate": 4.934126646833829e-07, "logits/chosen": -2.1398043632507324, "logits/rejected": -2.0778660774230957, "logps/chosen": -102.99811553955078, "logps/rejected": -134.97393798828125, "loss": 0.4186, "rewards/accuracies": 1.0, "rewards/chosen": 0.1365884691476822, "rewards/margins": 2.3450262546539307, "rewards/rejected": -2.208437919616699, "step": 585 }, { "epoch": 1.01, "learning_rate": 4.933064173395665e-07, "logits/chosen": -2.2281816005706787, "logits/rejected": -1.9435980319976807, "logps/chosen": -91.93756103515625, "logps/rejected": -80.77163696289062, "loss": 0.3054, "rewards/accuracies": 0.75, "rewards/chosen": 0.7146095633506775, "rewards/margins": 0.8111138343811035, "rewards/rejected": -0.09650430828332901, "step": 586 }, { "epoch": 1.01, "learning_rate": 4.932001699957501e-07, "logits/chosen": -2.148066520690918, "logits/rejected": -1.820268154144287, "logps/chosen": -75.13618469238281, "logps/rejected": -93.79704284667969, "loss": 0.4259, "rewards/accuracies": 0.75, "rewards/chosen": 0.4911697506904602, "rewards/margins": 2.6731128692626953, "rewards/rejected": -2.181943416595459, "step": 587 }, { "epoch": 1.01, "learning_rate": 4.930939226519337e-07, "logits/chosen": -2.1115639209747314, "logits/rejected": -2.02449893951416, "logps/chosen": -88.94998168945312, "logps/rejected": -84.66532897949219, "loss": 0.3481, "rewards/accuracies": 0.5, "rewards/chosen": -0.5404603481292725, "rewards/margins": 0.16016148030757904, "rewards/rejected": -0.7006218433380127, "step": 588 }, { "epoch": 1.01, "learning_rate": 4.929876753081173e-07, "logits/chosen": -2.071263313293457, "logits/rejected": -2.2686450481414795, "logps/chosen": -65.01702880859375, "logps/rejected": -97.16140747070312, "loss": 0.3321, "rewards/accuracies": 1.0, "rewards/chosen": 0.21002092957496643, "rewards/margins": 1.9773961305618286, "rewards/rejected": -1.7673752307891846, "step": 589 }, { "epoch": 1.02, "learning_rate": 4.928814279643009e-07, "logits/chosen": -1.9746172428131104, "logits/rejected": -1.7811638116836548, "logps/chosen": -118.01828002929688, "logps/rejected": -141.16384887695312, "loss": 0.4061, "rewards/accuracies": 0.75, "rewards/chosen": -0.1978893280029297, "rewards/margins": 2.2203314304351807, "rewards/rejected": -2.4182207584381104, "step": 590 }, { "epoch": 1.02, "learning_rate": 4.927751806204845e-07, "logits/chosen": -2.1500425338745117, "logits/rejected": -1.8783382177352905, "logps/chosen": -97.86299896240234, "logps/rejected": -105.90507507324219, "loss": 0.3096, "rewards/accuracies": 1.0, "rewards/chosen": -0.4716449975967407, "rewards/margins": 2.1910200119018555, "rewards/rejected": -2.6626646518707275, "step": 591 }, { "epoch": 1.02, "learning_rate": 4.926689332766681e-07, "logits/chosen": -1.736936330795288, "logits/rejected": -2.122641086578369, "logps/chosen": -86.08473205566406, "logps/rejected": -113.67784118652344, "loss": 0.3368, "rewards/accuracies": 1.0, "rewards/chosen": 0.6572670340538025, "rewards/margins": 2.5817275047302246, "rewards/rejected": -1.9244606494903564, "step": 592 }, { "epoch": 1.02, "learning_rate": 4.925626859328517e-07, "logits/chosen": -1.9776828289031982, "logits/rejected": -1.7410742044448853, "logps/chosen": -91.52863311767578, "logps/rejected": -89.24945068359375, "loss": 0.3343, "rewards/accuracies": 0.5, "rewards/chosen": -0.332811176776886, "rewards/margins": 1.6013069152832031, "rewards/rejected": -1.9341182708740234, "step": 593 }, { "epoch": 1.02, "learning_rate": 4.924564385890353e-07, "logits/chosen": -2.0556187629699707, "logits/rejected": -2.3215103149414062, "logps/chosen": -94.33311462402344, "logps/rejected": -104.82571411132812, "loss": 0.4035, "rewards/accuracies": 0.75, "rewards/chosen": -0.09799031913280487, "rewards/margins": 0.6740452647209167, "rewards/rejected": -0.7720356583595276, "step": 594 }, { "epoch": 1.02, "learning_rate": 4.923501912452189e-07, "logits/chosen": -2.0690927505493164, "logits/rejected": -2.0253102779388428, "logps/chosen": -66.84519958496094, "logps/rejected": -104.47090148925781, "loss": 0.2637, "rewards/accuracies": 0.75, "rewards/chosen": 0.04409637302160263, "rewards/margins": 2.9324357509613037, "rewards/rejected": -2.8883395195007324, "step": 595 }, { "epoch": 1.03, "learning_rate": 4.922439439014025e-07, "logits/chosen": -1.9975700378417969, "logits/rejected": -2.1408166885375977, "logps/chosen": -91.6138687133789, "logps/rejected": -127.10108184814453, "loss": 0.3722, "rewards/accuracies": 1.0, "rewards/chosen": -0.20398765802383423, "rewards/margins": 3.137645959854126, "rewards/rejected": -3.3416337966918945, "step": 596 }, { "epoch": 1.03, "learning_rate": 4.92137696557586e-07, "logits/chosen": -1.8268457651138306, "logits/rejected": -2.165151834487915, "logps/chosen": -84.62127685546875, "logps/rejected": -107.25845336914062, "loss": 0.4965, "rewards/accuracies": 1.0, "rewards/chosen": -0.2181682586669922, "rewards/margins": 1.6504309177398682, "rewards/rejected": -1.8685994148254395, "step": 597 }, { "epoch": 1.03, "learning_rate": 4.920314492137697e-07, "logits/chosen": -2.036890983581543, "logits/rejected": -2.123727560043335, "logps/chosen": -88.64703369140625, "logps/rejected": -107.73580932617188, "loss": 0.3383, "rewards/accuracies": 1.0, "rewards/chosen": -0.89121413230896, "rewards/margins": 1.0995805263519287, "rewards/rejected": -1.9907946586608887, "step": 598 }, { "epoch": 1.03, "learning_rate": 4.919252018699533e-07, "logits/chosen": -2.049679756164551, "logits/rejected": -1.9590321779251099, "logps/chosen": -96.43281555175781, "logps/rejected": -125.5816879272461, "loss": 0.3226, "rewards/accuracies": 1.0, "rewards/chosen": -0.05950203537940979, "rewards/margins": 2.4041240215301514, "rewards/rejected": -2.4636261463165283, "step": 599 }, { "epoch": 1.03, "learning_rate": 4.918189545261368e-07, "logits/chosen": -2.188648223876953, "logits/rejected": -2.2040209770202637, "logps/chosen": -75.86096954345703, "logps/rejected": -85.0140609741211, "loss": 0.4019, "rewards/accuracies": 1.0, "rewards/chosen": 0.9227047562599182, "rewards/margins": 1.511716365814209, "rewards/rejected": -0.5890116095542908, "step": 600 }, { "epoch": 1.03, "learning_rate": 4.917127071823205e-07, "logits/chosen": -2.0936484336853027, "logits/rejected": -2.0239245891571045, "logps/chosen": -88.90161895751953, "logps/rejected": -109.2955322265625, "loss": 0.4384, "rewards/accuracies": 0.75, "rewards/chosen": 0.4254112243652344, "rewards/margins": 2.111901044845581, "rewards/rejected": -1.6864898204803467, "step": 601 }, { "epoch": 1.04, "learning_rate": 4.91606459838504e-07, "logits/chosen": -1.8759684562683105, "logits/rejected": -2.0207107067108154, "logps/chosen": -86.63838195800781, "logps/rejected": -120.43085479736328, "loss": 0.2605, "rewards/accuracies": 1.0, "rewards/chosen": 0.8471948504447937, "rewards/margins": 2.226628065109253, "rewards/rejected": -1.379433274269104, "step": 602 }, { "epoch": 1.04, "learning_rate": 4.915002124946876e-07, "logits/chosen": -1.915436029434204, "logits/rejected": -2.241138219833374, "logps/chosen": -97.81693267822266, "logps/rejected": -129.843994140625, "loss": 0.2331, "rewards/accuracies": 0.5, "rewards/chosen": 0.09106352925300598, "rewards/margins": 2.8667654991149902, "rewards/rejected": -2.7757019996643066, "step": 603 }, { "epoch": 1.04, "learning_rate": 4.913939651508713e-07, "logits/chosen": -1.9961782693862915, "logits/rejected": -1.916059970855713, "logps/chosen": -119.46800231933594, "logps/rejected": -141.52069091796875, "loss": 0.3455, "rewards/accuracies": 0.75, "rewards/chosen": -1.7345678806304932, "rewards/margins": 0.4412269592285156, "rewards/rejected": -2.175794839859009, "step": 604 }, { "epoch": 1.04, "learning_rate": 4.912877178070548e-07, "logits/chosen": -2.064499616622925, "logits/rejected": -2.0394716262817383, "logps/chosen": -63.273414611816406, "logps/rejected": -85.83369445800781, "loss": 0.1864, "rewards/accuracies": 0.75, "rewards/chosen": 0.19632172584533691, "rewards/margins": 2.709519386291504, "rewards/rejected": -2.513197422027588, "step": 605 }, { "epoch": 1.04, "learning_rate": 4.911814704632384e-07, "logits/chosen": -1.929052472114563, "logits/rejected": -2.2568368911743164, "logps/chosen": -69.7923355102539, "logps/rejected": -102.06558990478516, "loss": 0.261, "rewards/accuracies": 1.0, "rewards/chosen": 0.09215240180492401, "rewards/margins": 1.9465183019638062, "rewards/rejected": -1.8543658256530762, "step": 606 }, { "epoch": 1.04, "learning_rate": 4.91075223119422e-07, "logits/chosen": -2.0827181339263916, "logits/rejected": -2.161504030227661, "logps/chosen": -75.40145874023438, "logps/rejected": -96.07177734375, "loss": 0.3763, "rewards/accuracies": 1.0, "rewards/chosen": -0.632844090461731, "rewards/margins": 1.8908814191818237, "rewards/rejected": -2.5237255096435547, "step": 607 }, { "epoch": 1.05, "learning_rate": 4.909689757756056e-07, "logits/chosen": -1.7853388786315918, "logits/rejected": -2.0696475505828857, "logps/chosen": -86.34849548339844, "logps/rejected": -117.86601257324219, "loss": 0.4273, "rewards/accuracies": 0.75, "rewards/chosen": 0.08100206404924393, "rewards/margins": 1.2409353256225586, "rewards/rejected": -1.15993332862854, "step": 608 }, { "epoch": 1.05, "learning_rate": 4.908627284317892e-07, "logits/chosen": -1.8845133781433105, "logits/rejected": -2.291015625, "logps/chosen": -81.81896209716797, "logps/rejected": -102.37442016601562, "loss": 0.3324, "rewards/accuracies": 0.75, "rewards/chosen": -0.6168162822723389, "rewards/margins": 1.015846848487854, "rewards/rejected": -1.6326630115509033, "step": 609 }, { "epoch": 1.05, "learning_rate": 4.907564810879728e-07, "logits/chosen": -1.8878625631332397, "logits/rejected": -2.4871273040771484, "logps/chosen": -83.63375854492188, "logps/rejected": -122.57797241210938, "loss": 0.3574, "rewards/accuracies": 1.0, "rewards/chosen": 0.08349914848804474, "rewards/margins": 3.0961546897888184, "rewards/rejected": -3.012655258178711, "step": 610 }, { "epoch": 1.05, "learning_rate": 4.906502337441564e-07, "logits/chosen": -2.2199888229370117, "logits/rejected": -1.82977294921875, "logps/chosen": -88.84821319580078, "logps/rejected": -109.279052734375, "loss": 0.295, "rewards/accuracies": 1.0, "rewards/chosen": -0.4916331171989441, "rewards/margins": 3.1259360313415527, "rewards/rejected": -3.6175694465637207, "step": 611 }, { "epoch": 1.05, "learning_rate": 4.9054398640034e-07, "logits/chosen": -1.6974155902862549, "logits/rejected": -2.185598134994507, "logps/chosen": -99.55192565917969, "logps/rejected": -143.50885009765625, "loss": 0.226, "rewards/accuracies": 0.75, "rewards/chosen": -0.7037482261657715, "rewards/margins": 2.5783262252807617, "rewards/rejected": -3.282074451446533, "step": 612 }, { "epoch": 1.06, "learning_rate": 4.904377390565236e-07, "logits/chosen": -1.9374555349349976, "logits/rejected": -2.006728172302246, "logps/chosen": -80.40345764160156, "logps/rejected": -118.04061126708984, "loss": 0.2415, "rewards/accuracies": 1.0, "rewards/chosen": -0.9450967311859131, "rewards/margins": 3.0583553314208984, "rewards/rejected": -4.003452301025391, "step": 613 }, { "epoch": 1.06, "learning_rate": 4.903314917127072e-07, "logits/chosen": -2.038583517074585, "logits/rejected": -1.923487901687622, "logps/chosen": -81.30599212646484, "logps/rejected": -106.98040771484375, "loss": 0.3098, "rewards/accuracies": 1.0, "rewards/chosen": 0.19807317852973938, "rewards/margins": 2.604485273361206, "rewards/rejected": -2.406412124633789, "step": 614 }, { "epoch": 1.06, "learning_rate": 4.902252443688908e-07, "logits/chosen": -1.6604310274124146, "logits/rejected": -2.3174891471862793, "logps/chosen": -96.82347106933594, "logps/rejected": -132.3152313232422, "loss": 0.5426, "rewards/accuracies": 0.75, "rewards/chosen": -0.9095327854156494, "rewards/margins": 0.545734167098999, "rewards/rejected": -1.4552669525146484, "step": 615 }, { "epoch": 1.06, "learning_rate": 4.901189970250744e-07, "logits/chosen": -1.4357542991638184, "logits/rejected": -1.905491828918457, "logps/chosen": -79.79054260253906, "logps/rejected": -99.63725280761719, "loss": 0.2962, "rewards/accuracies": 0.75, "rewards/chosen": 0.4133074879646301, "rewards/margins": 0.8606278896331787, "rewards/rejected": -0.4473203718662262, "step": 616 }, { "epoch": 1.06, "learning_rate": 4.90012749681258e-07, "logits/chosen": -1.8960676193237305, "logits/rejected": -2.099290609359741, "logps/chosen": -71.31288146972656, "logps/rejected": -114.21552276611328, "loss": 0.3349, "rewards/accuracies": 1.0, "rewards/chosen": -0.05198925733566284, "rewards/margins": 2.6904804706573486, "rewards/rejected": -2.7424700260162354, "step": 617 }, { "epoch": 1.06, "learning_rate": 4.899065023374416e-07, "logits/chosen": -2.2532477378845215, "logits/rejected": -1.4602279663085938, "logps/chosen": -109.70172882080078, "logps/rejected": -118.53044128417969, "loss": 0.3366, "rewards/accuracies": 1.0, "rewards/chosen": -0.14849548041820526, "rewards/margins": 2.8140363693237305, "rewards/rejected": -2.9625320434570312, "step": 618 }, { "epoch": 1.07, "learning_rate": 4.898002549936252e-07, "logits/chosen": -2.1195616722106934, "logits/rejected": -2.120356559753418, "logps/chosen": -79.70826721191406, "logps/rejected": -104.57958984375, "loss": 0.2908, "rewards/accuracies": 0.75, "rewards/chosen": -0.10657978057861328, "rewards/margins": 2.7843620777130127, "rewards/rejected": -2.890941858291626, "step": 619 }, { "epoch": 1.07, "learning_rate": 4.896940076498088e-07, "logits/chosen": -1.889219045639038, "logits/rejected": -2.022634267807007, "logps/chosen": -105.35792541503906, "logps/rejected": -123.71422576904297, "loss": 0.2869, "rewards/accuracies": 0.75, "rewards/chosen": -0.8731329441070557, "rewards/margins": 1.2336276769638062, "rewards/rejected": -2.1067605018615723, "step": 620 }, { "epoch": 1.07, "learning_rate": 4.895877603059924e-07, "logits/chosen": -2.2840681076049805, "logits/rejected": -2.0678603649139404, "logps/chosen": -90.22140502929688, "logps/rejected": -111.69559478759766, "loss": 0.4153, "rewards/accuracies": 0.75, "rewards/chosen": -0.6473187804222107, "rewards/margins": 2.650524377822876, "rewards/rejected": -3.2978432178497314, "step": 621 }, { "epoch": 1.07, "learning_rate": 4.89481512962176e-07, "logits/chosen": -1.9271725416183472, "logits/rejected": -2.2175116539001465, "logps/chosen": -80.12640380859375, "logps/rejected": -103.58106231689453, "loss": 0.4209, "rewards/accuracies": 0.5, "rewards/chosen": -0.4430164098739624, "rewards/margins": 2.120274543762207, "rewards/rejected": -2.563291072845459, "step": 622 }, { "epoch": 1.07, "learning_rate": 4.893752656183595e-07, "logits/chosen": -2.0291543006896973, "logits/rejected": -2.0854785442352295, "logps/chosen": -81.27838897705078, "logps/rejected": -98.43614196777344, "loss": 0.357, "rewards/accuracies": 0.75, "rewards/chosen": -0.7571477890014648, "rewards/margins": 1.9321930408477783, "rewards/rejected": -2.6893410682678223, "step": 623 }, { "epoch": 1.07, "learning_rate": 4.89269018274543e-07, "logits/chosen": -1.855882167816162, "logits/rejected": -2.1012439727783203, "logps/chosen": -108.7137451171875, "logps/rejected": -120.70415496826172, "loss": 0.4721, "rewards/accuracies": 0.75, "rewards/chosen": -1.1487376689910889, "rewards/margins": 1.330184817314148, "rewards/rejected": -2.4789223670959473, "step": 624 }, { "epoch": 1.08, "learning_rate": 4.891627709307267e-07, "logits/chosen": -2.127595901489258, "logits/rejected": -2.1648335456848145, "logps/chosen": -89.48208618164062, "logps/rejected": -85.2203598022461, "loss": 0.3745, "rewards/accuracies": 0.5, "rewards/chosen": -0.669516384601593, "rewards/margins": 0.5355413556098938, "rewards/rejected": -1.2050577402114868, "step": 625 }, { "epoch": 1.08, "learning_rate": 4.890565235869103e-07, "logits/chosen": -1.9597554206848145, "logits/rejected": -1.9784963130950928, "logps/chosen": -98.39495086669922, "logps/rejected": -105.11883544921875, "loss": 0.3015, "rewards/accuracies": 0.75, "rewards/chosen": -0.8763206601142883, "rewards/margins": 0.9156601428985596, "rewards/rejected": -1.7919807434082031, "step": 626 }, { "epoch": 1.08, "learning_rate": 4.889502762430938e-07, "logits/chosen": -1.6600295305252075, "logits/rejected": -2.156463623046875, "logps/chosen": -71.396728515625, "logps/rejected": -90.69966125488281, "loss": 0.3491, "rewards/accuracies": 0.75, "rewards/chosen": -1.0917818546295166, "rewards/margins": 0.10543043911457062, "rewards/rejected": -1.1972124576568604, "step": 627 }, { "epoch": 1.08, "learning_rate": 4.888440288992775e-07, "logits/chosen": -2.1662826538085938, "logits/rejected": -1.9831364154815674, "logps/chosen": -86.84315490722656, "logps/rejected": -120.41046905517578, "loss": 0.4784, "rewards/accuracies": 1.0, "rewards/chosen": -0.7411172986030579, "rewards/margins": 2.584787130355835, "rewards/rejected": -3.325904369354248, "step": 628 }, { "epoch": 1.08, "learning_rate": 4.88737781555461e-07, "logits/chosen": -1.7630352973937988, "logits/rejected": -1.9376940727233887, "logps/chosen": -88.74575805664062, "logps/rejected": -120.33932495117188, "loss": 0.316, "rewards/accuracies": 1.0, "rewards/chosen": -0.5508777499198914, "rewards/margins": 2.2590718269348145, "rewards/rejected": -2.8099493980407715, "step": 629 }, { "epoch": 1.08, "learning_rate": 4.886315342116447e-07, "logits/chosen": -2.0417635440826416, "logits/rejected": -2.1485671997070312, "logps/chosen": -84.36418914794922, "logps/rejected": -123.78075408935547, "loss": 0.2302, "rewards/accuracies": 1.0, "rewards/chosen": -0.35959720611572266, "rewards/margins": 2.7902116775512695, "rewards/rejected": -3.149808883666992, "step": 630 }, { "epoch": 1.09, "learning_rate": 4.885252868678283e-07, "logits/chosen": -1.8483089208602905, "logits/rejected": -2.383281707763672, "logps/chosen": -110.0942153930664, "logps/rejected": -153.58547973632812, "loss": 0.365, "rewards/accuracies": 0.75, "rewards/chosen": -1.1566132307052612, "rewards/margins": 2.197847366333008, "rewards/rejected": -3.3544607162475586, "step": 631 }, { "epoch": 1.09, "learning_rate": 4.884190395240118e-07, "logits/chosen": -2.070854663848877, "logits/rejected": -2.0330467224121094, "logps/chosen": -82.9649658203125, "logps/rejected": -121.9772720336914, "loss": 0.5163, "rewards/accuracies": 1.0, "rewards/chosen": -0.12601445615291595, "rewards/margins": 3.9398810863494873, "rewards/rejected": -4.0658955574035645, "step": 632 }, { "epoch": 1.09, "learning_rate": 4.883127921801955e-07, "logits/chosen": -1.790756106376648, "logits/rejected": -2.380310535430908, "logps/chosen": -74.89079284667969, "logps/rejected": -133.35406494140625, "loss": 0.2784, "rewards/accuracies": 1.0, "rewards/chosen": -0.31848013401031494, "rewards/margins": 3.9539976119995117, "rewards/rejected": -4.272477626800537, "step": 633 }, { "epoch": 1.09, "learning_rate": 4.88206544836379e-07, "logits/chosen": -1.9788585901260376, "logits/rejected": -2.0327582359313965, "logps/chosen": -108.05999755859375, "logps/rejected": -134.1658172607422, "loss": 0.2981, "rewards/accuracies": 1.0, "rewards/chosen": -0.9831827878952026, "rewards/margins": 3.040417432785034, "rewards/rejected": -4.0236005783081055, "step": 634 }, { "epoch": 1.09, "learning_rate": 4.881002974925626e-07, "logits/chosen": -2.3778114318847656, "logits/rejected": -2.025425910949707, "logps/chosen": -114.5898666381836, "logps/rejected": -114.00242614746094, "loss": 0.3477, "rewards/accuracies": 1.0, "rewards/chosen": -1.2250030040740967, "rewards/margins": 1.3577942848205566, "rewards/rejected": -2.5827972888946533, "step": 635 }, { "epoch": 1.09, "learning_rate": 4.879940501487463e-07, "logits/chosen": -2.226113796234131, "logits/rejected": -2.25777530670166, "logps/chosen": -92.87651062011719, "logps/rejected": -118.77239990234375, "loss": 0.3933, "rewards/accuracies": 1.0, "rewards/chosen": -1.0521106719970703, "rewards/margins": 1.989608883857727, "rewards/rejected": -3.041719436645508, "step": 636 }, { "epoch": 1.1, "learning_rate": 4.878878028049298e-07, "logits/chosen": -1.9108000993728638, "logits/rejected": -2.15956974029541, "logps/chosen": -85.47433471679688, "logps/rejected": -96.23275756835938, "loss": 0.4266, "rewards/accuracies": 0.5, "rewards/chosen": -1.1662490367889404, "rewards/margins": 0.3042714595794678, "rewards/rejected": -1.4705204963684082, "step": 637 }, { "epoch": 1.1, "learning_rate": 4.877815554611134e-07, "logits/chosen": -2.0231668949127197, "logits/rejected": -1.9795929193496704, "logps/chosen": -111.12416076660156, "logps/rejected": -105.02710723876953, "loss": 0.4327, "rewards/accuracies": 0.5, "rewards/chosen": -0.8497347235679626, "rewards/margins": 0.38326960802078247, "rewards/rejected": -1.2330043315887451, "step": 638 }, { "epoch": 1.1, "learning_rate": 4.87675308117297e-07, "logits/chosen": -1.8918142318725586, "logits/rejected": -2.166532039642334, "logps/chosen": -74.66895294189453, "logps/rejected": -103.7297134399414, "loss": 0.2292, "rewards/accuracies": 1.0, "rewards/chosen": -0.11541604995727539, "rewards/margins": 2.524617910385132, "rewards/rejected": -2.6400341987609863, "step": 639 }, { "epoch": 1.1, "learning_rate": 4.875690607734806e-07, "logits/chosen": -2.1186840534210205, "logits/rejected": -2.0411410331726074, "logps/chosen": -98.92048645019531, "logps/rejected": -113.73200988769531, "loss": 0.3412, "rewards/accuracies": 0.75, "rewards/chosen": -0.6845260262489319, "rewards/margins": 1.651742935180664, "rewards/rejected": -2.3362691402435303, "step": 640 }, { "epoch": 1.1, "learning_rate": 4.874628134296642e-07, "logits/chosen": -2.2169175148010254, "logits/rejected": -2.2397000789642334, "logps/chosen": -111.10731506347656, "logps/rejected": -124.1549301147461, "loss": 0.3961, "rewards/accuracies": 0.75, "rewards/chosen": -1.170547366142273, "rewards/margins": 0.7585572004318237, "rewards/rejected": -1.9291045665740967, "step": 641 }, { "epoch": 1.1, "learning_rate": 4.873565660858478e-07, "logits/chosen": -1.9736770391464233, "logits/rejected": -2.031359910964966, "logps/chosen": -82.61585235595703, "logps/rejected": -101.52949523925781, "loss": 0.3706, "rewards/accuracies": 0.75, "rewards/chosen": -0.9893490672111511, "rewards/margins": 1.6911219358444214, "rewards/rejected": -2.6804709434509277, "step": 642 }, { "epoch": 1.11, "learning_rate": 4.872503187420314e-07, "logits/chosen": -2.3075191974639893, "logits/rejected": -1.9368371963500977, "logps/chosen": -88.77537536621094, "logps/rejected": -107.75100708007812, "loss": 0.3564, "rewards/accuracies": 0.75, "rewards/chosen": 0.09525224566459656, "rewards/margins": 2.4573683738708496, "rewards/rejected": -2.3621160984039307, "step": 643 }, { "epoch": 1.11, "learning_rate": 4.87144071398215e-07, "logits/chosen": -2.0563805103302, "logits/rejected": -2.2270920276641846, "logps/chosen": -97.55467987060547, "logps/rejected": -126.43826293945312, "loss": 0.3505, "rewards/accuracies": 0.75, "rewards/chosen": -0.10621338337659836, "rewards/margins": 1.738055944442749, "rewards/rejected": -1.8442695140838623, "step": 644 }, { "epoch": 1.11, "learning_rate": 4.870378240543986e-07, "logits/chosen": -2.0022737979888916, "logits/rejected": -2.3319144248962402, "logps/chosen": -95.7020263671875, "logps/rejected": -112.87474822998047, "loss": 0.4094, "rewards/accuracies": 1.0, "rewards/chosen": -0.10968933254480362, "rewards/margins": 1.2710894346237183, "rewards/rejected": -1.3807787895202637, "step": 645 }, { "epoch": 1.11, "learning_rate": 4.869315767105822e-07, "logits/chosen": -2.159636974334717, "logits/rejected": -1.664670705795288, "logps/chosen": -104.39338684082031, "logps/rejected": -122.99906921386719, "loss": 0.3695, "rewards/accuracies": 0.75, "rewards/chosen": 0.26550427079200745, "rewards/margins": 3.3055291175842285, "rewards/rejected": -3.040025234222412, "step": 646 }, { "epoch": 1.11, "learning_rate": 4.868253293667658e-07, "logits/chosen": -1.898400068283081, "logits/rejected": -1.9686734676361084, "logps/chosen": -83.35140228271484, "logps/rejected": -115.19093322753906, "loss": 0.2977, "rewards/accuracies": 0.75, "rewards/chosen": 0.34521961212158203, "rewards/margins": 3.431586980819702, "rewards/rejected": -3.086367607116699, "step": 647 }, { "epoch": 1.12, "learning_rate": 4.867190820229494e-07, "logits/chosen": -1.9347219467163086, "logits/rejected": -2.2129716873168945, "logps/chosen": -59.090599060058594, "logps/rejected": -107.447998046875, "loss": 0.3051, "rewards/accuracies": 1.0, "rewards/chosen": 0.15307754278182983, "rewards/margins": 3.414809465408325, "rewards/rejected": -3.2617318630218506, "step": 648 }, { "epoch": 1.12, "learning_rate": 4.86612834679133e-07, "logits/chosen": -2.1080117225646973, "logits/rejected": -1.9616738557815552, "logps/chosen": -77.71703338623047, "logps/rejected": -94.0853500366211, "loss": 0.27, "rewards/accuracies": 1.0, "rewards/chosen": -0.18648220598697662, "rewards/margins": 1.3987343311309814, "rewards/rejected": -1.5852164030075073, "step": 649 }, { "epoch": 1.12, "learning_rate": 4.865065873353166e-07, "logits/chosen": -1.655778169631958, "logits/rejected": -2.063518524169922, "logps/chosen": -103.93820190429688, "logps/rejected": -140.13316345214844, "loss": 0.493, "rewards/accuracies": 1.0, "rewards/chosen": -1.3230302333831787, "rewards/margins": 1.3144800662994385, "rewards/rejected": -2.637510299682617, "step": 650 }, { "epoch": 1.12, "learning_rate": 4.864003399915002e-07, "logits/chosen": -2.1551051139831543, "logits/rejected": -1.778246521949768, "logps/chosen": -86.79643249511719, "logps/rejected": -97.05268859863281, "loss": 0.3132, "rewards/accuracies": 1.0, "rewards/chosen": 0.003773413598537445, "rewards/margins": 2.047898292541504, "rewards/rejected": -2.0441248416900635, "step": 651 }, { "epoch": 1.12, "learning_rate": 4.862940926476838e-07, "logits/chosen": -1.8813972473144531, "logits/rejected": -2.15867018699646, "logps/chosen": -71.18576049804688, "logps/rejected": -94.38531494140625, "loss": 0.4666, "rewards/accuracies": 0.75, "rewards/chosen": -0.33804434537887573, "rewards/margins": 1.2830814123153687, "rewards/rejected": -1.6211258172988892, "step": 652 }, { "epoch": 1.12, "learning_rate": 4.861878453038674e-07, "logits/chosen": -2.2586166858673096, "logits/rejected": -1.9300050735473633, "logps/chosen": -88.06258392333984, "logps/rejected": -103.33696746826172, "loss": 0.3398, "rewards/accuracies": 0.5, "rewards/chosen": -0.7208247184753418, "rewards/margins": 1.5081278085708618, "rewards/rejected": -2.228952407836914, "step": 653 }, { "epoch": 1.13, "learning_rate": 4.86081597960051e-07, "logits/chosen": -2.02980899810791, "logits/rejected": -2.0928802490234375, "logps/chosen": -88.17208862304688, "logps/rejected": -93.12623596191406, "loss": 0.3699, "rewards/accuracies": 0.5, "rewards/chosen": -0.34615132212638855, "rewards/margins": 0.39792442321777344, "rewards/rejected": -0.7440757751464844, "step": 654 }, { "epoch": 1.13, "learning_rate": 4.859753506162346e-07, "logits/chosen": -1.744894027709961, "logits/rejected": -2.084728240966797, "logps/chosen": -82.17584991455078, "logps/rejected": -140.22889709472656, "loss": 0.2966, "rewards/accuracies": 0.75, "rewards/chosen": -1.0037450790405273, "rewards/margins": 2.753560781478882, "rewards/rejected": -3.7573060989379883, "step": 655 }, { "epoch": 1.13, "learning_rate": 4.858691032724181e-07, "logits/chosen": -1.8211677074432373, "logits/rejected": -2.1803417205810547, "logps/chosen": -93.11283874511719, "logps/rejected": -108.02583312988281, "loss": 0.4367, "rewards/accuracies": 0.75, "rewards/chosen": -1.4489216804504395, "rewards/margins": 0.9233483076095581, "rewards/rejected": -2.372270107269287, "step": 656 }, { "epoch": 1.13, "learning_rate": 4.857628559286018e-07, "logits/chosen": -1.826709270477295, "logits/rejected": -2.373823642730713, "logps/chosen": -81.37610626220703, "logps/rejected": -104.92373657226562, "loss": 0.5624, "rewards/accuracies": 1.0, "rewards/chosen": -1.3406667709350586, "rewards/margins": 0.6666115522384644, "rewards/rejected": -2.0072782039642334, "step": 657 }, { "epoch": 1.13, "learning_rate": 4.856566085847854e-07, "logits/chosen": -1.964426040649414, "logits/rejected": -1.8488590717315674, "logps/chosen": -83.72447204589844, "logps/rejected": -132.168212890625, "loss": 0.2494, "rewards/accuracies": 1.0, "rewards/chosen": -0.3715460002422333, "rewards/margins": 4.323550224304199, "rewards/rejected": -4.695096492767334, "step": 658 }, { "epoch": 1.13, "learning_rate": 4.855503612409689e-07, "logits/chosen": -2.2139687538146973, "logits/rejected": -1.6467841863632202, "logps/chosen": -102.12407684326172, "logps/rejected": -101.08460235595703, "loss": 0.2245, "rewards/accuracies": 1.0, "rewards/chosen": -0.7073356509208679, "rewards/margins": 2.0776126384735107, "rewards/rejected": -2.7849483489990234, "step": 659 }, { "epoch": 1.14, "learning_rate": 4.854441138971526e-07, "logits/chosen": -1.9392750263214111, "logits/rejected": -2.110055923461914, "logps/chosen": -92.0729751586914, "logps/rejected": -104.4312744140625, "loss": 0.3811, "rewards/accuracies": 0.75, "rewards/chosen": -0.6056429147720337, "rewards/margins": 1.0598900318145752, "rewards/rejected": -1.6655328273773193, "step": 660 }, { "epoch": 1.14, "learning_rate": 4.853378665533361e-07, "logits/chosen": -2.0784573554992676, "logits/rejected": -1.908815622329712, "logps/chosen": -80.04092407226562, "logps/rejected": -110.86483001708984, "loss": 0.3314, "rewards/accuracies": 0.75, "rewards/chosen": -0.12938769161701202, "rewards/margins": 3.9769606590270996, "rewards/rejected": -4.106348037719727, "step": 661 }, { "epoch": 1.14, "learning_rate": 4.852316192095197e-07, "logits/chosen": -2.275275230407715, "logits/rejected": -1.9869155883789062, "logps/chosen": -90.1055679321289, "logps/rejected": -97.4565200805664, "loss": 0.4378, "rewards/accuracies": 0.75, "rewards/chosen": -0.6286981701850891, "rewards/margins": 0.655126690864563, "rewards/rejected": -1.2838249206542969, "step": 662 }, { "epoch": 1.14, "learning_rate": 4.851253718657034e-07, "logits/chosen": -1.967066764831543, "logits/rejected": -2.048459053039551, "logps/chosen": -86.45008850097656, "logps/rejected": -107.43523406982422, "loss": 0.3517, "rewards/accuracies": 0.75, "rewards/chosen": -0.6135889291763306, "rewards/margins": 2.799492120742798, "rewards/rejected": -3.4130806922912598, "step": 663 }, { "epoch": 1.14, "learning_rate": 4.850191245218869e-07, "logits/chosen": -1.8745112419128418, "logits/rejected": -2.0517897605895996, "logps/chosen": -88.27095031738281, "logps/rejected": -96.85574340820312, "loss": 0.3341, "rewards/accuracies": 0.5, "rewards/chosen": -1.7591028213500977, "rewards/margins": -0.03372848033905029, "rewards/rejected": -1.7253743410110474, "step": 664 }, { "epoch": 1.14, "learning_rate": 4.849128771780705e-07, "logits/chosen": -2.0685510635375977, "logits/rejected": -2.1268391609191895, "logps/chosen": -119.2042465209961, "logps/rejected": -124.81076049804688, "loss": 0.495, "rewards/accuracies": 0.75, "rewards/chosen": -0.6698881387710571, "rewards/margins": 1.1575709581375122, "rewards/rejected": -1.8274590969085693, "step": 665 }, { "epoch": 1.15, "learning_rate": 4.848066298342541e-07, "logits/chosen": -1.8988714218139648, "logits/rejected": -2.1438865661621094, "logps/chosen": -68.2419204711914, "logps/rejected": -118.89968872070312, "loss": 0.3463, "rewards/accuracies": 1.0, "rewards/chosen": 0.289986789226532, "rewards/margins": 4.235618591308594, "rewards/rejected": -3.945631980895996, "step": 666 }, { "epoch": 1.15, "learning_rate": 4.847003824904377e-07, "logits/chosen": -1.889915108680725, "logits/rejected": -1.9855334758758545, "logps/chosen": -89.36444854736328, "logps/rejected": -119.49180603027344, "loss": 0.321, "rewards/accuracies": 0.5, "rewards/chosen": -0.2790081202983856, "rewards/margins": 2.5446267127990723, "rewards/rejected": -2.823634624481201, "step": 667 }, { "epoch": 1.15, "learning_rate": 4.845941351466213e-07, "logits/chosen": -1.995481014251709, "logits/rejected": -1.8859007358551025, "logps/chosen": -84.4765625, "logps/rejected": -96.97213745117188, "loss": 0.3962, "rewards/accuracies": 0.75, "rewards/chosen": -0.8192216157913208, "rewards/margins": 1.7733218669891357, "rewards/rejected": -2.592543601989746, "step": 668 }, { "epoch": 1.15, "learning_rate": 4.844878878028049e-07, "logits/chosen": -2.218079090118408, "logits/rejected": -2.0827763080596924, "logps/chosen": -84.37413024902344, "logps/rejected": -90.60431671142578, "loss": 0.3631, "rewards/accuracies": 1.0, "rewards/chosen": -0.5891107320785522, "rewards/margins": 1.1035118103027344, "rewards/rejected": -1.6926226615905762, "step": 669 }, { "epoch": 1.15, "learning_rate": 4.843816404589885e-07, "logits/chosen": -2.3129634857177734, "logits/rejected": -1.82191801071167, "logps/chosen": -107.74067687988281, "logps/rejected": -111.96678161621094, "loss": 0.2922, "rewards/accuracies": 1.0, "rewards/chosen": -1.3356602191925049, "rewards/margins": 2.314061403274536, "rewards/rejected": -3.649721622467041, "step": 670 }, { "epoch": 1.15, "learning_rate": 4.842753931151721e-07, "logits/chosen": -2.2772574424743652, "logits/rejected": -2.0653324127197266, "logps/chosen": -94.73599243164062, "logps/rejected": -104.07823181152344, "loss": 0.3116, "rewards/accuracies": 1.0, "rewards/chosen": -1.2445068359375, "rewards/margins": 1.4021539688110352, "rewards/rejected": -2.6466610431671143, "step": 671 }, { "epoch": 1.16, "learning_rate": 4.841691457713557e-07, "logits/chosen": -2.2412796020507812, "logits/rejected": -2.0025038719177246, "logps/chosen": -90.33119201660156, "logps/rejected": -131.76589965820312, "loss": 0.2758, "rewards/accuracies": 1.0, "rewards/chosen": 0.6256393194198608, "rewards/margins": 5.783660888671875, "rewards/rejected": -5.158021450042725, "step": 672 }, { "epoch": 1.16, "learning_rate": 4.840628984275393e-07, "logits/chosen": -2.2104380130767822, "logits/rejected": -2.025857448577881, "logps/chosen": -127.77959442138672, "logps/rejected": -108.22772216796875, "loss": 0.5137, "rewards/accuracies": 0.5, "rewards/chosen": -0.9094429612159729, "rewards/margins": -0.5265403389930725, "rewards/rejected": -0.3829025626182556, "step": 673 }, { "epoch": 1.16, "learning_rate": 4.839566510837229e-07, "logits/chosen": -1.9365260601043701, "logits/rejected": -2.0879383087158203, "logps/chosen": -110.07140350341797, "logps/rejected": -121.02919006347656, "loss": 0.4649, "rewards/accuracies": 1.0, "rewards/chosen": -0.4987318515777588, "rewards/margins": 0.5169723033905029, "rewards/rejected": -1.0157041549682617, "step": 674 }, { "epoch": 1.16, "learning_rate": 4.838504037399065e-07, "logits/chosen": -2.1989755630493164, "logits/rejected": -1.88749361038208, "logps/chosen": -101.53152465820312, "logps/rejected": -98.91841125488281, "loss": 0.408, "rewards/accuracies": 0.5, "rewards/chosen": -0.957599937915802, "rewards/margins": 0.5466914176940918, "rewards/rejected": -1.504291296005249, "step": 675 }, { "epoch": 1.16, "learning_rate": 4.837441563960901e-07, "logits/chosen": -2.098203659057617, "logits/rejected": -1.8032631874084473, "logps/chosen": -92.60145568847656, "logps/rejected": -90.51913452148438, "loss": 0.3343, "rewards/accuracies": 0.5, "rewards/chosen": -0.24262523651123047, "rewards/margins": 0.5172216296195984, "rewards/rejected": -0.7598469257354736, "step": 676 }, { "epoch": 1.17, "learning_rate": 4.836379090522737e-07, "logits/chosen": -2.1662392616271973, "logits/rejected": -1.950233817100525, "logps/chosen": -82.45240783691406, "logps/rejected": -99.82024383544922, "loss": 0.2922, "rewards/accuracies": 1.0, "rewards/chosen": -0.4786239266395569, "rewards/margins": 3.261997699737549, "rewards/rejected": -3.740622043609619, "step": 677 }, { "epoch": 1.17, "learning_rate": 4.835316617084573e-07, "logits/chosen": -1.6343616247177124, "logits/rejected": -2.347435474395752, "logps/chosen": -78.37982177734375, "logps/rejected": -139.94265747070312, "loss": 0.3106, "rewards/accuracies": 1.0, "rewards/chosen": -0.3263772428035736, "rewards/margins": 3.3996100425720215, "rewards/rejected": -3.725987434387207, "step": 678 }, { "epoch": 1.17, "learning_rate": 4.834254143646409e-07, "logits/chosen": -1.9330461025238037, "logits/rejected": -2.2076256275177, "logps/chosen": -84.33647155761719, "logps/rejected": -102.1414794921875, "loss": 0.3786, "rewards/accuracies": 0.75, "rewards/chosen": -0.286677747964859, "rewards/margins": 0.9271394610404968, "rewards/rejected": -1.2138172388076782, "step": 679 }, { "epoch": 1.17, "learning_rate": 4.833191670208245e-07, "logits/chosen": -2.119662284851074, "logits/rejected": -2.0916907787323, "logps/chosen": -96.46263885498047, "logps/rejected": -110.57999420166016, "loss": 0.3657, "rewards/accuracies": 0.5, "rewards/chosen": -0.8215373754501343, "rewards/margins": 1.0382325649261475, "rewards/rejected": -1.8597699403762817, "step": 680 }, { "epoch": 1.17, "learning_rate": 4.832129196770081e-07, "logits/chosen": -2.242900848388672, "logits/rejected": -1.7734060287475586, "logps/chosen": -94.61457824707031, "logps/rejected": -104.5790786743164, "loss": 0.2677, "rewards/accuracies": 1.0, "rewards/chosen": -0.8944748640060425, "rewards/margins": 2.1303939819335938, "rewards/rejected": -3.024868965148926, "step": 681 }, { "epoch": 1.17, "learning_rate": 4.831066723331917e-07, "logits/chosen": -1.8662877082824707, "logits/rejected": -2.328122615814209, "logps/chosen": -76.09705352783203, "logps/rejected": -95.83872985839844, "loss": 0.5086, "rewards/accuracies": 1.0, "rewards/chosen": -0.03314056620001793, "rewards/margins": 0.9749530553817749, "rewards/rejected": -1.0080937147140503, "step": 682 }, { "epoch": 1.18, "learning_rate": 4.830004249893753e-07, "logits/chosen": -2.0219571590423584, "logits/rejected": -2.050227403640747, "logps/chosen": -53.03774642944336, "logps/rejected": -92.71237182617188, "loss": 0.3844, "rewards/accuracies": 1.0, "rewards/chosen": 0.44876348972320557, "rewards/margins": 3.7839858531951904, "rewards/rejected": -3.3352222442626953, "step": 683 }, { "epoch": 1.18, "learning_rate": 4.828941776455589e-07, "logits/chosen": -2.1762592792510986, "logits/rejected": -2.139624834060669, "logps/chosen": -69.18724060058594, "logps/rejected": -89.61228942871094, "loss": 0.4126, "rewards/accuracies": 0.75, "rewards/chosen": 0.3356474041938782, "rewards/margins": 0.8167150020599365, "rewards/rejected": -0.4810676574707031, "step": 684 }, { "epoch": 1.18, "learning_rate": 4.827879303017425e-07, "logits/chosen": -2.285463333129883, "logits/rejected": -1.2864660024642944, "logps/chosen": -119.6744384765625, "logps/rejected": -103.59671020507812, "loss": 0.338, "rewards/accuracies": 0.75, "rewards/chosen": -0.31850433349609375, "rewards/margins": 2.3606510162353516, "rewards/rejected": -2.6791553497314453, "step": 685 }, { "epoch": 1.18, "learning_rate": 4.826816829579261e-07, "logits/chosen": -2.0058083534240723, "logits/rejected": -1.775364875793457, "logps/chosen": -96.60879516601562, "logps/rejected": -101.94188690185547, "loss": 0.2486, "rewards/accuracies": 1.0, "rewards/chosen": -0.314115047454834, "rewards/margins": 1.614770531654358, "rewards/rejected": -1.928885579109192, "step": 686 }, { "epoch": 1.18, "learning_rate": 4.825754356141097e-07, "logits/chosen": -2.0671310424804688, "logits/rejected": -1.972970724105835, "logps/chosen": -88.69078063964844, "logps/rejected": -112.46223449707031, "loss": 0.3428, "rewards/accuracies": 0.5, "rewards/chosen": -1.3504524230957031, "rewards/margins": 1.5464166402816772, "rewards/rejected": -2.89686918258667, "step": 687 }, { "epoch": 1.18, "learning_rate": 4.824691882702932e-07, "logits/chosen": -2.3482301235198975, "logits/rejected": -2.1456923484802246, "logps/chosen": -87.697509765625, "logps/rejected": -97.13665771484375, "loss": 0.3558, "rewards/accuracies": 0.5, "rewards/chosen": -0.25383222103118896, "rewards/margins": 1.2920299768447876, "rewards/rejected": -1.5458621978759766, "step": 688 }, { "epoch": 1.19, "learning_rate": 4.823629409264769e-07, "logits/chosen": -1.9825994968414307, "logits/rejected": -2.33919620513916, "logps/chosen": -64.20309448242188, "logps/rejected": -111.70398712158203, "loss": 0.2876, "rewards/accuracies": 1.0, "rewards/chosen": 0.1084195151925087, "rewards/margins": 2.925471782684326, "rewards/rejected": -2.8170523643493652, "step": 689 }, { "epoch": 1.19, "learning_rate": 4.822566935826605e-07, "logits/chosen": -2.1619439125061035, "logits/rejected": -1.6982122659683228, "logps/chosen": -84.66236114501953, "logps/rejected": -95.5096206665039, "loss": 0.3597, "rewards/accuracies": 0.75, "rewards/chosen": -0.41161441802978516, "rewards/margins": 2.522167205810547, "rewards/rejected": -2.933781623840332, "step": 690 }, { "epoch": 1.19, "learning_rate": 4.82150446238844e-07, "logits/chosen": -1.9929783344268799, "logits/rejected": -2.018394947052002, "logps/chosen": -83.12251281738281, "logps/rejected": -102.74007415771484, "loss": 0.3474, "rewards/accuracies": 1.0, "rewards/chosen": 1.107894778251648, "rewards/margins": 2.9118354320526123, "rewards/rejected": -1.8039406538009644, "step": 691 }, { "epoch": 1.19, "learning_rate": 4.820441988950277e-07, "logits/chosen": -2.0258688926696777, "logits/rejected": -1.9350392818450928, "logps/chosen": -81.62889099121094, "logps/rejected": -118.65457153320312, "loss": 0.2836, "rewards/accuracies": 1.0, "rewards/chosen": 0.6272746920585632, "rewards/margins": 3.3316869735717773, "rewards/rejected": -2.7044124603271484, "step": 692 }, { "epoch": 1.19, "learning_rate": 4.819379515512112e-07, "logits/chosen": -2.11047101020813, "logits/rejected": -1.9627552032470703, "logps/chosen": -78.93498229980469, "logps/rejected": -93.32005310058594, "loss": 0.3502, "rewards/accuracies": 1.0, "rewards/chosen": 0.7614513635635376, "rewards/margins": 2.778179168701172, "rewards/rejected": -2.0167276859283447, "step": 693 }, { "epoch": 1.19, "learning_rate": 4.818317042073948e-07, "logits/chosen": -2.1365718841552734, "logits/rejected": -2.362072706222534, "logps/chosen": -102.44039154052734, "logps/rejected": -118.90583801269531, "loss": 0.3116, "rewards/accuracies": 1.0, "rewards/chosen": -0.6255109906196594, "rewards/margins": 2.3912808895111084, "rewards/rejected": -3.016791820526123, "step": 694 }, { "epoch": 1.2, "learning_rate": 4.817254568635784e-07, "logits/chosen": -2.121232509613037, "logits/rejected": -1.9897236824035645, "logps/chosen": -93.26466369628906, "logps/rejected": -99.11187744140625, "loss": 0.3824, "rewards/accuracies": 0.75, "rewards/chosen": 0.1099717915058136, "rewards/margins": 1.6361417770385742, "rewards/rejected": -1.526170015335083, "step": 695 }, { "epoch": 1.2, "learning_rate": 4.81619209519762e-07, "logits/chosen": -2.170029401779175, "logits/rejected": -2.1031579971313477, "logps/chosen": -82.271484375, "logps/rejected": -81.79934692382812, "loss": 0.3746, "rewards/accuracies": 1.0, "rewards/chosen": -0.2893667221069336, "rewards/margins": 0.45043298602104187, "rewards/rejected": -0.7397997379302979, "step": 696 }, { "epoch": 1.2, "learning_rate": 4.815129621759456e-07, "logits/chosen": -2.1429247856140137, "logits/rejected": -2.3142881393432617, "logps/chosen": -59.17033767700195, "logps/rejected": -98.93830871582031, "loss": 0.356, "rewards/accuracies": 1.0, "rewards/chosen": 0.4489712417125702, "rewards/margins": 1.9984607696533203, "rewards/rejected": -1.5494894981384277, "step": 697 }, { "epoch": 1.2, "learning_rate": 4.814067148321291e-07, "logits/chosen": -1.8854823112487793, "logits/rejected": -2.3977675437927246, "logps/chosen": -89.93265533447266, "logps/rejected": -122.74486541748047, "loss": 0.2118, "rewards/accuracies": 1.0, "rewards/chosen": -0.31598997116088867, "rewards/margins": 1.7175641059875488, "rewards/rejected": -2.0335540771484375, "step": 698 }, { "epoch": 1.2, "learning_rate": 4.813004674883127e-07, "logits/chosen": -2.0772814750671387, "logits/rejected": -2.1806108951568604, "logps/chosen": -71.84882354736328, "logps/rejected": -104.61508178710938, "loss": 0.2447, "rewards/accuracies": 1.0, "rewards/chosen": -0.5608676075935364, "rewards/margins": 3.7324328422546387, "rewards/rejected": -4.293300628662109, "step": 699 }, { "epoch": 1.2, "learning_rate": 4.811942201444963e-07, "logits/chosen": -2.1633002758026123, "logits/rejected": -2.134060859680176, "logps/chosen": -109.19174194335938, "logps/rejected": -120.50076293945312, "loss": 0.3308, "rewards/accuracies": 0.75, "rewards/chosen": -1.31356680393219, "rewards/margins": 1.8236963748931885, "rewards/rejected": -3.137263298034668, "step": 700 }, { "epoch": 1.21, "learning_rate": 4.810879728006799e-07, "logits/chosen": -2.04223895072937, "logits/rejected": -1.9969675540924072, "logps/chosen": -91.99449920654297, "logps/rejected": -99.0927963256836, "loss": 0.3616, "rewards/accuracies": 1.0, "rewards/chosen": -0.3695325553417206, "rewards/margins": 1.6029880046844482, "rewards/rejected": -1.9725204706192017, "step": 701 }, { "epoch": 1.21, "learning_rate": 4.809817254568635e-07, "logits/chosen": -1.9583239555358887, "logits/rejected": -2.167821168899536, "logps/chosen": -96.94189453125, "logps/rejected": -125.99488830566406, "loss": 0.4211, "rewards/accuracies": 0.5, "rewards/chosen": -0.5137927532196045, "rewards/margins": 1.9424370527267456, "rewards/rejected": -2.4562299251556396, "step": 702 }, { "epoch": 1.21, "learning_rate": 4.808754781130471e-07, "logits/chosen": -1.9728598594665527, "logits/rejected": -2.079941511154175, "logps/chosen": -77.01227569580078, "logps/rejected": -135.19207763671875, "loss": 0.437, "rewards/accuracies": 0.75, "rewards/chosen": 0.56580650806427, "rewards/margins": 3.167145252227783, "rewards/rejected": -2.6013388633728027, "step": 703 }, { "epoch": 1.21, "learning_rate": 4.807692307692307e-07, "logits/chosen": -2.0182926654815674, "logits/rejected": -1.805945873260498, "logps/chosen": -119.44200134277344, "logps/rejected": -131.0908203125, "loss": 0.3386, "rewards/accuracies": 1.0, "rewards/chosen": -1.0725563764572144, "rewards/margins": 2.328946590423584, "rewards/rejected": -3.401503324508667, "step": 704 }, { "epoch": 1.21, "learning_rate": 4.806629834254143e-07, "logits/chosen": -2.0509705543518066, "logits/rejected": -2.1717283725738525, "logps/chosen": -76.67585754394531, "logps/rejected": -106.07293701171875, "loss": 0.3238, "rewards/accuracies": 1.0, "rewards/chosen": -0.5847753286361694, "rewards/margins": 2.6268057823181152, "rewards/rejected": -3.211581230163574, "step": 705 }, { "epoch": 1.22, "learning_rate": 4.805567360815979e-07, "logits/chosen": -1.9505256414413452, "logits/rejected": -2.0064375400543213, "logps/chosen": -86.62202453613281, "logps/rejected": -118.95097351074219, "loss": 0.3633, "rewards/accuracies": 0.75, "rewards/chosen": 0.23489856719970703, "rewards/margins": 2.5192713737487793, "rewards/rejected": -2.284372568130493, "step": 706 }, { "epoch": 1.22, "learning_rate": 4.804504887377815e-07, "logits/chosen": -1.875123143196106, "logits/rejected": -1.832692265510559, "logps/chosen": -94.31217956542969, "logps/rejected": -116.93888092041016, "loss": 0.3286, "rewards/accuracies": 0.75, "rewards/chosen": -1.594738483428955, "rewards/margins": 1.7538166046142578, "rewards/rejected": -3.348555088043213, "step": 707 }, { "epoch": 1.22, "learning_rate": 4.803442413939651e-07, "logits/chosen": -2.2416515350341797, "logits/rejected": -2.0827457904815674, "logps/chosen": -90.92289733886719, "logps/rejected": -106.5976791381836, "loss": 0.3608, "rewards/accuracies": 0.5, "rewards/chosen": -0.6333627700805664, "rewards/margins": 1.3811041116714478, "rewards/rejected": -2.0144667625427246, "step": 708 }, { "epoch": 1.22, "learning_rate": 4.802379940501487e-07, "logits/chosen": -2.4481265544891357, "logits/rejected": -2.043635845184326, "logps/chosen": -89.06871032714844, "logps/rejected": -92.66923522949219, "loss": 0.3295, "rewards/accuracies": 0.5, "rewards/chosen": -0.24409770965576172, "rewards/margins": 1.4663922786712646, "rewards/rejected": -1.7104899883270264, "step": 709 }, { "epoch": 1.22, "learning_rate": 4.801317467063323e-07, "logits/chosen": -1.886213779449463, "logits/rejected": -2.2481441497802734, "logps/chosen": -90.62477111816406, "logps/rejected": -109.92082214355469, "loss": 0.3754, "rewards/accuracies": 1.0, "rewards/chosen": -0.01897639036178589, "rewards/margins": 1.6549358367919922, "rewards/rejected": -1.6739122867584229, "step": 710 }, { "epoch": 1.22, "learning_rate": 4.800254993625159e-07, "logits/chosen": -2.2619733810424805, "logits/rejected": -2.145735263824463, "logps/chosen": -77.45057678222656, "logps/rejected": -89.74235534667969, "loss": 0.391, "rewards/accuracies": 0.75, "rewards/chosen": -0.2589729130268097, "rewards/margins": 0.9553242921829224, "rewards/rejected": -1.2142971754074097, "step": 711 }, { "epoch": 1.23, "learning_rate": 4.799192520186995e-07, "logits/chosen": -1.8887460231781006, "logits/rejected": -2.01922345161438, "logps/chosen": -82.72781372070312, "logps/rejected": -131.00843811035156, "loss": 0.2398, "rewards/accuracies": 1.0, "rewards/chosen": -0.8047239780426025, "rewards/margins": 2.664059638977051, "rewards/rejected": -3.468783378601074, "step": 712 }, { "epoch": 1.23, "learning_rate": 4.798130046748831e-07, "logits/chosen": -2.3043980598449707, "logits/rejected": -2.150036334991455, "logps/chosen": -76.0661849975586, "logps/rejected": -98.88523864746094, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": 0.48173367977142334, "rewards/margins": 1.6484477519989014, "rewards/rejected": -1.1667139530181885, "step": 713 }, { "epoch": 1.23, "learning_rate": 4.797067573310667e-07, "logits/chosen": -2.196254014968872, "logits/rejected": -2.1489224433898926, "logps/chosen": -109.94187927246094, "logps/rejected": -141.72694396972656, "loss": 0.5548, "rewards/accuracies": 0.75, "rewards/chosen": -0.8207073211669922, "rewards/margins": 2.4528627395629883, "rewards/rejected": -3.2735700607299805, "step": 714 }, { "epoch": 1.23, "learning_rate": 4.796005099872503e-07, "logits/chosen": -2.106534481048584, "logits/rejected": -2.177173137664795, "logps/chosen": -69.21812438964844, "logps/rejected": -128.8088836669922, "loss": 0.2723, "rewards/accuracies": 1.0, "rewards/chosen": -0.46638625860214233, "rewards/margins": 4.192389965057373, "rewards/rejected": -4.65877628326416, "step": 715 }, { "epoch": 1.23, "learning_rate": 4.794942626434339e-07, "logits/chosen": -1.8220139741897583, "logits/rejected": -2.3463356494903564, "logps/chosen": -94.99176788330078, "logps/rejected": -132.8946533203125, "loss": 0.1932, "rewards/accuracies": 1.0, "rewards/chosen": -1.7236675024032593, "rewards/margins": 1.8164739608764648, "rewards/rejected": -3.5401411056518555, "step": 716 }, { "epoch": 1.23, "learning_rate": 4.793880152996175e-07, "logits/chosen": -1.9415974617004395, "logits/rejected": -2.122328519821167, "logps/chosen": -102.47084045410156, "logps/rejected": -116.1962890625, "loss": 0.3066, "rewards/accuracies": 0.5, "rewards/chosen": -0.057387739419937134, "rewards/margins": 1.8046398162841797, "rewards/rejected": -1.862027645111084, "step": 717 }, { "epoch": 1.24, "learning_rate": 4.792817679558011e-07, "logits/chosen": -2.214334011077881, "logits/rejected": -1.9299771785736084, "logps/chosen": -87.52626037597656, "logps/rejected": -108.91622924804688, "loss": 0.3456, "rewards/accuracies": 0.75, "rewards/chosen": 0.201978862285614, "rewards/margins": 2.7340807914733887, "rewards/rejected": -2.53210186958313, "step": 718 }, { "epoch": 1.24, "learning_rate": 4.791755206119847e-07, "logits/chosen": -1.9609510898590088, "logits/rejected": -1.9753072261810303, "logps/chosen": -80.13451385498047, "logps/rejected": -112.74825286865234, "loss": 0.2347, "rewards/accuracies": 1.0, "rewards/chosen": 0.44059476256370544, "rewards/margins": 3.9566757678985596, "rewards/rejected": -3.516080856323242, "step": 719 }, { "epoch": 1.24, "learning_rate": 4.790692732681682e-07, "logits/chosen": -2.2918176651000977, "logits/rejected": -2.1924476623535156, "logps/chosen": -81.47156524658203, "logps/rejected": -97.51315307617188, "loss": 0.2863, "rewards/accuracies": 0.75, "rewards/chosen": -0.40291109681129456, "rewards/margins": 1.7641695737838745, "rewards/rejected": -2.167080879211426, "step": 720 }, { "epoch": 1.24, "learning_rate": 4.789630259243519e-07, "logits/chosen": -2.1522908210754395, "logits/rejected": -2.03224515914917, "logps/chosen": -95.8009262084961, "logps/rejected": -117.71861267089844, "loss": 0.3814, "rewards/accuracies": 0.75, "rewards/chosen": -0.2082609236240387, "rewards/margins": 3.3333775997161865, "rewards/rejected": -3.5416383743286133, "step": 721 }, { "epoch": 1.24, "learning_rate": 4.788567785805354e-07, "logits/chosen": -2.0900626182556152, "logits/rejected": -2.004154920578003, "logps/chosen": -97.73823547363281, "logps/rejected": -117.56278991699219, "loss": 0.3543, "rewards/accuracies": 0.5, "rewards/chosen": -1.114595651626587, "rewards/margins": 1.899784803390503, "rewards/rejected": -3.01438045501709, "step": 722 }, { "epoch": 1.24, "learning_rate": 4.78750531236719e-07, "logits/chosen": -2.155860424041748, "logits/rejected": -2.0902533531188965, "logps/chosen": -114.38282012939453, "logps/rejected": -126.61927795410156, "loss": 0.2574, "rewards/accuracies": 0.5, "rewards/chosen": -1.0575892925262451, "rewards/margins": 1.2323863506317139, "rewards/rejected": -2.289975643157959, "step": 723 }, { "epoch": 1.25, "learning_rate": 4.786442838929027e-07, "logits/chosen": -2.200646162033081, "logits/rejected": -1.6165459156036377, "logps/chosen": -86.41215515136719, "logps/rejected": -104.97931671142578, "loss": 0.5011, "rewards/accuracies": 0.75, "rewards/chosen": 0.41736775636672974, "rewards/margins": 3.7972640991210938, "rewards/rejected": -3.3798961639404297, "step": 724 }, { "epoch": 1.25, "learning_rate": 4.785380365490862e-07, "logits/chosen": -2.2066144943237305, "logits/rejected": -2.050708293914795, "logps/chosen": -120.14286041259766, "logps/rejected": -116.0050048828125, "loss": 0.3364, "rewards/accuracies": 0.75, "rewards/chosen": -0.8509252667427063, "rewards/margins": 2.5000360012054443, "rewards/rejected": -3.350961208343506, "step": 725 }, { "epoch": 1.25, "learning_rate": 4.784317892052698e-07, "logits/chosen": -2.22343111038208, "logits/rejected": -2.017575979232788, "logps/chosen": -89.84767150878906, "logps/rejected": -125.05581665039062, "loss": 0.1898, "rewards/accuracies": 1.0, "rewards/chosen": -1.3268413543701172, "rewards/margins": 4.134633541107178, "rewards/rejected": -5.461474895477295, "step": 726 }, { "epoch": 1.25, "learning_rate": 4.783255418614534e-07, "logits/chosen": -2.007678747177124, "logits/rejected": -2.0488529205322266, "logps/chosen": -75.81016540527344, "logps/rejected": -116.16587829589844, "loss": 0.3463, "rewards/accuracies": 0.75, "rewards/chosen": -0.6087332963943481, "rewards/margins": 2.325683355331421, "rewards/rejected": -2.9344167709350586, "step": 727 }, { "epoch": 1.25, "learning_rate": 4.78219294517637e-07, "logits/chosen": -2.1289100646972656, "logits/rejected": -1.9561831951141357, "logps/chosen": -91.4811019897461, "logps/rejected": -117.3236083984375, "loss": 0.4691, "rewards/accuracies": 1.0, "rewards/chosen": 0.1914738565683365, "rewards/margins": 3.3819234371185303, "rewards/rejected": -3.1904494762420654, "step": 728 }, { "epoch": 1.25, "learning_rate": 4.781130471738206e-07, "logits/chosen": -1.7721903324127197, "logits/rejected": -2.18913197517395, "logps/chosen": -89.98455810546875, "logps/rejected": -114.28890991210938, "loss": 0.3396, "rewards/accuracies": 1.0, "rewards/chosen": -0.7554991245269775, "rewards/margins": 2.50016450881958, "rewards/rejected": -3.2556636333465576, "step": 729 }, { "epoch": 1.26, "learning_rate": 4.780067998300042e-07, "logits/chosen": -2.295992374420166, "logits/rejected": -1.998349666595459, "logps/chosen": -85.28636169433594, "logps/rejected": -120.79095458984375, "loss": 0.17, "rewards/accuracies": 1.0, "rewards/chosen": -0.7995792627334595, "rewards/margins": 4.000598430633545, "rewards/rejected": -4.800177574157715, "step": 730 }, { "epoch": 1.26, "learning_rate": 4.779005524861878e-07, "logits/chosen": -1.9573478698730469, "logits/rejected": -1.975958228111267, "logps/chosen": -76.69143676757812, "logps/rejected": -108.68922424316406, "loss": 0.2523, "rewards/accuracies": 1.0, "rewards/chosen": -0.587483286857605, "rewards/margins": 2.741835117340088, "rewards/rejected": -3.3293185234069824, "step": 731 }, { "epoch": 1.26, "learning_rate": 4.777943051423714e-07, "logits/chosen": -2.1198132038116455, "logits/rejected": -2.0389299392700195, "logps/chosen": -93.56902313232422, "logps/rejected": -112.39754486083984, "loss": 0.2665, "rewards/accuracies": 0.75, "rewards/chosen": -0.08182984590530396, "rewards/margins": 2.731783866882324, "rewards/rejected": -2.8136134147644043, "step": 732 }, { "epoch": 1.26, "learning_rate": 4.77688057798555e-07, "logits/chosen": -1.8559932708740234, "logits/rejected": -2.155824661254883, "logps/chosen": -79.46797180175781, "logps/rejected": -123.1733169555664, "loss": 0.3359, "rewards/accuracies": 1.0, "rewards/chosen": -1.0281847715377808, "rewards/margins": 2.5710902214050293, "rewards/rejected": -3.5992751121520996, "step": 733 }, { "epoch": 1.26, "learning_rate": 4.775818104547386e-07, "logits/chosen": -2.108649492263794, "logits/rejected": -2.0585532188415527, "logps/chosen": -69.27407836914062, "logps/rejected": -95.05677795410156, "loss": 0.2588, "rewards/accuracies": 0.75, "rewards/chosen": 0.7684585452079773, "rewards/margins": 2.3365774154663086, "rewards/rejected": -1.5681188106536865, "step": 734 }, { "epoch": 1.27, "learning_rate": 4.774755631109222e-07, "logits/chosen": -1.6007473468780518, "logits/rejected": -2.1496422290802, "logps/chosen": -93.39439392089844, "logps/rejected": -161.435302734375, "loss": 0.4338, "rewards/accuracies": 1.0, "rewards/chosen": -1.1127668619155884, "rewards/margins": 3.420746326446533, "rewards/rejected": -4.533513069152832, "step": 735 }, { "epoch": 1.27, "learning_rate": 4.773693157671058e-07, "logits/chosen": -1.726871132850647, "logits/rejected": -2.326369047164917, "logps/chosen": -80.25792694091797, "logps/rejected": -93.13948059082031, "loss": 0.2915, "rewards/accuracies": 1.0, "rewards/chosen": 0.37886038422584534, "rewards/margins": 0.9855567216873169, "rewards/rejected": -0.6066963076591492, "step": 736 }, { "epoch": 1.27, "learning_rate": 4.772630684232894e-07, "logits/chosen": -2.0771963596343994, "logits/rejected": -2.1207528114318848, "logps/chosen": -104.8944091796875, "logps/rejected": -110.88959503173828, "loss": 0.3896, "rewards/accuracies": 0.75, "rewards/chosen": -0.8028064966201782, "rewards/margins": 0.9730862975120544, "rewards/rejected": -1.775892734527588, "step": 737 }, { "epoch": 1.27, "learning_rate": 4.77156821079473e-07, "logits/chosen": -2.2459211349487305, "logits/rejected": -1.9983381032943726, "logps/chosen": -91.39535522460938, "logps/rejected": -106.40319061279297, "loss": 0.271, "rewards/accuracies": 0.75, "rewards/chosen": -0.49052125215530396, "rewards/margins": 1.9974627494812012, "rewards/rejected": -2.4879839420318604, "step": 738 }, { "epoch": 1.27, "learning_rate": 4.770505737356566e-07, "logits/chosen": -1.7444278001785278, "logits/rejected": -2.224273920059204, "logps/chosen": -94.07343292236328, "logps/rejected": -149.5501708984375, "loss": 0.3137, "rewards/accuracies": 0.75, "rewards/chosen": -1.4036476612091064, "rewards/margins": 2.843561887741089, "rewards/rejected": -4.247209548950195, "step": 739 }, { "epoch": 1.27, "learning_rate": 4.769443263918402e-07, "logits/chosen": -2.0421221256256104, "logits/rejected": -2.291238307952881, "logps/chosen": -81.80760955810547, "logps/rejected": -157.93588256835938, "loss": 0.335, "rewards/accuracies": 1.0, "rewards/chosen": -0.0791553258895874, "rewards/margins": 4.652152061462402, "rewards/rejected": -4.731307029724121, "step": 740 }, { "epoch": 1.28, "learning_rate": 4.768380790480238e-07, "logits/chosen": -2.077375650405884, "logits/rejected": -2.1020050048828125, "logps/chosen": -106.89456176757812, "logps/rejected": -146.3604736328125, "loss": 0.3889, "rewards/accuracies": 1.0, "rewards/chosen": -1.2908309698104858, "rewards/margins": 3.3971354961395264, "rewards/rejected": -4.687966346740723, "step": 741 }, { "epoch": 1.28, "learning_rate": 4.767318317042074e-07, "logits/chosen": -2.104273796081543, "logits/rejected": -2.2321267127990723, "logps/chosen": -88.1490478515625, "logps/rejected": -134.40232849121094, "loss": 0.3125, "rewards/accuracies": 1.0, "rewards/chosen": -0.768013596534729, "rewards/margins": 3.4230685234069824, "rewards/rejected": -4.191082000732422, "step": 742 }, { "epoch": 1.28, "learning_rate": 4.7662558436039094e-07, "logits/chosen": -1.8941800594329834, "logits/rejected": -2.300631523132324, "logps/chosen": -80.09276580810547, "logps/rejected": -114.43144989013672, "loss": 0.3066, "rewards/accuracies": 0.75, "rewards/chosen": -0.32692787051200867, "rewards/margins": 1.7763943672180176, "rewards/rejected": -2.1033222675323486, "step": 743 }, { "epoch": 1.28, "learning_rate": 4.7651933701657454e-07, "logits/chosen": -1.5682796239852905, "logits/rejected": -2.183913469314575, "logps/chosen": -91.6531982421875, "logps/rejected": -124.0700912475586, "loss": 0.5147, "rewards/accuracies": 0.75, "rewards/chosen": -1.0759847164154053, "rewards/margins": 1.9750587940216064, "rewards/rejected": -3.0510435104370117, "step": 744 }, { "epoch": 1.28, "learning_rate": 4.764130896727582e-07, "logits/chosen": -2.234700918197632, "logits/rejected": -2.0372722148895264, "logps/chosen": -105.2791976928711, "logps/rejected": -118.01606750488281, "loss": 0.3332, "rewards/accuracies": 1.0, "rewards/chosen": -0.2600778639316559, "rewards/margins": 2.4586920738220215, "rewards/rejected": -2.7187697887420654, "step": 745 }, { "epoch": 1.28, "learning_rate": 4.7630684232894174e-07, "logits/chosen": -1.921863079071045, "logits/rejected": -2.3393876552581787, "logps/chosen": -74.89517211914062, "logps/rejected": -131.22618103027344, "loss": 0.2247, "rewards/accuracies": 1.0, "rewards/chosen": 0.10886554419994354, "rewards/margins": 4.778721809387207, "rewards/rejected": -4.669856548309326, "step": 746 }, { "epoch": 1.29, "learning_rate": 4.7620059498512534e-07, "logits/chosen": -1.9021143913269043, "logits/rejected": -2.2958972454071045, "logps/chosen": -65.76040649414062, "logps/rejected": -122.68248748779297, "loss": 0.2102, "rewards/accuracies": 1.0, "rewards/chosen": 1.0859990119934082, "rewards/margins": 4.634239673614502, "rewards/rejected": -3.5482406616210938, "step": 747 }, { "epoch": 1.29, "learning_rate": 4.7609434764130894e-07, "logits/chosen": -2.1926751136779785, "logits/rejected": -2.014195442199707, "logps/chosen": -68.22224426269531, "logps/rejected": -105.94625854492188, "loss": 0.1561, "rewards/accuracies": 1.0, "rewards/chosen": -0.028242677450180054, "rewards/margins": 3.146028995513916, "rewards/rejected": -3.174271583557129, "step": 748 }, { "epoch": 1.29, "learning_rate": 4.7598810029749254e-07, "logits/chosen": -2.0332350730895996, "logits/rejected": -2.3001370429992676, "logps/chosen": -88.12110900878906, "logps/rejected": -114.88104248046875, "loss": 0.2614, "rewards/accuracies": 0.75, "rewards/chosen": -0.8686826825141907, "rewards/margins": 2.2355992794036865, "rewards/rejected": -3.1042819023132324, "step": 749 }, { "epoch": 1.29, "learning_rate": 4.7588185295367613e-07, "logits/chosen": -1.9174994230270386, "logits/rejected": -2.4503235816955566, "logps/chosen": -81.32412719726562, "logps/rejected": -138.16937255859375, "loss": 0.247, "rewards/accuracies": 1.0, "rewards/chosen": -1.0895015001296997, "rewards/margins": 4.616410732269287, "rewards/rejected": -5.705912113189697, "step": 750 }, { "epoch": 1.29, "learning_rate": 4.7577560560985973e-07, "logits/chosen": -2.180124282836914, "logits/rejected": -1.779600739479065, "logps/chosen": -102.96349334716797, "logps/rejected": -116.38108825683594, "loss": 0.1927, "rewards/accuracies": 1.0, "rewards/chosen": -0.6786879301071167, "rewards/margins": 2.764605760574341, "rewards/rejected": -3.443293571472168, "step": 751 }, { "epoch": 1.29, "learning_rate": 4.7566935826604333e-07, "logits/chosen": -1.9918525218963623, "logits/rejected": -2.172058582305908, "logps/chosen": -77.04158782958984, "logps/rejected": -120.09310913085938, "loss": 0.4291, "rewards/accuracies": 1.0, "rewards/chosen": -0.31587696075439453, "rewards/margins": 2.2774155139923096, "rewards/rejected": -2.593292236328125, "step": 752 }, { "epoch": 1.3, "learning_rate": 4.755631109222269e-07, "logits/chosen": -2.191479206085205, "logits/rejected": -2.1563024520874023, "logps/chosen": -86.9842300415039, "logps/rejected": -85.45609283447266, "loss": 0.3853, "rewards/accuracies": 0.5, "rewards/chosen": -0.015633970499038696, "rewards/margins": 1.0894396305084229, "rewards/rejected": -1.1050734519958496, "step": 753 }, { "epoch": 1.3, "learning_rate": 4.7545686357841053e-07, "logits/chosen": -2.0505106449127197, "logits/rejected": -1.9982712268829346, "logps/chosen": -99.17080688476562, "logps/rejected": -129.7220458984375, "loss": 0.462, "rewards/accuracies": 1.0, "rewards/chosen": -0.950097918510437, "rewards/margins": 3.4936330318450928, "rewards/rejected": -4.443731307983398, "step": 754 }, { "epoch": 1.3, "learning_rate": 4.7535061623459413e-07, "logits/chosen": -2.15004301071167, "logits/rejected": -2.10215425491333, "logps/chosen": -99.87909698486328, "logps/rejected": -116.31065368652344, "loss": 0.3773, "rewards/accuracies": 0.75, "rewards/chosen": -1.8361773490905762, "rewards/margins": 0.7878409624099731, "rewards/rejected": -2.6240181922912598, "step": 755 }, { "epoch": 1.3, "learning_rate": 4.7524436889077773e-07, "logits/chosen": -2.217040777206421, "logits/rejected": -2.210571527481079, "logps/chosen": -91.07682800292969, "logps/rejected": -119.3204345703125, "loss": 0.2829, "rewards/accuracies": 0.75, "rewards/chosen": -0.09293843805789948, "rewards/margins": 2.653402805328369, "rewards/rejected": -2.7463414669036865, "step": 756 }, { "epoch": 1.3, "learning_rate": 4.751381215469613e-07, "logits/chosen": -1.8803095817565918, "logits/rejected": -1.9249435663223267, "logps/chosen": -93.43287658691406, "logps/rejected": -108.04405975341797, "loss": 0.2475, "rewards/accuracies": 1.0, "rewards/chosen": -1.279079794883728, "rewards/margins": 2.3854165077209473, "rewards/rejected": -3.6644961833953857, "step": 757 }, { "epoch": 1.3, "learning_rate": 4.7503187420314487e-07, "logits/chosen": -1.8394728899002075, "logits/rejected": -2.1776137351989746, "logps/chosen": -96.64859008789062, "logps/rejected": -114.37386322021484, "loss": 0.3182, "rewards/accuracies": 0.75, "rewards/chosen": 0.07686404138803482, "rewards/margins": 0.7269386053085327, "rewards/rejected": -0.6500746011734009, "step": 758 }, { "epoch": 1.31, "learning_rate": 4.749256268593285e-07, "logits/chosen": -1.8911199569702148, "logits/rejected": -2.3075578212738037, "logps/chosen": -66.3844223022461, "logps/rejected": -130.9180450439453, "loss": 0.158, "rewards/accuracies": 1.0, "rewards/chosen": -1.1597099304199219, "rewards/margins": 4.51763916015625, "rewards/rejected": -5.677349090576172, "step": 759 }, { "epoch": 1.31, "learning_rate": 4.748193795155121e-07, "logits/chosen": -1.770997405052185, "logits/rejected": -2.192439317703247, "logps/chosen": -72.52777099609375, "logps/rejected": -122.74557495117188, "loss": 0.416, "rewards/accuracies": 1.0, "rewards/chosen": -0.12045037746429443, "rewards/margins": 3.595597267150879, "rewards/rejected": -3.716047763824463, "step": 760 }, { "epoch": 1.31, "learning_rate": 4.7471313217169567e-07, "logits/chosen": -2.191789150238037, "logits/rejected": -2.4182968139648438, "logps/chosen": -124.17311096191406, "logps/rejected": -148.86512756347656, "loss": 0.1688, "rewards/accuracies": 1.0, "rewards/chosen": -1.5344650745391846, "rewards/margins": 2.836343765258789, "rewards/rejected": -4.3708086013793945, "step": 761 }, { "epoch": 1.31, "learning_rate": 4.746068848278793e-07, "logits/chosen": -1.9137990474700928, "logits/rejected": -2.0449469089508057, "logps/chosen": -82.7076187133789, "logps/rejected": -144.87399291992188, "loss": 0.3178, "rewards/accuracies": 1.0, "rewards/chosen": -1.1820106506347656, "rewards/margins": 4.2014851570129395, "rewards/rejected": -5.383496284484863, "step": 762 }, { "epoch": 1.31, "learning_rate": 4.7450063748406287e-07, "logits/chosen": -2.05159330368042, "logits/rejected": -2.148456335067749, "logps/chosen": -119.0615234375, "logps/rejected": -151.51358032226562, "loss": 0.3278, "rewards/accuracies": 1.0, "rewards/chosen": -1.9647891521453857, "rewards/margins": 3.3677446842193604, "rewards/rejected": -5.332533836364746, "step": 763 }, { "epoch": 1.31, "learning_rate": 4.7439439014024647e-07, "logits/chosen": -2.1520938873291016, "logits/rejected": -2.02693247795105, "logps/chosen": -92.6317367553711, "logps/rejected": -133.8096160888672, "loss": 0.3447, "rewards/accuracies": 1.0, "rewards/chosen": -0.5204574465751648, "rewards/margins": 3.4733963012695312, "rewards/rejected": -3.9938535690307617, "step": 764 }, { "epoch": 1.32, "learning_rate": 4.7428814279643006e-07, "logits/chosen": -2.0005979537963867, "logits/rejected": -2.2055037021636963, "logps/chosen": -95.57167053222656, "logps/rejected": -155.2184600830078, "loss": 0.4172, "rewards/accuracies": 1.0, "rewards/chosen": -0.5102928280830383, "rewards/margins": 4.510280609130859, "rewards/rejected": -5.020573616027832, "step": 765 }, { "epoch": 1.32, "learning_rate": 4.7418189545261366e-07, "logits/chosen": -1.9348130226135254, "logits/rejected": -2.21453857421875, "logps/chosen": -73.29533386230469, "logps/rejected": -122.62945556640625, "loss": 0.2908, "rewards/accuracies": 0.75, "rewards/chosen": -0.32456153631210327, "rewards/margins": 3.762019395828247, "rewards/rejected": -4.086581230163574, "step": 766 }, { "epoch": 1.32, "learning_rate": 4.7407564810879726e-07, "logits/chosen": -2.0399348735809326, "logits/rejected": -2.0763096809387207, "logps/chosen": -104.80513763427734, "logps/rejected": -120.36578369140625, "loss": 0.3231, "rewards/accuracies": 0.5, "rewards/chosen": -1.3309961557388306, "rewards/margins": 2.8803019523620605, "rewards/rejected": -4.211297988891602, "step": 767 }, { "epoch": 1.32, "learning_rate": 4.7396940076498086e-07, "logits/chosen": -2.1333885192871094, "logits/rejected": -1.9847466945648193, "logps/chosen": -98.60061645507812, "logps/rejected": -115.3426284790039, "loss": 0.3525, "rewards/accuracies": 0.75, "rewards/chosen": -0.530582070350647, "rewards/margins": 2.799830436706543, "rewards/rejected": -3.3304126262664795, "step": 768 }, { "epoch": 1.32, "learning_rate": 4.7386315342116446e-07, "logits/chosen": -2.058281898498535, "logits/rejected": -1.9282395839691162, "logps/chosen": -113.93505096435547, "logps/rejected": -115.03221893310547, "loss": 0.3247, "rewards/accuracies": 0.75, "rewards/chosen": -2.121462106704712, "rewards/margins": 0.8083972930908203, "rewards/rejected": -2.9298593997955322, "step": 769 }, { "epoch": 1.33, "learning_rate": 4.73756906077348e-07, "logits/chosen": -2.2117512226104736, "logits/rejected": -2.1409244537353516, "logps/chosen": -83.81415557861328, "logps/rejected": -111.44168090820312, "loss": 0.2468, "rewards/accuracies": 1.0, "rewards/chosen": 0.06178150326013565, "rewards/margins": 3.0897016525268555, "rewards/rejected": -3.0279200077056885, "step": 770 }, { "epoch": 1.33, "learning_rate": 4.7365065873353166e-07, "logits/chosen": -2.0528669357299805, "logits/rejected": -2.074370861053467, "logps/chosen": -78.79794311523438, "logps/rejected": -102.8604736328125, "loss": 0.3405, "rewards/accuracies": 1.0, "rewards/chosen": -0.23658150434494019, "rewards/margins": 2.202390432357788, "rewards/rejected": -2.438971996307373, "step": 771 }, { "epoch": 1.33, "learning_rate": 4.7354441138971526e-07, "logits/chosen": -1.9338231086730957, "logits/rejected": -2.0993285179138184, "logps/chosen": -84.50227355957031, "logps/rejected": -95.83181762695312, "loss": 0.2611, "rewards/accuracies": 0.25, "rewards/chosen": -0.5045533180236816, "rewards/margins": 1.282928466796875, "rewards/rejected": -1.787481665611267, "step": 772 }, { "epoch": 1.33, "learning_rate": 4.734381640458988e-07, "logits/chosen": -2.177875518798828, "logits/rejected": -2.100064277648926, "logps/chosen": -111.21287536621094, "logps/rejected": -121.15481567382812, "loss": 0.2585, "rewards/accuracies": 1.0, "rewards/chosen": -0.8989176750183105, "rewards/margins": 2.474539279937744, "rewards/rejected": -3.3734567165374756, "step": 773 }, { "epoch": 1.33, "learning_rate": 4.7333191670208245e-07, "logits/chosen": -2.1386008262634277, "logits/rejected": -1.8123440742492676, "logps/chosen": -110.5674057006836, "logps/rejected": -128.87228393554688, "loss": 0.2075, "rewards/accuracies": 0.75, "rewards/chosen": -1.0594427585601807, "rewards/margins": 3.9776418209075928, "rewards/rejected": -5.037084579467773, "step": 774 }, { "epoch": 1.33, "learning_rate": 4.73225669358266e-07, "logits/chosen": -2.0535783767700195, "logits/rejected": -2.2538022994995117, "logps/chosen": -122.87268829345703, "logps/rejected": -125.99987030029297, "loss": 0.3829, "rewards/accuracies": 0.5, "rewards/chosen": -1.892814040184021, "rewards/margins": 0.595605731010437, "rewards/rejected": -2.488420009613037, "step": 775 }, { "epoch": 1.34, "learning_rate": 4.731194220144496e-07, "logits/chosen": -1.8358469009399414, "logits/rejected": -2.2732741832733154, "logps/chosen": -92.22583770751953, "logps/rejected": -121.58744049072266, "loss": 0.3105, "rewards/accuracies": 0.75, "rewards/chosen": -1.8967361450195312, "rewards/margins": 0.7453439235687256, "rewards/rejected": -2.642080068588257, "step": 776 }, { "epoch": 1.34, "learning_rate": 4.7301317467063325e-07, "logits/chosen": -2.0081353187561035, "logits/rejected": -2.115116596221924, "logps/chosen": -100.67283630371094, "logps/rejected": -143.60850524902344, "loss": 0.2427, "rewards/accuracies": 0.75, "rewards/chosen": -1.391235589981079, "rewards/margins": 3.883240222930908, "rewards/rejected": -5.274476051330566, "step": 777 }, { "epoch": 1.34, "learning_rate": 4.729069273268168e-07, "logits/chosen": -1.8389853239059448, "logits/rejected": -1.9539244174957275, "logps/chosen": -89.83482360839844, "logps/rejected": -101.65409851074219, "loss": 0.3815, "rewards/accuracies": 0.5, "rewards/chosen": -0.8952536582946777, "rewards/margins": 1.3752472400665283, "rewards/rejected": -2.270500659942627, "step": 778 }, { "epoch": 1.34, "learning_rate": 4.728006799830004e-07, "logits/chosen": -2.2232728004455566, "logits/rejected": -1.944028377532959, "logps/chosen": -85.70512390136719, "logps/rejected": -115.87130737304688, "loss": 0.2728, "rewards/accuracies": 0.75, "rewards/chosen": 0.42235469818115234, "rewards/margins": 4.241931438446045, "rewards/rejected": -3.8195767402648926, "step": 779 }, { "epoch": 1.34, "learning_rate": 4.72694432639184e-07, "logits/chosen": -2.2357840538024902, "logits/rejected": -2.0276293754577637, "logps/chosen": -111.92195129394531, "logps/rejected": -117.17117309570312, "loss": 0.27, "rewards/accuracies": 0.75, "rewards/chosen": -1.0398328304290771, "rewards/margins": 0.9656580686569214, "rewards/rejected": -2.005491018295288, "step": 780 }, { "epoch": 1.34, "learning_rate": 4.725881852953676e-07, "logits/chosen": -1.8674306869506836, "logits/rejected": -2.2744317054748535, "logps/chosen": -85.39826202392578, "logps/rejected": -124.73748016357422, "loss": 0.2064, "rewards/accuracies": 1.0, "rewards/chosen": -1.072021245956421, "rewards/margins": 2.547078847885132, "rewards/rejected": -3.619100570678711, "step": 781 }, { "epoch": 1.35, "learning_rate": 4.724819379515512e-07, "logits/chosen": -2.04443097114563, "logits/rejected": -1.6828447580337524, "logps/chosen": -130.03945922851562, "logps/rejected": -152.99972534179688, "loss": 0.2704, "rewards/accuracies": 1.0, "rewards/chosen": -0.8289319276809692, "rewards/margins": 4.005337715148926, "rewards/rejected": -4.834270000457764, "step": 782 }, { "epoch": 1.35, "learning_rate": 4.723756906077348e-07, "logits/chosen": -2.311429023742676, "logits/rejected": -2.105621337890625, "logps/chosen": -70.54222869873047, "logps/rejected": -123.68634033203125, "loss": 0.1999, "rewards/accuracies": 1.0, "rewards/chosen": -0.40434443950653076, "rewards/margins": 5.146947860717773, "rewards/rejected": -5.5512919425964355, "step": 783 }, { "epoch": 1.35, "learning_rate": 4.722694432639184e-07, "logits/chosen": -2.142853260040283, "logits/rejected": -2.171113967895508, "logps/chosen": -99.61249542236328, "logps/rejected": -142.75906372070312, "loss": 0.1846, "rewards/accuracies": 1.0, "rewards/chosen": -1.1479315757751465, "rewards/margins": 4.454901218414307, "rewards/rejected": -5.602832794189453, "step": 784 }, { "epoch": 1.35, "learning_rate": 4.7216319592010194e-07, "logits/chosen": -1.7749974727630615, "logits/rejected": -2.2523555755615234, "logps/chosen": -101.10009002685547, "logps/rejected": -130.761474609375, "loss": 0.242, "rewards/accuracies": 0.75, "rewards/chosen": -1.64035964012146, "rewards/margins": 1.092166543006897, "rewards/rejected": -2.7325260639190674, "step": 785 }, { "epoch": 1.35, "learning_rate": 4.720569485762856e-07, "logits/chosen": -2.0212512016296387, "logits/rejected": -2.1310133934020996, "logps/chosen": -80.30370330810547, "logps/rejected": -152.80308532714844, "loss": 0.3198, "rewards/accuracies": 1.0, "rewards/chosen": -0.47174280881881714, "rewards/margins": 6.346031665802002, "rewards/rejected": -6.817774772644043, "step": 786 }, { "epoch": 1.35, "learning_rate": 4.719507012324692e-07, "logits/chosen": -2.1705048084259033, "logits/rejected": -2.051675796508789, "logps/chosen": -144.79483032226562, "logps/rejected": -128.9705810546875, "loss": 0.3276, "rewards/accuracies": 0.5, "rewards/chosen": -4.7220611572265625, "rewards/margins": -0.01808112859725952, "rewards/rejected": -4.703979969024658, "step": 787 }, { "epoch": 1.36, "learning_rate": 4.7184445388865273e-07, "logits/chosen": -1.950049638748169, "logits/rejected": -2.074049949645996, "logps/chosen": -80.0118408203125, "logps/rejected": -114.33149719238281, "loss": 0.2747, "rewards/accuracies": 1.0, "rewards/chosen": -0.7190504670143127, "rewards/margins": 3.945542335510254, "rewards/rejected": -4.66459321975708, "step": 788 }, { "epoch": 1.36, "learning_rate": 4.717382065448364e-07, "logits/chosen": -2.1330111026763916, "logits/rejected": -2.1058731079101562, "logps/chosen": -87.29986572265625, "logps/rejected": -99.58580017089844, "loss": 0.3395, "rewards/accuracies": 0.5, "rewards/chosen": -1.2942850589752197, "rewards/margins": 0.7757183313369751, "rewards/rejected": -2.0700032711029053, "step": 789 }, { "epoch": 1.36, "learning_rate": 4.7163195920101993e-07, "logits/chosen": -2.0336034297943115, "logits/rejected": -2.220017671585083, "logps/chosen": -92.96453857421875, "logps/rejected": -140.68994140625, "loss": 0.2911, "rewards/accuracies": 1.0, "rewards/chosen": -0.6331065893173218, "rewards/margins": 3.286970615386963, "rewards/rejected": -3.920077085494995, "step": 790 }, { "epoch": 1.36, "learning_rate": 4.7152571185720353e-07, "logits/chosen": -2.0898759365081787, "logits/rejected": -2.2476003170013428, "logps/chosen": -125.0320053100586, "logps/rejected": -138.9166717529297, "loss": 0.3969, "rewards/accuracies": 0.75, "rewards/chosen": -1.820817470550537, "rewards/margins": 1.7248592376708984, "rewards/rejected": -3.5456764698028564, "step": 791 }, { "epoch": 1.36, "learning_rate": 4.7141946451338713e-07, "logits/chosen": -2.201338529586792, "logits/rejected": -2.266319990158081, "logps/chosen": -120.81803894042969, "logps/rejected": -168.87835693359375, "loss": 0.2438, "rewards/accuracies": 1.0, "rewards/chosen": -1.04768967628479, "rewards/margins": 3.957186222076416, "rewards/rejected": -5.004876136779785, "step": 792 }, { "epoch": 1.36, "learning_rate": 4.7131321716957073e-07, "logits/chosen": -2.0129616260528564, "logits/rejected": -2.3072800636291504, "logps/chosen": -78.7962417602539, "logps/rejected": -125.08082580566406, "loss": 0.233, "rewards/accuracies": 1.0, "rewards/chosen": -0.45193424820899963, "rewards/margins": 4.101518154144287, "rewards/rejected": -4.553452491760254, "step": 793 }, { "epoch": 1.37, "learning_rate": 4.7120696982575433e-07, "logits/chosen": -2.332915782928467, "logits/rejected": -2.160299301147461, "logps/chosen": -83.68456268310547, "logps/rejected": -107.35503387451172, "loss": 0.4977, "rewards/accuracies": 1.0, "rewards/chosen": -0.3514341413974762, "rewards/margins": 3.340473175048828, "rewards/rejected": -3.6919071674346924, "step": 794 }, { "epoch": 1.37, "learning_rate": 4.711007224819379e-07, "logits/chosen": -1.6761648654937744, "logits/rejected": -2.121250629425049, "logps/chosen": -61.36069869995117, "logps/rejected": -141.84927368164062, "loss": 0.346, "rewards/accuracies": 1.0, "rewards/chosen": 0.6050518751144409, "rewards/margins": 5.663084983825684, "rewards/rejected": -5.058032989501953, "step": 795 }, { "epoch": 1.37, "learning_rate": 4.709944751381215e-07, "logits/chosen": -2.226142168045044, "logits/rejected": -1.8805713653564453, "logps/chosen": -79.2366943359375, "logps/rejected": -110.2385482788086, "loss": 0.2539, "rewards/accuracies": 1.0, "rewards/chosen": -0.0634777694940567, "rewards/margins": 4.543262958526611, "rewards/rejected": -4.606740951538086, "step": 796 }, { "epoch": 1.37, "learning_rate": 4.708882277943051e-07, "logits/chosen": -2.2994544506073, "logits/rejected": -2.208983898162842, "logps/chosen": -74.01708984375, "logps/rejected": -95.59684753417969, "loss": 0.3573, "rewards/accuracies": 0.75, "rewards/chosen": -0.5341781973838806, "rewards/margins": 0.9186468124389648, "rewards/rejected": -1.4528249502182007, "step": 797 }, { "epoch": 1.37, "learning_rate": 4.707819804504887e-07, "logits/chosen": -2.1122260093688965, "logits/rejected": -2.3567118644714355, "logps/chosen": -93.35868835449219, "logps/rejected": -116.1565170288086, "loss": 0.3998, "rewards/accuracies": 0.5, "rewards/chosen": -0.7411931753158569, "rewards/margins": 1.1850064992904663, "rewards/rejected": -1.9261997938156128, "step": 798 }, { "epoch": 1.38, "learning_rate": 4.706757331066723e-07, "logits/chosen": -1.9908515214920044, "logits/rejected": -2.2228193283081055, "logps/chosen": -86.02813720703125, "logps/rejected": -137.84918212890625, "loss": 0.3598, "rewards/accuracies": 1.0, "rewards/chosen": -0.04976823925971985, "rewards/margins": 4.731438159942627, "rewards/rejected": -4.7812066078186035, "step": 799 }, { "epoch": 1.38, "learning_rate": 4.705694857628559e-07, "logits/chosen": -2.1115262508392334, "logits/rejected": -2.386942148208618, "logps/chosen": -95.98838806152344, "logps/rejected": -97.25399780273438, "loss": 0.3341, "rewards/accuracies": 1.0, "rewards/chosen": -0.7013254761695862, "rewards/margins": 0.5979780554771423, "rewards/rejected": -1.2993035316467285, "step": 800 }, { "epoch": 1.38, "learning_rate": 4.704632384190395e-07, "logits/chosen": -2.0244972705841064, "logits/rejected": -2.1170623302459717, "logps/chosen": -72.17842102050781, "logps/rejected": -135.76010131835938, "loss": 0.2708, "rewards/accuracies": 1.0, "rewards/chosen": 0.09817084670066833, "rewards/margins": 4.943821430206299, "rewards/rejected": -4.845650672912598, "step": 801 }, { "epoch": 1.38, "learning_rate": 4.7035699107522307e-07, "logits/chosen": -2.171250820159912, "logits/rejected": -2.0972254276275635, "logps/chosen": -77.68852233886719, "logps/rejected": -111.27090454101562, "loss": 0.2089, "rewards/accuracies": 1.0, "rewards/chosen": -0.24390888214111328, "rewards/margins": 3.020535707473755, "rewards/rejected": -3.2644448280334473, "step": 802 }, { "epoch": 1.38, "learning_rate": 4.702507437314067e-07, "logits/chosen": -2.368718385696411, "logits/rejected": -2.4289767742156982, "logps/chosen": -106.07371520996094, "logps/rejected": -143.46527099609375, "loss": 0.3932, "rewards/accuracies": 1.0, "rewards/chosen": -1.3523614406585693, "rewards/margins": 3.2535486221313477, "rewards/rejected": -4.605910301208496, "step": 803 }, { "epoch": 1.38, "learning_rate": 4.701444963875903e-07, "logits/chosen": -2.270641565322876, "logits/rejected": -2.318087100982666, "logps/chosen": -116.23155212402344, "logps/rejected": -141.58697509765625, "loss": 0.3207, "rewards/accuracies": 1.0, "rewards/chosen": -1.1845805644989014, "rewards/margins": 1.9321012496948242, "rewards/rejected": -3.1166820526123047, "step": 804 }, { "epoch": 1.39, "learning_rate": 4.7003824904377386e-07, "logits/chosen": -1.7016398906707764, "logits/rejected": -2.202625274658203, "logps/chosen": -91.22312927246094, "logps/rejected": -116.51516723632812, "loss": 0.3759, "rewards/accuracies": 0.75, "rewards/chosen": -0.391705721616745, "rewards/margins": 0.5908358097076416, "rewards/rejected": -0.9825415015220642, "step": 805 }, { "epoch": 1.39, "learning_rate": 4.699320016999575e-07, "logits/chosen": -1.973920226097107, "logits/rejected": -2.172569513320923, "logps/chosen": -86.73645782470703, "logps/rejected": -105.33568572998047, "loss": 0.2452, "rewards/accuracies": 0.75, "rewards/chosen": -0.9112845063209534, "rewards/margins": 1.0119001865386963, "rewards/rejected": -1.923184871673584, "step": 806 }, { "epoch": 1.39, "learning_rate": 4.6982575435614106e-07, "logits/chosen": -1.7684962749481201, "logits/rejected": -2.130164623260498, "logps/chosen": -74.8687515258789, "logps/rejected": -129.82492065429688, "loss": 0.2975, "rewards/accuracies": 1.0, "rewards/chosen": 0.41936078667640686, "rewards/margins": 4.212008476257324, "rewards/rejected": -3.792647361755371, "step": 807 }, { "epoch": 1.39, "learning_rate": 4.6971950701232466e-07, "logits/chosen": -2.247795820236206, "logits/rejected": -2.1778321266174316, "logps/chosen": -105.00173950195312, "logps/rejected": -128.99273681640625, "loss": 0.4514, "rewards/accuracies": 0.75, "rewards/chosen": -0.6340402960777283, "rewards/margins": 2.498102903366089, "rewards/rejected": -3.132143259048462, "step": 808 }, { "epoch": 1.39, "learning_rate": 4.696132596685083e-07, "logits/chosen": -2.1460866928100586, "logits/rejected": -2.19999098777771, "logps/chosen": -93.12828063964844, "logps/rejected": -97.59176635742188, "loss": 0.4332, "rewards/accuracies": 0.75, "rewards/chosen": -1.360403060913086, "rewards/margins": -0.025648802518844604, "rewards/rejected": -1.334754228591919, "step": 809 }, { "epoch": 1.39, "learning_rate": 4.6950701232469186e-07, "logits/chosen": -2.0206210613250732, "logits/rejected": -2.2186691761016846, "logps/chosen": -62.459632873535156, "logps/rejected": -117.50210571289062, "loss": 0.303, "rewards/accuracies": 1.0, "rewards/chosen": 0.4799482226371765, "rewards/margins": 5.367826461791992, "rewards/rejected": -4.88787841796875, "step": 810 }, { "epoch": 1.4, "learning_rate": 4.6940076498087546e-07, "logits/chosen": -1.6104354858398438, "logits/rejected": -2.0257086753845215, "logps/chosen": -83.61959838867188, "logps/rejected": -127.22245788574219, "loss": 0.2972, "rewards/accuracies": 1.0, "rewards/chosen": -0.8528180718421936, "rewards/margins": 1.488041639328003, "rewards/rejected": -2.3408596515655518, "step": 811 }, { "epoch": 1.4, "learning_rate": 4.6929451763705905e-07, "logits/chosen": -2.186101198196411, "logits/rejected": -1.9637658596038818, "logps/chosen": -108.60356140136719, "logps/rejected": -122.78714752197266, "loss": 0.2897, "rewards/accuracies": 0.75, "rewards/chosen": 0.254189670085907, "rewards/margins": 3.5137245655059814, "rewards/rejected": -3.2595348358154297, "step": 812 }, { "epoch": 1.4, "learning_rate": 4.6918827029324265e-07, "logits/chosen": -2.1580867767333984, "logits/rejected": -2.1459875106811523, "logps/chosen": -83.09822082519531, "logps/rejected": -126.35084533691406, "loss": 0.197, "rewards/accuracies": 1.0, "rewards/chosen": -0.32887548208236694, "rewards/margins": 4.14999532699585, "rewards/rejected": -4.478870868682861, "step": 813 }, { "epoch": 1.4, "learning_rate": 4.6908202294942625e-07, "logits/chosen": -1.759993076324463, "logits/rejected": -2.3576583862304688, "logps/chosen": -66.72000885009766, "logps/rejected": -118.79752349853516, "loss": 0.3143, "rewards/accuracies": 1.0, "rewards/chosen": 0.2595990002155304, "rewards/margins": 4.017491340637207, "rewards/rejected": -3.757892608642578, "step": 814 }, { "epoch": 1.4, "learning_rate": 4.6897577560560985e-07, "logits/chosen": -1.9869205951690674, "logits/rejected": -2.2777533531188965, "logps/chosen": -72.0634994506836, "logps/rejected": -120.60531616210938, "loss": 0.2465, "rewards/accuracies": 1.0, "rewards/chosen": -0.2831905484199524, "rewards/margins": 4.178705215454102, "rewards/rejected": -4.461895942687988, "step": 815 }, { "epoch": 1.4, "learning_rate": 4.6886952826179345e-07, "logits/chosen": -1.8104525804519653, "logits/rejected": -2.2203216552734375, "logps/chosen": -88.93522644042969, "logps/rejected": -105.00170135498047, "loss": 0.2759, "rewards/accuracies": 0.75, "rewards/chosen": -0.5770829319953918, "rewards/margins": 1.71262788772583, "rewards/rejected": -2.289710760116577, "step": 816 }, { "epoch": 1.41, "learning_rate": 4.68763280917977e-07, "logits/chosen": -2.147665500640869, "logits/rejected": -2.12351131439209, "logps/chosen": -90.9544677734375, "logps/rejected": -129.64993286132812, "loss": 0.3987, "rewards/accuracies": 0.75, "rewards/chosen": -0.9709674119949341, "rewards/margins": 3.4639334678649902, "rewards/rejected": -4.434901237487793, "step": 817 }, { "epoch": 1.41, "learning_rate": 4.6865703357416065e-07, "logits/chosen": -1.8263800144195557, "logits/rejected": -2.2345733642578125, "logps/chosen": -63.918495178222656, "logps/rejected": -119.22048950195312, "loss": 0.2378, "rewards/accuracies": 0.75, "rewards/chosen": 0.15051805973052979, "rewards/margins": 3.7065048217773438, "rewards/rejected": -3.5559866428375244, "step": 818 }, { "epoch": 1.41, "learning_rate": 4.685507862303442e-07, "logits/chosen": -1.8576185703277588, "logits/rejected": -2.149545669555664, "logps/chosen": -70.23859405517578, "logps/rejected": -107.21131134033203, "loss": 0.352, "rewards/accuracies": 1.0, "rewards/chosen": -0.8844295740127563, "rewards/margins": 2.5314934253692627, "rewards/rejected": -3.4159228801727295, "step": 819 }, { "epoch": 1.41, "learning_rate": 4.684445388865278e-07, "logits/chosen": -1.846128225326538, "logits/rejected": -2.1961488723754883, "logps/chosen": -90.76329040527344, "logps/rejected": -116.58509063720703, "loss": 0.3179, "rewards/accuracies": 1.0, "rewards/chosen": -0.8498448133468628, "rewards/margins": 1.9213690757751465, "rewards/rejected": -2.771214008331299, "step": 820 }, { "epoch": 1.41, "learning_rate": 4.6833829154271144e-07, "logits/chosen": -2.232646942138672, "logits/rejected": -2.169658899307251, "logps/chosen": -89.24159240722656, "logps/rejected": -106.56913757324219, "loss": 0.1505, "rewards/accuracies": 1.0, "rewards/chosen": -0.008467569947242737, "rewards/margins": 1.839734435081482, "rewards/rejected": -1.8482019901275635, "step": 821 }, { "epoch": 1.41, "learning_rate": 4.68232044198895e-07, "logits/chosen": -2.183835744857788, "logits/rejected": -2.119248390197754, "logps/chosen": -103.0247573852539, "logps/rejected": -139.2760009765625, "loss": 0.3026, "rewards/accuracies": 1.0, "rewards/chosen": 0.10635700821876526, "rewards/margins": 3.7093698978424072, "rewards/rejected": -3.603013038635254, "step": 822 }, { "epoch": 1.42, "learning_rate": 4.681257968550786e-07, "logits/chosen": -2.383193254470825, "logits/rejected": -2.1170296669006348, "logps/chosen": -101.71510314941406, "logps/rejected": -120.86396789550781, "loss": 0.2677, "rewards/accuracies": 1.0, "rewards/chosen": -0.8116683959960938, "rewards/margins": 3.8739123344421387, "rewards/rejected": -4.685581207275391, "step": 823 }, { "epoch": 1.42, "learning_rate": 4.680195495112622e-07, "logits/chosen": -1.9408609867095947, "logits/rejected": -2.1861259937286377, "logps/chosen": -70.49440002441406, "logps/rejected": -119.7754135131836, "loss": 0.1953, "rewards/accuracies": 0.75, "rewards/chosen": 0.19652968645095825, "rewards/margins": 4.109028339385986, "rewards/rejected": -3.9124984741210938, "step": 824 }, { "epoch": 1.42, "learning_rate": 4.679133021674458e-07, "logits/chosen": -2.1555299758911133, "logits/rejected": -2.339061975479126, "logps/chosen": -87.67973327636719, "logps/rejected": -134.21473693847656, "loss": 0.2222, "rewards/accuracies": 1.0, "rewards/chosen": 0.07233960926532745, "rewards/margins": 4.811895847320557, "rewards/rejected": -4.739556312561035, "step": 825 }, { "epoch": 1.42, "learning_rate": 4.678070548236294e-07, "logits/chosen": -1.946864128112793, "logits/rejected": -2.2741618156433105, "logps/chosen": -93.62503814697266, "logps/rejected": -103.24435424804688, "loss": 0.3775, "rewards/accuracies": 0.75, "rewards/chosen": -0.21284180879592896, "rewards/margins": 0.9884802103042603, "rewards/rejected": -1.2013219594955444, "step": 826 }, { "epoch": 1.42, "learning_rate": 4.67700807479813e-07, "logits/chosen": -2.2709603309631348, "logits/rejected": -1.914788842201233, "logps/chosen": -81.14702606201172, "logps/rejected": -118.20480346679688, "loss": 0.195, "rewards/accuracies": 1.0, "rewards/chosen": 0.36826905608177185, "rewards/margins": 4.277584075927734, "rewards/rejected": -3.9093148708343506, "step": 827 }, { "epoch": 1.43, "learning_rate": 4.675945601359966e-07, "logits/chosen": -1.7718979120254517, "logits/rejected": -2.222641706466675, "logps/chosen": -58.20064926147461, "logps/rejected": -112.91838073730469, "loss": 0.4409, "rewards/accuracies": 0.75, "rewards/chosen": -0.013154864311218262, "rewards/margins": 3.523329496383667, "rewards/rejected": -3.5364840030670166, "step": 828 }, { "epoch": 1.43, "learning_rate": 4.6748831279218013e-07, "logits/chosen": -2.1758437156677246, "logits/rejected": -1.8589378595352173, "logps/chosen": -103.94061279296875, "logps/rejected": -145.11825561523438, "loss": 0.3176, "rewards/accuracies": 1.0, "rewards/chosen": -0.8964715003967285, "rewards/margins": 2.976271152496338, "rewards/rejected": -3.8727426528930664, "step": 829 }, { "epoch": 1.43, "learning_rate": 4.673820654483638e-07, "logits/chosen": -2.1325154304504395, "logits/rejected": -2.1147189140319824, "logps/chosen": -87.15997314453125, "logps/rejected": -125.17687225341797, "loss": 0.3489, "rewards/accuracies": 0.75, "rewards/chosen": -0.9474244713783264, "rewards/margins": 3.7983169555664062, "rewards/rejected": -4.745741844177246, "step": 830 }, { "epoch": 1.43, "learning_rate": 4.672758181045474e-07, "logits/chosen": -2.204935073852539, "logits/rejected": -2.205902576446533, "logps/chosen": -89.67293548583984, "logps/rejected": -134.04275512695312, "loss": 0.2538, "rewards/accuracies": 1.0, "rewards/chosen": -0.32942700386047363, "rewards/margins": 3.0242919921875, "rewards/rejected": -3.3537187576293945, "step": 831 }, { "epoch": 1.43, "learning_rate": 4.671695707607309e-07, "logits/chosen": -2.0411200523376465, "logits/rejected": -2.121159076690674, "logps/chosen": -84.58128356933594, "logps/rejected": -98.7540512084961, "loss": 0.1755, "rewards/accuracies": 0.75, "rewards/chosen": -0.032872676849365234, "rewards/margins": 1.8798866271972656, "rewards/rejected": -1.9127591848373413, "step": 832 }, { "epoch": 1.43, "learning_rate": 4.670633234169146e-07, "logits/chosen": -2.241058349609375, "logits/rejected": -1.9569183588027954, "logps/chosen": -99.47389221191406, "logps/rejected": -113.53656005859375, "loss": 0.2828, "rewards/accuracies": 1.0, "rewards/chosen": -0.8020146489143372, "rewards/margins": 2.7996883392333984, "rewards/rejected": -3.60170316696167, "step": 833 }, { "epoch": 1.44, "learning_rate": 4.669570760730981e-07, "logits/chosen": -2.066117525100708, "logits/rejected": -2.1686956882476807, "logps/chosen": -78.69686126708984, "logps/rejected": -102.35841369628906, "loss": 0.376, "rewards/accuracies": 1.0, "rewards/chosen": -0.2218775749206543, "rewards/margins": 1.0338765382766724, "rewards/rejected": -1.2557541131973267, "step": 834 }, { "epoch": 1.44, "learning_rate": 4.668508287292817e-07, "logits/chosen": -1.9225285053253174, "logits/rejected": -2.2323598861694336, "logps/chosen": -86.53520202636719, "logps/rejected": -127.55171203613281, "loss": 0.257, "rewards/accuracies": 1.0, "rewards/chosen": -0.571933388710022, "rewards/margins": 1.9381792545318604, "rewards/rejected": -2.510112762451172, "step": 835 }, { "epoch": 1.44, "learning_rate": 4.667445813854654e-07, "logits/chosen": -2.087843656539917, "logits/rejected": -2.2340610027313232, "logps/chosen": -77.38982391357422, "logps/rejected": -113.57910919189453, "loss": 0.1816, "rewards/accuracies": 0.75, "rewards/chosen": -0.24474845826625824, "rewards/margins": 2.9510884284973145, "rewards/rejected": -3.1958367824554443, "step": 836 }, { "epoch": 1.44, "learning_rate": 4.666383340416489e-07, "logits/chosen": -1.7600808143615723, "logits/rejected": -2.286944627761841, "logps/chosen": -70.21973419189453, "logps/rejected": -129.4042205810547, "loss": 0.2387, "rewards/accuracies": 0.75, "rewards/chosen": 0.051592037081718445, "rewards/margins": 3.6508724689483643, "rewards/rejected": -3.59928035736084, "step": 837 }, { "epoch": 1.44, "learning_rate": 4.665320866978325e-07, "logits/chosen": -1.9030144214630127, "logits/rejected": -1.9448323249816895, "logps/chosen": -88.047119140625, "logps/rejected": -99.36058807373047, "loss": 0.2533, "rewards/accuracies": 0.5, "rewards/chosen": -0.4177219569683075, "rewards/margins": 1.4095875024795532, "rewards/rejected": -1.8273093700408936, "step": 838 }, { "epoch": 1.44, "learning_rate": 4.664258393540161e-07, "logits/chosen": -2.212702751159668, "logits/rejected": -2.065765857696533, "logps/chosen": -97.19845581054688, "logps/rejected": -131.91531372070312, "loss": 0.2885, "rewards/accuracies": 1.0, "rewards/chosen": -0.1139310970902443, "rewards/margins": 3.900620460510254, "rewards/rejected": -4.014551639556885, "step": 839 }, { "epoch": 1.45, "learning_rate": 4.663195920101997e-07, "logits/chosen": -2.1728572845458984, "logits/rejected": -2.0963282585144043, "logps/chosen": -91.79098510742188, "logps/rejected": -113.16072082519531, "loss": 0.2617, "rewards/accuracies": 0.75, "rewards/chosen": -0.9125937223434448, "rewards/margins": 1.7126734256744385, "rewards/rejected": -2.6252670288085938, "step": 840 }, { "epoch": 1.45, "learning_rate": 4.6621334466638337e-07, "logits/chosen": -2.2386884689331055, "logits/rejected": -2.2313857078552246, "logps/chosen": -84.07662200927734, "logps/rejected": -89.55663299560547, "loss": 0.335, "rewards/accuracies": 0.75, "rewards/chosen": -0.8895992040634155, "rewards/margins": 0.8061164617538452, "rewards/rejected": -1.6957156658172607, "step": 841 }, { "epoch": 1.45, "learning_rate": 4.661070973225669e-07, "logits/chosen": -1.9151356220245361, "logits/rejected": -2.009948492050171, "logps/chosen": -83.95774841308594, "logps/rejected": -104.39268493652344, "loss": 0.1756, "rewards/accuracies": 0.75, "rewards/chosen": 0.0630025565624237, "rewards/margins": 2.2979352474212646, "rewards/rejected": -2.2349324226379395, "step": 842 }, { "epoch": 1.45, "learning_rate": 4.660008499787505e-07, "logits/chosen": -2.1157162189483643, "logits/rejected": -2.1919591426849365, "logps/chosen": -102.18728637695312, "logps/rejected": -136.36322021484375, "loss": 0.2608, "rewards/accuracies": 0.75, "rewards/chosen": -0.4812372326850891, "rewards/margins": 2.909327268600464, "rewards/rejected": -3.390564441680908, "step": 843 }, { "epoch": 1.45, "learning_rate": 4.658946026349341e-07, "logits/chosen": -2.2199087142944336, "logits/rejected": -2.018669366836548, "logps/chosen": -102.72325134277344, "logps/rejected": -142.9823760986328, "loss": 0.1892, "rewards/accuracies": 1.0, "rewards/chosen": -0.1946105659008026, "rewards/margins": 3.752011775970459, "rewards/rejected": -3.946622371673584, "step": 844 }, { "epoch": 1.45, "learning_rate": 4.657883552911177e-07, "logits/chosen": -2.317429542541504, "logits/rejected": -1.9876701831817627, "logps/chosen": -80.24038696289062, "logps/rejected": -106.37385559082031, "loss": 0.7025, "rewards/accuracies": 0.75, "rewards/chosen": -0.7409604787826538, "rewards/margins": 2.5138189792633057, "rewards/rejected": -3.254779577255249, "step": 845 }, { "epoch": 1.46, "learning_rate": 4.6568210794730126e-07, "logits/chosen": -2.315439224243164, "logits/rejected": -1.9620672464370728, "logps/chosen": -100.52928924560547, "logps/rejected": -123.85594940185547, "loss": 0.3795, "rewards/accuracies": 1.0, "rewards/chosen": -0.7644361257553101, "rewards/margins": 3.018580913543701, "rewards/rejected": -3.783017158508301, "step": 846 }, { "epoch": 1.46, "learning_rate": 4.655758606034849e-07, "logits/chosen": -1.9783935546875, "logits/rejected": -2.07377290725708, "logps/chosen": -99.76929473876953, "logps/rejected": -130.54051208496094, "loss": 0.2114, "rewards/accuracies": 0.75, "rewards/chosen": -0.286848247051239, "rewards/margins": 3.4255285263061523, "rewards/rejected": -3.712376832962036, "step": 847 }, { "epoch": 1.46, "learning_rate": 4.654696132596685e-07, "logits/chosen": -2.2675509452819824, "logits/rejected": -2.2384047508239746, "logps/chosen": -88.11465454101562, "logps/rejected": -125.60942077636719, "loss": 0.3621, "rewards/accuracies": 1.0, "rewards/chosen": -0.639399528503418, "rewards/margins": 4.0713653564453125, "rewards/rejected": -4.7107648849487305, "step": 848 }, { "epoch": 1.46, "learning_rate": 4.6536336591585205e-07, "logits/chosen": -2.35109281539917, "logits/rejected": -1.6880995035171509, "logps/chosen": -105.54296875, "logps/rejected": -101.53205871582031, "loss": 0.5646, "rewards/accuracies": 1.0, "rewards/chosen": -0.6545001864433289, "rewards/margins": 2.0579285621643066, "rewards/rejected": -2.7124288082122803, "step": 849 }, { "epoch": 1.46, "learning_rate": 4.652571185720357e-07, "logits/chosen": -1.9490121603012085, "logits/rejected": -2.2844414710998535, "logps/chosen": -69.84745788574219, "logps/rejected": -112.02500915527344, "loss": 0.2746, "rewards/accuracies": 0.75, "rewards/chosen": -0.939750075340271, "rewards/margins": 2.7135276794433594, "rewards/rejected": -3.65327787399292, "step": 850 }, { "epoch": 1.46, "learning_rate": 4.6515087122821925e-07, "logits/chosen": -2.2751219272613525, "logits/rejected": -2.1106626987457275, "logps/chosen": -78.82559204101562, "logps/rejected": -121.1953353881836, "loss": 0.2413, "rewards/accuracies": 1.0, "rewards/chosen": 0.5569453239440918, "rewards/margins": 4.045570373535156, "rewards/rejected": -3.4886250495910645, "step": 851 }, { "epoch": 1.47, "learning_rate": 4.6504462388440285e-07, "logits/chosen": -2.3399903774261475, "logits/rejected": -2.1461985111236572, "logps/chosen": -113.51496124267578, "logps/rejected": -141.32810974121094, "loss": 0.2006, "rewards/accuracies": 1.0, "rewards/chosen": -0.8782287836074829, "rewards/margins": 3.296565055847168, "rewards/rejected": -4.174793720245361, "step": 852 }, { "epoch": 1.47, "learning_rate": 4.649383765405865e-07, "logits/chosen": -2.168948173522949, "logits/rejected": -2.179788112640381, "logps/chosen": -93.16942596435547, "logps/rejected": -113.03693389892578, "loss": 0.2233, "rewards/accuracies": 1.0, "rewards/chosen": -0.7613905072212219, "rewards/margins": 3.0716538429260254, "rewards/rejected": -3.8330440521240234, "step": 853 }, { "epoch": 1.47, "learning_rate": 4.6483212919677005e-07, "logits/chosen": -2.1821842193603516, "logits/rejected": -2.3373053073883057, "logps/chosen": -78.708251953125, "logps/rejected": -137.92971801757812, "loss": 0.1728, "rewards/accuracies": 1.0, "rewards/chosen": 0.572723388671875, "rewards/margins": 4.736901760101318, "rewards/rejected": -4.164177894592285, "step": 854 }, { "epoch": 1.47, "learning_rate": 4.6472588185295365e-07, "logits/chosen": -1.9658451080322266, "logits/rejected": -2.2645413875579834, "logps/chosen": -115.71377563476562, "logps/rejected": -152.7821044921875, "loss": 0.3638, "rewards/accuracies": 1.0, "rewards/chosen": -0.8278453946113586, "rewards/margins": 3.1520729064941406, "rewards/rejected": -3.9799182415008545, "step": 855 }, { "epoch": 1.47, "learning_rate": 4.6461963450913725e-07, "logits/chosen": -2.3244705200195312, "logits/rejected": -2.0692343711853027, "logps/chosen": -80.37313842773438, "logps/rejected": -110.22505187988281, "loss": 0.3949, "rewards/accuracies": 0.75, "rewards/chosen": 0.08319798111915588, "rewards/margins": 2.6868486404418945, "rewards/rejected": -2.6036505699157715, "step": 856 }, { "epoch": 1.48, "learning_rate": 4.6451338716532085e-07, "logits/chosen": -2.3627285957336426, "logits/rejected": -2.1471099853515625, "logps/chosen": -116.80345916748047, "logps/rejected": -124.09777069091797, "loss": 0.2448, "rewards/accuracies": 0.5, "rewards/chosen": -0.6490165591239929, "rewards/margins": 1.9785892963409424, "rewards/rejected": -2.62760591506958, "step": 857 }, { "epoch": 1.48, "learning_rate": 4.6440713982150444e-07, "logits/chosen": -2.2128713130950928, "logits/rejected": -1.919873833656311, "logps/chosen": -114.83497619628906, "logps/rejected": -131.43173217773438, "loss": 0.261, "rewards/accuracies": 0.5, "rewards/chosen": -0.8224077224731445, "rewards/margins": 2.2055108547210693, "rewards/rejected": -3.0279183387756348, "step": 858 }, { "epoch": 1.48, "learning_rate": 4.6430089247768804e-07, "logits/chosen": -2.280348062515259, "logits/rejected": -2.266303300857544, "logps/chosen": -95.5439682006836, "logps/rejected": -102.38848876953125, "loss": 0.238, "rewards/accuracies": 1.0, "rewards/chosen": -0.32707035541534424, "rewards/margins": 1.4943432807922363, "rewards/rejected": -1.8214137554168701, "step": 859 }, { "epoch": 1.48, "learning_rate": 4.6419464513387164e-07, "logits/chosen": -2.307438373565674, "logits/rejected": -1.5166462659835815, "logps/chosen": -94.5209732055664, "logps/rejected": -127.27237701416016, "loss": 0.3055, "rewards/accuracies": 1.0, "rewards/chosen": 0.016302675008773804, "rewards/margins": 4.168827056884766, "rewards/rejected": -4.152524471282959, "step": 860 }, { "epoch": 1.48, "learning_rate": 4.640883977900552e-07, "logits/chosen": -2.2414302825927734, "logits/rejected": -2.2103188037872314, "logps/chosen": -85.20352172851562, "logps/rejected": -96.4302978515625, "loss": 0.3012, "rewards/accuracies": 0.75, "rewards/chosen": -0.4490719735622406, "rewards/margins": 2.2341763973236084, "rewards/rejected": -2.683248519897461, "step": 861 }, { "epoch": 1.48, "learning_rate": 4.6398215044623884e-07, "logits/chosen": -2.0851454734802246, "logits/rejected": -2.200993537902832, "logps/chosen": -78.9975357055664, "logps/rejected": -123.77054595947266, "loss": 0.263, "rewards/accuracies": 1.0, "rewards/chosen": -0.27611568570137024, "rewards/margins": 4.721288681030273, "rewards/rejected": -4.9974045753479, "step": 862 }, { "epoch": 1.49, "learning_rate": 4.6387590310242244e-07, "logits/chosen": -1.8722059726715088, "logits/rejected": -2.3415322303771973, "logps/chosen": -84.59991455078125, "logps/rejected": -147.6340789794922, "loss": 0.2992, "rewards/accuracies": 0.75, "rewards/chosen": -0.3110910654067993, "rewards/margins": 3.763139247894287, "rewards/rejected": -4.074230194091797, "step": 863 }, { "epoch": 1.49, "learning_rate": 4.63769655758606e-07, "logits/chosen": -1.8319883346557617, "logits/rejected": -2.356900215148926, "logps/chosen": -66.84109497070312, "logps/rejected": -128.3617706298828, "loss": 0.4129, "rewards/accuracies": 0.75, "rewards/chosen": -0.5075473785400391, "rewards/margins": 3.595306873321533, "rewards/rejected": -4.102854251861572, "step": 864 }, { "epoch": 1.49, "learning_rate": 4.6366340841478964e-07, "logits/chosen": -2.1986544132232666, "logits/rejected": -2.2133524417877197, "logps/chosen": -80.1725082397461, "logps/rejected": -106.83523559570312, "loss": 0.2621, "rewards/accuracies": 1.0, "rewards/chosen": 0.5011423230171204, "rewards/margins": 3.5773091316223145, "rewards/rejected": -3.076167106628418, "step": 865 }, { "epoch": 1.49, "learning_rate": 4.635571610709732e-07, "logits/chosen": -1.999340295791626, "logits/rejected": -2.3025355339050293, "logps/chosen": -75.10122680664062, "logps/rejected": -129.27590942382812, "loss": 0.2831, "rewards/accuracies": 1.0, "rewards/chosen": -0.5650749206542969, "rewards/margins": 4.439949989318848, "rewards/rejected": -5.005024433135986, "step": 866 }, { "epoch": 1.49, "learning_rate": 4.634509137271568e-07, "logits/chosen": -1.9672911167144775, "logits/rejected": -1.850376844406128, "logps/chosen": -100.24588012695312, "logps/rejected": -104.02056121826172, "loss": 0.2711, "rewards/accuracies": 0.75, "rewards/chosen": -1.0884716510772705, "rewards/margins": 0.971439003944397, "rewards/rejected": -2.059910535812378, "step": 867 }, { "epoch": 1.49, "learning_rate": 4.6334466638334043e-07, "logits/chosen": -2.2965145111083984, "logits/rejected": -2.1667842864990234, "logps/chosen": -90.86528778076172, "logps/rejected": -114.29773712158203, "loss": 0.2409, "rewards/accuracies": 0.75, "rewards/chosen": -0.5966771841049194, "rewards/margins": 1.9533805847167969, "rewards/rejected": -2.550057888031006, "step": 868 }, { "epoch": 1.5, "learning_rate": 4.63238419039524e-07, "logits/chosen": -2.0524420738220215, "logits/rejected": -2.415372371673584, "logps/chosen": -99.55987548828125, "logps/rejected": -124.8670654296875, "loss": 0.3268, "rewards/accuracies": 0.5, "rewards/chosen": -1.6763331890106201, "rewards/margins": 0.06012362241744995, "rewards/rejected": -1.7364568710327148, "step": 869 }, { "epoch": 1.5, "learning_rate": 4.631321716957076e-07, "logits/chosen": -2.2284088134765625, "logits/rejected": -1.893391489982605, "logps/chosen": -88.98542022705078, "logps/rejected": -102.4974594116211, "loss": 0.2857, "rewards/accuracies": 0.75, "rewards/chosen": 0.08225355297327042, "rewards/margins": 3.1013033390045166, "rewards/rejected": -3.019049882888794, "step": 870 }, { "epoch": 1.5, "learning_rate": 4.630259243518912e-07, "logits/chosen": -2.031618118286133, "logits/rejected": -2.1935081481933594, "logps/chosen": -107.28985595703125, "logps/rejected": -137.0671844482422, "loss": 0.227, "rewards/accuracies": 0.75, "rewards/chosen": -0.7024725675582886, "rewards/margins": 2.3384809494018555, "rewards/rejected": -3.0409533977508545, "step": 871 }, { "epoch": 1.5, "learning_rate": 4.629196770080748e-07, "logits/chosen": -1.9180347919464111, "logits/rejected": -2.0075786113739014, "logps/chosen": -78.81730651855469, "logps/rejected": -140.50588989257812, "loss": 0.26, "rewards/accuracies": 1.0, "rewards/chosen": 0.22683028876781464, "rewards/margins": 5.750375747680664, "rewards/rejected": -5.523545742034912, "step": 872 }, { "epoch": 1.5, "learning_rate": 4.628134296642583e-07, "logits/chosen": -1.8182708024978638, "logits/rejected": -2.0868654251098633, "logps/chosen": -106.30024719238281, "logps/rejected": -127.67977905273438, "loss": 0.2149, "rewards/accuracies": 0.75, "rewards/chosen": -1.0751957893371582, "rewards/margins": 1.9233952760696411, "rewards/rejected": -2.998591184616089, "step": 873 }, { "epoch": 1.5, "learning_rate": 4.62707182320442e-07, "logits/chosen": -2.1063010692596436, "logits/rejected": -2.0788116455078125, "logps/chosen": -98.44209289550781, "logps/rejected": -109.79029846191406, "loss": 0.2223, "rewards/accuracies": 1.0, "rewards/chosen": -0.42556047439575195, "rewards/margins": 1.1220265626907349, "rewards/rejected": -1.5475869178771973, "step": 874 }, { "epoch": 1.51, "learning_rate": 4.6260093497662557e-07, "logits/chosen": -2.3418002128601074, "logits/rejected": -1.7298790216445923, "logps/chosen": -112.171630859375, "logps/rejected": -125.00714874267578, "loss": 0.4475, "rewards/accuracies": 0.75, "rewards/chosen": 0.13494740426540375, "rewards/margins": 3.6676015853881836, "rewards/rejected": -3.532654047012329, "step": 875 }, { "epoch": 1.51, "learning_rate": 4.624946876328091e-07, "logits/chosen": -2.232363224029541, "logits/rejected": -2.1750495433807373, "logps/chosen": -84.10602569580078, "logps/rejected": -96.63822174072266, "loss": 0.4499, "rewards/accuracies": 1.0, "rewards/chosen": -0.14223842322826385, "rewards/margins": 2.010486602783203, "rewards/rejected": -2.1527249813079834, "step": 876 }, { "epoch": 1.51, "learning_rate": 4.6238844028899277e-07, "logits/chosen": -2.03525972366333, "logits/rejected": -2.2799384593963623, "logps/chosen": -100.23793029785156, "logps/rejected": -103.80612182617188, "loss": 0.4498, "rewards/accuracies": 0.75, "rewards/chosen": -0.9797430038452148, "rewards/margins": 0.9782981276512146, "rewards/rejected": -1.9580411911010742, "step": 877 }, { "epoch": 1.51, "learning_rate": 4.622821929451763e-07, "logits/chosen": -2.0609664916992188, "logits/rejected": -2.3169121742248535, "logps/chosen": -113.58439636230469, "logps/rejected": -151.1067657470703, "loss": 0.3963, "rewards/accuracies": 1.0, "rewards/chosen": -2.2147700786590576, "rewards/margins": 1.0528981685638428, "rewards/rejected": -3.2676680088043213, "step": 878 }, { "epoch": 1.51, "learning_rate": 4.621759456013599e-07, "logits/chosen": -2.31347393989563, "logits/rejected": -2.1444902420043945, "logps/chosen": -105.58430480957031, "logps/rejected": -98.29949951171875, "loss": 0.3408, "rewards/accuracies": 0.75, "rewards/chosen": -0.5244681239128113, "rewards/margins": 0.7789856791496277, "rewards/rejected": -1.3034536838531494, "step": 879 }, { "epoch": 1.51, "learning_rate": 4.6206969825754357e-07, "logits/chosen": -1.9259040355682373, "logits/rejected": -2.0966410636901855, "logps/chosen": -87.53317260742188, "logps/rejected": -126.46227264404297, "loss": 0.2954, "rewards/accuracies": 0.75, "rewards/chosen": -0.5808829069137573, "rewards/margins": 3.230897903442383, "rewards/rejected": -3.8117804527282715, "step": 880 }, { "epoch": 1.52, "learning_rate": 4.619634509137271e-07, "logits/chosen": -2.1171321868896484, "logits/rejected": -2.202620029449463, "logps/chosen": -77.8115005493164, "logps/rejected": -104.2195816040039, "loss": 0.4282, "rewards/accuracies": 1.0, "rewards/chosen": -0.19771651923656464, "rewards/margins": 2.769230842590332, "rewards/rejected": -2.966947317123413, "step": 881 }, { "epoch": 1.52, "learning_rate": 4.6185720356991077e-07, "logits/chosen": -2.3868842124938965, "logits/rejected": -2.181118965148926, "logps/chosen": -98.11243438720703, "logps/rejected": -105.72386169433594, "loss": 0.2759, "rewards/accuracies": 0.75, "rewards/chosen": -1.8762388229370117, "rewards/margins": 1.9063184261322021, "rewards/rejected": -3.7825570106506348, "step": 882 }, { "epoch": 1.52, "learning_rate": 4.617509562260943e-07, "logits/chosen": -2.184875726699829, "logits/rejected": -2.1379311084747314, "logps/chosen": -91.12451171875, "logps/rejected": -128.8629913330078, "loss": 0.202, "rewards/accuracies": 0.75, "rewards/chosen": -0.30109384655952454, "rewards/margins": 3.4311234951019287, "rewards/rejected": -3.732217311859131, "step": 883 }, { "epoch": 1.52, "learning_rate": 4.616447088822779e-07, "logits/chosen": -2.292785167694092, "logits/rejected": -1.9888098239898682, "logps/chosen": -110.09722137451172, "logps/rejected": -112.52530670166016, "loss": 0.3973, "rewards/accuracies": 1.0, "rewards/chosen": -0.10166192054748535, "rewards/margins": 2.373020887374878, "rewards/rejected": -2.4746828079223633, "step": 884 }, { "epoch": 1.52, "learning_rate": 4.6153846153846156e-07, "logits/chosen": -2.1775519847869873, "logits/rejected": -1.8290460109710693, "logps/chosen": -86.08695983886719, "logps/rejected": -110.93162536621094, "loss": 0.1968, "rewards/accuracies": 1.0, "rewards/chosen": -0.5245121121406555, "rewards/margins": 3.5403213500976562, "rewards/rejected": -4.064833164215088, "step": 885 }, { "epoch": 1.52, "learning_rate": 4.614322141946451e-07, "logits/chosen": -2.029646396636963, "logits/rejected": -2.3770692348480225, "logps/chosen": -84.83921813964844, "logps/rejected": -131.57199096679688, "loss": 0.3112, "rewards/accuracies": 1.0, "rewards/chosen": -0.09121009707450867, "rewards/margins": 3.887040138244629, "rewards/rejected": -3.97825026512146, "step": 886 }, { "epoch": 1.53, "learning_rate": 4.613259668508287e-07, "logits/chosen": -2.271562099456787, "logits/rejected": -1.9862120151519775, "logps/chosen": -65.66790771484375, "logps/rejected": -95.28773498535156, "loss": 0.2233, "rewards/accuracies": 0.75, "rewards/chosen": 0.7416252493858337, "rewards/margins": 2.262690305709839, "rewards/rejected": -1.52106511592865, "step": 887 }, { "epoch": 1.53, "learning_rate": 4.612197195070123e-07, "logits/chosen": -2.011751890182495, "logits/rejected": -2.2097482681274414, "logps/chosen": -67.71342468261719, "logps/rejected": -122.96855163574219, "loss": 0.3067, "rewards/accuracies": 1.0, "rewards/chosen": 0.26015734672546387, "rewards/margins": 4.152580738067627, "rewards/rejected": -3.892423391342163, "step": 888 }, { "epoch": 1.53, "learning_rate": 4.611134721631959e-07, "logits/chosen": -1.8033998012542725, "logits/rejected": -2.3360955715179443, "logps/chosen": -70.60015869140625, "logps/rejected": -126.49380493164062, "loss": 0.3861, "rewards/accuracies": 1.0, "rewards/chosen": 0.13322390615940094, "rewards/margins": 4.434628963470459, "rewards/rejected": -4.30140495300293, "step": 889 }, { "epoch": 1.53, "learning_rate": 4.610072248193795e-07, "logits/chosen": -2.3745012283325195, "logits/rejected": -1.8391125202178955, "logps/chosen": -85.79446411132812, "logps/rejected": -102.65133666992188, "loss": 0.3603, "rewards/accuracies": 0.75, "rewards/chosen": -0.4947717785835266, "rewards/margins": 1.562302827835083, "rewards/rejected": -2.0570743083953857, "step": 890 }, { "epoch": 1.53, "learning_rate": 4.609009774755631e-07, "logits/chosen": -2.2552380561828613, "logits/rejected": -1.9943257570266724, "logps/chosen": -104.38255310058594, "logps/rejected": -126.7412109375, "loss": 0.2116, "rewards/accuracies": 1.0, "rewards/chosen": -0.6988159418106079, "rewards/margins": 3.7341771125793457, "rewards/rejected": -4.432992935180664, "step": 891 }, { "epoch": 1.54, "learning_rate": 4.607947301317467e-07, "logits/chosen": -2.0745973587036133, "logits/rejected": -2.2923874855041504, "logps/chosen": -99.94422912597656, "logps/rejected": -129.72607421875, "loss": 0.2304, "rewards/accuracies": 0.75, "rewards/chosen": -0.864962100982666, "rewards/margins": 1.9750019311904907, "rewards/rejected": -2.839963912963867, "step": 892 }, { "epoch": 1.54, "learning_rate": 4.6068848278793025e-07, "logits/chosen": -1.9717159271240234, "logits/rejected": -2.377948045730591, "logps/chosen": -89.98650360107422, "logps/rejected": -148.76327514648438, "loss": 0.2247, "rewards/accuracies": 1.0, "rewards/chosen": 0.061470407992601395, "rewards/margins": 4.411978244781494, "rewards/rejected": -4.350507736206055, "step": 893 }, { "epoch": 1.54, "learning_rate": 4.605822354441139e-07, "logits/chosen": -1.7987706661224365, "logits/rejected": -2.279010772705078, "logps/chosen": -77.95355987548828, "logps/rejected": -125.17173767089844, "loss": 0.3329, "rewards/accuracies": 1.0, "rewards/chosen": -0.1821366846561432, "rewards/margins": 2.980860710144043, "rewards/rejected": -3.162997245788574, "step": 894 }, { "epoch": 1.54, "learning_rate": 4.604759881002975e-07, "logits/chosen": -2.1704695224761963, "logits/rejected": -2.0872507095336914, "logps/chosen": -113.60456085205078, "logps/rejected": -142.0704803466797, "loss": 0.2455, "rewards/accuracies": 1.0, "rewards/chosen": -0.78132164478302, "rewards/margins": 3.994645595550537, "rewards/rejected": -4.775967597961426, "step": 895 }, { "epoch": 1.54, "learning_rate": 4.6036974075648104e-07, "logits/chosen": -2.1490492820739746, "logits/rejected": -2.4283201694488525, "logps/chosen": -78.90869140625, "logps/rejected": -148.3784942626953, "loss": 0.2508, "rewards/accuracies": 1.0, "rewards/chosen": 0.3755011260509491, "rewards/margins": 5.767538070678711, "rewards/rejected": -5.3920369148254395, "step": 896 }, { "epoch": 1.54, "learning_rate": 4.602634934126647e-07, "logits/chosen": -2.024339199066162, "logits/rejected": -2.151398181915283, "logps/chosen": -85.4061279296875, "logps/rejected": -116.5899887084961, "loss": 0.3176, "rewards/accuracies": 0.5, "rewards/chosen": -0.5790263414382935, "rewards/margins": 2.609372615814209, "rewards/rejected": -3.188398838043213, "step": 897 }, { "epoch": 1.55, "learning_rate": 4.6015724606884824e-07, "logits/chosen": -1.974782943725586, "logits/rejected": -2.322542905807495, "logps/chosen": -86.16064453125, "logps/rejected": -119.10577392578125, "loss": 0.2677, "rewards/accuracies": 0.75, "rewards/chosen": -0.502968430519104, "rewards/margins": 1.8075902462005615, "rewards/rejected": -2.310558795928955, "step": 898 }, { "epoch": 1.55, "learning_rate": 4.6005099872503184e-07, "logits/chosen": -2.0553195476531982, "logits/rejected": -2.121150255203247, "logps/chosen": -85.79888153076172, "logps/rejected": -126.91085052490234, "loss": 0.1997, "rewards/accuracies": 1.0, "rewards/chosen": -0.4284425675868988, "rewards/margins": 3.5754361152648926, "rewards/rejected": -4.003878593444824, "step": 899 }, { "epoch": 1.55, "learning_rate": 4.599447513812155e-07, "logits/chosen": -2.057920455932617, "logits/rejected": -2.080087184906006, "logps/chosen": -68.44564819335938, "logps/rejected": -120.15763092041016, "loss": 0.2125, "rewards/accuracies": 1.0, "rewards/chosen": 0.04984492063522339, "rewards/margins": 4.356786251068115, "rewards/rejected": -4.306941509246826, "step": 900 }, { "epoch": 1.55, "learning_rate": 4.5983850403739904e-07, "logits/chosen": -2.2051024436950684, "logits/rejected": -2.228348731994629, "logps/chosen": -88.30965423583984, "logps/rejected": -112.416259765625, "loss": 0.2752, "rewards/accuracies": 1.0, "rewards/chosen": -0.912584662437439, "rewards/margins": 2.643395185470581, "rewards/rejected": -3.5559797286987305, "step": 901 }, { "epoch": 1.55, "learning_rate": 4.5973225669358264e-07, "logits/chosen": -2.0721499919891357, "logits/rejected": -2.1793863773345947, "logps/chosen": -88.61538696289062, "logps/rejected": -107.02012634277344, "loss": 0.4465, "rewards/accuracies": 0.5, "rewards/chosen": -0.06573599576950073, "rewards/margins": 1.5094027519226074, "rewards/rejected": -1.5751385688781738, "step": 902 }, { "epoch": 1.55, "learning_rate": 4.5962600934976624e-07, "logits/chosen": -2.225022077560425, "logits/rejected": -2.154996633529663, "logps/chosen": -95.75302124023438, "logps/rejected": -112.03141021728516, "loss": 0.4297, "rewards/accuracies": 0.75, "rewards/chosen": -1.3194183111190796, "rewards/margins": 1.6267601251602173, "rewards/rejected": -2.946178436279297, "step": 903 }, { "epoch": 1.56, "learning_rate": 4.5951976200594984e-07, "logits/chosen": -2.3049731254577637, "logits/rejected": -1.8422037363052368, "logps/chosen": -86.06546783447266, "logps/rejected": -116.35022735595703, "loss": 0.2532, "rewards/accuracies": 0.75, "rewards/chosen": -0.9264036417007446, "rewards/margins": 2.094468116760254, "rewards/rejected": -3.020871639251709, "step": 904 }, { "epoch": 1.56, "learning_rate": 4.594135146621334e-07, "logits/chosen": -2.0759851932525635, "logits/rejected": -2.039846420288086, "logps/chosen": -83.03356170654297, "logps/rejected": -110.88970184326172, "loss": 0.3171, "rewards/accuracies": 1.0, "rewards/chosen": 0.12424986064434052, "rewards/margins": 3.442018508911133, "rewards/rejected": -3.3177685737609863, "step": 905 }, { "epoch": 1.56, "learning_rate": 4.5930726731831703e-07, "logits/chosen": -2.1642560958862305, "logits/rejected": -1.9852564334869385, "logps/chosen": -82.54440307617188, "logps/rejected": -96.84088897705078, "loss": 0.2796, "rewards/accuracies": 1.0, "rewards/chosen": -0.25337448716163635, "rewards/margins": 1.5403614044189453, "rewards/rejected": -1.7937358617782593, "step": 906 }, { "epoch": 1.56, "learning_rate": 4.5920101997450063e-07, "logits/chosen": -2.038560152053833, "logits/rejected": -2.301862955093384, "logps/chosen": -67.4677734375, "logps/rejected": -124.06990051269531, "loss": 0.1768, "rewards/accuracies": 1.0, "rewards/chosen": 0.30447569489479065, "rewards/margins": 4.563401222229004, "rewards/rejected": -4.258925914764404, "step": 907 }, { "epoch": 1.56, "learning_rate": 4.590947726306842e-07, "logits/chosen": -1.76653254032135, "logits/rejected": -2.2581253051757812, "logps/chosen": -82.76959991455078, "logps/rejected": -131.78082275390625, "loss": 0.3231, "rewards/accuracies": 1.0, "rewards/chosen": 0.28205472230911255, "rewards/margins": 3.5696284770965576, "rewards/rejected": -3.28757381439209, "step": 908 }, { "epoch": 1.56, "learning_rate": 4.5898852528686783e-07, "logits/chosen": -1.6907286643981934, "logits/rejected": -2.074981451034546, "logps/chosen": -79.31826782226562, "logps/rejected": -121.46517181396484, "loss": 0.2717, "rewards/accuracies": 1.0, "rewards/chosen": -0.13842657208442688, "rewards/margins": 2.9888296127319336, "rewards/rejected": -3.127255916595459, "step": 909 }, { "epoch": 1.57, "learning_rate": 4.588822779430514e-07, "logits/chosen": -2.3200485706329346, "logits/rejected": -2.329662322998047, "logps/chosen": -79.55300903320312, "logps/rejected": -104.1552734375, "loss": 0.3555, "rewards/accuracies": 0.75, "rewards/chosen": 0.38035720586776733, "rewards/margins": 2.6883859634399414, "rewards/rejected": -2.3080286979675293, "step": 910 }, { "epoch": 1.57, "learning_rate": 4.58776030599235e-07, "logits/chosen": -2.113422155380249, "logits/rejected": -1.9102240800857544, "logps/chosen": -95.43743896484375, "logps/rejected": -127.97898864746094, "loss": 0.3328, "rewards/accuracies": 0.75, "rewards/chosen": -1.5997509956359863, "rewards/margins": 1.794073462486267, "rewards/rejected": -3.393824577331543, "step": 911 }, { "epoch": 1.57, "learning_rate": 4.5866978325541863e-07, "logits/chosen": -2.2807068824768066, "logits/rejected": -2.0812227725982666, "logps/chosen": -83.43407440185547, "logps/rejected": -130.04563903808594, "loss": 0.3874, "rewards/accuracies": 1.0, "rewards/chosen": -0.2552885115146637, "rewards/margins": 2.705967664718628, "rewards/rejected": -2.961256265640259, "step": 912 }, { "epoch": 1.57, "learning_rate": 4.5856353591160217e-07, "logits/chosen": -2.202328681945801, "logits/rejected": -2.2294321060180664, "logps/chosen": -73.58818054199219, "logps/rejected": -99.83679962158203, "loss": 0.3292, "rewards/accuracies": 0.75, "rewards/chosen": -1.0709103345870972, "rewards/margins": 1.9163477420806885, "rewards/rejected": -2.987257957458496, "step": 913 }, { "epoch": 1.57, "learning_rate": 4.5845728856778577e-07, "logits/chosen": -2.4451191425323486, "logits/rejected": -1.7122514247894287, "logps/chosen": -104.48121643066406, "logps/rejected": -117.72608184814453, "loss": 0.1988, "rewards/accuracies": 1.0, "rewards/chosen": 0.059798240661621094, "rewards/margins": 3.4709081649780273, "rewards/rejected": -3.4111099243164062, "step": 914 }, { "epoch": 1.57, "learning_rate": 4.5835104122396937e-07, "logits/chosen": -1.9992787837982178, "logits/rejected": -2.2374744415283203, "logps/chosen": -95.85964965820312, "logps/rejected": -120.3829116821289, "loss": 0.3052, "rewards/accuracies": 1.0, "rewards/chosen": -1.5633246898651123, "rewards/margins": 1.6129618883132935, "rewards/rejected": -3.176286458969116, "step": 915 }, { "epoch": 1.58, "learning_rate": 4.5824479388015297e-07, "logits/chosen": -2.147756814956665, "logits/rejected": -2.157782793045044, "logps/chosen": -67.41815948486328, "logps/rejected": -114.7271957397461, "loss": 0.1804, "rewards/accuracies": 1.0, "rewards/chosen": 0.08159389346837997, "rewards/margins": 4.253602981567383, "rewards/rejected": -4.172009468078613, "step": 916 }, { "epoch": 1.58, "learning_rate": 4.5813854653633657e-07, "logits/chosen": -2.0831656455993652, "logits/rejected": -2.1262452602386475, "logps/chosen": -92.46055603027344, "logps/rejected": -109.56880187988281, "loss": 0.2648, "rewards/accuracies": 0.75, "rewards/chosen": -1.4188932180404663, "rewards/margins": 2.1735215187072754, "rewards/rejected": -3.5924148559570312, "step": 917 }, { "epoch": 1.58, "learning_rate": 4.5803229919252017e-07, "logits/chosen": -2.3232810497283936, "logits/rejected": -2.1068637371063232, "logps/chosen": -99.69068145751953, "logps/rejected": -115.39310455322266, "loss": 0.2999, "rewards/accuracies": 0.75, "rewards/chosen": -0.1076696515083313, "rewards/margins": 2.5751023292541504, "rewards/rejected": -2.682771682739258, "step": 918 }, { "epoch": 1.58, "learning_rate": 4.5792605184870377e-07, "logits/chosen": -2.1529645919799805, "logits/rejected": -1.9585633277893066, "logps/chosen": -102.78606414794922, "logps/rejected": -107.51705932617188, "loss": 0.5178, "rewards/accuracies": 0.75, "rewards/chosen": -0.4244319796562195, "rewards/margins": 1.5455007553100586, "rewards/rejected": -1.9699326753616333, "step": 919 }, { "epoch": 1.58, "learning_rate": 4.578198045048873e-07, "logits/chosen": -2.1892929077148438, "logits/rejected": -2.143526554107666, "logps/chosen": -100.02711486816406, "logps/rejected": -118.1131820678711, "loss": 0.3129, "rewards/accuracies": 0.75, "rewards/chosen": -1.5243494510650635, "rewards/margins": 1.3073333501815796, "rewards/rejected": -2.8316826820373535, "step": 920 }, { "epoch": 1.59, "learning_rate": 4.5771355716107096e-07, "logits/chosen": -2.3625097274780273, "logits/rejected": -2.1939728260040283, "logps/chosen": -93.30557250976562, "logps/rejected": -106.41519927978516, "loss": 0.2863, "rewards/accuracies": 1.0, "rewards/chosen": -0.2109232097864151, "rewards/margins": 0.9292310476303101, "rewards/rejected": -1.1401543617248535, "step": 921 }, { "epoch": 1.59, "learning_rate": 4.5760730981725456e-07, "logits/chosen": -2.234769821166992, "logits/rejected": -2.115561008453369, "logps/chosen": -125.77182006835938, "logps/rejected": -127.68305206298828, "loss": 0.3829, "rewards/accuracies": 0.75, "rewards/chosen": -0.8605798482894897, "rewards/margins": 2.005183219909668, "rewards/rejected": -2.8657631874084473, "step": 922 }, { "epoch": 1.59, "learning_rate": 4.5750106247343816e-07, "logits/chosen": -1.8501267433166504, "logits/rejected": -2.169682741165161, "logps/chosen": -90.099853515625, "logps/rejected": -139.9678497314453, "loss": 0.2749, "rewards/accuracies": 1.0, "rewards/chosen": -0.26599419116973877, "rewards/margins": 2.2380478382110596, "rewards/rejected": -2.504041910171509, "step": 923 }, { "epoch": 1.59, "learning_rate": 4.5739481512962176e-07, "logits/chosen": -2.267056703567505, "logits/rejected": -1.9891939163208008, "logps/chosen": -79.19293975830078, "logps/rejected": -124.85818481445312, "loss": 0.2592, "rewards/accuracies": 1.0, "rewards/chosen": -0.36506205797195435, "rewards/margins": 4.871032238006592, "rewards/rejected": -5.2360944747924805, "step": 924 }, { "epoch": 1.59, "learning_rate": 4.572885677858053e-07, "logits/chosen": -2.2109830379486084, "logits/rejected": -2.194537878036499, "logps/chosen": -93.35200500488281, "logps/rejected": -122.6158447265625, "loss": 0.6382, "rewards/accuracies": 0.75, "rewards/chosen": -0.9805712699890137, "rewards/margins": 1.4032678604125977, "rewards/rejected": -2.3838391304016113, "step": 925 }, { "epoch": 1.59, "learning_rate": 4.5718232044198896e-07, "logits/chosen": -1.9069533348083496, "logits/rejected": -2.1289801597595215, "logps/chosen": -73.7656021118164, "logps/rejected": -136.14324951171875, "loss": 0.2885, "rewards/accuracies": 1.0, "rewards/chosen": 0.18691599369049072, "rewards/margins": 4.298101425170898, "rewards/rejected": -4.111185073852539, "step": 926 }, { "epoch": 1.6, "learning_rate": 4.5707607309817256e-07, "logits/chosen": -1.876406192779541, "logits/rejected": -2.175564765930176, "logps/chosen": -84.83905792236328, "logps/rejected": -111.61634826660156, "loss": 0.3909, "rewards/accuracies": 0.75, "rewards/chosen": -0.10980761051177979, "rewards/margins": 1.3301455974578857, "rewards/rejected": -1.439953327178955, "step": 927 }, { "epoch": 1.6, "learning_rate": 4.569698257543561e-07, "logits/chosen": -1.8983275890350342, "logits/rejected": -2.1633903980255127, "logps/chosen": -98.55350494384766, "logps/rejected": -131.9325714111328, "loss": 0.3618, "rewards/accuracies": 0.75, "rewards/chosen": -1.3117601871490479, "rewards/margins": 1.1486968994140625, "rewards/rejected": -2.4604568481445312, "step": 928 }, { "epoch": 1.6, "learning_rate": 4.5686357841053976e-07, "logits/chosen": -2.0397167205810547, "logits/rejected": -2.077221393585205, "logps/chosen": -89.38470458984375, "logps/rejected": -115.65971374511719, "loss": 0.4548, "rewards/accuracies": 1.0, "rewards/chosen": -0.7601590156555176, "rewards/margins": 2.2129342555999756, "rewards/rejected": -2.973093271255493, "step": 929 }, { "epoch": 1.6, "learning_rate": 4.567573310667233e-07, "logits/chosen": -2.2566583156585693, "logits/rejected": -2.209789514541626, "logps/chosen": -83.57025146484375, "logps/rejected": -110.5346908569336, "loss": 0.2172, "rewards/accuracies": 1.0, "rewards/chosen": -0.6252723932266235, "rewards/margins": 2.3914082050323486, "rewards/rejected": -3.0166807174682617, "step": 930 }, { "epoch": 1.6, "learning_rate": 4.566510837229069e-07, "logits/chosen": -2.1477060317993164, "logits/rejected": -2.215273857116699, "logps/chosen": -77.79509735107422, "logps/rejected": -117.40192413330078, "loss": 0.2152, "rewards/accuracies": 1.0, "rewards/chosen": 0.1658863127231598, "rewards/margins": 3.4935574531555176, "rewards/rejected": -3.3276712894439697, "step": 931 }, { "epoch": 1.6, "learning_rate": 4.5654483637909055e-07, "logits/chosen": -2.277299642562866, "logits/rejected": -2.177358627319336, "logps/chosen": -98.55855560302734, "logps/rejected": -131.17332458496094, "loss": 0.3371, "rewards/accuracies": 1.0, "rewards/chosen": -0.017447099089622498, "rewards/margins": 4.119846343994141, "rewards/rejected": -4.137293815612793, "step": 932 }, { "epoch": 1.61, "learning_rate": 4.564385890352741e-07, "logits/chosen": -2.285524845123291, "logits/rejected": -2.186936378479004, "logps/chosen": -109.28948974609375, "logps/rejected": -121.45293426513672, "loss": 0.2603, "rewards/accuracies": 1.0, "rewards/chosen": -1.6787571907043457, "rewards/margins": 2.109066963195801, "rewards/rejected": -3.7878241539001465, "step": 933 }, { "epoch": 1.61, "learning_rate": 4.563323416914577e-07, "logits/chosen": -2.100825548171997, "logits/rejected": -2.1660380363464355, "logps/chosen": -100.83673858642578, "logps/rejected": -115.09686279296875, "loss": 0.3888, "rewards/accuracies": 0.5, "rewards/chosen": -1.522796630859375, "rewards/margins": 0.9541942477226257, "rewards/rejected": -2.4769909381866455, "step": 934 }, { "epoch": 1.61, "learning_rate": 4.562260943476413e-07, "logits/chosen": -2.132772445678711, "logits/rejected": -1.7586040496826172, "logps/chosen": -111.09405517578125, "logps/rejected": -125.61003875732422, "loss": 0.1723, "rewards/accuracies": 1.0, "rewards/chosen": -1.7237584590911865, "rewards/margins": 2.6489646434783936, "rewards/rejected": -4.37272310256958, "step": 935 }, { "epoch": 1.61, "learning_rate": 4.561198470038249e-07, "logits/chosen": -2.345564126968384, "logits/rejected": -2.2389636039733887, "logps/chosen": -93.64762878417969, "logps/rejected": -124.79808807373047, "loss": 0.2381, "rewards/accuracies": 1.0, "rewards/chosen": -0.6623550057411194, "rewards/margins": 3.6649060249328613, "rewards/rejected": -4.327260971069336, "step": 936 }, { "epoch": 1.61, "learning_rate": 4.5601359966000844e-07, "logits/chosen": -1.8242237567901611, "logits/rejected": -1.976049542427063, "logps/chosen": -82.58450317382812, "logps/rejected": -109.61219024658203, "loss": 0.3052, "rewards/accuracies": 0.75, "rewards/chosen": -0.9260898232460022, "rewards/margins": 1.3829588890075684, "rewards/rejected": -2.309048891067505, "step": 937 }, { "epoch": 1.61, "learning_rate": 4.559073523161921e-07, "logits/chosen": -2.1882877349853516, "logits/rejected": -2.1792657375335693, "logps/chosen": -87.76161193847656, "logps/rejected": -133.27536010742188, "loss": 0.2093, "rewards/accuracies": 1.0, "rewards/chosen": -1.3520585298538208, "rewards/margins": 3.5080535411834717, "rewards/rejected": -4.860111713409424, "step": 938 }, { "epoch": 1.62, "learning_rate": 4.558011049723757e-07, "logits/chosen": -2.2791643142700195, "logits/rejected": -1.8454530239105225, "logps/chosen": -89.69035339355469, "logps/rejected": -98.32322692871094, "loss": 0.3614, "rewards/accuracies": 0.75, "rewards/chosen": -0.45487502217292786, "rewards/margins": 2.007638931274414, "rewards/rejected": -2.4625139236450195, "step": 939 }, { "epoch": 1.62, "learning_rate": 4.5569485762855924e-07, "logits/chosen": -2.2273640632629395, "logits/rejected": -2.1361780166625977, "logps/chosen": -78.38745880126953, "logps/rejected": -107.79390716552734, "loss": 0.4176, "rewards/accuracies": 1.0, "rewards/chosen": -2.2612903118133545, "rewards/margins": 2.449075222015381, "rewards/rejected": -4.710365295410156, "step": 940 }, { "epoch": 1.62, "learning_rate": 4.555886102847429e-07, "logits/chosen": -1.7974820137023926, "logits/rejected": -2.2305796146392822, "logps/chosen": -105.47160339355469, "logps/rejected": -119.28711700439453, "loss": 0.3818, "rewards/accuracies": 1.0, "rewards/chosen": -1.2723896503448486, "rewards/margins": 1.2617473602294922, "rewards/rejected": -2.534137010574341, "step": 941 }, { "epoch": 1.62, "learning_rate": 4.5548236294092644e-07, "logits/chosen": -2.2153568267822266, "logits/rejected": -2.203303575515747, "logps/chosen": -101.67326354980469, "logps/rejected": -126.71534729003906, "loss": 0.2532, "rewards/accuracies": 1.0, "rewards/chosen": -1.650073766708374, "rewards/margins": 2.5706989765167236, "rewards/rejected": -4.220772743225098, "step": 942 }, { "epoch": 1.62, "learning_rate": 4.5537611559711003e-07, "logits/chosen": -2.3415679931640625, "logits/rejected": -1.9983062744140625, "logps/chosen": -97.80067443847656, "logps/rejected": -102.50210571289062, "loss": 0.2903, "rewards/accuracies": 0.75, "rewards/chosen": -1.1266069412231445, "rewards/margins": 1.996647834777832, "rewards/rejected": -3.1232547760009766, "step": 943 }, { "epoch": 1.62, "learning_rate": 4.552698682532937e-07, "logits/chosen": -2.086383104324341, "logits/rejected": -2.1299500465393066, "logps/chosen": -109.67408752441406, "logps/rejected": -149.61068725585938, "loss": 0.262, "rewards/accuracies": 1.0, "rewards/chosen": -3.0506343841552734, "rewards/margins": 3.494345188140869, "rewards/rejected": -6.544979572296143, "step": 944 }, { "epoch": 1.63, "learning_rate": 4.5516362090947723e-07, "logits/chosen": -2.0942978858947754, "logits/rejected": -2.1808972358703613, "logps/chosen": -99.5660400390625, "logps/rejected": -140.2278594970703, "loss": 0.2072, "rewards/accuracies": 0.75, "rewards/chosen": -1.2768642902374268, "rewards/margins": 3.7991766929626465, "rewards/rejected": -5.076041221618652, "step": 945 }, { "epoch": 1.63, "learning_rate": 4.5505737356566083e-07, "logits/chosen": -2.1025352478027344, "logits/rejected": -2.303253650665283, "logps/chosen": -97.03287506103516, "logps/rejected": -132.0863494873047, "loss": 0.2601, "rewards/accuracies": 0.75, "rewards/chosen": -0.8029175996780396, "rewards/margins": 2.2773666381835938, "rewards/rejected": -3.080284357070923, "step": 946 }, { "epoch": 1.63, "learning_rate": 4.5495112622184443e-07, "logits/chosen": -2.051751136779785, "logits/rejected": -1.9457130432128906, "logps/chosen": -92.57949829101562, "logps/rejected": -136.143798828125, "loss": 0.2418, "rewards/accuracies": 1.0, "rewards/chosen": -1.4356844425201416, "rewards/margins": 4.6887993812561035, "rewards/rejected": -6.124484062194824, "step": 947 }, { "epoch": 1.63, "learning_rate": 4.5484487887802803e-07, "logits/chosen": -2.27486515045166, "logits/rejected": -1.9570579528808594, "logps/chosen": -115.30361938476562, "logps/rejected": -161.298583984375, "loss": 0.2438, "rewards/accuracies": 1.0, "rewards/chosen": -0.6613540649414062, "rewards/margins": 5.760201930999756, "rewards/rejected": -6.421555995941162, "step": 948 }, { "epoch": 1.63, "learning_rate": 4.5473863153421163e-07, "logits/chosen": -2.243591547012329, "logits/rejected": -2.23018741607666, "logps/chosen": -98.6678466796875, "logps/rejected": -137.41647338867188, "loss": 0.322, "rewards/accuracies": 1.0, "rewards/chosen": -2.1304214000701904, "rewards/margins": 3.14052677154541, "rewards/rejected": -5.27094841003418, "step": 949 }, { "epoch": 1.64, "learning_rate": 4.546323841903952e-07, "logits/chosen": -2.0140695571899414, "logits/rejected": -1.9363176822662354, "logps/chosen": -114.41712951660156, "logps/rejected": -114.31804656982422, "loss": 0.3246, "rewards/accuracies": 0.5, "rewards/chosen": -2.8712656497955322, "rewards/margins": 1.205174207687378, "rewards/rejected": -4.07643985748291, "step": 950 }, { "epoch": 1.64, "learning_rate": 4.545261368465788e-07, "logits/chosen": -2.284881830215454, "logits/rejected": -2.021616220474243, "logps/chosen": -112.10636901855469, "logps/rejected": -119.48318481445312, "loss": 0.3753, "rewards/accuracies": 1.0, "rewards/chosen": -0.5542433857917786, "rewards/margins": 2.7168984413146973, "rewards/rejected": -3.271141529083252, "step": 951 }, { "epoch": 1.64, "learning_rate": 4.5441988950276237e-07, "logits/chosen": -2.2233500480651855, "logits/rejected": -2.1075737476348877, "logps/chosen": -98.843017578125, "logps/rejected": -145.04708862304688, "loss": 0.3528, "rewards/accuracies": 1.0, "rewards/chosen": -1.4504170417785645, "rewards/margins": 3.314967632293701, "rewards/rejected": -4.765384674072266, "step": 952 }, { "epoch": 1.64, "learning_rate": 4.54313642158946e-07, "logits/chosen": -1.9775500297546387, "logits/rejected": -2.2480545043945312, "logps/chosen": -104.494140625, "logps/rejected": -124.17096710205078, "loss": 0.3587, "rewards/accuracies": 0.75, "rewards/chosen": -1.0156519412994385, "rewards/margins": 1.943969488143921, "rewards/rejected": -2.9596214294433594, "step": 953 }, { "epoch": 1.64, "learning_rate": 4.542073948151296e-07, "logits/chosen": -2.3699090480804443, "logits/rejected": -2.215822696685791, "logps/chosen": -96.58355712890625, "logps/rejected": -107.17436218261719, "loss": 0.2116, "rewards/accuracies": 1.0, "rewards/chosen": -0.8733538389205933, "rewards/margins": 2.1855597496032715, "rewards/rejected": -3.058913469314575, "step": 954 }, { "epoch": 1.64, "learning_rate": 4.5410114747131317e-07, "logits/chosen": -2.189724922180176, "logits/rejected": -2.204627513885498, "logps/chosen": -109.52999114990234, "logps/rejected": -116.126953125, "loss": 0.2918, "rewards/accuracies": 0.5, "rewards/chosen": -2.0972485542297363, "rewards/margins": 1.3757483959197998, "rewards/rejected": -3.4729971885681152, "step": 955 }, { "epoch": 1.65, "learning_rate": 4.539949001274968e-07, "logits/chosen": -2.1089859008789062, "logits/rejected": -2.1525070667266846, "logps/chosen": -95.99756622314453, "logps/rejected": -132.72003173828125, "loss": 0.2086, "rewards/accuracies": 1.0, "rewards/chosen": -0.5097824335098267, "rewards/margins": 3.9452314376831055, "rewards/rejected": -4.455013751983643, "step": 956 }, { "epoch": 1.65, "learning_rate": 4.5388865278368037e-07, "logits/chosen": -1.8341078758239746, "logits/rejected": -2.288722276687622, "logps/chosen": -76.43637084960938, "logps/rejected": -134.9834747314453, "loss": 0.364, "rewards/accuracies": 1.0, "rewards/chosen": -1.2083343267440796, "rewards/margins": 3.290011405944824, "rewards/rejected": -4.498345851898193, "step": 957 }, { "epoch": 1.65, "learning_rate": 4.5378240543986396e-07, "logits/chosen": -1.9528396129608154, "logits/rejected": -2.1887524127960205, "logps/chosen": -99.40907287597656, "logps/rejected": -140.07424926757812, "loss": 0.4585, "rewards/accuracies": 0.75, "rewards/chosen": -2.052863836288452, "rewards/margins": 2.2476749420166016, "rewards/rejected": -4.300539016723633, "step": 958 }, { "epoch": 1.65, "learning_rate": 4.5367615809604756e-07, "logits/chosen": -2.156221389770508, "logits/rejected": -1.9913568496704102, "logps/chosen": -127.19752502441406, "logps/rejected": -125.1022720336914, "loss": 0.2111, "rewards/accuracies": 1.0, "rewards/chosen": -1.2728900909423828, "rewards/margins": 2.2778279781341553, "rewards/rejected": -3.550718307495117, "step": 959 }, { "epoch": 1.65, "learning_rate": 4.5356991075223116e-07, "logits/chosen": -2.1587154865264893, "logits/rejected": -2.2107596397399902, "logps/chosen": -69.40608978271484, "logps/rejected": -136.40809631347656, "loss": 0.1591, "rewards/accuracies": 1.0, "rewards/chosen": -0.15377426147460938, "rewards/margins": 5.469333171844482, "rewards/rejected": -5.623107433319092, "step": 960 }, { "epoch": 1.65, "learning_rate": 4.5346366340841476e-07, "logits/chosen": -2.043348789215088, "logits/rejected": -2.203669548034668, "logps/chosen": -80.63597869873047, "logps/rejected": -143.56631469726562, "loss": 0.3071, "rewards/accuracies": 1.0, "rewards/chosen": -0.3652525544166565, "rewards/margins": 4.560717582702637, "rewards/rejected": -4.925970077514648, "step": 961 }, { "epoch": 1.66, "learning_rate": 4.5335741606459836e-07, "logits/chosen": -2.1338844299316406, "logits/rejected": -2.09389591217041, "logps/chosen": -84.76148986816406, "logps/rejected": -122.46099090576172, "loss": 0.2197, "rewards/accuracies": 1.0, "rewards/chosen": -1.1844773292541504, "rewards/margins": 3.6021170616149902, "rewards/rejected": -4.786594390869141, "step": 962 }, { "epoch": 1.66, "learning_rate": 4.5325116872078196e-07, "logits/chosen": -1.8020257949829102, "logits/rejected": -2.095223903656006, "logps/chosen": -96.94468688964844, "logps/rejected": -152.37078857421875, "loss": 0.2437, "rewards/accuracies": 1.0, "rewards/chosen": -0.15916690230369568, "rewards/margins": 4.004460334777832, "rewards/rejected": -4.163626670837402, "step": 963 }, { "epoch": 1.66, "learning_rate": 4.531449213769655e-07, "logits/chosen": -2.266552448272705, "logits/rejected": -2.1244397163391113, "logps/chosen": -101.41458892822266, "logps/rejected": -130.0651397705078, "loss": 0.3017, "rewards/accuracies": 0.75, "rewards/chosen": -1.7004477977752686, "rewards/margins": 2.7161083221435547, "rewards/rejected": -4.416556358337402, "step": 964 }, { "epoch": 1.66, "learning_rate": 4.5303867403314916e-07, "logits/chosen": -2.0877833366394043, "logits/rejected": -2.229914665222168, "logps/chosen": -85.93665313720703, "logps/rejected": -139.4205322265625, "loss": 0.158, "rewards/accuracies": 1.0, "rewards/chosen": -0.03386494517326355, "rewards/margins": 3.703688144683838, "rewards/rejected": -3.737553119659424, "step": 965 }, { "epoch": 1.66, "learning_rate": 4.5293242668933276e-07, "logits/chosen": -2.159619092941284, "logits/rejected": -2.0242128372192383, "logps/chosen": -114.86151123046875, "logps/rejected": -126.65991973876953, "loss": 0.2735, "rewards/accuracies": 1.0, "rewards/chosen": -1.64400315284729, "rewards/margins": 1.9288431406021118, "rewards/rejected": -3.5728464126586914, "step": 966 }, { "epoch": 1.66, "learning_rate": 4.5282617934551635e-07, "logits/chosen": -1.9933428764343262, "logits/rejected": -2.348775625228882, "logps/chosen": -92.14363098144531, "logps/rejected": -132.34088134765625, "loss": 0.4045, "rewards/accuracies": 1.0, "rewards/chosen": -0.37561744451522827, "rewards/margins": 2.405200958251953, "rewards/rejected": -2.780818462371826, "step": 967 }, { "epoch": 1.67, "learning_rate": 4.5271993200169995e-07, "logits/chosen": -2.121150016784668, "logits/rejected": -2.117969512939453, "logps/chosen": -95.90679931640625, "logps/rejected": -160.55740356445312, "loss": 0.1501, "rewards/accuracies": 1.0, "rewards/chosen": -0.7484070062637329, "rewards/margins": 5.483926773071289, "rewards/rejected": -6.232334136962891, "step": 968 }, { "epoch": 1.67, "learning_rate": 4.526136846578835e-07, "logits/chosen": -2.100168228149414, "logits/rejected": -2.0729005336761475, "logps/chosen": -102.00884246826172, "logps/rejected": -112.74714660644531, "loss": 0.143, "rewards/accuracies": 1.0, "rewards/chosen": -0.9211400747299194, "rewards/margins": 1.4179729223251343, "rewards/rejected": -2.3391129970550537, "step": 969 }, { "epoch": 1.67, "learning_rate": 4.5250743731406715e-07, "logits/chosen": -2.101555347442627, "logits/rejected": -2.079794406890869, "logps/chosen": -103.68191528320312, "logps/rejected": -164.51573181152344, "loss": 0.3971, "rewards/accuracies": 1.0, "rewards/chosen": -0.07452985644340515, "rewards/margins": 6.766882419586182, "rewards/rejected": -6.841412544250488, "step": 970 }, { "epoch": 1.67, "learning_rate": 4.5240118997025075e-07, "logits/chosen": -2.231851816177368, "logits/rejected": -2.3310546875, "logps/chosen": -93.83192443847656, "logps/rejected": -141.93934631347656, "loss": 0.3071, "rewards/accuracies": 1.0, "rewards/chosen": -0.9536970853805542, "rewards/margins": 3.5718514919281006, "rewards/rejected": -4.525548934936523, "step": 971 }, { "epoch": 1.67, "learning_rate": 4.522949426264343e-07, "logits/chosen": -2.297229290008545, "logits/rejected": -2.1172823905944824, "logps/chosen": -114.20199584960938, "logps/rejected": -135.2436981201172, "loss": 0.2851, "rewards/accuracies": 1.0, "rewards/chosen": -0.23766538500785828, "rewards/margins": 3.898141384124756, "rewards/rejected": -4.135807037353516, "step": 972 }, { "epoch": 1.67, "learning_rate": 4.5218869528261795e-07, "logits/chosen": -2.153317928314209, "logits/rejected": -2.218090772628784, "logps/chosen": -101.31466674804688, "logps/rejected": -128.59463500976562, "loss": 0.3106, "rewards/accuracies": 0.75, "rewards/chosen": -1.6950392723083496, "rewards/margins": 1.9936680793762207, "rewards/rejected": -3.6887073516845703, "step": 973 }, { "epoch": 1.68, "learning_rate": 4.520824479388015e-07, "logits/chosen": -2.1279425621032715, "logits/rejected": -2.2391412258148193, "logps/chosen": -89.03195190429688, "logps/rejected": -136.0289764404297, "loss": 0.1567, "rewards/accuracies": 0.75, "rewards/chosen": -0.5560310482978821, "rewards/margins": 4.7178850173950195, "rewards/rejected": -5.273915767669678, "step": 974 }, { "epoch": 1.68, "learning_rate": 4.519762005949851e-07, "logits/chosen": -2.1170506477355957, "logits/rejected": -1.9197053909301758, "logps/chosen": -51.42852783203125, "logps/rejected": -101.10227966308594, "loss": 0.1917, "rewards/accuracies": 1.0, "rewards/chosen": 0.4997496008872986, "rewards/margins": 6.230527877807617, "rewards/rejected": -5.730778694152832, "step": 975 }, { "epoch": 1.68, "learning_rate": 4.5186995325116874e-07, "logits/chosen": -2.1481361389160156, "logits/rejected": -2.4104721546173096, "logps/chosen": -73.53917694091797, "logps/rejected": -136.22463989257812, "loss": 0.2455, "rewards/accuracies": 1.0, "rewards/chosen": 0.5010205507278442, "rewards/margins": 5.091488361358643, "rewards/rejected": -4.590467929840088, "step": 976 }, { "epoch": 1.68, "learning_rate": 4.517637059073523e-07, "logits/chosen": -2.0554332733154297, "logits/rejected": -2.2753829956054688, "logps/chosen": -65.52427673339844, "logps/rejected": -109.20223236083984, "loss": 0.1657, "rewards/accuracies": 1.0, "rewards/chosen": 0.39579057693481445, "rewards/margins": 3.6051535606384277, "rewards/rejected": -3.2093629837036133, "step": 977 }, { "epoch": 1.68, "learning_rate": 4.516574585635359e-07, "logits/chosen": -2.269531011581421, "logits/rejected": -2.171762228012085, "logps/chosen": -89.42437744140625, "logps/rejected": -110.74052429199219, "loss": 0.2249, "rewards/accuracies": 1.0, "rewards/chosen": -1.3684157133102417, "rewards/margins": 2.921882390975952, "rewards/rejected": -4.2902984619140625, "step": 978 }, { "epoch": 1.69, "learning_rate": 4.515512112197195e-07, "logits/chosen": -2.104735851287842, "logits/rejected": -2.0086426734924316, "logps/chosen": -78.72045135498047, "logps/rejected": -95.18213653564453, "loss": 0.3037, "rewards/accuracies": 1.0, "rewards/chosen": -0.12966424226760864, "rewards/margins": 2.816317081451416, "rewards/rejected": -2.945981502532959, "step": 979 }, { "epoch": 1.69, "learning_rate": 4.514449638759031e-07, "logits/chosen": -1.8821957111358643, "logits/rejected": -2.1833691596984863, "logps/chosen": -80.0082778930664, "logps/rejected": -159.56224060058594, "loss": 0.2729, "rewards/accuracies": 1.0, "rewards/chosen": -0.013730809092521667, "rewards/margins": 4.367867469787598, "rewards/rejected": -4.381598472595215, "step": 980 }, { "epoch": 1.69, "learning_rate": 4.513387165320867e-07, "logits/chosen": -2.0256781578063965, "logits/rejected": -2.17598819732666, "logps/chosen": -68.6728744506836, "logps/rejected": -109.56973266601562, "loss": 0.2243, "rewards/accuracies": 1.0, "rewards/chosen": 0.5941928029060364, "rewards/margins": 3.190047264099121, "rewards/rejected": -2.5958542823791504, "step": 981 }, { "epoch": 1.69, "learning_rate": 4.512324691882703e-07, "logits/chosen": -2.1546630859375, "logits/rejected": -2.308016538619995, "logps/chosen": -84.83538818359375, "logps/rejected": -125.9945297241211, "loss": 0.1704, "rewards/accuracies": 1.0, "rewards/chosen": 0.32375165820121765, "rewards/margins": 2.580526113510132, "rewards/rejected": -2.256774425506592, "step": 982 }, { "epoch": 1.69, "learning_rate": 4.511262218444539e-07, "logits/chosen": -2.3310093879699707, "logits/rejected": -2.3511948585510254, "logps/chosen": -86.88743591308594, "logps/rejected": -106.86967468261719, "loss": 0.3982, "rewards/accuracies": 0.75, "rewards/chosen": 0.23623314499855042, "rewards/margins": 3.5266993045806885, "rewards/rejected": -3.29046630859375, "step": 983 }, { "epoch": 1.69, "learning_rate": 4.5101997450063743e-07, "logits/chosen": -1.8961936235427856, "logits/rejected": -2.421092987060547, "logps/chosen": -69.24150848388672, "logps/rejected": -111.04158782958984, "loss": 0.3022, "rewards/accuracies": 1.0, "rewards/chosen": 0.6150375604629517, "rewards/margins": 3.2762670516967773, "rewards/rejected": -2.6612296104431152, "step": 984 }, { "epoch": 1.7, "learning_rate": 4.509137271568211e-07, "logits/chosen": -2.301206350326538, "logits/rejected": -2.1547930240631104, "logps/chosen": -89.446044921875, "logps/rejected": -114.8963623046875, "loss": 0.2182, "rewards/accuracies": 1.0, "rewards/chosen": 0.9103416204452515, "rewards/margins": 4.252135276794434, "rewards/rejected": -3.3417935371398926, "step": 985 }, { "epoch": 1.7, "learning_rate": 4.5080747981300463e-07, "logits/chosen": -2.0572683811187744, "logits/rejected": -2.179537296295166, "logps/chosen": -93.24608612060547, "logps/rejected": -143.63079833984375, "loss": 0.4855, "rewards/accuracies": 1.0, "rewards/chosen": -0.0757560059428215, "rewards/margins": 3.0731329917907715, "rewards/rejected": -3.1488888263702393, "step": 986 }, { "epoch": 1.7, "learning_rate": 4.5070123246918823e-07, "logits/chosen": -2.2135813236236572, "logits/rejected": -2.189171314239502, "logps/chosen": -99.48445129394531, "logps/rejected": -93.29197692871094, "loss": 0.3531, "rewards/accuracies": 0.5, "rewards/chosen": -0.9048036336898804, "rewards/margins": -0.022381335496902466, "rewards/rejected": -0.8824223279953003, "step": 987 }, { "epoch": 1.7, "learning_rate": 4.505949851253719e-07, "logits/chosen": -2.047847270965576, "logits/rejected": -2.203608512878418, "logps/chosen": -100.71556091308594, "logps/rejected": -121.69483184814453, "loss": 0.4191, "rewards/accuracies": 0.5, "rewards/chosen": -0.8310300707817078, "rewards/margins": 1.3466966152191162, "rewards/rejected": -2.1777267456054688, "step": 988 }, { "epoch": 1.7, "learning_rate": 4.504887377815554e-07, "logits/chosen": -2.1747400760650635, "logits/rejected": -2.239985466003418, "logps/chosen": -90.2275390625, "logps/rejected": -116.80813598632812, "loss": 0.5588, "rewards/accuracies": 0.75, "rewards/chosen": -0.577862560749054, "rewards/margins": 2.7674152851104736, "rewards/rejected": -3.345278024673462, "step": 989 }, { "epoch": 1.7, "learning_rate": 4.50382490437739e-07, "logits/chosen": -2.1165153980255127, "logits/rejected": -2.1581313610076904, "logps/chosen": -97.24713897705078, "logps/rejected": -136.86492919921875, "loss": 0.2337, "rewards/accuracies": 1.0, "rewards/chosen": -0.3661401867866516, "rewards/margins": 3.0737149715423584, "rewards/rejected": -3.4398553371429443, "step": 990 }, { "epoch": 1.71, "learning_rate": 4.502762430939226e-07, "logits/chosen": -2.1873581409454346, "logits/rejected": -2.092212677001953, "logps/chosen": -78.16651916503906, "logps/rejected": -98.23707580566406, "loss": 0.2002, "rewards/accuracies": 1.0, "rewards/chosen": -0.521139919757843, "rewards/margins": 1.219605565071106, "rewards/rejected": -1.7407455444335938, "step": 991 }, { "epoch": 1.71, "learning_rate": 4.501699957501062e-07, "logits/chosen": -2.2003705501556396, "logits/rejected": -1.856534481048584, "logps/chosen": -97.71139526367188, "logps/rejected": -108.94100952148438, "loss": 0.2945, "rewards/accuracies": 1.0, "rewards/chosen": 0.004561245441436768, "rewards/margins": 3.0907580852508545, "rewards/rejected": -3.0861966609954834, "step": 992 }, { "epoch": 1.71, "learning_rate": 4.500637484062898e-07, "logits/chosen": -2.1180238723754883, "logits/rejected": -2.051629066467285, "logps/chosen": -85.72242736816406, "logps/rejected": -102.19180297851562, "loss": 0.1866, "rewards/accuracies": 1.0, "rewards/chosen": 0.17277008295059204, "rewards/margins": 2.94292950630188, "rewards/rejected": -2.7701592445373535, "step": 993 }, { "epoch": 1.71, "learning_rate": 4.499575010624734e-07, "logits/chosen": -2.2869462966918945, "logits/rejected": -2.157592535018921, "logps/chosen": -73.73078918457031, "logps/rejected": -113.16738891601562, "loss": 0.2897, "rewards/accuracies": 1.0, "rewards/chosen": 0.1035681813955307, "rewards/margins": 3.460545063018799, "rewards/rejected": -3.3569769859313965, "step": 994 }, { "epoch": 1.71, "learning_rate": 4.49851253718657e-07, "logits/chosen": -2.017749786376953, "logits/rejected": -2.1522819995880127, "logps/chosen": -92.78901672363281, "logps/rejected": -138.63897705078125, "loss": 0.1539, "rewards/accuracies": 1.0, "rewards/chosen": -0.365399569272995, "rewards/margins": 2.7781713008880615, "rewards/rejected": -3.143570899963379, "step": 995 }, { "epoch": 1.71, "learning_rate": 4.4974500637484056e-07, "logits/chosen": -2.1157760620117188, "logits/rejected": -2.0834314823150635, "logps/chosen": -113.55555725097656, "logps/rejected": -119.20321655273438, "loss": 0.2368, "rewards/accuracies": 1.0, "rewards/chosen": 0.08219355344772339, "rewards/margins": 2.40667986869812, "rewards/rejected": -2.324486494064331, "step": 996 }, { "epoch": 1.72, "learning_rate": 4.496387590310242e-07, "logits/chosen": -1.9436254501342773, "logits/rejected": -2.4287819862365723, "logps/chosen": -79.4102783203125, "logps/rejected": -126.62552642822266, "loss": 0.3517, "rewards/accuracies": 1.0, "rewards/chosen": -0.2333388328552246, "rewards/margins": 4.192010402679443, "rewards/rejected": -4.425349235534668, "step": 997 }, { "epoch": 1.72, "learning_rate": 4.495325116872078e-07, "logits/chosen": -2.0396838188171387, "logits/rejected": -2.304353952407837, "logps/chosen": -81.66464233398438, "logps/rejected": -116.70490264892578, "loss": 0.5033, "rewards/accuracies": 0.75, "rewards/chosen": -0.5954616665840149, "rewards/margins": 2.7127346992492676, "rewards/rejected": -3.3081960678100586, "step": 998 }, { "epoch": 1.72, "learning_rate": 4.4942626434339136e-07, "logits/chosen": -2.1982548236846924, "logits/rejected": -1.8551065921783447, "logps/chosen": -98.10105895996094, "logps/rejected": -122.4642333984375, "loss": 0.241, "rewards/accuracies": 1.0, "rewards/chosen": 0.10264283418655396, "rewards/margins": 3.100208282470703, "rewards/rejected": -2.997565507888794, "step": 999 }, { "epoch": 1.72, "learning_rate": 4.49320016999575e-07, "logits/chosen": -2.0710415840148926, "logits/rejected": -2.4233295917510986, "logps/chosen": -79.7011947631836, "logps/rejected": -104.43046569824219, "loss": 0.4557, "rewards/accuracies": 0.5, "rewards/chosen": -0.3089858889579773, "rewards/margins": 1.080129623413086, "rewards/rejected": -1.389115333557129, "step": 1000 }, { "epoch": 1.72, "learning_rate": 4.4921376965575856e-07, "logits/chosen": -2.1466927528381348, "logits/rejected": -2.1024889945983887, "logps/chosen": -99.23371124267578, "logps/rejected": -143.07894897460938, "loss": 0.1478, "rewards/accuracies": 1.0, "rewards/chosen": -0.054523810744285583, "rewards/margins": 3.5344395637512207, "rewards/rejected": -3.588963270187378, "step": 1001 }, { "epoch": 1.72, "learning_rate": 4.4910752231194216e-07, "logits/chosen": -2.357862949371338, "logits/rejected": -2.0501749515533447, "logps/chosen": -82.66177368164062, "logps/rejected": -103.03398132324219, "loss": 0.3177, "rewards/accuracies": 0.5, "rewards/chosen": -0.340313196182251, "rewards/margins": 2.3049612045288086, "rewards/rejected": -2.6452746391296387, "step": 1002 }, { "epoch": 1.73, "learning_rate": 4.490012749681258e-07, "logits/chosen": -2.458488941192627, "logits/rejected": -1.979067087173462, "logps/chosen": -91.00971984863281, "logps/rejected": -99.05205535888672, "loss": 0.2736, "rewards/accuracies": 0.5, "rewards/chosen": -0.7578001022338867, "rewards/margins": 1.2455952167510986, "rewards/rejected": -2.0033953189849854, "step": 1003 }, { "epoch": 1.73, "learning_rate": 4.4889502762430936e-07, "logits/chosen": -2.238809108734131, "logits/rejected": -2.1718366146087646, "logps/chosen": -80.35272979736328, "logps/rejected": -124.93091583251953, "loss": 0.2746, "rewards/accuracies": 1.0, "rewards/chosen": 0.3649059534072876, "rewards/margins": 4.833052635192871, "rewards/rejected": -4.468146324157715, "step": 1004 }, { "epoch": 1.73, "learning_rate": 4.4878878028049295e-07, "logits/chosen": -2.3935484886169434, "logits/rejected": -2.1911497116088867, "logps/chosen": -87.08311462402344, "logps/rejected": -101.67955780029297, "loss": 0.2151, "rewards/accuracies": 1.0, "rewards/chosen": 0.21369744837284088, "rewards/margins": 2.987509250640869, "rewards/rejected": -2.7738118171691895, "step": 1005 }, { "epoch": 1.73, "learning_rate": 4.4868253293667655e-07, "logits/chosen": -2.2528302669525146, "logits/rejected": -1.9214106798171997, "logps/chosen": -83.97105407714844, "logps/rejected": -110.11947631835938, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": 0.2269030511379242, "rewards/margins": 3.456911087036133, "rewards/rejected": -3.230008125305176, "step": 1006 }, { "epoch": 1.73, "learning_rate": 4.4857628559286015e-07, "logits/chosen": -2.3684306144714355, "logits/rejected": -1.8781089782714844, "logps/chosen": -109.55076599121094, "logps/rejected": -127.4919662475586, "loss": 0.3474, "rewards/accuracies": 0.75, "rewards/chosen": -0.5884321928024292, "rewards/margins": 2.960341453552246, "rewards/rejected": -3.548773765563965, "step": 1007 }, { "epoch": 1.73, "learning_rate": 4.484700382490438e-07, "logits/chosen": -2.3449511528015137, "logits/rejected": -1.905555248260498, "logps/chosen": -102.56361389160156, "logps/rejected": -98.47306823730469, "loss": 0.368, "rewards/accuracies": 0.25, "rewards/chosen": -1.5228337049484253, "rewards/margins": 0.45081114768981934, "rewards/rejected": -1.9736448526382446, "step": 1008 }, { "epoch": 1.74, "learning_rate": 4.4836379090522735e-07, "logits/chosen": -2.0614068508148193, "logits/rejected": -2.2577855587005615, "logps/chosen": -101.28013610839844, "logps/rejected": -140.92279052734375, "loss": 0.3782, "rewards/accuracies": 1.0, "rewards/chosen": -0.41052722930908203, "rewards/margins": 3.940610885620117, "rewards/rejected": -4.351138114929199, "step": 1009 }, { "epoch": 1.74, "learning_rate": 4.4825754356141095e-07, "logits/chosen": -1.9334042072296143, "logits/rejected": -2.388908624649048, "logps/chosen": -80.92660522460938, "logps/rejected": -130.71603393554688, "loss": 0.3274, "rewards/accuracies": 1.0, "rewards/chosen": -0.4014543294906616, "rewards/margins": 2.8460123538970947, "rewards/rejected": -3.247466564178467, "step": 1010 }, { "epoch": 1.74, "learning_rate": 4.4815129621759455e-07, "logits/chosen": -1.9870827198028564, "logits/rejected": -2.1002092361450195, "logps/chosen": -90.80743408203125, "logps/rejected": -120.40473175048828, "loss": 0.2248, "rewards/accuracies": 1.0, "rewards/chosen": 0.19761621952056885, "rewards/margins": 2.4294705390930176, "rewards/rejected": -2.2318546772003174, "step": 1011 }, { "epoch": 1.74, "learning_rate": 4.4804504887377815e-07, "logits/chosen": -1.9549641609191895, "logits/rejected": -2.406886577606201, "logps/chosen": -103.36134338378906, "logps/rejected": -130.26815795898438, "loss": 0.3251, "rewards/accuracies": 0.75, "rewards/chosen": -1.8952932357788086, "rewards/margins": 0.041041016578674316, "rewards/rejected": -1.936334252357483, "step": 1012 }, { "epoch": 1.74, "learning_rate": 4.479388015299617e-07, "logits/chosen": -2.0950303077697754, "logits/rejected": -2.1209418773651123, "logps/chosen": -103.63014221191406, "logps/rejected": -126.35468292236328, "loss": 0.2887, "rewards/accuracies": 0.75, "rewards/chosen": -0.5914050936698914, "rewards/margins": 2.3289411067962646, "rewards/rejected": -2.920346260070801, "step": 1013 }, { "epoch": 1.75, "learning_rate": 4.4783255418614534e-07, "logits/chosen": -2.244373083114624, "logits/rejected": -2.416515350341797, "logps/chosen": -113.59721374511719, "logps/rejected": -133.76657104492188, "loss": 0.2618, "rewards/accuracies": 1.0, "rewards/chosen": -0.6300272345542908, "rewards/margins": 2.703387975692749, "rewards/rejected": -3.3334152698516846, "step": 1014 }, { "epoch": 1.75, "learning_rate": 4.4772630684232894e-07, "logits/chosen": -2.286393642425537, "logits/rejected": -2.3369407653808594, "logps/chosen": -78.6734619140625, "logps/rejected": -126.43923950195312, "loss": 0.4246, "rewards/accuracies": 1.0, "rewards/chosen": 0.1129755973815918, "rewards/margins": 5.16261100769043, "rewards/rejected": -5.049635410308838, "step": 1015 }, { "epoch": 1.75, "learning_rate": 4.476200594985125e-07, "logits/chosen": -2.4040582180023193, "logits/rejected": -2.0816595554351807, "logps/chosen": -117.23417663574219, "logps/rejected": -132.29905700683594, "loss": 0.3904, "rewards/accuracies": 0.75, "rewards/chosen": -0.7548803091049194, "rewards/margins": 3.581876754760742, "rewards/rejected": -4.336757183074951, "step": 1016 }, { "epoch": 1.75, "learning_rate": 4.4751381215469614e-07, "logits/chosen": -2.1996874809265137, "logits/rejected": -2.3408961296081543, "logps/chosen": -67.57768249511719, "logps/rejected": -135.34588623046875, "loss": 0.2278, "rewards/accuracies": 0.75, "rewards/chosen": -0.5527553558349609, "rewards/margins": 5.116776943206787, "rewards/rejected": -5.66953182220459, "step": 1017 }, { "epoch": 1.75, "learning_rate": 4.474075648108797e-07, "logits/chosen": -1.9239530563354492, "logits/rejected": -2.290050506591797, "logps/chosen": -106.81537628173828, "logps/rejected": -119.09930419921875, "loss": 0.2649, "rewards/accuracies": 1.0, "rewards/chosen": -1.1512978076934814, "rewards/margins": 1.3008408546447754, "rewards/rejected": -2.452138662338257, "step": 1018 }, { "epoch": 1.75, "learning_rate": 4.473013174670633e-07, "logits/chosen": -2.1333162784576416, "logits/rejected": -2.1983771324157715, "logps/chosen": -87.45793151855469, "logps/rejected": -152.43344116210938, "loss": 0.1821, "rewards/accuracies": 1.0, "rewards/chosen": 0.17690162360668182, "rewards/margins": 6.090255260467529, "rewards/rejected": -5.91335391998291, "step": 1019 }, { "epoch": 1.76, "learning_rate": 4.4719507012324694e-07, "logits/chosen": -2.2397141456604004, "logits/rejected": -1.997175931930542, "logps/chosen": -89.77674102783203, "logps/rejected": -110.95940399169922, "loss": 0.2793, "rewards/accuracies": 1.0, "rewards/chosen": -0.4360117018222809, "rewards/margins": 2.0333261489868164, "rewards/rejected": -2.4693377017974854, "step": 1020 }, { "epoch": 1.76, "learning_rate": 4.470888227794305e-07, "logits/chosen": -1.973262071609497, "logits/rejected": -2.4235708713531494, "logps/chosen": -83.36148834228516, "logps/rejected": -116.78751373291016, "loss": 0.3736, "rewards/accuracies": 0.75, "rewards/chosen": -0.4082714021205902, "rewards/margins": 1.9306631088256836, "rewards/rejected": -2.3389346599578857, "step": 1021 }, { "epoch": 1.76, "learning_rate": 4.469825754356141e-07, "logits/chosen": -2.2424144744873047, "logits/rejected": -2.3072338104248047, "logps/chosen": -78.80946350097656, "logps/rejected": -128.0997772216797, "loss": 0.2893, "rewards/accuracies": 1.0, "rewards/chosen": -0.09239540994167328, "rewards/margins": 4.703845977783203, "rewards/rejected": -4.796241760253906, "step": 1022 }, { "epoch": 1.76, "learning_rate": 4.468763280917977e-07, "logits/chosen": -1.8028106689453125, "logits/rejected": -2.366835594177246, "logps/chosen": -89.1304931640625, "logps/rejected": -132.62677001953125, "loss": 0.1861, "rewards/accuracies": 0.75, "rewards/chosen": -0.5963964462280273, "rewards/margins": 3.314422845840454, "rewards/rejected": -3.9108192920684814, "step": 1023 }, { "epoch": 1.76, "learning_rate": 4.467700807479813e-07, "logits/chosen": -2.0828757286071777, "logits/rejected": -2.223388195037842, "logps/chosen": -65.76051330566406, "logps/rejected": -123.64639282226562, "loss": 0.1947, "rewards/accuracies": 0.75, "rewards/chosen": 1.041498064994812, "rewards/margins": 5.904673099517822, "rewards/rejected": -4.863174915313721, "step": 1024 }, { "epoch": 1.76, "learning_rate": 4.466638334041649e-07, "logits/chosen": -2.0861563682556152, "logits/rejected": -2.266998052597046, "logps/chosen": -100.65170288085938, "logps/rejected": -172.1948699951172, "loss": 0.1129, "rewards/accuracies": 1.0, "rewards/chosen": -0.49677324295043945, "rewards/margins": 4.988025665283203, "rewards/rejected": -5.484798908233643, "step": 1025 }, { "epoch": 1.77, "learning_rate": 4.465575860603485e-07, "logits/chosen": -2.125424385070801, "logits/rejected": -2.1556990146636963, "logps/chosen": -71.99149322509766, "logps/rejected": -115.21206665039062, "loss": 0.1817, "rewards/accuracies": 1.0, "rewards/chosen": 0.09953898191452026, "rewards/margins": 4.996621608734131, "rewards/rejected": -4.897082805633545, "step": 1026 }, { "epoch": 1.77, "learning_rate": 4.464513387165321e-07, "logits/chosen": -2.3375723361968994, "logits/rejected": -2.173642158508301, "logps/chosen": -89.4407958984375, "logps/rejected": -104.3108901977539, "loss": 0.2564, "rewards/accuracies": 0.5, "rewards/chosen": -0.2663561999797821, "rewards/margins": 1.5212444067001343, "rewards/rejected": -1.7876007556915283, "step": 1027 }, { "epoch": 1.77, "learning_rate": 4.463450913727156e-07, "logits/chosen": -2.281374931335449, "logits/rejected": -2.3461647033691406, "logps/chosen": -84.18891906738281, "logps/rejected": -120.56629180908203, "loss": 0.4401, "rewards/accuracies": 0.75, "rewards/chosen": -1.5325281620025635, "rewards/margins": 2.8113770484924316, "rewards/rejected": -4.343905448913574, "step": 1028 }, { "epoch": 1.77, "learning_rate": 4.462388440288993e-07, "logits/chosen": -1.9975638389587402, "logits/rejected": -2.1762454509735107, "logps/chosen": -83.99279022216797, "logps/rejected": -118.68318176269531, "loss": 0.2438, "rewards/accuracies": 1.0, "rewards/chosen": 0.24849817156791687, "rewards/margins": 3.5486209392547607, "rewards/rejected": -3.3001229763031006, "step": 1029 }, { "epoch": 1.77, "learning_rate": 4.461325966850829e-07, "logits/chosen": -2.1765942573547363, "logits/rejected": -2.159148693084717, "logps/chosen": -90.01705169677734, "logps/rejected": -108.80499267578125, "loss": 0.3229, "rewards/accuracies": 1.0, "rewards/chosen": -0.9939643144607544, "rewards/margins": 1.5097589492797852, "rewards/rejected": -2.50372314453125, "step": 1030 }, { "epoch": 1.77, "learning_rate": 4.460263493412664e-07, "logits/chosen": -2.200049877166748, "logits/rejected": -2.1440043449401855, "logps/chosen": -115.568603515625, "logps/rejected": -147.6862030029297, "loss": 0.1783, "rewards/accuracies": 1.0, "rewards/chosen": -0.756671130657196, "rewards/margins": 3.5968570709228516, "rewards/rejected": -4.353528022766113, "step": 1031 }, { "epoch": 1.78, "learning_rate": 4.4592010199745007e-07, "logits/chosen": -2.1417269706726074, "logits/rejected": -2.3040971755981445, "logps/chosen": -81.23856353759766, "logps/rejected": -132.86618041992188, "loss": 0.1624, "rewards/accuracies": 1.0, "rewards/chosen": -0.21385861933231354, "rewards/margins": 5.734091758728027, "rewards/rejected": -5.94795036315918, "step": 1032 }, { "epoch": 1.78, "learning_rate": 4.458138546536336e-07, "logits/chosen": -2.12519907951355, "logits/rejected": -2.188417911529541, "logps/chosen": -103.78663635253906, "logps/rejected": -134.9180908203125, "loss": 0.1731, "rewards/accuracies": 1.0, "rewards/chosen": -0.43645840883255005, "rewards/margins": 3.6302547454833984, "rewards/rejected": -4.066712856292725, "step": 1033 }, { "epoch": 1.78, "learning_rate": 4.457076073098172e-07, "logits/chosen": -2.170764207839966, "logits/rejected": -2.3154876232147217, "logps/chosen": -78.22283172607422, "logps/rejected": -140.2451629638672, "loss": 0.3571, "rewards/accuracies": 0.75, "rewards/chosen": -0.977840006351471, "rewards/margins": 4.399237632751465, "rewards/rejected": -5.377078056335449, "step": 1034 }, { "epoch": 1.78, "learning_rate": 4.4560135996600087e-07, "logits/chosen": -2.225773811340332, "logits/rejected": -1.8924851417541504, "logps/chosen": -113.80622100830078, "logps/rejected": -148.3607940673828, "loss": 0.3307, "rewards/accuracies": 1.0, "rewards/chosen": -0.8327215909957886, "rewards/margins": 4.634054660797119, "rewards/rejected": -5.466776371002197, "step": 1035 }, { "epoch": 1.78, "learning_rate": 4.454951126221844e-07, "logits/chosen": -2.356815814971924, "logits/rejected": -1.8057396411895752, "logps/chosen": -116.23150634765625, "logps/rejected": -126.68316650390625, "loss": 0.4337, "rewards/accuracies": 1.0, "rewards/chosen": 0.32247447967529297, "rewards/margins": 4.361717700958252, "rewards/rejected": -4.039243698120117, "step": 1036 }, { "epoch": 1.78, "learning_rate": 4.45388865278368e-07, "logits/chosen": -2.1049628257751465, "logits/rejected": -2.3415873050689697, "logps/chosen": -88.39875793457031, "logps/rejected": -103.98147583007812, "loss": 0.2591, "rewards/accuracies": 0.75, "rewards/chosen": -0.3308365046977997, "rewards/margins": 2.1035752296447754, "rewards/rejected": -2.4344117641448975, "step": 1037 }, { "epoch": 1.79, "learning_rate": 4.452826179345516e-07, "logits/chosen": -2.317704439163208, "logits/rejected": -2.0010311603546143, "logps/chosen": -83.5619125366211, "logps/rejected": -122.45682525634766, "loss": 0.1429, "rewards/accuracies": 1.0, "rewards/chosen": 0.327852725982666, "rewards/margins": 4.9251322746276855, "rewards/rejected": -4.5972795486450195, "step": 1038 }, { "epoch": 1.79, "learning_rate": 4.451763705907352e-07, "logits/chosen": -2.2256202697753906, "logits/rejected": -2.4989538192749023, "logps/chosen": -83.22881317138672, "logps/rejected": -148.79721069335938, "loss": 0.2727, "rewards/accuracies": 1.0, "rewards/chosen": -0.5250983834266663, "rewards/margins": 5.26176118850708, "rewards/rejected": -5.78685998916626, "step": 1039 }, { "epoch": 1.79, "learning_rate": 4.4507012324691876e-07, "logits/chosen": -1.7977712154388428, "logits/rejected": -2.4037933349609375, "logps/chosen": -81.10061645507812, "logps/rejected": -156.60626220703125, "loss": 0.1713, "rewards/accuracies": 1.0, "rewards/chosen": -0.26209649443626404, "rewards/margins": 3.47914981842041, "rewards/rejected": -3.741246461868286, "step": 1040 }, { "epoch": 1.79, "learning_rate": 4.449638759031024e-07, "logits/chosen": -1.9999589920043945, "logits/rejected": -2.220180034637451, "logps/chosen": -84.33285522460938, "logps/rejected": -145.1181640625, "loss": 0.2694, "rewards/accuracies": 1.0, "rewards/chosen": -0.8132773637771606, "rewards/margins": 4.680647850036621, "rewards/rejected": -5.493925094604492, "step": 1041 }, { "epoch": 1.79, "learning_rate": 4.44857628559286e-07, "logits/chosen": -2.0465145111083984, "logits/rejected": -2.2507200241088867, "logps/chosen": -93.23463439941406, "logps/rejected": -137.73867797851562, "loss": 0.158, "rewards/accuracies": 1.0, "rewards/chosen": -0.9653909802436829, "rewards/margins": 2.7916502952575684, "rewards/rejected": -3.7570416927337646, "step": 1042 }, { "epoch": 1.8, "learning_rate": 4.4475138121546955e-07, "logits/chosen": -2.353029251098633, "logits/rejected": -2.1024508476257324, "logps/chosen": -102.52527618408203, "logps/rejected": -101.5197525024414, "loss": 0.3865, "rewards/accuracies": 0.75, "rewards/chosen": -0.7468138337135315, "rewards/margins": 1.3450978994369507, "rewards/rejected": -2.091911792755127, "step": 1043 }, { "epoch": 1.8, "learning_rate": 4.446451338716532e-07, "logits/chosen": -2.1395368576049805, "logits/rejected": -1.9947994947433472, "logps/chosen": -103.21366119384766, "logps/rejected": -151.19427490234375, "loss": 0.3136, "rewards/accuracies": 1.0, "rewards/chosen": -0.9972960352897644, "rewards/margins": 3.7237777709960938, "rewards/rejected": -4.721074104309082, "step": 1044 }, { "epoch": 1.8, "learning_rate": 4.4453888652783675e-07, "logits/chosen": -2.183546304702759, "logits/rejected": -1.9761382341384888, "logps/chosen": -75.35514068603516, "logps/rejected": -125.85102844238281, "loss": 0.2582, "rewards/accuracies": 1.0, "rewards/chosen": 0.373166024684906, "rewards/margins": 3.947362184524536, "rewards/rejected": -3.5741963386535645, "step": 1045 }, { "epoch": 1.8, "learning_rate": 4.4443263918402035e-07, "logits/chosen": -2.0463452339172363, "logits/rejected": -2.276592254638672, "logps/chosen": -92.84642028808594, "logps/rejected": -139.69412231445312, "loss": 0.1732, "rewards/accuracies": 1.0, "rewards/chosen": -0.4675367474555969, "rewards/margins": 3.519300937652588, "rewards/rejected": -3.98683762550354, "step": 1046 }, { "epoch": 1.8, "learning_rate": 4.44326391840204e-07, "logits/chosen": -2.1589136123657227, "logits/rejected": -2.177170991897583, "logps/chosen": -94.57518768310547, "logps/rejected": -115.65742492675781, "loss": 0.3999, "rewards/accuracies": 0.75, "rewards/chosen": -1.309010624885559, "rewards/margins": 1.4660753011703491, "rewards/rejected": -2.775085926055908, "step": 1047 }, { "epoch": 1.8, "learning_rate": 4.4422014449638755e-07, "logits/chosen": -2.2551043033599854, "logits/rejected": -1.7096867561340332, "logps/chosen": -103.71829223632812, "logps/rejected": -85.01472473144531, "loss": 0.2994, "rewards/accuracies": 0.75, "rewards/chosen": -0.613024890422821, "rewards/margins": 1.585716724395752, "rewards/rejected": -2.1987414360046387, "step": 1048 }, { "epoch": 1.81, "learning_rate": 4.441138971525712e-07, "logits/chosen": -1.8783087730407715, "logits/rejected": -2.2858757972717285, "logps/chosen": -91.2543716430664, "logps/rejected": -148.05252075195312, "loss": 0.3607, "rewards/accuracies": 0.75, "rewards/chosen": -0.6367525458335876, "rewards/margins": 3.095024824142456, "rewards/rejected": -3.7317771911621094, "step": 1049 }, { "epoch": 1.81, "learning_rate": 4.4400764980875475e-07, "logits/chosen": -1.8737640380859375, "logits/rejected": -2.064988374710083, "logps/chosen": -70.46238708496094, "logps/rejected": -121.85302734375, "loss": 0.2005, "rewards/accuracies": 1.0, "rewards/chosen": 0.3765251338481903, "rewards/margins": 5.7799153327941895, "rewards/rejected": -5.403389930725098, "step": 1050 }, { "epoch": 1.81, "learning_rate": 4.4390140246493834e-07, "logits/chosen": -2.2084708213806152, "logits/rejected": -2.125702381134033, "logps/chosen": -80.56844329833984, "logps/rejected": -119.21149444580078, "loss": 0.2539, "rewards/accuracies": 1.0, "rewards/chosen": -0.39872828125953674, "rewards/margins": 3.3809452056884766, "rewards/rejected": -3.7796735763549805, "step": 1051 }, { "epoch": 1.81, "learning_rate": 4.43795155121122e-07, "logits/chosen": -2.1965813636779785, "logits/rejected": -2.205111503601074, "logps/chosen": -102.91087341308594, "logps/rejected": -159.00927734375, "loss": 0.4048, "rewards/accuracies": 1.0, "rewards/chosen": -0.7609857320785522, "rewards/margins": 5.897136211395264, "rewards/rejected": -6.658121585845947, "step": 1052 }, { "epoch": 1.81, "learning_rate": 4.4368890777730554e-07, "logits/chosen": -2.309138774871826, "logits/rejected": -2.2665915489196777, "logps/chosen": -74.75959777832031, "logps/rejected": -108.90000915527344, "loss": 0.2871, "rewards/accuracies": 1.0, "rewards/chosen": 0.3182012438774109, "rewards/margins": 3.970182180404663, "rewards/rejected": -3.6519811153411865, "step": 1053 }, { "epoch": 1.81, "learning_rate": 4.4358266043348914e-07, "logits/chosen": -1.97416090965271, "logits/rejected": -2.115224838256836, "logps/chosen": -93.26175689697266, "logps/rejected": -110.27076721191406, "loss": 0.4488, "rewards/accuracies": 1.0, "rewards/chosen": -0.1407533586025238, "rewards/margins": 2.5900259017944336, "rewards/rejected": -2.7307794094085693, "step": 1054 }, { "epoch": 1.82, "learning_rate": 4.4347641308967274e-07, "logits/chosen": -2.0865049362182617, "logits/rejected": -1.8843629360198975, "logps/chosen": -103.9317626953125, "logps/rejected": -177.26585388183594, "loss": 0.2507, "rewards/accuracies": 1.0, "rewards/chosen": -0.11445885896682739, "rewards/margins": 5.857826232910156, "rewards/rejected": -5.972285270690918, "step": 1055 }, { "epoch": 1.82, "learning_rate": 4.4337016574585634e-07, "logits/chosen": -2.3670146465301514, "logits/rejected": -2.2311558723449707, "logps/chosen": -77.99313354492188, "logps/rejected": -123.16181182861328, "loss": 0.2446, "rewards/accuracies": 1.0, "rewards/chosen": -0.3321382403373718, "rewards/margins": 3.3847570419311523, "rewards/rejected": -3.71689510345459, "step": 1056 }, { "epoch": 1.82, "learning_rate": 4.4326391840203994e-07, "logits/chosen": -2.303088665008545, "logits/rejected": -2.0138602256774902, "logps/chosen": -108.03732299804688, "logps/rejected": -129.77745056152344, "loss": 0.2611, "rewards/accuracies": 0.75, "rewards/chosen": -1.4086421728134155, "rewards/margins": 3.2140538692474365, "rewards/rejected": -4.6226959228515625, "step": 1057 }, { "epoch": 1.82, "learning_rate": 4.4315767105822354e-07, "logits/chosen": -1.9157639741897583, "logits/rejected": -2.3027727603912354, "logps/chosen": -92.46717834472656, "logps/rejected": -145.76812744140625, "loss": 0.4563, "rewards/accuracies": 0.75, "rewards/chosen": -0.010795384645462036, "rewards/margins": 3.186211585998535, "rewards/rejected": -3.197006940841675, "step": 1058 }, { "epoch": 1.82, "learning_rate": 4.4305142371440714e-07, "logits/chosen": -2.2966957092285156, "logits/rejected": -2.458014965057373, "logps/chosen": -89.306884765625, "logps/rejected": -143.3628387451172, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": -0.6635158658027649, "rewards/margins": 4.588883876800537, "rewards/rejected": -5.252399444580078, "step": 1059 }, { "epoch": 1.82, "learning_rate": 4.429451763705907e-07, "logits/chosen": -2.176896333694458, "logits/rejected": -2.0678393840789795, "logps/chosen": -85.87166595458984, "logps/rejected": -97.66402435302734, "loss": 0.203, "rewards/accuracies": 1.0, "rewards/chosen": -0.15190260112285614, "rewards/margins": 1.7572818994522095, "rewards/rejected": -1.909184455871582, "step": 1060 }, { "epoch": 1.83, "learning_rate": 4.4283892902677433e-07, "logits/chosen": -2.1004741191864014, "logits/rejected": -2.2980856895446777, "logps/chosen": -104.52446746826172, "logps/rejected": -126.8399658203125, "loss": 0.341, "rewards/accuracies": 0.75, "rewards/chosen": -1.2196730375289917, "rewards/margins": 2.5535898208618164, "rewards/rejected": -3.7732629776000977, "step": 1061 }, { "epoch": 1.83, "learning_rate": 4.4273268168295793e-07, "logits/chosen": -2.0396218299865723, "logits/rejected": -2.1856698989868164, "logps/chosen": -61.451568603515625, "logps/rejected": -121.79071044921875, "loss": 0.1519, "rewards/accuracies": 1.0, "rewards/chosen": 0.2621499001979828, "rewards/margins": 3.828580379486084, "rewards/rejected": -3.5664305686950684, "step": 1062 }, { "epoch": 1.83, "learning_rate": 4.426264343391415e-07, "logits/chosen": -2.272312641143799, "logits/rejected": -2.2884178161621094, "logps/chosen": -102.29395294189453, "logps/rejected": -127.2339096069336, "loss": 0.4452, "rewards/accuracies": 0.75, "rewards/chosen": -1.6880825757980347, "rewards/margins": 2.436551094055176, "rewards/rejected": -4.124633312225342, "step": 1063 }, { "epoch": 1.83, "learning_rate": 4.4252018699532513e-07, "logits/chosen": -2.2780072689056396, "logits/rejected": -2.1154086589813232, "logps/chosen": -110.48030853271484, "logps/rejected": -144.72781372070312, "loss": 0.3177, "rewards/accuracies": 1.0, "rewards/chosen": -1.1593866348266602, "rewards/margins": 4.210263252258301, "rewards/rejected": -5.369649410247803, "step": 1064 }, { "epoch": 1.83, "learning_rate": 4.424139396515087e-07, "logits/chosen": -1.9606342315673828, "logits/rejected": -2.2296791076660156, "logps/chosen": -72.06491088867188, "logps/rejected": -140.3797149658203, "loss": 0.2275, "rewards/accuracies": 1.0, "rewards/chosen": 0.2760561406612396, "rewards/margins": 6.588575839996338, "rewards/rejected": -6.312519550323486, "step": 1065 }, { "epoch": 1.83, "learning_rate": 4.423076923076923e-07, "logits/chosen": -2.206582546234131, "logits/rejected": -2.2213222980499268, "logps/chosen": -79.03672790527344, "logps/rejected": -137.38270568847656, "loss": 0.3092, "rewards/accuracies": 1.0, "rewards/chosen": 0.5258205533027649, "rewards/margins": 6.317897319793701, "rewards/rejected": -5.792076587677002, "step": 1066 }, { "epoch": 1.84, "learning_rate": 4.4220144496387593e-07, "logits/chosen": -2.0613715648651123, "logits/rejected": -2.197432518005371, "logps/chosen": -92.0022201538086, "logps/rejected": -135.1966552734375, "loss": 0.329, "rewards/accuracies": 0.75, "rewards/chosen": -0.2151632457971573, "rewards/margins": 3.6563405990600586, "rewards/rejected": -3.8715038299560547, "step": 1067 }, { "epoch": 1.84, "learning_rate": 4.4209519762005947e-07, "logits/chosen": -2.2072949409484863, "logits/rejected": -2.0167436599731445, "logps/chosen": -95.83900451660156, "logps/rejected": -142.21524047851562, "loss": 0.1899, "rewards/accuracies": 1.0, "rewards/chosen": -0.07089176774024963, "rewards/margins": 4.364090442657471, "rewards/rejected": -4.434982776641846, "step": 1068 }, { "epoch": 1.84, "learning_rate": 4.4198895027624307e-07, "logits/chosen": -1.7474942207336426, "logits/rejected": -2.290339946746826, "logps/chosen": -75.66718292236328, "logps/rejected": -120.0866470336914, "loss": 0.2325, "rewards/accuracies": 0.75, "rewards/chosen": 0.04040423780679703, "rewards/margins": 3.2927145957946777, "rewards/rejected": -3.2523105144500732, "step": 1069 }, { "epoch": 1.84, "learning_rate": 4.4188270293242667e-07, "logits/chosen": -2.0768017768859863, "logits/rejected": -2.017493724822998, "logps/chosen": -70.20372772216797, "logps/rejected": -110.52360534667969, "loss": 0.3606, "rewards/accuracies": 0.75, "rewards/chosen": 0.14193546772003174, "rewards/margins": 3.8343892097473145, "rewards/rejected": -3.692453622817993, "step": 1070 }, { "epoch": 1.84, "learning_rate": 4.4177645558861027e-07, "logits/chosen": -2.0057930946350098, "logits/rejected": -2.2326443195343018, "logps/chosen": -77.72784423828125, "logps/rejected": -103.08818054199219, "loss": 0.3123, "rewards/accuracies": 0.5, "rewards/chosen": -0.6339845061302185, "rewards/margins": 1.1716785430908203, "rewards/rejected": -1.8056628704071045, "step": 1071 }, { "epoch": 1.85, "learning_rate": 4.416702082447938e-07, "logits/chosen": -1.9853918552398682, "logits/rejected": -2.221118450164795, "logps/chosen": -88.30677032470703, "logps/rejected": -134.23341369628906, "loss": 0.4509, "rewards/accuracies": 1.0, "rewards/chosen": -0.7918121218681335, "rewards/margins": 3.3609800338745117, "rewards/rejected": -4.152792453765869, "step": 1072 }, { "epoch": 1.85, "learning_rate": 4.4156396090097747e-07, "logits/chosen": -1.9531190395355225, "logits/rejected": -2.3502631187438965, "logps/chosen": -71.53614044189453, "logps/rejected": -117.35926818847656, "loss": 0.3065, "rewards/accuracies": 0.75, "rewards/chosen": -0.8494769930839539, "rewards/margins": 2.058311939239502, "rewards/rejected": -2.9077887535095215, "step": 1073 }, { "epoch": 1.85, "learning_rate": 4.4145771355716107e-07, "logits/chosen": -2.062995433807373, "logits/rejected": -2.2400927543640137, "logps/chosen": -95.12444305419922, "logps/rejected": -124.14007568359375, "loss": 0.2117, "rewards/accuracies": 0.75, "rewards/chosen": -0.10789012163877487, "rewards/margins": 1.7934986352920532, "rewards/rejected": -1.9013885259628296, "step": 1074 }, { "epoch": 1.85, "learning_rate": 4.413514662133446e-07, "logits/chosen": -2.172079086303711, "logits/rejected": -2.0524303913116455, "logps/chosen": -114.11225128173828, "logps/rejected": -140.21875, "loss": 0.3086, "rewards/accuracies": 0.75, "rewards/chosen": -0.4443499743938446, "rewards/margins": 3.927184581756592, "rewards/rejected": -4.37153434753418, "step": 1075 }, { "epoch": 1.85, "learning_rate": 4.4124521886952826e-07, "logits/chosen": -2.2926278114318848, "logits/rejected": -1.9778175354003906, "logps/chosen": -118.70988464355469, "logps/rejected": -131.65611267089844, "loss": 0.302, "rewards/accuracies": 0.75, "rewards/chosen": -1.5148409605026245, "rewards/margins": 1.9189664125442505, "rewards/rejected": -3.433807373046875, "step": 1076 }, { "epoch": 1.85, "learning_rate": 4.411389715257118e-07, "logits/chosen": -2.158778667449951, "logits/rejected": -2.125082492828369, "logps/chosen": -82.85860443115234, "logps/rejected": -127.21955108642578, "loss": 0.2227, "rewards/accuracies": 1.0, "rewards/chosen": -0.0588793009519577, "rewards/margins": 4.658946514129639, "rewards/rejected": -4.717825889587402, "step": 1077 }, { "epoch": 1.86, "learning_rate": 4.410327241818954e-07, "logits/chosen": -1.8789196014404297, "logits/rejected": -2.4785239696502686, "logps/chosen": -87.127685546875, "logps/rejected": -135.97662353515625, "loss": 0.3065, "rewards/accuracies": 1.0, "rewards/chosen": -0.2943008244037628, "rewards/margins": 3.177001714706421, "rewards/rejected": -3.4713025093078613, "step": 1078 }, { "epoch": 1.86, "learning_rate": 4.4092647683807906e-07, "logits/chosen": -1.8914588689804077, "logits/rejected": -2.2874233722686768, "logps/chosen": -92.08106994628906, "logps/rejected": -113.47817993164062, "loss": 0.3903, "rewards/accuracies": 1.0, "rewards/chosen": -0.2132846862077713, "rewards/margins": 2.097668170928955, "rewards/rejected": -2.31095290184021, "step": 1079 }, { "epoch": 1.86, "learning_rate": 4.408202294942626e-07, "logits/chosen": -1.9732816219329834, "logits/rejected": -2.1271960735321045, "logps/chosen": -86.68440246582031, "logps/rejected": -147.0128173828125, "loss": 0.2475, "rewards/accuracies": 1.0, "rewards/chosen": -0.7689327001571655, "rewards/margins": 5.948634147644043, "rewards/rejected": -6.717566967010498, "step": 1080 }, { "epoch": 1.86, "learning_rate": 4.407139821504462e-07, "logits/chosen": -2.184920072555542, "logits/rejected": -2.004737138748169, "logps/chosen": -82.79548645019531, "logps/rejected": -107.97222137451172, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": -1.2514472007751465, "rewards/margins": 2.7194676399230957, "rewards/rejected": -3.970914840698242, "step": 1081 }, { "epoch": 1.86, "learning_rate": 4.406077348066298e-07, "logits/chosen": -1.8311277627944946, "logits/rejected": -2.227998733520508, "logps/chosen": -73.07295227050781, "logps/rejected": -124.49014282226562, "loss": 0.2607, "rewards/accuracies": 1.0, "rewards/chosen": -0.04250069707632065, "rewards/margins": 3.885925054550171, "rewards/rejected": -3.9284255504608154, "step": 1082 }, { "epoch": 1.86, "learning_rate": 4.405014874628134e-07, "logits/chosen": -1.9357233047485352, "logits/rejected": -2.070247173309326, "logps/chosen": -108.78113555908203, "logps/rejected": -157.13037109375, "loss": 0.2438, "rewards/accuracies": 1.0, "rewards/chosen": -1.415452241897583, "rewards/margins": 3.7557711601257324, "rewards/rejected": -5.171223163604736, "step": 1083 }, { "epoch": 1.87, "learning_rate": 4.40395240118997e-07, "logits/chosen": -2.0269205570220947, "logits/rejected": -2.180925130844116, "logps/chosen": -73.06031036376953, "logps/rejected": -141.32733154296875, "loss": 0.2379, "rewards/accuracies": 1.0, "rewards/chosen": 0.4518372416496277, "rewards/margins": 6.216222763061523, "rewards/rejected": -5.76438570022583, "step": 1084 }, { "epoch": 1.87, "learning_rate": 4.402889927751806e-07, "logits/chosen": -2.0993216037750244, "logits/rejected": -1.8858270645141602, "logps/chosen": -102.50273132324219, "logps/rejected": -119.14567565917969, "loss": 0.2622, "rewards/accuracies": 0.75, "rewards/chosen": -0.7734154462814331, "rewards/margins": 2.1964077949523926, "rewards/rejected": -2.9698235988616943, "step": 1085 }, { "epoch": 1.87, "learning_rate": 4.401827454313642e-07, "logits/chosen": -1.7039172649383545, "logits/rejected": -2.233057975769043, "logps/chosen": -96.55360412597656, "logps/rejected": -154.91555786132812, "loss": 0.1773, "rewards/accuracies": 1.0, "rewards/chosen": -0.7031502723693848, "rewards/margins": 3.5959529876708984, "rewards/rejected": -4.299103260040283, "step": 1086 }, { "epoch": 1.87, "learning_rate": 4.4007649808754775e-07, "logits/chosen": -2.102635383605957, "logits/rejected": -2.2380049228668213, "logps/chosen": -90.36422729492188, "logps/rejected": -109.02458190917969, "loss": 0.2718, "rewards/accuracies": 0.75, "rewards/chosen": -1.6129307746887207, "rewards/margins": 1.793365716934204, "rewards/rejected": -3.406296491622925, "step": 1087 }, { "epoch": 1.87, "learning_rate": 4.399702507437314e-07, "logits/chosen": -1.876299500465393, "logits/rejected": -2.245335817337036, "logps/chosen": -116.99720764160156, "logps/rejected": -157.48878479003906, "loss": 0.4339, "rewards/accuracies": 1.0, "rewards/chosen": -1.3086210489273071, "rewards/margins": 2.610825777053833, "rewards/rejected": -3.9194469451904297, "step": 1088 }, { "epoch": 1.87, "learning_rate": 4.39864003399915e-07, "logits/chosen": -1.940509557723999, "logits/rejected": -2.0941755771636963, "logps/chosen": -93.3419189453125, "logps/rejected": -133.04779052734375, "loss": 0.1177, "rewards/accuracies": 1.0, "rewards/chosen": -0.741960346698761, "rewards/margins": 4.155586242675781, "rewards/rejected": -4.897546768188477, "step": 1089 }, { "epoch": 1.88, "learning_rate": 4.3975775605609854e-07, "logits/chosen": -2.117553949356079, "logits/rejected": -2.1705892086029053, "logps/chosen": -81.76811981201172, "logps/rejected": -119.89321899414062, "loss": 0.1942, "rewards/accuracies": 1.0, "rewards/chosen": 0.13027161359786987, "rewards/margins": 3.2796242237091064, "rewards/rejected": -3.149352550506592, "step": 1090 }, { "epoch": 1.88, "learning_rate": 4.396515087122822e-07, "logits/chosen": -2.336688995361328, "logits/rejected": -1.961869239807129, "logps/chosen": -83.52720642089844, "logps/rejected": -124.21424865722656, "loss": 0.2932, "rewards/accuracies": 1.0, "rewards/chosen": -0.36076822876930237, "rewards/margins": 2.7223470211029053, "rewards/rejected": -3.083115339279175, "step": 1091 }, { "epoch": 1.88, "learning_rate": 4.3954526136846574e-07, "logits/chosen": -2.1669809818267822, "logits/rejected": -2.167093515396118, "logps/chosen": -102.14871215820312, "logps/rejected": -144.44259643554688, "loss": 0.2257, "rewards/accuracies": 0.75, "rewards/chosen": -0.5506129264831543, "rewards/margins": 5.586034774780273, "rewards/rejected": -6.136647701263428, "step": 1092 }, { "epoch": 1.88, "learning_rate": 4.394390140246494e-07, "logits/chosen": -2.110852003097534, "logits/rejected": -2.1133172512054443, "logps/chosen": -100.00814819335938, "logps/rejected": -146.90850830078125, "loss": 0.1965, "rewards/accuracies": 0.75, "rewards/chosen": -0.9095454812049866, "rewards/margins": 3.8966803550720215, "rewards/rejected": -4.806225776672363, "step": 1093 }, { "epoch": 1.88, "learning_rate": 4.39332766680833e-07, "logits/chosen": -2.1227238178253174, "logits/rejected": -2.2263541221618652, "logps/chosen": -85.75399780273438, "logps/rejected": -138.9171600341797, "loss": 0.1656, "rewards/accuracies": 1.0, "rewards/chosen": -0.22787675261497498, "rewards/margins": 4.642366409301758, "rewards/rejected": -4.870243549346924, "step": 1094 }, { "epoch": 1.88, "learning_rate": 4.3922651933701654e-07, "logits/chosen": -2.0569539070129395, "logits/rejected": -2.0359578132629395, "logps/chosen": -79.76213836669922, "logps/rejected": -122.20729064941406, "loss": 0.247, "rewards/accuracies": 1.0, "rewards/chosen": -0.2646705210208893, "rewards/margins": 3.8570303916931152, "rewards/rejected": -4.121701240539551, "step": 1095 }, { "epoch": 1.89, "learning_rate": 4.391202719932002e-07, "logits/chosen": -2.221245288848877, "logits/rejected": -2.2459447383880615, "logps/chosen": -105.93761444091797, "logps/rejected": -135.54266357421875, "loss": 0.1813, "rewards/accuracies": 0.75, "rewards/chosen": -1.5174981355667114, "rewards/margins": 1.2377972602844238, "rewards/rejected": -2.755295515060425, "step": 1096 }, { "epoch": 1.89, "learning_rate": 4.3901402464938374e-07, "logits/chosen": -2.3911659717559814, "logits/rejected": -1.9750117063522339, "logps/chosen": -105.57553100585938, "logps/rejected": -101.00296783447266, "loss": 0.3147, "rewards/accuracies": 0.75, "rewards/chosen": -1.7955907583236694, "rewards/margins": 1.033774971961975, "rewards/rejected": -2.8293657302856445, "step": 1097 }, { "epoch": 1.89, "learning_rate": 4.3890777730556733e-07, "logits/chosen": -1.875831961631775, "logits/rejected": -2.185774564743042, "logps/chosen": -99.679443359375, "logps/rejected": -153.49073791503906, "loss": 0.3456, "rewards/accuracies": 0.75, "rewards/chosen": -1.2042917013168335, "rewards/margins": 2.4473862648010254, "rewards/rejected": -3.6516780853271484, "step": 1098 }, { "epoch": 1.89, "learning_rate": 4.38801529961751e-07, "logits/chosen": -1.835007905960083, "logits/rejected": -2.1969876289367676, "logps/chosen": -101.05120849609375, "logps/rejected": -145.57608032226562, "loss": 0.2115, "rewards/accuracies": 1.0, "rewards/chosen": -1.071342945098877, "rewards/margins": 3.780776023864746, "rewards/rejected": -4.852118492126465, "step": 1099 }, { "epoch": 1.89, "learning_rate": 4.3869528261793453e-07, "logits/chosen": -2.043346881866455, "logits/rejected": -2.364914894104004, "logps/chosen": -88.64124298095703, "logps/rejected": -126.63299560546875, "loss": 0.2223, "rewards/accuracies": 1.0, "rewards/chosen": -0.9678369760513306, "rewards/margins": 3.5398035049438477, "rewards/rejected": -4.507640838623047, "step": 1100 }, { "epoch": 1.9, "learning_rate": 4.3858903527411813e-07, "logits/chosen": -2.084028959274292, "logits/rejected": -2.105287790298462, "logps/chosen": -72.01361083984375, "logps/rejected": -142.63027954101562, "loss": 0.1424, "rewards/accuracies": 1.0, "rewards/chosen": 0.25122278928756714, "rewards/margins": 6.891468048095703, "rewards/rejected": -6.64024543762207, "step": 1101 }, { "epoch": 1.9, "learning_rate": 4.3848278793030173e-07, "logits/chosen": -2.33845591545105, "logits/rejected": -2.3776280879974365, "logps/chosen": -110.22697448730469, "logps/rejected": -169.5386962890625, "loss": 0.1185, "rewards/accuracies": 1.0, "rewards/chosen": -0.7333232760429382, "rewards/margins": 5.66796875, "rewards/rejected": -6.401291847229004, "step": 1102 }, { "epoch": 1.9, "learning_rate": 4.3837654058648533e-07, "logits/chosen": -2.0785841941833496, "logits/rejected": -2.0512561798095703, "logps/chosen": -82.19635772705078, "logps/rejected": -117.48284912109375, "loss": 0.1979, "rewards/accuracies": 1.0, "rewards/chosen": -1.442723035812378, "rewards/margins": 2.444068431854248, "rewards/rejected": -3.886791229248047, "step": 1103 }, { "epoch": 1.9, "learning_rate": 4.382702932426689e-07, "logits/chosen": -2.159100294113159, "logits/rejected": -1.7785179615020752, "logps/chosen": -80.4913558959961, "logps/rejected": -127.03614807128906, "loss": 0.2322, "rewards/accuracies": 1.0, "rewards/chosen": 0.7636412382125854, "rewards/margins": 6.373898029327393, "rewards/rejected": -5.610256195068359, "step": 1104 }, { "epoch": 1.9, "learning_rate": 4.3816404589885253e-07, "logits/chosen": -1.874668002128601, "logits/rejected": -2.1688313484191895, "logps/chosen": -81.24513244628906, "logps/rejected": -132.58895874023438, "loss": 0.1377, "rewards/accuracies": 1.0, "rewards/chosen": 0.34470149874687195, "rewards/margins": 3.724886178970337, "rewards/rejected": -3.3801846504211426, "step": 1105 }, { "epoch": 1.9, "learning_rate": 4.380577985550361e-07, "logits/chosen": -2.0867090225219727, "logits/rejected": -1.5898550748825073, "logps/chosen": -109.85246276855469, "logps/rejected": -109.65939331054688, "loss": 0.1932, "rewards/accuracies": 1.0, "rewards/chosen": 0.3012416958808899, "rewards/margins": 3.2804336547851562, "rewards/rejected": -2.979191780090332, "step": 1106 }, { "epoch": 1.91, "learning_rate": 4.3795155121121967e-07, "logits/chosen": -2.0393223762512207, "logits/rejected": -2.11684250831604, "logps/chosen": -108.24876403808594, "logps/rejected": -150.05099487304688, "loss": 0.2858, "rewards/accuracies": 0.75, "rewards/chosen": -1.5094795227050781, "rewards/margins": 4.55992317199707, "rewards/rejected": -6.069402694702148, "step": 1107 }, { "epoch": 1.91, "learning_rate": 4.378453038674033e-07, "logits/chosen": -2.266688346862793, "logits/rejected": -2.1154415607452393, "logps/chosen": -99.98213195800781, "logps/rejected": -135.43878173828125, "loss": 0.2837, "rewards/accuracies": 1.0, "rewards/chosen": -0.7722902297973633, "rewards/margins": 3.561948299407959, "rewards/rejected": -4.334238529205322, "step": 1108 }, { "epoch": 1.91, "learning_rate": 4.3773905652358687e-07, "logits/chosen": -2.102513313293457, "logits/rejected": -2.2855007648468018, "logps/chosen": -74.08367919921875, "logps/rejected": -107.45603942871094, "loss": 0.256, "rewards/accuracies": 1.0, "rewards/chosen": 0.06660765409469604, "rewards/margins": 1.338988184928894, "rewards/rejected": -1.2723803520202637, "step": 1109 }, { "epoch": 1.91, "learning_rate": 4.3763280917977047e-07, "logits/chosen": -2.1280264854431152, "logits/rejected": -2.43611216545105, "logps/chosen": -92.30839538574219, "logps/rejected": -145.4437713623047, "loss": 0.3773, "rewards/accuracies": 1.0, "rewards/chosen": -1.304032325744629, "rewards/margins": 3.2481393814086914, "rewards/rejected": -4.552171230316162, "step": 1110 }, { "epoch": 1.91, "learning_rate": 4.375265618359541e-07, "logits/chosen": -2.2728400230407715, "logits/rejected": -1.613431692123413, "logps/chosen": -105.25956726074219, "logps/rejected": -113.03105163574219, "loss": 0.3625, "rewards/accuracies": 1.0, "rewards/chosen": -1.4526593685150146, "rewards/margins": 3.191427707672119, "rewards/rejected": -4.644087314605713, "step": 1111 }, { "epoch": 1.91, "learning_rate": 4.3742031449213767e-07, "logits/chosen": -2.397331476211548, "logits/rejected": -2.171703338623047, "logps/chosen": -89.54641723632812, "logps/rejected": -138.7490692138672, "loss": 0.3004, "rewards/accuracies": 1.0, "rewards/chosen": 0.03408754616975784, "rewards/margins": 4.441527366638184, "rewards/rejected": -4.407440185546875, "step": 1112 }, { "epoch": 1.92, "learning_rate": 4.3731406714832127e-07, "logits/chosen": -1.7373716831207275, "logits/rejected": -2.2145471572875977, "logps/chosen": -86.02750396728516, "logps/rejected": -165.78372192382812, "loss": 0.3509, "rewards/accuracies": 1.0, "rewards/chosen": -0.6022933125495911, "rewards/margins": 3.811074733734131, "rewards/rejected": -4.413368225097656, "step": 1113 }, { "epoch": 1.92, "learning_rate": 4.3720781980450486e-07, "logits/chosen": -1.9526960849761963, "logits/rejected": -1.8320813179016113, "logps/chosen": -102.44234466552734, "logps/rejected": -126.38140106201172, "loss": 0.4453, "rewards/accuracies": 0.75, "rewards/chosen": -0.9589855074882507, "rewards/margins": 2.194905996322632, "rewards/rejected": -3.1538915634155273, "step": 1114 }, { "epoch": 1.92, "learning_rate": 4.3710157246068846e-07, "logits/chosen": -1.991675615310669, "logits/rejected": -2.3346009254455566, "logps/chosen": -79.32427215576172, "logps/rejected": -139.72943115234375, "loss": 0.1226, "rewards/accuracies": 1.0, "rewards/chosen": -0.32791540026664734, "rewards/margins": 4.770326614379883, "rewards/rejected": -5.098241806030273, "step": 1115 }, { "epoch": 1.92, "learning_rate": 4.3699532511687206e-07, "logits/chosen": -2.017509937286377, "logits/rejected": -2.0797853469848633, "logps/chosen": -95.10498046875, "logps/rejected": -116.9736099243164, "loss": 0.3318, "rewards/accuracies": 0.75, "rewards/chosen": -0.07019558548927307, "rewards/margins": 2.975789785385132, "rewards/rejected": -3.045985460281372, "step": 1116 }, { "epoch": 1.92, "learning_rate": 4.3688907777305566e-07, "logits/chosen": -2.2480509281158447, "logits/rejected": -2.0346858501434326, "logps/chosen": -100.78439331054688, "logps/rejected": -118.41043853759766, "loss": 0.1658, "rewards/accuracies": 1.0, "rewards/chosen": 0.3955889046192169, "rewards/margins": 3.9383435249328613, "rewards/rejected": -3.542754650115967, "step": 1117 }, { "epoch": 1.92, "learning_rate": 4.3678283042923926e-07, "logits/chosen": -2.35612416267395, "logits/rejected": -1.7681397199630737, "logps/chosen": -110.38333129882812, "logps/rejected": -100.19367980957031, "loss": 0.4029, "rewards/accuracies": 1.0, "rewards/chosen": -0.12358588725328445, "rewards/margins": 2.029975652694702, "rewards/rejected": -2.153561592102051, "step": 1118 }, { "epoch": 1.93, "learning_rate": 4.366765830854228e-07, "logits/chosen": -2.2319352626800537, "logits/rejected": -2.4194016456604004, "logps/chosen": -75.53132629394531, "logps/rejected": -138.906982421875, "loss": 0.1831, "rewards/accuracies": 1.0, "rewards/chosen": -0.21253147721290588, "rewards/margins": 4.162351608276367, "rewards/rejected": -4.37488317489624, "step": 1119 }, { "epoch": 1.93, "learning_rate": 4.3657033574160646e-07, "logits/chosen": -1.9363694190979004, "logits/rejected": -2.3487114906311035, "logps/chosen": -78.7845458984375, "logps/rejected": -152.00076293945312, "loss": 0.1365, "rewards/accuracies": 1.0, "rewards/chosen": -0.5744126439094543, "rewards/margins": 3.9539005756378174, "rewards/rejected": -4.528313636779785, "step": 1120 }, { "epoch": 1.93, "learning_rate": 4.3646408839779006e-07, "logits/chosen": -2.3780384063720703, "logits/rejected": -1.835766315460205, "logps/chosen": -116.73526000976562, "logps/rejected": -116.83872985839844, "loss": 0.2573, "rewards/accuracies": 0.75, "rewards/chosen": -0.6058074831962585, "rewards/margins": 3.6563799381256104, "rewards/rejected": -4.262187480926514, "step": 1121 }, { "epoch": 1.93, "learning_rate": 4.363578410539736e-07, "logits/chosen": -1.949764609336853, "logits/rejected": -2.315107822418213, "logps/chosen": -66.87030029296875, "logps/rejected": -134.7295684814453, "loss": 0.4032, "rewards/accuracies": 1.0, "rewards/chosen": -0.37842482328414917, "rewards/margins": 5.477423191070557, "rewards/rejected": -5.85584831237793, "step": 1122 }, { "epoch": 1.93, "learning_rate": 4.3625159371015725e-07, "logits/chosen": -2.248704195022583, "logits/rejected": -1.802876591682434, "logps/chosen": -70.32804107666016, "logps/rejected": -123.4904556274414, "loss": 0.275, "rewards/accuracies": 1.0, "rewards/chosen": 0.6284745931625366, "rewards/margins": 6.254294395446777, "rewards/rejected": -5.625819683074951, "step": 1123 }, { "epoch": 1.93, "learning_rate": 4.361453463663408e-07, "logits/chosen": -2.1082396507263184, "logits/rejected": -2.1502368450164795, "logps/chosen": -113.3646469116211, "logps/rejected": -124.19439697265625, "loss": 0.2697, "rewards/accuracies": 1.0, "rewards/chosen": -1.222527265548706, "rewards/margins": 2.4211885929107666, "rewards/rejected": -3.6437158584594727, "step": 1124 }, { "epoch": 1.94, "learning_rate": 4.360390990225244e-07, "logits/chosen": -1.9327656030654907, "logits/rejected": -2.2558212280273438, "logps/chosen": -88.12873840332031, "logps/rejected": -98.42459869384766, "loss": 0.2676, "rewards/accuracies": 0.75, "rewards/chosen": -1.429324984550476, "rewards/margins": 0.5831733345985413, "rewards/rejected": -2.012498378753662, "step": 1125 }, { "epoch": 1.94, "learning_rate": 4.3593285167870805e-07, "logits/chosen": -1.842491865158081, "logits/rejected": -2.3365259170532227, "logps/chosen": -73.94149017333984, "logps/rejected": -136.1559600830078, "loss": 0.1219, "rewards/accuracies": 1.0, "rewards/chosen": -0.41491034626960754, "rewards/margins": 4.796478748321533, "rewards/rejected": -5.21138858795166, "step": 1126 }, { "epoch": 1.94, "learning_rate": 4.358266043348916e-07, "logits/chosen": -2.326748847961426, "logits/rejected": -1.941891074180603, "logps/chosen": -94.59222412109375, "logps/rejected": -115.77493286132812, "loss": 0.2309, "rewards/accuracies": 1.0, "rewards/chosen": 0.20410975813865662, "rewards/margins": 4.3895392417907715, "rewards/rejected": -4.185429573059082, "step": 1127 }, { "epoch": 1.94, "learning_rate": 4.357203569910752e-07, "logits/chosen": -2.120511770248413, "logits/rejected": -2.1606898307800293, "logps/chosen": -83.73289489746094, "logps/rejected": -96.66690826416016, "loss": 0.294, "rewards/accuracies": 0.75, "rewards/chosen": -0.6661470532417297, "rewards/margins": 1.6242161989212036, "rewards/rejected": -2.290363311767578, "step": 1128 }, { "epoch": 1.94, "learning_rate": 4.356141096472588e-07, "logits/chosen": -2.1600661277770996, "logits/rejected": -1.8966856002807617, "logps/chosen": -94.98127746582031, "logps/rejected": -146.28515625, "loss": 0.2318, "rewards/accuracies": 1.0, "rewards/chosen": 0.7712383270263672, "rewards/margins": 6.301860332489014, "rewards/rejected": -5.530622482299805, "step": 1129 }, { "epoch": 1.94, "learning_rate": 4.355078623034424e-07, "logits/chosen": -1.9762673377990723, "logits/rejected": -2.279430866241455, "logps/chosen": -86.06314086914062, "logps/rejected": -134.81930541992188, "loss": 0.2788, "rewards/accuracies": 0.75, "rewards/chosen": -0.7362638711929321, "rewards/margins": 3.348477840423584, "rewards/rejected": -4.084741592407227, "step": 1130 }, { "epoch": 1.95, "learning_rate": 4.3540161495962594e-07, "logits/chosen": -1.9757113456726074, "logits/rejected": -2.3384923934936523, "logps/chosen": -108.40106201171875, "logps/rejected": -155.94151306152344, "loss": 0.1963, "rewards/accuracies": 0.75, "rewards/chosen": -1.077129602432251, "rewards/margins": 3.812816858291626, "rewards/rejected": -4.889945983886719, "step": 1131 }, { "epoch": 1.95, "learning_rate": 4.352953676158096e-07, "logits/chosen": -2.162395477294922, "logits/rejected": -2.2089881896972656, "logps/chosen": -76.92581176757812, "logps/rejected": -124.61787414550781, "loss": 0.2002, "rewards/accuracies": 1.0, "rewards/chosen": -0.1300726979970932, "rewards/margins": 4.784477233886719, "rewards/rejected": -4.914549827575684, "step": 1132 }, { "epoch": 1.95, "learning_rate": 4.351891202719932e-07, "logits/chosen": -1.937885046005249, "logits/rejected": -2.184366464614868, "logps/chosen": -85.564208984375, "logps/rejected": -132.4761199951172, "loss": 0.5151, "rewards/accuracies": 0.75, "rewards/chosen": -0.3257564604282379, "rewards/margins": 1.5204544067382812, "rewards/rejected": -1.8462109565734863, "step": 1133 }, { "epoch": 1.95, "learning_rate": 4.350828729281768e-07, "logits/chosen": -2.3077292442321777, "logits/rejected": -2.3077383041381836, "logps/chosen": -79.8668212890625, "logps/rejected": -126.65911102294922, "loss": 0.1982, "rewards/accuracies": 1.0, "rewards/chosen": -0.5982990264892578, "rewards/margins": 4.3604536056518555, "rewards/rejected": -4.958752632141113, "step": 1134 }, { "epoch": 1.95, "learning_rate": 4.349766255843604e-07, "logits/chosen": -2.142946481704712, "logits/rejected": -2.0812132358551025, "logps/chosen": -136.8778533935547, "logps/rejected": -184.82798767089844, "loss": 0.2489, "rewards/accuracies": 1.0, "rewards/chosen": -1.0675379037857056, "rewards/margins": 4.349292278289795, "rewards/rejected": -5.416830062866211, "step": 1135 }, { "epoch": 1.96, "learning_rate": 4.3487037824054393e-07, "logits/chosen": -2.1377737522125244, "logits/rejected": -1.8432796001434326, "logps/chosen": -96.12339782714844, "logps/rejected": -108.66909790039062, "loss": 0.2461, "rewards/accuracies": 1.0, "rewards/chosen": -0.5893213152885437, "rewards/margins": 2.494990110397339, "rewards/rejected": -3.0843114852905273, "step": 1136 }, { "epoch": 1.96, "learning_rate": 4.347641308967276e-07, "logits/chosen": -2.032550573348999, "logits/rejected": -1.979049563407898, "logps/chosen": -80.28497314453125, "logps/rejected": -152.76052856445312, "loss": 0.3084, "rewards/accuracies": 1.0, "rewards/chosen": 0.3613206148147583, "rewards/margins": 6.164546489715576, "rewards/rejected": -5.803225994110107, "step": 1137 }, { "epoch": 1.96, "learning_rate": 4.346578835529112e-07, "logits/chosen": -2.103518486022949, "logits/rejected": -2.2771401405334473, "logps/chosen": -74.33894348144531, "logps/rejected": -142.39639282226562, "loss": 0.3654, "rewards/accuracies": 1.0, "rewards/chosen": 0.1606791466474533, "rewards/margins": 4.8773298263549805, "rewards/rejected": -4.71665096282959, "step": 1138 }, { "epoch": 1.96, "learning_rate": 4.3455163620909473e-07, "logits/chosen": -1.9672942161560059, "logits/rejected": -2.1679341793060303, "logps/chosen": -88.04251098632812, "logps/rejected": -95.36885070800781, "loss": 0.258, "rewards/accuracies": 0.75, "rewards/chosen": -1.1197702884674072, "rewards/margins": 0.8208880424499512, "rewards/rejected": -1.9406583309173584, "step": 1139 }, { "epoch": 1.96, "learning_rate": 4.344453888652784e-07, "logits/chosen": -2.2556822299957275, "logits/rejected": -2.392164707183838, "logps/chosen": -97.84457397460938, "logps/rejected": -139.89866638183594, "loss": 0.2194, "rewards/accuracies": 0.75, "rewards/chosen": -0.9898829460144043, "rewards/margins": 3.6560792922973633, "rewards/rejected": -4.645962238311768, "step": 1140 }, { "epoch": 1.96, "learning_rate": 4.3433914152146193e-07, "logits/chosen": -2.1470813751220703, "logits/rejected": -2.1332273483276367, "logps/chosen": -91.37553405761719, "logps/rejected": -126.09904479980469, "loss": 0.175, "rewards/accuracies": 1.0, "rewards/chosen": -0.05739191919565201, "rewards/margins": 4.3592329025268555, "rewards/rejected": -4.416624546051025, "step": 1141 }, { "epoch": 1.97, "learning_rate": 4.3423289417764553e-07, "logits/chosen": -2.368894100189209, "logits/rejected": -2.2227814197540283, "logps/chosen": -86.78802490234375, "logps/rejected": -107.7301025390625, "loss": 0.1626, "rewards/accuracies": 0.75, "rewards/chosen": -0.6987720727920532, "rewards/margins": 1.8706419467926025, "rewards/rejected": -2.5694141387939453, "step": 1142 }, { "epoch": 1.97, "learning_rate": 4.341266468338292e-07, "logits/chosen": -1.6524138450622559, "logits/rejected": -2.2593302726745605, "logps/chosen": -64.66609954833984, "logps/rejected": -111.25969696044922, "loss": 0.4465, "rewards/accuracies": 0.75, "rewards/chosen": -0.5344278812408447, "rewards/margins": 3.325221538543701, "rewards/rejected": -3.859649658203125, "step": 1143 }, { "epoch": 1.97, "learning_rate": 4.340203994900127e-07, "logits/chosen": -1.8513514995574951, "logits/rejected": -2.0928516387939453, "logps/chosen": -98.96604919433594, "logps/rejected": -135.547607421875, "loss": 0.2722, "rewards/accuracies": 0.75, "rewards/chosen": -1.227083444595337, "rewards/margins": 1.7464752197265625, "rewards/rejected": -2.9735586643218994, "step": 1144 }, { "epoch": 1.97, "learning_rate": 4.339141521461963e-07, "logits/chosen": -2.0693612098693848, "logits/rejected": -2.064765453338623, "logps/chosen": -81.86598205566406, "logps/rejected": -96.66556549072266, "loss": 0.3261, "rewards/accuracies": 1.0, "rewards/chosen": -0.8752298355102539, "rewards/margins": 2.226999044418335, "rewards/rejected": -3.102228879928589, "step": 1145 }, { "epoch": 1.97, "learning_rate": 4.338079048023799e-07, "logits/chosen": -1.9615631103515625, "logits/rejected": -2.1405417919158936, "logps/chosen": -77.44410705566406, "logps/rejected": -116.2731704711914, "loss": 0.2818, "rewards/accuracies": 1.0, "rewards/chosen": -0.3421905040740967, "rewards/margins": 3.4992692470550537, "rewards/rejected": -3.8414599895477295, "step": 1146 }, { "epoch": 1.97, "learning_rate": 4.337016574585635e-07, "logits/chosen": -2.2260994911193848, "logits/rejected": -1.7813458442687988, "logps/chosen": -121.59647369384766, "logps/rejected": -115.54935455322266, "loss": 0.1802, "rewards/accuracies": 1.0, "rewards/chosen": -0.7393283843994141, "rewards/margins": 3.194246292114258, "rewards/rejected": -3.9335744380950928, "step": 1147 }, { "epoch": 1.98, "learning_rate": 4.335954101147471e-07, "logits/chosen": -2.3929853439331055, "logits/rejected": -2.2198660373687744, "logps/chosen": -82.08568572998047, "logps/rejected": -99.88662719726562, "loss": 0.282, "rewards/accuracies": 0.75, "rewards/chosen": 0.012392699718475342, "rewards/margins": 2.109663486480713, "rewards/rejected": -2.0972707271575928, "step": 1148 }, { "epoch": 1.98, "learning_rate": 4.334891627709307e-07, "logits/chosen": -2.2140557765960693, "logits/rejected": -2.2686269283294678, "logps/chosen": -66.98871612548828, "logps/rejected": -104.38937377929688, "loss": 0.2675, "rewards/accuracies": 1.0, "rewards/chosen": 0.13034716248512268, "rewards/margins": 3.6870689392089844, "rewards/rejected": -3.5567214488983154, "step": 1149 }, { "epoch": 1.98, "learning_rate": 4.333829154271143e-07, "logits/chosen": -2.3025460243225098, "logits/rejected": -2.168916702270508, "logps/chosen": -91.11829376220703, "logps/rejected": -122.56476593017578, "loss": 0.1223, "rewards/accuracies": 1.0, "rewards/chosen": -0.5153473019599915, "rewards/margins": 2.7988529205322266, "rewards/rejected": -3.3142001628875732, "step": 1150 }, { "epoch": 1.98, "learning_rate": 4.3327666808329786e-07, "logits/chosen": -2.0529093742370605, "logits/rejected": -2.3466970920562744, "logps/chosen": -74.5988540649414, "logps/rejected": -101.99276733398438, "loss": 0.2692, "rewards/accuracies": 0.75, "rewards/chosen": -0.6596167087554932, "rewards/margins": 1.8911250829696655, "rewards/rejected": -2.550741672515869, "step": 1151 }, { "epoch": 1.98, "learning_rate": 4.331704207394815e-07, "logits/chosen": -1.9649364948272705, "logits/rejected": -1.9528357982635498, "logps/chosen": -89.40208435058594, "logps/rejected": -88.73233032226562, "loss": 0.2353, "rewards/accuracies": 0.75, "rewards/chosen": 0.23081904649734497, "rewards/margins": 2.6352133750915527, "rewards/rejected": -2.4043943881988525, "step": 1152 }, { "epoch": 1.98, "learning_rate": 4.330641733956651e-07, "logits/chosen": -2.142430067062378, "logits/rejected": -2.2005715370178223, "logps/chosen": -85.73213958740234, "logps/rejected": -122.89433288574219, "loss": 0.2114, "rewards/accuracies": 1.0, "rewards/chosen": -0.3587018549442291, "rewards/margins": 3.603095054626465, "rewards/rejected": -3.961796760559082, "step": 1153 }, { "epoch": 1.99, "learning_rate": 4.3295792605184866e-07, "logits/chosen": -1.7369496822357178, "logits/rejected": -2.3316304683685303, "logps/chosen": -56.952369689941406, "logps/rejected": -119.56775665283203, "loss": 0.2328, "rewards/accuracies": 0.75, "rewards/chosen": 0.8419656753540039, "rewards/margins": 3.8326172828674316, "rewards/rejected": -2.9906513690948486, "step": 1154 }, { "epoch": 1.99, "learning_rate": 4.328516787080323e-07, "logits/chosen": -1.6044527292251587, "logits/rejected": -2.1482486724853516, "logps/chosen": -65.7319107055664, "logps/rejected": -130.76943969726562, "loss": 0.1677, "rewards/accuracies": 1.0, "rewards/chosen": 0.8721398115158081, "rewards/margins": 5.127663612365723, "rewards/rejected": -4.255524158477783, "step": 1155 }, { "epoch": 1.99, "learning_rate": 4.3274543136421586e-07, "logits/chosen": -2.096081256866455, "logits/rejected": -2.157076835632324, "logps/chosen": -69.88858795166016, "logps/rejected": -127.80429077148438, "loss": 0.2373, "rewards/accuracies": 1.0, "rewards/chosen": 0.5614632964134216, "rewards/margins": 5.550818920135498, "rewards/rejected": -4.989355564117432, "step": 1156 }, { "epoch": 1.99, "learning_rate": 4.3263918402039946e-07, "logits/chosen": -1.966284155845642, "logits/rejected": -2.197216033935547, "logps/chosen": -93.8033447265625, "logps/rejected": -139.1253204345703, "loss": 0.2224, "rewards/accuracies": 1.0, "rewards/chosen": -0.6365441083908081, "rewards/margins": 4.085681915283203, "rewards/rejected": -4.722226142883301, "step": 1157 }, { "epoch": 1.99, "learning_rate": 4.3253293667658306e-07, "logits/chosen": -2.1644742488861084, "logits/rejected": -2.163386583328247, "logps/chosen": -119.27704620361328, "logps/rejected": -135.41600036621094, "loss": 0.1926, "rewards/accuracies": 0.75, "rewards/chosen": -1.5020058155059814, "rewards/margins": 2.757658004760742, "rewards/rejected": -4.2596635818481445, "step": 1158 }, { "epoch": 1.99, "learning_rate": 4.3242668933276666e-07, "logits/chosen": -2.326695680618286, "logits/rejected": -2.250192165374756, "logps/chosen": -83.215576171875, "logps/rejected": -134.222900390625, "loss": 0.2866, "rewards/accuracies": 1.0, "rewards/chosen": 0.22754916548728943, "rewards/margins": 4.839606285095215, "rewards/rejected": -4.612056732177734, "step": 1159 }, { "epoch": 2.0, "learning_rate": 4.3232044198895025e-07, "logits/chosen": -1.8473280668258667, "logits/rejected": -2.2940080165863037, "logps/chosen": -66.4982681274414, "logps/rejected": -125.55587005615234, "loss": 0.3346, "rewards/accuracies": 0.75, "rewards/chosen": 0.08442669361829758, "rewards/margins": 3.1869232654571533, "rewards/rejected": -3.10249662399292, "step": 1160 }, { "epoch": 2.0, "learning_rate": 4.3221419464513385e-07, "logits/chosen": -1.9662442207336426, "logits/rejected": -2.1234495639801025, "logps/chosen": -99.4313735961914, "logps/rejected": -112.99342346191406, "loss": 0.3491, "rewards/accuracies": 0.75, "rewards/chosen": -1.3539272546768188, "rewards/margins": 1.2229034900665283, "rewards/rejected": -2.5768306255340576, "step": 1161 }, { "epoch": 2.0, "learning_rate": 4.3210794730131745e-07, "logits/chosen": -1.9948617219924927, "logits/rejected": -2.2435696125030518, "logps/chosen": -100.70018005371094, "logps/rejected": -145.5618896484375, "loss": 0.273, "rewards/accuracies": 0.75, "rewards/chosen": -0.008807674050331116, "rewards/margins": 4.145925521850586, "rewards/rejected": -4.154732704162598, "step": 1162 }, { "epoch": 2.0, "learning_rate": 4.32001699957501e-07, "logits/chosen": -2.2358551025390625, "logits/rejected": -1.9724513292312622, "logps/chosen": -104.47714233398438, "logps/rejected": -118.531494140625, "loss": 0.1082, "rewards/accuracies": 1.0, "rewards/chosen": -0.4980516731739044, "rewards/margins": 2.5339512825012207, "rewards/rejected": -3.032003164291382, "step": 1163 }, { "epoch": 2.0, "learning_rate": 4.3189545261368465e-07, "logits/chosen": -2.1745495796203613, "logits/rejected": -2.2384376525878906, "logps/chosen": -88.52447509765625, "logps/rejected": -143.4745635986328, "loss": 0.1588, "rewards/accuracies": 1.0, "rewards/chosen": -0.294502854347229, "rewards/margins": 3.444472312927246, "rewards/rejected": -3.7389755249023438, "step": 1164 }, { "epoch": 2.01, "learning_rate": 4.3178920526986825e-07, "logits/chosen": -1.6837551593780518, "logits/rejected": -2.368387222290039, "logps/chosen": -72.42512512207031, "logps/rejected": -120.24239349365234, "loss": 0.1507, "rewards/accuracies": 1.0, "rewards/chosen": -0.6229676008224487, "rewards/margins": 1.5002113580703735, "rewards/rejected": -2.1231789588928223, "step": 1165 }, { "epoch": 2.01, "learning_rate": 4.316829579260518e-07, "logits/chosen": -2.0182902812957764, "logits/rejected": -2.1320011615753174, "logps/chosen": -106.587646484375, "logps/rejected": -152.09017944335938, "loss": 0.1562, "rewards/accuracies": 1.0, "rewards/chosen": -1.414609432220459, "rewards/margins": 4.325199127197266, "rewards/rejected": -5.739809036254883, "step": 1166 }, { "epoch": 2.01, "learning_rate": 4.3157671058223545e-07, "logits/chosen": -1.9577759504318237, "logits/rejected": -2.2064015865325928, "logps/chosen": -66.03845977783203, "logps/rejected": -123.75410461425781, "loss": 0.0837, "rewards/accuracies": 1.0, "rewards/chosen": 0.5470615029335022, "rewards/margins": 5.325421333312988, "rewards/rejected": -4.778359413146973, "step": 1167 }, { "epoch": 2.01, "learning_rate": 4.31470463238419e-07, "logits/chosen": -2.0869641304016113, "logits/rejected": -2.341402053833008, "logps/chosen": -92.42182159423828, "logps/rejected": -149.88735961914062, "loss": 0.193, "rewards/accuracies": 0.75, "rewards/chosen": -0.6721137762069702, "rewards/margins": 4.760088920593262, "rewards/rejected": -5.4322028160095215, "step": 1168 }, { "epoch": 2.01, "learning_rate": 4.313642158946026e-07, "logits/chosen": -2.2659430503845215, "logits/rejected": -2.0649685859680176, "logps/chosen": -91.72608947753906, "logps/rejected": -146.74977111816406, "loss": 0.1851, "rewards/accuracies": 1.0, "rewards/chosen": -0.12343102693557739, "rewards/margins": 4.654050350189209, "rewards/rejected": -4.7774810791015625, "step": 1169 }, { "epoch": 2.01, "learning_rate": 4.3125796855078624e-07, "logits/chosen": -1.8397608995437622, "logits/rejected": -2.043058156967163, "logps/chosen": -72.06875610351562, "logps/rejected": -140.50152587890625, "loss": 0.0996, "rewards/accuracies": 1.0, "rewards/chosen": 0.724321722984314, "rewards/margins": 6.442704200744629, "rewards/rejected": -5.718382835388184, "step": 1170 }, { "epoch": 2.02, "learning_rate": 4.311517212069698e-07, "logits/chosen": -2.1374425888061523, "logits/rejected": -2.3201520442962646, "logps/chosen": -83.94599914550781, "logps/rejected": -126.08252716064453, "loss": 0.215, "rewards/accuracies": 0.75, "rewards/chosen": -0.04459839314222336, "rewards/margins": 3.397408962249756, "rewards/rejected": -3.442007541656494, "step": 1171 }, { "epoch": 2.02, "learning_rate": 4.310454738631534e-07, "logits/chosen": -2.1116151809692383, "logits/rejected": -1.877221941947937, "logps/chosen": -102.73126220703125, "logps/rejected": -140.28115844726562, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": 0.14098301529884338, "rewards/margins": 5.470005035400391, "rewards/rejected": -5.329022407531738, "step": 1172 }, { "epoch": 2.02, "learning_rate": 4.30939226519337e-07, "logits/chosen": -2.1076037883758545, "logits/rejected": -2.101088047027588, "logps/chosen": -95.65081024169922, "logps/rejected": -129.46142578125, "loss": 0.1101, "rewards/accuracies": 1.0, "rewards/chosen": -1.2218406200408936, "rewards/margins": 3.4893312454223633, "rewards/rejected": -4.711172103881836, "step": 1173 }, { "epoch": 2.02, "learning_rate": 4.308329791755206e-07, "logits/chosen": -1.7800650596618652, "logits/rejected": -2.197972297668457, "logps/chosen": -89.72049713134766, "logps/rejected": -127.35574340820312, "loss": 0.1142, "rewards/accuracies": 1.0, "rewards/chosen": -0.44184261560440063, "rewards/margins": 2.6732373237609863, "rewards/rejected": -3.1150803565979004, "step": 1174 }, { "epoch": 2.02, "learning_rate": 4.3072673183170424e-07, "logits/chosen": -2.032438039779663, "logits/rejected": -2.0193676948547363, "logps/chosen": -90.70689392089844, "logps/rejected": -116.17495727539062, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": -0.3568495213985443, "rewards/margins": 3.679054021835327, "rewards/rejected": -4.035903453826904, "step": 1175 }, { "epoch": 2.02, "learning_rate": 4.306204844878878e-07, "logits/chosen": -2.178953170776367, "logits/rejected": -2.1314306259155273, "logps/chosen": -87.63866424560547, "logps/rejected": -128.31948852539062, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -0.5313528776168823, "rewards/margins": 4.17870569229126, "rewards/rejected": -4.710058212280273, "step": 1176 }, { "epoch": 2.03, "learning_rate": 4.305142371440714e-07, "logits/chosen": -2.2140986919403076, "logits/rejected": -2.140691041946411, "logps/chosen": -98.87895202636719, "logps/rejected": -110.51921081542969, "loss": 0.1837, "rewards/accuracies": 1.0, "rewards/chosen": -0.18022461235523224, "rewards/margins": 2.6976752281188965, "rewards/rejected": -2.877899646759033, "step": 1177 }, { "epoch": 2.03, "learning_rate": 4.30407989800255e-07, "logits/chosen": -1.8412444591522217, "logits/rejected": -2.3468666076660156, "logps/chosen": -86.07784271240234, "logps/rejected": -142.20396423339844, "loss": 0.1153, "rewards/accuracies": 1.0, "rewards/chosen": -0.7396948337554932, "rewards/margins": 3.319164514541626, "rewards/rejected": -4.058859348297119, "step": 1178 }, { "epoch": 2.03, "learning_rate": 4.303017424564386e-07, "logits/chosen": -2.1713638305664062, "logits/rejected": -1.941868543624878, "logps/chosen": -69.88192749023438, "logps/rejected": -142.9093017578125, "loss": 0.1459, "rewards/accuracies": 1.0, "rewards/chosen": 0.48742181062698364, "rewards/margins": 7.546687602996826, "rewards/rejected": -7.059266090393066, "step": 1179 }, { "epoch": 2.03, "learning_rate": 4.3019549511262213e-07, "logits/chosen": -2.2782697677612305, "logits/rejected": -2.10083270072937, "logps/chosen": -88.21985626220703, "logps/rejected": -126.36058807373047, "loss": 0.1179, "rewards/accuracies": 1.0, "rewards/chosen": -0.6238596439361572, "rewards/margins": 3.790339469909668, "rewards/rejected": -4.414199352264404, "step": 1180 }, { "epoch": 2.03, "learning_rate": 4.300892477688058e-07, "logits/chosen": -2.1109185218811035, "logits/rejected": -2.2265169620513916, "logps/chosen": -87.89913177490234, "logps/rejected": -118.21995544433594, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": -0.9173336029052734, "rewards/margins": 2.9405057430267334, "rewards/rejected": -3.857839345932007, "step": 1181 }, { "epoch": 2.03, "learning_rate": 4.299830004249894e-07, "logits/chosen": -2.246000289916992, "logits/rejected": -2.1831324100494385, "logps/chosen": -87.69795227050781, "logps/rejected": -144.48257446289062, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": -0.24717462062835693, "rewards/margins": 5.864781379699707, "rewards/rejected": -6.1119561195373535, "step": 1182 }, { "epoch": 2.04, "learning_rate": 4.298767530811729e-07, "logits/chosen": -2.312750816345215, "logits/rejected": -2.054356336593628, "logps/chosen": -115.88036346435547, "logps/rejected": -118.98684692382812, "loss": 0.3095, "rewards/accuracies": 0.75, "rewards/chosen": -1.572752594947815, "rewards/margins": 2.6043248176574707, "rewards/rejected": -4.177077770233154, "step": 1183 }, { "epoch": 2.04, "learning_rate": 4.297705057373566e-07, "logits/chosen": -2.1334543228149414, "logits/rejected": -2.241560459136963, "logps/chosen": -84.2445297241211, "logps/rejected": -137.74172973632812, "loss": 0.0979, "rewards/accuracies": 1.0, "rewards/chosen": 0.20106390118598938, "rewards/margins": 5.110443592071533, "rewards/rejected": -4.909379959106445, "step": 1184 }, { "epoch": 2.04, "learning_rate": 4.296642583935401e-07, "logits/chosen": -2.057889938354492, "logits/rejected": -2.1551856994628906, "logps/chosen": -117.36611938476562, "logps/rejected": -157.4252166748047, "loss": 0.1553, "rewards/accuracies": 1.0, "rewards/chosen": -1.7765777111053467, "rewards/margins": 3.6833693981170654, "rewards/rejected": -5.459946632385254, "step": 1185 }, { "epoch": 2.04, "learning_rate": 4.295580110497237e-07, "logits/chosen": -2.258136034011841, "logits/rejected": -1.9350440502166748, "logps/chosen": -80.8809814453125, "logps/rejected": -125.23936462402344, "loss": 0.0704, "rewards/accuracies": 1.0, "rewards/chosen": 0.5862213373184204, "rewards/margins": 5.225840091705322, "rewards/rejected": -4.639618873596191, "step": 1186 }, { "epoch": 2.04, "learning_rate": 4.2945176370590737e-07, "logits/chosen": -2.1037325859069824, "logits/rejected": -2.2888262271881104, "logps/chosen": -91.68144989013672, "logps/rejected": -138.04339599609375, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -1.1737971305847168, "rewards/margins": 3.50393009185791, "rewards/rejected": -4.677727222442627, "step": 1187 }, { "epoch": 2.04, "learning_rate": 4.293455163620909e-07, "logits/chosen": -2.0887792110443115, "logits/rejected": -1.9405419826507568, "logps/chosen": -84.34970092773438, "logps/rejected": -142.5592041015625, "loss": 0.1437, "rewards/accuracies": 1.0, "rewards/chosen": -0.3121805191040039, "rewards/margins": 6.492435932159424, "rewards/rejected": -6.804616451263428, "step": 1188 }, { "epoch": 2.05, "learning_rate": 4.292392690182745e-07, "logits/chosen": -2.1781187057495117, "logits/rejected": -1.9304111003875732, "logps/chosen": -106.60017395019531, "logps/rejected": -149.6948699951172, "loss": 0.077, "rewards/accuracies": 1.0, "rewards/chosen": -1.4081323146820068, "rewards/margins": 4.645011901855469, "rewards/rejected": -6.0531439781188965, "step": 1189 }, { "epoch": 2.05, "learning_rate": 4.291330216744581e-07, "logits/chosen": -2.2687788009643555, "logits/rejected": -2.220472812652588, "logps/chosen": -86.21219635009766, "logps/rejected": -136.41488647460938, "loss": 0.1627, "rewards/accuracies": 1.0, "rewards/chosen": -1.583207368850708, "rewards/margins": 5.755608081817627, "rewards/rejected": -7.338815689086914, "step": 1190 }, { "epoch": 2.05, "learning_rate": 4.290267743306417e-07, "logits/chosen": -2.100633144378662, "logits/rejected": -2.1140594482421875, "logps/chosen": -109.4697265625, "logps/rejected": -176.73345947265625, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": -1.4578917026519775, "rewards/margins": 6.827406883239746, "rewards/rejected": -8.285299301147461, "step": 1191 }, { "epoch": 2.05, "learning_rate": 4.289205269868253e-07, "logits/chosen": -2.20011305809021, "logits/rejected": -1.9486408233642578, "logps/chosen": -97.36157989501953, "logps/rejected": -120.1706771850586, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": -1.8866968154907227, "rewards/margins": 3.699873924255371, "rewards/rejected": -5.586570739746094, "step": 1192 }, { "epoch": 2.05, "learning_rate": 4.288142796430089e-07, "logits/chosen": -2.367302894592285, "logits/rejected": -1.8881548643112183, "logps/chosen": -113.68344116210938, "logps/rejected": -129.24832153320312, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": -0.37633228302001953, "rewards/margins": 5.382596015930176, "rewards/rejected": -5.758928298950195, "step": 1193 }, { "epoch": 2.06, "learning_rate": 4.287080322991925e-07, "logits/chosen": -1.552030324935913, "logits/rejected": -2.3823766708374023, "logps/chosen": -102.19252014160156, "logps/rejected": -164.53184509277344, "loss": 0.1364, "rewards/accuracies": 0.75, "rewards/chosen": -2.5488743782043457, "rewards/margins": 3.7979042530059814, "rewards/rejected": -6.346778392791748, "step": 1194 }, { "epoch": 2.06, "learning_rate": 4.2860178495537606e-07, "logits/chosen": -2.2127208709716797, "logits/rejected": -2.1900415420532227, "logps/chosen": -101.95899200439453, "logps/rejected": -145.48497009277344, "loss": 0.2709, "rewards/accuracies": 1.0, "rewards/chosen": -1.5637364387512207, "rewards/margins": 4.993746280670166, "rewards/rejected": -6.557483196258545, "step": 1195 }, { "epoch": 2.06, "learning_rate": 4.284955376115597e-07, "logits/chosen": -2.16306209564209, "logits/rejected": -2.126332998275757, "logps/chosen": -87.90059661865234, "logps/rejected": -159.9564208984375, "loss": 0.1114, "rewards/accuracies": 1.0, "rewards/chosen": -1.1286951303482056, "rewards/margins": 6.291512489318848, "rewards/rejected": -7.420207500457764, "step": 1196 }, { "epoch": 2.06, "learning_rate": 4.283892902677433e-07, "logits/chosen": -2.2681684494018555, "logits/rejected": -2.211484909057617, "logps/chosen": -104.58999633789062, "logps/rejected": -156.24673461914062, "loss": 0.073, "rewards/accuracies": 1.0, "rewards/chosen": -1.4849424362182617, "rewards/margins": 4.809237957000732, "rewards/rejected": -6.294179916381836, "step": 1197 }, { "epoch": 2.06, "learning_rate": 4.2828304292392685e-07, "logits/chosen": -2.1252801418304443, "logits/rejected": -2.291367769241333, "logps/chosen": -76.36421203613281, "logps/rejected": -120.19221496582031, "loss": 0.1292, "rewards/accuracies": 0.75, "rewards/chosen": -1.5978786945343018, "rewards/margins": 3.585540294647217, "rewards/rejected": -5.183419227600098, "step": 1198 }, { "epoch": 2.06, "learning_rate": 4.281767955801105e-07, "logits/chosen": -2.234712600708008, "logits/rejected": -2.2851338386535645, "logps/chosen": -75.77155303955078, "logps/rejected": -152.4587860107422, "loss": 0.0872, "rewards/accuracies": 1.0, "rewards/chosen": -0.22197598218917847, "rewards/margins": 7.301911354064941, "rewards/rejected": -7.523887634277344, "step": 1199 }, { "epoch": 2.07, "learning_rate": 4.2807054823629405e-07, "logits/chosen": -2.1601243019104004, "logits/rejected": -2.225144386291504, "logps/chosen": -97.1301040649414, "logps/rejected": -159.507568359375, "loss": 0.0789, "rewards/accuracies": 1.0, "rewards/chosen": -1.329302430152893, "rewards/margins": 4.84876823425293, "rewards/rejected": -6.178070545196533, "step": 1200 }, { "epoch": 2.07, "learning_rate": 4.2796430089247765e-07, "logits/chosen": -2.1156740188598633, "logits/rejected": -2.204124927520752, "logps/chosen": -97.12211608886719, "logps/rejected": -131.14529418945312, "loss": 0.1254, "rewards/accuracies": 1.0, "rewards/chosen": -1.958966851234436, "rewards/margins": 3.4281787872314453, "rewards/rejected": -5.38714599609375, "step": 1201 }, { "epoch": 2.07, "learning_rate": 4.278580535486613e-07, "logits/chosen": -1.9650921821594238, "logits/rejected": -2.36348557472229, "logps/chosen": -106.99524688720703, "logps/rejected": -148.09202575683594, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": -1.9093974828720093, "rewards/margins": 3.5336737632751465, "rewards/rejected": -5.443071365356445, "step": 1202 }, { "epoch": 2.07, "learning_rate": 4.2775180620484485e-07, "logits/chosen": -1.9300789833068848, "logits/rejected": -2.3838062286376953, "logps/chosen": -107.02790069580078, "logps/rejected": -139.7760009765625, "loss": 0.1521, "rewards/accuracies": 0.75, "rewards/chosen": -3.6743578910827637, "rewards/margins": 2.224667549133301, "rewards/rejected": -5.899025917053223, "step": 1203 }, { "epoch": 2.07, "learning_rate": 4.2764555886102845e-07, "logits/chosen": -1.8735158443450928, "logits/rejected": -2.2710776329040527, "logps/chosen": -95.1749038696289, "logps/rejected": -145.27133178710938, "loss": 0.1238, "rewards/accuracies": 1.0, "rewards/chosen": -0.9661468267440796, "rewards/margins": 3.6547868251800537, "rewards/rejected": -4.620933532714844, "step": 1204 }, { "epoch": 2.07, "learning_rate": 4.2753931151721205e-07, "logits/chosen": -1.9298453330993652, "logits/rejected": -2.295253276824951, "logps/chosen": -90.04469299316406, "logps/rejected": -164.68313598632812, "loss": 0.1001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7189984917640686, "rewards/margins": 5.277834415435791, "rewards/rejected": -5.996832847595215, "step": 1205 }, { "epoch": 2.08, "learning_rate": 4.2743306417339565e-07, "logits/chosen": -2.1238319873809814, "logits/rejected": -2.102123737335205, "logps/chosen": -82.76667785644531, "logps/rejected": -158.68304443359375, "loss": 0.08, "rewards/accuracies": 1.0, "rewards/chosen": -0.11179066449403763, "rewards/margins": 6.492696762084961, "rewards/rejected": -6.604487419128418, "step": 1206 }, { "epoch": 2.08, "learning_rate": 4.273268168295792e-07, "logits/chosen": -2.076247215270996, "logits/rejected": -2.063908100128174, "logps/chosen": -120.57982635498047, "logps/rejected": -137.0502166748047, "loss": 0.2192, "rewards/accuracies": 0.5, "rewards/chosen": -3.9413037300109863, "rewards/margins": 1.3238415718078613, "rewards/rejected": -5.265145301818848, "step": 1207 }, { "epoch": 2.08, "learning_rate": 4.2722056948576284e-07, "logits/chosen": -2.3274571895599365, "logits/rejected": -2.457613229751587, "logps/chosen": -111.12771606445312, "logps/rejected": -135.83154296875, "loss": 0.1706, "rewards/accuracies": 0.75, "rewards/chosen": -2.4471378326416016, "rewards/margins": 1.234543800354004, "rewards/rejected": -3.6816811561584473, "step": 1208 }, { "epoch": 2.08, "learning_rate": 4.2711432214194644e-07, "logits/chosen": -2.24784779548645, "logits/rejected": -2.0044660568237305, "logps/chosen": -87.49324035644531, "logps/rejected": -126.1220703125, "loss": 0.3365, "rewards/accuracies": 1.0, "rewards/chosen": -2.239140510559082, "rewards/margins": 3.5648324489593506, "rewards/rejected": -5.8039727210998535, "step": 1209 }, { "epoch": 2.08, "learning_rate": 4.2700807479813e-07, "logits/chosen": -2.282658576965332, "logits/rejected": -2.2960925102233887, "logps/chosen": -86.71806335449219, "logps/rejected": -139.93423461914062, "loss": 0.1095, "rewards/accuracies": 1.0, "rewards/chosen": 0.015213385224342346, "rewards/margins": 4.459859848022461, "rewards/rejected": -4.444646835327148, "step": 1210 }, { "epoch": 2.08, "learning_rate": 4.2690182745431364e-07, "logits/chosen": -2.074481964111328, "logits/rejected": -2.2640416622161865, "logps/chosen": -99.06798553466797, "logps/rejected": -159.25253295898438, "loss": 0.1646, "rewards/accuracies": 0.75, "rewards/chosen": -0.3878123462200165, "rewards/margins": 6.033496856689453, "rewards/rejected": -6.421308994293213, "step": 1211 }, { "epoch": 2.09, "learning_rate": 4.267955801104972e-07, "logits/chosen": -2.2015717029571533, "logits/rejected": -1.949676513671875, "logps/chosen": -93.46038055419922, "logps/rejected": -144.89166259765625, "loss": 0.1431, "rewards/accuracies": 1.0, "rewards/chosen": -1.5197241306304932, "rewards/margins": 5.796443462371826, "rewards/rejected": -7.316167831420898, "step": 1212 }, { "epoch": 2.09, "learning_rate": 4.266893327666808e-07, "logits/chosen": -2.0188956260681152, "logits/rejected": -2.0755040645599365, "logps/chosen": -91.4054946899414, "logps/rejected": -145.11196899414062, "loss": 0.1559, "rewards/accuracies": 1.0, "rewards/chosen": -0.0653708279132843, "rewards/margins": 6.173631191253662, "rewards/rejected": -6.239002227783203, "step": 1213 }, { "epoch": 2.09, "learning_rate": 4.2658308542286444e-07, "logits/chosen": -2.1091818809509277, "logits/rejected": -2.059635877609253, "logps/chosen": -86.81285095214844, "logps/rejected": -134.20962524414062, "loss": 0.1076, "rewards/accuracies": 1.0, "rewards/chosen": 0.10244618356227875, "rewards/margins": 4.3650970458984375, "rewards/rejected": -4.262651443481445, "step": 1214 }, { "epoch": 2.09, "learning_rate": 4.26476838079048e-07, "logits/chosen": -1.97300386428833, "logits/rejected": -2.112398147583008, "logps/chosen": -84.17195129394531, "logps/rejected": -150.28060913085938, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": -0.8543484210968018, "rewards/margins": 5.921785354614258, "rewards/rejected": -6.776134014129639, "step": 1215 }, { "epoch": 2.09, "learning_rate": 4.263705907352316e-07, "logits/chosen": -1.9552171230316162, "logits/rejected": -2.3468430042266846, "logps/chosen": -81.857177734375, "logps/rejected": -137.04678344726562, "loss": 0.2975, "rewards/accuracies": 0.75, "rewards/chosen": -0.4035196304321289, "rewards/margins": 5.064847469329834, "rewards/rejected": -5.468367099761963, "step": 1216 }, { "epoch": 2.09, "learning_rate": 4.262643433914152e-07, "logits/chosen": -2.219724655151367, "logits/rejected": -2.128849506378174, "logps/chosen": -92.43087768554688, "logps/rejected": -114.750732421875, "loss": 0.1445, "rewards/accuracies": 0.75, "rewards/chosen": -1.1408859491348267, "rewards/margins": 3.0025410652160645, "rewards/rejected": -4.143426895141602, "step": 1217 }, { "epoch": 2.1, "learning_rate": 4.261580960475988e-07, "logits/chosen": -2.10056209564209, "logits/rejected": -2.294304847717285, "logps/chosen": -92.17544555664062, "logps/rejected": -141.01654052734375, "loss": 0.1551, "rewards/accuracies": 0.75, "rewards/chosen": -0.6327954530715942, "rewards/margins": 4.2892937660217285, "rewards/rejected": -4.922089099884033, "step": 1218 }, { "epoch": 2.1, "learning_rate": 4.2605184870378243e-07, "logits/chosen": -2.2249679565429688, "logits/rejected": -2.029750108718872, "logps/chosen": -69.32733917236328, "logps/rejected": -123.36726379394531, "loss": 0.2551, "rewards/accuracies": 1.0, "rewards/chosen": -0.172001451253891, "rewards/margins": 4.900524139404297, "rewards/rejected": -5.0725250244140625, "step": 1219 }, { "epoch": 2.1, "learning_rate": 4.25945601359966e-07, "logits/chosen": -1.9806780815124512, "logits/rejected": -2.422868490219116, "logps/chosen": -87.97526550292969, "logps/rejected": -141.67910766601562, "loss": 0.2431, "rewards/accuracies": 1.0, "rewards/chosen": -0.6111322641372681, "rewards/margins": 3.272952079772949, "rewards/rejected": -3.8840842247009277, "step": 1220 }, { "epoch": 2.1, "learning_rate": 4.258393540161496e-07, "logits/chosen": -2.167212963104248, "logits/rejected": -2.0453875064849854, "logps/chosen": -98.1091537475586, "logps/rejected": -153.0776824951172, "loss": 0.0566, "rewards/accuracies": 0.75, "rewards/chosen": -1.4359534978866577, "rewards/margins": 4.391613960266113, "rewards/rejected": -5.827568054199219, "step": 1221 }, { "epoch": 2.1, "learning_rate": 4.257331066723332e-07, "logits/chosen": -2.125122547149658, "logits/rejected": -2.212578773498535, "logps/chosen": -89.59310913085938, "logps/rejected": -124.98147583007812, "loss": 0.2725, "rewards/accuracies": 0.75, "rewards/chosen": -1.8284634351730347, "rewards/margins": 3.395747661590576, "rewards/rejected": -5.2242112159729, "step": 1222 }, { "epoch": 2.1, "learning_rate": 4.256268593285168e-07, "logits/chosen": -2.2597384452819824, "logits/rejected": -1.8890348672866821, "logps/chosen": -95.81910705566406, "logps/rejected": -127.6748046875, "loss": 0.1911, "rewards/accuracies": 1.0, "rewards/chosen": -1.0800971984863281, "rewards/margins": 4.638235092163086, "rewards/rejected": -5.718332290649414, "step": 1223 }, { "epoch": 2.11, "learning_rate": 4.2552061198470037e-07, "logits/chosen": -2.276447296142578, "logits/rejected": -2.229231119155884, "logps/chosen": -102.72991180419922, "logps/rejected": -137.97311401367188, "loss": 0.0933, "rewards/accuracies": 1.0, "rewards/chosen": -1.7902195453643799, "rewards/margins": 3.0814614295959473, "rewards/rejected": -4.871680736541748, "step": 1224 }, { "epoch": 2.11, "learning_rate": 4.2541436464088397e-07, "logits/chosen": -2.1343376636505127, "logits/rejected": -2.112093687057495, "logps/chosen": -88.96759033203125, "logps/rejected": -137.12945556640625, "loss": 0.0983, "rewards/accuracies": 1.0, "rewards/chosen": -0.6897867321968079, "rewards/margins": 4.372730255126953, "rewards/rejected": -5.062516689300537, "step": 1225 }, { "epoch": 2.11, "learning_rate": 4.2530811729706757e-07, "logits/chosen": -2.0584049224853516, "logits/rejected": -2.1808836460113525, "logps/chosen": -89.25900268554688, "logps/rejected": -140.91839599609375, "loss": 0.0795, "rewards/accuracies": 1.0, "rewards/chosen": -0.7101109027862549, "rewards/margins": 5.5924882888793945, "rewards/rejected": -6.30259895324707, "step": 1226 }, { "epoch": 2.11, "learning_rate": 4.252018699532511e-07, "logits/chosen": -2.130681037902832, "logits/rejected": -2.31097149848938, "logps/chosen": -85.8644790649414, "logps/rejected": -141.59463500976562, "loss": 0.1261, "rewards/accuracies": 1.0, "rewards/chosen": -0.907092809677124, "rewards/margins": 4.2190470695495605, "rewards/rejected": -5.126140117645264, "step": 1227 }, { "epoch": 2.11, "learning_rate": 4.2509562260943477e-07, "logits/chosen": -2.3636345863342285, "logits/rejected": -2.063742160797119, "logps/chosen": -95.31582641601562, "logps/rejected": -129.099853515625, "loss": 0.1193, "rewards/accuracies": 1.0, "rewards/chosen": -1.032155156135559, "rewards/margins": 4.856671333312988, "rewards/rejected": -5.888826370239258, "step": 1228 }, { "epoch": 2.12, "learning_rate": 4.2498937526561837e-07, "logits/chosen": -2.0358388423919678, "logits/rejected": -2.1010050773620605, "logps/chosen": -106.70792388916016, "logps/rejected": -142.51742553710938, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": -1.0522630214691162, "rewards/margins": 3.4356932640075684, "rewards/rejected": -4.4879560470581055, "step": 1229 }, { "epoch": 2.12, "learning_rate": 4.248831279218019e-07, "logits/chosen": -2.3346149921417236, "logits/rejected": -2.0915780067443848, "logps/chosen": -94.92344665527344, "logps/rejected": -134.80258178710938, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": -0.28453391790390015, "rewards/margins": 4.908499240875244, "rewards/rejected": -5.193032741546631, "step": 1230 }, { "epoch": 2.12, "learning_rate": 4.2477688057798556e-07, "logits/chosen": -1.6306276321411133, "logits/rejected": -2.3196804523468018, "logps/chosen": -93.38177490234375, "logps/rejected": -139.11697387695312, "loss": 0.0917, "rewards/accuracies": 1.0, "rewards/chosen": -1.2390671968460083, "rewards/margins": 4.772075176239014, "rewards/rejected": -6.011142730712891, "step": 1231 }, { "epoch": 2.12, "learning_rate": 4.246706332341691e-07, "logits/chosen": -1.8248460292816162, "logits/rejected": -2.1880383491516113, "logps/chosen": -85.06541442871094, "logps/rejected": -143.28311157226562, "loss": 0.0815, "rewards/accuracies": 1.0, "rewards/chosen": -0.11041833460330963, "rewards/margins": 3.6485118865966797, "rewards/rejected": -3.758930206298828, "step": 1232 }, { "epoch": 2.12, "learning_rate": 4.245643858903527e-07, "logits/chosen": -1.8551685810089111, "logits/rejected": -2.16281795501709, "logps/chosen": -109.69210815429688, "logps/rejected": -142.75425720214844, "loss": 0.1991, "rewards/accuracies": 1.0, "rewards/chosen": -0.7803741693496704, "rewards/margins": 4.024910926818848, "rewards/rejected": -4.8052849769592285, "step": 1233 }, { "epoch": 2.12, "learning_rate": 4.2445813854653636e-07, "logits/chosen": -2.225545644760132, "logits/rejected": -2.1013877391815186, "logps/chosen": -98.44917297363281, "logps/rejected": -152.7442626953125, "loss": 0.2544, "rewards/accuracies": 1.0, "rewards/chosen": -0.5617441534996033, "rewards/margins": 5.700575828552246, "rewards/rejected": -6.262320518493652, "step": 1234 }, { "epoch": 2.13, "learning_rate": 4.243518912027199e-07, "logits/chosen": -2.030580520629883, "logits/rejected": -2.2630257606506348, "logps/chosen": -91.61508178710938, "logps/rejected": -120.37544250488281, "loss": 0.1733, "rewards/accuracies": 1.0, "rewards/chosen": -1.5009719133377075, "rewards/margins": 3.143303394317627, "rewards/rejected": -4.644275188446045, "step": 1235 }, { "epoch": 2.13, "learning_rate": 4.242456438589035e-07, "logits/chosen": -1.964984655380249, "logits/rejected": -2.301215171813965, "logps/chosen": -108.61814880371094, "logps/rejected": -203.76675415039062, "loss": 0.0853, "rewards/accuracies": 1.0, "rewards/chosen": -1.2951066493988037, "rewards/margins": 6.768353462219238, "rewards/rejected": -8.063460350036621, "step": 1236 }, { "epoch": 2.13, "learning_rate": 4.241393965150871e-07, "logits/chosen": -1.94551682472229, "logits/rejected": -2.0644569396972656, "logps/chosen": -100.3717041015625, "logps/rejected": -122.67501068115234, "loss": 0.1111, "rewards/accuracies": 1.0, "rewards/chosen": -0.950817883014679, "rewards/margins": 2.1129050254821777, "rewards/rejected": -3.063723087310791, "step": 1237 }, { "epoch": 2.13, "learning_rate": 4.240331491712707e-07, "logits/chosen": -2.248506546020508, "logits/rejected": -2.3187904357910156, "logps/chosen": -90.52181243896484, "logps/rejected": -158.92318725585938, "loss": 0.2298, "rewards/accuracies": 1.0, "rewards/chosen": -0.499022901058197, "rewards/margins": 6.685650825500488, "rewards/rejected": -7.184673309326172, "step": 1238 }, { "epoch": 2.13, "learning_rate": 4.2392690182745425e-07, "logits/chosen": -2.1545333862304688, "logits/rejected": -2.269925117492676, "logps/chosen": -134.5699920654297, "logps/rejected": -181.3119659423828, "loss": 0.166, "rewards/accuracies": 1.0, "rewards/chosen": -2.2962605953216553, "rewards/margins": 3.716543197631836, "rewards/rejected": -6.01280403137207, "step": 1239 }, { "epoch": 2.13, "learning_rate": 4.238206544836379e-07, "logits/chosen": -2.261160135269165, "logits/rejected": -2.3462462425231934, "logps/chosen": -91.06376647949219, "logps/rejected": -155.8978271484375, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": -0.10527607798576355, "rewards/margins": 5.884950637817383, "rewards/rejected": -5.9902262687683105, "step": 1240 }, { "epoch": 2.14, "learning_rate": 4.237144071398215e-07, "logits/chosen": -2.305790424346924, "logits/rejected": -2.0342845916748047, "logps/chosen": -89.1795654296875, "logps/rejected": -121.25193786621094, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -0.6511743664741516, "rewards/margins": 4.646561145782471, "rewards/rejected": -5.297735691070557, "step": 1241 }, { "epoch": 2.14, "learning_rate": 4.2360815979600505e-07, "logits/chosen": -2.322249412536621, "logits/rejected": -2.031453847885132, "logps/chosen": -107.04103088378906, "logps/rejected": -133.72117614746094, "loss": 0.1161, "rewards/accuracies": 1.0, "rewards/chosen": -1.0797648429870605, "rewards/margins": 4.847181797027588, "rewards/rejected": -5.926946640014648, "step": 1242 }, { "epoch": 2.14, "learning_rate": 4.235019124521887e-07, "logits/chosen": -2.318085193634033, "logits/rejected": -2.229001045227051, "logps/chosen": -104.99090576171875, "logps/rejected": -126.27169799804688, "loss": 0.0684, "rewards/accuracies": 1.0, "rewards/chosen": -0.7282261848449707, "rewards/margins": 4.144258499145508, "rewards/rejected": -4.8724846839904785, "step": 1243 }, { "epoch": 2.14, "learning_rate": 4.2339566510837224e-07, "logits/chosen": -2.404200553894043, "logits/rejected": -2.133606195449829, "logps/chosen": -97.16807556152344, "logps/rejected": -136.14682006835938, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": -0.2618270814418793, "rewards/margins": 4.110608100891113, "rewards/rejected": -4.372435092926025, "step": 1244 }, { "epoch": 2.14, "learning_rate": 4.2328941776455584e-07, "logits/chosen": -2.12453293800354, "logits/rejected": -2.1638951301574707, "logps/chosen": -98.33465576171875, "logps/rejected": -136.66514587402344, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": -1.34805428981781, "rewards/margins": 4.1933770179748535, "rewards/rejected": -5.541431427001953, "step": 1245 }, { "epoch": 2.14, "learning_rate": 4.231831704207395e-07, "logits/chosen": -2.023538589477539, "logits/rejected": -2.2301690578460693, "logps/chosen": -90.34158325195312, "logps/rejected": -127.12167358398438, "loss": 0.1257, "rewards/accuracies": 1.0, "rewards/chosen": -0.0953441709280014, "rewards/margins": 4.241725921630859, "rewards/rejected": -4.337069988250732, "step": 1246 }, { "epoch": 2.15, "learning_rate": 4.2307692307692304e-07, "logits/chosen": -2.019406318664551, "logits/rejected": -2.323248863220215, "logps/chosen": -78.29190063476562, "logps/rejected": -157.9497833251953, "loss": 0.1526, "rewards/accuracies": 1.0, "rewards/chosen": -1.5380061864852905, "rewards/margins": 5.390430450439453, "rewards/rejected": -6.928437232971191, "step": 1247 }, { "epoch": 2.15, "learning_rate": 4.2297067573310664e-07, "logits/chosen": -2.3487486839294434, "logits/rejected": -1.9768083095550537, "logps/chosen": -118.3112564086914, "logps/rejected": -129.55319213867188, "loss": 0.2093, "rewards/accuracies": 1.0, "rewards/chosen": -0.9763248562812805, "rewards/margins": 3.7874679565429688, "rewards/rejected": -4.763792991638184, "step": 1248 }, { "epoch": 2.15, "learning_rate": 4.2286442838929024e-07, "logits/chosen": -2.2806458473205566, "logits/rejected": -2.2023379802703857, "logps/chosen": -108.81985473632812, "logps/rejected": -135.4215087890625, "loss": 0.1743, "rewards/accuracies": 0.75, "rewards/chosen": -0.5079013705253601, "rewards/margins": 3.6716408729553223, "rewards/rejected": -4.179542064666748, "step": 1249 }, { "epoch": 2.15, "learning_rate": 4.2275818104547384e-07, "logits/chosen": -1.997776985168457, "logits/rejected": -2.352156162261963, "logps/chosen": -76.74906921386719, "logps/rejected": -109.3808822631836, "loss": 0.1873, "rewards/accuracies": 1.0, "rewards/chosen": -1.1822187900543213, "rewards/margins": 2.1907241344451904, "rewards/rejected": -3.3729429244995117, "step": 1250 }, { "epoch": 2.15, "learning_rate": 4.2265193370165744e-07, "logits/chosen": -1.9706021547317505, "logits/rejected": -2.356213092803955, "logps/chosen": -89.29721069335938, "logps/rejected": -158.02059936523438, "loss": 0.0764, "rewards/accuracies": 1.0, "rewards/chosen": -1.3727144002914429, "rewards/margins": 5.346821308135986, "rewards/rejected": -6.7195353507995605, "step": 1251 }, { "epoch": 2.15, "learning_rate": 4.2254568635784104e-07, "logits/chosen": -2.224268913269043, "logits/rejected": -2.118403911590576, "logps/chosen": -106.365234375, "logps/rejected": -163.60919189453125, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": -1.9407179355621338, "rewards/margins": 4.290096759796143, "rewards/rejected": -6.2308149337768555, "step": 1252 }, { "epoch": 2.16, "learning_rate": 4.2243943901402463e-07, "logits/chosen": -2.2793145179748535, "logits/rejected": -2.0508294105529785, "logps/chosen": -95.38052368164062, "logps/rejected": -132.6333770751953, "loss": 0.1737, "rewards/accuracies": 0.75, "rewards/chosen": -0.34482139348983765, "rewards/margins": 4.311051368713379, "rewards/rejected": -4.655872821807861, "step": 1253 }, { "epoch": 2.16, "learning_rate": 4.223331916702082e-07, "logits/chosen": -2.0538206100463867, "logits/rejected": -2.0026581287384033, "logps/chosen": -124.79278564453125, "logps/rejected": -159.70626831054688, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": -3.182988166809082, "rewards/margins": 3.9848833084106445, "rewards/rejected": -7.167871475219727, "step": 1254 }, { "epoch": 2.16, "learning_rate": 4.2222694432639183e-07, "logits/chosen": -2.33874249458313, "logits/rejected": -2.001669406890869, "logps/chosen": -119.42231750488281, "logps/rejected": -146.2915802001953, "loss": 0.0969, "rewards/accuracies": 1.0, "rewards/chosen": -0.5230638384819031, "rewards/margins": 4.290274620056152, "rewards/rejected": -4.813338279724121, "step": 1255 }, { "epoch": 2.16, "learning_rate": 4.2212069698257543e-07, "logits/chosen": -1.8768887519836426, "logits/rejected": -2.2868995666503906, "logps/chosen": -91.8944320678711, "logps/rejected": -147.43296813964844, "loss": 0.1457, "rewards/accuracies": 1.0, "rewards/chosen": -1.7657485008239746, "rewards/margins": 4.053316593170166, "rewards/rejected": -5.819065093994141, "step": 1256 }, { "epoch": 2.16, "learning_rate": 4.22014449638759e-07, "logits/chosen": -1.840057373046875, "logits/rejected": -1.9825336933135986, "logps/chosen": -94.01660919189453, "logps/rejected": -199.04949951171875, "loss": 0.2215, "rewards/accuracies": 1.0, "rewards/chosen": -0.679907500743866, "rewards/margins": 8.752874374389648, "rewards/rejected": -9.432782173156738, "step": 1257 }, { "epoch": 2.17, "learning_rate": 4.2190820229494263e-07, "logits/chosen": -2.141350746154785, "logits/rejected": -2.2867372035980225, "logps/chosen": -104.71430206298828, "logps/rejected": -179.54420471191406, "loss": 0.2212, "rewards/accuracies": 1.0, "rewards/chosen": -2.1791677474975586, "rewards/margins": 5.884990215301514, "rewards/rejected": -8.06415843963623, "step": 1258 }, { "epoch": 2.17, "learning_rate": 4.218019549511262e-07, "logits/chosen": -2.006044387817383, "logits/rejected": -2.3762998580932617, "logps/chosen": -101.86683654785156, "logps/rejected": -148.64266967773438, "loss": 0.2077, "rewards/accuracies": 0.75, "rewards/chosen": -2.2992141246795654, "rewards/margins": 3.2024552822113037, "rewards/rejected": -5.501669406890869, "step": 1259 }, { "epoch": 2.17, "learning_rate": 4.2169570760730983e-07, "logits/chosen": -1.8905572891235352, "logits/rejected": -2.3062186241149902, "logps/chosen": -95.20542907714844, "logps/rejected": -185.1490478515625, "loss": 0.3123, "rewards/accuracies": 1.0, "rewards/chosen": -1.0933923721313477, "rewards/margins": 8.19237995147705, "rewards/rejected": -9.285772323608398, "step": 1260 }, { "epoch": 2.17, "learning_rate": 4.215894602634934e-07, "logits/chosen": -2.2599682807922363, "logits/rejected": -2.286158800125122, "logps/chosen": -75.01960754394531, "logps/rejected": -147.4518585205078, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": -0.002691924571990967, "rewards/margins": 6.272511959075928, "rewards/rejected": -6.275203704833984, "step": 1261 }, { "epoch": 2.17, "learning_rate": 4.2148321291967697e-07, "logits/chosen": -2.0624327659606934, "logits/rejected": -2.084479570388794, "logps/chosen": -96.65306091308594, "logps/rejected": -143.16702270507812, "loss": 0.0705, "rewards/accuracies": 1.0, "rewards/chosen": -1.7660160064697266, "rewards/margins": 4.331467628479004, "rewards/rejected": -6.0974836349487305, "step": 1262 }, { "epoch": 2.17, "learning_rate": 4.213769655758606e-07, "logits/chosen": -2.023092746734619, "logits/rejected": -2.123897075653076, "logps/chosen": -61.40960693359375, "logps/rejected": -144.7620086669922, "loss": 0.1252, "rewards/accuracies": 1.0, "rewards/chosen": -0.25206801295280457, "rewards/margins": 7.153613567352295, "rewards/rejected": -7.40568208694458, "step": 1263 }, { "epoch": 2.18, "learning_rate": 4.2127071823204417e-07, "logits/chosen": -1.9823455810546875, "logits/rejected": -2.379688262939453, "logps/chosen": -87.46033477783203, "logps/rejected": -145.5874786376953, "loss": 0.1422, "rewards/accuracies": 1.0, "rewards/chosen": -0.501556396484375, "rewards/margins": 5.015106201171875, "rewards/rejected": -5.51666259765625, "step": 1264 }, { "epoch": 2.18, "learning_rate": 4.2116447088822777e-07, "logits/chosen": -2.0270133018493652, "logits/rejected": -2.1519899368286133, "logps/chosen": -81.17247009277344, "logps/rejected": -177.431640625, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": -0.35548022389411926, "rewards/margins": 8.628198623657227, "rewards/rejected": -8.983678817749023, "step": 1265 }, { "epoch": 2.18, "learning_rate": 4.210582235444114e-07, "logits/chosen": -2.132499933242798, "logits/rejected": -2.1229867935180664, "logps/chosen": -83.11378479003906, "logps/rejected": -105.6987075805664, "loss": 0.1595, "rewards/accuracies": 1.0, "rewards/chosen": -1.3425500392913818, "rewards/margins": 2.1507794857025146, "rewards/rejected": -3.4933297634124756, "step": 1266 }, { "epoch": 2.18, "learning_rate": 4.2095197620059497e-07, "logits/chosen": -2.092669725418091, "logits/rejected": -2.338247776031494, "logps/chosen": -84.90546417236328, "logps/rejected": -147.60076904296875, "loss": 0.1576, "rewards/accuracies": 1.0, "rewards/chosen": -0.49181050062179565, "rewards/margins": 5.160470485687256, "rewards/rejected": -5.652281284332275, "step": 1267 }, { "epoch": 2.18, "learning_rate": 4.2084572885677857e-07, "logits/chosen": -2.320504665374756, "logits/rejected": -2.1474227905273438, "logps/chosen": -86.35224151611328, "logps/rejected": -132.70303344726562, "loss": 0.0502, "rewards/accuracies": 1.0, "rewards/chosen": -0.9149848222732544, "rewards/margins": 5.012073516845703, "rewards/rejected": -5.927059173583984, "step": 1268 }, { "epoch": 2.18, "learning_rate": 4.2073948151296216e-07, "logits/chosen": -2.354550838470459, "logits/rejected": -2.275836229324341, "logps/chosen": -98.8733901977539, "logps/rejected": -148.6120147705078, "loss": 0.1099, "rewards/accuracies": 1.0, "rewards/chosen": -2.0931267738342285, "rewards/margins": 4.726561546325684, "rewards/rejected": -6.819688320159912, "step": 1269 }, { "epoch": 2.19, "learning_rate": 4.2063323416914576e-07, "logits/chosen": -2.241964817047119, "logits/rejected": -1.9668447971343994, "logps/chosen": -124.57371520996094, "logps/rejected": -157.506103515625, "loss": 0.0728, "rewards/accuracies": 1.0, "rewards/chosen": -2.5629076957702637, "rewards/margins": 5.13720703125, "rewards/rejected": -7.7001142501831055, "step": 1270 }, { "epoch": 2.19, "learning_rate": 4.205269868253293e-07, "logits/chosen": -2.1410129070281982, "logits/rejected": -2.1721432209014893, "logps/chosen": -91.62641906738281, "logps/rejected": -166.53643798828125, "loss": 0.0775, "rewards/accuracies": 1.0, "rewards/chosen": -1.0986740589141846, "rewards/margins": 7.052212715148926, "rewards/rejected": -8.150887489318848, "step": 1271 }, { "epoch": 2.19, "learning_rate": 4.2042073948151296e-07, "logits/chosen": -2.242748975753784, "logits/rejected": -2.2730860710144043, "logps/chosen": -88.37486267089844, "logps/rejected": -116.16976928710938, "loss": 0.1329, "rewards/accuracies": 1.0, "rewards/chosen": -0.8192007541656494, "rewards/margins": 3.8749094009399414, "rewards/rejected": -4.69411039352417, "step": 1272 }, { "epoch": 2.19, "learning_rate": 4.2031449213769656e-07, "logits/chosen": -2.2270164489746094, "logits/rejected": -2.163106918334961, "logps/chosen": -97.58001708984375, "logps/rejected": -153.31552124023438, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": -2.2829058170318604, "rewards/margins": 5.441816329956055, "rewards/rejected": -7.724721908569336, "step": 1273 }, { "epoch": 2.19, "learning_rate": 4.202082447938801e-07, "logits/chosen": -2.023267984390259, "logits/rejected": -2.2214479446411133, "logps/chosen": -105.30094146728516, "logps/rejected": -124.35954284667969, "loss": 0.0821, "rewards/accuracies": 1.0, "rewards/chosen": -1.0519134998321533, "rewards/margins": 2.0793204307556152, "rewards/rejected": -3.1312336921691895, "step": 1274 }, { "epoch": 2.19, "learning_rate": 4.2010199745006376e-07, "logits/chosen": -1.7508258819580078, "logits/rejected": -2.343829870223999, "logps/chosen": -93.9990463256836, "logps/rejected": -171.2802734375, "loss": 0.1316, "rewards/accuracies": 1.0, "rewards/chosen": -2.6082944869995117, "rewards/margins": 5.3103461265563965, "rewards/rejected": -7.918641090393066, "step": 1275 }, { "epoch": 2.2, "learning_rate": 4.199957501062473e-07, "logits/chosen": -1.9779622554779053, "logits/rejected": -2.136936902999878, "logps/chosen": -97.46266174316406, "logps/rejected": -167.35760498046875, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": -1.6527847051620483, "rewards/margins": 4.606916904449463, "rewards/rejected": -6.259701728820801, "step": 1276 }, { "epoch": 2.2, "learning_rate": 4.198895027624309e-07, "logits/chosen": -1.7174625396728516, "logits/rejected": -2.378200054168701, "logps/chosen": -83.17170715332031, "logps/rejected": -169.1759796142578, "loss": 0.0706, "rewards/accuracies": 1.0, "rewards/chosen": -1.2635765075683594, "rewards/margins": 6.324851036071777, "rewards/rejected": -7.588427543640137, "step": 1277 }, { "epoch": 2.2, "learning_rate": 4.1978325541861455e-07, "logits/chosen": -2.263075113296509, "logits/rejected": -2.222149133682251, "logps/chosen": -100.23387145996094, "logps/rejected": -129.68919372558594, "loss": 0.1441, "rewards/accuracies": 1.0, "rewards/chosen": -2.1618897914886475, "rewards/margins": 3.6236839294433594, "rewards/rejected": -5.785573482513428, "step": 1278 }, { "epoch": 2.2, "learning_rate": 4.196770080747981e-07, "logits/chosen": -2.150010347366333, "logits/rejected": -2.050218105316162, "logps/chosen": -106.5850601196289, "logps/rejected": -134.13320922851562, "loss": 0.1272, "rewards/accuracies": 1.0, "rewards/chosen": -2.3715438842773438, "rewards/margins": 3.2297449111938477, "rewards/rejected": -5.601288795471191, "step": 1279 }, { "epoch": 2.2, "learning_rate": 4.195707607309817e-07, "logits/chosen": -2.3171725273132324, "logits/rejected": -1.968691349029541, "logps/chosen": -90.01715850830078, "logps/rejected": -125.22489929199219, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": 0.03243998438119888, "rewards/margins": 5.0772576332092285, "rewards/rejected": -5.0448174476623535, "step": 1280 }, { "epoch": 2.2, "learning_rate": 4.194645133871653e-07, "logits/chosen": -2.0007472038269043, "logits/rejected": -2.1937215328216553, "logps/chosen": -96.87162780761719, "logps/rejected": -169.80770874023438, "loss": 0.1826, "rewards/accuracies": 1.0, "rewards/chosen": -1.2118676900863647, "rewards/margins": 5.991740703582764, "rewards/rejected": -7.203608512878418, "step": 1281 }, { "epoch": 2.21, "learning_rate": 4.193582660433489e-07, "logits/chosen": -2.29114031791687, "logits/rejected": -2.1754214763641357, "logps/chosen": -107.54200744628906, "logps/rejected": -160.87962341308594, "loss": 0.0787, "rewards/accuracies": 1.0, "rewards/chosen": -0.9427040219306946, "rewards/margins": 6.238637924194336, "rewards/rejected": -7.181342124938965, "step": 1282 }, { "epoch": 2.21, "learning_rate": 4.192520186995325e-07, "logits/chosen": -2.3525729179382324, "logits/rejected": -2.229146718978882, "logps/chosen": -97.86351776123047, "logps/rejected": -128.22132873535156, "loss": 0.112, "rewards/accuracies": 1.0, "rewards/chosen": -0.7537334561347961, "rewards/margins": 3.188690185546875, "rewards/rejected": -3.9424235820770264, "step": 1283 }, { "epoch": 2.21, "learning_rate": 4.191457713557161e-07, "logits/chosen": -2.2986645698547363, "logits/rejected": -2.2429006099700928, "logps/chosen": -116.16499328613281, "logps/rejected": -165.74472045898438, "loss": 0.1626, "rewards/accuracies": 1.0, "rewards/chosen": -1.6753407716751099, "rewards/margins": 3.9488673210144043, "rewards/rejected": -5.624207973480225, "step": 1284 }, { "epoch": 2.21, "learning_rate": 4.190395240118997e-07, "logits/chosen": -1.6312255859375, "logits/rejected": -2.2403109073638916, "logps/chosen": -104.36332702636719, "logps/rejected": -160.69058227539062, "loss": 0.1337, "rewards/accuracies": 1.0, "rewards/chosen": -2.824497699737549, "rewards/margins": 3.6100192070007324, "rewards/rejected": -6.434516906738281, "step": 1285 }, { "epoch": 2.21, "learning_rate": 4.1893327666808324e-07, "logits/chosen": -2.0864365100860596, "logits/rejected": -2.2517693042755127, "logps/chosen": -97.17280578613281, "logps/rejected": -166.43783569335938, "loss": 0.1886, "rewards/accuracies": 1.0, "rewards/chosen": -0.2828161120414734, "rewards/margins": 5.630297660827637, "rewards/rejected": -5.913114547729492, "step": 1286 }, { "epoch": 2.22, "learning_rate": 4.188270293242669e-07, "logits/chosen": -2.437497854232788, "logits/rejected": -2.0366408824920654, "logps/chosen": -107.31226348876953, "logps/rejected": -139.9816436767578, "loss": 0.2242, "rewards/accuracies": 1.0, "rewards/chosen": -1.2335846424102783, "rewards/margins": 4.614067077636719, "rewards/rejected": -5.847651958465576, "step": 1287 }, { "epoch": 2.22, "learning_rate": 4.187207819804505e-07, "logits/chosen": -2.206911087036133, "logits/rejected": -2.242579698562622, "logps/chosen": -92.27783966064453, "logps/rejected": -164.82858276367188, "loss": 0.1562, "rewards/accuracies": 1.0, "rewards/chosen": -1.1664196252822876, "rewards/margins": 6.274580955505371, "rewards/rejected": -7.440999984741211, "step": 1288 }, { "epoch": 2.22, "learning_rate": 4.1861453463663404e-07, "logits/chosen": -2.3760499954223633, "logits/rejected": -2.108393907546997, "logps/chosen": -112.4486083984375, "logps/rejected": -153.80636596679688, "loss": 0.1232, "rewards/accuracies": 1.0, "rewards/chosen": -1.761942982673645, "rewards/margins": 3.8246688842773438, "rewards/rejected": -5.586611747741699, "step": 1289 }, { "epoch": 2.22, "learning_rate": 4.185082872928177e-07, "logits/chosen": -2.094763994216919, "logits/rejected": -2.2143614292144775, "logps/chosen": -98.05624389648438, "logps/rejected": -160.4127655029297, "loss": 0.1249, "rewards/accuracies": 0.75, "rewards/chosen": -3.0429775714874268, "rewards/margins": 5.6675567626953125, "rewards/rejected": -8.710533142089844, "step": 1290 }, { "epoch": 2.22, "learning_rate": 4.1840203994900123e-07, "logits/chosen": -2.442208766937256, "logits/rejected": -2.0004031658172607, "logps/chosen": -111.41655731201172, "logps/rejected": -151.7835235595703, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -1.6092571020126343, "rewards/margins": 4.953114032745361, "rewards/rejected": -6.562371253967285, "step": 1291 }, { "epoch": 2.22, "learning_rate": 4.1829579260518483e-07, "logits/chosen": -2.2350122928619385, "logits/rejected": -1.982290267944336, "logps/chosen": -83.99266052246094, "logps/rejected": -134.75413513183594, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.9623576998710632, "rewards/margins": 5.464376449584961, "rewards/rejected": -6.42673397064209, "step": 1292 }, { "epoch": 2.23, "learning_rate": 4.181895452613685e-07, "logits/chosen": -2.2041287422180176, "logits/rejected": -2.2117578983306885, "logps/chosen": -93.60832977294922, "logps/rejected": -114.33732604980469, "loss": 0.1505, "rewards/accuracies": 0.75, "rewards/chosen": -2.269217014312744, "rewards/margins": 1.217953085899353, "rewards/rejected": -3.4871702194213867, "step": 1293 }, { "epoch": 2.23, "learning_rate": 4.1808329791755203e-07, "logits/chosen": -2.318479537963867, "logits/rejected": -1.9765055179595947, "logps/chosen": -112.60670471191406, "logps/rejected": -136.28990173339844, "loss": 0.1511, "rewards/accuracies": 0.75, "rewards/chosen": -1.608215093612671, "rewards/margins": 3.536113739013672, "rewards/rejected": -5.144329071044922, "step": 1294 }, { "epoch": 2.23, "learning_rate": 4.1797705057373563e-07, "logits/chosen": -2.2845940589904785, "logits/rejected": -2.424906015396118, "logps/chosen": -87.20394897460938, "logps/rejected": -165.23538208007812, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": 0.1820862740278244, "rewards/margins": 6.297550201416016, "rewards/rejected": -6.115463733673096, "step": 1295 }, { "epoch": 2.23, "learning_rate": 4.1787080322991923e-07, "logits/chosen": -2.0847108364105225, "logits/rejected": -2.3644871711730957, "logps/chosen": -103.35135650634766, "logps/rejected": -148.81805419921875, "loss": 0.1586, "rewards/accuracies": 1.0, "rewards/chosen": -1.3596034049987793, "rewards/margins": 4.148810386657715, "rewards/rejected": -5.508413791656494, "step": 1296 }, { "epoch": 2.23, "learning_rate": 4.1776455588610283e-07, "logits/chosen": -2.2279717922210693, "logits/rejected": -2.2912003993988037, "logps/chosen": -103.15364074707031, "logps/rejected": -154.5516815185547, "loss": 0.0909, "rewards/accuracies": 1.0, "rewards/chosen": -1.3267405033111572, "rewards/margins": 5.069252967834473, "rewards/rejected": -6.395993232727051, "step": 1297 }, { "epoch": 2.23, "learning_rate": 4.176583085422864e-07, "logits/chosen": -1.8318369388580322, "logits/rejected": -2.332411289215088, "logps/chosen": -97.86763000488281, "logps/rejected": -167.25039672851562, "loss": 0.1201, "rewards/accuracies": 1.0, "rewards/chosen": -1.2748241424560547, "rewards/margins": 5.203434467315674, "rewards/rejected": -6.478259086608887, "step": 1298 }, { "epoch": 2.24, "learning_rate": 4.1755206119847e-07, "logits/chosen": -2.1673178672790527, "logits/rejected": -2.1719467639923096, "logps/chosen": -113.38642883300781, "logps/rejected": -174.2973175048828, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/chosen": -2.2186245918273926, "rewards/margins": 5.496057510375977, "rewards/rejected": -7.714682579040527, "step": 1299 }, { "epoch": 2.24, "learning_rate": 4.174458138546536e-07, "logits/chosen": -2.495152711868286, "logits/rejected": -2.3302435874938965, "logps/chosen": -108.05442810058594, "logps/rejected": -138.72291564941406, "loss": 0.2304, "rewards/accuracies": 0.75, "rewards/chosen": -1.185943603515625, "rewards/margins": 3.0254693031311035, "rewards/rejected": -4.2114129066467285, "step": 1300 }, { "epoch": 2.24, "learning_rate": 4.173395665108372e-07, "logits/chosen": -2.327087879180908, "logits/rejected": -2.4189324378967285, "logps/chosen": -90.66694641113281, "logps/rejected": -148.64285278320312, "loss": 0.1265, "rewards/accuracies": 1.0, "rewards/chosen": -2.5564963817596436, "rewards/margins": 3.3707714080810547, "rewards/rejected": -5.927268028259277, "step": 1301 }, { "epoch": 2.24, "learning_rate": 4.172333191670208e-07, "logits/chosen": -2.4393415451049805, "logits/rejected": -2.267655372619629, "logps/chosen": -102.75399017333984, "logps/rejected": -154.74310302734375, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -0.5413484573364258, "rewards/margins": 5.94553279876709, "rewards/rejected": -6.486880779266357, "step": 1302 }, { "epoch": 2.24, "learning_rate": 4.1712707182320437e-07, "logits/chosen": -2.120020866394043, "logits/rejected": -2.3439781665802, "logps/chosen": -77.96499633789062, "logps/rejected": -158.00477600097656, "loss": 0.0805, "rewards/accuracies": 1.0, "rewards/chosen": -0.03623145818710327, "rewards/margins": 7.858121871948242, "rewards/rejected": -7.894353866577148, "step": 1303 }, { "epoch": 2.24, "learning_rate": 4.17020824479388e-07, "logits/chosen": -1.9181182384490967, "logits/rejected": -2.289292335510254, "logps/chosen": -87.64704895019531, "logps/rejected": -164.693603515625, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -0.886329710483551, "rewards/margins": 6.574995040893555, "rewards/rejected": -7.461325645446777, "step": 1304 }, { "epoch": 2.25, "learning_rate": 4.169145771355716e-07, "logits/chosen": -2.2033209800720215, "logits/rejected": -2.0188040733337402, "logps/chosen": -110.8777084350586, "logps/rejected": -140.25828552246094, "loss": 0.1281, "rewards/accuracies": 0.75, "rewards/chosen": -1.1338003873825073, "rewards/margins": 4.294731140136719, "rewards/rejected": -5.428532123565674, "step": 1305 }, { "epoch": 2.25, "learning_rate": 4.1680832979175516e-07, "logits/chosen": -2.1728198528289795, "logits/rejected": -2.268925905227661, "logps/chosen": -115.57646942138672, "logps/rejected": -174.13067626953125, "loss": 0.1177, "rewards/accuracies": 1.0, "rewards/chosen": -1.3278511762619019, "rewards/margins": 5.2723388671875, "rewards/rejected": -6.600190162658691, "step": 1306 }, { "epoch": 2.25, "learning_rate": 4.167020824479388e-07, "logits/chosen": -2.3359968662261963, "logits/rejected": -2.178020715713501, "logps/chosen": -65.8427734375, "logps/rejected": -134.01193237304688, "loss": 0.1345, "rewards/accuracies": 1.0, "rewards/chosen": -0.1360747218132019, "rewards/margins": 6.867462158203125, "rewards/rejected": -7.003536701202393, "step": 1307 }, { "epoch": 2.25, "learning_rate": 4.1659583510412236e-07, "logits/chosen": -2.110903024673462, "logits/rejected": -2.2906715869903564, "logps/chosen": -93.11601257324219, "logps/rejected": -160.887451171875, "loss": 0.2168, "rewards/accuracies": 0.75, "rewards/chosen": -2.153923511505127, "rewards/margins": 5.110956192016602, "rewards/rejected": -7.26487922668457, "step": 1308 }, { "epoch": 2.25, "learning_rate": 4.1648958776030596e-07, "logits/chosen": -1.7888119220733643, "logits/rejected": -2.185727834701538, "logps/chosen": -65.93470764160156, "logps/rejected": -160.45257568359375, "loss": 0.2216, "rewards/accuracies": 1.0, "rewards/chosen": -0.4260258972644806, "rewards/margins": 6.601093292236328, "rewards/rejected": -7.027118682861328, "step": 1309 }, { "epoch": 2.25, "learning_rate": 4.163833404164896e-07, "logits/chosen": -2.2841525077819824, "logits/rejected": -2.419959783554077, "logps/chosen": -95.3011474609375, "logps/rejected": -157.11624145507812, "loss": 0.095, "rewards/accuracies": 1.0, "rewards/chosen": -1.1241278648376465, "rewards/margins": 4.472377300262451, "rewards/rejected": -5.5965046882629395, "step": 1310 }, { "epoch": 2.26, "learning_rate": 4.1627709307267316e-07, "logits/chosen": -1.9526318311691284, "logits/rejected": -2.4657747745513916, "logps/chosen": -67.54739379882812, "logps/rejected": -137.57505798339844, "loss": 0.1867, "rewards/accuracies": 1.0, "rewards/chosen": -0.13312463462352753, "rewards/margins": 6.191539287567139, "rewards/rejected": -6.324664115905762, "step": 1311 }, { "epoch": 2.26, "learning_rate": 4.1617084572885676e-07, "logits/chosen": -2.2285757064819336, "logits/rejected": -2.2765250205993652, "logps/chosen": -97.59587097167969, "logps/rejected": -173.5501708984375, "loss": 0.0526, "rewards/accuracies": 1.0, "rewards/chosen": -0.7833637595176697, "rewards/margins": 7.671820163726807, "rewards/rejected": -8.455183029174805, "step": 1312 }, { "epoch": 2.26, "learning_rate": 4.1606459838504036e-07, "logits/chosen": -2.2523903846740723, "logits/rejected": -2.1645455360412598, "logps/chosen": -114.76345825195312, "logps/rejected": -165.42388916015625, "loss": 0.1607, "rewards/accuracies": 1.0, "rewards/chosen": -0.8407341241836548, "rewards/margins": 6.148723125457764, "rewards/rejected": -6.989457130432129, "step": 1313 }, { "epoch": 2.26, "learning_rate": 4.1595835104122396e-07, "logits/chosen": -2.1890711784362793, "logits/rejected": -2.2264318466186523, "logps/chosen": -103.88834381103516, "logps/rejected": -158.27194213867188, "loss": 0.1217, "rewards/accuracies": 1.0, "rewards/chosen": -0.6358997225761414, "rewards/margins": 7.135467529296875, "rewards/rejected": -7.771368026733398, "step": 1314 }, { "epoch": 2.26, "learning_rate": 4.1585210369740756e-07, "logits/chosen": -2.091601848602295, "logits/rejected": -2.1543667316436768, "logps/chosen": -98.33070373535156, "logps/rejected": -135.69058227539062, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -0.3374428153038025, "rewards/margins": 5.122913837432861, "rewards/rejected": -5.460356712341309, "step": 1315 }, { "epoch": 2.27, "learning_rate": 4.1574585635359115e-07, "logits/chosen": -2.2566893100738525, "logits/rejected": -2.3767828941345215, "logps/chosen": -92.38188934326172, "logps/rejected": -162.51980590820312, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": -1.0254125595092773, "rewards/margins": 5.163944721221924, "rewards/rejected": -6.189357280731201, "step": 1316 }, { "epoch": 2.27, "learning_rate": 4.1563960900977475e-07, "logits/chosen": -2.12043833732605, "logits/rejected": -2.1501693725585938, "logps/chosen": -90.97962951660156, "logps/rejected": -150.42454528808594, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.07101878523826599, "rewards/margins": 6.185252666473389, "rewards/rejected": -6.11423397064209, "step": 1317 }, { "epoch": 2.27, "learning_rate": 4.155333616659583e-07, "logits/chosen": -2.2920262813568115, "logits/rejected": -2.005354881286621, "logps/chosen": -103.481689453125, "logps/rejected": -128.23751831054688, "loss": 0.1184, "rewards/accuracies": 1.0, "rewards/chosen": -1.6249668598175049, "rewards/margins": 3.484480381011963, "rewards/rejected": -5.109447002410889, "step": 1318 }, { "epoch": 2.27, "learning_rate": 4.1542711432214195e-07, "logits/chosen": -2.2874982357025146, "logits/rejected": -2.3668322563171387, "logps/chosen": -121.60479736328125, "logps/rejected": -176.20846557617188, "loss": 0.1207, "rewards/accuracies": 1.0, "rewards/chosen": -1.357918620109558, "rewards/margins": 5.802546501159668, "rewards/rejected": -7.160465240478516, "step": 1319 }, { "epoch": 2.27, "learning_rate": 4.1532086697832555e-07, "logits/chosen": -1.8056137561798096, "logits/rejected": -2.4683353900909424, "logps/chosen": -85.45186614990234, "logps/rejected": -166.46656799316406, "loss": 0.1553, "rewards/accuracies": 1.0, "rewards/chosen": -0.4516572952270508, "rewards/margins": 6.449044704437256, "rewards/rejected": -6.900701999664307, "step": 1320 }, { "epoch": 2.27, "learning_rate": 4.152146196345091e-07, "logits/chosen": -2.0128016471862793, "logits/rejected": -2.422262668609619, "logps/chosen": -92.68461608886719, "logps/rejected": -126.13199615478516, "loss": 0.3517, "rewards/accuracies": 0.5, "rewards/chosen": -2.1061556339263916, "rewards/margins": 2.3578948974609375, "rewards/rejected": -4.46405029296875, "step": 1321 }, { "epoch": 2.28, "learning_rate": 4.1510837229069275e-07, "logits/chosen": -2.0734305381774902, "logits/rejected": -2.2662227153778076, "logps/chosen": -92.9200210571289, "logps/rejected": -147.6220703125, "loss": 0.2639, "rewards/accuracies": 1.0, "rewards/chosen": 0.29981711506843567, "rewards/margins": 5.983001232147217, "rewards/rejected": -5.6831841468811035, "step": 1322 }, { "epoch": 2.28, "learning_rate": 4.150021249468763e-07, "logits/chosen": -2.0650410652160645, "logits/rejected": -2.2995095252990723, "logps/chosen": -95.5904312133789, "logps/rejected": -168.52743530273438, "loss": 0.0901, "rewards/accuracies": 1.0, "rewards/chosen": -0.5955116748809814, "rewards/margins": 4.553029537200928, "rewards/rejected": -5.148541450500488, "step": 1323 }, { "epoch": 2.28, "learning_rate": 4.148958776030599e-07, "logits/chosen": -1.5102111101150513, "logits/rejected": -2.4390339851379395, "logps/chosen": -91.3009033203125, "logps/rejected": -156.01296997070312, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": -1.8036646842956543, "rewards/margins": 4.220298767089844, "rewards/rejected": -6.023963451385498, "step": 1324 }, { "epoch": 2.28, "learning_rate": 4.147896302592435e-07, "logits/chosen": -1.9792249202728271, "logits/rejected": -2.237436532974243, "logps/chosen": -98.80934143066406, "logps/rejected": -142.31158447265625, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": -1.108260154724121, "rewards/margins": 4.242677211761475, "rewards/rejected": -5.350937366485596, "step": 1325 }, { "epoch": 2.28, "learning_rate": 4.146833829154271e-07, "logits/chosen": -1.9493330717086792, "logits/rejected": -2.2478878498077393, "logps/chosen": -101.49049377441406, "logps/rejected": -159.2839813232422, "loss": 0.1459, "rewards/accuracies": 1.0, "rewards/chosen": -1.846297264099121, "rewards/margins": 4.995800495147705, "rewards/rejected": -6.842097282409668, "step": 1326 }, { "epoch": 2.28, "learning_rate": 4.145771355716107e-07, "logits/chosen": -2.17624568939209, "logits/rejected": -2.3446075916290283, "logps/chosen": -110.49114990234375, "logps/rejected": -152.41610717773438, "loss": 0.1491, "rewards/accuracies": 1.0, "rewards/chosen": -1.7723411321640015, "rewards/margins": 4.048626899719238, "rewards/rejected": -5.820967674255371, "step": 1327 }, { "epoch": 2.29, "learning_rate": 4.144708882277943e-07, "logits/chosen": -2.116255283355713, "logits/rejected": -2.1186280250549316, "logps/chosen": -93.87855529785156, "logps/rejected": -147.74038696289062, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": -1.0639771223068237, "rewards/margins": 4.997200012207031, "rewards/rejected": -6.0611772537231445, "step": 1328 }, { "epoch": 2.29, "learning_rate": 4.143646408839779e-07, "logits/chosen": -2.355384111404419, "logits/rejected": -2.183234453201294, "logps/chosen": -101.82606506347656, "logps/rejected": -124.75930786132812, "loss": 0.0662, "rewards/accuracies": 0.75, "rewards/chosen": -2.4022223949432373, "rewards/margins": 2.678556203842163, "rewards/rejected": -5.0807785987854, "step": 1329 }, { "epoch": 2.29, "learning_rate": 4.1425839354016143e-07, "logits/chosen": -2.152466058731079, "logits/rejected": -2.1419291496276855, "logps/chosen": -71.08549499511719, "logps/rejected": -157.34536743164062, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": -0.27639174461364746, "rewards/margins": 7.575502872467041, "rewards/rejected": -7.851894378662109, "step": 1330 }, { "epoch": 2.29, "learning_rate": 4.141521461963451e-07, "logits/chosen": -2.015881299972534, "logits/rejected": -2.1963095664978027, "logps/chosen": -79.92100524902344, "logps/rejected": -110.8766860961914, "loss": 0.0877, "rewards/accuracies": 1.0, "rewards/chosen": 0.08988934010267258, "rewards/margins": 2.698577404022217, "rewards/rejected": -2.6086881160736084, "step": 1331 }, { "epoch": 2.29, "learning_rate": 4.140458988525287e-07, "logits/chosen": -1.714046597480774, "logits/rejected": -2.3234517574310303, "logps/chosen": -98.21404266357422, "logps/rejected": -164.2625732421875, "loss": 0.1565, "rewards/accuracies": 1.0, "rewards/chosen": -1.9253755807876587, "rewards/margins": 5.963748931884766, "rewards/rejected": -7.889124870300293, "step": 1332 }, { "epoch": 2.29, "learning_rate": 4.1393965150871223e-07, "logits/chosen": -1.8850091695785522, "logits/rejected": -2.305469036102295, "logps/chosen": -85.45271301269531, "logps/rejected": -130.76473999023438, "loss": 0.1106, "rewards/accuracies": 0.75, "rewards/chosen": -0.548653244972229, "rewards/margins": 3.1810154914855957, "rewards/rejected": -3.729668617248535, "step": 1333 }, { "epoch": 2.3, "learning_rate": 4.138334041648959e-07, "logits/chosen": -1.9044945240020752, "logits/rejected": -2.1663780212402344, "logps/chosen": -111.88489532470703, "logps/rejected": -149.66424560546875, "loss": 0.1506, "rewards/accuracies": 1.0, "rewards/chosen": -2.115305185317993, "rewards/margins": 3.728606700897217, "rewards/rejected": -5.843912124633789, "step": 1334 }, { "epoch": 2.3, "learning_rate": 4.1372715682107943e-07, "logits/chosen": -2.2275800704956055, "logits/rejected": -2.3513829708099365, "logps/chosen": -98.40498352050781, "logps/rejected": -151.0648193359375, "loss": 0.0786, "rewards/accuracies": 1.0, "rewards/chosen": -1.4198095798492432, "rewards/margins": 4.347517490386963, "rewards/rejected": -5.767326831817627, "step": 1335 }, { "epoch": 2.3, "learning_rate": 4.13620909477263e-07, "logits/chosen": -2.2486650943756104, "logits/rejected": -1.869779348373413, "logps/chosen": -117.80976867675781, "logps/rejected": -143.83607482910156, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -0.893639087677002, "rewards/margins": 4.585647106170654, "rewards/rejected": -5.479286193847656, "step": 1336 }, { "epoch": 2.3, "learning_rate": 4.135146621334467e-07, "logits/chosen": -2.414531707763672, "logits/rejected": -2.206172466278076, "logps/chosen": -101.66046142578125, "logps/rejected": -135.93032836914062, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": -1.3997515439987183, "rewards/margins": 3.7533230781555176, "rewards/rejected": -5.153074741363525, "step": 1337 }, { "epoch": 2.3, "learning_rate": 4.134084147896302e-07, "logits/chosen": -2.141629219055176, "logits/rejected": -2.258349895477295, "logps/chosen": -102.83531188964844, "logps/rejected": -177.12705993652344, "loss": 0.0979, "rewards/accuracies": 1.0, "rewards/chosen": -0.15238399803638458, "rewards/margins": 7.7816972732543945, "rewards/rejected": -7.934081077575684, "step": 1338 }, { "epoch": 2.3, "learning_rate": 4.133021674458138e-07, "logits/chosen": -2.0438120365142822, "logits/rejected": -2.301563262939453, "logps/chosen": -87.82632446289062, "logps/rejected": -176.6973876953125, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -0.8495903611183167, "rewards/margins": 6.678812026977539, "rewards/rejected": -7.528402328491211, "step": 1339 }, { "epoch": 2.31, "learning_rate": 4.131959201019974e-07, "logits/chosen": -2.2710371017456055, "logits/rejected": -1.9261014461517334, "logps/chosen": -88.73933410644531, "logps/rejected": -126.86865997314453, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.07290056347846985, "rewards/margins": 4.654301643371582, "rewards/rejected": -4.5814008712768555, "step": 1340 }, { "epoch": 2.31, "learning_rate": 4.13089672758181e-07, "logits/chosen": -2.0987775325775146, "logits/rejected": -2.063540458679199, "logps/chosen": -103.36471557617188, "logps/rejected": -153.93838500976562, "loss": 0.0962, "rewards/accuracies": 1.0, "rewards/chosen": -1.720984697341919, "rewards/margins": 5.258133888244629, "rewards/rejected": -6.979118347167969, "step": 1341 }, { "epoch": 2.31, "learning_rate": 4.129834254143646e-07, "logits/chosen": -1.9893896579742432, "logits/rejected": -2.0400278568267822, "logps/chosen": -108.66059875488281, "logps/rejected": -166.75836181640625, "loss": 0.2766, "rewards/accuracies": 1.0, "rewards/chosen": -0.49271470308303833, "rewards/margins": 5.026355266571045, "rewards/rejected": -5.519069671630859, "step": 1342 }, { "epoch": 2.31, "learning_rate": 4.128771780705482e-07, "logits/chosen": -1.8493726253509521, "logits/rejected": -2.2972524166107178, "logps/chosen": -112.47754669189453, "logps/rejected": -172.04043579101562, "loss": 0.1088, "rewards/accuracies": 0.75, "rewards/chosen": -3.3984689712524414, "rewards/margins": 4.359755516052246, "rewards/rejected": -7.7582244873046875, "step": 1343 }, { "epoch": 2.31, "learning_rate": 4.127709307267318e-07, "logits/chosen": -2.207115411758423, "logits/rejected": -2.2773923873901367, "logps/chosen": -108.70912170410156, "logps/rejected": -163.35198974609375, "loss": 0.2088, "rewards/accuracies": 1.0, "rewards/chosen": -0.9558664560317993, "rewards/margins": 5.292983531951904, "rewards/rejected": -6.248849868774414, "step": 1344 }, { "epoch": 2.31, "learning_rate": 4.126646833829154e-07, "logits/chosen": -2.3973007202148438, "logits/rejected": -2.060807466506958, "logps/chosen": -89.64947509765625, "logps/rejected": -186.0759735107422, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.8515211343765259, "rewards/margins": 10.172447204589844, "rewards/rejected": -11.023968696594238, "step": 1345 }, { "epoch": 2.32, "learning_rate": 4.12558436039099e-07, "logits/chosen": -2.2618601322174072, "logits/rejected": -2.1541099548339844, "logps/chosen": -97.810546875, "logps/rejected": -137.62307739257812, "loss": 0.081, "rewards/accuracies": 0.75, "rewards/chosen": -0.7700639367103577, "rewards/margins": 4.421426773071289, "rewards/rejected": -5.19149112701416, "step": 1346 }, { "epoch": 2.32, "learning_rate": 4.124521886952826e-07, "logits/chosen": -2.3447911739349365, "logits/rejected": -2.240541696548462, "logps/chosen": -108.12669372558594, "logps/rejected": -154.25726318359375, "loss": 0.2347, "rewards/accuracies": 1.0, "rewards/chosen": -2.9069530963897705, "rewards/margins": 3.7154767513275146, "rewards/rejected": -6.622429847717285, "step": 1347 }, { "epoch": 2.32, "learning_rate": 4.123459413514662e-07, "logits/chosen": -2.3205909729003906, "logits/rejected": -2.2933871746063232, "logps/chosen": -95.3233642578125, "logps/rejected": -171.76014709472656, "loss": 0.0892, "rewards/accuracies": 1.0, "rewards/chosen": -0.5154991149902344, "rewards/margins": 7.392580032348633, "rewards/rejected": -7.908079147338867, "step": 1348 }, { "epoch": 2.32, "learning_rate": 4.122396940076498e-07, "logits/chosen": -1.8340245485305786, "logits/rejected": -2.3543365001678467, "logps/chosen": -96.58427429199219, "logps/rejected": -127.03543090820312, "loss": 0.1097, "rewards/accuracies": 1.0, "rewards/chosen": -0.9056926369667053, "rewards/margins": 1.6841602325439453, "rewards/rejected": -2.589852809906006, "step": 1349 }, { "epoch": 2.32, "learning_rate": 4.1213344666383336e-07, "logits/chosen": -1.927594780921936, "logits/rejected": -2.296163320541382, "logps/chosen": -88.05714416503906, "logps/rejected": -187.57998657226562, "loss": 0.0982, "rewards/accuracies": 1.0, "rewards/chosen": -0.8183477520942688, "rewards/margins": 7.108116149902344, "rewards/rejected": -7.926464080810547, "step": 1350 }, { "epoch": 2.33, "learning_rate": 4.12027199320017e-07, "logits/chosen": -1.9980098009109497, "logits/rejected": -2.154071807861328, "logps/chosen": -113.24839782714844, "logps/rejected": -149.38206481933594, "loss": 0.1831, "rewards/accuracies": 1.0, "rewards/chosen": -1.7424099445343018, "rewards/margins": 3.6262731552124023, "rewards/rejected": -5.368682861328125, "step": 1351 }, { "epoch": 2.33, "learning_rate": 4.1192095197620056e-07, "logits/chosen": -2.2637882232666016, "logits/rejected": -2.179710626602173, "logps/chosen": -131.10826110839844, "logps/rejected": -178.88465881347656, "loss": 0.1973, "rewards/accuracies": 0.75, "rewards/chosen": -3.1864681243896484, "rewards/margins": 3.7394580841064453, "rewards/rejected": -6.925926208496094, "step": 1352 }, { "epoch": 2.33, "learning_rate": 4.1181470463238415e-07, "logits/chosen": -2.0215065479278564, "logits/rejected": -2.153583526611328, "logps/chosen": -97.51136779785156, "logps/rejected": -155.199462890625, "loss": 0.0679, "rewards/accuracies": 1.0, "rewards/chosen": -0.3547472357749939, "rewards/margins": 4.986176490783691, "rewards/rejected": -5.34092378616333, "step": 1353 }, { "epoch": 2.33, "learning_rate": 4.117084572885678e-07, "logits/chosen": -2.108053684234619, "logits/rejected": -2.2195255756378174, "logps/chosen": -122.27276611328125, "logps/rejected": -188.1234130859375, "loss": 0.1652, "rewards/accuracies": 1.0, "rewards/chosen": -1.911881685256958, "rewards/margins": 6.465202808380127, "rewards/rejected": -8.377084732055664, "step": 1354 }, { "epoch": 2.33, "learning_rate": 4.1160220994475135e-07, "logits/chosen": -2.3643083572387695, "logits/rejected": -1.6919806003570557, "logps/chosen": -132.73239135742188, "logps/rejected": -181.253173828125, "loss": 0.1065, "rewards/accuracies": 1.0, "rewards/chosen": -1.8161509037017822, "rewards/margins": 6.156424045562744, "rewards/rejected": -7.9725751876831055, "step": 1355 }, { "epoch": 2.33, "learning_rate": 4.1149596260093495e-07, "logits/chosen": -2.2567896842956543, "logits/rejected": -2.2590832710266113, "logps/chosen": -90.97392272949219, "logps/rejected": -122.86026000976562, "loss": 0.2109, "rewards/accuracies": 1.0, "rewards/chosen": -1.582524061203003, "rewards/margins": 2.834962844848633, "rewards/rejected": -4.417487144470215, "step": 1356 }, { "epoch": 2.34, "learning_rate": 4.1138971525711855e-07, "logits/chosen": -1.839383840560913, "logits/rejected": -2.3215343952178955, "logps/chosen": -89.32929992675781, "logps/rejected": -203.9073486328125, "loss": 0.0747, "rewards/accuracies": 1.0, "rewards/chosen": -0.704054057598114, "rewards/margins": 9.478068351745605, "rewards/rejected": -10.182123184204102, "step": 1357 }, { "epoch": 2.34, "learning_rate": 4.1128346791330215e-07, "logits/chosen": -2.046652317047119, "logits/rejected": -2.4118525981903076, "logps/chosen": -108.46929168701172, "logps/rejected": -136.96170043945312, "loss": 0.1349, "rewards/accuracies": 0.75, "rewards/chosen": -1.222778558731079, "rewards/margins": 3.5142874717712402, "rewards/rejected": -4.737066268920898, "step": 1358 }, { "epoch": 2.34, "learning_rate": 4.1117722056948575e-07, "logits/chosen": -2.1245830059051514, "logits/rejected": -2.399984836578369, "logps/chosen": -101.10188293457031, "logps/rejected": -164.90725708007812, "loss": 0.0893, "rewards/accuracies": 1.0, "rewards/chosen": -1.1855417490005493, "rewards/margins": 5.496803283691406, "rewards/rejected": -6.682344913482666, "step": 1359 }, { "epoch": 2.34, "learning_rate": 4.1107097322566935e-07, "logits/chosen": -2.4032130241394043, "logits/rejected": -1.9928853511810303, "logps/chosen": -110.19717407226562, "logps/rejected": -141.50808715820312, "loss": 0.1116, "rewards/accuracies": 0.75, "rewards/chosen": -1.772007942199707, "rewards/margins": 4.560917377471924, "rewards/rejected": -6.332925319671631, "step": 1360 }, { "epoch": 2.34, "learning_rate": 4.1096472588185295e-07, "logits/chosen": -2.180450439453125, "logits/rejected": -2.3027071952819824, "logps/chosen": -113.29811096191406, "logps/rejected": -162.92044067382812, "loss": 0.2282, "rewards/accuracies": 0.75, "rewards/chosen": -2.467668056488037, "rewards/margins": 4.65606164932251, "rewards/rejected": -7.123729228973389, "step": 1361 }, { "epoch": 2.34, "learning_rate": 4.108584785380365e-07, "logits/chosen": -1.8753336668014526, "logits/rejected": -2.3032736778259277, "logps/chosen": -92.77318572998047, "logps/rejected": -155.22047424316406, "loss": 0.1199, "rewards/accuracies": 1.0, "rewards/chosen": -0.27395710349082947, "rewards/margins": 4.129103660583496, "rewards/rejected": -4.4030609130859375, "step": 1362 }, { "epoch": 2.35, "learning_rate": 4.1075223119422014e-07, "logits/chosen": -2.258572578430176, "logits/rejected": -2.0859789848327637, "logps/chosen": -92.41820526123047, "logps/rejected": -176.09622192382812, "loss": 0.2158, "rewards/accuracies": 1.0, "rewards/chosen": -0.19739240407943726, "rewards/margins": 9.233882904052734, "rewards/rejected": -9.431275367736816, "step": 1363 }, { "epoch": 2.35, "learning_rate": 4.1064598385040374e-07, "logits/chosen": -2.2251009941101074, "logits/rejected": -2.1305196285247803, "logps/chosen": -119.77574157714844, "logps/rejected": -134.0636444091797, "loss": 0.1451, "rewards/accuracies": 1.0, "rewards/chosen": -1.6189806461334229, "rewards/margins": 2.9505715370178223, "rewards/rejected": -4.569552421569824, "step": 1364 }, { "epoch": 2.35, "learning_rate": 4.105397365065873e-07, "logits/chosen": -2.0787644386291504, "logits/rejected": -2.2486324310302734, "logps/chosen": -106.95022583007812, "logps/rejected": -175.14892578125, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -0.8453166484832764, "rewards/margins": 6.698525428771973, "rewards/rejected": -7.54384183883667, "step": 1365 }, { "epoch": 2.35, "learning_rate": 4.1043348916277094e-07, "logits/chosen": -2.101682186126709, "logits/rejected": -2.209048271179199, "logps/chosen": -108.01502990722656, "logps/rejected": -146.25364685058594, "loss": 0.136, "rewards/accuracies": 1.0, "rewards/chosen": -2.617140531539917, "rewards/margins": 3.730895757675171, "rewards/rejected": -6.348036766052246, "step": 1366 }, { "epoch": 2.35, "learning_rate": 4.103272418189545e-07, "logits/chosen": -2.4123010635375977, "logits/rejected": -2.1000449657440186, "logps/chosen": -110.3368911743164, "logps/rejected": -133.250732421875, "loss": 0.1245, "rewards/accuracies": 1.0, "rewards/chosen": -1.6029372215270996, "rewards/margins": 3.3190197944641113, "rewards/rejected": -4.921957015991211, "step": 1367 }, { "epoch": 2.35, "learning_rate": 4.102209944751381e-07, "logits/chosen": -2.1740293502807617, "logits/rejected": -2.3974967002868652, "logps/chosen": -92.41410827636719, "logps/rejected": -170.53517150878906, "loss": 0.1204, "rewards/accuracies": 1.0, "rewards/chosen": -1.1183935403823853, "rewards/margins": 5.94087028503418, "rewards/rejected": -7.059264659881592, "step": 1368 }, { "epoch": 2.36, "learning_rate": 4.1011474713132174e-07, "logits/chosen": -2.2935967445373535, "logits/rejected": -2.178558826446533, "logps/chosen": -98.824951171875, "logps/rejected": -138.06243896484375, "loss": 0.1064, "rewards/accuracies": 1.0, "rewards/chosen": -0.8155624866485596, "rewards/margins": 4.610467910766602, "rewards/rejected": -5.42603063583374, "step": 1369 }, { "epoch": 2.36, "learning_rate": 4.100084997875053e-07, "logits/chosen": -2.393786907196045, "logits/rejected": -1.7209324836730957, "logps/chosen": -100.10519409179688, "logps/rejected": -140.114501953125, "loss": 0.1154, "rewards/accuracies": 1.0, "rewards/chosen": -1.4297415018081665, "rewards/margins": 4.783689022064209, "rewards/rejected": -6.213430404663086, "step": 1370 }, { "epoch": 2.36, "learning_rate": 4.099022524436889e-07, "logits/chosen": -2.068312883377075, "logits/rejected": -2.168271064758301, "logps/chosen": -89.75553894042969, "logps/rejected": -156.73843383789062, "loss": 0.1236, "rewards/accuracies": 1.0, "rewards/chosen": -0.7121545672416687, "rewards/margins": 5.571352958679199, "rewards/rejected": -6.283507347106934, "step": 1371 }, { "epoch": 2.36, "learning_rate": 4.097960050998725e-07, "logits/chosen": -2.26712703704834, "logits/rejected": -2.3355801105499268, "logps/chosen": -86.1874771118164, "logps/rejected": -151.29620361328125, "loss": 0.0991, "rewards/accuracies": 1.0, "rewards/chosen": -0.6403204202651978, "rewards/margins": 5.993465900421143, "rewards/rejected": -6.633786201477051, "step": 1372 }, { "epoch": 2.36, "learning_rate": 4.096897577560561e-07, "logits/chosen": -2.246295213699341, "logits/rejected": -2.2412877082824707, "logps/chosen": -100.65398406982422, "logps/rejected": -149.4327850341797, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -1.3842382431030273, "rewards/margins": 4.741722106933594, "rewards/rejected": -6.125960350036621, "step": 1373 }, { "epoch": 2.36, "learning_rate": 4.095835104122397e-07, "logits/chosen": -2.363473415374756, "logits/rejected": -2.308960437774658, "logps/chosen": -92.12944793701172, "logps/rejected": -126.54579162597656, "loss": 0.0918, "rewards/accuracies": 1.0, "rewards/chosen": -1.361724615097046, "rewards/margins": 3.316838264465332, "rewards/rejected": -4.678562641143799, "step": 1374 }, { "epoch": 2.37, "learning_rate": 4.094772630684233e-07, "logits/chosen": -2.2908709049224854, "logits/rejected": -1.890726923942566, "logps/chosen": -115.15910339355469, "logps/rejected": -153.60923767089844, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": -1.6248539686203003, "rewards/margins": 5.655057907104492, "rewards/rejected": -7.279911994934082, "step": 1375 }, { "epoch": 2.37, "learning_rate": 4.093710157246069e-07, "logits/chosen": -1.9324917793273926, "logits/rejected": -2.084477424621582, "logps/chosen": -105.27093505859375, "logps/rejected": -139.7293701171875, "loss": 0.1515, "rewards/accuracies": 1.0, "rewards/chosen": -1.040345549583435, "rewards/margins": 4.012147903442383, "rewards/rejected": -5.052493095397949, "step": 1376 }, { "epoch": 2.37, "learning_rate": 4.092647683807904e-07, "logits/chosen": -2.1250035762786865, "logits/rejected": -2.360963821411133, "logps/chosen": -128.22979736328125, "logps/rejected": -182.39183044433594, "loss": 0.1954, "rewards/accuracies": 1.0, "rewards/chosen": -2.025693893432617, "rewards/margins": 5.553216934204102, "rewards/rejected": -7.578911304473877, "step": 1377 }, { "epoch": 2.37, "learning_rate": 4.091585210369741e-07, "logits/chosen": -2.3528082370758057, "logits/rejected": -2.362382411956787, "logps/chosen": -104.5551986694336, "logps/rejected": -166.111328125, "loss": 0.1254, "rewards/accuracies": 1.0, "rewards/chosen": -0.4587583541870117, "rewards/margins": 7.428668975830078, "rewards/rejected": -7.88742733001709, "step": 1378 }, { "epoch": 2.37, "learning_rate": 4.090522736931576e-07, "logits/chosen": -2.418271064758301, "logits/rejected": -2.323765993118286, "logps/chosen": -89.56594848632812, "logps/rejected": -157.42420959472656, "loss": 0.0742, "rewards/accuracies": 1.0, "rewards/chosen": -0.33154773712158203, "rewards/margins": 7.300783157348633, "rewards/rejected": -7.632330894470215, "step": 1379 }, { "epoch": 2.38, "learning_rate": 4.089460263493412e-07, "logits/chosen": -2.2539663314819336, "logits/rejected": -2.4243974685668945, "logps/chosen": -125.67166900634766, "logps/rejected": -141.53671264648438, "loss": 0.1263, "rewards/accuracies": 0.5, "rewards/chosen": -2.578080177307129, "rewards/margins": 1.7970666885375977, "rewards/rejected": -4.375146865844727, "step": 1380 }, { "epoch": 2.38, "learning_rate": 4.0883977900552487e-07, "logits/chosen": -2.09934663772583, "logits/rejected": -2.1965126991271973, "logps/chosen": -87.53942108154297, "logps/rejected": -134.8699951171875, "loss": 0.2804, "rewards/accuracies": 0.75, "rewards/chosen": 0.01979956030845642, "rewards/margins": 3.2056503295898438, "rewards/rejected": -3.1858508586883545, "step": 1381 }, { "epoch": 2.38, "learning_rate": 4.087335316617084e-07, "logits/chosen": -2.233954429626465, "logits/rejected": -2.2447898387908936, "logps/chosen": -75.08415985107422, "logps/rejected": -155.03250122070312, "loss": 0.2753, "rewards/accuracies": 1.0, "rewards/chosen": -0.47427406907081604, "rewards/margins": 7.836857318878174, "rewards/rejected": -8.31113052368164, "step": 1382 }, { "epoch": 2.38, "learning_rate": 4.08627284317892e-07, "logits/chosen": -2.404334306716919, "logits/rejected": -2.273761749267578, "logps/chosen": -97.29655456542969, "logps/rejected": -105.48600006103516, "loss": 0.1607, "rewards/accuracies": 0.75, "rewards/chosen": -1.4571688175201416, "rewards/margins": 0.5376030206680298, "rewards/rejected": -1.9947717189788818, "step": 1383 }, { "epoch": 2.38, "learning_rate": 4.085210369740756e-07, "logits/chosen": -2.2579150199890137, "logits/rejected": -2.202470064163208, "logps/chosen": -84.11385345458984, "logps/rejected": -131.38665771484375, "loss": 0.1411, "rewards/accuracies": 1.0, "rewards/chosen": -0.9062725901603699, "rewards/margins": 5.104714870452881, "rewards/rejected": -6.010987758636475, "step": 1384 }, { "epoch": 2.38, "learning_rate": 4.084147896302592e-07, "logits/chosen": -2.259145975112915, "logits/rejected": -1.9175138473510742, "logps/chosen": -113.17584991455078, "logps/rejected": -172.49786376953125, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -1.7450703382492065, "rewards/margins": 7.330382347106934, "rewards/rejected": -9.07545280456543, "step": 1385 }, { "epoch": 2.39, "learning_rate": 4.0830854228644287e-07, "logits/chosen": -2.2286455631256104, "logits/rejected": -2.332282066345215, "logps/chosen": -74.2944564819336, "logps/rejected": -166.087158203125, "loss": 0.1968, "rewards/accuracies": 1.0, "rewards/chosen": -0.7890851497650146, "rewards/margins": 7.385056018829346, "rewards/rejected": -8.174140930175781, "step": 1386 }, { "epoch": 2.39, "learning_rate": 4.082022949426264e-07, "logits/chosen": -1.9881768226623535, "logits/rejected": -1.9370152950286865, "logps/chosen": -79.5213394165039, "logps/rejected": -128.30892944335938, "loss": 0.2497, "rewards/accuracies": 1.0, "rewards/chosen": -0.6140470504760742, "rewards/margins": 5.8365044593811035, "rewards/rejected": -6.4505510330200195, "step": 1387 }, { "epoch": 2.39, "learning_rate": 4.0809604759881e-07, "logits/chosen": -2.0154621601104736, "logits/rejected": -2.3554275035858154, "logps/chosen": -84.59615325927734, "logps/rejected": -175.16119384765625, "loss": 0.1299, "rewards/accuracies": 1.0, "rewards/chosen": -0.5144331455230713, "rewards/margins": 6.850707054138184, "rewards/rejected": -7.365140438079834, "step": 1388 }, { "epoch": 2.39, "learning_rate": 4.079898002549936e-07, "logits/chosen": -2.1357994079589844, "logits/rejected": -2.0684077739715576, "logps/chosen": -99.15834045410156, "logps/rejected": -156.03285217285156, "loss": 0.1644, "rewards/accuracies": 1.0, "rewards/chosen": -0.9047513604164124, "rewards/margins": 6.684920787811279, "rewards/rejected": -7.589672088623047, "step": 1389 }, { "epoch": 2.39, "learning_rate": 4.078835529111772e-07, "logits/chosen": -1.8824090957641602, "logits/rejected": -2.4381215572357178, "logps/chosen": -87.39021301269531, "logps/rejected": -145.9534912109375, "loss": 0.11, "rewards/accuracies": 1.0, "rewards/chosen": -1.2392792701721191, "rewards/margins": 4.84274435043335, "rewards/rejected": -6.082023620605469, "step": 1390 }, { "epoch": 2.39, "learning_rate": 4.077773055673608e-07, "logits/chosen": -2.213900089263916, "logits/rejected": -2.153359889984131, "logps/chosen": -79.99337005615234, "logps/rejected": -152.51425170898438, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -1.3901045322418213, "rewards/margins": 6.850873947143555, "rewards/rejected": -8.240978240966797, "step": 1391 }, { "epoch": 2.4, "learning_rate": 4.076710582235444e-07, "logits/chosen": -2.220986843109131, "logits/rejected": -2.1465470790863037, "logps/chosen": -94.24219512939453, "logps/rejected": -157.17465209960938, "loss": 0.0822, "rewards/accuracies": 1.0, "rewards/chosen": -1.0295469760894775, "rewards/margins": 5.188596248626709, "rewards/rejected": -6.218142986297607, "step": 1392 }, { "epoch": 2.4, "learning_rate": 4.07564810879728e-07, "logits/chosen": -2.1772072315216064, "logits/rejected": -2.3790805339813232, "logps/chosen": -122.3386459350586, "logps/rejected": -140.25003051757812, "loss": 0.1261, "rewards/accuracies": 0.75, "rewards/chosen": -4.433280944824219, "rewards/margins": 2.4897353649139404, "rewards/rejected": -6.923016548156738, "step": 1393 }, { "epoch": 2.4, "learning_rate": 4.0745856353591155e-07, "logits/chosen": -2.1484506130218506, "logits/rejected": -2.0226268768310547, "logps/chosen": -134.28054809570312, "logps/rejected": -161.5878448486328, "loss": 0.1474, "rewards/accuracies": 1.0, "rewards/chosen": -1.7125318050384521, "rewards/margins": 4.310066223144531, "rewards/rejected": -6.022597789764404, "step": 1394 }, { "epoch": 2.4, "learning_rate": 4.073523161920952e-07, "logits/chosen": -2.0905280113220215, "logits/rejected": -2.310352087020874, "logps/chosen": -68.8570556640625, "logps/rejected": -122.23020935058594, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": 0.045439332723617554, "rewards/margins": 3.9439287185668945, "rewards/rejected": -3.898489475250244, "step": 1395 }, { "epoch": 2.4, "learning_rate": 4.072460688482788e-07, "logits/chosen": -2.293519973754883, "logits/rejected": -2.5509068965911865, "logps/chosen": -117.27220153808594, "logps/rejected": -175.10585021972656, "loss": 0.2056, "rewards/accuracies": 1.0, "rewards/chosen": -0.9204531311988831, "rewards/margins": 6.245658874511719, "rewards/rejected": -7.166112422943115, "step": 1396 }, { "epoch": 2.4, "learning_rate": 4.0713982150446235e-07, "logits/chosen": -2.28183650970459, "logits/rejected": -1.6252386569976807, "logps/chosen": -109.82308197021484, "logps/rejected": -148.71527099609375, "loss": 0.0946, "rewards/accuracies": 1.0, "rewards/chosen": -1.144066572189331, "rewards/margins": 6.48123836517334, "rewards/rejected": -7.62530517578125, "step": 1397 }, { "epoch": 2.41, "learning_rate": 4.07033574160646e-07, "logits/chosen": -2.157010316848755, "logits/rejected": -2.3449888229370117, "logps/chosen": -97.83322143554688, "logps/rejected": -145.933349609375, "loss": 0.2312, "rewards/accuracies": 1.0, "rewards/chosen": -1.1044048070907593, "rewards/margins": 3.2543768882751465, "rewards/rejected": -4.358781814575195, "step": 1398 }, { "epoch": 2.41, "learning_rate": 4.0692732681682955e-07, "logits/chosen": -2.0306906700134277, "logits/rejected": -2.4665355682373047, "logps/chosen": -78.69042205810547, "logps/rejected": -131.98680114746094, "loss": 0.1291, "rewards/accuracies": 1.0, "rewards/chosen": 0.16563357412815094, "rewards/margins": 4.299637794494629, "rewards/rejected": -4.134004592895508, "step": 1399 }, { "epoch": 2.41, "learning_rate": 4.0682107947301314e-07, "logits/chosen": -2.0574426651000977, "logits/rejected": -2.408970594406128, "logps/chosen": -83.91471862792969, "logps/rejected": -142.03817749023438, "loss": 0.2578, "rewards/accuracies": 0.75, "rewards/chosen": -1.3970662355422974, "rewards/margins": 4.270122051239014, "rewards/rejected": -5.6671881675720215, "step": 1400 }, { "epoch": 2.41, "learning_rate": 4.067148321291968e-07, "logits/chosen": -2.4889631271362305, "logits/rejected": -2.0684237480163574, "logps/chosen": -106.42256164550781, "logps/rejected": -145.8384552001953, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": -1.523829698562622, "rewards/margins": 4.51193904876709, "rewards/rejected": -6.035768508911133, "step": 1401 }, { "epoch": 2.41, "learning_rate": 4.0660858478538034e-07, "logits/chosen": -2.1802096366882324, "logits/rejected": -2.367227792739868, "logps/chosen": -68.60546875, "logps/rejected": -114.74681854248047, "loss": 0.1524, "rewards/accuracies": 1.0, "rewards/chosen": -0.8364626169204712, "rewards/margins": 4.0174784660339355, "rewards/rejected": -4.853940963745117, "step": 1402 }, { "epoch": 2.41, "learning_rate": 4.0650233744156394e-07, "logits/chosen": -2.286970853805542, "logits/rejected": -2.231875419616699, "logps/chosen": -89.73399353027344, "logps/rejected": -164.3091583251953, "loss": 0.2936, "rewards/accuracies": 1.0, "rewards/chosen": -0.08746513724327087, "rewards/margins": 7.390957832336426, "rewards/rejected": -7.478423118591309, "step": 1403 }, { "epoch": 2.42, "learning_rate": 4.0639609009774754e-07, "logits/chosen": -1.8567965030670166, "logits/rejected": -2.3294906616210938, "logps/chosen": -102.01190185546875, "logps/rejected": -166.8155059814453, "loss": 0.1376, "rewards/accuracies": 1.0, "rewards/chosen": -2.4253008365631104, "rewards/margins": 5.981031894683838, "rewards/rejected": -8.406332969665527, "step": 1404 }, { "epoch": 2.42, "learning_rate": 4.0628984275393114e-07, "logits/chosen": -2.388558864593506, "logits/rejected": -1.8887348175048828, "logps/chosen": -103.08810424804688, "logps/rejected": -151.61044311523438, "loss": 0.2192, "rewards/accuracies": 1.0, "rewards/chosen": -0.5039114356040955, "rewards/margins": 6.043103218078613, "rewards/rejected": -6.547014236450195, "step": 1405 }, { "epoch": 2.42, "learning_rate": 4.061835954101147e-07, "logits/chosen": -2.084256649017334, "logits/rejected": -2.3872079849243164, "logps/chosen": -104.87179565429688, "logps/rejected": -133.28538513183594, "loss": 0.2322, "rewards/accuracies": 0.75, "rewards/chosen": -2.097775936126709, "rewards/margins": 1.4774861335754395, "rewards/rejected": -3.5752620697021484, "step": 1406 }, { "epoch": 2.42, "learning_rate": 4.0607734806629834e-07, "logits/chosen": -2.3438165187835693, "logits/rejected": -2.442437171936035, "logps/chosen": -114.29528045654297, "logps/rejected": -125.20484924316406, "loss": 0.09, "rewards/accuracies": 1.0, "rewards/chosen": -0.9377588629722595, "rewards/margins": 3.477111577987671, "rewards/rejected": -4.414870738983154, "step": 1407 }, { "epoch": 2.42, "learning_rate": 4.0597110072248194e-07, "logits/chosen": -2.1502890586853027, "logits/rejected": -2.35695743560791, "logps/chosen": -114.52449035644531, "logps/rejected": -164.12686157226562, "loss": 0.1421, "rewards/accuracies": 1.0, "rewards/chosen": -1.8382363319396973, "rewards/margins": 3.8002350330352783, "rewards/rejected": -5.638471603393555, "step": 1408 }, { "epoch": 2.43, "learning_rate": 4.058648533786655e-07, "logits/chosen": -2.080348014831543, "logits/rejected": -2.1669516563415527, "logps/chosen": -104.25201416015625, "logps/rejected": -163.980224609375, "loss": 0.1459, "rewards/accuracies": 1.0, "rewards/chosen": -2.683537483215332, "rewards/margins": 4.092931747436523, "rewards/rejected": -6.776468753814697, "step": 1409 }, { "epoch": 2.43, "learning_rate": 4.0575860603484913e-07, "logits/chosen": -1.6931716203689575, "logits/rejected": -2.3803439140319824, "logps/chosen": -69.62860870361328, "logps/rejected": -164.09095764160156, "loss": 0.0764, "rewards/accuracies": 1.0, "rewards/chosen": -0.12812289595603943, "rewards/margins": 6.689390182495117, "rewards/rejected": -6.817513465881348, "step": 1410 }, { "epoch": 2.43, "learning_rate": 4.056523586910327e-07, "logits/chosen": -1.9330759048461914, "logits/rejected": -2.3421988487243652, "logps/chosen": -104.26860809326172, "logps/rejected": -192.383056640625, "loss": 0.1361, "rewards/accuracies": 1.0, "rewards/chosen": -1.5592014789581299, "rewards/margins": 6.276960372924805, "rewards/rejected": -7.836162090301514, "step": 1411 }, { "epoch": 2.43, "learning_rate": 4.055461113472163e-07, "logits/chosen": -2.2068893909454346, "logits/rejected": -1.9655230045318604, "logps/chosen": -119.81504821777344, "logps/rejected": -126.38750457763672, "loss": 0.1494, "rewards/accuracies": 0.5, "rewards/chosen": -2.491306781768799, "rewards/margins": 1.6371413469314575, "rewards/rejected": -4.128448486328125, "step": 1412 }, { "epoch": 2.43, "learning_rate": 4.0543986400339993e-07, "logits/chosen": -2.2813961505889893, "logits/rejected": -2.0134711265563965, "logps/chosen": -75.3326644897461, "logps/rejected": -155.40464782714844, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": 0.11564958095550537, "rewards/margins": 8.254440307617188, "rewards/rejected": -8.13879108428955, "step": 1413 }, { "epoch": 2.43, "learning_rate": 4.053336166595835e-07, "logits/chosen": -2.2365992069244385, "logits/rejected": -2.2970528602600098, "logps/chosen": -84.01610565185547, "logps/rejected": -149.21890258789062, "loss": 0.0791, "rewards/accuracies": 1.0, "rewards/chosen": 0.0846622884273529, "rewards/margins": 5.026571750640869, "rewards/rejected": -4.941909313201904, "step": 1414 }, { "epoch": 2.44, "learning_rate": 4.052273693157671e-07, "logits/chosen": -2.0142228603363037, "logits/rejected": -2.3678479194641113, "logps/chosen": -103.5140380859375, "logps/rejected": -144.5342559814453, "loss": 0.1461, "rewards/accuracies": 1.0, "rewards/chosen": -1.4378752708435059, "rewards/margins": 2.745384931564331, "rewards/rejected": -4.183259963989258, "step": 1415 }, { "epoch": 2.44, "learning_rate": 4.051211219719507e-07, "logits/chosen": -2.248699903488159, "logits/rejected": -2.014561891555786, "logps/chosen": -99.85711669921875, "logps/rejected": -150.24954223632812, "loss": 0.1315, "rewards/accuracies": 1.0, "rewards/chosen": -1.6126753091812134, "rewards/margins": 6.024162769317627, "rewards/rejected": -7.636838436126709, "step": 1416 }, { "epoch": 2.44, "learning_rate": 4.0501487462813427e-07, "logits/chosen": -2.321488857269287, "logits/rejected": -2.1099607944488525, "logps/chosen": -101.29581451416016, "logps/rejected": -158.20277404785156, "loss": 0.1305, "rewards/accuracies": 1.0, "rewards/chosen": -1.120988130569458, "rewards/margins": 5.193632125854492, "rewards/rejected": -6.314620018005371, "step": 1417 }, { "epoch": 2.44, "learning_rate": 4.0490862728431787e-07, "logits/chosen": -2.347615957260132, "logits/rejected": -2.2625584602355957, "logps/chosen": -99.77798461914062, "logps/rejected": -158.4058380126953, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -0.1881355345249176, "rewards/margins": 6.222908973693848, "rewards/rejected": -6.411045074462891, "step": 1418 }, { "epoch": 2.44, "learning_rate": 4.0480237994050147e-07, "logits/chosen": -2.4655890464782715, "logits/rejected": -1.880258560180664, "logps/chosen": -122.28048706054688, "logps/rejected": -121.16606140136719, "loss": 0.2055, "rewards/accuracies": 0.75, "rewards/chosen": -2.070399284362793, "rewards/margins": 1.0293067693710327, "rewards/rejected": -3.099705934524536, "step": 1419 }, { "epoch": 2.44, "learning_rate": 4.0469613259668507e-07, "logits/chosen": -2.2974135875701904, "logits/rejected": -2.157285451889038, "logps/chosen": -81.62084197998047, "logps/rejected": -111.5809555053711, "loss": 0.1338, "rewards/accuracies": 1.0, "rewards/chosen": -1.502203106880188, "rewards/margins": 3.258542060852051, "rewards/rejected": -4.760745048522949, "step": 1420 }, { "epoch": 2.45, "learning_rate": 4.045898852528686e-07, "logits/chosen": -2.1286702156066895, "logits/rejected": -2.4625608921051025, "logps/chosen": -123.90707397460938, "logps/rejected": -171.0447540283203, "loss": 0.0999, "rewards/accuracies": 1.0, "rewards/chosen": -1.8248558044433594, "rewards/margins": 4.521440029144287, "rewards/rejected": -6.3462958335876465, "step": 1421 }, { "epoch": 2.45, "learning_rate": 4.0448363790905227e-07, "logits/chosen": -2.0945372581481934, "logits/rejected": -2.067214012145996, "logps/chosen": -91.81686401367188, "logps/rejected": -172.5318603515625, "loss": 0.0805, "rewards/accuracies": 1.0, "rewards/chosen": -0.2925505042076111, "rewards/margins": 7.769593238830566, "rewards/rejected": -8.06214427947998, "step": 1422 }, { "epoch": 2.45, "learning_rate": 4.0437739056523587e-07, "logits/chosen": -2.4579405784606934, "logits/rejected": -2.0443503856658936, "logps/chosen": -82.44844055175781, "logps/rejected": -136.75823974609375, "loss": 0.1757, "rewards/accuracies": 1.0, "rewards/chosen": -1.467904806137085, "rewards/margins": 4.998631954193115, "rewards/rejected": -6.466536521911621, "step": 1423 }, { "epoch": 2.45, "learning_rate": 4.042711432214194e-07, "logits/chosen": -1.8929945230484009, "logits/rejected": -2.4972925186157227, "logps/chosen": -99.29805755615234, "logps/rejected": -182.32386779785156, "loss": 0.2611, "rewards/accuracies": 1.0, "rewards/chosen": -2.227163791656494, "rewards/margins": 5.506774425506592, "rewards/rejected": -7.733938217163086, "step": 1424 }, { "epoch": 2.45, "learning_rate": 4.0416489587760306e-07, "logits/chosen": -2.343139171600342, "logits/rejected": -2.2304630279541016, "logps/chosen": -109.1771469116211, "logps/rejected": -170.30320739746094, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": -1.619676947593689, "rewards/margins": 5.582211017608643, "rewards/rejected": -7.201888561248779, "step": 1425 }, { "epoch": 2.45, "learning_rate": 4.040586485337866e-07, "logits/chosen": -2.074176073074341, "logits/rejected": -2.1237800121307373, "logps/chosen": -106.41098022460938, "logps/rejected": -133.8883819580078, "loss": 0.2542, "rewards/accuracies": 0.75, "rewards/chosen": -0.35133397579193115, "rewards/margins": 3.060887336730957, "rewards/rejected": -3.4122214317321777, "step": 1426 }, { "epoch": 2.46, "learning_rate": 4.0395240118997026e-07, "logits/chosen": -2.3092050552368164, "logits/rejected": -2.0041608810424805, "logps/chosen": -110.85514068603516, "logps/rejected": -148.19979858398438, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": -1.860222578048706, "rewards/margins": 3.556851863861084, "rewards/rejected": -5.417074680328369, "step": 1427 }, { "epoch": 2.46, "learning_rate": 4.0384615384615386e-07, "logits/chosen": -2.4979889392852783, "logits/rejected": -2.009981870651245, "logps/chosen": -99.15906524658203, "logps/rejected": -139.34619140625, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -0.9694696664810181, "rewards/margins": 4.845679759979248, "rewards/rejected": -5.815149307250977, "step": 1428 }, { "epoch": 2.46, "learning_rate": 4.037399065023374e-07, "logits/chosen": -2.194211959838867, "logits/rejected": -2.0104470252990723, "logps/chosen": -86.40593719482422, "logps/rejected": -123.52144622802734, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -1.3916337490081787, "rewards/margins": 4.827969551086426, "rewards/rejected": -6.219603538513184, "step": 1429 }, { "epoch": 2.46, "learning_rate": 4.0363365915852106e-07, "logits/chosen": -1.9461736679077148, "logits/rejected": -2.4326133728027344, "logps/chosen": -110.65933227539062, "logps/rejected": -165.74710083007812, "loss": 0.1303, "rewards/accuracies": 1.0, "rewards/chosen": -3.0239551067352295, "rewards/margins": 5.155107498168945, "rewards/rejected": -8.179062843322754, "step": 1430 }, { "epoch": 2.46, "learning_rate": 4.035274118147046e-07, "logits/chosen": -2.271453380584717, "logits/rejected": -1.9628218412399292, "logps/chosen": -130.75830078125, "logps/rejected": -167.5025634765625, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": -2.5511608123779297, "rewards/margins": 4.764674663543701, "rewards/rejected": -7.3158345222473145, "step": 1431 }, { "epoch": 2.46, "learning_rate": 4.034211644708882e-07, "logits/chosen": -2.205188751220703, "logits/rejected": -2.2006120681762695, "logps/chosen": -101.25448608398438, "logps/rejected": -156.29019165039062, "loss": 0.1471, "rewards/accuracies": 1.0, "rewards/chosen": -3.349734306335449, "rewards/margins": 4.074833869934082, "rewards/rejected": -7.424568176269531, "step": 1432 }, { "epoch": 2.47, "learning_rate": 4.0331491712707185e-07, "logits/chosen": -2.2604453563690186, "logits/rejected": -2.2646679878234863, "logps/chosen": -100.09766387939453, "logps/rejected": -166.28164672851562, "loss": 0.1359, "rewards/accuracies": 1.0, "rewards/chosen": -1.4900708198547363, "rewards/margins": 6.238841533660889, "rewards/rejected": -7.728912353515625, "step": 1433 }, { "epoch": 2.47, "learning_rate": 4.032086697832554e-07, "logits/chosen": -2.1801769733428955, "logits/rejected": -2.268923282623291, "logps/chosen": -89.22322082519531, "logps/rejected": -141.24314880371094, "loss": 0.228, "rewards/accuracies": 1.0, "rewards/chosen": -2.7460837364196777, "rewards/margins": 4.473294734954834, "rewards/rejected": -7.219378471374512, "step": 1434 }, { "epoch": 2.47, "learning_rate": 4.03102422439439e-07, "logits/chosen": -1.9947984218597412, "logits/rejected": -2.356055974960327, "logps/chosen": -96.0362777709961, "logps/rejected": -177.0572052001953, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -1.1803656816482544, "rewards/margins": 6.364116668701172, "rewards/rejected": -7.5444817543029785, "step": 1435 }, { "epoch": 2.47, "learning_rate": 4.029961750956226e-07, "logits/chosen": -2.136659622192383, "logits/rejected": -2.2063825130462646, "logps/chosen": -108.6495361328125, "logps/rejected": -184.61984252929688, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": -3.4811649322509766, "rewards/margins": 7.054637908935547, "rewards/rejected": -10.535802841186523, "step": 1436 }, { "epoch": 2.47, "learning_rate": 4.028899277518062e-07, "logits/chosen": -2.215970754623413, "logits/rejected": -2.3873374462127686, "logps/chosen": -80.4319839477539, "logps/rejected": -137.92352294921875, "loss": 0.131, "rewards/accuracies": 1.0, "rewards/chosen": -1.0543212890625, "rewards/margins": 5.332652568817139, "rewards/rejected": -6.386973857879639, "step": 1437 }, { "epoch": 2.48, "learning_rate": 4.0278368040798974e-07, "logits/chosen": -2.2507777214050293, "logits/rejected": -1.865273356437683, "logps/chosen": -131.4549560546875, "logps/rejected": -191.6524658203125, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": -3.351694107055664, "rewards/margins": 7.505782604217529, "rewards/rejected": -10.857477188110352, "step": 1438 }, { "epoch": 2.48, "learning_rate": 4.026774330641734e-07, "logits/chosen": -2.0935182571411133, "logits/rejected": -2.215069055557251, "logps/chosen": -117.28956604003906, "logps/rejected": -162.0199737548828, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": -3.5431628227233887, "rewards/margins": 4.450040817260742, "rewards/rejected": -7.993203639984131, "step": 1439 }, { "epoch": 2.48, "learning_rate": 4.02571185720357e-07, "logits/chosen": -2.1564836502075195, "logits/rejected": -2.071620225906372, "logps/chosen": -135.28146362304688, "logps/rejected": -176.1969757080078, "loss": 0.1307, "rewards/accuracies": 1.0, "rewards/chosen": -4.176107883453369, "rewards/margins": 3.6800308227539062, "rewards/rejected": -7.856139183044434, "step": 1440 }, { "epoch": 2.48, "learning_rate": 4.0246493837654054e-07, "logits/chosen": -2.058317184448242, "logits/rejected": -1.8498293161392212, "logps/chosen": -144.52420043945312, "logps/rejected": -195.94384765625, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.088353157043457, "rewards/margins": 6.65972375869751, "rewards/rejected": -9.748077392578125, "step": 1441 }, { "epoch": 2.48, "learning_rate": 4.023586910327242e-07, "logits/chosen": -2.155467987060547, "logits/rejected": -2.2836921215057373, "logps/chosen": -74.57518005371094, "logps/rejected": -170.5172119140625, "loss": 0.2884, "rewards/accuracies": 1.0, "rewards/chosen": -0.29248303174972534, "rewards/margins": 8.375567436218262, "rewards/rejected": -8.668050765991211, "step": 1442 }, { "epoch": 2.48, "learning_rate": 4.0225244368890774e-07, "logits/chosen": -2.0751166343688965, "logits/rejected": -2.450230121612549, "logps/chosen": -108.45079040527344, "logps/rejected": -204.67446899414062, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -2.173366069793701, "rewards/margins": 6.864555358886719, "rewards/rejected": -9.037921905517578, "step": 1443 }, { "epoch": 2.49, "learning_rate": 4.0214619634509134e-07, "logits/chosen": -2.1097452640533447, "logits/rejected": -2.3214807510375977, "logps/chosen": -83.96733093261719, "logps/rejected": -151.99815368652344, "loss": 0.1054, "rewards/accuracies": 1.0, "rewards/chosen": 0.22891847789287567, "rewards/margins": 7.587026596069336, "rewards/rejected": -7.3581085205078125, "step": 1444 }, { "epoch": 2.49, "learning_rate": 4.02039949001275e-07, "logits/chosen": -2.100522518157959, "logits/rejected": -2.156033754348755, "logps/chosen": -103.44198608398438, "logps/rejected": -143.4951171875, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -1.5092384815216064, "rewards/margins": 3.313499927520752, "rewards/rejected": -4.822737693786621, "step": 1445 }, { "epoch": 2.49, "learning_rate": 4.0193370165745853e-07, "logits/chosen": -2.0861661434173584, "logits/rejected": -2.2586264610290527, "logps/chosen": -85.94527435302734, "logps/rejected": -152.72412109375, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": -0.5820847153663635, "rewards/margins": 6.4978132247924805, "rewards/rejected": -7.079897403717041, "step": 1446 }, { "epoch": 2.49, "learning_rate": 4.0182745431364213e-07, "logits/chosen": -2.13140869140625, "logits/rejected": -2.5125906467437744, "logps/chosen": -117.81513214111328, "logps/rejected": -156.22537231445312, "loss": 0.2614, "rewards/accuracies": 1.0, "rewards/chosen": -2.6174726486206055, "rewards/margins": 3.623891830444336, "rewards/rejected": -6.241364479064941, "step": 1447 }, { "epoch": 2.49, "learning_rate": 4.0172120696982573e-07, "logits/chosen": -2.3580174446105957, "logits/rejected": -2.2760047912597656, "logps/chosen": -111.6193618774414, "logps/rejected": -147.49884033203125, "loss": 0.1383, "rewards/accuracies": 1.0, "rewards/chosen": -2.9954428672790527, "rewards/margins": 2.8273329734802246, "rewards/rejected": -5.822775840759277, "step": 1448 }, { "epoch": 2.49, "learning_rate": 4.0161495962600933e-07, "logits/chosen": -2.1181325912475586, "logits/rejected": -2.1061041355133057, "logps/chosen": -93.82599639892578, "logps/rejected": -140.00213623046875, "loss": 0.3032, "rewards/accuracies": 1.0, "rewards/chosen": -1.8208848237991333, "rewards/margins": 3.4196736812591553, "rewards/rejected": -5.240558624267578, "step": 1449 }, { "epoch": 2.5, "learning_rate": 4.0150871228219293e-07, "logits/chosen": -2.1575682163238525, "logits/rejected": -2.3305470943450928, "logps/chosen": -107.4619140625, "logps/rejected": -191.91940307617188, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": -1.0897712707519531, "rewards/margins": 6.689585208892822, "rewards/rejected": -7.779356479644775, "step": 1450 }, { "epoch": 2.5, "learning_rate": 4.0140246493837653e-07, "logits/chosen": -2.24397611618042, "logits/rejected": -2.19089674949646, "logps/chosen": -136.997314453125, "logps/rejected": -165.04464721679688, "loss": 0.1128, "rewards/accuracies": 0.75, "rewards/chosen": -4.553738117218018, "rewards/margins": 2.5180470943450928, "rewards/rejected": -7.0717854499816895, "step": 1451 }, { "epoch": 2.5, "learning_rate": 4.0129621759456013e-07, "logits/chosen": -2.1403205394744873, "logits/rejected": -2.2002930641174316, "logps/chosen": -138.27728271484375, "logps/rejected": -197.7520294189453, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -3.165760040283203, "rewards/margins": 5.703236103057861, "rewards/rejected": -8.868996620178223, "step": 1452 }, { "epoch": 2.5, "learning_rate": 4.011899702507437e-07, "logits/chosen": -2.228792190551758, "logits/rejected": -2.2484991550445557, "logps/chosen": -114.75696563720703, "logps/rejected": -169.34571838378906, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/chosen": -2.717853546142578, "rewards/margins": 5.558152675628662, "rewards/rejected": -8.276006698608398, "step": 1453 }, { "epoch": 2.5, "learning_rate": 4.010837229069273e-07, "logits/chosen": -2.0907130241394043, "logits/rejected": -2.331620931625366, "logps/chosen": -104.91239929199219, "logps/rejected": -170.8029327392578, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -2.1476101875305176, "rewards/margins": 5.041867733001709, "rewards/rejected": -7.189477920532227, "step": 1454 }, { "epoch": 2.5, "learning_rate": 4.009774755631109e-07, "logits/chosen": -2.3127009868621826, "logits/rejected": -2.2765145301818848, "logps/chosen": -94.67374420166016, "logps/rejected": -148.34344482421875, "loss": 0.0728, "rewards/accuracies": 1.0, "rewards/chosen": -1.842392921447754, "rewards/margins": 4.863807678222656, "rewards/rejected": -6.70620059967041, "step": 1455 }, { "epoch": 2.51, "learning_rate": 4.0087122821929447e-07, "logits/chosen": -2.250873565673828, "logits/rejected": -2.472132921218872, "logps/chosen": -93.01802825927734, "logps/rejected": -181.32345581054688, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": -0.9580489993095398, "rewards/margins": 6.487701892852783, "rewards/rejected": -7.445751190185547, "step": 1456 }, { "epoch": 2.51, "learning_rate": 4.007649808754781e-07, "logits/chosen": -2.335092067718506, "logits/rejected": -2.200079917907715, "logps/chosen": -92.34793853759766, "logps/rejected": -125.38117980957031, "loss": 0.1845, "rewards/accuracies": 0.75, "rewards/chosen": -2.632587432861328, "rewards/margins": 3.48817777633667, "rewards/rejected": -6.120765686035156, "step": 1457 }, { "epoch": 2.51, "learning_rate": 4.0065873353166167e-07, "logits/chosen": -2.376741409301758, "logits/rejected": -2.373018264770508, "logps/chosen": -90.76918029785156, "logps/rejected": -116.6033706665039, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": 0.014528840780258179, "rewards/margins": 2.8982443809509277, "rewards/rejected": -2.8837156295776367, "step": 1458 }, { "epoch": 2.51, "learning_rate": 4.0055248618784527e-07, "logits/chosen": -1.8075158596038818, "logits/rejected": -2.3058319091796875, "logps/chosen": -87.97276306152344, "logps/rejected": -141.24737548828125, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": -1.323129653930664, "rewards/margins": 3.8604612350463867, "rewards/rejected": -5.183590888977051, "step": 1459 }, { "epoch": 2.51, "learning_rate": 4.004462388440289e-07, "logits/chosen": -2.1444056034088135, "logits/rejected": -1.9784002304077148, "logps/chosen": -114.67735290527344, "logps/rejected": -184.2361297607422, "loss": 0.1546, "rewards/accuracies": 1.0, "rewards/chosen": -2.265392541885376, "rewards/margins": 7.005918502807617, "rewards/rejected": -9.271310806274414, "step": 1460 }, { "epoch": 2.51, "learning_rate": 4.0033999150021247e-07, "logits/chosen": -2.0585079193115234, "logits/rejected": -2.4138689041137695, "logps/chosen": -72.15892028808594, "logps/rejected": -139.98130798339844, "loss": 0.0853, "rewards/accuracies": 1.0, "rewards/chosen": -1.4155961275100708, "rewards/margins": 5.533729553222656, "rewards/rejected": -6.9493255615234375, "step": 1461 }, { "epoch": 2.52, "learning_rate": 4.0023374415639606e-07, "logits/chosen": -2.2504372596740723, "logits/rejected": -2.3475890159606934, "logps/chosen": -91.05729675292969, "logps/rejected": -136.73341369628906, "loss": 0.1635, "rewards/accuracies": 1.0, "rewards/chosen": -1.1600093841552734, "rewards/margins": 4.759558200836182, "rewards/rejected": -5.919567584991455, "step": 1462 }, { "epoch": 2.52, "learning_rate": 4.0012749681257966e-07, "logits/chosen": -2.1385984420776367, "logits/rejected": -1.9195897579193115, "logps/chosen": -101.13926696777344, "logps/rejected": -141.55596923828125, "loss": 0.139, "rewards/accuracies": 1.0, "rewards/chosen": -1.506148338317871, "rewards/margins": 5.405505657196045, "rewards/rejected": -6.911653995513916, "step": 1463 }, { "epoch": 2.52, "learning_rate": 4.0002124946876326e-07, "logits/chosen": -2.4632692337036133, "logits/rejected": -2.0937764644622803, "logps/chosen": -108.18611145019531, "logps/rejected": -135.67477416992188, "loss": 0.2627, "rewards/accuracies": 0.75, "rewards/chosen": -1.8139442205429077, "rewards/margins": 3.0459048748016357, "rewards/rejected": -4.859849452972412, "step": 1464 }, { "epoch": 2.52, "learning_rate": 3.999150021249468e-07, "logits/chosen": -2.2557694911956787, "logits/rejected": -2.2968037128448486, "logps/chosen": -98.2730941772461, "logps/rejected": -175.91049194335938, "loss": 0.1375, "rewards/accuracies": 1.0, "rewards/chosen": -0.6162880063056946, "rewards/margins": 7.590511322021484, "rewards/rejected": -8.206799507141113, "step": 1465 }, { "epoch": 2.52, "learning_rate": 3.9980875478113046e-07, "logits/chosen": -2.2283406257629395, "logits/rejected": -2.043456554412842, "logps/chosen": -89.2452392578125, "logps/rejected": -154.9536895751953, "loss": 0.1905, "rewards/accuracies": 1.0, "rewards/chosen": -1.2924323081970215, "rewards/margins": 5.8410139083862305, "rewards/rejected": -7.1334452629089355, "step": 1466 }, { "epoch": 2.52, "learning_rate": 3.9970250743731406e-07, "logits/chosen": -2.332223892211914, "logits/rejected": -2.0331969261169434, "logps/chosen": -120.7766342163086, "logps/rejected": -152.4354705810547, "loss": 0.1048, "rewards/accuracies": 0.75, "rewards/chosen": -2.270951747894287, "rewards/margins": 5.450857162475586, "rewards/rejected": -7.721809387207031, "step": 1467 }, { "epoch": 2.53, "learning_rate": 3.995962600934976e-07, "logits/chosen": -2.4314606189727783, "logits/rejected": -2.176668167114258, "logps/chosen": -95.7289047241211, "logps/rejected": -141.18048095703125, "loss": 0.1081, "rewards/accuracies": 1.0, "rewards/chosen": -0.7815883159637451, "rewards/margins": 5.210804462432861, "rewards/rejected": -5.9923930168151855, "step": 1468 }, { "epoch": 2.53, "learning_rate": 3.9949001274968126e-07, "logits/chosen": -1.7949670553207397, "logits/rejected": -2.3881349563598633, "logps/chosen": -71.68804931640625, "logps/rejected": -127.85591888427734, "loss": 0.121, "rewards/accuracies": 1.0, "rewards/chosen": -0.9227635264396667, "rewards/margins": 2.823793411254883, "rewards/rejected": -3.7465567588806152, "step": 1469 }, { "epoch": 2.53, "learning_rate": 3.993837654058648e-07, "logits/chosen": -2.2597453594207764, "logits/rejected": -1.9352155923843384, "logps/chosen": -101.57947540283203, "logps/rejected": -144.9090576171875, "loss": 0.0857, "rewards/accuracies": 1.0, "rewards/chosen": -0.8662495017051697, "rewards/margins": 6.227016925811768, "rewards/rejected": -7.09326696395874, "step": 1470 }, { "epoch": 2.53, "learning_rate": 3.9927751806204845e-07, "logits/chosen": -2.3174889087677, "logits/rejected": -2.249206304550171, "logps/chosen": -97.20989990234375, "logps/rejected": -122.31558227539062, "loss": 0.238, "rewards/accuracies": 1.0, "rewards/chosen": -0.8028678894042969, "rewards/margins": 4.064182758331299, "rewards/rejected": -4.867050647735596, "step": 1471 }, { "epoch": 2.53, "learning_rate": 3.9917127071823205e-07, "logits/chosen": -2.247748851776123, "logits/rejected": -2.154933214187622, "logps/chosen": -79.86813354492188, "logps/rejected": -131.745361328125, "loss": 0.1146, "rewards/accuracies": 1.0, "rewards/chosen": -1.2452361583709717, "rewards/margins": 5.320592403411865, "rewards/rejected": -6.565828323364258, "step": 1472 }, { "epoch": 2.54, "learning_rate": 3.990650233744156e-07, "logits/chosen": -2.3583555221557617, "logits/rejected": -2.203270435333252, "logps/chosen": -97.53875732421875, "logps/rejected": -127.10296630859375, "loss": 0.0766, "rewards/accuracies": 1.0, "rewards/chosen": -0.8806244730949402, "rewards/margins": 4.404119968414307, "rewards/rejected": -5.2847442626953125, "step": 1473 }, { "epoch": 2.54, "learning_rate": 3.9895877603059925e-07, "logits/chosen": -2.296954393386841, "logits/rejected": -2.1585445404052734, "logps/chosen": -97.75959014892578, "logps/rejected": -172.232177734375, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": 0.18262197077274323, "rewards/margins": 7.0833587646484375, "rewards/rejected": -6.9007368087768555, "step": 1474 }, { "epoch": 2.54, "learning_rate": 3.988525286867828e-07, "logits/chosen": -2.3506147861480713, "logits/rejected": -1.830701470375061, "logps/chosen": -100.47901916503906, "logps/rejected": -137.82313537597656, "loss": 0.102, "rewards/accuracies": 0.75, "rewards/chosen": -0.898531973361969, "rewards/margins": 5.947442054748535, "rewards/rejected": -6.845973968505859, "step": 1475 }, { "epoch": 2.54, "learning_rate": 3.987462813429664e-07, "logits/chosen": -2.38507080078125, "logits/rejected": -2.408252716064453, "logps/chosen": -110.18766784667969, "logps/rejected": -138.73353576660156, "loss": 0.1426, "rewards/accuracies": 1.0, "rewards/chosen": -1.7806519269943237, "rewards/margins": 2.4935033321380615, "rewards/rejected": -4.274155139923096, "step": 1476 }, { "epoch": 2.54, "learning_rate": 3.9864003399915005e-07, "logits/chosen": -2.3012290000915527, "logits/rejected": -2.036770820617676, "logps/chosen": -82.3372573852539, "logps/rejected": -135.58221435546875, "loss": 0.0735, "rewards/accuracies": 0.75, "rewards/chosen": -0.7219614386558533, "rewards/margins": 6.298280239105225, "rewards/rejected": -7.020242214202881, "step": 1477 }, { "epoch": 2.54, "learning_rate": 3.985337866553336e-07, "logits/chosen": -2.1892409324645996, "logits/rejected": -2.341306447982788, "logps/chosen": -91.72459411621094, "logps/rejected": -156.443115234375, "loss": 0.0692, "rewards/accuracies": 1.0, "rewards/chosen": -0.7617965936660767, "rewards/margins": 5.459533214569092, "rewards/rejected": -6.221329689025879, "step": 1478 }, { "epoch": 2.55, "learning_rate": 3.984275393115172e-07, "logits/chosen": -1.969095230102539, "logits/rejected": -2.335240602493286, "logps/chosen": -100.6473617553711, "logps/rejected": -166.04042053222656, "loss": 0.1161, "rewards/accuracies": 0.75, "rewards/chosen": -0.7061246633529663, "rewards/margins": 3.977705478668213, "rewards/rejected": -4.6838297843933105, "step": 1479 }, { "epoch": 2.55, "learning_rate": 3.983212919677008e-07, "logits/chosen": -2.2505993843078613, "logits/rejected": -2.343902349472046, "logps/chosen": -90.64968872070312, "logps/rejected": -134.29859924316406, "loss": 0.1511, "rewards/accuracies": 1.0, "rewards/chosen": -1.3178486824035645, "rewards/margins": 3.9569597244262695, "rewards/rejected": -5.274808883666992, "step": 1480 }, { "epoch": 2.55, "learning_rate": 3.982150446238844e-07, "logits/chosen": -2.1730425357818604, "logits/rejected": -1.795020341873169, "logps/chosen": -111.26315307617188, "logps/rejected": -142.90817260742188, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": -2.414729118347168, "rewards/margins": 5.441390514373779, "rewards/rejected": -7.856119155883789, "step": 1481 }, { "epoch": 2.55, "learning_rate": 3.98108797280068e-07, "logits/chosen": -2.4119129180908203, "logits/rejected": -1.9810526371002197, "logps/chosen": -99.03810119628906, "logps/rejected": -159.35153198242188, "loss": 0.159, "rewards/accuracies": 1.0, "rewards/chosen": -1.5007193088531494, "rewards/margins": 6.171173095703125, "rewards/rejected": -7.671893119812012, "step": 1482 }, { "epoch": 2.55, "learning_rate": 3.980025499362516e-07, "logits/chosen": -2.411440849304199, "logits/rejected": -2.122300148010254, "logps/chosen": -99.65785217285156, "logps/rejected": -158.1924285888672, "loss": 0.0946, "rewards/accuracies": 1.0, "rewards/chosen": 0.2584589123725891, "rewards/margins": 6.213692665100098, "rewards/rejected": -5.955233573913574, "step": 1483 }, { "epoch": 2.55, "learning_rate": 3.978963025924352e-07, "logits/chosen": -2.251786231994629, "logits/rejected": -2.3289952278137207, "logps/chosen": -105.49089050292969, "logps/rejected": -190.11790466308594, "loss": 0.1607, "rewards/accuracies": 1.0, "rewards/chosen": -2.1235718727111816, "rewards/margins": 7.130681037902832, "rewards/rejected": -9.254252433776855, "step": 1484 }, { "epoch": 2.56, "learning_rate": 3.9779005524861873e-07, "logits/chosen": -2.2702627182006836, "logits/rejected": -2.417112112045288, "logps/chosen": -78.9449234008789, "logps/rejected": -133.07440185546875, "loss": 0.1066, "rewards/accuracies": 1.0, "rewards/chosen": -0.5352428555488586, "rewards/margins": 5.057366371154785, "rewards/rejected": -5.59260892868042, "step": 1485 }, { "epoch": 2.56, "learning_rate": 3.976838079048024e-07, "logits/chosen": -1.9427380561828613, "logits/rejected": -2.2774693965911865, "logps/chosen": -98.06503295898438, "logps/rejected": -140.62510681152344, "loss": 0.1748, "rewards/accuracies": 1.0, "rewards/chosen": -1.6957021951675415, "rewards/margins": 3.088545322418213, "rewards/rejected": -4.784247398376465, "step": 1486 }, { "epoch": 2.56, "learning_rate": 3.97577560560986e-07, "logits/chosen": -2.348179578781128, "logits/rejected": -2.0731663703918457, "logps/chosen": -85.82085418701172, "logps/rejected": -121.84253692626953, "loss": 0.117, "rewards/accuracies": 0.75, "rewards/chosen": -2.4282522201538086, "rewards/margins": 4.009126663208008, "rewards/rejected": -6.437378883361816, "step": 1487 }, { "epoch": 2.56, "learning_rate": 3.9747131321716953e-07, "logits/chosen": -2.1603784561157227, "logits/rejected": -2.5169527530670166, "logps/chosen": -80.063232421875, "logps/rejected": -138.48255920410156, "loss": 0.2174, "rewards/accuracies": 1.0, "rewards/chosen": 0.0027857720851898193, "rewards/margins": 3.5309410095214844, "rewards/rejected": -3.5281548500061035, "step": 1488 }, { "epoch": 2.56, "learning_rate": 3.973650658733532e-07, "logits/chosen": -2.0556395053863525, "logits/rejected": -2.153106212615967, "logps/chosen": -80.47093200683594, "logps/rejected": -164.1299285888672, "loss": 0.0777, "rewards/accuracies": 1.0, "rewards/chosen": -0.3520548939704895, "rewards/margins": 7.406871795654297, "rewards/rejected": -7.7589263916015625, "step": 1489 }, { "epoch": 2.56, "learning_rate": 3.9725881852953673e-07, "logits/chosen": -2.209118127822876, "logits/rejected": -2.09228777885437, "logps/chosen": -84.09473419189453, "logps/rejected": -126.25240325927734, "loss": 0.1463, "rewards/accuracies": 1.0, "rewards/chosen": -0.12956354022026062, "rewards/margins": 5.873762607574463, "rewards/rejected": -6.003326416015625, "step": 1490 }, { "epoch": 2.57, "learning_rate": 3.9715257118572033e-07, "logits/chosen": -2.209057569503784, "logits/rejected": -2.214524030685425, "logps/chosen": -102.23658752441406, "logps/rejected": -132.74905395507812, "loss": 0.0831, "rewards/accuracies": 1.0, "rewards/chosen": -1.605438470840454, "rewards/margins": 3.29152250289917, "rewards/rejected": -4.896961212158203, "step": 1491 }, { "epoch": 2.57, "learning_rate": 3.970463238419039e-07, "logits/chosen": -2.4002442359924316, "logits/rejected": -1.9447264671325684, "logps/chosen": -100.77765655517578, "logps/rejected": -142.76638793945312, "loss": 0.1999, "rewards/accuracies": 1.0, "rewards/chosen": -0.3855116069316864, "rewards/margins": 4.74205207824707, "rewards/rejected": -5.127563953399658, "step": 1492 }, { "epoch": 2.57, "learning_rate": 3.969400764980875e-07, "logits/chosen": -2.076631546020508, "logits/rejected": -2.2711212635040283, "logps/chosen": -92.13514709472656, "logps/rejected": -136.83029174804688, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -1.470824956893921, "rewards/margins": 3.5090043544769287, "rewards/rejected": -4.97982931137085, "step": 1493 }, { "epoch": 2.57, "learning_rate": 3.968338291542711e-07, "logits/chosen": -2.0776045322418213, "logits/rejected": -2.207733154296875, "logps/chosen": -115.62944030761719, "logps/rejected": -139.9364776611328, "loss": 0.2087, "rewards/accuracies": 0.75, "rewards/chosen": -1.844578742980957, "rewards/margins": 1.7093994617462158, "rewards/rejected": -3.553978443145752, "step": 1494 }, { "epoch": 2.57, "learning_rate": 3.967275818104547e-07, "logits/chosen": -2.187067747116089, "logits/rejected": -2.3892781734466553, "logps/chosen": -106.09552001953125, "logps/rejected": -146.20416259765625, "loss": 0.2461, "rewards/accuracies": 0.75, "rewards/chosen": -1.6455577611923218, "rewards/margins": 3.5243492126464844, "rewards/rejected": -5.169907093048096, "step": 1495 }, { "epoch": 2.57, "learning_rate": 3.966213344666383e-07, "logits/chosen": -2.2143630981445312, "logits/rejected": -2.0954928398132324, "logps/chosen": -80.16226959228516, "logps/rejected": -112.8887939453125, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": -0.8320879936218262, "rewards/margins": 3.8352952003479004, "rewards/rejected": -4.667383670806885, "step": 1496 }, { "epoch": 2.58, "learning_rate": 3.9651508712282187e-07, "logits/chosen": -2.267533779144287, "logits/rejected": -2.1064388751983643, "logps/chosen": -86.38863372802734, "logps/rejected": -138.7740020751953, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": -0.6042488217353821, "rewards/margins": 4.936997413635254, "rewards/rejected": -5.54124641418457, "step": 1497 }, { "epoch": 2.58, "learning_rate": 3.964088397790055e-07, "logits/chosen": -2.2747693061828613, "logits/rejected": -2.160346508026123, "logps/chosen": -99.9058837890625, "logps/rejected": -166.99827575683594, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -0.663243293762207, "rewards/margins": 6.433696746826172, "rewards/rejected": -7.096940040588379, "step": 1498 }, { "epoch": 2.58, "learning_rate": 3.963025924351891e-07, "logits/chosen": -2.2921743392944336, "logits/rejected": -2.1533565521240234, "logps/chosen": -96.43049621582031, "logps/rejected": -143.75592041015625, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": -0.08106344938278198, "rewards/margins": 4.649527072906494, "rewards/rejected": -4.730589866638184, "step": 1499 }, { "epoch": 2.58, "learning_rate": 3.9619634509137266e-07, "logits/chosen": -2.089306116104126, "logits/rejected": -2.214506149291992, "logps/chosen": -99.05467224121094, "logps/rejected": -171.7582244873047, "loss": 0.0805, "rewards/accuracies": 1.0, "rewards/chosen": -1.0302590131759644, "rewards/margins": 5.858978271484375, "rewards/rejected": -6.889237403869629, "step": 1500 }, { "epoch": 2.58, "learning_rate": 3.960900977475563e-07, "logits/chosen": -2.0935182571411133, "logits/rejected": -2.439350128173828, "logps/chosen": -71.15904998779297, "logps/rejected": -132.403076171875, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": -0.7988255023956299, "rewards/margins": 3.6063709259033203, "rewards/rejected": -4.405196666717529, "step": 1501 }, { "epoch": 2.59, "learning_rate": 3.9598385040373986e-07, "logits/chosen": -2.2078776359558105, "logits/rejected": -1.982342004776001, "logps/chosen": -115.30487060546875, "logps/rejected": -134.92068481445312, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": -1.8546991348266602, "rewards/margins": 2.8847267627716064, "rewards/rejected": -4.7394256591796875, "step": 1502 }, { "epoch": 2.59, "learning_rate": 3.9587760305992346e-07, "logits/chosen": -2.198549747467041, "logits/rejected": -2.240724563598633, "logps/chosen": -105.60176849365234, "logps/rejected": -172.776123046875, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": -0.8649660348892212, "rewards/margins": 5.791354179382324, "rewards/rejected": -6.656320095062256, "step": 1503 }, { "epoch": 2.59, "learning_rate": 3.957713557161071e-07, "logits/chosen": -2.109353542327881, "logits/rejected": -2.2288804054260254, "logps/chosen": -78.57475280761719, "logps/rejected": -150.74172973632812, "loss": 0.228, "rewards/accuracies": 1.0, "rewards/chosen": -1.405853271484375, "rewards/margins": 6.4960150718688965, "rewards/rejected": -7.9018683433532715, "step": 1504 }, { "epoch": 2.59, "learning_rate": 3.9566510837229066e-07, "logits/chosen": -2.1614012718200684, "logits/rejected": -2.005516290664673, "logps/chosen": -105.93031311035156, "logps/rejected": -159.27825927734375, "loss": 0.2916, "rewards/accuracies": 1.0, "rewards/chosen": -0.858241856098175, "rewards/margins": 5.9153547286987305, "rewards/rejected": -6.77359676361084, "step": 1505 }, { "epoch": 2.59, "learning_rate": 3.9555886102847426e-07, "logits/chosen": -2.0290908813476562, "logits/rejected": -2.0852465629577637, "logps/chosen": -92.75419616699219, "logps/rejected": -139.00209045410156, "loss": 0.1147, "rewards/accuracies": 1.0, "rewards/chosen": -0.7804844379425049, "rewards/margins": 6.020548343658447, "rewards/rejected": -6.801032543182373, "step": 1506 }, { "epoch": 2.59, "learning_rate": 3.9545261368465786e-07, "logits/chosen": -2.3073179721832275, "logits/rejected": -2.288114547729492, "logps/chosen": -96.18301391601562, "logps/rejected": -183.26031494140625, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": -1.9587063789367676, "rewards/margins": 7.971615791320801, "rewards/rejected": -9.930322647094727, "step": 1507 }, { "epoch": 2.6, "learning_rate": 3.9534636634084145e-07, "logits/chosen": -1.8667140007019043, "logits/rejected": -2.4483604431152344, "logps/chosen": -81.69085693359375, "logps/rejected": -170.97213745117188, "loss": 0.1372, "rewards/accuracies": 1.0, "rewards/chosen": -0.6362564563751221, "rewards/margins": 5.058969974517822, "rewards/rejected": -5.695226669311523, "step": 1508 }, { "epoch": 2.6, "learning_rate": 3.9524011899702505e-07, "logits/chosen": -2.269371747970581, "logits/rejected": -2.334467887878418, "logps/chosen": -88.13530731201172, "logps/rejected": -136.34829711914062, "loss": 0.1045, "rewards/accuracies": 0.75, "rewards/chosen": -1.7495485544204712, "rewards/margins": 3.705301284790039, "rewards/rejected": -5.454849720001221, "step": 1509 }, { "epoch": 2.6, "learning_rate": 3.9513387165320865e-07, "logits/chosen": -2.188479423522949, "logits/rejected": -1.8604707717895508, "logps/chosen": -102.82194519042969, "logps/rejected": -128.92857360839844, "loss": 0.138, "rewards/accuracies": 0.75, "rewards/chosen": -1.268310785293579, "rewards/margins": 2.8914380073547363, "rewards/rejected": -4.1597490310668945, "step": 1510 }, { "epoch": 2.6, "learning_rate": 3.9502762430939225e-07, "logits/chosen": -2.0660629272460938, "logits/rejected": -2.369313955307007, "logps/chosen": -72.52122497558594, "logps/rejected": -139.2060546875, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": -2.1385653018951416, "rewards/margins": 5.362400054931641, "rewards/rejected": -7.5009660720825195, "step": 1511 }, { "epoch": 2.6, "learning_rate": 3.9492137696557585e-07, "logits/chosen": -2.0597429275512695, "logits/rejected": -2.215998649597168, "logps/chosen": -80.35243225097656, "logps/rejected": -195.7925567626953, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": -0.46790969371795654, "rewards/margins": 9.60801887512207, "rewards/rejected": -10.075928688049316, "step": 1512 }, { "epoch": 2.6, "learning_rate": 3.9481512962175945e-07, "logits/chosen": -2.304598331451416, "logits/rejected": -2.037964105606079, "logps/chosen": -99.96014404296875, "logps/rejected": -120.56884765625, "loss": 0.0935, "rewards/accuracies": 0.75, "rewards/chosen": -1.017856478691101, "rewards/margins": 3.0265064239501953, "rewards/rejected": -4.044363021850586, "step": 1513 }, { "epoch": 2.61, "learning_rate": 3.9470888227794305e-07, "logits/chosen": -2.35892915725708, "logits/rejected": -1.947750210762024, "logps/chosen": -116.9488296508789, "logps/rejected": -149.98976135253906, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": -2.6419947147369385, "rewards/margins": 3.829512119293213, "rewards/rejected": -6.471506595611572, "step": 1514 }, { "epoch": 2.61, "learning_rate": 3.9460263493412665e-07, "logits/chosen": -2.228747844696045, "logits/rejected": -2.230125665664673, "logps/chosen": -103.27947998046875, "logps/rejected": -180.1077880859375, "loss": 0.0766, "rewards/accuracies": 1.0, "rewards/chosen": -0.7721778750419617, "rewards/margins": 8.210708618164062, "rewards/rejected": -8.98288631439209, "step": 1515 }, { "epoch": 2.61, "learning_rate": 3.9449638759031025e-07, "logits/chosen": -2.0456271171569824, "logits/rejected": -2.277085065841675, "logps/chosen": -82.22108459472656, "logps/rejected": -150.46304321289062, "loss": 0.1064, "rewards/accuracies": 1.0, "rewards/chosen": -1.0199707746505737, "rewards/margins": 5.808768272399902, "rewards/rejected": -6.828739166259766, "step": 1516 }, { "epoch": 2.61, "learning_rate": 3.943901402464938e-07, "logits/chosen": -1.5835150480270386, "logits/rejected": -2.5318610668182373, "logps/chosen": -78.5367660522461, "logps/rejected": -160.5211639404297, "loss": 0.1516, "rewards/accuracies": 1.0, "rewards/chosen": -0.537682831287384, "rewards/margins": 5.310128211975098, "rewards/rejected": -5.847811222076416, "step": 1517 }, { "epoch": 2.61, "learning_rate": 3.9428389290267744e-07, "logits/chosen": -2.3256478309631348, "logits/rejected": -2.431426763534546, "logps/chosen": -98.75696563720703, "logps/rejected": -173.1877899169922, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -0.6840547323226929, "rewards/margins": 6.496614456176758, "rewards/rejected": -7.18066930770874, "step": 1518 }, { "epoch": 2.61, "learning_rate": 3.94177645558861e-07, "logits/chosen": -2.44427227973938, "logits/rejected": -2.27447247505188, "logps/chosen": -132.70761108398438, "logps/rejected": -120.16642761230469, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -0.9974209070205688, "rewards/margins": 4.073351860046387, "rewards/rejected": -5.070772647857666, "step": 1519 }, { "epoch": 2.62, "learning_rate": 3.940713982150446e-07, "logits/chosen": -1.9435772895812988, "logits/rejected": -2.3881068229675293, "logps/chosen": -63.544315338134766, "logps/rejected": -144.67831420898438, "loss": 0.1524, "rewards/accuracies": 1.0, "rewards/chosen": 0.3959552049636841, "rewards/margins": 6.5885467529296875, "rewards/rejected": -6.192591667175293, "step": 1520 }, { "epoch": 2.62, "learning_rate": 3.9396515087122824e-07, "logits/chosen": -2.1116926670074463, "logits/rejected": -2.3472511768341064, "logps/chosen": -89.2000961303711, "logps/rejected": -166.44149780273438, "loss": 0.1843, "rewards/accuracies": 1.0, "rewards/chosen": -1.4954359531402588, "rewards/margins": 6.6695356369018555, "rewards/rejected": -8.164972305297852, "step": 1521 }, { "epoch": 2.62, "learning_rate": 3.938589035274118e-07, "logits/chosen": -1.8418700695037842, "logits/rejected": -2.1311917304992676, "logps/chosen": -97.35033416748047, "logps/rejected": -206.45184326171875, "loss": 0.163, "rewards/accuracies": 0.75, "rewards/chosen": -2.5823161602020264, "rewards/margins": 6.849366188049316, "rewards/rejected": -9.431682586669922, "step": 1522 }, { "epoch": 2.62, "learning_rate": 3.937526561835954e-07, "logits/chosen": -2.24505877494812, "logits/rejected": -2.0570263862609863, "logps/chosen": -108.63600158691406, "logps/rejected": -135.949462890625, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": -2.5821104049682617, "rewards/margins": 3.853942394256592, "rewards/rejected": -6.436052322387695, "step": 1523 }, { "epoch": 2.62, "learning_rate": 3.93646408839779e-07, "logits/chosen": -2.030355453491211, "logits/rejected": -2.0233263969421387, "logps/chosen": -112.576904296875, "logps/rejected": -115.50825500488281, "loss": 0.2, "rewards/accuracies": 0.75, "rewards/chosen": -2.9020776748657227, "rewards/margins": 1.4396426677703857, "rewards/rejected": -4.3417205810546875, "step": 1524 }, { "epoch": 2.62, "learning_rate": 3.935401614959626e-07, "logits/chosen": -2.244724750518799, "logits/rejected": -2.151669979095459, "logps/chosen": -100.18148803710938, "logps/rejected": -141.5711669921875, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": -1.4970035552978516, "rewards/margins": 4.612265110015869, "rewards/rejected": -6.109268665313721, "step": 1525 }, { "epoch": 2.63, "learning_rate": 3.934339141521462e-07, "logits/chosen": -2.445509195327759, "logits/rejected": -1.9452714920043945, "logps/chosen": -114.90777587890625, "logps/rejected": -114.68944549560547, "loss": 0.1875, "rewards/accuracies": 0.75, "rewards/chosen": -1.8671789169311523, "rewards/margins": 2.2017297744750977, "rewards/rejected": -4.06890869140625, "step": 1526 }, { "epoch": 2.63, "learning_rate": 3.933276668083298e-07, "logits/chosen": -1.9386297464370728, "logits/rejected": -2.428170680999756, "logps/chosen": -114.73960876464844, "logps/rejected": -182.6976776123047, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": -1.919554352760315, "rewards/margins": 5.088068008422852, "rewards/rejected": -7.007622718811035, "step": 1527 }, { "epoch": 2.63, "learning_rate": 3.932214194645134e-07, "logits/chosen": -2.1091010570526123, "logits/rejected": -2.437429904937744, "logps/chosen": -86.6117935180664, "logps/rejected": -149.79490661621094, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -0.701917290687561, "rewards/margins": 4.922990322113037, "rewards/rejected": -5.624907493591309, "step": 1528 }, { "epoch": 2.63, "learning_rate": 3.931151721206969e-07, "logits/chosen": -2.292119026184082, "logits/rejected": -1.9999263286590576, "logps/chosen": -97.46805572509766, "logps/rejected": -150.30242919921875, "loss": 0.203, "rewards/accuracies": 0.75, "rewards/chosen": -0.9614146947860718, "rewards/margins": 5.551846504211426, "rewards/rejected": -6.513260841369629, "step": 1529 }, { "epoch": 2.63, "learning_rate": 3.930089247768806e-07, "logits/chosen": -2.3253469467163086, "logits/rejected": -2.3438329696655273, "logps/chosen": -68.248046875, "logps/rejected": -129.14845275878906, "loss": 0.211, "rewards/accuracies": 1.0, "rewards/chosen": -0.044832028448581696, "rewards/margins": 6.376726150512695, "rewards/rejected": -6.421558380126953, "step": 1530 }, { "epoch": 2.64, "learning_rate": 3.929026774330642e-07, "logits/chosen": -2.300321102142334, "logits/rejected": -2.2666454315185547, "logps/chosen": -106.71550750732422, "logps/rejected": -138.6062774658203, "loss": 0.1326, "rewards/accuracies": 0.75, "rewards/chosen": -1.5424431562423706, "rewards/margins": 4.094493389129639, "rewards/rejected": -5.636936187744141, "step": 1531 }, { "epoch": 2.64, "learning_rate": 3.927964300892477e-07, "logits/chosen": -2.5130512714385986, "logits/rejected": -2.308288335800171, "logps/chosen": -97.2171630859375, "logps/rejected": -166.3577117919922, "loss": 0.1534, "rewards/accuracies": 1.0, "rewards/chosen": -0.3980598747730255, "rewards/margins": 8.166004180908203, "rewards/rejected": -8.564064025878906, "step": 1532 }, { "epoch": 2.64, "learning_rate": 3.926901827454314e-07, "logits/chosen": -2.1734466552734375, "logits/rejected": -2.2863504886627197, "logps/chosen": -84.75099182128906, "logps/rejected": -150.38150024414062, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": -0.673401951789856, "rewards/margins": 6.027099609375, "rewards/rejected": -6.700501441955566, "step": 1533 }, { "epoch": 2.64, "learning_rate": 3.925839354016149e-07, "logits/chosen": -2.097566604614258, "logits/rejected": -2.392686367034912, "logps/chosen": -82.42265319824219, "logps/rejected": -137.12718200683594, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -1.8907114267349243, "rewards/margins": 4.262678146362305, "rewards/rejected": -6.1533894538879395, "step": 1534 }, { "epoch": 2.64, "learning_rate": 3.924776880577985e-07, "logits/chosen": -2.144895315170288, "logits/rejected": -2.195903778076172, "logps/chosen": -103.20610809326172, "logps/rejected": -169.66610717773438, "loss": 0.0992, "rewards/accuracies": 1.0, "rewards/chosen": -0.7978733777999878, "rewards/margins": 6.507529258728027, "rewards/rejected": -7.305402755737305, "step": 1535 }, { "epoch": 2.64, "learning_rate": 3.9237144071398217e-07, "logits/chosen": -2.307624101638794, "logits/rejected": -2.2727489471435547, "logps/chosen": -118.67683410644531, "logps/rejected": -149.91522216796875, "loss": 0.1598, "rewards/accuracies": 0.75, "rewards/chosen": -2.068821668624878, "rewards/margins": 3.136274814605713, "rewards/rejected": -5.205096244812012, "step": 1536 }, { "epoch": 2.65, "learning_rate": 3.922651933701657e-07, "logits/chosen": -1.966153621673584, "logits/rejected": -2.0870070457458496, "logps/chosen": -123.80333709716797, "logps/rejected": -160.02044677734375, "loss": 0.1159, "rewards/accuracies": 0.75, "rewards/chosen": -3.9223272800445557, "rewards/margins": 3.389286518096924, "rewards/rejected": -7.311614036560059, "step": 1537 }, { "epoch": 2.65, "learning_rate": 3.921589460263493e-07, "logits/chosen": -2.196131467819214, "logits/rejected": -2.0480895042419434, "logps/chosen": -108.18220520019531, "logps/rejected": -135.70948791503906, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": -0.7196975946426392, "rewards/margins": 3.9736177921295166, "rewards/rejected": -4.693315505981445, "step": 1538 }, { "epoch": 2.65, "learning_rate": 3.920526986825329e-07, "logits/chosen": -2.241018772125244, "logits/rejected": -2.250396251678467, "logps/chosen": -97.30467224121094, "logps/rejected": -147.0255126953125, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -1.3102056980133057, "rewards/margins": 5.4392805099487305, "rewards/rejected": -6.749485969543457, "step": 1539 }, { "epoch": 2.65, "learning_rate": 3.919464513387165e-07, "logits/chosen": -1.9190726280212402, "logits/rejected": -2.085080623626709, "logps/chosen": -106.84323120117188, "logps/rejected": -178.80593872070312, "loss": 0.156, "rewards/accuracies": 1.0, "rewards/chosen": -1.0319290161132812, "rewards/margins": 6.952126979827881, "rewards/rejected": -7.984055995941162, "step": 1540 }, { "epoch": 2.65, "learning_rate": 3.918402039949001e-07, "logits/chosen": -2.3273682594299316, "logits/rejected": -2.0984692573547363, "logps/chosen": -117.8534164428711, "logps/rejected": -177.4553985595703, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": -2.058072566986084, "rewards/margins": 6.221766948699951, "rewards/rejected": -8.279839515686035, "step": 1541 }, { "epoch": 2.65, "learning_rate": 3.917339566510837e-07, "logits/chosen": -2.304917573928833, "logits/rejected": -2.213890790939331, "logps/chosen": -112.79959106445312, "logps/rejected": -181.130859375, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": -1.0618482828140259, "rewards/margins": 7.096935272216797, "rewards/rejected": -8.158782958984375, "step": 1542 }, { "epoch": 2.66, "learning_rate": 3.916277093072673e-07, "logits/chosen": -2.1797361373901367, "logits/rejected": -1.9740195274353027, "logps/chosen": -101.36703491210938, "logps/rejected": -165.8983917236328, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": -0.5615265965461731, "rewards/margins": 7.23142147064209, "rewards/rejected": -7.792947769165039, "step": 1543 }, { "epoch": 2.66, "learning_rate": 3.9152146196345086e-07, "logits/chosen": -2.098494529724121, "logits/rejected": -2.0041375160217285, "logps/chosen": -133.70513916015625, "logps/rejected": -187.28057861328125, "loss": 0.1089, "rewards/accuracies": 1.0, "rewards/chosen": -1.7996320724487305, "rewards/margins": 7.914601802825928, "rewards/rejected": -9.714234352111816, "step": 1544 }, { "epoch": 2.66, "learning_rate": 3.914152146196345e-07, "logits/chosen": -2.1095590591430664, "logits/rejected": -2.4437499046325684, "logps/chosen": -89.3748779296875, "logps/rejected": -167.2376251220703, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -1.490951657295227, "rewards/margins": 6.1850175857543945, "rewards/rejected": -7.675969123840332, "step": 1545 }, { "epoch": 2.66, "learning_rate": 3.9130896727581805e-07, "logits/chosen": -2.4294166564941406, "logits/rejected": -2.0170183181762695, "logps/chosen": -127.22144317626953, "logps/rejected": -185.94711303710938, "loss": 0.0753, "rewards/accuracies": 1.0, "rewards/chosen": -1.7701539993286133, "rewards/margins": 7.486708164215088, "rewards/rejected": -9.256861686706543, "step": 1546 }, { "epoch": 2.66, "learning_rate": 3.9120271993200165e-07, "logits/chosen": -1.9977656602859497, "logits/rejected": -2.437450408935547, "logps/chosen": -90.25751495361328, "logps/rejected": -158.24072265625, "loss": 0.1613, "rewards/accuracies": 1.0, "rewards/chosen": -1.715458631515503, "rewards/margins": 6.102776527404785, "rewards/rejected": -7.818235397338867, "step": 1547 }, { "epoch": 2.66, "learning_rate": 3.910964725881853e-07, "logits/chosen": -2.3477678298950195, "logits/rejected": -2.0638248920440674, "logps/chosen": -92.52973175048828, "logps/rejected": -125.1358642578125, "loss": 0.1022, "rewards/accuracies": 0.5, "rewards/chosen": -0.42335474491119385, "rewards/margins": 3.611337661743164, "rewards/rejected": -4.034692287445068, "step": 1548 }, { "epoch": 2.67, "learning_rate": 3.9099022524436885e-07, "logits/chosen": -2.312130928039551, "logits/rejected": -2.2461342811584473, "logps/chosen": -100.0279312133789, "logps/rejected": -155.20620727539062, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.708290457725525, "rewards/margins": 5.545482635498047, "rewards/rejected": -7.253773212432861, "step": 1549 }, { "epoch": 2.67, "learning_rate": 3.9088397790055245e-07, "logits/chosen": -2.4625606536865234, "logits/rejected": -1.7901618480682373, "logps/chosen": -106.84290313720703, "logps/rejected": -120.65249633789062, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -0.35679781436920166, "rewards/margins": 2.8801066875457764, "rewards/rejected": -3.2369046211242676, "step": 1550 }, { "epoch": 2.67, "learning_rate": 3.9077773055673605e-07, "logits/chosen": -2.3389410972595215, "logits/rejected": -2.250703811645508, "logps/chosen": -112.25324249267578, "logps/rejected": -176.39224243164062, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": -1.9584870338439941, "rewards/margins": 7.91741943359375, "rewards/rejected": -9.875906944274902, "step": 1551 }, { "epoch": 2.67, "learning_rate": 3.9067148321291965e-07, "logits/chosen": -2.1925859451293945, "logits/rejected": -2.090749502182007, "logps/chosen": -124.32554626464844, "logps/rejected": -192.72280883789062, "loss": 0.0837, "rewards/accuracies": 1.0, "rewards/chosen": -1.7384452819824219, "rewards/margins": 8.009090423583984, "rewards/rejected": -9.747536659240723, "step": 1552 }, { "epoch": 2.67, "learning_rate": 3.905652358691033e-07, "logits/chosen": -2.434955596923828, "logits/rejected": -2.176908493041992, "logps/chosen": -82.53824615478516, "logps/rejected": -147.60519409179688, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 0.021380439400672913, "rewards/margins": 6.614034652709961, "rewards/rejected": -6.592654705047607, "step": 1553 }, { "epoch": 2.67, "learning_rate": 3.9045898852528685e-07, "logits/chosen": -2.284165620803833, "logits/rejected": -2.0779311656951904, "logps/chosen": -131.2263946533203, "logps/rejected": -176.02606201171875, "loss": 0.1713, "rewards/accuracies": 1.0, "rewards/chosen": -1.5506261587142944, "rewards/margins": 6.511985778808594, "rewards/rejected": -8.062612533569336, "step": 1554 }, { "epoch": 2.68, "learning_rate": 3.9035274118147044e-07, "logits/chosen": -1.7282073497772217, "logits/rejected": -2.3602850437164307, "logps/chosen": -93.89115142822266, "logps/rejected": -165.4212646484375, "loss": 0.1227, "rewards/accuracies": 1.0, "rewards/chosen": -0.9622344970703125, "rewards/margins": 4.916715621948242, "rewards/rejected": -5.8789496421813965, "step": 1555 }, { "epoch": 2.68, "learning_rate": 3.9024649383765404e-07, "logits/chosen": -2.172769546508789, "logits/rejected": -2.226442337036133, "logps/chosen": -104.63433837890625, "logps/rejected": -180.06275939941406, "loss": 0.1553, "rewards/accuracies": 1.0, "rewards/chosen": -0.8264561295509338, "rewards/margins": 6.596789360046387, "rewards/rejected": -7.423245429992676, "step": 1556 }, { "epoch": 2.68, "learning_rate": 3.9014024649383764e-07, "logits/chosen": -2.2446465492248535, "logits/rejected": -2.6021053791046143, "logps/chosen": -94.30763244628906, "logps/rejected": -146.8133087158203, "loss": 0.1475, "rewards/accuracies": 1.0, "rewards/chosen": -0.717157781124115, "rewards/margins": 5.0781450271606445, "rewards/rejected": -5.795302391052246, "step": 1557 }, { "epoch": 2.68, "learning_rate": 3.9003399915002124e-07, "logits/chosen": -2.2587742805480957, "logits/rejected": -2.1312074661254883, "logps/chosen": -91.11575317382812, "logps/rejected": -179.24960327148438, "loss": 0.1159, "rewards/accuracies": 1.0, "rewards/chosen": -0.36903440952301025, "rewards/margins": 8.829742431640625, "rewards/rejected": -9.198776245117188, "step": 1558 }, { "epoch": 2.68, "learning_rate": 3.8992775180620484e-07, "logits/chosen": -2.370461940765381, "logits/rejected": -2.0174496173858643, "logps/chosen": -104.05191040039062, "logps/rejected": -165.0337677001953, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -1.2317699193954468, "rewards/margins": 4.868751049041748, "rewards/rejected": -6.100521087646484, "step": 1559 }, { "epoch": 2.69, "learning_rate": 3.8982150446238844e-07, "logits/chosen": -2.389617443084717, "logits/rejected": -2.215467691421509, "logps/chosen": -107.70793914794922, "logps/rejected": -182.7801513671875, "loss": 0.1242, "rewards/accuracies": 1.0, "rewards/chosen": -0.7228571176528931, "rewards/margins": 6.257987976074219, "rewards/rejected": -6.9808454513549805, "step": 1560 }, { "epoch": 2.69, "learning_rate": 3.89715257118572e-07, "logits/chosen": -2.4269306659698486, "logits/rejected": -1.7967851161956787, "logps/chosen": -98.40896606445312, "logps/rejected": -130.0316925048828, "loss": 0.0855, "rewards/accuracies": 1.0, "rewards/chosen": -1.0513403415679932, "rewards/margins": 5.696817874908447, "rewards/rejected": -6.7481584548950195, "step": 1561 }, { "epoch": 2.69, "learning_rate": 3.8960900977475564e-07, "logits/chosen": -2.182194709777832, "logits/rejected": -2.2952046394348145, "logps/chosen": -85.14096069335938, "logps/rejected": -142.00462341308594, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": -0.8499709963798523, "rewards/margins": 4.896228313446045, "rewards/rejected": -5.746199607849121, "step": 1562 }, { "epoch": 2.69, "learning_rate": 3.8950276243093924e-07, "logits/chosen": -2.1936159133911133, "logits/rejected": -2.2451741695404053, "logps/chosen": -88.90257263183594, "logps/rejected": -144.5420684814453, "loss": 0.1105, "rewards/accuracies": 1.0, "rewards/chosen": -0.6194198131561279, "rewards/margins": 5.611578941345215, "rewards/rejected": -6.230998992919922, "step": 1563 }, { "epoch": 2.69, "learning_rate": 3.893965150871228e-07, "logits/chosen": -2.3684849739074707, "logits/rejected": -2.41347599029541, "logps/chosen": -88.61099243164062, "logps/rejected": -155.2030487060547, "loss": 0.2406, "rewards/accuracies": 0.75, "rewards/chosen": -1.111098051071167, "rewards/margins": 5.173271656036377, "rewards/rejected": -6.284369468688965, "step": 1564 }, { "epoch": 2.69, "learning_rate": 3.8929026774330643e-07, "logits/chosen": -2.015375852584839, "logits/rejected": -2.2644731998443604, "logps/chosen": -78.21815490722656, "logps/rejected": -154.994384765625, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -2.2489590644836426, "rewards/margins": 6.075497150421143, "rewards/rejected": -8.324456214904785, "step": 1565 }, { "epoch": 2.7, "learning_rate": 3.8918402039949e-07, "logits/chosen": -2.155709981918335, "logits/rejected": -1.9602632522583008, "logps/chosen": -100.69684600830078, "logps/rejected": -155.03427124023438, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -1.4276832342147827, "rewards/margins": 6.681303977966309, "rewards/rejected": -8.108987808227539, "step": 1566 }, { "epoch": 2.7, "learning_rate": 3.890777730556736e-07, "logits/chosen": -2.344205617904663, "logits/rejected": -1.9453600645065308, "logps/chosen": -100.41748809814453, "logps/rejected": -141.4186553955078, "loss": 0.2152, "rewards/accuracies": 1.0, "rewards/chosen": 0.017776310443878174, "rewards/margins": 5.768503189086914, "rewards/rejected": -5.75072717666626, "step": 1567 }, { "epoch": 2.7, "learning_rate": 3.8897152571185723e-07, "logits/chosen": -2.4078662395477295, "logits/rejected": -2.195690155029297, "logps/chosen": -87.47441101074219, "logps/rejected": -132.88494873046875, "loss": 0.1563, "rewards/accuracies": 1.0, "rewards/chosen": -0.874421238899231, "rewards/margins": 5.208187580108643, "rewards/rejected": -6.082608699798584, "step": 1568 }, { "epoch": 2.7, "learning_rate": 3.888652783680408e-07, "logits/chosen": -2.0548739433288574, "logits/rejected": -2.2569026947021484, "logps/chosen": -94.21197509765625, "logps/rejected": -157.56407165527344, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/chosen": -1.198696255683899, "rewards/margins": 4.980074882507324, "rewards/rejected": -6.17877197265625, "step": 1569 }, { "epoch": 2.7, "learning_rate": 3.887590310242244e-07, "logits/chosen": -2.0699243545532227, "logits/rejected": -2.291274309158325, "logps/chosen": -91.78826141357422, "logps/rejected": -182.2002716064453, "loss": 0.2067, "rewards/accuracies": 1.0, "rewards/chosen": -0.7681552767753601, "rewards/margins": 7.766387939453125, "rewards/rejected": -8.534543991088867, "step": 1570 }, { "epoch": 2.7, "learning_rate": 3.88652783680408e-07, "logits/chosen": -2.2459709644317627, "logits/rejected": -2.3002796173095703, "logps/chosen": -95.46165466308594, "logps/rejected": -153.35183715820312, "loss": 0.0649, "rewards/accuracies": 1.0, "rewards/chosen": -3.590651512145996, "rewards/margins": 5.080739974975586, "rewards/rejected": -8.671391487121582, "step": 1571 }, { "epoch": 2.71, "learning_rate": 3.8854653633659157e-07, "logits/chosen": -2.194242477416992, "logits/rejected": -2.4606218338012695, "logps/chosen": -101.78199005126953, "logps/rejected": -168.70953369140625, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": -0.06700289249420166, "rewards/margins": 5.702476501464844, "rewards/rejected": -5.769479751586914, "step": 1572 }, { "epoch": 2.71, "learning_rate": 3.884402889927751e-07, "logits/chosen": -2.1415505409240723, "logits/rejected": -2.0396029949188232, "logps/chosen": -100.94441223144531, "logps/rejected": -141.37234497070312, "loss": 0.1008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7497570514678955, "rewards/margins": 3.5113511085510254, "rewards/rejected": -5.2611083984375, "step": 1573 }, { "epoch": 2.71, "learning_rate": 3.8833404164895877e-07, "logits/chosen": -2.1817407608032227, "logits/rejected": -2.2930424213409424, "logps/chosen": -112.5933837890625, "logps/rejected": -185.30084228515625, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": -1.4099432229995728, "rewards/margins": 6.148549556732178, "rewards/rejected": -7.558493614196777, "step": 1574 }, { "epoch": 2.71, "learning_rate": 3.8822779430514237e-07, "logits/chosen": -1.9095405340194702, "logits/rejected": -2.218052625656128, "logps/chosen": -110.32598114013672, "logps/rejected": -169.73048400878906, "loss": 0.1743, "rewards/accuracies": 0.75, "rewards/chosen": -1.9699698686599731, "rewards/margins": 4.167032718658447, "rewards/rejected": -6.137002468109131, "step": 1575 }, { "epoch": 2.71, "learning_rate": 3.881215469613259e-07, "logits/chosen": -1.9754626750946045, "logits/rejected": -2.348557472229004, "logps/chosen": -90.76569366455078, "logps/rejected": -191.97113037109375, "loss": 0.1135, "rewards/accuracies": 1.0, "rewards/chosen": -0.899039089679718, "rewards/margins": 8.429248809814453, "rewards/rejected": -9.328288078308105, "step": 1576 }, { "epoch": 2.71, "learning_rate": 3.8801529961750957e-07, "logits/chosen": -2.321309804916382, "logits/rejected": -2.1882619857788086, "logps/chosen": -117.33385467529297, "logps/rejected": -170.76025390625, "loss": 0.1681, "rewards/accuracies": 1.0, "rewards/chosen": -2.784611225128174, "rewards/margins": 5.903022766113281, "rewards/rejected": -8.687633514404297, "step": 1577 }, { "epoch": 2.72, "learning_rate": 3.879090522736931e-07, "logits/chosen": -2.5099856853485107, "logits/rejected": -2.429607391357422, "logps/chosen": -128.17869567871094, "logps/rejected": -190.48733520507812, "loss": 0.1039, "rewards/accuracies": 1.0, "rewards/chosen": -2.569108247756958, "rewards/margins": 6.576536655426025, "rewards/rejected": -9.145644187927246, "step": 1578 }, { "epoch": 2.72, "learning_rate": 3.878028049298767e-07, "logits/chosen": -2.108708143234253, "logits/rejected": -2.1827850341796875, "logps/chosen": -111.91315460205078, "logps/rejected": -152.90505981445312, "loss": 0.1892, "rewards/accuracies": 1.0, "rewards/chosen": -1.4924020767211914, "rewards/margins": 5.5432586669921875, "rewards/rejected": -7.035660266876221, "step": 1579 }, { "epoch": 2.72, "learning_rate": 3.8769655758606036e-07, "logits/chosen": -2.0616397857666016, "logits/rejected": -2.1610121726989746, "logps/chosen": -99.16414642333984, "logps/rejected": -156.52261352539062, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/chosen": -1.0348186492919922, "rewards/margins": 6.184913158416748, "rewards/rejected": -7.219731330871582, "step": 1580 }, { "epoch": 2.72, "learning_rate": 3.875903102422439e-07, "logits/chosen": -2.3086137771606445, "logits/rejected": -2.2381784915924072, "logps/chosen": -130.1898956298828, "logps/rejected": -163.6569061279297, "loss": 0.2312, "rewards/accuracies": 0.75, "rewards/chosen": -3.5491504669189453, "rewards/margins": 2.67368745803833, "rewards/rejected": -6.222837448120117, "step": 1581 }, { "epoch": 2.72, "learning_rate": 3.874840628984275e-07, "logits/chosen": -1.7274584770202637, "logits/rejected": -2.29099702835083, "logps/chosen": -94.70932006835938, "logps/rejected": -186.10731506347656, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": -1.6263662576675415, "rewards/margins": 6.531195640563965, "rewards/rejected": -8.157562255859375, "step": 1582 }, { "epoch": 2.72, "learning_rate": 3.873778155546111e-07, "logits/chosen": -2.2269158363342285, "logits/rejected": -2.3834941387176514, "logps/chosen": -89.17491912841797, "logps/rejected": -187.11483764648438, "loss": 0.0664, "rewards/accuracies": 1.0, "rewards/chosen": -0.16148489713668823, "rewards/margins": 9.749332427978516, "rewards/rejected": -9.910816192626953, "step": 1583 }, { "epoch": 2.73, "learning_rate": 3.872715682107947e-07, "logits/chosen": -2.044631004333496, "logits/rejected": -2.2980103492736816, "logps/chosen": -105.12210845947266, "logps/rejected": -150.23464965820312, "loss": 0.0664, "rewards/accuracies": 1.0, "rewards/chosen": -1.3716440200805664, "rewards/margins": 3.989093780517578, "rewards/rejected": -5.3607378005981445, "step": 1584 }, { "epoch": 2.73, "learning_rate": 3.871653208669783e-07, "logits/chosen": -2.332289218902588, "logits/rejected": -2.190981864929199, "logps/chosen": -108.92436218261719, "logps/rejected": -173.23687744140625, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -1.8424339294433594, "rewards/margins": 6.522712230682373, "rewards/rejected": -8.365145683288574, "step": 1585 }, { "epoch": 2.73, "learning_rate": 3.870590735231619e-07, "logits/chosen": -2.3616018295288086, "logits/rejected": -1.6662966012954712, "logps/chosen": -131.27627563476562, "logps/rejected": -172.04530334472656, "loss": 0.1776, "rewards/accuracies": 1.0, "rewards/chosen": -2.5612359046936035, "rewards/margins": 5.8843278884887695, "rewards/rejected": -8.445564270019531, "step": 1586 }, { "epoch": 2.73, "learning_rate": 3.869528261793455e-07, "logits/chosen": -2.179476261138916, "logits/rejected": -2.252331495285034, "logps/chosen": -98.4123306274414, "logps/rejected": -124.06692504882812, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": -1.283364176750183, "rewards/margins": 3.1768393516540527, "rewards/rejected": -4.460203647613525, "step": 1587 }, { "epoch": 2.73, "learning_rate": 3.8684657883552905e-07, "logits/chosen": -2.170121431350708, "logits/rejected": -2.361243724822998, "logps/chosen": -107.56086730957031, "logps/rejected": -137.0133514404297, "loss": 0.1514, "rewards/accuracies": 0.75, "rewards/chosen": -3.3670225143432617, "rewards/margins": 2.2492010593414307, "rewards/rejected": -5.616223335266113, "step": 1588 }, { "epoch": 2.73, "learning_rate": 3.867403314917127e-07, "logits/chosen": -2.035966396331787, "logits/rejected": -2.501136302947998, "logps/chosen": -101.75947570800781, "logps/rejected": -163.55685424804688, "loss": 0.1397, "rewards/accuracies": 1.0, "rewards/chosen": -1.5224356651306152, "rewards/margins": 4.482725620269775, "rewards/rejected": -6.005161285400391, "step": 1589 }, { "epoch": 2.74, "learning_rate": 3.866340841478963e-07, "logits/chosen": -2.4576187133789062, "logits/rejected": -2.2407219409942627, "logps/chosen": -145.15359497070312, "logps/rejected": -195.4174346923828, "loss": 0.1837, "rewards/accuracies": 1.0, "rewards/chosen": -2.835404396057129, "rewards/margins": 5.01380729675293, "rewards/rejected": -7.849211692810059, "step": 1590 }, { "epoch": 2.74, "learning_rate": 3.8652783680407985e-07, "logits/chosen": -2.3169569969177246, "logits/rejected": -2.228882312774658, "logps/chosen": -122.24357604980469, "logps/rejected": -167.9019775390625, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": -2.6066536903381348, "rewards/margins": 5.751095771789551, "rewards/rejected": -8.357748985290527, "step": 1591 }, { "epoch": 2.74, "learning_rate": 3.864215894602635e-07, "logits/chosen": -2.388716459274292, "logits/rejected": -2.1068811416625977, "logps/chosen": -90.4521713256836, "logps/rejected": -176.48147583007812, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -0.8042745590209961, "rewards/margins": 8.876463890075684, "rewards/rejected": -9.68073844909668, "step": 1592 }, { "epoch": 2.74, "learning_rate": 3.8631534211644704e-07, "logits/chosen": -2.245087146759033, "logits/rejected": -2.1797468662261963, "logps/chosen": -106.8504409790039, "logps/rejected": -135.89598083496094, "loss": 0.1265, "rewards/accuracies": 0.75, "rewards/chosen": -1.7593799829483032, "rewards/margins": 4.750604629516602, "rewards/rejected": -6.509984970092773, "step": 1593 }, { "epoch": 2.74, "learning_rate": 3.8620909477263064e-07, "logits/chosen": -1.9241080284118652, "logits/rejected": -2.400062084197998, "logps/chosen": -82.87355041503906, "logps/rejected": -205.8744354248047, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": -1.5537643432617188, "rewards/margins": 10.51199722290039, "rewards/rejected": -12.06576156616211, "step": 1594 }, { "epoch": 2.75, "learning_rate": 3.861028474288143e-07, "logits/chosen": -1.9822232723236084, "logits/rejected": -2.399897575378418, "logps/chosen": -105.03435516357422, "logps/rejected": -165.03048706054688, "loss": 0.0865, "rewards/accuracies": 0.75, "rewards/chosen": -2.337965726852417, "rewards/margins": 5.9791083335876465, "rewards/rejected": -8.317073822021484, "step": 1595 }, { "epoch": 2.75, "learning_rate": 3.8599660008499784e-07, "logits/chosen": -2.1146597862243652, "logits/rejected": -2.1613380908966064, "logps/chosen": -109.98365783691406, "logps/rejected": -189.0074462890625, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": -2.4460747241973877, "rewards/margins": 8.627317428588867, "rewards/rejected": -11.073392868041992, "step": 1596 }, { "epoch": 2.75, "learning_rate": 3.858903527411815e-07, "logits/chosen": -2.2629599571228027, "logits/rejected": -2.3850326538085938, "logps/chosen": -106.05043029785156, "logps/rejected": -186.8407745361328, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -1.9005886316299438, "rewards/margins": 6.317946434020996, "rewards/rejected": -8.218534469604492, "step": 1597 }, { "epoch": 2.75, "learning_rate": 3.8578410539736504e-07, "logits/chosen": -2.314809560775757, "logits/rejected": -2.4032368659973145, "logps/chosen": -106.14305114746094, "logps/rejected": -183.59828186035156, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -1.0640108585357666, "rewards/margins": 6.997262477874756, "rewards/rejected": -8.061273574829102, "step": 1598 }, { "epoch": 2.75, "learning_rate": 3.8567785805354864e-07, "logits/chosen": -2.0959558486938477, "logits/rejected": -2.2466983795166016, "logps/chosen": -124.77632141113281, "logps/rejected": -175.44577026367188, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": -3.334872245788574, "rewards/margins": 4.9523606300354, "rewards/rejected": -8.287232398986816, "step": 1599 }, { "epoch": 2.75, "learning_rate": 3.855716107097323e-07, "logits/chosen": -2.3351285457611084, "logits/rejected": -2.2573118209838867, "logps/chosen": -118.17864227294922, "logps/rejected": -180.0816650390625, "loss": 0.1958, "rewards/accuracies": 1.0, "rewards/chosen": -2.450535535812378, "rewards/margins": 5.807302951812744, "rewards/rejected": -8.25783920288086, "step": 1600 }, { "epoch": 2.76, "learning_rate": 3.8546536336591584e-07, "logits/chosen": -2.022524356842041, "logits/rejected": -2.3498950004577637, "logps/chosen": -71.84233856201172, "logps/rejected": -132.6390838623047, "loss": 0.106, "rewards/accuracies": 0.75, "rewards/chosen": -1.6773030757904053, "rewards/margins": 4.089421272277832, "rewards/rejected": -5.766724109649658, "step": 1601 }, { "epoch": 2.76, "learning_rate": 3.8535911602209943e-07, "logits/chosen": -2.4086108207702637, "logits/rejected": -2.23492431640625, "logps/chosen": -104.42049407958984, "logps/rejected": -136.83193969726562, "loss": 0.1416, "rewards/accuracies": 0.75, "rewards/chosen": -2.481924057006836, "rewards/margins": 4.430978298187256, "rewards/rejected": -6.912902355194092, "step": 1602 }, { "epoch": 2.76, "learning_rate": 3.8525286867828303e-07, "logits/chosen": -2.371777057647705, "logits/rejected": -2.061070442199707, "logps/chosen": -92.93233489990234, "logps/rejected": -170.9537353515625, "loss": 0.1674, "rewards/accuracies": 1.0, "rewards/chosen": -0.0304996520280838, "rewards/margins": 8.405686378479004, "rewards/rejected": -8.436185836791992, "step": 1603 }, { "epoch": 2.76, "learning_rate": 3.8514662133446663e-07, "logits/chosen": -1.9525184631347656, "logits/rejected": -2.2520267963409424, "logps/chosen": -57.75410079956055, "logps/rejected": -158.00372314453125, "loss": 0.1293, "rewards/accuracies": 1.0, "rewards/chosen": 0.6341692805290222, "rewards/margins": 7.51314640045166, "rewards/rejected": -6.878976821899414, "step": 1604 }, { "epoch": 2.76, "learning_rate": 3.850403739906502e-07, "logits/chosen": -2.304532766342163, "logits/rejected": -2.1744308471679688, "logps/chosen": -99.54615020751953, "logps/rejected": -158.51138305664062, "loss": 0.0964, "rewards/accuracies": 0.75, "rewards/chosen": -3.022777557373047, "rewards/margins": 4.9912309646606445, "rewards/rejected": -8.014008522033691, "step": 1605 }, { "epoch": 2.76, "learning_rate": 3.8493412664683383e-07, "logits/chosen": -2.3581104278564453, "logits/rejected": -2.188363552093506, "logps/chosen": -81.67160034179688, "logps/rejected": -182.83668518066406, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -0.09951809048652649, "rewards/margins": 9.18604850769043, "rewards/rejected": -9.285566329956055, "step": 1606 }, { "epoch": 2.77, "learning_rate": 3.8482787930301743e-07, "logits/chosen": -2.345175266265869, "logits/rejected": -2.355522632598877, "logps/chosen": -89.25310516357422, "logps/rejected": -160.2462615966797, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": -1.1677409410476685, "rewards/margins": 7.018792152404785, "rewards/rejected": -8.186532974243164, "step": 1607 }, { "epoch": 2.77, "learning_rate": 3.84721631959201e-07, "logits/chosen": -2.1626791954040527, "logits/rejected": -2.214132070541382, "logps/chosen": -110.090087890625, "logps/rejected": -161.8603515625, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": -0.790107011795044, "rewards/margins": 6.077495574951172, "rewards/rejected": -6.867602348327637, "step": 1608 }, { "epoch": 2.77, "learning_rate": 3.8461538461538463e-07, "logits/chosen": -1.7887709140777588, "logits/rejected": -2.3281965255737305, "logps/chosen": -121.51968383789062, "logps/rejected": -171.39865112304688, "loss": 0.143, "rewards/accuracies": 1.0, "rewards/chosen": -2.2916064262390137, "rewards/margins": 3.164358377456665, "rewards/rejected": -5.455965042114258, "step": 1609 }, { "epoch": 2.77, "learning_rate": 3.8450913727156817e-07, "logits/chosen": -2.481846809387207, "logits/rejected": -2.311784029006958, "logps/chosen": -88.29595184326172, "logps/rejected": -152.793701171875, "loss": 0.2265, "rewards/accuracies": 1.0, "rewards/chosen": -1.2649034261703491, "rewards/margins": 5.8172454833984375, "rewards/rejected": -7.082149028778076, "step": 1610 }, { "epoch": 2.77, "learning_rate": 3.8440288992775177e-07, "logits/chosen": -2.3125014305114746, "logits/rejected": -2.236065626144409, "logps/chosen": -107.89373779296875, "logps/rejected": -113.06248474121094, "loss": 0.2135, "rewards/accuracies": 0.5, "rewards/chosen": -4.247597694396973, "rewards/margins": 0.8184940218925476, "rewards/rejected": -5.066092014312744, "step": 1611 }, { "epoch": 2.77, "learning_rate": 3.842966425839354e-07, "logits/chosen": -2.3007874488830566, "logits/rejected": -2.3205196857452393, "logps/chosen": -115.19694519042969, "logps/rejected": -134.93392944335938, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": -1.6056722402572632, "rewards/margins": 3.0948591232299805, "rewards/rejected": -4.700531005859375, "step": 1612 }, { "epoch": 2.78, "learning_rate": 3.8419039524011897e-07, "logits/chosen": -2.2824342250823975, "logits/rejected": -2.463177442550659, "logps/chosen": -116.34420776367188, "logps/rejected": -141.75070190429688, "loss": 0.1333, "rewards/accuracies": 0.75, "rewards/chosen": -3.4221527576446533, "rewards/margins": 2.9229209423065186, "rewards/rejected": -6.345073699951172, "step": 1613 }, { "epoch": 2.78, "learning_rate": 3.8408414789630257e-07, "logits/chosen": -2.0376839637756348, "logits/rejected": -2.2109713554382324, "logps/chosen": -88.07909393310547, "logps/rejected": -177.67147827148438, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": -1.664588451385498, "rewards/margins": 8.296472549438477, "rewards/rejected": -9.961061477661133, "step": 1614 }, { "epoch": 2.78, "learning_rate": 3.8397790055248617e-07, "logits/chosen": -2.413213014602661, "logits/rejected": -2.160451889038086, "logps/chosen": -96.95232391357422, "logps/rejected": -141.29513549804688, "loss": 0.0887, "rewards/accuracies": 1.0, "rewards/chosen": -1.0261248350143433, "rewards/margins": 5.664283752441406, "rewards/rejected": -6.690408706665039, "step": 1615 }, { "epoch": 2.78, "learning_rate": 3.8387165320866977e-07, "logits/chosen": -1.9148993492126465, "logits/rejected": -2.1764309406280518, "logps/chosen": -87.47686004638672, "logps/rejected": -185.06109619140625, "loss": 0.115, "rewards/accuracies": 1.0, "rewards/chosen": -1.0915437936782837, "rewards/margins": 9.324299812316895, "rewards/rejected": -10.415843963623047, "step": 1616 }, { "epoch": 2.78, "learning_rate": 3.8376540586485336e-07, "logits/chosen": -2.1782169342041016, "logits/rejected": -2.4282262325286865, "logps/chosen": -85.38978576660156, "logps/rejected": -164.95986938476562, "loss": 0.1445, "rewards/accuracies": 1.0, "rewards/chosen": -1.6506794691085815, "rewards/margins": 7.157868385314941, "rewards/rejected": -8.808547973632812, "step": 1617 }, { "epoch": 2.78, "learning_rate": 3.8365915852103696e-07, "logits/chosen": -1.9356844425201416, "logits/rejected": -2.354612350463867, "logps/chosen": -93.60179138183594, "logps/rejected": -153.92578125, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -1.2137893438339233, "rewards/margins": 5.095592021942139, "rewards/rejected": -6.309381484985352, "step": 1618 }, { "epoch": 2.79, "learning_rate": 3.8355291117722056e-07, "logits/chosen": -1.7836475372314453, "logits/rejected": -2.4043514728546143, "logps/chosen": -91.6265869140625, "logps/rejected": -192.94216918945312, "loss": 0.1719, "rewards/accuracies": 1.0, "rewards/chosen": -0.8316135406494141, "rewards/margins": 7.032129287719727, "rewards/rejected": -7.863742828369141, "step": 1619 }, { "epoch": 2.79, "learning_rate": 3.834466638334041e-07, "logits/chosen": -2.205763339996338, "logits/rejected": -2.1166093349456787, "logps/chosen": -129.5463104248047, "logps/rejected": -171.72030639648438, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/chosen": -2.2795135974884033, "rewards/margins": 5.272188663482666, "rewards/rejected": -7.551702499389648, "step": 1620 }, { "epoch": 2.79, "learning_rate": 3.8334041648958776e-07, "logits/chosen": -2.395373582839966, "logits/rejected": -1.943469762802124, "logps/chosen": -117.3048095703125, "logps/rejected": -139.51766967773438, "loss": 0.2978, "rewards/accuracies": 0.75, "rewards/chosen": -2.021071195602417, "rewards/margins": 2.7325663566589355, "rewards/rejected": -4.753637790679932, "step": 1621 }, { "epoch": 2.79, "learning_rate": 3.8323416914577136e-07, "logits/chosen": -2.1332876682281494, "logits/rejected": -2.271740436553955, "logps/chosen": -99.82872009277344, "logps/rejected": -132.05856323242188, "loss": 0.1093, "rewards/accuracies": 1.0, "rewards/chosen": -3.1471123695373535, "rewards/margins": 3.22625994682312, "rewards/rejected": -6.373372554779053, "step": 1622 }, { "epoch": 2.79, "learning_rate": 3.831279218019549e-07, "logits/chosen": -2.22544527053833, "logits/rejected": -1.90654718875885, "logps/chosen": -95.31196594238281, "logps/rejected": -150.00668334960938, "loss": 0.1059, "rewards/accuracies": 1.0, "rewards/chosen": -1.2659562826156616, "rewards/margins": 5.971371650695801, "rewards/rejected": -7.237327575683594, "step": 1623 }, { "epoch": 2.8, "learning_rate": 3.8302167445813856e-07, "logits/chosen": -2.0822112560272217, "logits/rejected": -2.1560516357421875, "logps/chosen": -89.84040832519531, "logps/rejected": -135.45571899414062, "loss": 0.0795, "rewards/accuracies": 1.0, "rewards/chosen": -1.3757866621017456, "rewards/margins": 4.928056716918945, "rewards/rejected": -6.3038434982299805, "step": 1624 }, { "epoch": 2.8, "learning_rate": 3.829154271143221e-07, "logits/chosen": -2.3456759452819824, "logits/rejected": -2.403106451034546, "logps/chosen": -103.3765869140625, "logps/rejected": -147.51425170898438, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -4.077391624450684, "rewards/margins": 3.08781099319458, "rewards/rejected": -7.1652021408081055, "step": 1625 }, { "epoch": 2.8, "learning_rate": 3.828091797705057e-07, "logits/chosen": -2.1182198524475098, "logits/rejected": -2.3867335319519043, "logps/chosen": -96.47643280029297, "logps/rejected": -173.2657012939453, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": -0.9587371945381165, "rewards/margins": 6.6909003257751465, "rewards/rejected": -7.649637699127197, "step": 1626 }, { "epoch": 2.8, "learning_rate": 3.8270293242668935e-07, "logits/chosen": -2.0626091957092285, "logits/rejected": -2.1468210220336914, "logps/chosen": -97.57859802246094, "logps/rejected": -144.91070556640625, "loss": 0.0819, "rewards/accuracies": 1.0, "rewards/chosen": -2.75626277923584, "rewards/margins": 3.8117151260375977, "rewards/rejected": -6.5679779052734375, "step": 1627 }, { "epoch": 2.8, "learning_rate": 3.825966850828729e-07, "logits/chosen": -2.2872445583343506, "logits/rejected": -2.004791736602783, "logps/chosen": -70.68659973144531, "logps/rejected": -142.58139038085938, "loss": 0.0666, "rewards/accuracies": 1.0, "rewards/chosen": -0.8780727386474609, "rewards/margins": 7.055238246917725, "rewards/rejected": -7.933311462402344, "step": 1628 }, { "epoch": 2.8, "learning_rate": 3.824904377390565e-07, "logits/chosen": -2.0799734592437744, "logits/rejected": -2.339010715484619, "logps/chosen": -84.129638671875, "logps/rejected": -139.76361083984375, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -1.5003068447113037, "rewards/margins": 3.6815991401672363, "rewards/rejected": -5.181905746459961, "step": 1629 }, { "epoch": 2.81, "learning_rate": 3.823841903952401e-07, "logits/chosen": -1.9960991144180298, "logits/rejected": -2.2775444984436035, "logps/chosen": -99.33696746826172, "logps/rejected": -141.1796417236328, "loss": 0.1775, "rewards/accuracies": 1.0, "rewards/chosen": -2.1058483123779297, "rewards/margins": 3.139777660369873, "rewards/rejected": -5.2456254959106445, "step": 1630 }, { "epoch": 2.81, "learning_rate": 3.822779430514237e-07, "logits/chosen": -2.345580816268921, "logits/rejected": -2.293943166732788, "logps/chosen": -99.78093719482422, "logps/rejected": -181.68861389160156, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": -1.699244499206543, "rewards/margins": 6.9198198318481445, "rewards/rejected": -8.619064331054688, "step": 1631 }, { "epoch": 2.81, "learning_rate": 3.8217169570760724e-07, "logits/chosen": -1.8505618572235107, "logits/rejected": -2.353602886199951, "logps/chosen": -91.49249267578125, "logps/rejected": -184.4048309326172, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -1.0669960975646973, "rewards/margins": 7.47685432434082, "rewards/rejected": -8.543850898742676, "step": 1632 }, { "epoch": 2.81, "learning_rate": 3.820654483637909e-07, "logits/chosen": -2.0509488582611084, "logits/rejected": -2.417025566101074, "logps/chosen": -79.67346954345703, "logps/rejected": -173.58526611328125, "loss": 0.231, "rewards/accuracies": 1.0, "rewards/chosen": -0.34111446142196655, "rewards/margins": 8.725996017456055, "rewards/rejected": -9.067110061645508, "step": 1633 }, { "epoch": 2.81, "learning_rate": 3.819592010199745e-07, "logits/chosen": -2.276503801345825, "logits/rejected": -2.1096315383911133, "logps/chosen": -85.47540283203125, "logps/rejected": -174.06704711914062, "loss": 0.2462, "rewards/accuracies": 1.0, "rewards/chosen": -0.9350864291191101, "rewards/margins": 8.679035186767578, "rewards/rejected": -9.61412239074707, "step": 1634 }, { "epoch": 2.81, "learning_rate": 3.8185295367615804e-07, "logits/chosen": -2.1078102588653564, "logits/rejected": -2.3021864891052246, "logps/chosen": -100.52406311035156, "logps/rejected": -140.58126831054688, "loss": 0.0678, "rewards/accuracies": 0.75, "rewards/chosen": -1.0950870513916016, "rewards/margins": 3.907559394836426, "rewards/rejected": -5.002646446228027, "step": 1635 }, { "epoch": 2.82, "learning_rate": 3.817467063323417e-07, "logits/chosen": -2.4077634811401367, "logits/rejected": -2.1588313579559326, "logps/chosen": -86.4149398803711, "logps/rejected": -164.75885009765625, "loss": 0.092, "rewards/accuracies": 1.0, "rewards/chosen": -0.7524995803833008, "rewards/margins": 8.14638614654541, "rewards/rejected": -8.898885726928711, "step": 1636 }, { "epoch": 2.82, "learning_rate": 3.8164045898852524e-07, "logits/chosen": -2.427934169769287, "logits/rejected": -2.1080482006073, "logps/chosen": -89.46192932128906, "logps/rejected": -158.87530517578125, "loss": 0.0923, "rewards/accuracies": 1.0, "rewards/chosen": -0.2840731143951416, "rewards/margins": 7.798294544219971, "rewards/rejected": -8.082367897033691, "step": 1637 }, { "epoch": 2.82, "learning_rate": 3.815342116447089e-07, "logits/chosen": -2.306105136871338, "logits/rejected": -2.26084041595459, "logps/chosen": -81.73857879638672, "logps/rejected": -171.04922485351562, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": 0.1303720474243164, "rewards/margins": 9.037590026855469, "rewards/rejected": -8.907217979431152, "step": 1638 }, { "epoch": 2.82, "learning_rate": 3.814279643008925e-07, "logits/chosen": -2.088613986968994, "logits/rejected": -2.2381725311279297, "logps/chosen": -93.82598114013672, "logps/rejected": -173.23159790039062, "loss": 0.1516, "rewards/accuracies": 1.0, "rewards/chosen": -2.2362709045410156, "rewards/margins": 6.9532880783081055, "rewards/rejected": -9.189558982849121, "step": 1639 }, { "epoch": 2.82, "learning_rate": 3.8132171695707603e-07, "logits/chosen": -1.9824451208114624, "logits/rejected": -2.2916083335876465, "logps/chosen": -90.40238952636719, "logps/rejected": -156.67510986328125, "loss": 0.1264, "rewards/accuracies": 1.0, "rewards/chosen": -1.4736248254776, "rewards/margins": 5.461732864379883, "rewards/rejected": -6.935358047485352, "step": 1640 }, { "epoch": 2.82, "learning_rate": 3.812154696132597e-07, "logits/chosen": -2.357771873474121, "logits/rejected": -1.8986824750900269, "logps/chosen": -118.39985656738281, "logps/rejected": -159.35459899902344, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": -1.4006671905517578, "rewards/margins": 5.696040153503418, "rewards/rejected": -7.096707344055176, "step": 1641 }, { "epoch": 2.83, "learning_rate": 3.8110922226944323e-07, "logits/chosen": -2.4407882690429688, "logits/rejected": -2.302476644515991, "logps/chosen": -101.09632873535156, "logps/rejected": -129.7027130126953, "loss": 0.1284, "rewards/accuracies": 0.75, "rewards/chosen": -1.6399682760238647, "rewards/margins": 2.659466028213501, "rewards/rejected": -4.299434185028076, "step": 1642 }, { "epoch": 2.83, "learning_rate": 3.8100297492562683e-07, "logits/chosen": -2.356856346130371, "logits/rejected": -1.8872140645980835, "logps/chosen": -112.64529418945312, "logps/rejected": -168.39553833007812, "loss": 0.0871, "rewards/accuracies": 1.0, "rewards/chosen": -1.818886637687683, "rewards/margins": 7.438796043395996, "rewards/rejected": -9.257682800292969, "step": 1643 }, { "epoch": 2.83, "learning_rate": 3.808967275818105e-07, "logits/chosen": -2.057124137878418, "logits/rejected": -2.3302226066589355, "logps/chosen": -126.3577880859375, "logps/rejected": -177.40679931640625, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -2.1228835582733154, "rewards/margins": 4.838527679443359, "rewards/rejected": -6.961411476135254, "step": 1644 }, { "epoch": 2.83, "learning_rate": 3.8079048023799403e-07, "logits/chosen": -2.341353178024292, "logits/rejected": -2.531688928604126, "logps/chosen": -99.13136291503906, "logps/rejected": -164.73367309570312, "loss": 0.1932, "rewards/accuracies": 1.0, "rewards/chosen": -0.654949963092804, "rewards/margins": 6.2878336906433105, "rewards/rejected": -6.942783355712891, "step": 1645 }, { "epoch": 2.83, "learning_rate": 3.8068423289417763e-07, "logits/chosen": -2.367554187774658, "logits/rejected": -1.889769434928894, "logps/chosen": -107.72008514404297, "logps/rejected": -114.31889343261719, "loss": 0.1689, "rewards/accuracies": 0.75, "rewards/chosen": -1.1779295206069946, "rewards/margins": 1.6208339929580688, "rewards/rejected": -2.7987635135650635, "step": 1646 }, { "epoch": 2.83, "learning_rate": 3.805779855503612e-07, "logits/chosen": -2.216921091079712, "logits/rejected": -2.4440786838531494, "logps/chosen": -98.97074890136719, "logps/rejected": -177.58331298828125, "loss": 0.1173, "rewards/accuracies": 1.0, "rewards/chosen": -2.224381685256958, "rewards/margins": 6.0331950187683105, "rewards/rejected": -8.257576942443848, "step": 1647 }, { "epoch": 2.84, "learning_rate": 3.804717382065448e-07, "logits/chosen": -2.192129373550415, "logits/rejected": -2.242950916290283, "logps/chosen": -94.78448486328125, "logps/rejected": -154.13584899902344, "loss": 0.0991, "rewards/accuracies": 1.0, "rewards/chosen": -1.834583044052124, "rewards/margins": 5.400535583496094, "rewards/rejected": -7.235119342803955, "step": 1648 }, { "epoch": 2.84, "learning_rate": 3.803654908627284e-07, "logits/chosen": -2.4035558700561523, "logits/rejected": -2.214998483657837, "logps/chosen": -89.55489349365234, "logps/rejected": -167.56988525390625, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": -1.214656114578247, "rewards/margins": 7.757574081420898, "rewards/rejected": -8.972229957580566, "step": 1649 }, { "epoch": 2.84, "learning_rate": 3.80259243518912e-07, "logits/chosen": -2.261507987976074, "logits/rejected": -2.3435134887695312, "logps/chosen": -92.28773498535156, "logps/rejected": -143.55224609375, "loss": 0.0876, "rewards/accuracies": 1.0, "rewards/chosen": -0.8613399863243103, "rewards/margins": 4.64174222946167, "rewards/rejected": -5.503082275390625, "step": 1650 }, { "epoch": 2.84, "learning_rate": 3.801529961750956e-07, "logits/chosen": -2.3288896083831787, "logits/rejected": -2.368060827255249, "logps/chosen": -92.92759704589844, "logps/rejected": -144.36334228515625, "loss": 0.1173, "rewards/accuracies": 1.0, "rewards/chosen": -0.9408773183822632, "rewards/margins": 5.522533416748047, "rewards/rejected": -6.463410377502441, "step": 1651 }, { "epoch": 2.84, "learning_rate": 3.8004674883127917e-07, "logits/chosen": -2.1003904342651367, "logits/rejected": -2.4804089069366455, "logps/chosen": -85.59675598144531, "logps/rejected": -133.974365234375, "loss": 0.2056, "rewards/accuracies": 1.0, "rewards/chosen": -0.37742942571640015, "rewards/margins": 3.8226816654205322, "rewards/rejected": -4.200111389160156, "step": 1652 }, { "epoch": 2.85, "learning_rate": 3.799405014874628e-07, "logits/chosen": -2.3162121772766113, "logits/rejected": -2.261288642883301, "logps/chosen": -80.09151458740234, "logps/rejected": -165.36453247070312, "loss": 0.1564, "rewards/accuracies": 1.0, "rewards/chosen": -1.25790536403656, "rewards/margins": 7.416831016540527, "rewards/rejected": -8.674736022949219, "step": 1653 }, { "epoch": 2.85, "learning_rate": 3.798342541436464e-07, "logits/chosen": -2.4513702392578125, "logits/rejected": -1.9627832174301147, "logps/chosen": -97.77986145019531, "logps/rejected": -142.25486755371094, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -0.9445559978485107, "rewards/margins": 7.203108787536621, "rewards/rejected": -8.147664070129395, "step": 1654 }, { "epoch": 2.85, "learning_rate": 3.7972800679982996e-07, "logits/chosen": -2.1795551776885986, "logits/rejected": -2.112839698791504, "logps/chosen": -105.85646057128906, "logps/rejected": -167.66043090820312, "loss": 0.1384, "rewards/accuracies": 1.0, "rewards/chosen": -1.8643436431884766, "rewards/margins": 6.803062438964844, "rewards/rejected": -8.667407035827637, "step": 1655 }, { "epoch": 2.85, "learning_rate": 3.796217594560136e-07, "logits/chosen": -2.564302682876587, "logits/rejected": -2.0831453800201416, "logps/chosen": -112.97957611083984, "logps/rejected": -148.45770263671875, "loss": 0.1745, "rewards/accuracies": 1.0, "rewards/chosen": -0.9210346341133118, "rewards/margins": 4.697730541229248, "rewards/rejected": -5.618764877319336, "step": 1656 }, { "epoch": 2.85, "learning_rate": 3.7951551211219716e-07, "logits/chosen": -2.1083292961120605, "logits/rejected": -2.386122226715088, "logps/chosen": -126.57127380371094, "logps/rejected": -127.72882080078125, "loss": 0.1921, "rewards/accuracies": 0.5, "rewards/chosen": -3.5682575702667236, "rewards/margins": 0.3502933979034424, "rewards/rejected": -3.918550968170166, "step": 1657 }, { "epoch": 2.85, "learning_rate": 3.7940926476838076e-07, "logits/chosen": -2.4982571601867676, "logits/rejected": -1.9897637367248535, "logps/chosen": -102.1682357788086, "logps/rejected": -151.7366943359375, "loss": 0.0992, "rewards/accuracies": 1.0, "rewards/chosen": -0.959165096282959, "rewards/margins": 6.773233413696289, "rewards/rejected": -7.732398509979248, "step": 1658 }, { "epoch": 2.86, "learning_rate": 3.793030174245644e-07, "logits/chosen": -2.326537609100342, "logits/rejected": -2.1516165733337402, "logps/chosen": -126.52305603027344, "logps/rejected": -194.79928588867188, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -3.195761203765869, "rewards/margins": 6.747647285461426, "rewards/rejected": -9.943408966064453, "step": 1659 }, { "epoch": 2.86, "learning_rate": 3.7919677008074796e-07, "logits/chosen": -2.4084243774414062, "logits/rejected": -2.5632386207580566, "logps/chosen": -80.63330078125, "logps/rejected": -166.69345092773438, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -0.07328905165195465, "rewards/margins": 7.69064998626709, "rewards/rejected": -7.763938903808594, "step": 1660 }, { "epoch": 2.86, "learning_rate": 3.7909052273693156e-07, "logits/chosen": -2.260909080505371, "logits/rejected": -2.1392393112182617, "logps/chosen": -102.0841064453125, "logps/rejected": -184.52435302734375, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": -0.4554201662540436, "rewards/margins": 7.558716773986816, "rewards/rejected": -8.014137268066406, "step": 1661 }, { "epoch": 2.86, "learning_rate": 3.7898427539311516e-07, "logits/chosen": -2.4233415126800537, "logits/rejected": -2.281303644180298, "logps/chosen": -106.1381607055664, "logps/rejected": -185.61203002929688, "loss": 0.2485, "rewards/accuracies": 1.0, "rewards/chosen": -0.8650522828102112, "rewards/margins": 8.415689468383789, "rewards/rejected": -9.280740737915039, "step": 1662 }, { "epoch": 2.86, "learning_rate": 3.7887802804929876e-07, "logits/chosen": -2.2356886863708496, "logits/rejected": -2.3217878341674805, "logps/chosen": -87.54149627685547, "logps/rejected": -140.3092498779297, "loss": 0.2152, "rewards/accuracies": 1.0, "rewards/chosen": -1.1712185144424438, "rewards/margins": 4.693617343902588, "rewards/rejected": -5.864835739135742, "step": 1663 }, { "epoch": 2.86, "learning_rate": 3.787717807054823e-07, "logits/chosen": -2.476639986038208, "logits/rejected": -2.2307283878326416, "logps/chosen": -85.53787231445312, "logps/rejected": -125.33499145507812, "loss": 0.2008, "rewards/accuracies": 0.75, "rewards/chosen": 0.42391255497932434, "rewards/margins": 5.3529510498046875, "rewards/rejected": -4.929038047790527, "step": 1664 }, { "epoch": 2.87, "learning_rate": 3.7866553336166595e-07, "logits/chosen": -2.269619941711426, "logits/rejected": -1.883857011795044, "logps/chosen": -112.01115417480469, "logps/rejected": -157.57266235351562, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -1.7141512632369995, "rewards/margins": 6.027009010314941, "rewards/rejected": -7.7411603927612305, "step": 1665 }, { "epoch": 2.87, "learning_rate": 3.7855928601784955e-07, "logits/chosen": -2.1196672916412354, "logits/rejected": -2.30726957321167, "logps/chosen": -90.51346588134766, "logps/rejected": -158.47056579589844, "loss": 0.1914, "rewards/accuracies": 1.0, "rewards/chosen": -0.04499893635511398, "rewards/margins": 6.657516002655029, "rewards/rejected": -6.702515125274658, "step": 1666 }, { "epoch": 2.87, "learning_rate": 3.784530386740331e-07, "logits/chosen": -2.1377930641174316, "logits/rejected": -2.3513927459716797, "logps/chosen": -110.63475036621094, "logps/rejected": -193.7716064453125, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -3.0261526107788086, "rewards/margins": 6.606930732727051, "rewards/rejected": -9.63308334350586, "step": 1667 }, { "epoch": 2.87, "learning_rate": 3.7834679133021675e-07, "logits/chosen": -1.9848606586456299, "logits/rejected": -2.146251678466797, "logps/chosen": -79.83831787109375, "logps/rejected": -144.06625366210938, "loss": 0.0836, "rewards/accuracies": 1.0, "rewards/chosen": -0.851746141910553, "rewards/margins": 5.146737575531006, "rewards/rejected": -5.998483180999756, "step": 1668 }, { "epoch": 2.87, "learning_rate": 3.782405439864003e-07, "logits/chosen": -1.8001291751861572, "logits/rejected": -2.4180030822753906, "logps/chosen": -82.46214294433594, "logps/rejected": -154.78915405273438, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -1.4636224508285522, "rewards/margins": 3.8544044494628906, "rewards/rejected": -5.318027019500732, "step": 1669 }, { "epoch": 2.87, "learning_rate": 3.781342966425839e-07, "logits/chosen": -2.1612720489501953, "logits/rejected": -2.298398017883301, "logps/chosen": -109.47769165039062, "logps/rejected": -161.31935119628906, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": -2.6805381774902344, "rewards/margins": 5.226597785949707, "rewards/rejected": -7.907135963439941, "step": 1670 }, { "epoch": 2.88, "learning_rate": 3.7802804929876755e-07, "logits/chosen": -2.2977278232574463, "logits/rejected": -2.0382251739501953, "logps/chosen": -96.51866149902344, "logps/rejected": -145.82781982421875, "loss": 0.1301, "rewards/accuracies": 1.0, "rewards/chosen": -1.7452592849731445, "rewards/margins": 5.979598522186279, "rewards/rejected": -7.724858283996582, "step": 1671 }, { "epoch": 2.88, "learning_rate": 3.779218019549511e-07, "logits/chosen": -1.912020206451416, "logits/rejected": -2.2531251907348633, "logps/chosen": -86.2533187866211, "logps/rejected": -186.43695068359375, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.705219030380249, "rewards/margins": 9.24772834777832, "rewards/rejected": -9.952947616577148, "step": 1672 }, { "epoch": 2.88, "learning_rate": 3.778155546111347e-07, "logits/chosen": -2.12265682220459, "logits/rejected": -2.167269229888916, "logps/chosen": -111.69296264648438, "logps/rejected": -130.30905151367188, "loss": 0.1389, "rewards/accuracies": 0.75, "rewards/chosen": -3.2347798347473145, "rewards/margins": 2.949388027191162, "rewards/rejected": -6.184167861938477, "step": 1673 }, { "epoch": 2.88, "learning_rate": 3.777093072673183e-07, "logits/chosen": -2.2710750102996826, "logits/rejected": -2.4716227054595947, "logps/chosen": -120.9001693725586, "logps/rejected": -176.27188110351562, "loss": 0.1066, "rewards/accuracies": 1.0, "rewards/chosen": -2.1411800384521484, "rewards/margins": 5.791276454925537, "rewards/rejected": -7.9324564933776855, "step": 1674 }, { "epoch": 2.88, "learning_rate": 3.776030599235019e-07, "logits/chosen": -2.056490898132324, "logits/rejected": -2.130176067352295, "logps/chosen": -111.20287322998047, "logps/rejected": -198.28656005859375, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -2.522381544113159, "rewards/margins": 8.587221145629883, "rewards/rejected": -11.109601974487305, "step": 1675 }, { "epoch": 2.88, "learning_rate": 3.774968125796855e-07, "logits/chosen": -2.283712863922119, "logits/rejected": -2.008615016937256, "logps/chosen": -100.82453155517578, "logps/rejected": -146.84561157226562, "loss": 0.2422, "rewards/accuracies": 1.0, "rewards/chosen": -1.1794183254241943, "rewards/margins": 5.0817108154296875, "rewards/rejected": -6.261128902435303, "step": 1676 }, { "epoch": 2.89, "learning_rate": 3.773905652358691e-07, "logits/chosen": -2.113023281097412, "logits/rejected": -1.8621611595153809, "logps/chosen": -141.2206573486328, "logps/rejected": -142.67857360839844, "loss": 0.0848, "rewards/accuracies": 1.0, "rewards/chosen": -3.5413784980773926, "rewards/margins": 3.629115343093872, "rewards/rejected": -7.170494079589844, "step": 1677 }, { "epoch": 2.89, "learning_rate": 3.772843178920527e-07, "logits/chosen": -2.407759428024292, "logits/rejected": -1.863346815109253, "logps/chosen": -140.58209228515625, "logps/rejected": -169.17047119140625, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -5.052879810333252, "rewards/margins": 5.239941596984863, "rewards/rejected": -10.292821884155273, "step": 1678 }, { "epoch": 2.89, "learning_rate": 3.771780705482363e-07, "logits/chosen": -2.2897305488586426, "logits/rejected": -1.9526772499084473, "logps/chosen": -120.09523010253906, "logps/rejected": -124.71073913574219, "loss": 0.1822, "rewards/accuracies": 0.75, "rewards/chosen": -2.196617364883423, "rewards/margins": 2.8257830142974854, "rewards/rejected": -5.022400379180908, "step": 1679 }, { "epoch": 2.89, "learning_rate": 3.770718232044199e-07, "logits/chosen": -2.4684829711914062, "logits/rejected": -1.9151809215545654, "logps/chosen": -123.92347717285156, "logps/rejected": -165.34214782714844, "loss": 0.1662, "rewards/accuracies": 1.0, "rewards/chosen": -1.919182300567627, "rewards/margins": 6.093465805053711, "rewards/rejected": -8.01264762878418, "step": 1680 }, { "epoch": 2.89, "learning_rate": 3.769655758606035e-07, "logits/chosen": -2.2192790508270264, "logits/rejected": -2.1573948860168457, "logps/chosen": -106.75553894042969, "logps/rejected": -177.32186889648438, "loss": 0.0737, "rewards/accuracies": 1.0, "rewards/chosen": -1.4540634155273438, "rewards/margins": 6.8663482666015625, "rewards/rejected": -8.320411682128906, "step": 1681 }, { "epoch": 2.9, "learning_rate": 3.768593285167871e-07, "logits/chosen": -2.1068873405456543, "logits/rejected": -2.189181089401245, "logps/chosen": -110.07068634033203, "logps/rejected": -204.82150268554688, "loss": 0.1606, "rewards/accuracies": 1.0, "rewards/chosen": -2.841170072555542, "rewards/margins": 6.538886547088623, "rewards/rejected": -9.380056381225586, "step": 1682 }, { "epoch": 2.9, "learning_rate": 3.767530811729707e-07, "logits/chosen": -2.3032689094543457, "logits/rejected": -2.403656005859375, "logps/chosen": -116.24559020996094, "logps/rejected": -160.3591766357422, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": -2.74072003364563, "rewards/margins": 3.310347080230713, "rewards/rejected": -6.051067352294922, "step": 1683 }, { "epoch": 2.9, "learning_rate": 3.7664683382915423e-07, "logits/chosen": -2.2912940979003906, "logits/rejected": -2.3596343994140625, "logps/chosen": -110.94722747802734, "logps/rejected": -164.50698852539062, "loss": 0.2396, "rewards/accuracies": 0.75, "rewards/chosen": -1.7033677101135254, "rewards/margins": 4.459306716918945, "rewards/rejected": -6.162674427032471, "step": 1684 }, { "epoch": 2.9, "learning_rate": 3.765405864853379e-07, "logits/chosen": -2.185884714126587, "logits/rejected": -2.4375617504119873, "logps/chosen": -98.71707153320312, "logps/rejected": -169.45875549316406, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -1.3419886827468872, "rewards/margins": 6.872677803039551, "rewards/rejected": -8.214666366577148, "step": 1685 }, { "epoch": 2.9, "learning_rate": 3.764343391415215e-07, "logits/chosen": -2.274728775024414, "logits/rejected": -2.1464462280273438, "logps/chosen": -155.45616149902344, "logps/rejected": -178.62789916992188, "loss": 0.2766, "rewards/accuracies": 1.0, "rewards/chosen": -4.078496932983398, "rewards/margins": 4.242104530334473, "rewards/rejected": -8.320602416992188, "step": 1686 }, { "epoch": 2.9, "learning_rate": 3.76328091797705e-07, "logits/chosen": -2.252230644226074, "logits/rejected": -2.182227849960327, "logps/chosen": -134.8097686767578, "logps/rejected": -163.6298828125, "loss": 0.1415, "rewards/accuracies": 1.0, "rewards/chosen": -4.142014980316162, "rewards/margins": 3.9145560264587402, "rewards/rejected": -8.056571006774902, "step": 1687 }, { "epoch": 2.91, "learning_rate": 3.762218444538887e-07, "logits/chosen": -2.0871284008026123, "logits/rejected": -2.3878655433654785, "logps/chosen": -108.95320129394531, "logps/rejected": -162.97006225585938, "loss": 0.1071, "rewards/accuracies": 1.0, "rewards/chosen": -2.6855032444000244, "rewards/margins": 3.9585328102111816, "rewards/rejected": -6.644036293029785, "step": 1688 }, { "epoch": 2.91, "learning_rate": 3.761155971100722e-07, "logits/chosen": -2.1915714740753174, "logits/rejected": -2.1817069053649902, "logps/chosen": -133.0728302001953, "logps/rejected": -156.70538330078125, "loss": 0.1397, "rewards/accuracies": 1.0, "rewards/chosen": -3.3513007164001465, "rewards/margins": 2.7664594650268555, "rewards/rejected": -6.117760181427002, "step": 1689 }, { "epoch": 2.91, "learning_rate": 3.760093497662558e-07, "logits/chosen": -2.432325839996338, "logits/rejected": -2.0849757194519043, "logps/chosen": -83.4472885131836, "logps/rejected": -146.02716064453125, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -0.6179265975952148, "rewards/margins": 6.337882995605469, "rewards/rejected": -6.955809593200684, "step": 1690 }, { "epoch": 2.91, "learning_rate": 3.759031024224394e-07, "logits/chosen": -2.1533844470977783, "logits/rejected": -2.2356319427490234, "logps/chosen": -122.278076171875, "logps/rejected": -142.6579132080078, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -2.7451086044311523, "rewards/margins": 2.997408390045166, "rewards/rejected": -5.742516994476318, "step": 1691 }, { "epoch": 2.91, "learning_rate": 3.75796855078623e-07, "logits/chosen": -2.027418613433838, "logits/rejected": -2.260129690170288, "logps/chosen": -123.3725814819336, "logps/rejected": -191.9320068359375, "loss": 0.2349, "rewards/accuracies": 1.0, "rewards/chosen": -2.7889015674591064, "rewards/margins": 5.467875957489014, "rewards/rejected": -8.256776809692383, "step": 1692 }, { "epoch": 2.91, "learning_rate": 3.756906077348066e-07, "logits/chosen": -2.41261625289917, "logits/rejected": -2.2030866146087646, "logps/chosen": -123.34611511230469, "logps/rejected": -159.74375915527344, "loss": 0.0811, "rewards/accuracies": 1.0, "rewards/chosen": -1.7918527126312256, "rewards/margins": 5.747088432312012, "rewards/rejected": -7.538941383361816, "step": 1693 }, { "epoch": 2.92, "learning_rate": 3.755843603909902e-07, "logits/chosen": -2.159923553466797, "logits/rejected": -2.300509214401245, "logps/chosen": -133.40066528320312, "logps/rejected": -169.30316162109375, "loss": 0.2376, "rewards/accuracies": 0.75, "rewards/chosen": -4.23189115524292, "rewards/margins": 3.9793601036071777, "rewards/rejected": -8.211251258850098, "step": 1694 }, { "epoch": 2.92, "learning_rate": 3.754781130471738e-07, "logits/chosen": -2.389975070953369, "logits/rejected": -2.1430840492248535, "logps/chosen": -98.2174072265625, "logps/rejected": -167.4593963623047, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -1.4290052652359009, "rewards/margins": 7.346184253692627, "rewards/rejected": -8.775190353393555, "step": 1695 }, { "epoch": 2.92, "learning_rate": 3.7537186570335736e-07, "logits/chosen": -2.2414817810058594, "logits/rejected": -2.5594429969787598, "logps/chosen": -121.95105743408203, "logps/rejected": -188.28260803222656, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": -2.556939125061035, "rewards/margins": 5.2751312255859375, "rewards/rejected": -7.832070350646973, "step": 1696 }, { "epoch": 2.92, "learning_rate": 3.75265618359541e-07, "logits/chosen": -2.1324589252471924, "logits/rejected": -2.4653749465942383, "logps/chosen": -87.39151763916016, "logps/rejected": -147.84286499023438, "loss": 0.2536, "rewards/accuracies": 1.0, "rewards/chosen": -1.9227190017700195, "rewards/margins": 5.4362077713012695, "rewards/rejected": -7.358926773071289, "step": 1697 }, { "epoch": 2.92, "learning_rate": 3.751593710157246e-07, "logits/chosen": -1.6485124826431274, "logits/rejected": -2.3207812309265137, "logps/chosen": -107.16334533691406, "logps/rejected": -176.44322204589844, "loss": 0.1206, "rewards/accuracies": 0.75, "rewards/chosen": -2.393585681915283, "rewards/margins": 5.671923637390137, "rewards/rejected": -8.065509796142578, "step": 1698 }, { "epoch": 2.92, "learning_rate": 3.7505312367190816e-07, "logits/chosen": -2.060399293899536, "logits/rejected": -2.2714569568634033, "logps/chosen": -111.43627166748047, "logps/rejected": -203.0938720703125, "loss": 0.2886, "rewards/accuracies": 1.0, "rewards/chosen": -1.9590895175933838, "rewards/margins": 8.289774894714355, "rewards/rejected": -10.248865127563477, "step": 1699 }, { "epoch": 2.93, "learning_rate": 3.749468763280918e-07, "logits/chosen": -2.1128931045532227, "logits/rejected": -2.0160460472106934, "logps/chosen": -103.80135345458984, "logps/rejected": -179.22198486328125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.605449914932251, "rewards/margins": 6.9487624168396, "rewards/rejected": -9.55421257019043, "step": 1700 }, { "epoch": 2.93, "learning_rate": 3.7484062898427535e-07, "logits/chosen": -2.106781005859375, "logits/rejected": -2.1723599433898926, "logps/chosen": -92.0926742553711, "logps/rejected": -179.4837646484375, "loss": 0.15, "rewards/accuracies": 1.0, "rewards/chosen": -1.511044979095459, "rewards/margins": 8.929458618164062, "rewards/rejected": -10.440505027770996, "step": 1701 }, { "epoch": 2.93, "learning_rate": 3.7473438164045895e-07, "logits/chosen": -2.1800713539123535, "logits/rejected": -2.401139736175537, "logps/chosen": -123.20491027832031, "logps/rejected": -202.10140991210938, "loss": 0.107, "rewards/accuracies": 1.0, "rewards/chosen": -2.043053150177002, "rewards/margins": 6.539370536804199, "rewards/rejected": -8.582423210144043, "step": 1702 }, { "epoch": 2.93, "learning_rate": 3.746281342966426e-07, "logits/chosen": -2.1410887241363525, "logits/rejected": -2.359088897705078, "logps/chosen": -94.15215301513672, "logps/rejected": -175.03506469726562, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -1.1471043825149536, "rewards/margins": 6.777036666870117, "rewards/rejected": -7.924140453338623, "step": 1703 }, { "epoch": 2.93, "learning_rate": 3.7452188695282615e-07, "logits/chosen": -2.4578287601470947, "logits/rejected": -2.269536256790161, "logps/chosen": -105.78910064697266, "logps/rejected": -123.45413208007812, "loss": 0.1069, "rewards/accuracies": 1.0, "rewards/chosen": -0.7155383825302124, "rewards/margins": 3.560238838195801, "rewards/rejected": -4.275777339935303, "step": 1704 }, { "epoch": 2.93, "learning_rate": 3.7441563960900975e-07, "logits/chosen": -2.279287815093994, "logits/rejected": -1.8892978429794312, "logps/chosen": -119.2442626953125, "logps/rejected": -198.37835693359375, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -2.4407029151916504, "rewards/margins": 8.501649856567383, "rewards/rejected": -10.942353248596191, "step": 1705 }, { "epoch": 2.94, "learning_rate": 3.7430939226519335e-07, "logits/chosen": -2.407231330871582, "logits/rejected": -2.047866106033325, "logps/chosen": -85.91746520996094, "logps/rejected": -152.37054443359375, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": 0.19608308374881744, "rewards/margins": 7.648414611816406, "rewards/rejected": -7.45233154296875, "step": 1706 }, { "epoch": 2.94, "learning_rate": 3.7420314492137695e-07, "logits/chosen": -2.358555793762207, "logits/rejected": -2.3541810512542725, "logps/chosen": -86.28952026367188, "logps/rejected": -138.09375, "loss": 0.1241, "rewards/accuracies": 0.75, "rewards/chosen": -1.9819111824035645, "rewards/margins": 4.3526530265808105, "rewards/rejected": -6.334564208984375, "step": 1707 }, { "epoch": 2.94, "learning_rate": 3.7409689757756055e-07, "logits/chosen": -2.272895097732544, "logits/rejected": -2.070998430252075, "logps/chosen": -120.91795349121094, "logps/rejected": -173.6326141357422, "loss": 0.1006, "rewards/accuracies": 1.0, "rewards/chosen": -1.0536788702011108, "rewards/margins": 7.4539361000061035, "rewards/rejected": -8.507615089416504, "step": 1708 }, { "epoch": 2.94, "learning_rate": 3.7399065023374415e-07, "logits/chosen": -1.7574703693389893, "logits/rejected": -2.4256439208984375, "logps/chosen": -102.39332580566406, "logps/rejected": -198.1090545654297, "loss": 0.1827, "rewards/accuracies": 1.0, "rewards/chosen": -0.9027559161186218, "rewards/margins": 8.042137145996094, "rewards/rejected": -8.944893836975098, "step": 1709 }, { "epoch": 2.94, "learning_rate": 3.7388440288992775e-07, "logits/chosen": -2.4082276821136475, "logits/rejected": -2.139357328414917, "logps/chosen": -114.18783569335938, "logps/rejected": -158.21255493164062, "loss": 0.111, "rewards/accuracies": 1.0, "rewards/chosen": -0.9136890172958374, "rewards/margins": 5.742562294006348, "rewards/rejected": -6.656251907348633, "step": 1710 }, { "epoch": 2.94, "learning_rate": 3.737781555461113e-07, "logits/chosen": -2.274669885635376, "logits/rejected": -2.065925121307373, "logps/chosen": -106.49280548095703, "logps/rejected": -131.93280029296875, "loss": 0.1667, "rewards/accuracies": 1.0, "rewards/chosen": -0.7979980707168579, "rewards/margins": 4.2200822830200195, "rewards/rejected": -5.018080234527588, "step": 1711 }, { "epoch": 2.95, "learning_rate": 3.7367190820229494e-07, "logits/chosen": -2.10768985748291, "logits/rejected": -2.4985580444335938, "logps/chosen": -99.0407485961914, "logps/rejected": -169.84051513671875, "loss": 0.1585, "rewards/accuracies": 1.0, "rewards/chosen": -1.1722493171691895, "rewards/margins": 6.900046348571777, "rewards/rejected": -8.072296142578125, "step": 1712 }, { "epoch": 2.95, "learning_rate": 3.735656608584785e-07, "logits/chosen": -2.268091917037964, "logits/rejected": -2.0866286754608154, "logps/chosen": -98.22682189941406, "logps/rejected": -165.09165954589844, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.10002863407135, "rewards/margins": 6.852530002593994, "rewards/rejected": -7.952558517456055, "step": 1713 }, { "epoch": 2.95, "learning_rate": 3.734594135146621e-07, "logits/chosen": -2.1052732467651367, "logits/rejected": -2.214167594909668, "logps/chosen": -121.237060546875, "logps/rejected": -172.24044799804688, "loss": 0.2164, "rewards/accuracies": 0.75, "rewards/chosen": -3.2309038639068604, "rewards/margins": 4.0170817375183105, "rewards/rejected": -7.247985363006592, "step": 1714 }, { "epoch": 2.95, "learning_rate": 3.7335316617084574e-07, "logits/chosen": -2.20866060256958, "logits/rejected": -2.399965286254883, "logps/chosen": -96.61540222167969, "logps/rejected": -189.21316528320312, "loss": 0.2857, "rewards/accuracies": 1.0, "rewards/chosen": -1.480474591255188, "rewards/margins": 7.128800868988037, "rewards/rejected": -8.609275817871094, "step": 1715 }, { "epoch": 2.95, "learning_rate": 3.732469188270293e-07, "logits/chosen": -2.4329702854156494, "logits/rejected": -2.3228368759155273, "logps/chosen": -116.79151916503906, "logps/rejected": -161.713134765625, "loss": 0.0724, "rewards/accuracies": 1.0, "rewards/chosen": -2.6166179180145264, "rewards/margins": 5.493486404418945, "rewards/rejected": -8.11010456085205, "step": 1716 }, { "epoch": 2.96, "learning_rate": 3.731406714832129e-07, "logits/chosen": -1.9086978435516357, "logits/rejected": -2.3713107109069824, "logps/chosen": -110.88526916503906, "logps/rejected": -168.50233459472656, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": -4.581708908081055, "rewards/margins": 4.302271842956543, "rewards/rejected": -8.883980751037598, "step": 1717 }, { "epoch": 2.96, "learning_rate": 3.730344241393965e-07, "logits/chosen": -2.2176272869110107, "logits/rejected": -2.0951340198516846, "logps/chosen": -110.43429565429688, "logps/rejected": -162.6502685546875, "loss": 0.1696, "rewards/accuracies": 1.0, "rewards/chosen": -3.6727757453918457, "rewards/margins": 5.655818462371826, "rewards/rejected": -9.328594207763672, "step": 1718 }, { "epoch": 2.96, "learning_rate": 3.729281767955801e-07, "logits/chosen": -2.369293212890625, "logits/rejected": -2.4587085247039795, "logps/chosen": -127.93727111816406, "logps/rejected": -209.0121612548828, "loss": 0.1308, "rewards/accuracies": 1.0, "rewards/chosen": -2.879263162612915, "rewards/margins": 6.493432998657227, "rewards/rejected": -9.372696876525879, "step": 1719 }, { "epoch": 2.96, "learning_rate": 3.728219294517637e-07, "logits/chosen": -2.3362913131713867, "logits/rejected": -2.255917549133301, "logps/chosen": -122.16848754882812, "logps/rejected": -159.64065551757812, "loss": 0.0968, "rewards/accuracies": 1.0, "rewards/chosen": -2.9738969802856445, "rewards/margins": 4.141639709472656, "rewards/rejected": -7.115536212921143, "step": 1720 }, { "epoch": 2.96, "learning_rate": 3.727156821079473e-07, "logits/chosen": -2.3018250465393066, "logits/rejected": -2.218000888824463, "logps/chosen": -107.7392578125, "logps/rejected": -174.41036987304688, "loss": 0.1207, "rewards/accuracies": 1.0, "rewards/chosen": -2.123112440109253, "rewards/margins": 5.429757118225098, "rewards/rejected": -7.55286979675293, "step": 1721 }, { "epoch": 2.96, "learning_rate": 3.726094347641309e-07, "logits/chosen": -2.1792232990264893, "logits/rejected": -2.169680118560791, "logps/chosen": -99.20391845703125, "logps/rejected": -147.289794921875, "loss": 0.1702, "rewards/accuracies": 1.0, "rewards/chosen": -2.593569040298462, "rewards/margins": 4.48139762878418, "rewards/rejected": -7.074966907501221, "step": 1722 }, { "epoch": 2.97, "learning_rate": 3.725031874203145e-07, "logits/chosen": -2.024252414703369, "logits/rejected": -2.084341526031494, "logps/chosen": -98.67278289794922, "logps/rejected": -163.17677307128906, "loss": 0.1051, "rewards/accuracies": 0.75, "rewards/chosen": -1.7270902395248413, "rewards/margins": 5.9356255531311035, "rewards/rejected": -7.662715911865234, "step": 1723 }, { "epoch": 2.97, "learning_rate": 3.723969400764981e-07, "logits/chosen": -2.2326323986053467, "logits/rejected": -2.2852954864501953, "logps/chosen": -109.31592559814453, "logps/rejected": -149.89947509765625, "loss": 0.1154, "rewards/accuracies": 1.0, "rewards/chosen": -1.9636867046356201, "rewards/margins": 4.50919246673584, "rewards/rejected": -6.472878932952881, "step": 1724 }, { "epoch": 2.97, "learning_rate": 3.722906927326817e-07, "logits/chosen": -2.195859432220459, "logits/rejected": -2.0763351917266846, "logps/chosen": -82.02460479736328, "logps/rejected": -141.01483154296875, "loss": 0.0743, "rewards/accuracies": 1.0, "rewards/chosen": -1.6157513856887817, "rewards/margins": 5.343620300292969, "rewards/rejected": -6.959371566772461, "step": 1725 }, { "epoch": 2.97, "learning_rate": 3.721844453888653e-07, "logits/chosen": -2.4931461811065674, "logits/rejected": -2.1636691093444824, "logps/chosen": -130.66978454589844, "logps/rejected": -146.96286010742188, "loss": 0.2211, "rewards/accuracies": 0.75, "rewards/chosen": -2.2787413597106934, "rewards/margins": 2.965485095977783, "rewards/rejected": -5.244226455688477, "step": 1726 }, { "epoch": 2.97, "learning_rate": 3.7207819804504887e-07, "logits/chosen": -2.2297608852386475, "logits/rejected": -2.3244738578796387, "logps/chosen": -106.67168426513672, "logps/rejected": -149.0705108642578, "loss": 0.1482, "rewards/accuracies": 1.0, "rewards/chosen": -0.25289666652679443, "rewards/margins": 4.417049884796143, "rewards/rejected": -4.669946670532227, "step": 1727 }, { "epoch": 2.97, "learning_rate": 3.719719507012324e-07, "logits/chosen": -2.334225654602051, "logits/rejected": -2.0619821548461914, "logps/chosen": -95.01752471923828, "logps/rejected": -137.1404571533203, "loss": 0.1155, "rewards/accuracies": 1.0, "rewards/chosen": -1.4438987970352173, "rewards/margins": 4.7349853515625, "rewards/rejected": -6.178884029388428, "step": 1728 }, { "epoch": 2.98, "learning_rate": 3.7186570335741607e-07, "logits/chosen": -1.446933388710022, "logits/rejected": -2.394792318344116, "logps/chosen": -81.8853759765625, "logps/rejected": -206.59857177734375, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": -1.0555330514907837, "rewards/margins": 7.542784214019775, "rewards/rejected": -8.598318099975586, "step": 1729 }, { "epoch": 2.98, "learning_rate": 3.7175945601359967e-07, "logits/chosen": -2.0365920066833496, "logits/rejected": -2.1840250492095947, "logps/chosen": -101.19447326660156, "logps/rejected": -124.41218566894531, "loss": 0.0625, "rewards/accuracies": 0.75, "rewards/chosen": -1.5431561470031738, "rewards/margins": 2.685441017150879, "rewards/rejected": -4.228597164154053, "step": 1730 }, { "epoch": 2.98, "learning_rate": 3.716532086697832e-07, "logits/chosen": -2.449047327041626, "logits/rejected": -2.08667254447937, "logps/chosen": -119.73959350585938, "logps/rejected": -142.34312438964844, "loss": 0.1896, "rewards/accuracies": 1.0, "rewards/chosen": -2.6281943321228027, "rewards/margins": 4.064145088195801, "rewards/rejected": -6.692338943481445, "step": 1731 }, { "epoch": 2.98, "learning_rate": 3.7154696132596687e-07, "logits/chosen": -2.124271869659424, "logits/rejected": -1.819561243057251, "logps/chosen": -64.43011474609375, "logps/rejected": -121.3703842163086, "loss": 0.2575, "rewards/accuracies": 1.0, "rewards/chosen": 0.5788697004318237, "rewards/margins": 6.8596649169921875, "rewards/rejected": -6.280795097351074, "step": 1732 }, { "epoch": 2.98, "learning_rate": 3.714407139821504e-07, "logits/chosen": -2.0232017040252686, "logits/rejected": -2.3749287128448486, "logps/chosen": -131.73654174804688, "logps/rejected": -191.91470336914062, "loss": 0.1159, "rewards/accuracies": 1.0, "rewards/chosen": -2.8305211067199707, "rewards/margins": 3.886713981628418, "rewards/rejected": -6.7172346115112305, "step": 1733 }, { "epoch": 2.98, "learning_rate": 3.71334466638334e-07, "logits/chosen": -1.9580622911453247, "logits/rejected": -2.1718740463256836, "logps/chosen": -98.26107788085938, "logps/rejected": -169.3089141845703, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": -1.769742488861084, "rewards/margins": 5.435945510864258, "rewards/rejected": -7.205687999725342, "step": 1734 }, { "epoch": 2.99, "learning_rate": 3.7122821929451766e-07, "logits/chosen": -2.4203248023986816, "logits/rejected": -2.233381509780884, "logps/chosen": -87.79469299316406, "logps/rejected": -111.12445068359375, "loss": 0.0963, "rewards/accuracies": 0.75, "rewards/chosen": -1.0873273611068726, "rewards/margins": 2.7883808612823486, "rewards/rejected": -3.8757081031799316, "step": 1735 }, { "epoch": 2.99, "learning_rate": 3.711219719507012e-07, "logits/chosen": -2.1667261123657227, "logits/rejected": -2.017200469970703, "logps/chosen": -84.1624755859375, "logps/rejected": -143.97702026367188, "loss": 0.087, "rewards/accuracies": 0.75, "rewards/chosen": -1.8044604063034058, "rewards/margins": 6.503853797912598, "rewards/rejected": -8.30831527709961, "step": 1736 }, { "epoch": 2.99, "learning_rate": 3.710157246068848e-07, "logits/chosen": -2.4186148643493652, "logits/rejected": -2.2246129512786865, "logps/chosen": -99.65194702148438, "logps/rejected": -142.95774841308594, "loss": 0.1153, "rewards/accuracies": 1.0, "rewards/chosen": -1.479273796081543, "rewards/margins": 4.864513397216797, "rewards/rejected": -6.34378719329834, "step": 1737 }, { "epoch": 2.99, "learning_rate": 3.709094772630684e-07, "logits/chosen": -1.9961422681808472, "logits/rejected": -2.4840266704559326, "logps/chosen": -105.99665832519531, "logps/rejected": -172.34423828125, "loss": 0.1806, "rewards/accuracies": 0.75, "rewards/chosen": -2.797544240951538, "rewards/margins": 4.912667274475098, "rewards/rejected": -7.710211277008057, "step": 1738 }, { "epoch": 2.99, "learning_rate": 3.70803229919252e-07, "logits/chosen": -2.191844940185547, "logits/rejected": -2.216020107269287, "logps/chosen": -128.8491668701172, "logps/rejected": -194.63232421875, "loss": 0.1419, "rewards/accuracies": 1.0, "rewards/chosen": -2.3678534030914307, "rewards/margins": 6.6034088134765625, "rewards/rejected": -8.971261978149414, "step": 1739 }, { "epoch": 2.99, "learning_rate": 3.7069698257543555e-07, "logits/chosen": -1.9596648216247559, "logits/rejected": -2.3216350078582764, "logps/chosen": -106.52922058105469, "logps/rejected": -188.40476989746094, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -2.7000718116760254, "rewards/margins": 7.07007360458374, "rewards/rejected": -9.770145416259766, "step": 1740 }, { "epoch": 3.0, "learning_rate": 3.705907352316192e-07, "logits/chosen": -2.3409388065338135, "logits/rejected": -1.8686379194259644, "logps/chosen": -124.47489166259766, "logps/rejected": -182.38816833496094, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": -1.2238415479660034, "rewards/margins": 7.067203044891357, "rewards/rejected": -8.291045188903809, "step": 1741 }, { "epoch": 3.0, "learning_rate": 3.704844878878028e-07, "logits/chosen": -2.265718698501587, "logits/rejected": -2.1502833366394043, "logps/chosen": -100.23764038085938, "logps/rejected": -158.95620727539062, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -1.2281079292297363, "rewards/margins": 7.2073974609375, "rewards/rejected": -8.435504913330078, "step": 1742 }, { "epoch": 3.0, "learning_rate": 3.7037824054398635e-07, "logits/chosen": -2.2928872108459473, "logits/rejected": -2.2984542846679688, "logps/chosen": -107.18451690673828, "logps/rejected": -168.5599365234375, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -2.284548759460449, "rewards/margins": 4.5007452964782715, "rewards/rejected": -6.7852935791015625, "step": 1743 }, { "epoch": 3.0, "learning_rate": 3.7027199320017e-07, "logits/chosen": -2.2008864879608154, "logits/rejected": -2.090528964996338, "logps/chosen": -111.68143463134766, "logps/rejected": -189.81837463378906, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -2.770772695541382, "rewards/margins": 6.044206619262695, "rewards/rejected": -8.814979553222656, "step": 1744 }, { "epoch": 3.0, "learning_rate": 3.7016574585635355e-07, "logits/chosen": -2.296459674835205, "logits/rejected": -1.9081289768218994, "logps/chosen": -98.04021453857422, "logps/rejected": -143.86219787597656, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.0644158124923706, "rewards/margins": 5.8936567306518555, "rewards/rejected": -6.958072662353516, "step": 1745 }, { "epoch": 3.01, "learning_rate": 3.7005949851253715e-07, "logits/chosen": -2.166405200958252, "logits/rejected": -2.4249517917633057, "logps/chosen": -99.03565979003906, "logps/rejected": -142.3026580810547, "loss": 0.0494, "rewards/accuracies": 1.0, "rewards/chosen": -1.901679277420044, "rewards/margins": 3.73681640625, "rewards/rejected": -5.638495445251465, "step": 1746 }, { "epoch": 3.01, "learning_rate": 3.699532511687208e-07, "logits/chosen": -2.3196675777435303, "logits/rejected": -2.2685017585754395, "logps/chosen": -112.46778869628906, "logps/rejected": -168.5490264892578, "loss": 0.0856, "rewards/accuracies": 1.0, "rewards/chosen": -1.1666074991226196, "rewards/margins": 6.493888854980469, "rewards/rejected": -7.660496711730957, "step": 1747 }, { "epoch": 3.01, "learning_rate": 3.6984700382490434e-07, "logits/chosen": -2.3676042556762695, "logits/rejected": -1.9757022857666016, "logps/chosen": -98.28497314453125, "logps/rejected": -146.93612670898438, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.6025278568267822, "rewards/margins": 5.194392204284668, "rewards/rejected": -6.796920299530029, "step": 1748 }, { "epoch": 3.01, "learning_rate": 3.6974075648108794e-07, "logits/chosen": -2.063110828399658, "logits/rejected": -2.5407121181488037, "logps/chosen": -108.12355041503906, "logps/rejected": -168.2745819091797, "loss": 0.0729, "rewards/accuracies": 1.0, "rewards/chosen": -1.160874843597412, "rewards/margins": 5.892477512359619, "rewards/rejected": -7.053352355957031, "step": 1749 }, { "epoch": 3.01, "learning_rate": 3.6963450913727154e-07, "logits/chosen": -2.1597681045532227, "logits/rejected": -2.191188335418701, "logps/chosen": -114.99424743652344, "logps/rejected": -163.52529907226562, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": -1.4106168746948242, "rewards/margins": 5.739293575286865, "rewards/rejected": -7.1499104499816895, "step": 1750 }, { "epoch": 3.01, "learning_rate": 3.6952826179345514e-07, "logits/chosen": -2.1914288997650146, "logits/rejected": -2.2020742893218994, "logps/chosen": -110.807373046875, "logps/rejected": -182.39483642578125, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -2.036407947540283, "rewards/margins": 6.34421443939209, "rewards/rejected": -8.380622863769531, "step": 1751 }, { "epoch": 3.02, "learning_rate": 3.6942201444963874e-07, "logits/chosen": -2.1382241249084473, "logits/rejected": -2.140763759613037, "logps/chosen": -101.8101806640625, "logps/rejected": -171.3423614501953, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -3.21134614944458, "rewards/margins": 7.054388999938965, "rewards/rejected": -10.265734672546387, "step": 1752 }, { "epoch": 3.02, "learning_rate": 3.6931576710582234e-07, "logits/chosen": -2.1042864322662354, "logits/rejected": -2.216291666030884, "logps/chosen": -84.36602783203125, "logps/rejected": -135.8332061767578, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -1.4372987747192383, "rewards/margins": 4.896080017089844, "rewards/rejected": -6.333378791809082, "step": 1753 }, { "epoch": 3.02, "learning_rate": 3.6920951976200594e-07, "logits/chosen": -2.342287540435791, "logits/rejected": -2.3746209144592285, "logps/chosen": -124.28240966796875, "logps/rejected": -189.03382873535156, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.9902136325836182, "rewards/margins": 7.113310813903809, "rewards/rejected": -9.103524208068848, "step": 1754 }, { "epoch": 3.02, "learning_rate": 3.691032724181895e-07, "logits/chosen": -2.114164352416992, "logits/rejected": -2.2315521240234375, "logps/chosen": -102.83995056152344, "logps/rejected": -151.77389526367188, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -0.4907752573490143, "rewards/margins": 5.7667436599731445, "rewards/rejected": -6.257519245147705, "step": 1755 }, { "epoch": 3.02, "learning_rate": 3.6899702507437314e-07, "logits/chosen": -2.1514780521392822, "logits/rejected": -2.3963124752044678, "logps/chosen": -70.10020446777344, "logps/rejected": -155.1802215576172, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.7261434197425842, "rewards/margins": 6.636205673217773, "rewards/rejected": -5.910062789916992, "step": 1756 }, { "epoch": 3.02, "learning_rate": 3.6889077773055673e-07, "logits/chosen": -2.4120888710021973, "logits/rejected": -1.9814146757125854, "logps/chosen": -84.20438385009766, "logps/rejected": -127.61613464355469, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -0.47773075103759766, "rewards/margins": 5.099287986755371, "rewards/rejected": -5.577018737792969, "step": 1757 }, { "epoch": 3.03, "learning_rate": 3.687845303867403e-07, "logits/chosen": -2.365727424621582, "logits/rejected": -2.1108686923980713, "logps/chosen": -143.96957397460938, "logps/rejected": -213.2208251953125, "loss": 0.0744, "rewards/accuracies": 1.0, "rewards/chosen": -3.924412488937378, "rewards/margins": 8.198314666748047, "rewards/rejected": -12.12272834777832, "step": 1758 }, { "epoch": 3.03, "learning_rate": 3.6867828304292393e-07, "logits/chosen": -2.422039270401001, "logits/rejected": -2.243394374847412, "logps/chosen": -91.66146087646484, "logps/rejected": -118.35671997070312, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -0.14718711376190186, "rewards/margins": 3.4504799842834473, "rewards/rejected": -3.5976672172546387, "step": 1759 }, { "epoch": 3.03, "learning_rate": 3.685720356991075e-07, "logits/chosen": -2.3705050945281982, "logits/rejected": -2.1543145179748535, "logps/chosen": -82.11296081542969, "logps/rejected": -161.68417358398438, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.18519821763038635, "rewards/margins": 7.53875732421875, "rewards/rejected": -7.723956108093262, "step": 1760 }, { "epoch": 3.03, "learning_rate": 3.684657883552911e-07, "logits/chosen": -2.3084824085235596, "logits/rejected": -2.3251640796661377, "logps/chosen": -115.40290832519531, "logps/rejected": -200.0089569091797, "loss": 0.0937, "rewards/accuracies": 1.0, "rewards/chosen": -2.283327102661133, "rewards/margins": 5.802491664886475, "rewards/rejected": -8.085819244384766, "step": 1761 }, { "epoch": 3.03, "learning_rate": 3.6835954101147473e-07, "logits/chosen": -2.1343483924865723, "logits/rejected": -2.3805863857269287, "logps/chosen": -115.19624328613281, "logps/rejected": -167.50596618652344, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.7828643321990967, "rewards/margins": 5.537076950073242, "rewards/rejected": -7.31994104385376, "step": 1762 }, { "epoch": 3.03, "learning_rate": 3.682532936676583e-07, "logits/chosen": -2.1491332054138184, "logits/rejected": -2.1580212116241455, "logps/chosen": -97.92988586425781, "logps/rejected": -154.023681640625, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -2.19924259185791, "rewards/margins": 4.474766254425049, "rewards/rejected": -6.674009323120117, "step": 1763 }, { "epoch": 3.04, "learning_rate": 3.6814704632384193e-07, "logits/chosen": -2.227728843688965, "logits/rejected": -2.3857102394104004, "logps/chosen": -108.09486389160156, "logps/rejected": -210.25653076171875, "loss": 0.0898, "rewards/accuracies": 1.0, "rewards/chosen": -0.9095484018325806, "rewards/margins": 10.019189834594727, "rewards/rejected": -10.928738594055176, "step": 1764 }, { "epoch": 3.04, "learning_rate": 3.6804079898002547e-07, "logits/chosen": -2.1634838581085205, "logits/rejected": -2.1825602054595947, "logps/chosen": -108.82405090332031, "logps/rejected": -171.1779327392578, "loss": 0.0812, "rewards/accuracies": 1.0, "rewards/chosen": -1.5296217203140259, "rewards/margins": 5.343173027038574, "rewards/rejected": -6.872795104980469, "step": 1765 }, { "epoch": 3.04, "learning_rate": 3.6793455163620907e-07, "logits/chosen": -2.3208160400390625, "logits/rejected": -2.307110548019409, "logps/chosen": -92.55766296386719, "logps/rejected": -126.78656005859375, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -1.522938847541809, "rewards/margins": 3.3434972763061523, "rewards/rejected": -4.866436004638672, "step": 1766 }, { "epoch": 3.04, "learning_rate": 3.678283042923927e-07, "logits/chosen": -1.9489731788635254, "logits/rejected": -2.396578788757324, "logps/chosen": -63.21986770629883, "logps/rejected": -124.7294921875, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.20870688557624817, "rewards/margins": 3.919847249984741, "rewards/rejected": -4.128554344177246, "step": 1767 }, { "epoch": 3.04, "learning_rate": 3.6772205694857627e-07, "logits/chosen": -2.202397108078003, "logits/rejected": -2.0998144149780273, "logps/chosen": -109.00517272949219, "logps/rejected": -169.69529724121094, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -3.5314488410949707, "rewards/margins": 7.082172393798828, "rewards/rejected": -10.61362075805664, "step": 1768 }, { "epoch": 3.04, "learning_rate": 3.6761580960475987e-07, "logits/chosen": -2.122360944747925, "logits/rejected": -2.424610137939453, "logps/chosen": -92.65897369384766, "logps/rejected": -145.284423828125, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.069298505783081, "rewards/margins": 3.795121669769287, "rewards/rejected": -4.864419937133789, "step": 1769 }, { "epoch": 3.05, "learning_rate": 3.6750956226094347e-07, "logits/chosen": -2.317429542541504, "logits/rejected": -2.218498468399048, "logps/chosen": -95.80503845214844, "logps/rejected": -143.14576721191406, "loss": 0.065, "rewards/accuracies": 0.75, "rewards/chosen": -1.7403560876846313, "rewards/margins": 3.6375961303710938, "rewards/rejected": -5.377952575683594, "step": 1770 }, { "epoch": 3.05, "learning_rate": 3.6740331491712707e-07, "logits/chosen": -2.335808753967285, "logits/rejected": -2.0219669342041016, "logps/chosen": -84.32876586914062, "logps/rejected": -176.82012939453125, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -0.4020339846611023, "rewards/margins": 9.28962516784668, "rewards/rejected": -9.691658973693848, "step": 1771 }, { "epoch": 3.05, "learning_rate": 3.672970675733106e-07, "logits/chosen": -2.0823895931243896, "logits/rejected": -2.1942875385284424, "logps/chosen": -109.98222351074219, "logps/rejected": -170.8343505859375, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.051222085952759, "rewards/margins": 5.633424758911133, "rewards/rejected": -7.6846466064453125, "step": 1772 }, { "epoch": 3.05, "learning_rate": 3.6719082022949426e-07, "logits/chosen": -2.27104115486145, "logits/rejected": -2.259082794189453, "logps/chosen": -131.01393127441406, "logps/rejected": -159.73948669433594, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -2.8451080322265625, "rewards/margins": 4.398632049560547, "rewards/rejected": -7.243740081787109, "step": 1773 }, { "epoch": 3.05, "learning_rate": 3.6708457288567786e-07, "logits/chosen": -2.235244035720825, "logits/rejected": -2.283463954925537, "logps/chosen": -96.92606353759766, "logps/rejected": -183.98971557617188, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.7728769779205322, "rewards/margins": 8.56747817993164, "rewards/rejected": -9.340354919433594, "step": 1774 }, { "epoch": 3.06, "learning_rate": 3.669783255418614e-07, "logits/chosen": -1.8384809494018555, "logits/rejected": -2.235051155090332, "logps/chosen": -106.06036376953125, "logps/rejected": -178.67660522460938, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -2.3916053771972656, "rewards/margins": 6.638689041137695, "rewards/rejected": -9.030295372009277, "step": 1775 }, { "epoch": 3.06, "learning_rate": 3.6687207819804506e-07, "logits/chosen": -2.42468523979187, "logits/rejected": -2.24591064453125, "logps/chosen": -95.47999572753906, "logps/rejected": -150.13595581054688, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": -1.7624232769012451, "rewards/margins": 6.776871681213379, "rewards/rejected": -8.539295196533203, "step": 1776 }, { "epoch": 3.06, "learning_rate": 3.667658308542286e-07, "logits/chosen": -2.0565967559814453, "logits/rejected": -2.3920843601226807, "logps/chosen": -82.82272338867188, "logps/rejected": -187.04933166503906, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8220612406730652, "rewards/margins": 10.06157112121582, "rewards/rejected": -10.883633613586426, "step": 1777 }, { "epoch": 3.06, "learning_rate": 3.666595835104122e-07, "logits/chosen": -2.2458713054656982, "logits/rejected": -2.1231987476348877, "logps/chosen": -116.86910247802734, "logps/rejected": -190.83168029785156, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -2.6938605308532715, "rewards/margins": 8.045291900634766, "rewards/rejected": -10.739151954650879, "step": 1778 }, { "epoch": 3.06, "learning_rate": 3.6655333616659586e-07, "logits/chosen": -2.128537893295288, "logits/rejected": -1.9805982112884521, "logps/chosen": -76.53585052490234, "logps/rejected": -186.3743438720703, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.04143540561199188, "rewards/margins": 11.13888168334961, "rewards/rejected": -11.180317878723145, "step": 1779 }, { "epoch": 3.06, "learning_rate": 3.664470888227794e-07, "logits/chosen": -2.0897445678710938, "logits/rejected": -2.2598536014556885, "logps/chosen": -110.9106216430664, "logps/rejected": -153.40045166015625, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -2.786151885986328, "rewards/margins": 4.629266738891602, "rewards/rejected": -7.415419101715088, "step": 1780 }, { "epoch": 3.07, "learning_rate": 3.66340841478963e-07, "logits/chosen": -2.2152140140533447, "logits/rejected": -1.8333262205123901, "logps/chosen": -113.34864044189453, "logps/rejected": -194.1183319091797, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -1.625993013381958, "rewards/margins": 10.009014129638672, "rewards/rejected": -11.63500690460205, "step": 1781 }, { "epoch": 3.07, "learning_rate": 3.662345941351466e-07, "logits/chosen": -2.225372314453125, "logits/rejected": -2.4641008377075195, "logps/chosen": -87.6614990234375, "logps/rejected": -172.36795043945312, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.667536973953247, "rewards/margins": 7.132492542266846, "rewards/rejected": -8.800029754638672, "step": 1782 }, { "epoch": 3.07, "learning_rate": 3.661283467913302e-07, "logits/chosen": -2.1000661849975586, "logits/rejected": -2.2343788146972656, "logps/chosen": -112.58729553222656, "logps/rejected": -177.46945190429688, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -1.4798829555511475, "rewards/margins": 6.867803573608398, "rewards/rejected": -8.347685813903809, "step": 1783 }, { "epoch": 3.07, "learning_rate": 3.660220994475138e-07, "logits/chosen": -2.073586940765381, "logits/rejected": -2.36965274810791, "logps/chosen": -97.60012817382812, "logps/rejected": -180.9801483154297, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.0478341579437256, "rewards/margins": 6.353253364562988, "rewards/rejected": -8.401086807250977, "step": 1784 }, { "epoch": 3.07, "learning_rate": 3.659158521036974e-07, "logits/chosen": -2.1443166732788086, "logits/rejected": -2.2934441566467285, "logps/chosen": -102.6419448852539, "logps/rejected": -189.9906005859375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.137155532836914, "rewards/margins": 8.32685375213623, "rewards/rejected": -10.464008331298828, "step": 1785 }, { "epoch": 3.07, "learning_rate": 3.65809604759881e-07, "logits/chosen": -2.1851632595062256, "logits/rejected": -1.9731875658035278, "logps/chosen": -98.36756896972656, "logps/rejected": -141.96804809570312, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -1.3927637338638306, "rewards/margins": 4.270229339599609, "rewards/rejected": -5.66299295425415, "step": 1786 }, { "epoch": 3.08, "learning_rate": 3.6570335741606454e-07, "logits/chosen": -2.2458014488220215, "logits/rejected": -2.1864585876464844, "logps/chosen": -110.81135559082031, "logps/rejected": -165.28146362304688, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -1.9518256187438965, "rewards/margins": 6.407218933105469, "rewards/rejected": -8.359044075012207, "step": 1787 }, { "epoch": 3.08, "learning_rate": 3.655971100722482e-07, "logits/chosen": -2.294010877609253, "logits/rejected": -2.2206592559814453, "logps/chosen": -104.68592834472656, "logps/rejected": -176.2859649658203, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.2519049644470215, "rewards/margins": 6.101701736450195, "rewards/rejected": -8.353607177734375, "step": 1788 }, { "epoch": 3.08, "learning_rate": 3.654908627284318e-07, "logits/chosen": -2.2634646892547607, "logits/rejected": -2.3969593048095703, "logps/chosen": -107.5816421508789, "logps/rejected": -165.57614135742188, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.5592856407165527, "rewards/margins": 5.330885410308838, "rewards/rejected": -6.890171051025391, "step": 1789 }, { "epoch": 3.08, "learning_rate": 3.6538461538461534e-07, "logits/chosen": -2.154284954071045, "logits/rejected": -2.2487897872924805, "logps/chosen": -91.56279754638672, "logps/rejected": -185.792236328125, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -1.2860333919525146, "rewards/margins": 8.502367973327637, "rewards/rejected": -9.78840160369873, "step": 1790 }, { "epoch": 3.08, "learning_rate": 3.65278368040799e-07, "logits/chosen": -2.3098254203796387, "logits/rejected": -1.8101890087127686, "logps/chosen": -96.55143737792969, "logps/rejected": -168.12750244140625, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -0.9481189250946045, "rewards/margins": 6.154416084289551, "rewards/rejected": -7.102534770965576, "step": 1791 }, { "epoch": 3.08, "learning_rate": 3.6517212069698254e-07, "logits/chosen": -2.3283135890960693, "logits/rejected": -2.074756145477295, "logps/chosen": -97.11937713623047, "logps/rejected": -131.88710021972656, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.2760009765625, "rewards/margins": 3.510915756225586, "rewards/rejected": -4.786916732788086, "step": 1792 }, { "epoch": 3.09, "learning_rate": 3.6506587335316614e-07, "logits/chosen": -2.2503113746643066, "logits/rejected": -1.912970781326294, "logps/chosen": -123.14321899414062, "logps/rejected": -178.2110137939453, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -2.652296304702759, "rewards/margins": 7.390501499176025, "rewards/rejected": -10.042797088623047, "step": 1793 }, { "epoch": 3.09, "learning_rate": 3.649596260093498e-07, "logits/chosen": -2.0422966480255127, "logits/rejected": -2.3100829124450684, "logps/chosen": -113.0850601196289, "logps/rejected": -184.01319885253906, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.4560766220092773, "rewards/margins": 7.04421854019165, "rewards/rejected": -9.50029468536377, "step": 1794 }, { "epoch": 3.09, "learning_rate": 3.6485337866553333e-07, "logits/chosen": -2.2201626300811768, "logits/rejected": -2.2066168785095215, "logps/chosen": -129.84487915039062, "logps/rejected": -211.4161376953125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -3.946272850036621, "rewards/margins": 8.89985466003418, "rewards/rejected": -12.846126556396484, "step": 1795 }, { "epoch": 3.09, "learning_rate": 3.6474713132171693e-07, "logits/chosen": -2.0016307830810547, "logits/rejected": -2.368487596511841, "logps/chosen": -93.58769226074219, "logps/rejected": -198.92193603515625, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -1.8368871212005615, "rewards/margins": 8.704347610473633, "rewards/rejected": -10.54123592376709, "step": 1796 }, { "epoch": 3.09, "learning_rate": 3.6464088397790053e-07, "logits/chosen": -2.2192511558532715, "logits/rejected": -2.2203140258789062, "logps/chosen": -133.60055541992188, "logps/rejected": -169.60165405273438, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.92582368850708, "rewards/margins": 5.853405475616455, "rewards/rejected": -8.779229164123535, "step": 1797 }, { "epoch": 3.09, "learning_rate": 3.6453463663408413e-07, "logits/chosen": -2.16019344329834, "logits/rejected": -2.381096601486206, "logps/chosen": -123.74737548828125, "logps/rejected": -200.92091369628906, "loss": 0.1084, "rewards/accuracies": 1.0, "rewards/chosen": -4.002119064331055, "rewards/margins": 6.101465225219727, "rewards/rejected": -10.103584289550781, "step": 1798 }, { "epoch": 3.1, "learning_rate": 3.644283892902677e-07, "logits/chosen": -2.0411484241485596, "logits/rejected": -1.8014256954193115, "logps/chosen": -117.68915557861328, "logps/rejected": -177.93275451660156, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -1.9457646608352661, "rewards/margins": 8.051158905029297, "rewards/rejected": -9.996922492980957, "step": 1799 }, { "epoch": 3.1, "learning_rate": 3.6432214194645133e-07, "logits/chosen": -2.1573257446289062, "logits/rejected": -2.1262564659118652, "logps/chosen": -103.0384521484375, "logps/rejected": -145.14816284179688, "loss": 0.0625, "rewards/accuracies": 1.0, "rewards/chosen": -2.1303176879882812, "rewards/margins": 4.383070945739746, "rewards/rejected": -6.513388633728027, "step": 1800 }, { "epoch": 3.1, "learning_rate": 3.6421589460263493e-07, "logits/chosen": -1.8533518314361572, "logits/rejected": -2.271113395690918, "logps/chosen": -88.19524383544922, "logps/rejected": -209.24798583984375, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": -1.133425235748291, "rewards/margins": 9.846508026123047, "rewards/rejected": -10.979934692382812, "step": 1801 }, { "epoch": 3.1, "learning_rate": 3.6410964725881847e-07, "logits/chosen": -2.2449889183044434, "logits/rejected": -2.1454131603240967, "logps/chosen": -129.5009307861328, "logps/rejected": -206.45050048828125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -4.574685573577881, "rewards/margins": 7.896691799163818, "rewards/rejected": -12.4713773727417, "step": 1802 }, { "epoch": 3.1, "learning_rate": 3.640033999150021e-07, "logits/chosen": -2.437638521194458, "logits/rejected": -2.1043789386749268, "logps/chosen": -137.15902709960938, "logps/rejected": -186.11224365234375, "loss": 0.0652, "rewards/accuracies": 0.75, "rewards/chosen": -3.7522947788238525, "rewards/margins": 5.664675712585449, "rewards/rejected": -9.416971206665039, "step": 1803 }, { "epoch": 3.1, "learning_rate": 3.6389715257118567e-07, "logits/chosen": -1.8630443811416626, "logits/rejected": -2.259000301361084, "logps/chosen": -91.41815185546875, "logps/rejected": -188.26132202148438, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -1.0898529291152954, "rewards/margins": 8.537639617919922, "rewards/rejected": -9.627492904663086, "step": 1804 }, { "epoch": 3.11, "learning_rate": 3.637909052273693e-07, "logits/chosen": -1.9931213855743408, "logits/rejected": -2.1773180961608887, "logps/chosen": -110.27384185791016, "logps/rejected": -164.0891876220703, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -0.9840148687362671, "rewards/margins": 5.180142879486084, "rewards/rejected": -6.164157867431641, "step": 1805 }, { "epoch": 3.11, "learning_rate": 3.636846578835529e-07, "logits/chosen": -2.207228899002075, "logits/rejected": -2.2315783500671387, "logps/chosen": -128.85577392578125, "logps/rejected": -201.63157653808594, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -3.319601058959961, "rewards/margins": 7.758720874786377, "rewards/rejected": -11.07832145690918, "step": 1806 }, { "epoch": 3.11, "learning_rate": 3.6357841053973647e-07, "logits/chosen": -2.078462600708008, "logits/rejected": -2.357266426086426, "logps/chosen": -96.66316223144531, "logps/rejected": -171.43951416015625, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": -2.5931873321533203, "rewards/margins": 7.61911678314209, "rewards/rejected": -10.21230411529541, "step": 1807 }, { "epoch": 3.11, "learning_rate": 3.634721631959201e-07, "logits/chosen": -2.154743194580078, "logits/rejected": -2.3016018867492676, "logps/chosen": -126.55635833740234, "logps/rejected": -174.65289306640625, "loss": 0.0626, "rewards/accuracies": 0.75, "rewards/chosen": -4.374270439147949, "rewards/margins": 3.634622812271118, "rewards/rejected": -8.008893966674805, "step": 1808 }, { "epoch": 3.11, "learning_rate": 3.6336591585210367e-07, "logits/chosen": -1.947676181793213, "logits/rejected": -2.3689768314361572, "logps/chosen": -76.1254653930664, "logps/rejected": -183.07891845703125, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -0.3605450391769409, "rewards/margins": 8.316679000854492, "rewards/rejected": -8.677224159240723, "step": 1809 }, { "epoch": 3.12, "learning_rate": 3.6325966850828726e-07, "logits/chosen": -2.010519504547119, "logits/rejected": -1.8867837190628052, "logps/chosen": -100.926513671875, "logps/rejected": -196.59556579589844, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.9890046119689941, "rewards/margins": 8.649373054504395, "rewards/rejected": -10.63837718963623, "step": 1810 }, { "epoch": 3.12, "learning_rate": 3.631534211644709e-07, "logits/chosen": -2.239384651184082, "logits/rejected": -2.3095297813415527, "logps/chosen": -84.26604461669922, "logps/rejected": -210.75099182128906, "loss": 0.0874, "rewards/accuracies": 1.0, "rewards/chosen": -0.2826572358608246, "rewards/margins": 11.115269660949707, "rewards/rejected": -11.397926330566406, "step": 1811 }, { "epoch": 3.12, "learning_rate": 3.6304717382065446e-07, "logits/chosen": -1.9670310020446777, "logits/rejected": -2.106142997741699, "logps/chosen": -82.67243957519531, "logps/rejected": -169.41445922851562, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.574918270111084, "rewards/margins": 8.21152114868164, "rewards/rejected": -8.786438941955566, "step": 1812 }, { "epoch": 3.12, "learning_rate": 3.6294092647683806e-07, "logits/chosen": -2.1745760440826416, "logits/rejected": -2.3966329097747803, "logps/chosen": -108.6454086303711, "logps/rejected": -205.04075622558594, "loss": 0.0731, "rewards/accuracies": 1.0, "rewards/chosen": -1.8328012228012085, "rewards/margins": 8.71493911743164, "rewards/rejected": -10.54773998260498, "step": 1813 }, { "epoch": 3.12, "learning_rate": 3.6283467913302166e-07, "logits/chosen": -2.163057804107666, "logits/rejected": -2.242077350616455, "logps/chosen": -94.91514587402344, "logps/rejected": -137.5653076171875, "loss": 0.0394, "rewards/accuracies": 0.75, "rewards/chosen": -1.4637186527252197, "rewards/margins": 3.424832582473755, "rewards/rejected": -4.888551235198975, "step": 1814 }, { "epoch": 3.12, "learning_rate": 3.6272843178920526e-07, "logits/chosen": -2.247490644454956, "logits/rejected": -2.0680761337280273, "logps/chosen": -99.95565795898438, "logps/rejected": -180.53598022460938, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": -2.193983793258667, "rewards/margins": 8.922765731811523, "rewards/rejected": -11.116750717163086, "step": 1815 }, { "epoch": 3.13, "learning_rate": 3.6262218444538886e-07, "logits/chosen": -2.1910746097564697, "logits/rejected": -2.19337797164917, "logps/chosen": -117.73153686523438, "logps/rejected": -189.30361938476562, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -3.418290853500366, "rewards/margins": 6.147367477416992, "rewards/rejected": -9.565658569335938, "step": 1816 }, { "epoch": 3.13, "learning_rate": 3.6251593710157246e-07, "logits/chosen": -2.300553321838379, "logits/rejected": -2.1660521030426025, "logps/chosen": -99.11058044433594, "logps/rejected": -174.63551330566406, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -1.050462007522583, "rewards/margins": 7.7724103927612305, "rewards/rejected": -8.822872161865234, "step": 1817 }, { "epoch": 3.13, "learning_rate": 3.6240968975775606e-07, "logits/chosen": -2.304460287094116, "logits/rejected": -1.9960051774978638, "logps/chosen": -102.83306884765625, "logps/rejected": -181.29901123046875, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -1.1996324062347412, "rewards/margins": 7.610174179077148, "rewards/rejected": -8.809806823730469, "step": 1818 }, { "epoch": 3.13, "learning_rate": 3.623034424139396e-07, "logits/chosen": -2.30938982963562, "logits/rejected": -2.1006662845611572, "logps/chosen": -75.85060119628906, "logps/rejected": -169.83502197265625, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.7473479509353638, "rewards/margins": 10.106426239013672, "rewards/rejected": -10.85377311706543, "step": 1819 }, { "epoch": 3.13, "learning_rate": 3.6219719507012325e-07, "logits/chosen": -2.099555015563965, "logits/rejected": -2.1900572776794434, "logps/chosen": -106.84554290771484, "logps/rejected": -181.3992462158203, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": -1.8856675624847412, "rewards/margins": 7.528138160705566, "rewards/rejected": -9.413805961608887, "step": 1820 }, { "epoch": 3.13, "learning_rate": 3.6209094772630685e-07, "logits/chosen": -2.1189475059509277, "logits/rejected": -2.2263362407684326, "logps/chosen": -95.30547332763672, "logps/rejected": -175.48289489746094, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": -1.9462883472442627, "rewards/margins": 8.522831916809082, "rewards/rejected": -10.469120025634766, "step": 1821 }, { "epoch": 3.14, "learning_rate": 3.619847003824904e-07, "logits/chosen": -2.144449472427368, "logits/rejected": -2.2060489654541016, "logps/chosen": -127.28126525878906, "logps/rejected": -213.60092163085938, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -3.454209089279175, "rewards/margins": 7.15677547454834, "rewards/rejected": -10.610984802246094, "step": 1822 }, { "epoch": 3.14, "learning_rate": 3.6187845303867405e-07, "logits/chosen": -2.2407193183898926, "logits/rejected": -2.2445144653320312, "logps/chosen": -89.7257080078125, "logps/rejected": -166.15126037597656, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.1183234453201294, "rewards/margins": 8.444560050964355, "rewards/rejected": -9.562883377075195, "step": 1823 }, { "epoch": 3.14, "learning_rate": 3.617722056948576e-07, "logits/chosen": -2.185241222381592, "logits/rejected": -2.110513210296631, "logps/chosen": -101.96363067626953, "logps/rejected": -189.81361389160156, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -1.693940281867981, "rewards/margins": 7.97490930557251, "rewards/rejected": -9.668848991394043, "step": 1824 }, { "epoch": 3.14, "learning_rate": 3.616659583510412e-07, "logits/chosen": -2.090853452682495, "logits/rejected": -2.287693500518799, "logps/chosen": -103.22974395751953, "logps/rejected": -162.89633178710938, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.5247297286987305, "rewards/margins": 5.377229690551758, "rewards/rejected": -7.901959419250488, "step": 1825 }, { "epoch": 3.14, "learning_rate": 3.6155971100722485e-07, "logits/chosen": -2.035061836242676, "logits/rejected": -2.284369945526123, "logps/chosen": -95.73301696777344, "logps/rejected": -201.9679412841797, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -2.7072620391845703, "rewards/margins": 8.850427627563477, "rewards/rejected": -11.557689666748047, "step": 1826 }, { "epoch": 3.14, "learning_rate": 3.614534636634084e-07, "logits/chosen": -1.8012464046478271, "logits/rejected": -2.316593885421753, "logps/chosen": -93.07911682128906, "logps/rejected": -212.86094665527344, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -1.422935128211975, "rewards/margins": 10.171812057495117, "rewards/rejected": -11.594747543334961, "step": 1827 }, { "epoch": 3.15, "learning_rate": 3.61347216319592e-07, "logits/chosen": -2.1193385124206543, "logits/rejected": -2.3468596935272217, "logps/chosen": -110.66468811035156, "logps/rejected": -165.13131713867188, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -2.5773696899414062, "rewards/margins": 5.544338226318359, "rewards/rejected": -8.121707916259766, "step": 1828 }, { "epoch": 3.15, "learning_rate": 3.612409689757756e-07, "logits/chosen": -1.9780752658843994, "logits/rejected": -2.287459373474121, "logps/chosen": -79.23362731933594, "logps/rejected": -198.98458862304688, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -0.46096840500831604, "rewards/margins": 9.47015380859375, "rewards/rejected": -9.931122779846191, "step": 1829 }, { "epoch": 3.15, "learning_rate": 3.611347216319592e-07, "logits/chosen": -2.327207088470459, "logits/rejected": -1.8746148347854614, "logps/chosen": -121.92218780517578, "logps/rejected": -203.72872924804688, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.715114116668701, "rewards/margins": 8.838807106018066, "rewards/rejected": -11.553921699523926, "step": 1830 }, { "epoch": 3.15, "learning_rate": 3.6102847428814274e-07, "logits/chosen": -2.2534618377685547, "logits/rejected": -2.2100226879119873, "logps/chosen": -100.78652954101562, "logps/rejected": -201.22293090820312, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": -2.0946433544158936, "rewards/margins": 9.191949844360352, "rewards/rejected": -11.286593437194824, "step": 1831 }, { "epoch": 3.15, "learning_rate": 3.609222269443264e-07, "logits/chosen": -1.9266700744628906, "logits/rejected": -2.35341739654541, "logps/chosen": -112.37785339355469, "logps/rejected": -171.60772705078125, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -4.464326858520508, "rewards/margins": 4.713708877563477, "rewards/rejected": -9.1780366897583, "step": 1832 }, { "epoch": 3.15, "learning_rate": 3.6081597960051e-07, "logits/chosen": -2.2454779148101807, "logits/rejected": -2.1764140129089355, "logps/chosen": -99.9176025390625, "logps/rejected": -183.21060180664062, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.1849117279052734, "rewards/margins": 7.4896745681762695, "rewards/rejected": -9.674586296081543, "step": 1833 }, { "epoch": 3.16, "learning_rate": 3.6070973225669353e-07, "logits/chosen": -2.04174542427063, "logits/rejected": -2.2092363834381104, "logps/chosen": -97.21183776855469, "logps/rejected": -168.410400390625, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -1.79445219039917, "rewards/margins": 6.281770706176758, "rewards/rejected": -8.07622241973877, "step": 1834 }, { "epoch": 3.16, "learning_rate": 3.606034849128772e-07, "logits/chosen": -1.7051584720611572, "logits/rejected": -2.2131271362304688, "logps/chosen": -100.49806213378906, "logps/rejected": -163.68637084960938, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.550760269165039, "rewards/margins": 5.895857334136963, "rewards/rejected": -7.446617603302002, "step": 1835 }, { "epoch": 3.16, "learning_rate": 3.6049723756906073e-07, "logits/chosen": -2.098783493041992, "logits/rejected": -2.247145891189575, "logps/chosen": -87.8550033569336, "logps/rejected": -163.3584442138672, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -1.1054069995880127, "rewards/margins": 7.790093421936035, "rewards/rejected": -8.895500183105469, "step": 1836 }, { "epoch": 3.16, "learning_rate": 3.6039099022524433e-07, "logits/chosen": -2.4238266944885254, "logits/rejected": -2.1934921741485596, "logps/chosen": -149.8623504638672, "logps/rejected": -206.6076202392578, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -4.048390865325928, "rewards/margins": 7.006619453430176, "rewards/rejected": -11.055009841918945, "step": 1837 }, { "epoch": 3.16, "learning_rate": 3.60284742881428e-07, "logits/chosen": -2.1918587684631348, "logits/rejected": -1.739487886428833, "logps/chosen": -111.52079010009766, "logps/rejected": -155.62721252441406, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -2.9410829544067383, "rewards/margins": 5.863468647003174, "rewards/rejected": -8.804551124572754, "step": 1838 }, { "epoch": 3.17, "learning_rate": 3.6017849553761153e-07, "logits/chosen": -1.8425331115722656, "logits/rejected": -2.18789005279541, "logps/chosen": -91.58920288085938, "logps/rejected": -161.00271606445312, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -1.7329318523406982, "rewards/margins": 5.398642063140869, "rewards/rejected": -7.131573677062988, "step": 1839 }, { "epoch": 3.17, "learning_rate": 3.600722481937951e-07, "logits/chosen": -2.205944299697876, "logits/rejected": -2.2631869316101074, "logps/chosen": -113.8641128540039, "logps/rejected": -220.42152404785156, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -3.4516525268554688, "rewards/margins": 9.180781364440918, "rewards/rejected": -12.632433891296387, "step": 1840 }, { "epoch": 3.17, "learning_rate": 3.599660008499787e-07, "logits/chosen": -2.129293918609619, "logits/rejected": -1.8833215236663818, "logps/chosen": -102.56123352050781, "logps/rejected": -161.95443725585938, "loss": 0.079, "rewards/accuracies": 0.75, "rewards/chosen": -2.067389488220215, "rewards/margins": 5.827015399932861, "rewards/rejected": -7.894404411315918, "step": 1841 }, { "epoch": 3.17, "learning_rate": 3.598597535061623e-07, "logits/chosen": -1.8635756969451904, "logits/rejected": -2.2505877017974854, "logps/chosen": -100.16666412353516, "logps/rejected": -184.4335174560547, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -1.6142407655715942, "rewards/margins": 7.292367935180664, "rewards/rejected": -8.906608581542969, "step": 1842 }, { "epoch": 3.17, "learning_rate": 3.597535061623459e-07, "logits/chosen": -1.8397293090820312, "logits/rejected": -1.9217488765716553, "logps/chosen": -107.81051635742188, "logps/rejected": -180.7271728515625, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -3.883513927459717, "rewards/margins": 6.564797878265381, "rewards/rejected": -10.448310852050781, "step": 1843 }, { "epoch": 3.17, "learning_rate": 3.596472588185295e-07, "logits/chosen": -2.160646438598633, "logits/rejected": -2.2693376541137695, "logps/chosen": -120.36640930175781, "logps/rejected": -156.280029296875, "loss": 0.0806, "rewards/accuracies": 0.75, "rewards/chosen": -4.5374321937561035, "rewards/margins": 3.4597907066345215, "rewards/rejected": -7.997222900390625, "step": 1844 }, { "epoch": 3.18, "learning_rate": 3.595410114747131e-07, "logits/chosen": -2.0049734115600586, "logits/rejected": -2.2148654460906982, "logps/chosen": -99.09100341796875, "logps/rejected": -161.0390625, "loss": 0.1638, "rewards/accuracies": 1.0, "rewards/chosen": -3.0129623413085938, "rewards/margins": 6.325962543487549, "rewards/rejected": -9.338924407958984, "step": 1845 }, { "epoch": 3.18, "learning_rate": 3.5943476413089667e-07, "logits/chosen": -2.17694091796875, "logits/rejected": -2.35709285736084, "logps/chosen": -116.21871948242188, "logps/rejected": -188.91787719726562, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -3.384089469909668, "rewards/margins": 5.699140548706055, "rewards/rejected": -9.083230018615723, "step": 1846 }, { "epoch": 3.18, "learning_rate": 3.593285167870803e-07, "logits/chosen": -2.321133852005005, "logits/rejected": -1.9678573608398438, "logps/chosen": -97.25385284423828, "logps/rejected": -151.99490356445312, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.905472993850708, "rewards/margins": 7.188300132751465, "rewards/rejected": -9.09377384185791, "step": 1847 }, { "epoch": 3.18, "learning_rate": 3.592222694432639e-07, "logits/chosen": -2.12502121925354, "logits/rejected": -2.4578657150268555, "logps/chosen": -93.70831298828125, "logps/rejected": -211.90103149414062, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": -1.8796743154525757, "rewards/margins": 9.745262145996094, "rewards/rejected": -11.624937057495117, "step": 1848 }, { "epoch": 3.18, "learning_rate": 3.591160220994475e-07, "logits/chosen": -1.7054344415664673, "logits/rejected": -2.2675600051879883, "logps/chosen": -99.69511413574219, "logps/rejected": -181.77447509765625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.7727410793304443, "rewards/margins": 7.2557759284973145, "rewards/rejected": -9.02851676940918, "step": 1849 }, { "epoch": 3.18, "learning_rate": 3.590097747556311e-07, "logits/chosen": -1.7945361137390137, "logits/rejected": -2.095975637435913, "logps/chosen": -120.94926452636719, "logps/rejected": -190.09254455566406, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.4431028366088867, "rewards/margins": 6.287247180938721, "rewards/rejected": -8.730350494384766, "step": 1850 }, { "epoch": 3.19, "learning_rate": 3.5890352741181466e-07, "logits/chosen": -2.0995874404907227, "logits/rejected": -2.1564199924468994, "logps/chosen": -115.99942016601562, "logps/rejected": -174.96298217773438, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -3.307448387145996, "rewards/margins": 6.467897891998291, "rewards/rejected": -9.775346755981445, "step": 1851 }, { "epoch": 3.19, "learning_rate": 3.587972800679983e-07, "logits/chosen": -2.1308891773223877, "logits/rejected": -2.0819969177246094, "logps/chosen": -130.060546875, "logps/rejected": -189.52520751953125, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -3.0567805767059326, "rewards/margins": 6.243283748626709, "rewards/rejected": -9.300064086914062, "step": 1852 }, { "epoch": 3.19, "learning_rate": 3.586910327241819e-07, "logits/chosen": -2.144596815109253, "logits/rejected": -2.162820339202881, "logps/chosen": -137.35366821289062, "logps/rejected": -212.88348388671875, "loss": 0.0942, "rewards/accuracies": 1.0, "rewards/chosen": -4.8270745277404785, "rewards/margins": 7.398219108581543, "rewards/rejected": -12.22529411315918, "step": 1853 }, { "epoch": 3.19, "learning_rate": 3.5858478538036546e-07, "logits/chosen": -2.3139591217041016, "logits/rejected": -2.1970882415771484, "logps/chosen": -94.93728637695312, "logps/rejected": -185.1336669921875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -2.68508243560791, "rewards/margins": 8.111164093017578, "rewards/rejected": -10.796246528625488, "step": 1854 }, { "epoch": 3.19, "learning_rate": 3.584785380365491e-07, "logits/chosen": -2.2759358882904053, "logits/rejected": -2.076078176498413, "logps/chosen": -125.89674377441406, "logps/rejected": -175.7750244140625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -4.915647983551025, "rewards/margins": 4.611371040344238, "rewards/rejected": -9.527018547058105, "step": 1855 }, { "epoch": 3.19, "learning_rate": 3.5837229069273266e-07, "logits/chosen": -2.235826253890991, "logits/rejected": -2.3609306812286377, "logps/chosen": -93.31495666503906, "logps/rejected": -195.37657165527344, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.0476279258728027, "rewards/margins": 9.025779724121094, "rewards/rejected": -11.073408126831055, "step": 1856 }, { "epoch": 3.2, "learning_rate": 3.5826604334891625e-07, "logits/chosen": -2.189079523086548, "logits/rejected": -2.308229446411133, "logps/chosen": -94.79996490478516, "logps/rejected": -186.84054565429688, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -1.2607567310333252, "rewards/margins": 8.462512016296387, "rewards/rejected": -9.723268508911133, "step": 1857 }, { "epoch": 3.2, "learning_rate": 3.5815979600509985e-07, "logits/chosen": -2.2962942123413086, "logits/rejected": -1.8690462112426758, "logps/chosen": -83.699462890625, "logps/rejected": -151.56112670898438, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.8091021180152893, "rewards/margins": 7.019127368927002, "rewards/rejected": -7.8282294273376465, "step": 1858 }, { "epoch": 3.2, "learning_rate": 3.5805354866128345e-07, "logits/chosen": -2.0881824493408203, "logits/rejected": -2.0937507152557373, "logps/chosen": -109.23969268798828, "logps/rejected": -179.2538604736328, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -2.386765956878662, "rewards/margins": 7.479422569274902, "rewards/rejected": -9.866189002990723, "step": 1859 }, { "epoch": 3.2, "learning_rate": 3.5794730131746705e-07, "logits/chosen": -1.8013025522232056, "logits/rejected": -2.3971664905548096, "logps/chosen": -82.999267578125, "logps/rejected": -161.14974975585938, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -1.1549742221832275, "rewards/margins": 6.745770454406738, "rewards/rejected": -7.900744438171387, "step": 1860 }, { "epoch": 3.2, "learning_rate": 3.5784105397365065e-07, "logits/chosen": -2.117612600326538, "logits/rejected": -2.332275867462158, "logps/chosen": -124.23738098144531, "logps/rejected": -222.51364135742188, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.597456216812134, "rewards/margins": 8.9693021774292, "rewards/rejected": -11.56675910949707, "step": 1861 }, { "epoch": 3.2, "learning_rate": 3.5773480662983425e-07, "logits/chosen": -1.9570432901382446, "logits/rejected": -2.256941556930542, "logps/chosen": -130.551025390625, "logps/rejected": -188.88119506835938, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -3.503970146179199, "rewards/margins": 5.731527328491211, "rewards/rejected": -9.235498428344727, "step": 1862 }, { "epoch": 3.21, "learning_rate": 3.576285592860178e-07, "logits/chosen": -2.2929868698120117, "logits/rejected": -1.944518804550171, "logps/chosen": -122.04703521728516, "logps/rejected": -158.506103515625, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": -3.296424627304077, "rewards/margins": 6.427978515625, "rewards/rejected": -9.724403381347656, "step": 1863 }, { "epoch": 3.21, "learning_rate": 3.5752231194220145e-07, "logits/chosen": -2.132037401199341, "logits/rejected": -2.285581588745117, "logps/chosen": -125.18025207519531, "logps/rejected": -212.8818817138672, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -4.459353923797607, "rewards/margins": 7.471027374267578, "rewards/rejected": -11.930380821228027, "step": 1864 }, { "epoch": 3.21, "learning_rate": 3.5741606459838505e-07, "logits/chosen": -2.2447333335876465, "logits/rejected": -2.126007080078125, "logps/chosen": -100.92790985107422, "logps/rejected": -210.57398986816406, "loss": 0.0843, "rewards/accuracies": 1.0, "rewards/chosen": -1.469188928604126, "rewards/margins": 10.973649978637695, "rewards/rejected": -12.442839622497559, "step": 1865 }, { "epoch": 3.21, "learning_rate": 3.573098172545686e-07, "logits/chosen": -2.119898796081543, "logits/rejected": -2.2250893115997314, "logps/chosen": -107.57637023925781, "logps/rejected": -189.05197143554688, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": -2.043940544128418, "rewards/margins": 7.561648368835449, "rewards/rejected": -9.605588912963867, "step": 1866 }, { "epoch": 3.21, "learning_rate": 3.5720356991075224e-07, "logits/chosen": -2.2410106658935547, "logits/rejected": -1.6433968544006348, "logps/chosen": -115.22162628173828, "logps/rejected": -137.98477172851562, "loss": 0.0798, "rewards/accuracies": 1.0, "rewards/chosen": -3.4711742401123047, "rewards/margins": 4.269561767578125, "rewards/rejected": -7.74073600769043, "step": 1867 }, { "epoch": 3.22, "learning_rate": 3.570973225669358e-07, "logits/chosen": -2.267117977142334, "logits/rejected": -2.2023348808288574, "logps/chosen": -84.45912170410156, "logps/rejected": -150.94308471679688, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.5442860126495361, "rewards/margins": 6.039620876312256, "rewards/rejected": -7.583907127380371, "step": 1868 }, { "epoch": 3.22, "learning_rate": 3.569910752231194e-07, "logits/chosen": -1.7680962085723877, "logits/rejected": -2.2497856616973877, "logps/chosen": -101.46054077148438, "logps/rejected": -224.64862060546875, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -3.0415263175964355, "rewards/margins": 9.494600296020508, "rewards/rejected": -12.536125183105469, "step": 1869 }, { "epoch": 3.22, "learning_rate": 3.5688482787930304e-07, "logits/chosen": -1.8577853441238403, "logits/rejected": -2.2816858291625977, "logps/chosen": -99.62281799316406, "logps/rejected": -208.9998016357422, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -2.313941478729248, "rewards/margins": 8.740987777709961, "rewards/rejected": -11.054929733276367, "step": 1870 }, { "epoch": 3.22, "learning_rate": 3.567785805354866e-07, "logits/chosen": -1.697443962097168, "logits/rejected": -2.1343867778778076, "logps/chosen": -104.91966247558594, "logps/rejected": -197.53866577148438, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -3.8069381713867188, "rewards/margins": 8.21631908416748, "rewards/rejected": -12.0232572555542, "step": 1871 }, { "epoch": 3.22, "learning_rate": 3.566723331916702e-07, "logits/chosen": -2.0063605308532715, "logits/rejected": -2.225043296813965, "logps/chosen": -115.89020538330078, "logps/rejected": -217.94625854492188, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -3.717050313949585, "rewards/margins": 8.56418228149414, "rewards/rejected": -12.281232833862305, "step": 1872 }, { "epoch": 3.22, "learning_rate": 3.565660858478538e-07, "logits/chosen": -2.443575859069824, "logits/rejected": -2.2249395847320557, "logps/chosen": -104.75780487060547, "logps/rejected": -207.98028564453125, "loss": 0.1003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0434826612472534, "rewards/margins": 11.289200782775879, "rewards/rejected": -12.332683563232422, "step": 1873 }, { "epoch": 3.23, "learning_rate": 3.564598385040374e-07, "logits/chosen": -2.0394821166992188, "logits/rejected": -2.198716640472412, "logps/chosen": -86.45410919189453, "logps/rejected": -176.29486083984375, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": -1.0933830738067627, "rewards/margins": 8.566457748413086, "rewards/rejected": -9.65984058380127, "step": 1874 }, { "epoch": 3.23, "learning_rate": 3.56353591160221e-07, "logits/chosen": -1.8233859539031982, "logits/rejected": -2.307363271713257, "logps/chosen": -116.00115966796875, "logps/rejected": -191.71041870117188, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -3.285003662109375, "rewards/margins": 5.576744079589844, "rewards/rejected": -8.861747741699219, "step": 1875 }, { "epoch": 3.23, "learning_rate": 3.562473438164046e-07, "logits/chosen": -2.366621732711792, "logits/rejected": -2.0590569972991943, "logps/chosen": -119.26997375488281, "logps/rejected": -189.32855224609375, "loss": 0.0872, "rewards/accuracies": 1.0, "rewards/chosen": -3.9878954887390137, "rewards/margins": 7.5650200843811035, "rewards/rejected": -11.552915573120117, "step": 1876 }, { "epoch": 3.23, "learning_rate": 3.561410964725882e-07, "logits/chosen": -2.1308741569519043, "logits/rejected": -2.108823299407959, "logps/chosen": -112.42401123046875, "logps/rejected": -176.48928833007812, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -2.367865562438965, "rewards/margins": 5.89475679397583, "rewards/rejected": -8.262621879577637, "step": 1877 }, { "epoch": 3.23, "learning_rate": 3.560348491287717e-07, "logits/chosen": -2.034353733062744, "logits/rejected": -2.0070345401763916, "logps/chosen": -106.31581115722656, "logps/rejected": -187.31216430664062, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -2.2671914100646973, "rewards/margins": 9.353339195251465, "rewards/rejected": -11.620530128479004, "step": 1878 }, { "epoch": 3.23, "learning_rate": 3.559286017849554e-07, "logits/chosen": -2.1247057914733887, "logits/rejected": -2.101562023162842, "logps/chosen": -113.82218170166016, "logps/rejected": -176.17848205566406, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.3201653957366943, "rewards/margins": 6.882555961608887, "rewards/rejected": -9.20272159576416, "step": 1879 }, { "epoch": 3.24, "learning_rate": 3.55822354441139e-07, "logits/chosen": -1.8500330448150635, "logits/rejected": -2.2746424674987793, "logps/chosen": -86.14523315429688, "logps/rejected": -190.54751586914062, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.33850613236427307, "rewards/margins": 9.191890716552734, "rewards/rejected": -9.530396461486816, "step": 1880 }, { "epoch": 3.24, "learning_rate": 3.557161070973225e-07, "logits/chosen": -2.0210564136505127, "logits/rejected": -2.188505172729492, "logps/chosen": -90.95657348632812, "logps/rejected": -207.15440368652344, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.6114952564239502, "rewards/margins": 9.616741180419922, "rewards/rejected": -11.228236198425293, "step": 1881 }, { "epoch": 3.24, "learning_rate": 3.556098597535062e-07, "logits/chosen": -1.9756801128387451, "logits/rejected": -2.148043394088745, "logps/chosen": -98.40995788574219, "logps/rejected": -204.84701538085938, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.8360490798950195, "rewards/margins": 10.834527015686035, "rewards/rejected": -12.670576095581055, "step": 1882 }, { "epoch": 3.24, "learning_rate": 3.555036124096897e-07, "logits/chosen": -2.081282138824463, "logits/rejected": -1.8798941373825073, "logps/chosen": -118.18854522705078, "logps/rejected": -161.4613494873047, "loss": 0.0579, "rewards/accuracies": 0.75, "rewards/chosen": -2.828544855117798, "rewards/margins": 5.448787212371826, "rewards/rejected": -8.277332305908203, "step": 1883 }, { "epoch": 3.24, "learning_rate": 3.553973650658733e-07, "logits/chosen": -2.218816041946411, "logits/rejected": -1.8997740745544434, "logps/chosen": -96.1634521484375, "logps/rejected": -191.475830078125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.0114718675613403, "rewards/margins": 9.867199897766113, "rewards/rejected": -10.878671646118164, "step": 1884 }, { "epoch": 3.24, "learning_rate": 3.552911177220569e-07, "logits/chosen": -1.7728385925292969, "logits/rejected": -2.3889083862304688, "logps/chosen": -92.97858428955078, "logps/rejected": -185.34649658203125, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -2.5819239616394043, "rewards/margins": 6.442138671875, "rewards/rejected": -9.024062156677246, "step": 1885 }, { "epoch": 3.25, "learning_rate": 3.551848703782405e-07, "logits/chosen": -1.9723458290100098, "logits/rejected": -2.2603766918182373, "logps/chosen": -124.17711639404297, "logps/rejected": -208.03756713867188, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.2179861068725586, "rewards/margins": 8.566638946533203, "rewards/rejected": -11.784626007080078, "step": 1886 }, { "epoch": 3.25, "learning_rate": 3.550786230344241e-07, "logits/chosen": -1.9370930194854736, "logits/rejected": -2.3861570358276367, "logps/chosen": -133.27239990234375, "logps/rejected": -213.2945098876953, "loss": 0.0911, "rewards/accuracies": 1.0, "rewards/chosen": -3.3605854511260986, "rewards/margins": 6.086418151855469, "rewards/rejected": -9.447003364562988, "step": 1887 }, { "epoch": 3.25, "learning_rate": 3.549723756906077e-07, "logits/chosen": -2.299942970275879, "logits/rejected": -2.1675562858581543, "logps/chosen": -121.93476867675781, "logps/rejected": -209.89012145996094, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -2.887221574783325, "rewards/margins": 9.249958038330078, "rewards/rejected": -12.13718032836914, "step": 1888 }, { "epoch": 3.25, "learning_rate": 3.548661283467913e-07, "logits/chosen": -2.316804885864258, "logits/rejected": -1.8415199518203735, "logps/chosen": -123.38939666748047, "logps/rejected": -202.84170532226562, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": -4.197047710418701, "rewards/margins": 7.659706115722656, "rewards/rejected": -11.856754302978516, "step": 1889 }, { "epoch": 3.25, "learning_rate": 3.547598810029749e-07, "logits/chosen": -2.155363082885742, "logits/rejected": -2.308479070663452, "logps/chosen": -107.01641082763672, "logps/rejected": -207.613037109375, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -2.796628952026367, "rewards/margins": 8.58639144897461, "rewards/rejected": -11.383020401000977, "step": 1890 }, { "epoch": 3.25, "learning_rate": 3.546536336591585e-07, "logits/chosen": -2.156830072402954, "logits/rejected": -2.034717559814453, "logps/chosen": -126.87844848632812, "logps/rejected": -200.13369750976562, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -4.055049896240234, "rewards/margins": 8.540971755981445, "rewards/rejected": -12.59602165222168, "step": 1891 }, { "epoch": 3.26, "learning_rate": 3.545473863153421e-07, "logits/chosen": -2.171959161758423, "logits/rejected": -2.2662370204925537, "logps/chosen": -109.93292999267578, "logps/rejected": -199.60401916503906, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -1.3957204818725586, "rewards/margins": 8.972216606140137, "rewards/rejected": -10.367937088012695, "step": 1892 }, { "epoch": 3.26, "learning_rate": 3.544411389715257e-07, "logits/chosen": -1.964914321899414, "logits/rejected": -2.2864794731140137, "logps/chosen": -119.4831771850586, "logps/rejected": -170.73934936523438, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": -2.8331947326660156, "rewards/margins": 4.366860866546631, "rewards/rejected": -7.2000555992126465, "step": 1893 }, { "epoch": 3.26, "learning_rate": 3.543348916277093e-07, "logits/chosen": -2.277614116668701, "logits/rejected": -2.3901543617248535, "logps/chosen": -101.67534637451172, "logps/rejected": -197.61767578125, "loss": 0.0903, "rewards/accuracies": 1.0, "rewards/chosen": -1.0900609493255615, "rewards/margins": 10.153987884521484, "rewards/rejected": -11.244049072265625, "step": 1894 }, { "epoch": 3.26, "learning_rate": 3.5422864428389285e-07, "logits/chosen": -2.2661526203155518, "logits/rejected": -2.1882941722869873, "logps/chosen": -127.694091796875, "logps/rejected": -187.46853637695312, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -3.3420395851135254, "rewards/margins": 7.219307899475098, "rewards/rejected": -10.561347007751465, "step": 1895 }, { "epoch": 3.26, "learning_rate": 3.541223969400765e-07, "logits/chosen": -2.1383297443389893, "logits/rejected": -1.9603835344314575, "logps/chosen": -106.79853820800781, "logps/rejected": -175.75567626953125, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": -1.7305059432983398, "rewards/margins": 8.020030975341797, "rewards/rejected": -9.750537872314453, "step": 1896 }, { "epoch": 3.27, "learning_rate": 3.540161495962601e-07, "logits/chosen": -2.1879377365112305, "logits/rejected": -2.184699058532715, "logps/chosen": -106.376953125, "logps/rejected": -160.8245849609375, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": -2.621392250061035, "rewards/margins": 6.323024272918701, "rewards/rejected": -8.944416046142578, "step": 1897 }, { "epoch": 3.27, "learning_rate": 3.5390990225244365e-07, "logits/chosen": -2.095493793487549, "logits/rejected": -2.160029411315918, "logps/chosen": -116.7291259765625, "logps/rejected": -210.8971405029297, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -3.411738634109497, "rewards/margins": 9.343503952026367, "rewards/rejected": -12.755243301391602, "step": 1898 }, { "epoch": 3.27, "learning_rate": 3.538036549086273e-07, "logits/chosen": -2.0482406616210938, "logits/rejected": -2.320687770843506, "logps/chosen": -95.1606216430664, "logps/rejected": -174.62399291992188, "loss": 0.106, "rewards/accuracies": 0.75, "rewards/chosen": -3.159104824066162, "rewards/margins": 5.978691577911377, "rewards/rejected": -9.137796401977539, "step": 1899 }, { "epoch": 3.27, "learning_rate": 3.5369740756481085e-07, "logits/chosen": -2.298375368118286, "logits/rejected": -2.1530239582061768, "logps/chosen": -102.8799057006836, "logps/rejected": -188.20567321777344, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -2.850393056869507, "rewards/margins": 9.098336219787598, "rewards/rejected": -11.948728561401367, "step": 1900 }, { "epoch": 3.27, "learning_rate": 3.5359116022099445e-07, "logits/chosen": -2.3359334468841553, "logits/rejected": -2.031728506088257, "logps/chosen": -119.20780944824219, "logps/rejected": -189.58154296875, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -3.675990343093872, "rewards/margins": 7.443896293640137, "rewards/rejected": -11.11988639831543, "step": 1901 }, { "epoch": 3.27, "learning_rate": 3.534849128771781e-07, "logits/chosen": -2.2281856536865234, "logits/rejected": -2.346341848373413, "logps/chosen": -141.10452270507812, "logps/rejected": -219.11318969726562, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -4.4710493087768555, "rewards/margins": 7.949299335479736, "rewards/rejected": -12.42034912109375, "step": 1902 }, { "epoch": 3.28, "learning_rate": 3.5337866553336164e-07, "logits/chosen": -2.516505002975464, "logits/rejected": -2.060196876525879, "logps/chosen": -141.53543090820312, "logps/rejected": -220.47747802734375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -4.018714904785156, "rewards/margins": 9.055784225463867, "rewards/rejected": -13.074499130249023, "step": 1903 }, { "epoch": 3.28, "learning_rate": 3.5327241818954524e-07, "logits/chosen": -2.316240072250366, "logits/rejected": -2.053154468536377, "logps/chosen": -121.53386688232422, "logps/rejected": -187.1411895751953, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": -3.7055530548095703, "rewards/margins": 7.572687149047852, "rewards/rejected": -11.278239250183105, "step": 1904 }, { "epoch": 3.28, "learning_rate": 3.5316617084572884e-07, "logits/chosen": -2.1456263065338135, "logits/rejected": -2.091322898864746, "logps/chosen": -105.36050415039062, "logps/rejected": -209.62733459472656, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.017277956008911, "rewards/margins": 11.208868980407715, "rewards/rejected": -13.226146697998047, "step": 1905 }, { "epoch": 3.28, "learning_rate": 3.5305992350191244e-07, "logits/chosen": -2.0830774307250977, "logits/rejected": -2.197953939437866, "logps/chosen": -101.90369415283203, "logps/rejected": -210.31690979003906, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.15886652469635, "rewards/margins": 9.279873847961426, "rewards/rejected": -10.438739776611328, "step": 1906 }, { "epoch": 3.28, "learning_rate": 3.5295367615809604e-07, "logits/chosen": -2.303217887878418, "logits/rejected": -2.113046646118164, "logps/chosen": -97.15463256835938, "logps/rejected": -157.59226989746094, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -2.1551096439361572, "rewards/margins": 5.674201965332031, "rewards/rejected": -7.829312324523926, "step": 1907 }, { "epoch": 3.28, "learning_rate": 3.5284742881427964e-07, "logits/chosen": -2.477426767349243, "logits/rejected": -2.026784896850586, "logps/chosen": -134.75933837890625, "logps/rejected": -184.844970703125, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -2.0745301246643066, "rewards/margins": 6.879499912261963, "rewards/rejected": -8.95403003692627, "step": 1908 }, { "epoch": 3.29, "learning_rate": 3.5274118147046324e-07, "logits/chosen": -2.114363431930542, "logits/rejected": -1.8458493947982788, "logps/chosen": -111.12751770019531, "logps/rejected": -159.1112060546875, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -2.536383628845215, "rewards/margins": 4.850970268249512, "rewards/rejected": -7.387353420257568, "step": 1909 }, { "epoch": 3.29, "learning_rate": 3.526349341266468e-07, "logits/chosen": -2.3870019912719727, "logits/rejected": -2.077819585800171, "logps/chosen": -105.83980560302734, "logps/rejected": -165.2649688720703, "loss": 0.0515, "rewards/accuracies": 1.0, "rewards/chosen": -2.934469223022461, "rewards/margins": 7.228365898132324, "rewards/rejected": -10.162835121154785, "step": 1910 }, { "epoch": 3.29, "learning_rate": 3.5252868678283044e-07, "logits/chosen": -2.188901662826538, "logits/rejected": -2.0701568126678467, "logps/chosen": -114.8265151977539, "logps/rejected": -154.0730438232422, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -4.927719593048096, "rewards/margins": 4.153033256530762, "rewards/rejected": -9.080753326416016, "step": 1911 }, { "epoch": 3.29, "learning_rate": 3.52422439439014e-07, "logits/chosen": -1.9165065288543701, "logits/rejected": -2.022686004638672, "logps/chosen": -113.06906127929688, "logps/rejected": -205.35043334960938, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.4403975009918213, "rewards/margins": 7.795864105224609, "rewards/rejected": -10.236261367797852, "step": 1912 }, { "epoch": 3.29, "learning_rate": 3.523161920951976e-07, "logits/chosen": -1.932686448097229, "logits/rejected": -2.2986905574798584, "logps/chosen": -96.62179565429688, "logps/rejected": -201.05274963378906, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -2.727745294570923, "rewards/margins": 9.232549667358398, "rewards/rejected": -11.960293769836426, "step": 1913 }, { "epoch": 3.29, "learning_rate": 3.5220994475138123e-07, "logits/chosen": -2.221896171569824, "logits/rejected": -2.404877185821533, "logps/chosen": -112.76406860351562, "logps/rejected": -161.33914184570312, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.865985631942749, "rewards/margins": 5.582570552825928, "rewards/rejected": -7.448556900024414, "step": 1914 }, { "epoch": 3.3, "learning_rate": 3.521036974075648e-07, "logits/chosen": -2.0502541065216064, "logits/rejected": -2.460597038269043, "logps/chosen": -118.25824737548828, "logps/rejected": -202.36019897460938, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -2.7057414054870605, "rewards/margins": 7.5941901206970215, "rewards/rejected": -10.299931526184082, "step": 1915 }, { "epoch": 3.3, "learning_rate": 3.519974500637484e-07, "logits/chosen": -2.0860629081726074, "logits/rejected": -2.3031859397888184, "logps/chosen": -145.17605590820312, "logps/rejected": -218.47134399414062, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": -5.366467475891113, "rewards/margins": 6.629027366638184, "rewards/rejected": -11.995495796203613, "step": 1916 }, { "epoch": 3.3, "learning_rate": 3.51891202719932e-07, "logits/chosen": -2.0301122665405273, "logits/rejected": -2.145770311355591, "logps/chosen": -108.42440795898438, "logps/rejected": -197.51324462890625, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": -2.605362892150879, "rewards/margins": 8.575713157653809, "rewards/rejected": -11.181076049804688, "step": 1917 }, { "epoch": 3.3, "learning_rate": 3.517849553761156e-07, "logits/chosen": -1.9983720779418945, "logits/rejected": -2.213264226913452, "logps/chosen": -106.88433074951172, "logps/rejected": -197.2941436767578, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -3.073653221130371, "rewards/margins": 9.112279891967773, "rewards/rejected": -12.185934066772461, "step": 1918 }, { "epoch": 3.3, "learning_rate": 3.516787080322992e-07, "logits/chosen": -2.2914013862609863, "logits/rejected": -2.276878595352173, "logps/chosen": -85.10562133789062, "logps/rejected": -155.02017211914062, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.1167646646499634, "rewards/margins": 6.513155460357666, "rewards/rejected": -7.629920482635498, "step": 1919 }, { "epoch": 3.3, "learning_rate": 3.5157246068848277e-07, "logits/chosen": -2.388632297515869, "logits/rejected": -2.237571954727173, "logps/chosen": -141.13815307617188, "logps/rejected": -184.80157470703125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -3.0114572048187256, "rewards/margins": 5.7525129318237305, "rewards/rejected": -8.763969421386719, "step": 1920 }, { "epoch": 3.31, "learning_rate": 3.5146621334466637e-07, "logits/chosen": -2.0468063354492188, "logits/rejected": -2.273277997970581, "logps/chosen": -91.01835632324219, "logps/rejected": -158.9666748046875, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": -2.3429224491119385, "rewards/margins": 6.10564661026001, "rewards/rejected": -8.448568344116211, "step": 1921 }, { "epoch": 3.31, "learning_rate": 3.513599660008499e-07, "logits/chosen": -2.2879700660705566, "logits/rejected": -2.299039363861084, "logps/chosen": -127.09772491455078, "logps/rejected": -199.33804321289062, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.4626901149749756, "rewards/margins": 7.860572338104248, "rewards/rejected": -11.323262214660645, "step": 1922 }, { "epoch": 3.31, "learning_rate": 3.5125371865703357e-07, "logits/chosen": -2.0938169956207275, "logits/rejected": -2.4319393634796143, "logps/chosen": -90.49284362792969, "logps/rejected": -213.74819946289062, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": -1.4784271717071533, "rewards/margins": 10.678735733032227, "rewards/rejected": -12.157163619995117, "step": 1923 }, { "epoch": 3.31, "learning_rate": 3.5114747131321717e-07, "logits/chosen": -2.277888298034668, "logits/rejected": -2.007779836654663, "logps/chosen": -94.46932983398438, "logps/rejected": -193.9739990234375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.5491005182266235, "rewards/margins": 9.466835021972656, "rewards/rejected": -11.015935897827148, "step": 1924 }, { "epoch": 3.31, "learning_rate": 3.510412239694007e-07, "logits/chosen": -1.6064424514770508, "logits/rejected": -2.368511438369751, "logps/chosen": -122.0791015625, "logps/rejected": -211.57936096191406, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -4.1804118156433105, "rewards/margins": 7.224958896636963, "rewards/rejected": -11.405369758605957, "step": 1925 }, { "epoch": 3.31, "learning_rate": 3.5093497662558437e-07, "logits/chosen": -2.204932928085327, "logits/rejected": -1.993077039718628, "logps/chosen": -136.47848510742188, "logps/rejected": -210.06881713867188, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -3.922809362411499, "rewards/margins": 7.8622965812683105, "rewards/rejected": -11.785104751586914, "step": 1926 }, { "epoch": 3.32, "learning_rate": 3.508287292817679e-07, "logits/chosen": -2.391392946243286, "logits/rejected": -1.808899998664856, "logps/chosen": -107.56006622314453, "logps/rejected": -190.6521759033203, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.264380693435669, "rewards/margins": 10.861104011535645, "rewards/rejected": -12.125484466552734, "step": 1927 }, { "epoch": 3.32, "learning_rate": 3.507224819379515e-07, "logits/chosen": -2.323439598083496, "logits/rejected": -2.2102956771850586, "logps/chosen": -111.0394515991211, "logps/rejected": -207.00946044921875, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -1.154863953590393, "rewards/margins": 10.77619743347168, "rewards/rejected": -11.931060791015625, "step": 1928 }, { "epoch": 3.32, "learning_rate": 3.5061623459413516e-07, "logits/chosen": -2.0879440307617188, "logits/rejected": -2.0632569789886475, "logps/chosen": -106.10816192626953, "logps/rejected": -160.60317993164062, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.801582098007202, "rewards/margins": 5.892449378967285, "rewards/rejected": -8.694031715393066, "step": 1929 }, { "epoch": 3.32, "learning_rate": 3.505099872503187e-07, "logits/chosen": -2.1990396976470947, "logits/rejected": -2.1944384574890137, "logps/chosen": -121.2677230834961, "logps/rejected": -205.11280822753906, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -3.6382029056549072, "rewards/margins": 8.269826889038086, "rewards/rejected": -11.908029556274414, "step": 1930 }, { "epoch": 3.32, "learning_rate": 3.5040373990650236e-07, "logits/chosen": -2.0547361373901367, "logits/rejected": -2.272059917449951, "logps/chosen": -107.61489868164062, "logps/rejected": -204.8165283203125, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -2.4438676834106445, "rewards/margins": 8.267850875854492, "rewards/rejected": -10.711718559265137, "step": 1931 }, { "epoch": 3.33, "learning_rate": 3.502974925626859e-07, "logits/chosen": -1.8632819652557373, "logits/rejected": -2.2923331260681152, "logps/chosen": -107.71409606933594, "logps/rejected": -212.75257873535156, "loss": 0.1091, "rewards/accuracies": 1.0, "rewards/chosen": -2.9813668727874756, "rewards/margins": 9.066309928894043, "rewards/rejected": -12.047677040100098, "step": 1932 }, { "epoch": 3.33, "learning_rate": 3.501912452188695e-07, "logits/chosen": -2.046201705932617, "logits/rejected": -2.355361223220825, "logps/chosen": -143.77809143066406, "logps/rejected": -266.9043273925781, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -4.086525917053223, "rewards/margins": 10.148859024047852, "rewards/rejected": -14.235383987426758, "step": 1933 }, { "epoch": 3.33, "learning_rate": 3.5008499787505316e-07, "logits/chosen": -2.3997554779052734, "logits/rejected": -1.8088074922561646, "logps/chosen": -107.90145874023438, "logps/rejected": -142.3148193359375, "loss": 0.0725, "rewards/accuracies": 0.75, "rewards/chosen": -1.5940356254577637, "rewards/margins": 5.998514175415039, "rewards/rejected": -7.592549800872803, "step": 1934 }, { "epoch": 3.33, "learning_rate": 3.499787505312367e-07, "logits/chosen": -2.037937641143799, "logits/rejected": -2.125239133834839, "logps/chosen": -80.52163696289062, "logps/rejected": -201.22537231445312, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": -1.6373521089553833, "rewards/margins": 11.045886993408203, "rewards/rejected": -12.683238983154297, "step": 1935 }, { "epoch": 3.33, "learning_rate": 3.498725031874203e-07, "logits/chosen": -2.103245496749878, "logits/rejected": -2.1625308990478516, "logps/chosen": -96.95569610595703, "logps/rejected": -217.43421936035156, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -1.2740188837051392, "rewards/margins": 10.925436019897461, "rewards/rejected": -12.199455261230469, "step": 1936 }, { "epoch": 3.33, "learning_rate": 3.497662558436039e-07, "logits/chosen": -2.0172371864318848, "logits/rejected": -2.3256683349609375, "logps/chosen": -107.16539001464844, "logps/rejected": -170.5994873046875, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -2.4137043952941895, "rewards/margins": 5.688381671905518, "rewards/rejected": -8.102086067199707, "step": 1937 }, { "epoch": 3.34, "learning_rate": 3.496600084997875e-07, "logits/chosen": -1.770223617553711, "logits/rejected": -2.1569323539733887, "logps/chosen": -102.06816101074219, "logps/rejected": -214.87716674804688, "loss": 0.082, "rewards/accuracies": 1.0, "rewards/chosen": -3.2392375469207764, "rewards/margins": 10.366870880126953, "rewards/rejected": -13.606107711791992, "step": 1938 }, { "epoch": 3.34, "learning_rate": 3.4955376115597105e-07, "logits/chosen": -1.8292255401611328, "logits/rejected": -2.201200246810913, "logps/chosen": -113.65985870361328, "logps/rejected": -245.21653747558594, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -3.7938857078552246, "rewards/margins": 9.827415466308594, "rewards/rejected": -13.62130069732666, "step": 1939 }, { "epoch": 3.34, "learning_rate": 3.494475138121547e-07, "logits/chosen": -2.2082021236419678, "logits/rejected": -2.3232498168945312, "logps/chosen": -110.67672729492188, "logps/rejected": -212.40087890625, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -3.627753973007202, "rewards/margins": 9.634382247924805, "rewards/rejected": -13.262136459350586, "step": 1940 }, { "epoch": 3.34, "learning_rate": 3.493412664683383e-07, "logits/chosen": -2.2263240814208984, "logits/rejected": -2.3886117935180664, "logps/chosen": -107.36358642578125, "logps/rejected": -196.49810791015625, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -1.8463534116744995, "rewards/margins": 8.279111862182617, "rewards/rejected": -10.125465393066406, "step": 1941 }, { "epoch": 3.34, "learning_rate": 3.4923501912452184e-07, "logits/chosen": -2.0714635848999023, "logits/rejected": -2.243752956390381, "logps/chosen": -130.53863525390625, "logps/rejected": -199.72598266601562, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -4.004471302032471, "rewards/margins": 7.369318008422852, "rewards/rejected": -11.37378978729248, "step": 1942 }, { "epoch": 3.34, "learning_rate": 3.491287717807055e-07, "logits/chosen": -2.1779141426086426, "logits/rejected": -2.09458589553833, "logps/chosen": -115.67549896240234, "logps/rejected": -215.61865234375, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -3.045114278793335, "rewards/margins": 10.113031387329102, "rewards/rejected": -13.158145904541016, "step": 1943 }, { "epoch": 3.35, "learning_rate": 3.4902252443688904e-07, "logits/chosen": -1.7332532405853271, "logits/rejected": -2.3151743412017822, "logps/chosen": -91.55551147460938, "logps/rejected": -200.403076171875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.6206499338150024, "rewards/margins": 7.468377113342285, "rewards/rejected": -9.089027404785156, "step": 1944 }, { "epoch": 3.35, "learning_rate": 3.4891627709307264e-07, "logits/chosen": -2.045137405395508, "logits/rejected": -2.318136692047119, "logps/chosen": -117.80487823486328, "logps/rejected": -178.22695922851562, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -4.750150680541992, "rewards/margins": 4.5493059158325195, "rewards/rejected": -9.299457550048828, "step": 1945 }, { "epoch": 3.35, "learning_rate": 3.488100297492563e-07, "logits/chosen": -1.7524409294128418, "logits/rejected": -2.3925528526306152, "logps/chosen": -96.30784606933594, "logps/rejected": -221.04739379882812, "loss": 0.0863, "rewards/accuracies": 1.0, "rewards/chosen": -2.4387664794921875, "rewards/margins": 10.147250175476074, "rewards/rejected": -12.586015701293945, "step": 1946 }, { "epoch": 3.35, "learning_rate": 3.4870378240543984e-07, "logits/chosen": -2.0739829540252686, "logits/rejected": -2.213717460632324, "logps/chosen": -97.9610595703125, "logps/rejected": -205.91287231445312, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -1.4984931945800781, "rewards/margins": 9.422446250915527, "rewards/rejected": -10.920938491821289, "step": 1947 }, { "epoch": 3.35, "learning_rate": 3.4859753506162344e-07, "logits/chosen": -2.077794313430786, "logits/rejected": -2.163335084915161, "logps/chosen": -122.42552947998047, "logps/rejected": -231.2874298095703, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -2.9984874725341797, "rewards/margins": 10.545677185058594, "rewards/rejected": -13.544164657592773, "step": 1948 }, { "epoch": 3.35, "learning_rate": 3.4849128771780704e-07, "logits/chosen": -1.877705454826355, "logits/rejected": -2.3377413749694824, "logps/chosen": -128.38160705566406, "logps/rejected": -240.54122924804688, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": -5.5747222900390625, "rewards/margins": 8.1810302734375, "rewards/rejected": -13.755753517150879, "step": 1949 }, { "epoch": 3.36, "learning_rate": 3.4838504037399063e-07, "logits/chosen": -2.160829782485962, "logits/rejected": -2.2280454635620117, "logps/chosen": -91.89546203613281, "logps/rejected": -194.13497924804688, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -2.743922233581543, "rewards/margins": 8.755249977111816, "rewards/rejected": -11.49917221069336, "step": 1950 }, { "epoch": 3.36, "learning_rate": 3.4827879303017423e-07, "logits/chosen": -2.4108920097351074, "logits/rejected": -1.805833339691162, "logps/chosen": -122.74989318847656, "logps/rejected": -173.068115234375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.9309520721435547, "rewards/margins": 7.334902286529541, "rewards/rejected": -10.265854835510254, "step": 1951 }, { "epoch": 3.36, "learning_rate": 3.4817254568635783e-07, "logits/chosen": -2.089238166809082, "logits/rejected": -2.426804542541504, "logps/chosen": -116.75547790527344, "logps/rejected": -171.15980529785156, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -3.5392775535583496, "rewards/margins": 4.746065139770508, "rewards/rejected": -8.285343170166016, "step": 1952 }, { "epoch": 3.36, "learning_rate": 3.4806629834254143e-07, "logits/chosen": -2.086528778076172, "logits/rejected": -1.92962646484375, "logps/chosen": -119.67475891113281, "logps/rejected": -216.37396240234375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.113302707672119, "rewards/margins": 9.461438179016113, "rewards/rejected": -12.57474136352539, "step": 1953 }, { "epoch": 3.36, "learning_rate": 3.47960050998725e-07, "logits/chosen": -1.984379768371582, "logits/rejected": -2.1161868572235107, "logps/chosen": -138.2034912109375, "logps/rejected": -236.80374145507812, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -5.053505897521973, "rewards/margins": 9.31325912475586, "rewards/rejected": -14.366765975952148, "step": 1954 }, { "epoch": 3.36, "learning_rate": 3.4785380365490863e-07, "logits/chosen": -2.001913547515869, "logits/rejected": -2.2903149127960205, "logps/chosen": -110.27910614013672, "logps/rejected": -201.94387817382812, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -3.3617022037506104, "rewards/margins": 8.021181106567383, "rewards/rejected": -11.382882118225098, "step": 1955 }, { "epoch": 3.37, "learning_rate": 3.4774755631109223e-07, "logits/chosen": -1.4543542861938477, "logits/rejected": -2.384909152984619, "logps/chosen": -95.55412292480469, "logps/rejected": -235.30044555664062, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.8641839027404785, "rewards/margins": 10.313879013061523, "rewards/rejected": -12.178062438964844, "step": 1956 }, { "epoch": 3.37, "learning_rate": 3.476413089672758e-07, "logits/chosen": -2.1907296180725098, "logits/rejected": -2.059303045272827, "logps/chosen": -93.40790557861328, "logps/rejected": -200.82305908203125, "loss": 0.1101, "rewards/accuracies": 1.0, "rewards/chosen": -1.8197764158248901, "rewards/margins": 10.97609806060791, "rewards/rejected": -12.79587459564209, "step": 1957 }, { "epoch": 3.37, "learning_rate": 3.475350616234594e-07, "logits/chosen": -1.727502465248108, "logits/rejected": -2.3470346927642822, "logps/chosen": -130.33346557617188, "logps/rejected": -213.753173828125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -4.551438808441162, "rewards/margins": 7.583824634552002, "rewards/rejected": -12.13526439666748, "step": 1958 }, { "epoch": 3.37, "learning_rate": 3.4742881427964297e-07, "logits/chosen": -1.941053032875061, "logits/rejected": -2.2048795223236084, "logps/chosen": -112.70380401611328, "logps/rejected": -213.45614624023438, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -2.391230583190918, "rewards/margins": 10.669708251953125, "rewards/rejected": -13.060938835144043, "step": 1959 }, { "epoch": 3.37, "learning_rate": 3.4732256693582657e-07, "logits/chosen": -2.037454128265381, "logits/rejected": -1.9879341125488281, "logps/chosen": -105.8843002319336, "logps/rejected": -186.49398803710938, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": -2.374451160430908, "rewards/margins": 7.8080267906188965, "rewards/rejected": -10.182477951049805, "step": 1960 }, { "epoch": 3.38, "learning_rate": 3.472163195920102e-07, "logits/chosen": -2.1395630836486816, "logits/rejected": -2.1562867164611816, "logps/chosen": -89.74276733398438, "logps/rejected": -189.11561584472656, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": -1.4400639533996582, "rewards/margins": 9.120641708374023, "rewards/rejected": -10.560705184936523, "step": 1961 }, { "epoch": 3.38, "learning_rate": 3.4711007224819377e-07, "logits/chosen": -1.9240597486495972, "logits/rejected": -2.2310330867767334, "logps/chosen": -96.43234252929688, "logps/rejected": -242.648681640625, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -1.8859378099441528, "rewards/margins": 12.873873710632324, "rewards/rejected": -14.759811401367188, "step": 1962 }, { "epoch": 3.38, "learning_rate": 3.4700382490437737e-07, "logits/chosen": -2.4169068336486816, "logits/rejected": -2.149930000305176, "logps/chosen": -89.68338775634766, "logps/rejected": -233.6000213623047, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.1596460342407227, "rewards/margins": 14.693425178527832, "rewards/rejected": -15.853071212768555, "step": 1963 }, { "epoch": 3.38, "learning_rate": 3.4689757756056097e-07, "logits/chosen": -2.15918231010437, "logits/rejected": -2.3308513164520264, "logps/chosen": -93.9031753540039, "logps/rejected": -215.27438354492188, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -2.0019612312316895, "rewards/margins": 10.156513214111328, "rewards/rejected": -12.15847396850586, "step": 1964 }, { "epoch": 3.38, "learning_rate": 3.4679133021674457e-07, "logits/chosen": -2.1280462741851807, "logits/rejected": -2.089193344116211, "logps/chosen": -151.40805053710938, "logps/rejected": -236.61181640625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -6.069986343383789, "rewards/margins": 8.489128112792969, "rewards/rejected": -14.559114456176758, "step": 1965 }, { "epoch": 3.38, "learning_rate": 3.466850828729281e-07, "logits/chosen": -2.3630120754241943, "logits/rejected": -2.1871092319488525, "logps/chosen": -127.4388656616211, "logps/rejected": -155.98780822753906, "loss": 0.0706, "rewards/accuracies": 0.75, "rewards/chosen": -3.686532497406006, "rewards/margins": 4.9392499923706055, "rewards/rejected": -8.62578296661377, "step": 1966 }, { "epoch": 3.39, "learning_rate": 3.4657883552911176e-07, "logits/chosen": -1.722363829612732, "logits/rejected": -2.3987526893615723, "logps/chosen": -92.0568618774414, "logps/rejected": -216.6081085205078, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -2.693275213241577, "rewards/margins": 8.300914764404297, "rewards/rejected": -10.994190216064453, "step": 1967 }, { "epoch": 3.39, "learning_rate": 3.4647258818529536e-07, "logits/chosen": -1.8979268074035645, "logits/rejected": -2.2731716632843018, "logps/chosen": -84.99600219726562, "logps/rejected": -210.6129913330078, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -0.49914607405662537, "rewards/margins": 10.500448226928711, "rewards/rejected": -10.999594688415527, "step": 1968 }, { "epoch": 3.39, "learning_rate": 3.463663408414789e-07, "logits/chosen": -1.5547453165054321, "logits/rejected": -2.2910571098327637, "logps/chosen": -83.67683410644531, "logps/rejected": -207.02891540527344, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.3417237997055054, "rewards/margins": 9.70676040649414, "rewards/rejected": -11.048484802246094, "step": 1969 }, { "epoch": 3.39, "learning_rate": 3.4626009349766256e-07, "logits/chosen": -1.8486579656600952, "logits/rejected": -2.213621139526367, "logps/chosen": -114.94308471679688, "logps/rejected": -219.05044555664062, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -2.328185558319092, "rewards/margins": 10.04953384399414, "rewards/rejected": -12.37771987915039, "step": 1970 }, { "epoch": 3.39, "learning_rate": 3.461538461538461e-07, "logits/chosen": -2.0819807052612305, "logits/rejected": -2.260636806488037, "logps/chosen": -89.00367736816406, "logps/rejected": -213.90867614746094, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -2.070214033126831, "rewards/margins": 10.88310718536377, "rewards/rejected": -12.95332145690918, "step": 1971 }, { "epoch": 3.39, "learning_rate": 3.460475988100297e-07, "logits/chosen": -2.156433343887329, "logits/rejected": -2.170619010925293, "logps/chosen": -110.6474609375, "logps/rejected": -195.30462646484375, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -3.1631948947906494, "rewards/margins": 7.791452407836914, "rewards/rejected": -10.954648971557617, "step": 1972 }, { "epoch": 3.4, "learning_rate": 3.4594135146621336e-07, "logits/chosen": -1.967104434967041, "logits/rejected": -2.260931968688965, "logps/chosen": -83.70194244384766, "logps/rejected": -213.38406372070312, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": -1.468015193939209, "rewards/margins": 10.639877319335938, "rewards/rejected": -12.107892990112305, "step": 1973 }, { "epoch": 3.4, "learning_rate": 3.458351041223969e-07, "logits/chosen": -1.9730956554412842, "logits/rejected": -2.2243127822875977, "logps/chosen": -96.45579528808594, "logps/rejected": -198.8378143310547, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -1.9253342151641846, "rewards/margins": 10.423544883728027, "rewards/rejected": -12.348878860473633, "step": 1974 }, { "epoch": 3.4, "learning_rate": 3.4572885677858055e-07, "logits/chosen": -2.133183002471924, "logits/rejected": -2.2138991355895996, "logps/chosen": -120.98741912841797, "logps/rejected": -196.79742431640625, "loss": 0.0952, "rewards/accuracies": 1.0, "rewards/chosen": -3.537562131881714, "rewards/margins": 6.074627876281738, "rewards/rejected": -9.612190246582031, "step": 1975 }, { "epoch": 3.4, "learning_rate": 3.456226094347641e-07, "logits/chosen": -2.172328233718872, "logits/rejected": -1.8952457904815674, "logps/chosen": -137.10067749023438, "logps/rejected": -235.43710327148438, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -5.2989935874938965, "rewards/margins": 9.725406646728516, "rewards/rejected": -15.024399757385254, "step": 1976 }, { "epoch": 3.4, "learning_rate": 3.455163620909477e-07, "logits/chosen": -2.44712233543396, "logits/rejected": -1.9473215341567993, "logps/chosen": -120.32858276367188, "logps/rejected": -181.04190063476562, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": -3.558349609375, "rewards/margins": 6.862231254577637, "rewards/rejected": -10.420580863952637, "step": 1977 }, { "epoch": 3.4, "learning_rate": 3.4541011474713135e-07, "logits/chosen": -1.7736891508102417, "logits/rejected": -2.296696662902832, "logps/chosen": -79.39356231689453, "logps/rejected": -224.91397094726562, "loss": 0.0843, "rewards/accuracies": 1.0, "rewards/chosen": -2.0670299530029297, "rewards/margins": 12.338835716247559, "rewards/rejected": -14.405864715576172, "step": 1978 }, { "epoch": 3.41, "learning_rate": 3.453038674033149e-07, "logits/chosen": -2.2111382484436035, "logits/rejected": -2.3009109497070312, "logps/chosen": -92.60774993896484, "logps/rejected": -182.0062713623047, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -1.6801378726959229, "rewards/margins": 8.171278953552246, "rewards/rejected": -9.85141658782959, "step": 1979 }, { "epoch": 3.41, "learning_rate": 3.451976200594985e-07, "logits/chosen": -2.0853500366210938, "logits/rejected": -2.1271843910217285, "logps/chosen": -114.43374633789062, "logps/rejected": -206.23265075683594, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -3.4582936763763428, "rewards/margins": 9.069416999816895, "rewards/rejected": -12.527710914611816, "step": 1980 }, { "epoch": 3.41, "learning_rate": 3.450913727156821e-07, "logits/chosen": -2.312453269958496, "logits/rejected": -1.9164161682128906, "logps/chosen": -102.55662536621094, "logps/rejected": -166.80548095703125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.5555522441864014, "rewards/margins": 6.8764238357543945, "rewards/rejected": -8.431975364685059, "step": 1981 }, { "epoch": 3.41, "learning_rate": 3.449851253718657e-07, "logits/chosen": -1.8811609745025635, "logits/rejected": -2.337435722351074, "logps/chosen": -99.24263000488281, "logps/rejected": -206.61073303222656, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -1.6945405006408691, "rewards/margins": 8.85428524017334, "rewards/rejected": -10.548826217651367, "step": 1982 }, { "epoch": 3.41, "learning_rate": 3.448788780280493e-07, "logits/chosen": -2.330075740814209, "logits/rejected": -2.2499046325683594, "logps/chosen": -116.63615417480469, "logps/rejected": -236.579833984375, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -2.678353786468506, "rewards/margins": 11.542434692382812, "rewards/rejected": -14.22078800201416, "step": 1983 }, { "epoch": 3.41, "learning_rate": 3.447726306842329e-07, "logits/chosen": -2.3296406269073486, "logits/rejected": -2.2555899620056152, "logps/chosen": -85.02800750732422, "logps/rejected": -179.43508911132812, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -2.0254478454589844, "rewards/margins": 8.752114295959473, "rewards/rejected": -10.777563095092773, "step": 1984 }, { "epoch": 3.42, "learning_rate": 3.446663833404165e-07, "logits/chosen": -2.233062744140625, "logits/rejected": -2.294177770614624, "logps/chosen": -108.87611389160156, "logps/rejected": -211.01116943359375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.7793787717819214, "rewards/margins": 10.28410816192627, "rewards/rejected": -12.063486099243164, "step": 1985 }, { "epoch": 3.42, "learning_rate": 3.4456013599660004e-07, "logits/chosen": -1.7615387439727783, "logits/rejected": -2.292510986328125, "logps/chosen": -109.84982299804688, "logps/rejected": -203.55548095703125, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": -2.627044677734375, "rewards/margins": 8.231139183044434, "rewards/rejected": -10.858183860778809, "step": 1986 }, { "epoch": 3.42, "learning_rate": 3.444538886527837e-07, "logits/chosen": -2.092690944671631, "logits/rejected": -2.244018077850342, "logps/chosen": -97.36280822753906, "logps/rejected": -208.47760009765625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -2.3010189533233643, "rewards/margins": 9.238759994506836, "rewards/rejected": -11.539778709411621, "step": 1987 }, { "epoch": 3.42, "learning_rate": 3.443476413089673e-07, "logits/chosen": -1.9167354106903076, "logits/rejected": -2.3924522399902344, "logps/chosen": -119.05630493164062, "logps/rejected": -183.14866638183594, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -3.440058946609497, "rewards/margins": 5.161425590515137, "rewards/rejected": -8.601484298706055, "step": 1988 }, { "epoch": 3.42, "learning_rate": 3.4424139396515083e-07, "logits/chosen": -2.2737672328948975, "logits/rejected": -2.1366374492645264, "logps/chosen": -137.89776611328125, "logps/rejected": -210.77871704101562, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.6345572471618652, "rewards/margins": 7.731241226196289, "rewards/rejected": -11.365797996520996, "step": 1989 }, { "epoch": 3.43, "learning_rate": 3.441351466213345e-07, "logits/chosen": -2.09112286567688, "logits/rejected": -2.4287233352661133, "logps/chosen": -172.73341369628906, "logps/rejected": -252.4967041015625, "loss": 0.174, "rewards/accuracies": 1.0, "rewards/chosen": -7.750895023345947, "rewards/margins": 6.727166652679443, "rewards/rejected": -14.47806167602539, "step": 1990 }, { "epoch": 3.43, "learning_rate": 3.4402889927751803e-07, "logits/chosen": -2.2322256565093994, "logits/rejected": -1.8549976348876953, "logps/chosen": -108.569580078125, "logps/rejected": -190.6571044921875, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -2.0725059509277344, "rewards/margins": 9.637866020202637, "rewards/rejected": -11.710372924804688, "step": 1991 }, { "epoch": 3.43, "learning_rate": 3.4392265193370163e-07, "logits/chosen": -2.1588895320892334, "logits/rejected": -2.140073776245117, "logps/chosen": -113.62019348144531, "logps/rejected": -219.64581298828125, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -2.567204236984253, "rewards/margins": 11.646804809570312, "rewards/rejected": -14.214008331298828, "step": 1992 }, { "epoch": 3.43, "learning_rate": 3.438164045898853e-07, "logits/chosen": -2.1703944206237793, "logits/rejected": -2.3248276710510254, "logps/chosen": -75.85224151611328, "logps/rejected": -177.09217834472656, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.7649379968643188, "rewards/margins": 9.288758277893066, "rewards/rejected": -11.053696632385254, "step": 1993 }, { "epoch": 3.43, "learning_rate": 3.4371015724606883e-07, "logits/chosen": -2.02167010307312, "logits/rejected": -2.264660596847534, "logps/chosen": -79.80867004394531, "logps/rejected": -244.6109161376953, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -1.5207860469818115, "rewards/margins": 13.773070335388184, "rewards/rejected": -15.293856620788574, "step": 1994 }, { "epoch": 3.43, "learning_rate": 3.436039099022524e-07, "logits/chosen": -2.3096511363983154, "logits/rejected": -1.9295848608016968, "logps/chosen": -143.42718505859375, "logps/rejected": -197.90777587890625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -5.666443824768066, "rewards/margins": 6.199796676635742, "rewards/rejected": -11.866241455078125, "step": 1995 }, { "epoch": 3.44, "learning_rate": 3.43497662558436e-07, "logits/chosen": -2.153369426727295, "logits/rejected": -2.3691341876983643, "logps/chosen": -104.22261047363281, "logps/rejected": -199.99693298339844, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -3.7656924724578857, "rewards/margins": 8.802218437194824, "rewards/rejected": -12.567911148071289, "step": 1996 }, { "epoch": 3.44, "learning_rate": 3.433914152146196e-07, "logits/chosen": -2.1548619270324707, "logits/rejected": -2.04887318611145, "logps/chosen": -112.70899200439453, "logps/rejected": -195.70211791992188, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -3.1454904079437256, "rewards/margins": 8.757680892944336, "rewards/rejected": -11.90317153930664, "step": 1997 }, { "epoch": 3.44, "learning_rate": 3.4328516787080317e-07, "logits/chosen": -2.2005233764648438, "logits/rejected": -2.1192104816436768, "logps/chosen": -110.87094116210938, "logps/rejected": -182.50921630859375, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -2.7448580265045166, "rewards/margins": 7.943367004394531, "rewards/rejected": -10.688224792480469, "step": 1998 }, { "epoch": 3.44, "learning_rate": 3.431789205269868e-07, "logits/chosen": -2.0213937759399414, "logits/rejected": -2.057079315185547, "logps/chosen": -89.84873962402344, "logps/rejected": -220.19342041015625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.179727792739868, "rewards/margins": 10.756315231323242, "rewards/rejected": -12.936043739318848, "step": 1999 }, { "epoch": 3.44, "learning_rate": 3.430726731831704e-07, "logits/chosen": -2.295414924621582, "logits/rejected": -2.086848735809326, "logps/chosen": -105.96678924560547, "logps/rejected": -166.33749389648438, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.8231053352355957, "rewards/margins": 5.804222106933594, "rewards/rejected": -8.627327919006348, "step": 2000 }, { "epoch": 3.44, "learning_rate": 3.4296642583935397e-07, "logits/chosen": -1.9807361364364624, "logits/rejected": -2.291426658630371, "logps/chosen": -77.86241149902344, "logps/rejected": -202.4632568359375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9704483151435852, "rewards/margins": 10.024380683898926, "rewards/rejected": -10.994829177856445, "step": 2001 }, { "epoch": 3.45, "learning_rate": 3.428601784955376e-07, "logits/chosen": -2.052743911743164, "logits/rejected": -2.2633659839630127, "logps/chosen": -146.11111450195312, "logps/rejected": -236.72528076171875, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": -4.299592018127441, "rewards/margins": 10.309416770935059, "rewards/rejected": -14.6090087890625, "step": 2002 }, { "epoch": 3.45, "learning_rate": 3.4275393115172116e-07, "logits/chosen": -2.421661376953125, "logits/rejected": -2.063112735748291, "logps/chosen": -116.3292465209961, "logps/rejected": -204.51516723632812, "loss": 0.0741, "rewards/accuracies": 1.0, "rewards/chosen": -2.7067341804504395, "rewards/margins": 9.203170776367188, "rewards/rejected": -11.909904479980469, "step": 2003 }, { "epoch": 3.45, "learning_rate": 3.4264768380790476e-07, "logits/chosen": -2.1845898628234863, "logits/rejected": -2.327986240386963, "logps/chosen": -104.08171844482422, "logps/rejected": -195.37754821777344, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -2.3016579151153564, "rewards/margins": 8.572572708129883, "rewards/rejected": -10.87423038482666, "step": 2004 }, { "epoch": 3.45, "learning_rate": 3.425414364640884e-07, "logits/chosen": -2.2064826488494873, "logits/rejected": -1.9393563270568848, "logps/chosen": -117.88058471679688, "logps/rejected": -181.05215454101562, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -2.7112536430358887, "rewards/margins": 7.367593765258789, "rewards/rejected": -10.078847885131836, "step": 2005 }, { "epoch": 3.45, "learning_rate": 3.4243518912027196e-07, "logits/chosen": -2.1456589698791504, "logits/rejected": -2.331418037414551, "logps/chosen": -106.4740982055664, "logps/rejected": -252.52711486816406, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.8729327917099, "rewards/margins": 12.65429973602295, "rewards/rejected": -14.527231216430664, "step": 2006 }, { "epoch": 3.45, "learning_rate": 3.4232894177645556e-07, "logits/chosen": -2.3103132247924805, "logits/rejected": -2.4899611473083496, "logps/chosen": -133.44326782226562, "logps/rejected": -239.91995239257812, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": -4.832530498504639, "rewards/margins": 8.342767715454102, "rewards/rejected": -13.175297737121582, "step": 2007 }, { "epoch": 3.46, "learning_rate": 3.4222269443263916e-07, "logits/chosen": -1.931085228919983, "logits/rejected": -2.4046237468719482, "logps/chosen": -83.03186798095703, "logps/rejected": -189.73446655273438, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -2.053870677947998, "rewards/margins": 8.918366432189941, "rewards/rejected": -10.972236633300781, "step": 2008 }, { "epoch": 3.46, "learning_rate": 3.4211644708882276e-07, "logits/chosen": -1.7526662349700928, "logits/rejected": -2.2047266960144043, "logps/chosen": -98.44650268554688, "logps/rejected": -184.4359588623047, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": -2.5303306579589844, "rewards/margins": 6.597438335418701, "rewards/rejected": -9.127769470214844, "step": 2009 }, { "epoch": 3.46, "learning_rate": 3.4201019974500636e-07, "logits/chosen": -2.2850186824798584, "logits/rejected": -1.9598236083984375, "logps/chosen": -120.74545288085938, "logps/rejected": -173.22451782226562, "loss": 0.2023, "rewards/accuracies": 1.0, "rewards/chosen": -2.6283771991729736, "rewards/margins": 7.557450294494629, "rewards/rejected": -10.18582820892334, "step": 2010 }, { "epoch": 3.46, "learning_rate": 3.4190395240118996e-07, "logits/chosen": -2.1545491218566895, "logits/rejected": -2.1929054260253906, "logps/chosen": -111.02442932128906, "logps/rejected": -169.09988403320312, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -3.0563435554504395, "rewards/margins": 5.23121452331543, "rewards/rejected": -8.287557601928711, "step": 2011 }, { "epoch": 3.46, "learning_rate": 3.4179770505737355e-07, "logits/chosen": -2.2719295024871826, "logits/rejected": -2.0966591835021973, "logps/chosen": -136.77511596679688, "logps/rejected": -200.18601989746094, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -2.493540048599243, "rewards/margins": 8.770059585571289, "rewards/rejected": -11.263599395751953, "step": 2012 }, { "epoch": 3.46, "learning_rate": 3.416914577135571e-07, "logits/chosen": -1.8459105491638184, "logits/rejected": -2.394322633743286, "logps/chosen": -109.3215560913086, "logps/rejected": -197.45953369140625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.1514596939086914, "rewards/margins": 7.706193447113037, "rewards/rejected": -9.85765266418457, "step": 2013 }, { "epoch": 3.47, "learning_rate": 3.4158521036974075e-07, "logits/chosen": -2.384305000305176, "logits/rejected": -2.40690279006958, "logps/chosen": -103.21820831298828, "logps/rejected": -175.19168090820312, "loss": 0.0656, "rewards/accuracies": 1.0, "rewards/chosen": -1.6752188205718994, "rewards/margins": 7.33649206161499, "rewards/rejected": -9.011711120605469, "step": 2014 }, { "epoch": 3.47, "learning_rate": 3.4147896302592435e-07, "logits/chosen": -2.1452829837799072, "logits/rejected": -2.2879912853240967, "logps/chosen": -129.14108276367188, "logps/rejected": -229.095703125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -3.1887102127075195, "rewards/margins": 10.364577293395996, "rewards/rejected": -13.553287506103516, "step": 2015 }, { "epoch": 3.47, "learning_rate": 3.4137271568210795e-07, "logits/chosen": -2.124997138977051, "logits/rejected": -1.9995613098144531, "logps/chosen": -112.8743896484375, "logps/rejected": -195.69029235839844, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -1.6080471277236938, "rewards/margins": 8.444968223571777, "rewards/rejected": -10.053014755249023, "step": 2016 }, { "epoch": 3.47, "learning_rate": 3.4126646833829155e-07, "logits/chosen": -2.3697509765625, "logits/rejected": -2.3164992332458496, "logps/chosen": -103.38130187988281, "logps/rejected": -180.1083221435547, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.659345030784607, "rewards/margins": 7.549598693847656, "rewards/rejected": -9.208944320678711, "step": 2017 }, { "epoch": 3.47, "learning_rate": 3.411602209944751e-07, "logits/chosen": -2.4148635864257812, "logits/rejected": -2.275879144668579, "logps/chosen": -127.65776062011719, "logps/rejected": -175.0711669921875, "loss": 0.1175, "rewards/accuracies": 0.75, "rewards/chosen": -3.776153087615967, "rewards/margins": 4.58249568939209, "rewards/rejected": -8.358649253845215, "step": 2018 }, { "epoch": 3.48, "learning_rate": 3.4105397365065875e-07, "logits/chosen": -2.0501694679260254, "logits/rejected": -2.256540298461914, "logps/chosen": -105.86844635009766, "logps/rejected": -208.11741638183594, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -1.1361324787139893, "rewards/margins": 9.282142639160156, "rewards/rejected": -10.418274879455566, "step": 2019 }, { "epoch": 3.48, "learning_rate": 3.4094772630684235e-07, "logits/chosen": -1.7283422946929932, "logits/rejected": -2.275789499282837, "logps/chosen": -135.9237060546875, "logps/rejected": -204.18353271484375, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -4.807718276977539, "rewards/margins": 5.677184104919434, "rewards/rejected": -10.484902381896973, "step": 2020 }, { "epoch": 3.48, "learning_rate": 3.408414789630259e-07, "logits/chosen": -2.1065244674682617, "logits/rejected": -2.313804864883423, "logps/chosen": -113.75912475585938, "logps/rejected": -237.885986328125, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -3.718597650527954, "rewards/margins": 11.949421882629395, "rewards/rejected": -15.668020248413086, "step": 2021 }, { "epoch": 3.48, "learning_rate": 3.4073523161920954e-07, "logits/chosen": -2.133878231048584, "logits/rejected": -2.025001049041748, "logps/chosen": -107.81632232666016, "logps/rejected": -136.00010681152344, "loss": 0.0528, "rewards/accuracies": 0.75, "rewards/chosen": -3.638946533203125, "rewards/margins": 3.2702972888946533, "rewards/rejected": -6.909244060516357, "step": 2022 }, { "epoch": 3.48, "learning_rate": 3.406289842753931e-07, "logits/chosen": -2.1930885314941406, "logits/rejected": -2.1101481914520264, "logps/chosen": -129.0353546142578, "logps/rejected": -190.44931030273438, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -3.892139196395874, "rewards/margins": 6.495368957519531, "rewards/rejected": -10.387507438659668, "step": 2023 }, { "epoch": 3.48, "learning_rate": 3.405227369315767e-07, "logits/chosen": -2.1012165546417236, "logits/rejected": -2.2829558849334717, "logps/chosen": -120.4486312866211, "logps/rejected": -202.86172485351562, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.8967947959899902, "rewards/margins": 8.521769523620605, "rewards/rejected": -11.418563842773438, "step": 2024 }, { "epoch": 3.49, "learning_rate": 3.404164895877603e-07, "logits/chosen": -1.6889193058013916, "logits/rejected": -2.352810859680176, "logps/chosen": -147.23248291015625, "logps/rejected": -257.56011962890625, "loss": 0.0868, "rewards/accuracies": 1.0, "rewards/chosen": -6.451507091522217, "rewards/margins": 8.562638282775879, "rewards/rejected": -15.014145851135254, "step": 2025 }, { "epoch": 3.49, "learning_rate": 3.403102422439439e-07, "logits/chosen": -1.8913592100143433, "logits/rejected": -2.1161069869995117, "logps/chosen": -109.38789367675781, "logps/rejected": -193.88519287109375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -2.7646539211273193, "rewards/margins": 7.629355430603027, "rewards/rejected": -10.39400863647461, "step": 2026 }, { "epoch": 3.49, "learning_rate": 3.402039949001275e-07, "logits/chosen": -2.1668989658355713, "logits/rejected": -2.1335604190826416, "logps/chosen": -112.82852172851562, "logps/rejected": -223.960205078125, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -3.058361530303955, "rewards/margins": 9.227441787719727, "rewards/rejected": -12.28580379486084, "step": 2027 }, { "epoch": 3.49, "learning_rate": 3.400977475563111e-07, "logits/chosen": -2.3503077030181885, "logits/rejected": -2.122184991836548, "logps/chosen": -105.81161499023438, "logps/rejected": -208.64889526367188, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.9992605447769165, "rewards/margins": 11.74526309967041, "rewards/rejected": -13.744524002075195, "step": 2028 }, { "epoch": 3.49, "learning_rate": 3.399915002124947e-07, "logits/chosen": -2.2126121520996094, "logits/rejected": -2.040816068649292, "logps/chosen": -123.25309753417969, "logps/rejected": -216.26260375976562, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.588291645050049, "rewards/margins": 9.612313270568848, "rewards/rejected": -12.200605392456055, "step": 2029 }, { "epoch": 3.49, "learning_rate": 3.3988525286867823e-07, "logits/chosen": -2.31904935836792, "logits/rejected": -2.1621079444885254, "logps/chosen": -107.81147766113281, "logps/rejected": -207.84844970703125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -3.3140478134155273, "rewards/margins": 9.796918869018555, "rewards/rejected": -13.110966682434082, "step": 2030 }, { "epoch": 3.5, "learning_rate": 3.397790055248619e-07, "logits/chosen": -1.9218690395355225, "logits/rejected": -2.338749647140503, "logps/chosen": -82.2856674194336, "logps/rejected": -187.64944458007812, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -2.058558702468872, "rewards/margins": 8.732343673706055, "rewards/rejected": -10.790903091430664, "step": 2031 }, { "epoch": 3.5, "learning_rate": 3.396727581810455e-07, "logits/chosen": -1.970496654510498, "logits/rejected": -2.403629779815674, "logps/chosen": -107.56723022460938, "logps/rejected": -196.47735595703125, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -3.2759642601013184, "rewards/margins": 7.928752899169922, "rewards/rejected": -11.204717636108398, "step": 2032 }, { "epoch": 3.5, "learning_rate": 3.39566510837229e-07, "logits/chosen": -2.237830400466919, "logits/rejected": -2.0358424186706543, "logps/chosen": -89.52471923828125, "logps/rejected": -202.8619384765625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.1373852491378784, "rewards/margins": 11.34968376159668, "rewards/rejected": -12.487069129943848, "step": 2033 }, { "epoch": 3.5, "learning_rate": 3.394602634934127e-07, "logits/chosen": -2.183501720428467, "logits/rejected": -2.126829147338867, "logps/chosen": -130.29380798339844, "logps/rejected": -278.04034423828125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -4.251673698425293, "rewards/margins": 12.81580924987793, "rewards/rejected": -17.067481994628906, "step": 2034 }, { "epoch": 3.5, "learning_rate": 3.393540161495962e-07, "logits/chosen": -2.2209670543670654, "logits/rejected": -2.218331813812256, "logps/chosen": -118.5508804321289, "logps/rejected": -219.28619384765625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.085451126098633, "rewards/margins": 9.272027969360352, "rewards/rejected": -13.357478141784668, "step": 2035 }, { "epoch": 3.5, "learning_rate": 3.392477688057798e-07, "logits/chosen": -1.9768462181091309, "logits/rejected": -2.213956832885742, "logps/chosen": -140.5599822998047, "logps/rejected": -259.5936279296875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -5.7105560302734375, "rewards/margins": 10.120368957519531, "rewards/rejected": -15.830924987792969, "step": 2036 }, { "epoch": 3.51, "learning_rate": 3.391415214619635e-07, "logits/chosen": -2.1595444679260254, "logits/rejected": -2.432793617248535, "logps/chosen": -120.31658935546875, "logps/rejected": -210.6682586669922, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -4.039462566375732, "rewards/margins": 9.2394437789917, "rewards/rejected": -13.278905868530273, "step": 2037 }, { "epoch": 3.51, "learning_rate": 3.39035274118147e-07, "logits/chosen": -1.8875763416290283, "logits/rejected": -1.8994941711425781, "logps/chosen": -127.1899642944336, "logps/rejected": -196.29910278320312, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -4.059083938598633, "rewards/margins": 6.399777889251709, "rewards/rejected": -10.4588623046875, "step": 2038 }, { "epoch": 3.51, "learning_rate": 3.389290267743306e-07, "logits/chosen": -1.9232518672943115, "logits/rejected": -2.4424099922180176, "logps/chosen": -111.486083984375, "logps/rejected": -211.78477478027344, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0768123865127563, "rewards/margins": 9.263055801391602, "rewards/rejected": -10.339868545532227, "step": 2039 }, { "epoch": 3.51, "learning_rate": 3.388227794305142e-07, "logits/chosen": -2.3414273262023926, "logits/rejected": -1.920290470123291, "logps/chosen": -115.79960632324219, "logps/rejected": -153.88327026367188, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.70241117477417, "rewards/margins": 5.4828290939331055, "rewards/rejected": -7.185240745544434, "step": 2040 }, { "epoch": 3.51, "learning_rate": 3.387165320866978e-07, "logits/chosen": -2.2882447242736816, "logits/rejected": -2.1210436820983887, "logps/chosen": -137.66018676757812, "logps/rejected": -199.51663208007812, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -3.7324845790863037, "rewards/margins": 7.2999067306518555, "rewards/rejected": -11.032392501831055, "step": 2041 }, { "epoch": 3.51, "learning_rate": 3.386102847428814e-07, "logits/chosen": -2.1963753700256348, "logits/rejected": -2.3163247108459473, "logps/chosen": -118.19779968261719, "logps/rejected": -195.18499755859375, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -3.4586033821105957, "rewards/margins": 7.247439861297607, "rewards/rejected": -10.706042289733887, "step": 2042 }, { "epoch": 3.52, "learning_rate": 3.38504037399065e-07, "logits/chosen": -2.2047574520111084, "logits/rejected": -2.048729658126831, "logps/chosen": -88.86312866210938, "logps/rejected": -237.1056365966797, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.3419822454452515, "rewards/margins": 14.191503524780273, "rewards/rejected": -15.533486366271973, "step": 2043 }, { "epoch": 3.52, "learning_rate": 3.383977900552486e-07, "logits/chosen": -2.2159509658813477, "logits/rejected": -2.5023279190063477, "logps/chosen": -102.27823638916016, "logps/rejected": -198.58529663085938, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.9571380615234375, "rewards/margins": 8.085947036743164, "rewards/rejected": -11.043085098266602, "step": 2044 }, { "epoch": 3.52, "learning_rate": 3.3829154271143216e-07, "logits/chosen": -1.9889227151870728, "logits/rejected": -2.1547679901123047, "logps/chosen": -137.8288116455078, "logps/rejected": -241.1344451904297, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -3.69268798828125, "rewards/margins": 9.50592041015625, "rewards/rejected": -13.198609352111816, "step": 2045 }, { "epoch": 3.52, "learning_rate": 3.381852953676158e-07, "logits/chosen": -2.309866189956665, "logits/rejected": -1.9379650354385376, "logps/chosen": -129.19842529296875, "logps/rejected": -181.21987915039062, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -3.7463603019714355, "rewards/margins": 6.609571933746338, "rewards/rejected": -10.355932235717773, "step": 2046 }, { "epoch": 3.52, "learning_rate": 3.380790480237994e-07, "logits/chosen": -2.512834310531616, "logits/rejected": -2.100893497467041, "logps/chosen": -131.2244110107422, "logps/rejected": -204.73228454589844, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": -4.400886535644531, "rewards/margins": 8.652374267578125, "rewards/rejected": -13.053260803222656, "step": 2047 }, { "epoch": 3.52, "learning_rate": 3.3797280067998296e-07, "logits/chosen": -2.0261757373809814, "logits/rejected": -2.489537000656128, "logps/chosen": -106.65692138671875, "logps/rejected": -185.2572479248047, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.1654248237609863, "rewards/margins": 6.254279136657715, "rewards/rejected": -8.419703483581543, "step": 2048 }, { "epoch": 3.53, "learning_rate": 3.378665533361666e-07, "logits/chosen": -2.0666046142578125, "logits/rejected": -2.364243268966675, "logps/chosen": -104.5857925415039, "logps/rejected": -193.91209411621094, "loss": 0.1339, "rewards/accuracies": 1.0, "rewards/chosen": -3.2227251529693604, "rewards/margins": 6.929954528808594, "rewards/rejected": -10.152679443359375, "step": 2049 }, { "epoch": 3.53, "learning_rate": 3.3776030599235015e-07, "logits/chosen": -2.4605488777160645, "logits/rejected": -2.4058470726013184, "logps/chosen": -112.431884765625, "logps/rejected": -185.78805541992188, "loss": 0.1239, "rewards/accuracies": 1.0, "rewards/chosen": -1.2536606788635254, "rewards/margins": 8.952465057373047, "rewards/rejected": -10.20612621307373, "step": 2050 }, { "epoch": 3.53, "learning_rate": 3.3765405864853375e-07, "logits/chosen": -2.159459114074707, "logits/rejected": -2.307616710662842, "logps/chosen": -113.01323699951172, "logps/rejected": -152.86544799804688, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -3.501513957977295, "rewards/margins": 4.254675388336182, "rewards/rejected": -7.756189346313477, "step": 2051 }, { "epoch": 3.53, "learning_rate": 3.3754781130471735e-07, "logits/chosen": -2.30454158782959, "logits/rejected": -2.1846208572387695, "logps/chosen": -117.99028778076172, "logps/rejected": -176.07815551757812, "loss": 0.0702, "rewards/accuracies": 1.0, "rewards/chosen": -3.172508955001831, "rewards/margins": 6.673980236053467, "rewards/rejected": -9.846488952636719, "step": 2052 }, { "epoch": 3.53, "learning_rate": 3.3744156396090095e-07, "logits/chosen": -2.2905116081237793, "logits/rejected": -2.06345796585083, "logps/chosen": -107.0716552734375, "logps/rejected": -195.28244018554688, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.7197344303131104, "rewards/margins": 8.455058097839355, "rewards/rejected": -10.17479133605957, "step": 2053 }, { "epoch": 3.54, "learning_rate": 3.3733531661708455e-07, "logits/chosen": -2.1764888763427734, "logits/rejected": -2.2354674339294434, "logps/chosen": -142.61630249023438, "logps/rejected": -216.02694702148438, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.40118408203125, "rewards/margins": 8.567634582519531, "rewards/rejected": -11.968819618225098, "step": 2054 }, { "epoch": 3.54, "learning_rate": 3.3722906927326815e-07, "logits/chosen": -2.1743969917297363, "logits/rejected": -2.3018131256103516, "logps/chosen": -104.63066101074219, "logps/rejected": -206.00338745117188, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -2.7231791019439697, "rewards/margins": 9.1924467086792, "rewards/rejected": -11.91562557220459, "step": 2055 }, { "epoch": 3.54, "learning_rate": 3.3712282192945175e-07, "logits/chosen": -1.954159140586853, "logits/rejected": -2.243100643157959, "logps/chosen": -107.24449920654297, "logps/rejected": -217.70680236816406, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -2.40841007232666, "rewards/margins": 10.541765213012695, "rewards/rejected": -12.950176239013672, "step": 2056 }, { "epoch": 3.54, "learning_rate": 3.3701657458563535e-07, "logits/chosen": -1.9263768196105957, "logits/rejected": -2.189572334289551, "logps/chosen": -124.90616607666016, "logps/rejected": -226.8557586669922, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -3.482792615890503, "rewards/margins": 8.979364395141602, "rewards/rejected": -12.462157249450684, "step": 2057 }, { "epoch": 3.54, "learning_rate": 3.3691032724181895e-07, "logits/chosen": -1.753374457359314, "logits/rejected": -2.443249225616455, "logps/chosen": -104.84619140625, "logps/rejected": -198.8741912841797, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -3.6782350540161133, "rewards/margins": 7.239447593688965, "rewards/rejected": -10.917682647705078, "step": 2058 }, { "epoch": 3.54, "learning_rate": 3.3680407989800254e-07, "logits/chosen": -2.036921501159668, "logits/rejected": -2.159158706665039, "logps/chosen": -111.9093017578125, "logps/rejected": -231.26699829101562, "loss": 0.0663, "rewards/accuracies": 1.0, "rewards/chosen": -2.9156651496887207, "rewards/margins": 10.90294361114502, "rewards/rejected": -13.818608283996582, "step": 2059 }, { "epoch": 3.55, "learning_rate": 3.3669783255418614e-07, "logits/chosen": -2.06591796875, "logits/rejected": -2.0116677284240723, "logps/chosen": -134.38189697265625, "logps/rejected": -222.0554656982422, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -3.9346730709075928, "rewards/margins": 9.217106819152832, "rewards/rejected": -13.151779174804688, "step": 2060 }, { "epoch": 3.55, "learning_rate": 3.3659158521036974e-07, "logits/chosen": -2.0170254707336426, "logits/rejected": -2.1203083992004395, "logps/chosen": -112.25791931152344, "logps/rejected": -204.5155029296875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.881586790084839, "rewards/margins": 8.547477722167969, "rewards/rejected": -12.429064750671387, "step": 2061 }, { "epoch": 3.55, "learning_rate": 3.364853378665533e-07, "logits/chosen": -2.3186635971069336, "logits/rejected": -2.2016079425811768, "logps/chosen": -138.0384063720703, "logps/rejected": -246.45260620117188, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -4.417926788330078, "rewards/margins": 11.206517219543457, "rewards/rejected": -15.624444007873535, "step": 2062 }, { "epoch": 3.55, "learning_rate": 3.3637909052273694e-07, "logits/chosen": -2.263213634490967, "logits/rejected": -2.2217321395874023, "logps/chosen": -121.13935089111328, "logps/rejected": -211.72935485839844, "loss": 0.1005, "rewards/accuracies": 1.0, "rewards/chosen": -4.040210247039795, "rewards/margins": 7.799770832061768, "rewards/rejected": -11.839982032775879, "step": 2063 }, { "epoch": 3.55, "learning_rate": 3.3627284317892054e-07, "logits/chosen": -2.207244396209717, "logits/rejected": -2.1817972660064697, "logps/chosen": -114.25865173339844, "logps/rejected": -185.42544555664062, "loss": 0.0799, "rewards/accuracies": 0.75, "rewards/chosen": -4.086650848388672, "rewards/margins": 6.857064247131348, "rewards/rejected": -10.943716049194336, "step": 2064 }, { "epoch": 3.55, "learning_rate": 3.361665958351041e-07, "logits/chosen": -2.207523822784424, "logits/rejected": -2.3038315773010254, "logps/chosen": -116.71609497070312, "logps/rejected": -172.34658813476562, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -3.185622215270996, "rewards/margins": 5.196242332458496, "rewards/rejected": -8.381864547729492, "step": 2065 }, { "epoch": 3.56, "learning_rate": 3.3606034849128774e-07, "logits/chosen": -2.273236036300659, "logits/rejected": -2.0339438915252686, "logps/chosen": -140.00062561035156, "logps/rejected": -157.65615844726562, "loss": 0.0436, "rewards/accuracies": 0.75, "rewards/chosen": -3.5808653831481934, "rewards/margins": 2.892460823059082, "rewards/rejected": -6.473326206207275, "step": 2066 }, { "epoch": 3.56, "learning_rate": 3.359541011474713e-07, "logits/chosen": -2.1727585792541504, "logits/rejected": -1.847521185874939, "logps/chosen": -117.69945526123047, "logps/rejected": -188.46646118164062, "loss": 0.0912, "rewards/accuracies": 0.75, "rewards/chosen": -3.6028623580932617, "rewards/margins": 7.793565273284912, "rewards/rejected": -11.396428108215332, "step": 2067 }, { "epoch": 3.56, "learning_rate": 3.358478538036549e-07, "logits/chosen": -2.0578665733337402, "logits/rejected": -2.2390708923339844, "logps/chosen": -132.12942504882812, "logps/rejected": -230.50917053222656, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": -3.7091023921966553, "rewards/margins": 9.873466491699219, "rewards/rejected": -13.582569122314453, "step": 2068 }, { "epoch": 3.56, "learning_rate": 3.3574160645983853e-07, "logits/chosen": -2.1550464630126953, "logits/rejected": -2.2303972244262695, "logps/chosen": -104.79983520507812, "logps/rejected": -205.93304443359375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.3127408027648926, "rewards/margins": 8.80107593536377, "rewards/rejected": -11.113816261291504, "step": 2069 }, { "epoch": 3.56, "learning_rate": 3.356353591160221e-07, "logits/chosen": -2.0683345794677734, "logits/rejected": -2.130993604660034, "logps/chosen": -133.61692810058594, "logps/rejected": -230.9097900390625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -4.4704999923706055, "rewards/margins": 9.443319320678711, "rewards/rejected": -13.913818359375, "step": 2070 }, { "epoch": 3.56, "learning_rate": 3.355291117722057e-07, "logits/chosen": -2.0970380306243896, "logits/rejected": -1.9263277053833008, "logps/chosen": -100.75631713867188, "logps/rejected": -182.60658264160156, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -3.428701400756836, "rewards/margins": 7.510857582092285, "rewards/rejected": -10.939558982849121, "step": 2071 }, { "epoch": 3.57, "learning_rate": 3.354228644283893e-07, "logits/chosen": -2.0983164310455322, "logits/rejected": -2.1873700618743896, "logps/chosen": -141.49276733398438, "logps/rejected": -194.91807556152344, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -4.70698356628418, "rewards/margins": 7.336458206176758, "rewards/rejected": -12.043440818786621, "step": 2072 }, { "epoch": 3.57, "learning_rate": 3.353166170845729e-07, "logits/chosen": -2.2440290451049805, "logits/rejected": -1.865932822227478, "logps/chosen": -136.1667938232422, "logps/rejected": -212.99127197265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.591432571411133, "rewards/margins": 9.844736099243164, "rewards/rejected": -13.436168670654297, "step": 2073 }, { "epoch": 3.57, "learning_rate": 3.352103697407565e-07, "logits/chosen": -2.2768142223358154, "logits/rejected": -2.312342643737793, "logps/chosen": -97.48912048339844, "logps/rejected": -187.84674072265625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.7324037551879883, "rewards/margins": 8.799482345581055, "rewards/rejected": -10.531885147094727, "step": 2074 }, { "epoch": 3.57, "learning_rate": 3.351041223969401e-07, "logits/chosen": -1.7366235256195068, "logits/rejected": -2.278823137283325, "logps/chosen": -131.2175750732422, "logps/rejected": -259.01348876953125, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": -3.672772169113159, "rewards/margins": 12.248973846435547, "rewards/rejected": -15.921746253967285, "step": 2075 }, { "epoch": 3.57, "learning_rate": 3.3499787505312367e-07, "logits/chosen": -1.7723339796066284, "logits/rejected": -2.365447521209717, "logps/chosen": -111.8658676147461, "logps/rejected": -221.08941650390625, "loss": 0.0424, "rewards/accuracies": 0.75, "rewards/chosen": -3.737487316131592, "rewards/margins": 9.027215003967285, "rewards/rejected": -12.764701843261719, "step": 2076 }, { "epoch": 3.57, "learning_rate": 3.348916277093072e-07, "logits/chosen": -2.140859365463257, "logits/rejected": -1.9565784931182861, "logps/chosen": -132.36402893066406, "logps/rejected": -228.24188232421875, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": -4.348552227020264, "rewards/margins": 10.296363830566406, "rewards/rejected": -14.644916534423828, "step": 2077 }, { "epoch": 3.58, "learning_rate": 3.3478538036549087e-07, "logits/chosen": -2.2667503356933594, "logits/rejected": -2.0275888442993164, "logps/chosen": -163.78643798828125, "logps/rejected": -217.4378662109375, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": -6.538717269897461, "rewards/margins": 6.160821914672852, "rewards/rejected": -12.699539184570312, "step": 2078 }, { "epoch": 3.58, "learning_rate": 3.346791330216744e-07, "logits/chosen": -2.2998673915863037, "logits/rejected": -2.136859655380249, "logps/chosen": -124.90367126464844, "logps/rejected": -222.96282958984375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.6130597591400146, "rewards/margins": 11.116158485412598, "rewards/rejected": -14.729217529296875, "step": 2079 }, { "epoch": 3.58, "learning_rate": 3.34572885677858e-07, "logits/chosen": -2.064032793045044, "logits/rejected": -2.0917153358459473, "logps/chosen": -147.31936645507812, "logps/rejected": -226.4766845703125, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": -5.17443323135376, "rewards/margins": 8.461406707763672, "rewards/rejected": -13.635839462280273, "step": 2080 }, { "epoch": 3.58, "learning_rate": 3.3446663833404167e-07, "logits/chosen": -2.1584997177124023, "logits/rejected": -1.8799771070480347, "logps/chosen": -131.758544921875, "logps/rejected": -199.1793212890625, "loss": 0.1216, "rewards/accuracies": 1.0, "rewards/chosen": -4.220189094543457, "rewards/margins": 6.257400989532471, "rewards/rejected": -10.47758960723877, "step": 2081 }, { "epoch": 3.58, "learning_rate": 3.343603909902252e-07, "logits/chosen": -2.0045459270477295, "logits/rejected": -2.3437325954437256, "logps/chosen": -92.03219604492188, "logps/rejected": -193.27635192871094, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -2.2227909564971924, "rewards/margins": 8.87081241607666, "rewards/rejected": -11.09360408782959, "step": 2082 }, { "epoch": 3.59, "learning_rate": 3.342541436464088e-07, "logits/chosen": -2.121886730194092, "logits/rejected": -2.0977084636688232, "logps/chosen": -119.69901275634766, "logps/rejected": -195.82467651367188, "loss": 0.1114, "rewards/accuracies": 1.0, "rewards/chosen": -4.339006423950195, "rewards/margins": 7.271803379058838, "rewards/rejected": -11.610809326171875, "step": 2083 }, { "epoch": 3.59, "learning_rate": 3.341478963025924e-07, "logits/chosen": -2.390652656555176, "logits/rejected": -1.6828672885894775, "logps/chosen": -123.6213150024414, "logps/rejected": -181.75042724609375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -2.3231756687164307, "rewards/margins": 8.213375091552734, "rewards/rejected": -10.53654956817627, "step": 2084 }, { "epoch": 3.59, "learning_rate": 3.34041648958776e-07, "logits/chosen": -2.0163745880126953, "logits/rejected": -2.2867794036865234, "logps/chosen": -141.67503356933594, "logps/rejected": -217.0853271484375, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": -5.596599102020264, "rewards/margins": 6.461659908294678, "rewards/rejected": -12.058259963989258, "step": 2085 }, { "epoch": 3.59, "learning_rate": 3.339354016149596e-07, "logits/chosen": -2.2720634937286377, "logits/rejected": -2.297607898712158, "logps/chosen": -159.28729248046875, "logps/rejected": -235.00958251953125, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -5.928171157836914, "rewards/margins": 7.93603515625, "rewards/rejected": -13.864206314086914, "step": 2086 }, { "epoch": 3.59, "learning_rate": 3.338291542711432e-07, "logits/chosen": -2.0730481147766113, "logits/rejected": -2.241457462310791, "logps/chosen": -106.25408172607422, "logps/rejected": -217.2978973388672, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.239931106567383, "rewards/margins": 9.712469100952148, "rewards/rejected": -13.952400207519531, "step": 2087 }, { "epoch": 3.59, "learning_rate": 3.337229069273268e-07, "logits/chosen": -2.232008934020996, "logits/rejected": -2.1384243965148926, "logps/chosen": -132.98934936523438, "logps/rejected": -208.78533935546875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -2.952404022216797, "rewards/margins": 8.190672874450684, "rewards/rejected": -11.14307689666748, "step": 2088 }, { "epoch": 3.6, "learning_rate": 3.3361665958351035e-07, "logits/chosen": -2.292090892791748, "logits/rejected": -2.1777491569519043, "logps/chosen": -102.39459228515625, "logps/rejected": -204.7060546875, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -2.794390916824341, "rewards/margins": 8.287209510803223, "rewards/rejected": -11.0816011428833, "step": 2089 }, { "epoch": 3.6, "learning_rate": 3.33510412239694e-07, "logits/chosen": -2.109938144683838, "logits/rejected": -2.1605231761932373, "logps/chosen": -106.45770263671875, "logps/rejected": -236.0238037109375, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -3.790191888809204, "rewards/margins": 11.986806869506836, "rewards/rejected": -15.776998519897461, "step": 2090 }, { "epoch": 3.6, "learning_rate": 3.334041648958776e-07, "logits/chosen": -1.8431847095489502, "logits/rejected": -2.3091979026794434, "logps/chosen": -143.69418334960938, "logps/rejected": -251.37901306152344, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -6.284023284912109, "rewards/margins": 8.699495315551758, "rewards/rejected": -14.983518600463867, "step": 2091 }, { "epoch": 3.6, "learning_rate": 3.3329791755206115e-07, "logits/chosen": -1.7280116081237793, "logits/rejected": -2.3578779697418213, "logps/chosen": -99.44036865234375, "logps/rejected": -225.84658813476562, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -2.8456637859344482, "rewards/margins": 10.568333625793457, "rewards/rejected": -13.413997650146484, "step": 2092 }, { "epoch": 3.6, "learning_rate": 3.331916702082448e-07, "logits/chosen": -1.8684606552124023, "logits/rejected": -2.228914737701416, "logps/chosen": -143.70948791503906, "logps/rejected": -244.3684539794922, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -7.322041988372803, "rewards/margins": 8.731866836547852, "rewards/rejected": -16.05390739440918, "step": 2093 }, { "epoch": 3.6, "learning_rate": 3.3308542286442835e-07, "logits/chosen": -2.187063455581665, "logits/rejected": -2.2064948081970215, "logps/chosen": -145.00082397460938, "logps/rejected": -242.3121337890625, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -4.948951721191406, "rewards/margins": 9.446516036987305, "rewards/rejected": -14.395467758178711, "step": 2094 }, { "epoch": 3.61, "learning_rate": 3.3297917552061195e-07, "logits/chosen": -2.499323844909668, "logits/rejected": -1.9675136804580688, "logps/chosen": -111.07052612304688, "logps/rejected": -198.58265686035156, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.9940967559814453, "rewards/margins": 10.030290603637695, "rewards/rejected": -12.02438735961914, "step": 2095 }, { "epoch": 3.61, "learning_rate": 3.328729281767956e-07, "logits/chosen": -1.9846699237823486, "logits/rejected": -2.1264257431030273, "logps/chosen": -104.28239440917969, "logps/rejected": -189.38491821289062, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -3.294497489929199, "rewards/margins": 7.04554557800293, "rewards/rejected": -10.340043067932129, "step": 2096 }, { "epoch": 3.61, "learning_rate": 3.3276668083297914e-07, "logits/chosen": -2.0943424701690674, "logits/rejected": -2.23309326171875, "logps/chosen": -115.8682861328125, "logps/rejected": -216.4946746826172, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": -4.747474193572998, "rewards/margins": 9.024370193481445, "rewards/rejected": -13.771843910217285, "step": 2097 }, { "epoch": 3.61, "learning_rate": 3.3266043348916274e-07, "logits/chosen": -1.8787875175476074, "logits/rejected": -2.2930727005004883, "logps/chosen": -133.65737915039062, "logps/rejected": -236.83766174316406, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -6.376807689666748, "rewards/margins": 8.531417846679688, "rewards/rejected": -14.908225059509277, "step": 2098 }, { "epoch": 3.61, "learning_rate": 3.3255418614534634e-07, "logits/chosen": -2.3648440837860107, "logits/rejected": -1.9827336072921753, "logps/chosen": -160.08995056152344, "logps/rejected": -248.402099609375, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -4.182727813720703, "rewards/margins": 11.586742401123047, "rewards/rejected": -15.76947021484375, "step": 2099 }, { "epoch": 3.61, "learning_rate": 3.3244793880152994e-07, "logits/chosen": -2.3517608642578125, "logits/rejected": -1.8738539218902588, "logps/chosen": -159.3886260986328, "logps/rejected": -196.71388244628906, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -6.030392169952393, "rewards/margins": 6.498980522155762, "rewards/rejected": -12.529373168945312, "step": 2100 }, { "epoch": 3.62, "learning_rate": 3.323416914577136e-07, "logits/chosen": -2.1618776321411133, "logits/rejected": -2.341752290725708, "logps/chosen": -135.08169555664062, "logps/rejected": -239.41925048828125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -5.350248336791992, "rewards/margins": 9.299383163452148, "rewards/rejected": -14.64963150024414, "step": 2101 }, { "epoch": 3.62, "learning_rate": 3.3223544411389714e-07, "logits/chosen": -2.2002968788146973, "logits/rejected": -2.2093453407287598, "logps/chosen": -137.71563720703125, "logps/rejected": -239.5636444091797, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -5.178469181060791, "rewards/margins": 10.527968406677246, "rewards/rejected": -15.706438064575195, "step": 2102 }, { "epoch": 3.62, "learning_rate": 3.3212919677008074e-07, "logits/chosen": -2.3567843437194824, "logits/rejected": -2.076214075088501, "logps/chosen": -125.36784362792969, "logps/rejected": -186.69863891601562, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -3.220876693725586, "rewards/margins": 6.656476020812988, "rewards/rejected": -9.877352714538574, "step": 2103 }, { "epoch": 3.62, "learning_rate": 3.3202294942626434e-07, "logits/chosen": -2.160482406616211, "logits/rejected": -2.0141661167144775, "logps/chosen": -131.8095703125, "logps/rejected": -171.11160278320312, "loss": 0.0415, "rewards/accuracies": 0.75, "rewards/chosen": -5.242403984069824, "rewards/margins": 4.544171333312988, "rewards/rejected": -9.786575317382812, "step": 2104 }, { "epoch": 3.62, "learning_rate": 3.3191670208244793e-07, "logits/chosen": -2.0348916053771973, "logits/rejected": -2.151907444000244, "logps/chosen": -142.11465454101562, "logps/rejected": -188.26739501953125, "loss": 0.1121, "rewards/accuracies": 1.0, "rewards/chosen": -4.621457099914551, "rewards/margins": 5.453835964202881, "rewards/rejected": -10.07529354095459, "step": 2105 }, { "epoch": 3.62, "learning_rate": 3.318104547386315e-07, "logits/chosen": -1.9469584226608276, "logits/rejected": -2.294847249984741, "logps/chosen": -120.34368896484375, "logps/rejected": -252.331298828125, "loss": 0.1146, "rewards/accuracies": 1.0, "rewards/chosen": -4.701187610626221, "rewards/margins": 12.134631156921387, "rewards/rejected": -16.835819244384766, "step": 2106 }, { "epoch": 3.63, "learning_rate": 3.3170420739481513e-07, "logits/chosen": -2.1559324264526367, "logits/rejected": -2.084836721420288, "logps/chosen": -131.24371337890625, "logps/rejected": -191.99127197265625, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -5.324938774108887, "rewards/margins": 5.94645881652832, "rewards/rejected": -11.271397590637207, "step": 2107 }, { "epoch": 3.63, "learning_rate": 3.3159796005099873e-07, "logits/chosen": -2.0135929584503174, "logits/rejected": -2.22078275680542, "logps/chosen": -122.49559783935547, "logps/rejected": -179.24942016601562, "loss": 0.128, "rewards/accuracies": 1.0, "rewards/chosen": -5.556551933288574, "rewards/margins": 4.394214153289795, "rewards/rejected": -9.950765609741211, "step": 2108 }, { "epoch": 3.63, "learning_rate": 3.314917127071823e-07, "logits/chosen": -2.063861131668091, "logits/rejected": -2.292410373687744, "logps/chosen": -105.66633605957031, "logps/rejected": -249.93862915039062, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.155529499053955, "rewards/margins": 12.850587844848633, "rewards/rejected": -15.00611686706543, "step": 2109 }, { "epoch": 3.63, "learning_rate": 3.3138546536336593e-07, "logits/chosen": -2.1368470191955566, "logits/rejected": -2.143728256225586, "logps/chosen": -136.630615234375, "logps/rejected": -227.91134643554688, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -5.237507343292236, "rewards/margins": 9.310160636901855, "rewards/rejected": -14.54766845703125, "step": 2110 }, { "epoch": 3.63, "learning_rate": 3.312792180195495e-07, "logits/chosen": -2.349095344543457, "logits/rejected": -1.9726582765579224, "logps/chosen": -122.79608154296875, "logps/rejected": -167.52783203125, "loss": 0.0708, "rewards/accuracies": 0.75, "rewards/chosen": -3.0611042976379395, "rewards/margins": 6.245941162109375, "rewards/rejected": -9.307045936584473, "step": 2111 }, { "epoch": 3.64, "learning_rate": 3.311729706757331e-07, "logits/chosen": -2.2320849895477295, "logits/rejected": -2.127352714538574, "logps/chosen": -114.99879455566406, "logps/rejected": -217.23184204101562, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -3.6333346366882324, "rewards/margins": 10.019225120544434, "rewards/rejected": -13.652559280395508, "step": 2112 }, { "epoch": 3.64, "learning_rate": 3.310667233319167e-07, "logits/chosen": -2.3480138778686523, "logits/rejected": -1.8595799207687378, "logps/chosen": -110.57395935058594, "logps/rejected": -192.34298706054688, "loss": 0.0674, "rewards/accuracies": 1.0, "rewards/chosen": -4.231653213500977, "rewards/margins": 8.054428100585938, "rewards/rejected": -12.286081314086914, "step": 2113 }, { "epoch": 3.64, "learning_rate": 3.3096047598810027e-07, "logits/chosen": -2.220335006713867, "logits/rejected": -2.31949782371521, "logps/chosen": -165.4965362548828, "logps/rejected": -229.25091552734375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -6.1644206047058105, "rewards/margins": 6.863431930541992, "rewards/rejected": -13.027853012084961, "step": 2114 }, { "epoch": 3.64, "learning_rate": 3.3085422864428387e-07, "logits/chosen": -2.087017774581909, "logits/rejected": -2.1406469345092773, "logps/chosen": -139.81825256347656, "logps/rejected": -177.31369018554688, "loss": 0.032, "rewards/accuracies": 0.75, "rewards/chosen": -5.102823257446289, "rewards/margins": 4.224679470062256, "rewards/rejected": -9.327502250671387, "step": 2115 }, { "epoch": 3.64, "learning_rate": 3.3074798130046747e-07, "logits/chosen": -2.164215087890625, "logits/rejected": -2.2623350620269775, "logps/chosen": -112.6174087524414, "logps/rejected": -209.7527313232422, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": -3.74556827545166, "rewards/margins": 6.646530628204346, "rewards/rejected": -10.392098426818848, "step": 2116 }, { "epoch": 3.64, "learning_rate": 3.3064173395665107e-07, "logits/chosen": -2.2450156211853027, "logits/rejected": -2.0722384452819824, "logps/chosen": -158.650390625, "logps/rejected": -229.02664184570312, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": -6.224549293518066, "rewards/margins": 9.010335922241211, "rewards/rejected": -15.234885215759277, "step": 2117 }, { "epoch": 3.65, "learning_rate": 3.3053548661283467e-07, "logits/chosen": -1.9592671394348145, "logits/rejected": -2.295154571533203, "logps/chosen": -111.76360321044922, "logps/rejected": -211.16127014160156, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -3.9918694496154785, "rewards/margins": 8.705343246459961, "rewards/rejected": -12.697213172912598, "step": 2118 }, { "epoch": 3.65, "learning_rate": 3.3042923926901827e-07, "logits/chosen": -2.1565635204315186, "logits/rejected": -2.066235065460205, "logps/chosen": -136.24087524414062, "logps/rejected": -186.68983459472656, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -4.5903425216674805, "rewards/margins": 6.6493659019470215, "rewards/rejected": -11.23970890045166, "step": 2119 }, { "epoch": 3.65, "learning_rate": 3.3032299192520187e-07, "logits/chosen": -2.2014455795288086, "logits/rejected": -2.1148569583892822, "logps/chosen": -117.93995666503906, "logps/rejected": -193.3973846435547, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -4.264455795288086, "rewards/margins": 6.841717720031738, "rewards/rejected": -11.10617446899414, "step": 2120 }, { "epoch": 3.65, "learning_rate": 3.302167445813854e-07, "logits/chosen": -1.9374980926513672, "logits/rejected": -2.392991304397583, "logps/chosen": -124.89883422851562, "logps/rejected": -231.58438110351562, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -5.035871505737305, "rewards/margins": 8.573773384094238, "rewards/rejected": -13.609644889831543, "step": 2121 }, { "epoch": 3.65, "learning_rate": 3.3011049723756906e-07, "logits/chosen": -2.174534320831299, "logits/rejected": -2.2070493698120117, "logps/chosen": -152.75149536132812, "logps/rejected": -215.65533447265625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.581692218780518, "rewards/margins": 7.594958782196045, "rewards/rejected": -12.176650047302246, "step": 2122 }, { "epoch": 3.65, "learning_rate": 3.3000424989375266e-07, "logits/chosen": -2.169577121734619, "logits/rejected": -2.3589911460876465, "logps/chosen": -113.02351379394531, "logps/rejected": -227.4406280517578, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.643320083618164, "rewards/margins": 10.887764930725098, "rewards/rejected": -13.531085014343262, "step": 2123 }, { "epoch": 3.66, "learning_rate": 3.298980025499362e-07, "logits/chosen": -2.0871591567993164, "logits/rejected": -1.9093332290649414, "logps/chosen": -137.81971740722656, "logps/rejected": -223.58810424804688, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -3.399632453918457, "rewards/margins": 9.750728607177734, "rewards/rejected": -13.150361061096191, "step": 2124 }, { "epoch": 3.66, "learning_rate": 3.2979175520611986e-07, "logits/chosen": -2.371785879135132, "logits/rejected": -2.3545687198638916, "logps/chosen": -126.3852767944336, "logps/rejected": -243.098876953125, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -3.2786288261413574, "rewards/margins": 12.059800148010254, "rewards/rejected": -15.338428497314453, "step": 2125 }, { "epoch": 3.66, "learning_rate": 3.296855078623034e-07, "logits/chosen": -2.2109408378601074, "logits/rejected": -1.9884971380233765, "logps/chosen": -130.7945556640625, "logps/rejected": -194.9739990234375, "loss": 0.1928, "rewards/accuracies": 1.0, "rewards/chosen": -4.164790153503418, "rewards/margins": 7.609887599945068, "rewards/rejected": -11.774678230285645, "step": 2126 }, { "epoch": 3.66, "learning_rate": 3.29579260518487e-07, "logits/chosen": -1.899113655090332, "logits/rejected": -2.322615623474121, "logps/chosen": -127.9449234008789, "logps/rejected": -241.33541870117188, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -4.312490940093994, "rewards/margins": 11.311155319213867, "rewards/rejected": -15.623645782470703, "step": 2127 }, { "epoch": 3.66, "learning_rate": 3.2947301317467066e-07, "logits/chosen": -2.0191755294799805, "logits/rejected": -2.151649236679077, "logps/chosen": -141.76356506347656, "logps/rejected": -214.0495147705078, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -5.6767659187316895, "rewards/margins": 6.301497936248779, "rewards/rejected": -11.978263854980469, "step": 2128 }, { "epoch": 3.66, "learning_rate": 3.293667658308542e-07, "logits/chosen": -2.0355417728424072, "logits/rejected": -2.0193567276000977, "logps/chosen": -130.0417022705078, "logps/rejected": -212.2381134033203, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -3.359670400619507, "rewards/margins": 8.54484748840332, "rewards/rejected": -11.904518127441406, "step": 2129 }, { "epoch": 3.67, "learning_rate": 3.292605184870378e-07, "logits/chosen": -2.111264944076538, "logits/rejected": -2.188218832015991, "logps/chosen": -124.56553649902344, "logps/rejected": -193.83750915527344, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -4.382358074188232, "rewards/margins": 5.882946014404297, "rewards/rejected": -10.265302658081055, "step": 2130 }, { "epoch": 3.67, "learning_rate": 3.291542711432214e-07, "logits/chosen": -1.8988330364227295, "logits/rejected": -2.255598306655884, "logps/chosen": -127.5069580078125, "logps/rejected": -273.3316650390625, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -3.6709811687469482, "rewards/margins": 13.793184280395508, "rewards/rejected": -17.46416664123535, "step": 2131 }, { "epoch": 3.67, "learning_rate": 3.29048023799405e-07, "logits/chosen": -2.2494256496429443, "logits/rejected": -2.08528995513916, "logps/chosen": -136.21337890625, "logps/rejected": -240.8350830078125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.282823085784912, "rewards/margins": 9.489729881286621, "rewards/rejected": -14.772552490234375, "step": 2132 }, { "epoch": 3.67, "learning_rate": 3.2894177645558855e-07, "logits/chosen": -2.0598599910736084, "logits/rejected": -2.12662410736084, "logps/chosen": -104.81620788574219, "logps/rejected": -179.65524291992188, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -3.5005531311035156, "rewards/margins": 6.155576705932617, "rewards/rejected": -9.656129837036133, "step": 2133 }, { "epoch": 3.67, "learning_rate": 3.288355291117722e-07, "logits/chosen": -1.9711428880691528, "logits/rejected": -2.299837589263916, "logps/chosen": -117.91314697265625, "logps/rejected": -224.2867431640625, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -2.9250869750976562, "rewards/margins": 10.617429733276367, "rewards/rejected": -13.542516708374023, "step": 2134 }, { "epoch": 3.67, "learning_rate": 3.287292817679558e-07, "logits/chosen": -1.8433330059051514, "logits/rejected": -2.2491371631622314, "logps/chosen": -116.46686553955078, "logps/rejected": -211.09361267089844, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -4.275516986846924, "rewards/margins": 8.023402214050293, "rewards/rejected": -12.298919677734375, "step": 2135 }, { "epoch": 3.68, "learning_rate": 3.2862303442413934e-07, "logits/chosen": -1.9789011478424072, "logits/rejected": -2.1111621856689453, "logps/chosen": -118.841796875, "logps/rejected": -234.4149627685547, "loss": 0.1101, "rewards/accuracies": 1.0, "rewards/chosen": -2.773594379425049, "rewards/margins": 11.190314292907715, "rewards/rejected": -13.963908195495605, "step": 2136 }, { "epoch": 3.68, "learning_rate": 3.28516787080323e-07, "logits/chosen": -2.046509265899658, "logits/rejected": -1.9623024463653564, "logps/chosen": -132.92849731445312, "logps/rejected": -208.30001831054688, "loss": 0.0979, "rewards/accuracies": 0.75, "rewards/chosen": -5.24398136138916, "rewards/margins": 7.3748459815979, "rewards/rejected": -12.618827819824219, "step": 2137 }, { "epoch": 3.68, "learning_rate": 3.2841053973650654e-07, "logits/chosen": -1.9670934677124023, "logits/rejected": -2.285552978515625, "logps/chosen": -101.98750305175781, "logps/rejected": -197.00656127929688, "loss": 0.0636, "rewards/accuracies": 1.0, "rewards/chosen": -4.117440700531006, "rewards/margins": 8.28825855255127, "rewards/rejected": -12.40570068359375, "step": 2138 }, { "epoch": 3.68, "learning_rate": 3.2830429239269014e-07, "logits/chosen": -2.1372060775756836, "logits/rejected": -2.2547149658203125, "logps/chosen": -115.20048522949219, "logps/rejected": -201.72438049316406, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/chosen": -3.143808126449585, "rewards/margins": 6.7864484786987305, "rewards/rejected": -9.930255889892578, "step": 2139 }, { "epoch": 3.68, "learning_rate": 3.281980450488738e-07, "logits/chosen": -2.034545421600342, "logits/rejected": -1.975592851638794, "logps/chosen": -125.28152465820312, "logps/rejected": -182.15419006347656, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -4.567990303039551, "rewards/margins": 5.9554123878479, "rewards/rejected": -10.52340316772461, "step": 2140 }, { "epoch": 3.69, "learning_rate": 3.2809179770505734e-07, "logits/chosen": -1.9493498802185059, "logits/rejected": -2.1811318397521973, "logps/chosen": -128.9571533203125, "logps/rejected": -187.16502380371094, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": -4.83760929107666, "rewards/margins": 5.88754940032959, "rewards/rejected": -10.72515869140625, "step": 2141 }, { "epoch": 3.69, "learning_rate": 3.27985550361241e-07, "logits/chosen": -2.1850199699401855, "logits/rejected": -2.137585163116455, "logps/chosen": -98.12928009033203, "logps/rejected": -227.19859313964844, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.8318182229995728, "rewards/margins": 13.359893798828125, "rewards/rejected": -14.191712379455566, "step": 2142 }, { "epoch": 3.69, "learning_rate": 3.2787930301742453e-07, "logits/chosen": -2.02549409866333, "logits/rejected": -1.992297887802124, "logps/chosen": -114.02259826660156, "logps/rejected": -216.53530883789062, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -3.412431240081787, "rewards/margins": 10.00008487701416, "rewards/rejected": -13.412515640258789, "step": 2143 }, { "epoch": 3.69, "learning_rate": 3.2777305567360813e-07, "logits/chosen": -1.8404468297958374, "logits/rejected": -2.2722787857055664, "logps/chosen": -122.78019714355469, "logps/rejected": -256.587158203125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -3.413801670074463, "rewards/margins": 11.001152038574219, "rewards/rejected": -14.414953231811523, "step": 2144 }, { "epoch": 3.69, "learning_rate": 3.276668083297918e-07, "logits/chosen": -2.0015957355499268, "logits/rejected": -2.3417916297912598, "logps/chosen": -106.34503173828125, "logps/rejected": -224.17825317382812, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.127924919128418, "rewards/margins": 9.490978240966797, "rewards/rejected": -12.618904113769531, "step": 2145 }, { "epoch": 3.69, "learning_rate": 3.2756056098597533e-07, "logits/chosen": -2.254729986190796, "logits/rejected": -2.1114797592163086, "logps/chosen": -130.0112762451172, "logps/rejected": -204.7879638671875, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -4.156741142272949, "rewards/margins": 7.240596294403076, "rewards/rejected": -11.397336959838867, "step": 2146 }, { "epoch": 3.7, "learning_rate": 3.2745431364215893e-07, "logits/chosen": -1.907960057258606, "logits/rejected": -2.349010944366455, "logps/chosen": -102.37386322021484, "logps/rejected": -197.6236572265625, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.9272053241729736, "rewards/margins": 8.85072135925293, "rewards/rejected": -10.77792739868164, "step": 2147 }, { "epoch": 3.7, "learning_rate": 3.2734806629834253e-07, "logits/chosen": -2.1595406532287598, "logits/rejected": -2.0583791732788086, "logps/chosen": -123.11602783203125, "logps/rejected": -203.98545837402344, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -3.547267436981201, "rewards/margins": 8.134895324707031, "rewards/rejected": -11.682162284851074, "step": 2148 }, { "epoch": 3.7, "learning_rate": 3.2724181895452613e-07, "logits/chosen": -2.0632588863372803, "logits/rejected": -2.3050150871276855, "logps/chosen": -105.1591796875, "logps/rejected": -230.4100799560547, "loss": 0.0747, "rewards/accuracies": 1.0, "rewards/chosen": -2.856454849243164, "rewards/margins": 11.485641479492188, "rewards/rejected": -14.342096328735352, "step": 2149 }, { "epoch": 3.7, "learning_rate": 3.2713557161070973e-07, "logits/chosen": -2.1753783226013184, "logits/rejected": -2.3176186084747314, "logps/chosen": -112.63553619384766, "logps/rejected": -216.14273071289062, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -3.854879856109619, "rewards/margins": 9.609918594360352, "rewards/rejected": -13.464797973632812, "step": 2150 }, { "epoch": 3.7, "learning_rate": 3.270293242668933e-07, "logits/chosen": -2.290632486343384, "logits/rejected": -2.276637077331543, "logps/chosen": -143.22561645507812, "logps/rejected": -213.87396240234375, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -5.083654880523682, "rewards/margins": 7.29972505569458, "rewards/rejected": -12.383378982543945, "step": 2151 }, { "epoch": 3.7, "learning_rate": 3.269230769230769e-07, "logits/chosen": -1.921335220336914, "logits/rejected": -2.2229390144348145, "logps/chosen": -119.16128540039062, "logps/rejected": -195.3600311279297, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -3.0680789947509766, "rewards/margins": 6.969980239868164, "rewards/rejected": -10.03805923461914, "step": 2152 }, { "epoch": 3.71, "learning_rate": 3.2681682957926047e-07, "logits/chosen": -1.7386715412139893, "logits/rejected": -2.288327693939209, "logps/chosen": -101.6999740600586, "logps/rejected": -176.84683227539062, "loss": 0.0812, "rewards/accuracies": 1.0, "rewards/chosen": -3.2877302169799805, "rewards/margins": 7.077514171600342, "rewards/rejected": -10.365243911743164, "step": 2153 }, { "epoch": 3.71, "learning_rate": 3.267105822354441e-07, "logits/chosen": -2.303239345550537, "logits/rejected": -2.139030933380127, "logps/chosen": -134.33258056640625, "logps/rejected": -247.85726928710938, "loss": 0.1527, "rewards/accuracies": 1.0, "rewards/chosen": -3.250960350036621, "rewards/margins": 11.735962867736816, "rewards/rejected": -14.986924171447754, "step": 2154 }, { "epoch": 3.71, "learning_rate": 3.266043348916277e-07, "logits/chosen": -2.1296074390411377, "logits/rejected": -2.112896680831909, "logps/chosen": -119.09899139404297, "logps/rejected": -226.45425415039062, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -2.2124710083007812, "rewards/margins": 9.149782180786133, "rewards/rejected": -11.362253189086914, "step": 2155 }, { "epoch": 3.71, "learning_rate": 3.2649808754781127e-07, "logits/chosen": -2.258910655975342, "logits/rejected": -1.59001886844635, "logps/chosen": -143.89926147460938, "logps/rejected": -215.71290588378906, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -4.125050067901611, "rewards/margins": 9.424300193786621, "rewards/rejected": -13.54935073852539, "step": 2156 }, { "epoch": 3.71, "learning_rate": 3.263918402039949e-07, "logits/chosen": -2.0169150829315186, "logits/rejected": -2.2525243759155273, "logps/chosen": -121.4141616821289, "logps/rejected": -223.32630920410156, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.9750630855560303, "rewards/margins": 9.170476913452148, "rewards/rejected": -12.145540237426758, "step": 2157 }, { "epoch": 3.71, "learning_rate": 3.2628559286017847e-07, "logits/chosen": -2.09788179397583, "logits/rejected": -2.0975379943847656, "logps/chosen": -105.10595703125, "logps/rejected": -165.64691162109375, "loss": 0.1403, "rewards/accuracies": 1.0, "rewards/chosen": -2.460099220275879, "rewards/margins": 6.227445125579834, "rewards/rejected": -8.687543869018555, "step": 2158 }, { "epoch": 3.72, "learning_rate": 3.2617934551636206e-07, "logits/chosen": -2.055234432220459, "logits/rejected": -1.9500844478607178, "logps/chosen": -129.82037353515625, "logps/rejected": -196.65530395507812, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -3.2823896408081055, "rewards/margins": 7.095830917358398, "rewards/rejected": -10.37822151184082, "step": 2159 }, { "epoch": 3.72, "learning_rate": 3.260730981725457e-07, "logits/chosen": -2.3723809719085693, "logits/rejected": -1.6588398218154907, "logps/chosen": -113.487060546875, "logps/rejected": -212.69430541992188, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -2.188046932220459, "rewards/margins": 11.283767700195312, "rewards/rejected": -13.471814155578613, "step": 2160 }, { "epoch": 3.72, "learning_rate": 3.2596685082872926e-07, "logits/chosen": -2.3264408111572266, "logits/rejected": -2.047389507293701, "logps/chosen": -116.41403198242188, "logps/rejected": -193.9404296875, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -2.515194892883301, "rewards/margins": 10.495628356933594, "rewards/rejected": -13.010822296142578, "step": 2161 }, { "epoch": 3.72, "learning_rate": 3.2586060348491286e-07, "logits/chosen": -2.235555410385132, "logits/rejected": -1.9310131072998047, "logps/chosen": -142.94952392578125, "logps/rejected": -210.92782592773438, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -4.1637864112854, "rewards/margins": 7.3543701171875, "rewards/rejected": -11.518156051635742, "step": 2162 }, { "epoch": 3.72, "learning_rate": 3.2575435614109646e-07, "logits/chosen": -2.349561929702759, "logits/rejected": -1.849453330039978, "logps/chosen": -122.55921936035156, "logps/rejected": -191.1208953857422, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -3.182249069213867, "rewards/margins": 7.8190693855285645, "rewards/rejected": -11.001317977905273, "step": 2163 }, { "epoch": 3.72, "learning_rate": 3.2564810879728006e-07, "logits/chosen": -2.2391505241394043, "logits/rejected": -2.1009814739227295, "logps/chosen": -142.0996551513672, "logps/rejected": -208.04510498046875, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -4.532982349395752, "rewards/margins": 5.148499011993408, "rewards/rejected": -9.68148136138916, "step": 2164 }, { "epoch": 3.73, "learning_rate": 3.255418614534636e-07, "logits/chosen": -2.404650926589966, "logits/rejected": -2.2116177082061768, "logps/chosen": -89.98632049560547, "logps/rejected": -188.41864013671875, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": -1.6890456676483154, "rewards/margins": 9.972532272338867, "rewards/rejected": -11.661578178405762, "step": 2165 }, { "epoch": 3.73, "learning_rate": 3.2543561410964726e-07, "logits/chosen": -2.259945869445801, "logits/rejected": -2.0872690677642822, "logps/chosen": -122.35154724121094, "logps/rejected": -212.69210815429688, "loss": 0.2027, "rewards/accuracies": 1.0, "rewards/chosen": -2.777008533477783, "rewards/margins": 8.797515869140625, "rewards/rejected": -11.57452392578125, "step": 2166 }, { "epoch": 3.73, "learning_rate": 3.2532936676583086e-07, "logits/chosen": -2.293236494064331, "logits/rejected": -2.100987195968628, "logps/chosen": -81.62382507324219, "logps/rejected": -140.1158447265625, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -1.3826963901519775, "rewards/margins": 6.785358428955078, "rewards/rejected": -8.168054580688477, "step": 2167 }, { "epoch": 3.73, "learning_rate": 3.252231194220144e-07, "logits/chosen": -1.411037564277649, "logits/rejected": -2.3236119747161865, "logps/chosen": -110.6934585571289, "logps/rejected": -271.6126403808594, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -3.4718143939971924, "rewards/margins": 11.202493667602539, "rewards/rejected": -14.674308776855469, "step": 2168 }, { "epoch": 3.73, "learning_rate": 3.2511687207819805e-07, "logits/chosen": -2.027055025100708, "logits/rejected": -2.1984810829162598, "logps/chosen": -117.49610137939453, "logps/rejected": -192.87660217285156, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -3.5018699169158936, "rewards/margins": 7.490588665008545, "rewards/rejected": -10.99245834350586, "step": 2169 }, { "epoch": 3.73, "learning_rate": 3.250106247343816e-07, "logits/chosen": -2.119791030883789, "logits/rejected": -1.8617591857910156, "logps/chosen": -109.84650421142578, "logps/rejected": -195.4688720703125, "loss": 0.154, "rewards/accuracies": 1.0, "rewards/chosen": -2.8333933353424072, "rewards/margins": 9.433485984802246, "rewards/rejected": -12.26688003540039, "step": 2170 }, { "epoch": 3.74, "learning_rate": 3.249043773905652e-07, "logits/chosen": -1.9989701509475708, "logits/rejected": -2.1800904273986816, "logps/chosen": -127.30720520019531, "logps/rejected": -214.36419677734375, "loss": 0.0742, "rewards/accuracies": 1.0, "rewards/chosen": -3.2607202529907227, "rewards/margins": 8.95671272277832, "rewards/rejected": -12.21743392944336, "step": 2171 }, { "epoch": 3.74, "learning_rate": 3.2479813004674885e-07, "logits/chosen": -2.1929941177368164, "logits/rejected": -2.1697192192077637, "logps/chosen": -91.61710357666016, "logps/rejected": -183.40225219726562, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -2.172799825668335, "rewards/margins": 10.065383911132812, "rewards/rejected": -12.238184928894043, "step": 2172 }, { "epoch": 3.74, "learning_rate": 3.246918827029324e-07, "logits/chosen": -2.2837963104248047, "logits/rejected": -2.037686347961426, "logps/chosen": -108.32484436035156, "logps/rejected": -196.3570556640625, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -2.0281312465667725, "rewards/margins": 8.018339157104492, "rewards/rejected": -10.046469688415527, "step": 2173 }, { "epoch": 3.74, "learning_rate": 3.24585635359116e-07, "logits/chosen": -2.486497402191162, "logits/rejected": -2.1535024642944336, "logps/chosen": -114.64292907714844, "logps/rejected": -186.45530700683594, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.768810510635376, "rewards/margins": 7.610645294189453, "rewards/rejected": -9.379456520080566, "step": 2174 }, { "epoch": 3.74, "learning_rate": 3.244793880152996e-07, "logits/chosen": -2.249345302581787, "logits/rejected": -1.9595012664794922, "logps/chosen": -108.01210021972656, "logps/rejected": -204.1109161376953, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -2.4829068183898926, "rewards/margins": 10.276350021362305, "rewards/rejected": -12.759257316589355, "step": 2175 }, { "epoch": 3.75, "learning_rate": 3.243731406714832e-07, "logits/chosen": -2.27458119392395, "logits/rejected": -2.1751041412353516, "logps/chosen": -105.13255310058594, "logps/rejected": -226.72357177734375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.1172609329223633, "rewards/margins": 11.61479377746582, "rewards/rejected": -13.7320556640625, "step": 2176 }, { "epoch": 3.75, "learning_rate": 3.242668933276668e-07, "logits/chosen": -1.9862549304962158, "logits/rejected": -1.824182152748108, "logps/chosen": -117.7582778930664, "logps/rejected": -224.92832946777344, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.3665103912353516, "rewards/margins": 10.679890632629395, "rewards/rejected": -14.046401977539062, "step": 2177 }, { "epoch": 3.75, "learning_rate": 3.241606459838504e-07, "logits/chosen": -1.863547921180725, "logits/rejected": -2.280953884124756, "logps/chosen": -101.04194641113281, "logps/rejected": -229.59906005859375, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -1.8116681575775146, "rewards/margins": 10.755481719970703, "rewards/rejected": -12.567150115966797, "step": 2178 }, { "epoch": 3.75, "learning_rate": 3.24054398640034e-07, "logits/chosen": -2.279632806777954, "logits/rejected": -2.1053264141082764, "logps/chosen": -145.3723602294922, "logps/rejected": -174.83209228515625, "loss": 0.1084, "rewards/accuracies": 0.75, "rewards/chosen": -5.2845048904418945, "rewards/margins": 4.1456990242004395, "rewards/rejected": -9.430204391479492, "step": 2179 }, { "epoch": 3.75, "learning_rate": 3.2394815129621754e-07, "logits/chosen": -2.171104907989502, "logits/rejected": -2.2746105194091797, "logps/chosen": -113.09098815917969, "logps/rejected": -218.68067932128906, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.578037142753601, "rewards/margins": 9.509374618530273, "rewards/rejected": -11.087411880493164, "step": 2180 }, { "epoch": 3.75, "learning_rate": 3.238419039524012e-07, "logits/chosen": -2.175043821334839, "logits/rejected": -2.214054584503174, "logps/chosen": -122.57810974121094, "logps/rejected": -190.97933959960938, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": -3.5338430404663086, "rewards/margins": 6.620035648345947, "rewards/rejected": -10.153878211975098, "step": 2181 }, { "epoch": 3.76, "learning_rate": 3.237356566085848e-07, "logits/chosen": -2.0231094360351562, "logits/rejected": -2.108912467956543, "logps/chosen": -98.90986633300781, "logps/rejected": -217.3997802734375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.3012819290161133, "rewards/margins": 9.87631607055664, "rewards/rejected": -12.177597999572754, "step": 2182 }, { "epoch": 3.76, "learning_rate": 3.236294092647684e-07, "logits/chosen": -2.0214931964874268, "logits/rejected": -2.1096677780151367, "logps/chosen": -118.17665100097656, "logps/rejected": -201.8839569091797, "loss": 0.0952, "rewards/accuracies": 1.0, "rewards/chosen": -4.020310878753662, "rewards/margins": 7.179752826690674, "rewards/rejected": -11.200063705444336, "step": 2183 }, { "epoch": 3.76, "learning_rate": 3.23523161920952e-07, "logits/chosen": -2.3629355430603027, "logits/rejected": -2.0089786052703857, "logps/chosen": -98.45695495605469, "logps/rejected": -194.9987335205078, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -2.783435821533203, "rewards/margins": 9.06300163269043, "rewards/rejected": -11.846436500549316, "step": 2184 }, { "epoch": 3.76, "learning_rate": 3.2341691457713553e-07, "logits/chosen": -2.1507039070129395, "logits/rejected": -1.9610390663146973, "logps/chosen": -142.03750610351562, "logps/rejected": -214.45355224609375, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -5.822575092315674, "rewards/margins": 8.030218124389648, "rewards/rejected": -13.85279369354248, "step": 2185 }, { "epoch": 3.76, "learning_rate": 3.233106672333192e-07, "logits/chosen": -1.5028027296066284, "logits/rejected": -2.3045907020568848, "logps/chosen": -118.136962890625, "logps/rejected": -220.33203125, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -2.676546812057495, "rewards/margins": 7.687349796295166, "rewards/rejected": -10.363897323608398, "step": 2186 }, { "epoch": 3.76, "learning_rate": 3.232044198895028e-07, "logits/chosen": -2.253215789794922, "logits/rejected": -2.0991365909576416, "logps/chosen": -160.73876953125, "logps/rejected": -199.78463745117188, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -5.9040117263793945, "rewards/margins": 4.174455642700195, "rewards/rejected": -10.07846736907959, "step": 2187 }, { "epoch": 3.77, "learning_rate": 3.230981725456863e-07, "logits/chosen": -2.301941156387329, "logits/rejected": -2.1903088092803955, "logps/chosen": -107.94273376464844, "logps/rejected": -218.23402404785156, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": -2.2017972469329834, "rewards/margins": 10.879949569702148, "rewards/rejected": -13.081747055053711, "step": 2188 }, { "epoch": 3.77, "learning_rate": 3.2299192520187e-07, "logits/chosen": -2.167964458465576, "logits/rejected": -2.0159060955047607, "logps/chosen": -123.52169799804688, "logps/rejected": -168.58303833007812, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": -2.433650493621826, "rewards/margins": 6.167099952697754, "rewards/rejected": -8.600749969482422, "step": 2189 }, { "epoch": 3.77, "learning_rate": 3.228856778580535e-07, "logits/chosen": -2.055783271789551, "logits/rejected": -1.9682016372680664, "logps/chosen": -127.67945861816406, "logps/rejected": -181.11260986328125, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -4.669848442077637, "rewards/margins": 4.964272499084473, "rewards/rejected": -9.63412094116211, "step": 2190 }, { "epoch": 3.77, "learning_rate": 3.227794305142371e-07, "logits/chosen": -2.270551919937134, "logits/rejected": -2.3529553413391113, "logps/chosen": -125.00523376464844, "logps/rejected": -181.867919921875, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -5.4594340324401855, "rewards/margins": 5.837340354919434, "rewards/rejected": -11.296774864196777, "step": 2191 }, { "epoch": 3.77, "learning_rate": 3.226731831704208e-07, "logits/chosen": -2.3698787689208984, "logits/rejected": -2.2485899925231934, "logps/chosen": -112.13644409179688, "logps/rejected": -208.01388549804688, "loss": 0.1564, "rewards/accuracies": 1.0, "rewards/chosen": -2.092195510864258, "rewards/margins": 9.818161964416504, "rewards/rejected": -11.910358428955078, "step": 2192 }, { "epoch": 3.77, "learning_rate": 3.225669358266043e-07, "logits/chosen": -2.0216176509857178, "logits/rejected": -2.309687614440918, "logps/chosen": -136.86328125, "logps/rejected": -182.96038818359375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -4.468733787536621, "rewards/margins": 4.378748416900635, "rewards/rejected": -8.847481727600098, "step": 2193 }, { "epoch": 3.78, "learning_rate": 3.224606884827879e-07, "logits/chosen": -1.895282506942749, "logits/rejected": -2.342635154724121, "logps/chosen": -125.26399993896484, "logps/rejected": -196.85824584960938, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -4.302395820617676, "rewards/margins": 6.551324367523193, "rewards/rejected": -10.853719711303711, "step": 2194 }, { "epoch": 3.78, "learning_rate": 3.223544411389715e-07, "logits/chosen": -2.1574697494506836, "logits/rejected": -1.9387931823730469, "logps/chosen": -122.88460540771484, "logps/rejected": -200.85833740234375, "loss": 0.0959, "rewards/accuracies": 0.75, "rewards/chosen": -3.6456298828125, "rewards/margins": 8.801198959350586, "rewards/rejected": -12.446829795837402, "step": 2195 }, { "epoch": 3.78, "learning_rate": 3.222481937951551e-07, "logits/chosen": -1.9925472736358643, "logits/rejected": -2.2249035835266113, "logps/chosen": -92.02488708496094, "logps/rejected": -192.1014404296875, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": -2.56526255607605, "rewards/margins": 8.315006256103516, "rewards/rejected": -10.880269050598145, "step": 2196 }, { "epoch": 3.78, "learning_rate": 3.2214194645133866e-07, "logits/chosen": -2.1360583305358887, "logits/rejected": -2.1991167068481445, "logps/chosen": -126.10800170898438, "logps/rejected": -243.9280242919922, "loss": 0.052, "rewards/accuracies": 1.0, "rewards/chosen": -2.868570566177368, "rewards/margins": 11.792765617370605, "rewards/rejected": -14.661336898803711, "step": 2197 }, { "epoch": 3.78, "learning_rate": 3.220356991075223e-07, "logits/chosen": -2.156839370727539, "logits/rejected": -1.9619108438491821, "logps/chosen": -135.60488891601562, "logps/rejected": -187.88385009765625, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": -3.501769542694092, "rewards/margins": 6.6197381019592285, "rewards/rejected": -10.121508598327637, "step": 2198 }, { "epoch": 3.78, "learning_rate": 3.219294517637059e-07, "logits/chosen": -2.3267502784729004, "logits/rejected": -2.0466854572296143, "logps/chosen": -118.30541229248047, "logps/rejected": -175.13685607910156, "loss": 0.1087, "rewards/accuracies": 1.0, "rewards/chosen": -3.731445789337158, "rewards/margins": 7.162257194519043, "rewards/rejected": -10.893702507019043, "step": 2199 }, { "epoch": 3.79, "learning_rate": 3.2182320441988946e-07, "logits/chosen": -1.801418423652649, "logits/rejected": -2.1833581924438477, "logps/chosen": -127.43353271484375, "logps/rejected": -220.05320739746094, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": -4.777395725250244, "rewards/margins": 7.213653564453125, "rewards/rejected": -11.991048812866211, "step": 2200 }, { "epoch": 3.79, "learning_rate": 3.217169570760731e-07, "logits/chosen": -1.6539191007614136, "logits/rejected": -2.2365283966064453, "logps/chosen": -93.30521392822266, "logps/rejected": -230.47357177734375, "loss": 0.1406, "rewards/accuracies": 1.0, "rewards/chosen": -3.1111137866973877, "rewards/margins": 9.495405197143555, "rewards/rejected": -12.606518745422363, "step": 2201 }, { "epoch": 3.79, "learning_rate": 3.2161070973225666e-07, "logits/chosen": -1.4120912551879883, "logits/rejected": -2.3022968769073486, "logps/chosen": -78.96908569335938, "logps/rejected": -235.4512481689453, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.0934789180755615, "rewards/margins": 12.417108535766602, "rewards/rejected": -13.510586738586426, "step": 2202 }, { "epoch": 3.79, "learning_rate": 3.2150446238844026e-07, "logits/chosen": -2.182248115539551, "logits/rejected": -2.128966808319092, "logps/chosen": -117.99969482421875, "logps/rejected": -214.47531127929688, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -4.39430570602417, "rewards/margins": 8.886322021484375, "rewards/rejected": -13.280628204345703, "step": 2203 }, { "epoch": 3.79, "learning_rate": 3.213982150446239e-07, "logits/chosen": -2.1598143577575684, "logits/rejected": -2.2097747325897217, "logps/chosen": -116.05329895019531, "logps/rejected": -208.93142700195312, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -3.349571466445923, "rewards/margins": 10.257713317871094, "rewards/rejected": -13.607285499572754, "step": 2204 }, { "epoch": 3.8, "learning_rate": 3.2129196770080745e-07, "logits/chosen": -2.2328383922576904, "logits/rejected": -2.114027976989746, "logps/chosen": -108.58937072753906, "logps/rejected": -193.16004943847656, "loss": 0.0492, "rewards/accuracies": 0.75, "rewards/chosen": -2.095550775527954, "rewards/margins": 9.202323913574219, "rewards/rejected": -11.297874450683594, "step": 2205 }, { "epoch": 3.8, "learning_rate": 3.2118572035699105e-07, "logits/chosen": -2.244445323944092, "logits/rejected": -2.2943313121795654, "logps/chosen": -117.25199127197266, "logps/rejected": -223.443359375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.3404717445373535, "rewards/margins": 9.630985260009766, "rewards/rejected": -11.971458435058594, "step": 2206 }, { "epoch": 3.8, "learning_rate": 3.2107947301317465e-07, "logits/chosen": -2.3474502563476562, "logits/rejected": -1.966051459312439, "logps/chosen": -130.96365356445312, "logps/rejected": -220.48895263671875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -4.378432273864746, "rewards/margins": 10.141033172607422, "rewards/rejected": -14.519466400146484, "step": 2207 }, { "epoch": 3.8, "learning_rate": 3.2097322566935825e-07, "logits/chosen": -2.213366985321045, "logits/rejected": -2.06052827835083, "logps/chosen": -126.20962524414062, "logps/rejected": -201.3697509765625, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -3.8012583255767822, "rewards/margins": 7.522556781768799, "rewards/rejected": -11.323814392089844, "step": 2208 }, { "epoch": 3.8, "learning_rate": 3.2086697832554185e-07, "logits/chosen": -2.206948757171631, "logits/rejected": -1.9606050252914429, "logps/chosen": -123.77804565429688, "logps/rejected": -198.03907775878906, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.874544620513916, "rewards/margins": 8.261192321777344, "rewards/rejected": -11.135736465454102, "step": 2209 }, { "epoch": 3.8, "learning_rate": 3.2076073098172545e-07, "logits/chosen": -2.2960798740386963, "logits/rejected": -2.20499587059021, "logps/chosen": -115.68241119384766, "logps/rejected": -141.5253143310547, "loss": 0.0836, "rewards/accuracies": 1.0, "rewards/chosen": -4.392730236053467, "rewards/margins": 2.643357753753662, "rewards/rejected": -7.036087989807129, "step": 2210 }, { "epoch": 3.81, "learning_rate": 3.2065448363790905e-07, "logits/chosen": -2.0819520950317383, "logits/rejected": -2.19264554977417, "logps/chosen": -139.17080688476562, "logps/rejected": -182.42144775390625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -5.6299028396606445, "rewards/margins": 4.795809745788574, "rewards/rejected": -10.425712585449219, "step": 2211 }, { "epoch": 3.81, "learning_rate": 3.205482362940926e-07, "logits/chosen": -1.9056038856506348, "logits/rejected": -2.178706407546997, "logps/chosen": -111.86820220947266, "logps/rejected": -216.11293029785156, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.6759414672851562, "rewards/margins": 9.46712589263916, "rewards/rejected": -12.143067359924316, "step": 2212 }, { "epoch": 3.81, "learning_rate": 3.2044198895027625e-07, "logits/chosen": -2.247833013534546, "logits/rejected": -2.05712628364563, "logps/chosen": -115.34071350097656, "logps/rejected": -139.08042907714844, "loss": 0.2421, "rewards/accuracies": 0.75, "rewards/chosen": -3.882345676422119, "rewards/margins": 4.290214538574219, "rewards/rejected": -8.172560691833496, "step": 2213 }, { "epoch": 3.81, "learning_rate": 3.2033574160645984e-07, "logits/chosen": -2.124502658843994, "logits/rejected": -2.265957832336426, "logps/chosen": -111.66698455810547, "logps/rejected": -208.17880249023438, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.1664533615112305, "rewards/margins": 9.246580123901367, "rewards/rejected": -12.413034439086914, "step": 2214 }, { "epoch": 3.81, "learning_rate": 3.202294942626434e-07, "logits/chosen": -2.332958221435547, "logits/rejected": -1.8616317510604858, "logps/chosen": -172.3430633544922, "logps/rejected": -232.81326293945312, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.698759078979492, "rewards/margins": 7.810887336730957, "rewards/rejected": -13.50964641571045, "step": 2215 }, { "epoch": 3.81, "learning_rate": 3.2012324691882704e-07, "logits/chosen": -2.242140769958496, "logits/rejected": -2.2866382598876953, "logps/chosen": -117.65699005126953, "logps/rejected": -210.12188720703125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -3.674759864807129, "rewards/margins": 8.90585994720459, "rewards/rejected": -12.580619812011719, "step": 2216 }, { "epoch": 3.82, "learning_rate": 3.200169995750106e-07, "logits/chosen": -2.0070180892944336, "logits/rejected": -2.120208501815796, "logps/chosen": -118.63773345947266, "logps/rejected": -224.7711181640625, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -3.0725183486938477, "rewards/margins": 9.711755752563477, "rewards/rejected": -12.784273147583008, "step": 2217 }, { "epoch": 3.82, "learning_rate": 3.199107522311942e-07, "logits/chosen": -2.15128755569458, "logits/rejected": -2.3365118503570557, "logps/chosen": -112.41551208496094, "logps/rejected": -226.33926391601562, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.7763872146606445, "rewards/margins": 11.330802917480469, "rewards/rejected": -13.107189178466797, "step": 2218 }, { "epoch": 3.82, "learning_rate": 3.1980450488737784e-07, "logits/chosen": -2.3529138565063477, "logits/rejected": -2.388853073120117, "logps/chosen": -113.94728088378906, "logps/rejected": -163.7037811279297, "loss": 0.0315, "rewards/accuracies": 0.75, "rewards/chosen": -1.827362060546875, "rewards/margins": 7.414735794067383, "rewards/rejected": -9.242096900939941, "step": 2219 }, { "epoch": 3.82, "learning_rate": 3.196982575435614e-07, "logits/chosen": -2.3253843784332275, "logits/rejected": -1.8587549924850464, "logps/chosen": -105.41242980957031, "logps/rejected": -162.30662536621094, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -2.5373446941375732, "rewards/margins": 7.256266117095947, "rewards/rejected": -9.793611526489258, "step": 2220 }, { "epoch": 3.82, "learning_rate": 3.19592010199745e-07, "logits/chosen": -2.1648471355438232, "logits/rejected": -2.128204345703125, "logps/chosen": -99.71341705322266, "logps/rejected": -186.46636962890625, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": -2.3236911296844482, "rewards/margins": 8.645142555236816, "rewards/rejected": -10.968833923339844, "step": 2221 }, { "epoch": 3.82, "learning_rate": 3.194857628559286e-07, "logits/chosen": -2.2701621055603027, "logits/rejected": -1.8759281635284424, "logps/chosen": -115.64555358886719, "logps/rejected": -179.5514373779297, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.8488271236419678, "rewards/margins": 8.355962753295898, "rewards/rejected": -11.204790115356445, "step": 2222 }, { "epoch": 3.83, "learning_rate": 3.193795155121122e-07, "logits/chosen": -2.2070648670196533, "logits/rejected": -2.1178603172302246, "logps/chosen": -117.67111206054688, "logps/rejected": -222.97360229492188, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -2.340428352355957, "rewards/margins": 10.834918975830078, "rewards/rejected": -13.175346374511719, "step": 2223 }, { "epoch": 3.83, "learning_rate": 3.1927326816829573e-07, "logits/chosen": -2.307682514190674, "logits/rejected": -2.195230722427368, "logps/chosen": -101.83467102050781, "logps/rejected": -171.47525024414062, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -2.1402482986450195, "rewards/margins": 6.656911849975586, "rewards/rejected": -8.797160148620605, "step": 2224 }, { "epoch": 3.83, "learning_rate": 3.191670208244794e-07, "logits/chosen": -1.824318766593933, "logits/rejected": -2.115725517272949, "logps/chosen": -95.33935546875, "logps/rejected": -213.09664916992188, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -0.49458715319633484, "rewards/margins": 11.752347946166992, "rewards/rejected": -12.24693489074707, "step": 2225 }, { "epoch": 3.83, "learning_rate": 3.19060773480663e-07, "logits/chosen": -2.137425422668457, "logits/rejected": -2.2607710361480713, "logps/chosen": -118.6207504272461, "logps/rejected": -215.59304809570312, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.8489599227905273, "rewards/margins": 6.92687463760376, "rewards/rejected": -10.775835037231445, "step": 2226 }, { "epoch": 3.83, "learning_rate": 3.189545261368466e-07, "logits/chosen": -2.246732711791992, "logits/rejected": -2.164721965789795, "logps/chosen": -115.02375793457031, "logps/rejected": -220.5501251220703, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.8840348720550537, "rewards/margins": 10.22144603729248, "rewards/rejected": -14.105481147766113, "step": 2227 }, { "epoch": 3.83, "learning_rate": 3.188482787930302e-07, "logits/chosen": -2.1095995903015137, "logits/rejected": -2.244725227355957, "logps/chosen": -108.10423278808594, "logps/rejected": -169.62643432617188, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": -2.9698214530944824, "rewards/margins": 6.44327449798584, "rewards/rejected": -9.41309642791748, "step": 2228 }, { "epoch": 3.84, "learning_rate": 3.187420314492137e-07, "logits/chosen": -2.0157065391540527, "logits/rejected": -2.288695812225342, "logps/chosen": -102.54022979736328, "logps/rejected": -204.72384643554688, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -2.0133535861968994, "rewards/margins": 9.682363510131836, "rewards/rejected": -11.695716857910156, "step": 2229 }, { "epoch": 3.84, "learning_rate": 3.186357841053974e-07, "logits/chosen": -2.1650049686431885, "logits/rejected": -2.2839112281799316, "logps/chosen": -121.57295227050781, "logps/rejected": -206.93966674804688, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -5.235128402709961, "rewards/margins": 7.312631607055664, "rewards/rejected": -12.547760009765625, "step": 2230 }, { "epoch": 3.84, "learning_rate": 3.1852953676158097e-07, "logits/chosen": -1.83826744556427, "logits/rejected": -2.200366735458374, "logps/chosen": -95.70075225830078, "logps/rejected": -238.57199096679688, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.5668699741363525, "rewards/margins": 12.247200965881348, "rewards/rejected": -14.814069747924805, "step": 2231 }, { "epoch": 3.84, "learning_rate": 3.184232894177645e-07, "logits/chosen": -2.063126802444458, "logits/rejected": -2.2731711864471436, "logps/chosen": -110.24679565429688, "logps/rejected": -221.43434143066406, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.3144688606262207, "rewards/margins": 9.44994068145752, "rewards/rejected": -12.764410018920898, "step": 2232 }, { "epoch": 3.84, "learning_rate": 3.1831704207394817e-07, "logits/chosen": -2.2544162273406982, "logits/rejected": -2.3709774017333984, "logps/chosen": -131.98117065429688, "logps/rejected": -239.20314025878906, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -4.103428840637207, "rewards/margins": 9.764901161193848, "rewards/rejected": -13.868330955505371, "step": 2233 }, { "epoch": 3.85, "learning_rate": 3.182107947301317e-07, "logits/chosen": -1.9880043268203735, "logits/rejected": -2.328394889831543, "logps/chosen": -123.90689086914062, "logps/rejected": -198.3743896484375, "loss": 0.045, "rewards/accuracies": 0.75, "rewards/chosen": -4.9991841316223145, "rewards/margins": 6.9186906814575195, "rewards/rejected": -11.917874336242676, "step": 2234 }, { "epoch": 3.85, "learning_rate": 3.181045473863153e-07, "logits/chosen": -2.0411269664764404, "logits/rejected": -2.069429636001587, "logps/chosen": -118.78082275390625, "logps/rejected": -173.00790405273438, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -3.3525800704956055, "rewards/margins": 6.037004470825195, "rewards/rejected": -9.389583587646484, "step": 2235 }, { "epoch": 3.85, "learning_rate": 3.1799830004249897e-07, "logits/chosen": -2.387349843978882, "logits/rejected": -1.9456124305725098, "logps/chosen": -116.3061752319336, "logps/rejected": -195.49609375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.6246814727783203, "rewards/margins": 9.997753143310547, "rewards/rejected": -11.622434616088867, "step": 2236 }, { "epoch": 3.85, "learning_rate": 3.178920526986825e-07, "logits/chosen": -1.6873443126678467, "logits/rejected": -2.395963191986084, "logps/chosen": -92.04766845703125, "logps/rejected": -233.21469116210938, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.8223838806152344, "rewards/margins": 11.323893547058105, "rewards/rejected": -13.146278381347656, "step": 2237 }, { "epoch": 3.85, "learning_rate": 3.177858053548661e-07, "logits/chosen": -2.2349390983581543, "logits/rejected": -2.2671847343444824, "logps/chosen": -98.49977111816406, "logps/rejected": -166.3035888671875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.833479642868042, "rewards/margins": 6.256839752197266, "rewards/rejected": -9.090319633483887, "step": 2238 }, { "epoch": 3.85, "learning_rate": 3.176795580110497e-07, "logits/chosen": -2.1564388275146484, "logits/rejected": -2.202418565750122, "logps/chosen": -134.90191650390625, "logps/rejected": -221.197021484375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.389815330505371, "rewards/margins": 9.828949928283691, "rewards/rejected": -13.218765258789062, "step": 2239 }, { "epoch": 3.86, "learning_rate": 3.175733106672333e-07, "logits/chosen": -2.2579755783081055, "logits/rejected": -2.1064181327819824, "logps/chosen": -116.07595825195312, "logps/rejected": -226.85952758789062, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.0476369857788086, "rewards/margins": 10.786507606506348, "rewards/rejected": -13.834145545959473, "step": 2240 }, { "epoch": 3.86, "learning_rate": 3.174670633234169e-07, "logits/chosen": -2.3056538105010986, "logits/rejected": -1.8467512130737305, "logps/chosen": -136.7633819580078, "logps/rejected": -206.1151885986328, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -3.8059191703796387, "rewards/margins": 9.2146577835083, "rewards/rejected": -13.020576477050781, "step": 2241 }, { "epoch": 3.86, "learning_rate": 3.173608159796005e-07, "logits/chosen": -2.2798917293548584, "logits/rejected": -2.076253652572632, "logps/chosen": -113.89872741699219, "logps/rejected": -249.2162322998047, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": -2.4685747623443604, "rewards/margins": 13.774866104125977, "rewards/rejected": -16.243440628051758, "step": 2242 }, { "epoch": 3.86, "learning_rate": 3.172545686357841e-07, "logits/chosen": -2.2335774898529053, "logits/rejected": -2.0059564113616943, "logps/chosen": -134.01028442382812, "logps/rejected": -222.81723022460938, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": -4.550604820251465, "rewards/margins": 9.012581825256348, "rewards/rejected": -13.563186645507812, "step": 2243 }, { "epoch": 3.86, "learning_rate": 3.1714832129196765e-07, "logits/chosen": -2.146108388900757, "logits/rejected": -2.064023733139038, "logps/chosen": -156.49227905273438, "logps/rejected": -258.0333557128906, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -6.426227569580078, "rewards/margins": 8.819836616516113, "rewards/rejected": -15.246064186096191, "step": 2244 }, { "epoch": 3.86, "learning_rate": 3.170420739481513e-07, "logits/chosen": -2.092866897583008, "logits/rejected": -1.992485523223877, "logps/chosen": -121.13691711425781, "logps/rejected": -193.78985595703125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -3.9220528602600098, "rewards/margins": 7.200164794921875, "rewards/rejected": -11.122218132019043, "step": 2245 }, { "epoch": 3.87, "learning_rate": 3.1693582660433485e-07, "logits/chosen": -2.283320665359497, "logits/rejected": -1.3676668405532837, "logps/chosen": -133.3212432861328, "logps/rejected": -186.27133178710938, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -3.518470287322998, "rewards/margins": 7.568263053894043, "rewards/rejected": -11.086732864379883, "step": 2246 }, { "epoch": 3.87, "learning_rate": 3.1682957926051845e-07, "logits/chosen": -2.024498462677002, "logits/rejected": -2.288588523864746, "logps/chosen": -120.54063415527344, "logps/rejected": -195.80902099609375, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -4.001536846160889, "rewards/margins": 7.008925914764404, "rewards/rejected": -11.010462760925293, "step": 2247 }, { "epoch": 3.87, "learning_rate": 3.167233319167021e-07, "logits/chosen": -2.2119369506835938, "logits/rejected": -2.206005573272705, "logps/chosen": -123.00370788574219, "logps/rejected": -179.71478271484375, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.5843758583068848, "rewards/margins": 6.458742618560791, "rewards/rejected": -10.043118476867676, "step": 2248 }, { "epoch": 3.87, "learning_rate": 3.1661708457288565e-07, "logits/chosen": -2.1284313201904297, "logits/rejected": -2.0699126720428467, "logps/chosen": -134.03440856933594, "logps/rejected": -234.78997802734375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -3.2489092350006104, "rewards/margins": 10.324965476989746, "rewards/rejected": -13.573874473571777, "step": 2249 }, { "epoch": 3.87, "learning_rate": 3.1651083722906925e-07, "logits/chosen": -2.1673202514648438, "logits/rejected": -2.1592540740966797, "logps/chosen": -147.28785705566406, "logps/rejected": -250.2382049560547, "loss": 0.0856, "rewards/accuracies": 1.0, "rewards/chosen": -4.768425464630127, "rewards/margins": 11.620891571044922, "rewards/rejected": -16.38931655883789, "step": 2250 }, { "epoch": 3.87, "learning_rate": 3.1640458988525285e-07, "logits/chosen": -2.216299533843994, "logits/rejected": -2.184264659881592, "logps/chosen": -131.60464477539062, "logps/rejected": -217.3634033203125, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": -4.989749908447266, "rewards/margins": 8.862873077392578, "rewards/rejected": -13.85262393951416, "step": 2251 }, { "epoch": 3.88, "learning_rate": 3.1629834254143644e-07, "logits/chosen": -2.0729827880859375, "logits/rejected": -1.9198976755142212, "logps/chosen": -124.20111083984375, "logps/rejected": -186.04965209960938, "loss": 0.0883, "rewards/accuracies": 1.0, "rewards/chosen": -4.0707011222839355, "rewards/margins": 6.778458595275879, "rewards/rejected": -10.849159240722656, "step": 2252 }, { "epoch": 3.88, "learning_rate": 3.1619209519762004e-07, "logits/chosen": -1.771310567855835, "logits/rejected": -2.118544101715088, "logps/chosen": -147.90951538085938, "logps/rejected": -233.74630737304688, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -5.446127891540527, "rewards/margins": 7.975683212280273, "rewards/rejected": -13.4218111038208, "step": 2253 }, { "epoch": 3.88, "learning_rate": 3.1608584785380364e-07, "logits/chosen": -2.19301176071167, "logits/rejected": -2.2565717697143555, "logps/chosen": -148.0819549560547, "logps/rejected": -231.6881561279297, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -5.032900810241699, "rewards/margins": 9.015856742858887, "rewards/rejected": -14.048757553100586, "step": 2254 }, { "epoch": 3.88, "learning_rate": 3.1597960050998724e-07, "logits/chosen": -2.304185628890991, "logits/rejected": -1.9204316139221191, "logps/chosen": -128.72096252441406, "logps/rejected": -212.30453491210938, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": -3.717555284500122, "rewards/margins": 9.439994812011719, "rewards/rejected": -13.157550811767578, "step": 2255 }, { "epoch": 3.88, "learning_rate": 3.158733531661708e-07, "logits/chosen": -1.8727601766586304, "logits/rejected": -2.2060368061065674, "logps/chosen": -120.51519775390625, "logps/rejected": -269.7455139160156, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -3.319413185119629, "rewards/margins": 12.70290470123291, "rewards/rejected": -16.02231788635254, "step": 2256 }, { "epoch": 3.88, "learning_rate": 3.1576710582235444e-07, "logits/chosen": -2.1529271602630615, "logits/rejected": -1.9516903162002563, "logps/chosen": -99.5755615234375, "logps/rejected": -205.22207641601562, "loss": 0.1295, "rewards/accuracies": 1.0, "rewards/chosen": -2.473329544067383, "rewards/margins": 11.264129638671875, "rewards/rejected": -13.737459182739258, "step": 2257 }, { "epoch": 3.89, "learning_rate": 3.1566085847853804e-07, "logits/chosen": -2.0261714458465576, "logits/rejected": -2.1930394172668457, "logps/chosen": -118.65335083007812, "logps/rejected": -205.5396728515625, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -3.353368043899536, "rewards/margins": 9.21870231628418, "rewards/rejected": -12.572071075439453, "step": 2258 }, { "epoch": 3.89, "learning_rate": 3.155546111347216e-07, "logits/chosen": -2.1232643127441406, "logits/rejected": -2.2397522926330566, "logps/chosen": -135.90151977539062, "logps/rejected": -200.20347595214844, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -4.493211269378662, "rewards/margins": 7.006227016448975, "rewards/rejected": -11.499438285827637, "step": 2259 }, { "epoch": 3.89, "learning_rate": 3.1544836379090524e-07, "logits/chosen": -2.1128830909729004, "logits/rejected": -2.172055244445801, "logps/chosen": -131.54153442382812, "logps/rejected": -200.2108612060547, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.4352312088012695, "rewards/margins": 7.658012390136719, "rewards/rejected": -12.093242645263672, "step": 2260 }, { "epoch": 3.89, "learning_rate": 3.153421164470888e-07, "logits/chosen": -2.163053512573242, "logits/rejected": -2.336519241333008, "logps/chosen": -130.84298706054688, "logps/rejected": -245.67416381835938, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -4.576406478881836, "rewards/margins": 8.70015811920166, "rewards/rejected": -13.276564598083496, "step": 2261 }, { "epoch": 3.89, "learning_rate": 3.152358691032724e-07, "logits/chosen": -1.778170108795166, "logits/rejected": -2.0530104637145996, "logps/chosen": -149.2801513671875, "logps/rejected": -252.42202758789062, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.455329895019531, "rewards/margins": 9.301164627075195, "rewards/rejected": -15.756494522094727, "step": 2262 }, { "epoch": 3.9, "learning_rate": 3.1512962175945603e-07, "logits/chosen": -1.9742722511291504, "logits/rejected": -2.218209981918335, "logps/chosen": -122.52789306640625, "logps/rejected": -207.84713745117188, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -4.354581832885742, "rewards/margins": 8.829097747802734, "rewards/rejected": -13.183679580688477, "step": 2263 }, { "epoch": 3.9, "learning_rate": 3.150233744156396e-07, "logits/chosen": -2.3816306591033936, "logits/rejected": -2.207183361053467, "logps/chosen": -138.9967041015625, "logps/rejected": -212.56309509277344, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -4.342706680297852, "rewards/margins": 7.368103981018066, "rewards/rejected": -11.710811614990234, "step": 2264 }, { "epoch": 3.9, "learning_rate": 3.149171270718232e-07, "logits/chosen": -2.2058091163635254, "logits/rejected": -2.1826884746551514, "logps/chosen": -107.22854614257812, "logps/rejected": -206.22947692871094, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -3.1742353439331055, "rewards/margins": 9.297704696655273, "rewards/rejected": -12.471940994262695, "step": 2265 }, { "epoch": 3.9, "learning_rate": 3.148108797280068e-07, "logits/chosen": -2.1778478622436523, "logits/rejected": -2.0533785820007324, "logps/chosen": -160.4285888671875, "logps/rejected": -215.5598602294922, "loss": 0.1008, "rewards/accuracies": 0.75, "rewards/chosen": -7.675981044769287, "rewards/margins": 4.835824012756348, "rewards/rejected": -12.511804580688477, "step": 2266 }, { "epoch": 3.9, "learning_rate": 3.147046323841904e-07, "logits/chosen": -2.2716164588928223, "logits/rejected": -2.2764434814453125, "logps/chosen": -107.56947326660156, "logps/rejected": -231.6670379638672, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -2.5977697372436523, "rewards/margins": 12.224468231201172, "rewards/rejected": -14.822237014770508, "step": 2267 }, { "epoch": 3.9, "learning_rate": 3.1459838504037403e-07, "logits/chosen": -2.2307353019714355, "logits/rejected": -2.0000674724578857, "logps/chosen": -131.33856201171875, "logps/rejected": -190.4063262939453, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -4.525855541229248, "rewards/margins": 5.772800445556641, "rewards/rejected": -10.29865550994873, "step": 2268 }, { "epoch": 3.91, "learning_rate": 3.1449213769655757e-07, "logits/chosen": -2.132338285446167, "logits/rejected": -2.0409603118896484, "logps/chosen": -151.79710388183594, "logps/rejected": -271.2362365722656, "loss": 0.0809, "rewards/accuracies": 1.0, "rewards/chosen": -4.615269184112549, "rewards/margins": 12.152523040771484, "rewards/rejected": -16.767791748046875, "step": 2269 }, { "epoch": 3.91, "learning_rate": 3.1438589035274117e-07, "logits/chosen": -2.298832893371582, "logits/rejected": -2.193338632583618, "logps/chosen": -96.20398712158203, "logps/rejected": -185.29263305664062, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.958284854888916, "rewards/margins": 8.522501945495605, "rewards/rejected": -10.480786323547363, "step": 2270 }, { "epoch": 3.91, "learning_rate": 3.1427964300892477e-07, "logits/chosen": -2.259613513946533, "logits/rejected": -1.8241392374038696, "logps/chosen": -120.58531188964844, "logps/rejected": -191.2967529296875, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -3.5539121627807617, "rewards/margins": 9.91680908203125, "rewards/rejected": -13.470720291137695, "step": 2271 }, { "epoch": 3.91, "learning_rate": 3.1417339566510837e-07, "logits/chosen": -1.8781288862228394, "logits/rejected": -1.9835468530654907, "logps/chosen": -97.14483642578125, "logps/rejected": -260.08563232421875, "loss": 0.1534, "rewards/accuracies": 1.0, "rewards/chosen": -1.8345081806182861, "rewards/margins": 12.996356964111328, "rewards/rejected": -14.830864906311035, "step": 2272 }, { "epoch": 3.91, "learning_rate": 3.140671483212919e-07, "logits/chosen": -1.9254508018493652, "logits/rejected": -2.327277660369873, "logps/chosen": -118.3178939819336, "logps/rejected": -241.50927734375, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -3.9027838706970215, "rewards/margins": 10.232126235961914, "rewards/rejected": -14.134909629821777, "step": 2273 }, { "epoch": 3.91, "learning_rate": 3.1396090097747557e-07, "logits/chosen": -2.147012233734131, "logits/rejected": -2.380127429962158, "logps/chosen": -151.26129150390625, "logps/rejected": -247.59988403320312, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -4.6345109939575195, "rewards/margins": 9.188180923461914, "rewards/rejected": -13.82269287109375, "step": 2274 }, { "epoch": 3.92, "learning_rate": 3.1385465363365917e-07, "logits/chosen": -2.3097262382507324, "logits/rejected": -2.003075361251831, "logps/chosen": -158.02816772460938, "logps/rejected": -208.1587677001953, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -6.8850932121276855, "rewards/margins": 6.925663471221924, "rewards/rejected": -13.81075668334961, "step": 2275 }, { "epoch": 3.92, "learning_rate": 3.137484062898427e-07, "logits/chosen": -2.323741912841797, "logits/rejected": -1.832848310470581, "logps/chosen": -154.41073608398438, "logps/rejected": -201.00946044921875, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -5.632110118865967, "rewards/margins": 7.0212602615356445, "rewards/rejected": -12.653369903564453, "step": 2276 }, { "epoch": 3.92, "learning_rate": 3.1364215894602636e-07, "logits/chosen": -2.208413600921631, "logits/rejected": -2.0763654708862305, "logps/chosen": -109.95429992675781, "logps/rejected": -202.84739685058594, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.78190279006958, "rewards/margins": 9.166082382202148, "rewards/rejected": -12.947985649108887, "step": 2277 }, { "epoch": 3.92, "learning_rate": 3.135359116022099e-07, "logits/chosen": -2.276510000228882, "logits/rejected": -2.1633214950561523, "logps/chosen": -127.8351821899414, "logps/rejected": -222.82211303710938, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -5.056825160980225, "rewards/margins": 8.80964183807373, "rewards/rejected": -13.866467475891113, "step": 2278 }, { "epoch": 3.92, "learning_rate": 3.134296642583935e-07, "logits/chosen": -1.9875582456588745, "logits/rejected": -2.222750663757324, "logps/chosen": -118.7178726196289, "logps/rejected": -174.97158813476562, "loss": 0.0979, "rewards/accuracies": 0.75, "rewards/chosen": -4.665177822113037, "rewards/margins": 5.385127544403076, "rewards/rejected": -10.050305366516113, "step": 2279 }, { "epoch": 3.92, "learning_rate": 3.1332341691457716e-07, "logits/chosen": -1.6231111288070679, "logits/rejected": -2.245194673538208, "logps/chosen": -103.08724975585938, "logps/rejected": -224.81370544433594, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": -2.280919075012207, "rewards/margins": 10.669488906860352, "rewards/rejected": -12.950408935546875, "step": 2280 }, { "epoch": 3.93, "learning_rate": 3.132171695707607e-07, "logits/chosen": -2.104558229446411, "logits/rejected": -2.232869863510132, "logps/chosen": -137.99871826171875, "logps/rejected": -259.9052734375, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": -5.521299362182617, "rewards/margins": 11.667837142944336, "rewards/rejected": -17.189136505126953, "step": 2281 }, { "epoch": 3.93, "learning_rate": 3.131109222269443e-07, "logits/chosen": -2.0905115604400635, "logits/rejected": -2.156975269317627, "logps/chosen": -127.03044128417969, "logps/rejected": -213.236083984375, "loss": 0.1053, "rewards/accuracies": 1.0, "rewards/chosen": -3.7341299057006836, "rewards/margins": 9.16767406463623, "rewards/rejected": -12.901803970336914, "step": 2282 }, { "epoch": 3.93, "learning_rate": 3.130046748831279e-07, "logits/chosen": -1.8409417867660522, "logits/rejected": -2.351489782333374, "logps/chosen": -100.08154296875, "logps/rejected": -175.9529571533203, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.4224138259887695, "rewards/margins": 6.414620399475098, "rewards/rejected": -9.837034225463867, "step": 2283 }, { "epoch": 3.93, "learning_rate": 3.128984275393115e-07, "logits/chosen": -2.107030153274536, "logits/rejected": -2.2520620822906494, "logps/chosen": -141.84255981445312, "logps/rejected": -229.16497802734375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -5.399320602416992, "rewards/margins": 8.612502098083496, "rewards/rejected": -14.011823654174805, "step": 2284 }, { "epoch": 3.93, "learning_rate": 3.127921801954951e-07, "logits/chosen": -2.197587251663208, "logits/rejected": -2.2359237670898438, "logps/chosen": -110.51496887207031, "logps/rejected": -194.43484497070312, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -3.991323471069336, "rewards/margins": 7.316738605499268, "rewards/rejected": -11.308062553405762, "step": 2285 }, { "epoch": 3.93, "learning_rate": 3.126859328516787e-07, "logits/chosen": -2.159092664718628, "logits/rejected": -2.0094940662384033, "logps/chosen": -96.6094970703125, "logps/rejected": -239.51702880859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.0694613456726074, "rewards/margins": 14.05478286743164, "rewards/rejected": -16.124244689941406, "step": 2286 }, { "epoch": 3.94, "learning_rate": 3.125796855078623e-07, "logits/chosen": -2.03975772857666, "logits/rejected": -2.2848055362701416, "logps/chosen": -114.5382080078125, "logps/rejected": -224.12440490722656, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -3.797125816345215, "rewards/margins": 10.132375717163086, "rewards/rejected": -13.929502487182617, "step": 2287 }, { "epoch": 3.94, "learning_rate": 3.1247343816404585e-07, "logits/chosen": -2.3201842308044434, "logits/rejected": -1.6587923765182495, "logps/chosen": -145.20875549316406, "logps/rejected": -214.87142944335938, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.711441993713379, "rewards/margins": 7.691916465759277, "rewards/rejected": -12.403358459472656, "step": 2288 }, { "epoch": 3.94, "learning_rate": 3.123671908202295e-07, "logits/chosen": -2.3823039531707764, "logits/rejected": -1.4494438171386719, "logps/chosen": -148.03050231933594, "logps/rejected": -197.76434326171875, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -4.038261413574219, "rewards/margins": 9.27598762512207, "rewards/rejected": -13.314249038696289, "step": 2289 }, { "epoch": 3.94, "learning_rate": 3.122609434764131e-07, "logits/chosen": -2.2987875938415527, "logits/rejected": -2.2175686359405518, "logps/chosen": -152.60377502441406, "logps/rejected": -237.62469482421875, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -5.305246353149414, "rewards/margins": 8.82901382446289, "rewards/rejected": -14.134259223937988, "step": 2290 }, { "epoch": 3.94, "learning_rate": 3.1215469613259664e-07, "logits/chosen": -1.9017198085784912, "logits/rejected": -2.32899808883667, "logps/chosen": -93.19816589355469, "logps/rejected": -238.12298583984375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -3.2723727226257324, "rewards/margins": 12.274009704589844, "rewards/rejected": -15.54638385772705, "step": 2291 }, { "epoch": 3.94, "learning_rate": 3.120484487887803e-07, "logits/chosen": -2.102858066558838, "logits/rejected": -2.066650867462158, "logps/chosen": -127.05287170410156, "logps/rejected": -188.6245880126953, "loss": 0.0249, "rewards/accuracies": 0.75, "rewards/chosen": -3.3862969875335693, "rewards/margins": 7.176644325256348, "rewards/rejected": -10.562941551208496, "step": 2292 }, { "epoch": 3.95, "learning_rate": 3.1194220144496384e-07, "logits/chosen": -2.1201870441436768, "logits/rejected": -2.3743724822998047, "logps/chosen": -146.67733764648438, "logps/rejected": -274.15081787109375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -5.064462661743164, "rewards/margins": 10.60085391998291, "rewards/rejected": -15.66531753540039, "step": 2293 }, { "epoch": 3.95, "learning_rate": 3.1183595410114744e-07, "logits/chosen": -1.7474273443222046, "logits/rejected": -2.120459794998169, "logps/chosen": -137.3887939453125, "logps/rejected": -243.97979736328125, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -5.702602386474609, "rewards/margins": 9.705782890319824, "rewards/rejected": -15.408385276794434, "step": 2294 }, { "epoch": 3.95, "learning_rate": 3.117297067573311e-07, "logits/chosen": -2.0579872131347656, "logits/rejected": -2.272054672241211, "logps/chosen": -137.45355224609375, "logps/rejected": -238.34161376953125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -4.670898914337158, "rewards/margins": 9.225765228271484, "rewards/rejected": -13.8966646194458, "step": 2295 }, { "epoch": 3.95, "learning_rate": 3.1162345941351464e-07, "logits/chosen": -2.0988755226135254, "logits/rejected": -2.290743589401245, "logps/chosen": -105.33602905273438, "logps/rejected": -221.57379150390625, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -2.8726511001586914, "rewards/margins": 10.921892166137695, "rewards/rejected": -13.79454231262207, "step": 2296 }, { "epoch": 3.95, "learning_rate": 3.1151721206969824e-07, "logits/chosen": -1.9700767993927002, "logits/rejected": -2.1571903228759766, "logps/chosen": -122.9309310913086, "logps/rejected": -235.78057861328125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.21708607673645, "rewards/margins": 9.724729537963867, "rewards/rejected": -12.941816329956055, "step": 2297 }, { "epoch": 3.96, "learning_rate": 3.1141096472588183e-07, "logits/chosen": -2.142340660095215, "logits/rejected": -2.0480668544769287, "logps/chosen": -131.21788024902344, "logps/rejected": -216.41192626953125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -5.188521385192871, "rewards/margins": 9.463966369628906, "rewards/rejected": -14.652487754821777, "step": 2298 }, { "epoch": 3.96, "learning_rate": 3.1130471738206543e-07, "logits/chosen": -2.1926541328430176, "logits/rejected": -1.9979054927825928, "logps/chosen": -124.33171081542969, "logps/rejected": -217.71522521972656, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -4.465545654296875, "rewards/margins": 9.582685470581055, "rewards/rejected": -14.04823112487793, "step": 2299 }, { "epoch": 3.96, "learning_rate": 3.11198470038249e-07, "logits/chosen": -2.3027124404907227, "logits/rejected": -2.108537197113037, "logps/chosen": -131.892822265625, "logps/rejected": -200.2627410888672, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": -4.673721790313721, "rewards/margins": 7.827713489532471, "rewards/rejected": -12.501436233520508, "step": 2300 }, { "epoch": 3.96, "learning_rate": 3.1109222269443263e-07, "logits/chosen": -2.225419521331787, "logits/rejected": -1.6996792554855347, "logps/chosen": -116.02790832519531, "logps/rejected": -211.64028930664062, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -3.8423736095428467, "rewards/margins": 10.375946044921875, "rewards/rejected": -14.218320846557617, "step": 2301 }, { "epoch": 3.96, "learning_rate": 3.1098597535061623e-07, "logits/chosen": -2.1444997787475586, "logits/rejected": -2.142321825027466, "logps/chosen": -134.43362426757812, "logps/rejected": -202.21104431152344, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -3.2492144107818604, "rewards/margins": 7.947614669799805, "rewards/rejected": -11.196828842163086, "step": 2302 }, { "epoch": 3.96, "learning_rate": 3.108797280067998e-07, "logits/chosen": -2.221230983734131, "logits/rejected": -2.219125986099243, "logps/chosen": -132.5652313232422, "logps/rejected": -206.27972412109375, "loss": 0.0562, "rewards/accuracies": 0.75, "rewards/chosen": -4.330785751342773, "rewards/margins": 7.7439961433410645, "rewards/rejected": -12.07478141784668, "step": 2303 }, { "epoch": 3.97, "learning_rate": 3.1077348066298343e-07, "logits/chosen": -2.318527936935425, "logits/rejected": -2.0796918869018555, "logps/chosen": -105.16232299804688, "logps/rejected": -220.7604217529297, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -2.2389450073242188, "rewards/margins": 11.890867233276367, "rewards/rejected": -14.129813194274902, "step": 2304 }, { "epoch": 3.97, "learning_rate": 3.10667233319167e-07, "logits/chosen": -2.2519867420196533, "logits/rejected": -2.137683629989624, "logps/chosen": -113.04559326171875, "logps/rejected": -202.21861267089844, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.5606815814971924, "rewards/margins": 9.320880889892578, "rewards/rejected": -11.881563186645508, "step": 2305 }, { "epoch": 3.97, "learning_rate": 3.1056098597535057e-07, "logits/chosen": -1.796843409538269, "logits/rejected": -2.134413957595825, "logps/chosen": -126.61294555664062, "logps/rejected": -253.3708953857422, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -4.1634931564331055, "rewards/margins": 11.076236724853516, "rewards/rejected": -15.239729881286621, "step": 2306 }, { "epoch": 3.97, "learning_rate": 3.104547386315342e-07, "logits/chosen": -2.1806437969207764, "logits/rejected": -1.9779491424560547, "logps/chosen": -137.93756103515625, "logps/rejected": -221.88653564453125, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": -6.753722190856934, "rewards/margins": 8.86447811126709, "rewards/rejected": -15.618199348449707, "step": 2307 }, { "epoch": 3.97, "learning_rate": 3.1034849128771777e-07, "logits/chosen": -1.8803024291992188, "logits/rejected": -2.3250365257263184, "logps/chosen": -150.7313995361328, "logps/rejected": -255.7545166015625, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -6.126584529876709, "rewards/margins": 9.559347152709961, "rewards/rejected": -15.685932159423828, "step": 2308 }, { "epoch": 3.97, "learning_rate": 3.102422439439014e-07, "logits/chosen": -1.5757040977478027, "logits/rejected": -2.121494770050049, "logps/chosen": -112.13447570800781, "logps/rejected": -242.64706420898438, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -3.8926541805267334, "rewards/margins": 10.97542953491211, "rewards/rejected": -14.868083953857422, "step": 2309 }, { "epoch": 3.98, "learning_rate": 3.1013599660008497e-07, "logits/chosen": -1.971497654914856, "logits/rejected": -2.2883524894714355, "logps/chosen": -113.88069152832031, "logps/rejected": -228.2238006591797, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -3.260955572128296, "rewards/margins": 9.786663055419922, "rewards/rejected": -13.047618865966797, "step": 2310 }, { "epoch": 3.98, "learning_rate": 3.1002974925626857e-07, "logits/chosen": -1.9694985151290894, "logits/rejected": -2.037261486053467, "logps/chosen": -117.76506805419922, "logps/rejected": -205.04324340820312, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -5.909588813781738, "rewards/margins": 8.22472858428955, "rewards/rejected": -14.134317398071289, "step": 2311 }, { "epoch": 3.98, "learning_rate": 3.099235019124522e-07, "logits/chosen": -1.9381643533706665, "logits/rejected": -2.399667739868164, "logps/chosen": -131.3464813232422, "logps/rejected": -202.10720825195312, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": -4.363377571105957, "rewards/margins": 5.55964469909668, "rewards/rejected": -9.923022270202637, "step": 2312 }, { "epoch": 3.98, "learning_rate": 3.0981725456863577e-07, "logits/chosen": -2.187349796295166, "logits/rejected": -2.1765565872192383, "logps/chosen": -110.80441284179688, "logps/rejected": -204.69793701171875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -3.3463706970214844, "rewards/margins": 8.801597595214844, "rewards/rejected": -12.147968292236328, "step": 2313 }, { "epoch": 3.98, "learning_rate": 3.0971100722481936e-07, "logits/chosen": -1.9807109832763672, "logits/rejected": -2.2255594730377197, "logps/chosen": -133.10484313964844, "logps/rejected": -221.65615844726562, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -5.170632362365723, "rewards/margins": 6.825947284698486, "rewards/rejected": -11.99657917022705, "step": 2314 }, { "epoch": 3.98, "learning_rate": 3.0960475988100296e-07, "logits/chosen": -2.2899162769317627, "logits/rejected": -2.022585868835449, "logps/chosen": -135.19003295898438, "logps/rejected": -197.80615234375, "loss": 0.1575, "rewards/accuracies": 1.0, "rewards/chosen": -4.626062393188477, "rewards/margins": 7.873939037322998, "rewards/rejected": -12.500001907348633, "step": 2315 }, { "epoch": 3.99, "learning_rate": 3.0949851253718656e-07, "logits/chosen": -2.0550382137298584, "logits/rejected": -2.476294994354248, "logps/chosen": -93.96807861328125, "logps/rejected": -195.21104431152344, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.9541797637939453, "rewards/margins": 9.642679214477539, "rewards/rejected": -11.596858978271484, "step": 2316 }, { "epoch": 3.99, "learning_rate": 3.0939226519337016e-07, "logits/chosen": -2.3286421298980713, "logits/rejected": -1.571778655052185, "logps/chosen": -166.60894775390625, "logps/rejected": -208.28738403320312, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -6.852140426635742, "rewards/margins": 6.816952705383301, "rewards/rejected": -13.66909408569336, "step": 2317 }, { "epoch": 3.99, "learning_rate": 3.0928601784955376e-07, "logits/chosen": -2.2515406608581543, "logits/rejected": -2.1430633068084717, "logps/chosen": -133.08404541015625, "logps/rejected": -206.05795288085938, "loss": 0.1229, "rewards/accuracies": 0.75, "rewards/chosen": -3.842498302459717, "rewards/margins": 7.048697471618652, "rewards/rejected": -10.891196250915527, "step": 2318 }, { "epoch": 3.99, "learning_rate": 3.0917977050573736e-07, "logits/chosen": -2.035714864730835, "logits/rejected": -2.294421434402466, "logps/chosen": -130.65431213378906, "logps/rejected": -203.88446044921875, "loss": 0.1532, "rewards/accuracies": 1.0, "rewards/chosen": -5.614397048950195, "rewards/margins": 6.644561290740967, "rewards/rejected": -12.25895881652832, "step": 2319 }, { "epoch": 3.99, "learning_rate": 3.090735231619209e-07, "logits/chosen": -1.9814878702163696, "logits/rejected": -2.2818713188171387, "logps/chosen": -130.1280059814453, "logps/rejected": -229.38455200195312, "loss": 0.1324, "rewards/accuracies": 1.0, "rewards/chosen": -5.147054195404053, "rewards/margins": 7.449304103851318, "rewards/rejected": -12.596358299255371, "step": 2320 }, { "epoch": 3.99, "learning_rate": 3.0896727581810456e-07, "logits/chosen": -2.2854349613189697, "logits/rejected": -2.2925918102264404, "logps/chosen": -137.22314453125, "logps/rejected": -230.28836059570312, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -4.830197334289551, "rewards/margins": 9.1104154586792, "rewards/rejected": -13.94061279296875, "step": 2321 }, { "epoch": 4.0, "learning_rate": 3.0886102847428816e-07, "logits/chosen": -2.0309600830078125, "logits/rejected": -2.13564395904541, "logps/chosen": -112.9156265258789, "logps/rejected": -202.51605224609375, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -2.681098461151123, "rewards/margins": 8.827988624572754, "rewards/rejected": -11.509087562561035, "step": 2322 }, { "epoch": 4.0, "learning_rate": 3.087547811304717e-07, "logits/chosen": -2.2061686515808105, "logits/rejected": -1.7514634132385254, "logps/chosen": -130.88470458984375, "logps/rejected": -210.42124938964844, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -4.9389119148254395, "rewards/margins": 7.94821834564209, "rewards/rejected": -12.887130737304688, "step": 2323 }, { "epoch": 4.0, "learning_rate": 3.0864853378665535e-07, "logits/chosen": -2.1105237007141113, "logits/rejected": -2.264573097229004, "logps/chosen": -118.86567687988281, "logps/rejected": -209.74114990234375, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -3.9326555728912354, "rewards/margins": 9.178487777709961, "rewards/rejected": -13.111144065856934, "step": 2324 }, { "epoch": 4.0, "learning_rate": 3.085422864428389e-07, "logits/chosen": -2.176640510559082, "logits/rejected": -2.3685359954833984, "logps/chosen": -114.37723541259766, "logps/rejected": -201.7198944091797, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.359264850616455, "rewards/margins": 8.849662780761719, "rewards/rejected": -11.208928108215332, "step": 2325 }, { "epoch": 4.0, "learning_rate": 3.084360390990225e-07, "logits/chosen": -2.1467676162719727, "logits/rejected": -2.210082769393921, "logps/chosen": -162.7275390625, "logps/rejected": -243.28518676757812, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -7.604156494140625, "rewards/margins": 7.908136367797852, "rewards/rejected": -15.512292861938477, "step": 2326 }, { "epoch": 4.01, "learning_rate": 3.0832979175520615e-07, "logits/chosen": -2.1134657859802246, "logits/rejected": -2.0445568561553955, "logps/chosen": -127.40421295166016, "logps/rejected": -242.8414306640625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.780463695526123, "rewards/margins": 10.865427017211914, "rewards/rejected": -16.645891189575195, "step": 2327 }, { "epoch": 4.01, "learning_rate": 3.082235444113897e-07, "logits/chosen": -2.194772243499756, "logits/rejected": -2.1211938858032227, "logps/chosen": -151.37387084960938, "logps/rejected": -246.14324951171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.7804155349731445, "rewards/margins": 9.362648010253906, "rewards/rejected": -16.143062591552734, "step": 2328 }, { "epoch": 4.01, "learning_rate": 3.081172970675733e-07, "logits/chosen": -2.1364004611968994, "logits/rejected": -2.297687292098999, "logps/chosen": -134.01138305664062, "logps/rejected": -236.33743286132812, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.601160526275635, "rewards/margins": 9.718443870544434, "rewards/rejected": -14.319604873657227, "step": 2329 }, { "epoch": 4.01, "learning_rate": 3.080110497237569e-07, "logits/chosen": -1.8506019115447998, "logits/rejected": -2.148043632507324, "logps/chosen": -133.56982421875, "logps/rejected": -251.79043579101562, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.706721782684326, "rewards/margins": 11.10433578491211, "rewards/rejected": -16.81105613708496, "step": 2330 }, { "epoch": 4.01, "learning_rate": 3.079048023799405e-07, "logits/chosen": -2.1007165908813477, "logits/rejected": -2.3693695068359375, "logps/chosen": -75.61033630371094, "logps/rejected": -249.4969024658203, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.3102645874023438, "rewards/margins": 16.556379318237305, "rewards/rejected": -17.86664390563965, "step": 2331 }, { "epoch": 4.01, "learning_rate": 3.0779855503612404e-07, "logits/chosen": -2.1472291946411133, "logits/rejected": -2.369823455810547, "logps/chosen": -126.7929916381836, "logps/rejected": -235.3115234375, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -4.428932189941406, "rewards/margins": 8.760780334472656, "rewards/rejected": -13.189712524414062, "step": 2332 }, { "epoch": 4.02, "learning_rate": 3.076923076923077e-07, "logits/chosen": -2.3248255252838135, "logits/rejected": -2.1454100608825684, "logps/chosen": -131.44476318359375, "logps/rejected": -219.88768005371094, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -3.8242244720458984, "rewards/margins": 9.60673713684082, "rewards/rejected": -13.430961608886719, "step": 2333 }, { "epoch": 4.02, "learning_rate": 3.075860603484913e-07, "logits/chosen": -2.2659060955047607, "logits/rejected": -2.0450446605682373, "logps/chosen": -123.82843017578125, "logps/rejected": -236.2897186279297, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.02729606628418, "rewards/margins": 11.365199089050293, "rewards/rejected": -15.392494201660156, "step": 2334 }, { "epoch": 4.02, "learning_rate": 3.0747981300467484e-07, "logits/chosen": -1.845292568206787, "logits/rejected": -2.368098735809326, "logps/chosen": -116.29548645019531, "logps/rejected": -206.3726806640625, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -5.556488990783691, "rewards/margins": 7.275601387023926, "rewards/rejected": -12.832090377807617, "step": 2335 }, { "epoch": 4.02, "learning_rate": 3.073735656608585e-07, "logits/chosen": -2.022582530975342, "logits/rejected": -2.00362229347229, "logps/chosen": -142.45504760742188, "logps/rejected": -222.94107055664062, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.473020553588867, "rewards/margins": 10.390449523925781, "rewards/rejected": -14.863471031188965, "step": 2336 }, { "epoch": 4.02, "learning_rate": 3.0726731831704203e-07, "logits/chosen": -2.1459643840789795, "logits/rejected": -2.1256189346313477, "logps/chosen": -129.24046325683594, "logps/rejected": -223.24392700195312, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -6.187946796417236, "rewards/margins": 9.201876640319824, "rewards/rejected": -15.389823913574219, "step": 2337 }, { "epoch": 4.02, "learning_rate": 3.0716107097322563e-07, "logits/chosen": -2.3500044345855713, "logits/rejected": -1.9210689067840576, "logps/chosen": -101.60823059082031, "logps/rejected": -204.88868713378906, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.269189476966858, "rewards/margins": 10.640174865722656, "rewards/rejected": -11.9093656539917, "step": 2338 }, { "epoch": 4.03, "learning_rate": 3.070548236294093e-07, "logits/chosen": -1.9456830024719238, "logits/rejected": -2.237492084503174, "logps/chosen": -104.14344024658203, "logps/rejected": -190.87640380859375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -3.473921298980713, "rewards/margins": 8.449573516845703, "rewards/rejected": -11.923495292663574, "step": 2339 }, { "epoch": 4.03, "learning_rate": 3.0694857628559283e-07, "logits/chosen": -2.3303589820861816, "logits/rejected": -1.858647346496582, "logps/chosen": -124.15840148925781, "logps/rejected": -238.56854248046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.561771869659424, "rewards/margins": 11.893426895141602, "rewards/rejected": -15.455198287963867, "step": 2340 }, { "epoch": 4.03, "learning_rate": 3.0684232894177643e-07, "logits/chosen": -1.800940752029419, "logits/rejected": -2.037519693374634, "logps/chosen": -125.60099792480469, "logps/rejected": -226.81256103515625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.531818866729736, "rewards/margins": 9.347344398498535, "rewards/rejected": -13.87916374206543, "step": 2341 }, { "epoch": 4.03, "learning_rate": 3.0673608159796003e-07, "logits/chosen": -2.0627386569976807, "logits/rejected": -2.0517783164978027, "logps/chosen": -140.06082153320312, "logps/rejected": -255.197021484375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -5.403217792510986, "rewards/margins": 11.032493591308594, "rewards/rejected": -16.435710906982422, "step": 2342 }, { "epoch": 4.03, "learning_rate": 3.0662983425414363e-07, "logits/chosen": -2.0850815773010254, "logits/rejected": -2.2148563861846924, "logps/chosen": -104.61599731445312, "logps/rejected": -189.3096160888672, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.5210230350494385, "rewards/margins": 7.742550373077393, "rewards/rejected": -11.26357364654541, "step": 2343 }, { "epoch": 4.03, "learning_rate": 3.065235869103272e-07, "logits/chosen": -2.0878188610076904, "logits/rejected": -2.3365793228149414, "logps/chosen": -90.40047454833984, "logps/rejected": -235.4969482421875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.680649518966675, "rewards/margins": 13.175848960876465, "rewards/rejected": -15.856497764587402, "step": 2344 }, { "epoch": 4.04, "learning_rate": 3.064173395665108e-07, "logits/chosen": -2.0945310592651367, "logits/rejected": -2.3108620643615723, "logps/chosen": -106.96769714355469, "logps/rejected": -214.79173278808594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.5804779529571533, "rewards/margins": 10.029942512512207, "rewards/rejected": -13.610421180725098, "step": 2345 }, { "epoch": 4.04, "learning_rate": 3.063110922226944e-07, "logits/chosen": -2.1223530769348145, "logits/rejected": -2.1456902027130127, "logps/chosen": -128.52011108398438, "logps/rejected": -281.84332275390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.085261821746826, "rewards/margins": 10.963969230651855, "rewards/rejected": -15.049230575561523, "step": 2346 }, { "epoch": 4.04, "learning_rate": 3.0620484487887797e-07, "logits/chosen": -2.076627731323242, "logits/rejected": -2.199535846710205, "logps/chosen": -140.26806640625, "logps/rejected": -245.6345672607422, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -4.85352087020874, "rewards/margins": 10.177990913391113, "rewards/rejected": -15.031511306762695, "step": 2347 }, { "epoch": 4.04, "learning_rate": 3.060985975350616e-07, "logits/chosen": -2.286050796508789, "logits/rejected": -1.596954107284546, "logps/chosen": -125.5084228515625, "logps/rejected": -179.39306640625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.163918495178223, "rewards/margins": 7.584061145782471, "rewards/rejected": -11.747979164123535, "step": 2348 }, { "epoch": 4.04, "learning_rate": 3.059923501912452e-07, "logits/chosen": -2.1439690589904785, "logits/rejected": -2.2589399814605713, "logps/chosen": -114.01573181152344, "logps/rejected": -195.5591583251953, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.1398963928222656, "rewards/margins": 8.427228927612305, "rewards/rejected": -11.56712532043457, "step": 2349 }, { "epoch": 4.04, "learning_rate": 3.0588610284742877e-07, "logits/chosen": -2.0890159606933594, "logits/rejected": -2.2379539012908936, "logps/chosen": -103.1308822631836, "logps/rejected": -224.58773803710938, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3360183238983154, "rewards/margins": 12.042325973510742, "rewards/rejected": -15.37834358215332, "step": 2350 }, { "epoch": 4.05, "learning_rate": 3.057798555036124e-07, "logits/chosen": -1.861839771270752, "logits/rejected": -2.135225772857666, "logps/chosen": -113.47901153564453, "logps/rejected": -222.89678955078125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.3176071643829346, "rewards/margins": 9.909074783325195, "rewards/rejected": -13.226682662963867, "step": 2351 }, { "epoch": 4.05, "learning_rate": 3.0567360815979596e-07, "logits/chosen": -2.229111433029175, "logits/rejected": -2.295738935470581, "logps/chosen": -140.38238525390625, "logps/rejected": -233.50323486328125, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -4.562093734741211, "rewards/margins": 10.265698432922363, "rewards/rejected": -14.827792167663574, "step": 2352 }, { "epoch": 4.05, "learning_rate": 3.055673608159796e-07, "logits/chosen": -2.067211866378784, "logits/rejected": -2.1071417331695557, "logps/chosen": -125.21302795410156, "logps/rejected": -217.80088806152344, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.565755844116211, "rewards/margins": 8.81084156036377, "rewards/rejected": -13.376598358154297, "step": 2353 }, { "epoch": 4.05, "learning_rate": 3.054611134721632e-07, "logits/chosen": -2.1224212646484375, "logits/rejected": -2.244633674621582, "logps/chosen": -125.08142852783203, "logps/rejected": -224.38815307617188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.576580047607422, "rewards/margins": 9.562845230102539, "rewards/rejected": -13.139425277709961, "step": 2354 }, { "epoch": 4.05, "learning_rate": 3.0535486612834676e-07, "logits/chosen": -1.994885802268982, "logits/rejected": -2.0196259021759033, "logps/chosen": -145.91603088378906, "logps/rejected": -214.3695526123047, "loss": 0.0393, "rewards/accuracies": 0.75, "rewards/chosen": -4.770391464233398, "rewards/margins": 6.945499420166016, "rewards/rejected": -11.715889930725098, "step": 2355 }, { "epoch": 4.06, "learning_rate": 3.052486187845304e-07, "logits/chosen": -2.1997382640838623, "logits/rejected": -2.0204854011535645, "logps/chosen": -142.32879638671875, "logps/rejected": -241.20420837402344, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.9723453521728516, "rewards/margins": 10.286648750305176, "rewards/rejected": -14.258993148803711, "step": 2356 }, { "epoch": 4.06, "learning_rate": 3.0514237144071396e-07, "logits/chosen": -1.8857240676879883, "logits/rejected": -2.270841598510742, "logps/chosen": -90.53703308105469, "logps/rejected": -253.21554565429688, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.9174258708953857, "rewards/margins": 13.38460636138916, "rewards/rejected": -15.302033424377441, "step": 2357 }, { "epoch": 4.06, "learning_rate": 3.0503612409689756e-07, "logits/chosen": -2.1285810470581055, "logits/rejected": -2.208791732788086, "logps/chosen": -116.80879974365234, "logps/rejected": -190.903076171875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.7274041175842285, "rewards/margins": 6.843044281005859, "rewards/rejected": -11.570448875427246, "step": 2358 }, { "epoch": 4.06, "learning_rate": 3.049298767530812e-07, "logits/chosen": -1.926150918006897, "logits/rejected": -2.2119665145874023, "logps/chosen": -104.84957885742188, "logps/rejected": -220.68544006347656, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.367884159088135, "rewards/margins": 9.880365371704102, "rewards/rejected": -15.248250007629395, "step": 2359 }, { "epoch": 4.06, "learning_rate": 3.0482362940926476e-07, "logits/chosen": -1.7034049034118652, "logits/rejected": -2.279895544052124, "logps/chosen": -117.19343566894531, "logps/rejected": -275.4193115234375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.210834503173828, "rewards/margins": 10.989659309387207, "rewards/rejected": -15.200493812561035, "step": 2360 }, { "epoch": 4.06, "learning_rate": 3.0471738206544835e-07, "logits/chosen": -2.337400197982788, "logits/rejected": -2.1865158081054688, "logps/chosen": -135.1411895751953, "logps/rejected": -261.6150207519531, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -4.5383405685424805, "rewards/margins": 11.957993507385254, "rewards/rejected": -16.496334075927734, "step": 2361 }, { "epoch": 4.07, "learning_rate": 3.0461113472163195e-07, "logits/chosen": -2.012030839920044, "logits/rejected": -2.077639102935791, "logps/chosen": -92.25218200683594, "logps/rejected": -214.05160522460938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.9454917907714844, "rewards/margins": 12.234240531921387, "rewards/rejected": -15.179732322692871, "step": 2362 }, { "epoch": 4.07, "learning_rate": 3.0450488737781555e-07, "logits/chosen": -1.7292563915252686, "logits/rejected": -2.4136195182800293, "logps/chosen": -149.03457641601562, "logps/rejected": -251.97389221191406, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -4.9330854415893555, "rewards/margins": 8.880095481872559, "rewards/rejected": -13.81318187713623, "step": 2363 }, { "epoch": 4.07, "learning_rate": 3.043986400339991e-07, "logits/chosen": -1.808403491973877, "logits/rejected": -1.9839518070220947, "logps/chosen": -126.81831359863281, "logps/rejected": -254.26101684570312, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.2226414680480957, "rewards/margins": 12.093387603759766, "rewards/rejected": -15.31602954864502, "step": 2364 }, { "epoch": 4.07, "learning_rate": 3.0429239269018275e-07, "logits/chosen": -2.139369010925293, "logits/rejected": -2.0627455711364746, "logps/chosen": -125.43168640136719, "logps/rejected": -215.0802001953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.28258752822876, "rewards/margins": 8.776429176330566, "rewards/rejected": -13.059017181396484, "step": 2365 }, { "epoch": 4.07, "learning_rate": 3.0418614534636635e-07, "logits/chosen": -2.1299915313720703, "logits/rejected": -1.9428396224975586, "logps/chosen": -125.84085083007812, "logps/rejected": -223.34768676757812, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": -4.691798686981201, "rewards/margins": 10.021923065185547, "rewards/rejected": -14.71372127532959, "step": 2366 }, { "epoch": 4.07, "learning_rate": 3.040798980025499e-07, "logits/chosen": -2.180082082748413, "logits/rejected": -2.0508275032043457, "logps/chosen": -128.71527099609375, "logps/rejected": -233.03956604003906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.147552013397217, "rewards/margins": 9.892740249633789, "rewards/rejected": -15.040292739868164, "step": 2367 }, { "epoch": 4.08, "learning_rate": 3.0397365065873355e-07, "logits/chosen": -2.0970869064331055, "logits/rejected": -2.178283452987671, "logps/chosen": -152.9388885498047, "logps/rejected": -277.073974609375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.164791584014893, "rewards/margins": 11.913213729858398, "rewards/rejected": -17.078004837036133, "step": 2368 }, { "epoch": 4.08, "learning_rate": 3.038674033149171e-07, "logits/chosen": -1.947426438331604, "logits/rejected": -1.8426752090454102, "logps/chosen": -130.02137756347656, "logps/rejected": -183.24046325683594, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.692310810089111, "rewards/margins": 6.740206718444824, "rewards/rejected": -11.432518005371094, "step": 2369 }, { "epoch": 4.08, "learning_rate": 3.037611559711007e-07, "logits/chosen": -1.4127402305603027, "logits/rejected": -2.0738120079040527, "logps/chosen": -104.16923522949219, "logps/rejected": -226.62637329101562, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.6039466857910156, "rewards/margins": 10.077411651611328, "rewards/rejected": -13.68135929107666, "step": 2370 }, { "epoch": 4.08, "learning_rate": 3.0365490862728434e-07, "logits/chosen": -1.8494577407836914, "logits/rejected": -1.9236820936203003, "logps/chosen": -180.35523986816406, "logps/rejected": -294.01239013671875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -7.279383659362793, "rewards/margins": 12.488859176635742, "rewards/rejected": -19.76824378967285, "step": 2371 }, { "epoch": 4.08, "learning_rate": 3.035486612834679e-07, "logits/chosen": -1.8379814624786377, "logits/rejected": -2.0838472843170166, "logps/chosen": -122.0501708984375, "logps/rejected": -274.2073059082031, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -4.693883895874023, "rewards/margins": 12.8878173828125, "rewards/rejected": -17.581701278686523, "step": 2372 }, { "epoch": 4.08, "learning_rate": 3.034424139396515e-07, "logits/chosen": -2.0359647274017334, "logits/rejected": -1.9208264350891113, "logps/chosen": -149.7729949951172, "logps/rejected": -227.01095581054688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.152807235717773, "rewards/margins": 9.145346641540527, "rewards/rejected": -16.298154830932617, "step": 2373 }, { "epoch": 4.09, "learning_rate": 3.033361665958351e-07, "logits/chosen": -1.9850292205810547, "logits/rejected": -1.9495162963867188, "logps/chosen": -127.31078338623047, "logps/rejected": -197.50643920898438, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -4.70225715637207, "rewards/margins": 5.505699157714844, "rewards/rejected": -10.20795726776123, "step": 2374 }, { "epoch": 4.09, "learning_rate": 3.032299192520187e-07, "logits/chosen": -1.7648851871490479, "logits/rejected": -2.0091004371643066, "logps/chosen": -140.69676208496094, "logps/rejected": -300.14385986328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.4701762199401855, "rewards/margins": 14.317137718200684, "rewards/rejected": -19.78731346130371, "step": 2375 }, { "epoch": 4.09, "learning_rate": 3.031236719082023e-07, "logits/chosen": -2.125645399093628, "logits/rejected": -2.059807538986206, "logps/chosen": -121.03263854980469, "logps/rejected": -217.2699432373047, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -4.593633651733398, "rewards/margins": 9.74663257598877, "rewards/rejected": -14.340265274047852, "step": 2376 }, { "epoch": 4.09, "learning_rate": 3.030174245643859e-07, "logits/chosen": -2.1818959712982178, "logits/rejected": -2.0402486324310303, "logps/chosen": -139.32635498046875, "logps/rejected": -251.79635620117188, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.345427513122559, "rewards/margins": 12.165246963500977, "rewards/rejected": -16.51067352294922, "step": 2377 }, { "epoch": 4.09, "learning_rate": 3.029111772205695e-07, "logits/chosen": -2.08554744720459, "logits/rejected": -1.8379021883010864, "logps/chosen": -147.822998046875, "logps/rejected": -238.8435821533203, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -6.620139122009277, "rewards/margins": 9.412189483642578, "rewards/rejected": -16.03232765197754, "step": 2378 }, { "epoch": 4.09, "learning_rate": 3.0280492987675303e-07, "logits/chosen": -1.5993294715881348, "logits/rejected": -2.177914619445801, "logps/chosen": -132.67483520507812, "logps/rejected": -242.80596923828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.705228805541992, "rewards/margins": 9.766008377075195, "rewards/rejected": -14.471237182617188, "step": 2379 }, { "epoch": 4.1, "learning_rate": 3.026986825329367e-07, "logits/chosen": -2.1123690605163574, "logits/rejected": -2.1392993927001953, "logps/chosen": -179.108154296875, "logps/rejected": -298.39849853515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.887455940246582, "rewards/margins": 11.095436096191406, "rewards/rejected": -18.982891082763672, "step": 2380 }, { "epoch": 4.1, "learning_rate": 3.025924351891203e-07, "logits/chosen": -1.5488131046295166, "logits/rejected": -2.082909107208252, "logps/chosen": -148.73037719726562, "logps/rejected": -268.737548828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.586255073547363, "rewards/margins": 10.844643592834473, "rewards/rejected": -16.430898666381836, "step": 2381 }, { "epoch": 4.1, "learning_rate": 3.024861878453038e-07, "logits/chosen": -2.061842918395996, "logits/rejected": -2.2433838844299316, "logps/chosen": -131.17295837402344, "logps/rejected": -273.48004150390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.255974292755127, "rewards/margins": 13.597342491149902, "rewards/rejected": -17.853317260742188, "step": 2382 }, { "epoch": 4.1, "learning_rate": 3.023799405014875e-07, "logits/chosen": -1.9004499912261963, "logits/rejected": -1.8546819686889648, "logps/chosen": -137.52243041992188, "logps/rejected": -238.2533416748047, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.358266830444336, "rewards/margins": 10.099949836730957, "rewards/rejected": -16.458215713500977, "step": 2383 }, { "epoch": 4.1, "learning_rate": 3.02273693157671e-07, "logits/chosen": -2.034614086151123, "logits/rejected": -1.6945421695709229, "logps/chosen": -126.86998748779297, "logps/rejected": -247.06031799316406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.333225727081299, "rewards/margins": 12.881340026855469, "rewards/rejected": -17.214567184448242, "step": 2384 }, { "epoch": 4.1, "learning_rate": 3.021674458138546e-07, "logits/chosen": -1.8280718326568604, "logits/rejected": -1.9799089431762695, "logps/chosen": -126.65219116210938, "logps/rejected": -216.84361267089844, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -5.3615899085998535, "rewards/margins": 8.326671600341797, "rewards/rejected": -13.688261032104492, "step": 2385 }, { "epoch": 4.11, "learning_rate": 3.020611984700383e-07, "logits/chosen": -1.6837594509124756, "logits/rejected": -2.1458816528320312, "logps/chosen": -111.19351196289062, "logps/rejected": -257.67230224609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.2289228439331055, "rewards/margins": 12.83556079864502, "rewards/rejected": -17.064483642578125, "step": 2386 }, { "epoch": 4.11, "learning_rate": 3.019549511262218e-07, "logits/chosen": -1.9212737083435059, "logits/rejected": -1.5908160209655762, "logps/chosen": -105.46227264404297, "logps/rejected": -226.83006286621094, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.1257166862487793, "rewards/margins": 12.257390975952148, "rewards/rejected": -15.383108139038086, "step": 2387 }, { "epoch": 4.11, "learning_rate": 3.018487037824054e-07, "logits/chosen": -2.080636501312256, "logits/rejected": -2.235555648803711, "logps/chosen": -154.6305389404297, "logps/rejected": -261.88629150390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.00788688659668, "rewards/margins": 11.955028533935547, "rewards/rejected": -16.96291732788086, "step": 2388 }, { "epoch": 4.11, "learning_rate": 3.01742456438589e-07, "logits/chosen": -2.0281825065612793, "logits/rejected": -1.9653706550598145, "logps/chosen": -121.52749633789062, "logps/rejected": -229.2446746826172, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -3.259490489959717, "rewards/margins": 11.368937492370605, "rewards/rejected": -14.62842845916748, "step": 2389 }, { "epoch": 4.11, "learning_rate": 3.016362090947726e-07, "logits/chosen": -2.004758358001709, "logits/rejected": -2.086676597595215, "logps/chosen": -105.65864562988281, "logps/rejected": -194.75509643554688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.047977447509766, "rewards/margins": 8.72041130065918, "rewards/rejected": -12.768388748168945, "step": 2390 }, { "epoch": 4.12, "learning_rate": 3.0152996175095616e-07, "logits/chosen": -1.972715139389038, "logits/rejected": -2.333249807357788, "logps/chosen": -156.64527893066406, "logps/rejected": -228.994873046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -6.093503952026367, "rewards/margins": 8.34372329711914, "rewards/rejected": -14.437226295471191, "step": 2391 }, { "epoch": 4.12, "learning_rate": 3.014237144071398e-07, "logits/chosen": -2.0670831203460693, "logits/rejected": -2.185659885406494, "logps/chosen": -125.19426727294922, "logps/rejected": -208.57281494140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.880331516265869, "rewards/margins": 8.083332061767578, "rewards/rejected": -11.963663101196289, "step": 2392 }, { "epoch": 4.12, "learning_rate": 3.013174670633234e-07, "logits/chosen": -1.6381075382232666, "logits/rejected": -2.1216137409210205, "logps/chosen": -160.28590393066406, "logps/rejected": -273.4568176269531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.978769302368164, "rewards/margins": 10.538864135742188, "rewards/rejected": -18.517635345458984, "step": 2393 }, { "epoch": 4.12, "learning_rate": 3.01211219719507e-07, "logits/chosen": -1.807998776435852, "logits/rejected": -2.0013909339904785, "logps/chosen": -123.41244506835938, "logps/rejected": -230.59677124023438, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -5.254967212677002, "rewards/margins": 9.088767051696777, "rewards/rejected": -14.343733787536621, "step": 2394 }, { "epoch": 4.12, "learning_rate": 3.011049723756906e-07, "logits/chosen": -1.62554931640625, "logits/rejected": -2.140718460083008, "logps/chosen": -163.68331909179688, "logps/rejected": -275.7508544921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.112081527709961, "rewards/margins": 9.035816192626953, "rewards/rejected": -17.147897720336914, "step": 2395 }, { "epoch": 4.12, "learning_rate": 3.0099872503187416e-07, "logits/chosen": -1.9779837131500244, "logits/rejected": -1.949305534362793, "logps/chosen": -128.44668579101562, "logps/rejected": -232.68756103515625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.187570095062256, "rewards/margins": 9.25540542602539, "rewards/rejected": -14.442975044250488, "step": 2396 }, { "epoch": 4.13, "learning_rate": 3.008924776880578e-07, "logits/chosen": -1.8342407941818237, "logits/rejected": -1.942123532295227, "logps/chosen": -170.77108764648438, "logps/rejected": -276.07244873046875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -8.212907791137695, "rewards/margins": 9.67033576965332, "rewards/rejected": -17.88324546813965, "step": 2397 }, { "epoch": 4.13, "learning_rate": 3.007862303442414e-07, "logits/chosen": -2.1455020904541016, "logits/rejected": -2.134981155395508, "logps/chosen": -136.89462280273438, "logps/rejected": -310.0031433105469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.3470072746276855, "rewards/margins": 15.835649490356445, "rewards/rejected": -22.18265724182129, "step": 2398 }, { "epoch": 4.13, "learning_rate": 3.0067998300042495e-07, "logits/chosen": -2.010591506958008, "logits/rejected": -2.0350468158721924, "logps/chosen": -127.232421875, "logps/rejected": -267.0736389160156, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.566321849822998, "rewards/margins": 15.211222648620605, "rewards/rejected": -18.777544021606445, "step": 2399 }, { "epoch": 4.13, "learning_rate": 3.005737356566086e-07, "logits/chosen": -1.492569923400879, "logits/rejected": -2.0475246906280518, "logps/chosen": -106.24691009521484, "logps/rejected": -240.29067993164062, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -4.421181678771973, "rewards/margins": 10.479742050170898, "rewards/rejected": -14.900924682617188, "step": 2400 }, { "epoch": 4.13, "learning_rate": 3.0046748831279215e-07, "logits/chosen": -2.1416006088256836, "logits/rejected": -1.8381946086883545, "logps/chosen": -146.708740234375, "logps/rejected": -222.35775756835938, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -4.921308517456055, "rewards/margins": 8.107111930847168, "rewards/rejected": -13.028421401977539, "step": 2401 }, { "epoch": 4.13, "learning_rate": 3.0036124096897575e-07, "logits/chosen": -1.731548547744751, "logits/rejected": -1.8970155715942383, "logps/chosen": -119.67652893066406, "logps/rejected": -264.7410583496094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.619497299194336, "rewards/margins": 14.101541519165039, "rewards/rejected": -18.721038818359375, "step": 2402 }, { "epoch": 4.14, "learning_rate": 3.002549936251594e-07, "logits/chosen": -2.1103355884552, "logits/rejected": -1.9507403373718262, "logps/chosen": -152.3909912109375, "logps/rejected": -275.2162780761719, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -5.817244529724121, "rewards/margins": 12.002885818481445, "rewards/rejected": -17.82012939453125, "step": 2403 }, { "epoch": 4.14, "learning_rate": 3.0014874628134295e-07, "logits/chosen": -1.591400384902954, "logits/rejected": -2.166994333267212, "logps/chosen": -126.7528305053711, "logps/rejected": -239.02621459960938, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.046938896179199, "rewards/margins": 9.224387168884277, "rewards/rejected": -15.271326065063477, "step": 2404 }, { "epoch": 4.14, "learning_rate": 3.0004249893752655e-07, "logits/chosen": -2.0580708980560303, "logits/rejected": -1.9962774515151978, "logps/chosen": -147.73350524902344, "logps/rejected": -271.26971435546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.870586395263672, "rewards/margins": 10.911356925964355, "rewards/rejected": -16.781944274902344, "step": 2405 }, { "epoch": 4.14, "learning_rate": 2.9993625159371015e-07, "logits/chosen": -1.8762750625610352, "logits/rejected": -2.4070048332214355, "logps/chosen": -130.0784912109375, "logps/rejected": -238.93975830078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.475822925567627, "rewards/margins": 9.146127700805664, "rewards/rejected": -13.62195110321045, "step": 2406 }, { "epoch": 4.14, "learning_rate": 2.9983000424989374e-07, "logits/chosen": -2.0040736198425293, "logits/rejected": -1.9662425518035889, "logps/chosen": -122.17878723144531, "logps/rejected": -221.4340057373047, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.277311325073242, "rewards/margins": 9.487115859985352, "rewards/rejected": -13.76442813873291, "step": 2407 }, { "epoch": 4.14, "learning_rate": 2.9972375690607734e-07, "logits/chosen": -1.938323974609375, "logits/rejected": -1.3969701528549194, "logps/chosen": -155.89324951171875, "logps/rejected": -257.9056396484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.924201965332031, "rewards/margins": 11.946853637695312, "rewards/rejected": -16.871055603027344, "step": 2408 }, { "epoch": 4.15, "learning_rate": 2.9961750956226094e-07, "logits/chosen": -2.1159605979919434, "logits/rejected": -1.9940340518951416, "logps/chosen": -125.16637420654297, "logps/rejected": -223.96511840820312, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -5.096870422363281, "rewards/margins": 9.993072509765625, "rewards/rejected": -15.089942932128906, "step": 2409 }, { "epoch": 4.15, "learning_rate": 2.9951126221844454e-07, "logits/chosen": -1.952324390411377, "logits/rejected": -1.9621362686157227, "logps/chosen": -141.66299438476562, "logps/rejected": -264.99725341796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.860304832458496, "rewards/margins": 12.24153995513916, "rewards/rejected": -17.101844787597656, "step": 2410 }, { "epoch": 4.15, "learning_rate": 2.994050148746281e-07, "logits/chosen": -2.182542324066162, "logits/rejected": -1.8578691482543945, "logps/chosen": -132.36581420898438, "logps/rejected": -232.08953857421875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -5.1316118240356445, "rewards/margins": 10.042993545532227, "rewards/rejected": -15.174604415893555, "step": 2411 }, { "epoch": 4.15, "learning_rate": 2.9929876753081174e-07, "logits/chosen": -2.0263075828552246, "logits/rejected": -1.8164448738098145, "logps/chosen": -147.90667724609375, "logps/rejected": -215.6170196533203, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -6.23654317855835, "rewards/margins": 8.476009368896484, "rewards/rejected": -14.712552070617676, "step": 2412 }, { "epoch": 4.15, "learning_rate": 2.9919252018699534e-07, "logits/chosen": -1.9600415229797363, "logits/rejected": -1.8393831253051758, "logps/chosen": -110.72623443603516, "logps/rejected": -184.8036651611328, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.801732063293457, "rewards/margins": 8.910333633422852, "rewards/rejected": -11.712066650390625, "step": 2413 }, { "epoch": 4.15, "learning_rate": 2.990862728431789e-07, "logits/chosen": -2.2207751274108887, "logits/rejected": -1.311215877532959, "logps/chosen": -143.31509399414062, "logps/rejected": -209.837158203125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -4.48710823059082, "rewards/margins": 10.757552146911621, "rewards/rejected": -15.244660377502441, "step": 2414 }, { "epoch": 4.16, "learning_rate": 2.9898002549936254e-07, "logits/chosen": -1.9952995777130127, "logits/rejected": -2.000370740890503, "logps/chosen": -139.01123046875, "logps/rejected": -276.76165771484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.254604339599609, "rewards/margins": 12.788447380065918, "rewards/rejected": -18.043052673339844, "step": 2415 }, { "epoch": 4.16, "learning_rate": 2.988737781555461e-07, "logits/chosen": -2.22965145111084, "logits/rejected": -2.211958885192871, "logps/chosen": -109.49607849121094, "logps/rejected": -240.41925048828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.1928622722625732, "rewards/margins": 11.716349601745605, "rewards/rejected": -14.909212112426758, "step": 2416 }, { "epoch": 4.16, "learning_rate": 2.987675308117297e-07, "logits/chosen": -2.038306474685669, "logits/rejected": -1.796851634979248, "logps/chosen": -155.7628631591797, "logps/rejected": -229.2422637939453, "loss": 0.0265, "rewards/accuracies": 0.75, "rewards/chosen": -6.391966819763184, "rewards/margins": 8.462270736694336, "rewards/rejected": -14.85423755645752, "step": 2417 }, { "epoch": 4.16, "learning_rate": 2.986612834679133e-07, "logits/chosen": -2.096341848373413, "logits/rejected": -1.8887277841567993, "logps/chosen": -111.69659423828125, "logps/rejected": -219.49441528320312, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.574954032897949, "rewards/margins": 11.472334861755371, "rewards/rejected": -16.04728889465332, "step": 2418 }, { "epoch": 4.16, "learning_rate": 2.985550361240969e-07, "logits/chosen": -2.1156628131866455, "logits/rejected": -1.6522036790847778, "logps/chosen": -124.04702758789062, "logps/rejected": -197.10130310058594, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.287759304046631, "rewards/margins": 8.329410552978516, "rewards/rejected": -12.617170333862305, "step": 2419 }, { "epoch": 4.17, "learning_rate": 2.984487887802805e-07, "logits/chosen": -2.036034107208252, "logits/rejected": -2.0239098072052, "logps/chosen": -143.39007568359375, "logps/rejected": -249.12338256835938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.151936054229736, "rewards/margins": 10.843435287475586, "rewards/rejected": -15.99537181854248, "step": 2420 }, { "epoch": 4.17, "learning_rate": 2.983425414364641e-07, "logits/chosen": -2.211352586746216, "logits/rejected": -2.1101646423339844, "logps/chosen": -124.060791015625, "logps/rejected": -214.546142578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.077012538909912, "rewards/margins": 9.169888496398926, "rewards/rejected": -13.24690055847168, "step": 2421 }, { "epoch": 4.17, "learning_rate": 2.982362940926477e-07, "logits/chosen": -2.1025195121765137, "logits/rejected": -2.3192501068115234, "logps/chosen": -158.3645477294922, "logps/rejected": -263.0553894042969, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.193272590637207, "rewards/margins": 8.796321868896484, "rewards/rejected": -16.989593505859375, "step": 2422 }, { "epoch": 4.17, "learning_rate": 2.981300467488312e-07, "logits/chosen": -1.8435184955596924, "logits/rejected": -1.9206829071044922, "logps/chosen": -138.35911560058594, "logps/rejected": -273.78021240234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.677886009216309, "rewards/margins": 14.102886199951172, "rewards/rejected": -18.780771255493164, "step": 2423 }, { "epoch": 4.17, "learning_rate": 2.9802379940501487e-07, "logits/chosen": -2.2303197383880615, "logits/rejected": -2.083484172821045, "logps/chosen": -124.088623046875, "logps/rejected": -257.9029541015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.646018028259277, "rewards/margins": 12.091695785522461, "rewards/rejected": -17.737714767456055, "step": 2424 }, { "epoch": 4.17, "learning_rate": 2.9791755206119847e-07, "logits/chosen": -2.050875663757324, "logits/rejected": -2.1846158504486084, "logps/chosen": -152.11756896972656, "logps/rejected": -262.2452392578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.20750617980957, "rewards/margins": 11.149643898010254, "rewards/rejected": -15.357149124145508, "step": 2425 }, { "epoch": 4.18, "learning_rate": 2.97811304717382e-07, "logits/chosen": -1.942924976348877, "logits/rejected": -2.013791084289551, "logps/chosen": -123.74456787109375, "logps/rejected": -172.18081665039062, "loss": 0.0376, "rewards/accuracies": 0.75, "rewards/chosen": -5.568423748016357, "rewards/margins": 5.274342060089111, "rewards/rejected": -10.842766761779785, "step": 2426 }, { "epoch": 4.18, "learning_rate": 2.9770505737356567e-07, "logits/chosen": -2.1797382831573486, "logits/rejected": -2.105534315109253, "logps/chosen": -155.7509002685547, "logps/rejected": -263.71063232421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.572376251220703, "rewards/margins": 11.44929313659668, "rewards/rejected": -16.021671295166016, "step": 2427 }, { "epoch": 4.18, "learning_rate": 2.975988100297492e-07, "logits/chosen": -1.943058729171753, "logits/rejected": -2.1164259910583496, "logps/chosen": -110.86456298828125, "logps/rejected": -228.31884765625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.8371424674987793, "rewards/margins": 10.062192916870117, "rewards/rejected": -13.899334907531738, "step": 2428 }, { "epoch": 4.18, "learning_rate": 2.974925626859328e-07, "logits/chosen": -1.7472445964813232, "logits/rejected": -2.065213441848755, "logps/chosen": -99.2537612915039, "logps/rejected": -259.01715087890625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.487635374069214, "rewards/margins": 13.304327011108398, "rewards/rejected": -15.791961669921875, "step": 2429 }, { "epoch": 4.18, "learning_rate": 2.9738631534211647e-07, "logits/chosen": -1.7187601327896118, "logits/rejected": -2.2374863624572754, "logps/chosen": -129.5856170654297, "logps/rejected": -260.27392578125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -4.542540073394775, "rewards/margins": 12.540708541870117, "rewards/rejected": -17.083248138427734, "step": 2430 }, { "epoch": 4.18, "learning_rate": 2.972800679983e-07, "logits/chosen": -1.9431500434875488, "logits/rejected": -2.1229617595672607, "logps/chosen": -96.08805084228516, "logps/rejected": -230.67800903320312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.8789291381835938, "rewards/margins": 11.907953262329102, "rewards/rejected": -14.786882400512695, "step": 2431 }, { "epoch": 4.19, "learning_rate": 2.971738206544836e-07, "logits/chosen": -1.6297826766967773, "logits/rejected": -1.9654062986373901, "logps/chosen": -118.25555419921875, "logps/rejected": -222.13816833496094, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -5.8844170570373535, "rewards/margins": 9.76742172241211, "rewards/rejected": -15.651838302612305, "step": 2432 }, { "epoch": 4.19, "learning_rate": 2.970675733106672e-07, "logits/chosen": -2.076650381088257, "logits/rejected": -2.1841654777526855, "logps/chosen": -141.13629150390625, "logps/rejected": -234.07896423339844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.09912109375, "rewards/margins": 8.919624328613281, "rewards/rejected": -15.018745422363281, "step": 2433 }, { "epoch": 4.19, "learning_rate": 2.969613259668508e-07, "logits/chosen": -1.9579616785049438, "logits/rejected": -2.192507743835449, "logps/chosen": -163.63963317871094, "logps/rejected": -280.99517822265625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.5557169914245605, "rewards/margins": 11.906915664672852, "rewards/rejected": -18.462631225585938, "step": 2434 }, { "epoch": 4.19, "learning_rate": 2.9685507862303446e-07, "logits/chosen": -2.0856246948242188, "logits/rejected": -2.161933422088623, "logps/chosen": -123.48033905029297, "logps/rejected": -251.5290985107422, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.313458204269409, "rewards/margins": 12.381575584411621, "rewards/rejected": -15.695033073425293, "step": 2435 }, { "epoch": 4.19, "learning_rate": 2.96748831279218e-07, "logits/chosen": -1.7243456840515137, "logits/rejected": -2.265285015106201, "logps/chosen": -87.14257049560547, "logps/rejected": -269.9751892089844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.497685670852661, "rewards/margins": 15.466911315917969, "rewards/rejected": -17.964597702026367, "step": 2436 }, { "epoch": 4.19, "learning_rate": 2.966425839354016e-07, "logits/chosen": -1.7428624629974365, "logits/rejected": -2.082719326019287, "logps/chosen": -159.15048217773438, "logps/rejected": -286.4778137207031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.105125904083252, "rewards/margins": 10.444188117980957, "rewards/rejected": -17.549314498901367, "step": 2437 }, { "epoch": 4.2, "learning_rate": 2.965363365915852e-07, "logits/chosen": -2.014704465866089, "logits/rejected": -2.0870108604431152, "logps/chosen": -154.52243041992188, "logps/rejected": -297.5589599609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.709403038024902, "rewards/margins": 12.817350387573242, "rewards/rejected": -18.526752471923828, "step": 2438 }, { "epoch": 4.2, "learning_rate": 2.964300892477688e-07, "logits/chosen": -2.0474679470062256, "logits/rejected": -1.9040740728378296, "logps/chosen": -146.68389892578125, "logps/rejected": -224.98358154296875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -5.478584289550781, "rewards/margins": 8.717108726501465, "rewards/rejected": -14.195693969726562, "step": 2439 }, { "epoch": 4.2, "learning_rate": 2.963238419039524e-07, "logits/chosen": -1.8768947124481201, "logits/rejected": -2.0139312744140625, "logps/chosen": -105.5257797241211, "logps/rejected": -234.145263671875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.836958646774292, "rewards/margins": 13.027902603149414, "rewards/rejected": -14.864860534667969, "step": 2440 }, { "epoch": 4.2, "learning_rate": 2.96217594560136e-07, "logits/chosen": -2.1900057792663574, "logits/rejected": -1.8956294059753418, "logps/chosen": -143.0529022216797, "logps/rejected": -244.6771240234375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.350706100463867, "rewards/margins": 9.604987144470215, "rewards/rejected": -14.955692291259766, "step": 2441 }, { "epoch": 4.2, "learning_rate": 2.961113472163196e-07, "logits/chosen": -1.6391947269439697, "logits/rejected": -2.1661078929901123, "logps/chosen": -102.10121154785156, "logps/rejected": -264.58355712890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.356837034225464, "rewards/margins": 14.532665252685547, "rewards/rejected": -17.889503479003906, "step": 2442 }, { "epoch": 4.2, "learning_rate": 2.9600509987250315e-07, "logits/chosen": -1.9908798933029175, "logits/rejected": -2.234132766723633, "logps/chosen": -128.28794860839844, "logps/rejected": -250.9603271484375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.7919921875, "rewards/margins": 12.344923973083496, "rewards/rejected": -17.13691520690918, "step": 2443 }, { "epoch": 4.21, "learning_rate": 2.958988525286868e-07, "logits/chosen": -2.095026969909668, "logits/rejected": -1.766771912574768, "logps/chosen": -142.54788208007812, "logps/rejected": -224.9415283203125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -5.201080322265625, "rewards/margins": 8.505066871643066, "rewards/rejected": -13.706147193908691, "step": 2444 }, { "epoch": 4.21, "learning_rate": 2.9579260518487034e-07, "logits/chosen": -1.747914433479309, "logits/rejected": -2.129124164581299, "logps/chosen": -133.63046264648438, "logps/rejected": -236.6167755126953, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -5.10146951675415, "rewards/margins": 10.84807014465332, "rewards/rejected": -15.949538230895996, "step": 2445 }, { "epoch": 4.21, "learning_rate": 2.9568635784105394e-07, "logits/chosen": -2.0241551399230957, "logits/rejected": -2.0607781410217285, "logps/chosen": -227.71136474609375, "logps/rejected": -287.188232421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -9.432229995727539, "rewards/margins": 9.21602725982666, "rewards/rejected": -18.648258209228516, "step": 2446 }, { "epoch": 4.21, "learning_rate": 2.955801104972376e-07, "logits/chosen": -1.9334766864776611, "logits/rejected": -2.0195910930633545, "logps/chosen": -110.87613677978516, "logps/rejected": -244.88067626953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.6086716651916504, "rewards/margins": 13.194427490234375, "rewards/rejected": -16.8031005859375, "step": 2447 }, { "epoch": 4.21, "learning_rate": 2.9547386315342114e-07, "logits/chosen": -1.9611345529556274, "logits/rejected": -1.8869202136993408, "logps/chosen": -110.39610290527344, "logps/rejected": -239.96359252929688, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.5463638305664062, "rewards/margins": 12.17172622680664, "rewards/rejected": -15.718090057373047, "step": 2448 }, { "epoch": 4.22, "learning_rate": 2.9536761580960474e-07, "logits/chosen": -1.9975433349609375, "logits/rejected": -1.8480631113052368, "logps/chosen": -154.24697875976562, "logps/rejected": -289.1301574707031, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.804906845092773, "rewards/margins": 13.174722671508789, "rewards/rejected": -18.979631423950195, "step": 2449 }, { "epoch": 4.22, "learning_rate": 2.9526136846578834e-07, "logits/chosen": -2.1096174716949463, "logits/rejected": -2.2624316215515137, "logps/chosen": -138.68084716796875, "logps/rejected": -242.8457489013672, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.0279693603515625, "rewards/margins": 10.029016494750977, "rewards/rejected": -15.056985855102539, "step": 2450 }, { "epoch": 4.22, "learning_rate": 2.9515512112197194e-07, "logits/chosen": -1.9560980796813965, "logits/rejected": -1.959580659866333, "logps/chosen": -139.43873596191406, "logps/rejected": -228.34796142578125, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -5.933353424072266, "rewards/margins": 8.826215744018555, "rewards/rejected": -14.75956916809082, "step": 2451 }, { "epoch": 4.22, "learning_rate": 2.9504887377815554e-07, "logits/chosen": -2.0110559463500977, "logits/rejected": -1.8279767036437988, "logps/chosen": -146.20091247558594, "logps/rejected": -253.24417114257812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.175497531890869, "rewards/margins": 12.460561752319336, "rewards/rejected": -17.63606071472168, "step": 2452 }, { "epoch": 4.22, "learning_rate": 2.9494262643433914e-07, "logits/chosen": -2.0677762031555176, "logits/rejected": -1.9636142253875732, "logps/chosen": -131.4776611328125, "logps/rejected": -218.55377197265625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -4.074586868286133, "rewards/margins": 10.336723327636719, "rewards/rejected": -14.411310195922852, "step": 2453 }, { "epoch": 4.22, "learning_rate": 2.9483637909052273e-07, "logits/chosen": -2.0239129066467285, "logits/rejected": -2.022663116455078, "logps/chosen": -146.98468017578125, "logps/rejected": -254.6491241455078, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -6.918841361999512, "rewards/margins": 11.406576156616211, "rewards/rejected": -18.32541847229004, "step": 2454 }, { "epoch": 4.23, "learning_rate": 2.947301317467063e-07, "logits/chosen": -2.063056468963623, "logits/rejected": -1.9080681800842285, "logps/chosen": -125.19107055664062, "logps/rejected": -222.16021728515625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -5.800730228424072, "rewards/margins": 9.936582565307617, "rewards/rejected": -15.737313270568848, "step": 2455 }, { "epoch": 4.23, "learning_rate": 2.9462388440288993e-07, "logits/chosen": -1.9208511114120483, "logits/rejected": -2.02933406829834, "logps/chosen": -161.6536865234375, "logps/rejected": -277.2232666015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.888646125793457, "rewards/margins": 11.722173690795898, "rewards/rejected": -19.610820770263672, "step": 2456 }, { "epoch": 4.23, "learning_rate": 2.9451763705907353e-07, "logits/chosen": -2.0735349655151367, "logits/rejected": -1.8440747261047363, "logps/chosen": -98.19988250732422, "logps/rejected": -191.05178833007812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.2384464740753174, "rewards/margins": 9.434477806091309, "rewards/rejected": -11.672924995422363, "step": 2457 }, { "epoch": 4.23, "learning_rate": 2.944113897152571e-07, "logits/chosen": -2.0463626384735107, "logits/rejected": -1.9174212217330933, "logps/chosen": -148.1145782470703, "logps/rejected": -225.03883361816406, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.301697731018066, "rewards/margins": 8.886604309082031, "rewards/rejected": -15.188302040100098, "step": 2458 }, { "epoch": 4.23, "learning_rate": 2.9430514237144073e-07, "logits/chosen": -1.7219727039337158, "logits/rejected": -1.897006869316101, "logps/chosen": -136.3373260498047, "logps/rejected": -302.7232971191406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.991969108581543, "rewards/margins": 14.143437385559082, "rewards/rejected": -20.135406494140625, "step": 2459 }, { "epoch": 4.23, "learning_rate": 2.941988950276243e-07, "logits/chosen": -1.8167535066604614, "logits/rejected": -1.8796700239181519, "logps/chosen": -133.4654083251953, "logps/rejected": -225.45401000976562, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -5.405361175537109, "rewards/margins": 8.007906913757324, "rewards/rejected": -13.41326904296875, "step": 2460 }, { "epoch": 4.24, "learning_rate": 2.940926476838079e-07, "logits/chosen": -1.8000361919403076, "logits/rejected": -2.2066073417663574, "logps/chosen": -131.79425048828125, "logps/rejected": -257.5247497558594, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.916847229003906, "rewards/margins": 12.45343017578125, "rewards/rejected": -17.370277404785156, "step": 2461 }, { "epoch": 4.24, "learning_rate": 2.939864003399915e-07, "logits/chosen": -1.800816297531128, "logits/rejected": -2.0085644721984863, "logps/chosen": -155.1382293701172, "logps/rejected": -263.0213623046875, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -6.466789722442627, "rewards/margins": 10.204742431640625, "rewards/rejected": -16.671531677246094, "step": 2462 }, { "epoch": 4.24, "learning_rate": 2.9388015299617507e-07, "logits/chosen": -2.0387165546417236, "logits/rejected": -1.9958038330078125, "logps/chosen": -164.56202697753906, "logps/rejected": -271.879150390625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -7.333292007446289, "rewards/margins": 9.952375411987305, "rewards/rejected": -17.285667419433594, "step": 2463 }, { "epoch": 4.24, "learning_rate": 2.9377390565235867e-07, "logits/chosen": -1.8848851919174194, "logits/rejected": -2.2089293003082275, "logps/chosen": -166.29974365234375, "logps/rejected": -269.009033203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.481247901916504, "rewards/margins": 8.741387367248535, "rewards/rejected": -17.22263526916504, "step": 2464 }, { "epoch": 4.24, "learning_rate": 2.9366765830854227e-07, "logits/chosen": -1.9905247688293457, "logits/rejected": -1.7725379467010498, "logps/chosen": -141.06619262695312, "logps/rejected": -298.4923095703125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.729164123535156, "rewards/margins": 16.232059478759766, "rewards/rejected": -21.961223602294922, "step": 2465 }, { "epoch": 4.24, "learning_rate": 2.9356141096472587e-07, "logits/chosen": -2.174051284790039, "logits/rejected": -1.9428396224975586, "logps/chosen": -158.1348876953125, "logps/rejected": -277.83245849609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.682092189788818, "rewards/margins": 12.05207633972168, "rewards/rejected": -18.734167098999023, "step": 2466 }, { "epoch": 4.25, "learning_rate": 2.934551636209094e-07, "logits/chosen": -1.7548670768737793, "logits/rejected": -2.1267447471618652, "logps/chosen": -127.15204620361328, "logps/rejected": -270.14373779296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.838007926940918, "rewards/margins": 13.730733871459961, "rewards/rejected": -19.568740844726562, "step": 2467 }, { "epoch": 4.25, "learning_rate": 2.9334891627709307e-07, "logits/chosen": -1.9148447513580322, "logits/rejected": -2.0988030433654785, "logps/chosen": -128.2401123046875, "logps/rejected": -250.19607543945312, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -5.075090408325195, "rewards/margins": 12.020014762878418, "rewards/rejected": -17.09510612487793, "step": 2468 }, { "epoch": 4.25, "learning_rate": 2.9324266893327666e-07, "logits/chosen": -1.9050863981246948, "logits/rejected": -1.9452121257781982, "logps/chosen": -120.15009307861328, "logps/rejected": -233.39202880859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.034683704376221, "rewards/margins": 10.25650405883789, "rewards/rejected": -14.29118824005127, "step": 2469 }, { "epoch": 4.25, "learning_rate": 2.931364215894602e-07, "logits/chosen": -1.709417462348938, "logits/rejected": -2.0567073822021484, "logps/chosen": -133.43093872070312, "logps/rejected": -278.7051696777344, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.5477142333984375, "rewards/margins": 13.201412200927734, "rewards/rejected": -19.749126434326172, "step": 2470 }, { "epoch": 4.25, "learning_rate": 2.9303017424564386e-07, "logits/chosen": -2.0280601978302, "logits/rejected": -2.037830352783203, "logps/chosen": -152.3285369873047, "logps/rejected": -227.63412475585938, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -6.3092522621154785, "rewards/margins": 9.206469535827637, "rewards/rejected": -15.515722274780273, "step": 2471 }, { "epoch": 4.25, "learning_rate": 2.929239269018274e-07, "logits/chosen": -1.621518850326538, "logits/rejected": -2.052582263946533, "logps/chosen": -133.24566650390625, "logps/rejected": -239.0714111328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.476653099060059, "rewards/margins": 10.688711166381836, "rewards/rejected": -15.165363311767578, "step": 2472 }, { "epoch": 4.26, "learning_rate": 2.92817679558011e-07, "logits/chosen": -2.128356695175171, "logits/rejected": -1.8997137546539307, "logps/chosen": -189.9595947265625, "logps/rejected": -279.16064453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -8.907228469848633, "rewards/margins": 9.964827537536621, "rewards/rejected": -18.872055053710938, "step": 2473 }, { "epoch": 4.26, "learning_rate": 2.9271143221419466e-07, "logits/chosen": -1.6637263298034668, "logits/rejected": -1.868931770324707, "logps/chosen": -131.756103515625, "logps/rejected": -266.3962097167969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.1434550285339355, "rewards/margins": 13.164457321166992, "rewards/rejected": -17.307912826538086, "step": 2474 }, { "epoch": 4.26, "learning_rate": 2.926051848703782e-07, "logits/chosen": -1.9725501537322998, "logits/rejected": -1.9409668445587158, "logps/chosen": -137.4520263671875, "logps/rejected": -276.4214782714844, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.294955253601074, "rewards/margins": 13.276443481445312, "rewards/rejected": -18.571399688720703, "step": 2475 }, { "epoch": 4.26, "learning_rate": 2.924989375265618e-07, "logits/chosen": -2.0788159370422363, "logits/rejected": -1.7447395324707031, "logps/chosen": -173.82489013671875, "logps/rejected": -297.0769348144531, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.8872480392456055, "rewards/margins": 12.875493049621582, "rewards/rejected": -20.762741088867188, "step": 2476 }, { "epoch": 4.26, "learning_rate": 2.923926901827454e-07, "logits/chosen": -1.9492905139923096, "logits/rejected": -2.1205358505249023, "logps/chosen": -134.23374938964844, "logps/rejected": -275.1326904296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.863193988800049, "rewards/margins": 12.220317840576172, "rewards/rejected": -18.083511352539062, "step": 2477 }, { "epoch": 4.27, "learning_rate": 2.92286442838929e-07, "logits/chosen": -1.6635148525238037, "logits/rejected": -1.9330353736877441, "logps/chosen": -111.83773803710938, "logps/rejected": -260.59844970703125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.955657482147217, "rewards/margins": 13.576193809509277, "rewards/rejected": -17.531850814819336, "step": 2478 }, { "epoch": 4.27, "learning_rate": 2.9218019549511265e-07, "logits/chosen": -1.9458435773849487, "logits/rejected": -2.07122802734375, "logps/chosen": -123.70475006103516, "logps/rejected": -260.0963134765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.385733604431152, "rewards/margins": 13.265190124511719, "rewards/rejected": -17.650922775268555, "step": 2479 }, { "epoch": 4.27, "learning_rate": 2.920739481512962e-07, "logits/chosen": -1.9032875299453735, "logits/rejected": -2.026348352432251, "logps/chosen": -123.82555389404297, "logps/rejected": -231.96128845214844, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.478815078735352, "rewards/margins": 11.320423126220703, "rewards/rejected": -16.799238204956055, "step": 2480 }, { "epoch": 4.27, "learning_rate": 2.919677008074798e-07, "logits/chosen": -2.0352394580841064, "logits/rejected": -1.8621478080749512, "logps/chosen": -154.955322265625, "logps/rejected": -288.3630676269531, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -5.578922748565674, "rewards/margins": 12.699362754821777, "rewards/rejected": -18.27828598022461, "step": 2481 }, { "epoch": 4.27, "learning_rate": 2.918614534636634e-07, "logits/chosen": -1.857408046722412, "logits/rejected": -2.167910575866699, "logps/chosen": -124.6454849243164, "logps/rejected": -258.5937805175781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.7538704872131348, "rewards/margins": 12.600737571716309, "rewards/rejected": -16.3546085357666, "step": 2482 }, { "epoch": 4.27, "learning_rate": 2.91755206119847e-07, "logits/chosen": -1.728712558746338, "logits/rejected": -1.997578501701355, "logps/chosen": -135.99176025390625, "logps/rejected": -253.48751831054688, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.8741047382354736, "rewards/margins": 11.857269287109375, "rewards/rejected": -15.73137378692627, "step": 2483 }, { "epoch": 4.28, "learning_rate": 2.916489587760306e-07, "logits/chosen": -2.1269567012786865, "logits/rejected": -1.9446786642074585, "logps/chosen": -130.10792541503906, "logps/rejected": -278.9725036621094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.149675369262695, "rewards/margins": 14.194660186767578, "rewards/rejected": -18.344335556030273, "step": 2484 }, { "epoch": 4.28, "learning_rate": 2.915427114322142e-07, "logits/chosen": -1.9195959568023682, "logits/rejected": -1.9548295736312866, "logps/chosen": -122.74404907226562, "logps/rejected": -217.1043243408203, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -4.222332000732422, "rewards/margins": 9.85356330871582, "rewards/rejected": -14.075895309448242, "step": 2485 }, { "epoch": 4.28, "learning_rate": 2.914364640883978e-07, "logits/chosen": -1.7578409910202026, "logits/rejected": -2.1443979740142822, "logps/chosen": -151.4614715576172, "logps/rejected": -279.5909423828125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -8.085137367248535, "rewards/margins": 11.24136734008789, "rewards/rejected": -19.326505661010742, "step": 2486 }, { "epoch": 4.28, "learning_rate": 2.9133021674458134e-07, "logits/chosen": -2.0562057495117188, "logits/rejected": -1.9670312404632568, "logps/chosen": -151.7706298828125, "logps/rejected": -256.53179931640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.590739727020264, "rewards/margins": 10.995026588439941, "rewards/rejected": -17.585765838623047, "step": 2487 }, { "epoch": 4.28, "learning_rate": 2.91223969400765e-07, "logits/chosen": -2.159271240234375, "logits/rejected": -2.001856565475464, "logps/chosen": -150.94302368164062, "logps/rejected": -303.2073669433594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.776317596435547, "rewards/margins": 14.601304054260254, "rewards/rejected": -19.377622604370117, "step": 2488 }, { "epoch": 4.28, "learning_rate": 2.911177220569486e-07, "logits/chosen": -1.828893780708313, "logits/rejected": -1.855193853378296, "logps/chosen": -153.04632568359375, "logps/rejected": -249.82693481445312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.809230327606201, "rewards/margins": 10.851966857910156, "rewards/rejected": -15.661197662353516, "step": 2489 }, { "epoch": 4.29, "learning_rate": 2.9101147471313214e-07, "logits/chosen": -2.118300199508667, "logits/rejected": -1.8346354961395264, "logps/chosen": -134.5297393798828, "logps/rejected": -259.9264831542969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.107053279876709, "rewards/margins": 13.00228500366211, "rewards/rejected": -18.109336853027344, "step": 2490 }, { "epoch": 4.29, "learning_rate": 2.909052273693158e-07, "logits/chosen": -1.4328632354736328, "logits/rejected": -2.052438497543335, "logps/chosen": -104.16096496582031, "logps/rejected": -290.237548828125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -4.334986209869385, "rewards/margins": 15.517635345458984, "rewards/rejected": -19.85262107849121, "step": 2491 }, { "epoch": 4.29, "learning_rate": 2.9079898002549933e-07, "logits/chosen": -1.8401129245758057, "logits/rejected": -1.9544531106948853, "logps/chosen": -129.83242797851562, "logps/rejected": -253.26593017578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.816057205200195, "rewards/margins": 11.571025848388672, "rewards/rejected": -17.387083053588867, "step": 2492 }, { "epoch": 4.29, "learning_rate": 2.9069273268168293e-07, "logits/chosen": -2.227292060852051, "logits/rejected": -2.0158162117004395, "logps/chosen": -129.47328186035156, "logps/rejected": -214.06683349609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.1788182258605957, "rewards/margins": 9.458086967468262, "rewards/rejected": -12.6369047164917, "step": 2493 }, { "epoch": 4.29, "learning_rate": 2.905864853378666e-07, "logits/chosen": -2.0473246574401855, "logits/rejected": -2.064079999923706, "logps/chosen": -104.68447875976562, "logps/rejected": -278.238037109375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.744349479675293, "rewards/margins": 16.96506118774414, "rewards/rejected": -19.70941162109375, "step": 2494 }, { "epoch": 4.29, "learning_rate": 2.9048023799405013e-07, "logits/chosen": -1.9066370725631714, "logits/rejected": -2.06028413772583, "logps/chosen": -98.23945617675781, "logps/rejected": -255.4513397216797, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.5613698959350586, "rewards/margins": 14.019506454467773, "rewards/rejected": -16.580875396728516, "step": 2495 }, { "epoch": 4.3, "learning_rate": 2.9037399065023373e-07, "logits/chosen": -1.9527580738067627, "logits/rejected": -2.0076699256896973, "logps/chosen": -95.76438903808594, "logps/rejected": -240.1934814453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.156228542327881, "rewards/margins": 13.7080659866333, "rewards/rejected": -16.864294052124023, "step": 2496 }, { "epoch": 4.3, "learning_rate": 2.9026774330641733e-07, "logits/chosen": -2.0900259017944336, "logits/rejected": -1.8714957237243652, "logps/chosen": -130.61997985839844, "logps/rejected": -240.36074829101562, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.9498491287231445, "rewards/margins": 11.277978897094727, "rewards/rejected": -17.227828979492188, "step": 2497 }, { "epoch": 4.3, "learning_rate": 2.9016149596260093e-07, "logits/chosen": -1.836836338043213, "logits/rejected": -1.8139007091522217, "logps/chosen": -142.26663208007812, "logps/rejected": -233.09510803222656, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.008587837219238, "rewards/margins": 11.973828315734863, "rewards/rejected": -15.982416152954102, "step": 2498 }, { "epoch": 4.3, "learning_rate": 2.9005524861878447e-07, "logits/chosen": -1.9086471796035767, "logits/rejected": -2.0861904621124268, "logps/chosen": -106.69715881347656, "logps/rejected": -235.69764709472656, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.899951457977295, "rewards/margins": 11.919139862060547, "rewards/rejected": -15.819089889526367, "step": 2499 }, { "epoch": 4.3, "learning_rate": 2.899490012749681e-07, "logits/chosen": -1.8867524862289429, "logits/rejected": -2.055654525756836, "logps/chosen": -113.66975402832031, "logps/rejected": -227.86209106445312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.988739013671875, "rewards/margins": 10.786809921264648, "rewards/rejected": -14.77554988861084, "step": 2500 }, { "epoch": 4.3, "learning_rate": 2.898427539311517e-07, "logits/chosen": -1.9037692546844482, "logits/rejected": -1.9362170696258545, "logps/chosen": -117.07171630859375, "logps/rejected": -233.9656982421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.499828338623047, "rewards/margins": 11.710676193237305, "rewards/rejected": -17.21050453186035, "step": 2501 }, { "epoch": 4.31, "learning_rate": 2.8973650658733527e-07, "logits/chosen": -1.8060781955718994, "logits/rejected": -2.048584461212158, "logps/chosen": -99.80465698242188, "logps/rejected": -209.8468017578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.709906816482544, "rewards/margins": 9.427921295166016, "rewards/rejected": -13.137828826904297, "step": 2502 }, { "epoch": 4.31, "learning_rate": 2.896302592435189e-07, "logits/chosen": -1.9175912141799927, "logits/rejected": -1.7373273372650146, "logps/chosen": -143.30807495117188, "logps/rejected": -262.8630676269531, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": -5.011703968048096, "rewards/margins": 12.966180801391602, "rewards/rejected": -17.977886199951172, "step": 2503 }, { "epoch": 4.31, "learning_rate": 2.8952401189970247e-07, "logits/chosen": -1.7502422332763672, "logits/rejected": -2.0797178745269775, "logps/chosen": -174.11302185058594, "logps/rejected": -259.30340576171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -7.9339599609375, "rewards/margins": 8.131088256835938, "rewards/rejected": -16.065048217773438, "step": 2504 }, { "epoch": 4.31, "learning_rate": 2.8941776455588607e-07, "logits/chosen": -1.9790633916854858, "logits/rejected": -2.0023114681243896, "logps/chosen": -111.86769104003906, "logps/rejected": -244.8631591796875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -4.718418121337891, "rewards/margins": 13.059758186340332, "rewards/rejected": -17.778175354003906, "step": 2505 }, { "epoch": 4.31, "learning_rate": 2.893115172120697e-07, "logits/chosen": -2.164567232131958, "logits/rejected": -1.9922614097595215, "logps/chosen": -182.05953979492188, "logps/rejected": -304.0146179199219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.164058685302734, "rewards/margins": 13.670980453491211, "rewards/rejected": -21.835039138793945, "step": 2506 }, { "epoch": 4.31, "learning_rate": 2.8920526986825326e-07, "logits/chosen": -1.9020981788635254, "logits/rejected": -2.110964059829712, "logps/chosen": -150.7642364501953, "logps/rejected": -246.52456665039062, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -5.5896830558776855, "rewards/margins": 9.253840446472168, "rewards/rejected": -14.843523025512695, "step": 2507 }, { "epoch": 4.32, "learning_rate": 2.8909902252443686e-07, "logits/chosen": -1.7510898113250732, "logits/rejected": -1.882934808731079, "logps/chosen": -152.07745361328125, "logps/rejected": -261.00726318359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.533479690551758, "rewards/margins": 11.957826614379883, "rewards/rejected": -17.49130630493164, "step": 2508 }, { "epoch": 4.32, "learning_rate": 2.8899277518062046e-07, "logits/chosen": -1.9467172622680664, "logits/rejected": -2.0460104942321777, "logps/chosen": -148.88609313964844, "logps/rejected": -228.70159912109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.367496967315674, "rewards/margins": 9.66777229309082, "rewards/rejected": -15.035268783569336, "step": 2509 }, { "epoch": 4.32, "learning_rate": 2.8888652783680406e-07, "logits/chosen": -2.081482410430908, "logits/rejected": -2.061006546020508, "logps/chosen": -164.91127014160156, "logps/rejected": -288.8172607421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.432741641998291, "rewards/margins": 12.657092094421387, "rewards/rejected": -20.089834213256836, "step": 2510 }, { "epoch": 4.32, "learning_rate": 2.8878028049298766e-07, "logits/chosen": -2.15950345993042, "logits/rejected": -1.7106860876083374, "logps/chosen": -159.3591766357422, "logps/rejected": -274.99432373046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.57414436340332, "rewards/margins": 11.669779777526855, "rewards/rejected": -19.24392318725586, "step": 2511 }, { "epoch": 4.32, "learning_rate": 2.8867403314917126e-07, "logits/chosen": -2.1381077766418457, "logits/rejected": -2.1827683448791504, "logps/chosen": -116.05825805664062, "logps/rejected": -235.32403564453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.111548900604248, "rewards/margins": 10.774782180786133, "rewards/rejected": -14.886331558227539, "step": 2512 }, { "epoch": 4.33, "learning_rate": 2.8856778580535486e-07, "logits/chosen": -2.165029287338257, "logits/rejected": -1.8970924615859985, "logps/chosen": -165.6482391357422, "logps/rejected": -269.63623046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.020596504211426, "rewards/margins": 12.042266845703125, "rewards/rejected": -19.062862396240234, "step": 2513 }, { "epoch": 4.33, "learning_rate": 2.884615384615384e-07, "logits/chosen": -1.8757312297821045, "logits/rejected": -1.8868625164031982, "logps/chosen": -147.16171264648438, "logps/rejected": -246.83099365234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.513175964355469, "rewards/margins": 9.34262466430664, "rewards/rejected": -15.85580062866211, "step": 2514 }, { "epoch": 4.33, "learning_rate": 2.8835529111772206e-07, "logits/chosen": -2.015033483505249, "logits/rejected": -1.8997973203659058, "logps/chosen": -136.4960479736328, "logps/rejected": -244.08575439453125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -4.664586544036865, "rewards/margins": 10.843940734863281, "rewards/rejected": -15.508527755737305, "step": 2515 }, { "epoch": 4.33, "learning_rate": 2.8824904377390565e-07, "logits/chosen": -1.4830007553100586, "logits/rejected": -1.9565881490707397, "logps/chosen": -117.6956558227539, "logps/rejected": -232.67845153808594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.256766319274902, "rewards/margins": 9.879958152770996, "rewards/rejected": -14.136724472045898, "step": 2516 }, { "epoch": 4.33, "learning_rate": 2.881427964300892e-07, "logits/chosen": -2.2073886394500732, "logits/rejected": -1.8965129852294922, "logps/chosen": -166.62496948242188, "logps/rejected": -264.73077392578125, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -7.42307710647583, "rewards/margins": 11.869237899780273, "rewards/rejected": -19.292316436767578, "step": 2517 }, { "epoch": 4.33, "learning_rate": 2.8803654908627285e-07, "logits/chosen": -1.9266607761383057, "logits/rejected": -1.8318109512329102, "logps/chosen": -165.82485961914062, "logps/rejected": -297.6846618652344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.859292030334473, "rewards/margins": 14.371644973754883, "rewards/rejected": -20.23093605041504, "step": 2518 }, { "epoch": 4.34, "learning_rate": 2.879303017424564e-07, "logits/chosen": -1.901068091392517, "logits/rejected": -2.2144153118133545, "logps/chosen": -159.79293823242188, "logps/rejected": -273.4386291503906, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.637336254119873, "rewards/margins": 9.292338371276855, "rewards/rejected": -15.92967414855957, "step": 2519 }, { "epoch": 4.34, "learning_rate": 2.8782405439864005e-07, "logits/chosen": -1.8703721761703491, "logits/rejected": -2.06941819190979, "logps/chosen": -125.16148376464844, "logps/rejected": -259.83575439453125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -3.5455679893493652, "rewards/margins": 11.62099838256836, "rewards/rejected": -15.16656494140625, "step": 2520 }, { "epoch": 4.34, "learning_rate": 2.8771780705482365e-07, "logits/chosen": -2.154364824295044, "logits/rejected": -1.8096184730529785, "logps/chosen": -169.14987182617188, "logps/rejected": -236.87615966796875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -8.406853675842285, "rewards/margins": 7.914608001708984, "rewards/rejected": -16.321460723876953, "step": 2521 }, { "epoch": 4.34, "learning_rate": 2.876115597110072e-07, "logits/chosen": -1.8247734308242798, "logits/rejected": -1.9861159324645996, "logps/chosen": -135.39639282226562, "logps/rejected": -268.7041015625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -6.011064529418945, "rewards/margins": 12.788717269897461, "rewards/rejected": -18.799781799316406, "step": 2522 }, { "epoch": 4.34, "learning_rate": 2.8750531236719085e-07, "logits/chosen": -1.791506290435791, "logits/rejected": -1.8942034244537354, "logps/chosen": -130.34727478027344, "logps/rejected": -278.6983642578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.643200397491455, "rewards/margins": 15.602192878723145, "rewards/rejected": -20.245391845703125, "step": 2523 }, { "epoch": 4.34, "learning_rate": 2.873990650233744e-07, "logits/chosen": -2.259922504425049, "logits/rejected": -1.9223108291625977, "logps/chosen": -150.28436279296875, "logps/rejected": -227.91049194335938, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.233503341674805, "rewards/margins": 7.978682518005371, "rewards/rejected": -14.212186813354492, "step": 2524 }, { "epoch": 4.35, "learning_rate": 2.87292817679558e-07, "logits/chosen": -2.1494369506835938, "logits/rejected": -2.0954036712646484, "logps/chosen": -135.6579132080078, "logps/rejected": -248.05667114257812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.403295516967773, "rewards/margins": 11.895803451538086, "rewards/rejected": -16.29909896850586, "step": 2525 }, { "epoch": 4.35, "learning_rate": 2.8718657033574164e-07, "logits/chosen": -1.530121922492981, "logits/rejected": -2.2362592220306396, "logps/chosen": -167.68072509765625, "logps/rejected": -332.0926513671875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -7.729043960571289, "rewards/margins": 12.944770812988281, "rewards/rejected": -20.67381477355957, "step": 2526 }, { "epoch": 4.35, "learning_rate": 2.870803229919252e-07, "logits/chosen": -1.2709449529647827, "logits/rejected": -2.131410598754883, "logps/chosen": -129.68777465820312, "logps/rejected": -275.0184326171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.857274532318115, "rewards/margins": 10.656837463378906, "rewards/rejected": -17.514110565185547, "step": 2527 }, { "epoch": 4.35, "learning_rate": 2.869740756481088e-07, "logits/chosen": -2.188835859298706, "logits/rejected": -1.7397959232330322, "logps/chosen": -111.40876770019531, "logps/rejected": -230.21917724609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.7056796550750732, "rewards/margins": 11.760931015014648, "rewards/rejected": -15.466611862182617, "step": 2528 }, { "epoch": 4.35, "learning_rate": 2.868678283042924e-07, "logits/chosen": -2.064164400100708, "logits/rejected": -1.8827307224273682, "logps/chosen": -115.57611083984375, "logps/rejected": -222.5684051513672, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.332947254180908, "rewards/margins": 12.528663635253906, "rewards/rejected": -15.861610412597656, "step": 2529 }, { "epoch": 4.35, "learning_rate": 2.86761580960476e-07, "logits/chosen": -1.901174545288086, "logits/rejected": -1.9925930500030518, "logps/chosen": -111.63571166992188, "logps/rejected": -266.8799133300781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.0338759422302246, "rewards/margins": 13.909143447875977, "rewards/rejected": -16.94301986694336, "step": 2530 }, { "epoch": 4.36, "learning_rate": 2.8665533361665953e-07, "logits/chosen": -2.0949482917785645, "logits/rejected": -2.0943515300750732, "logps/chosen": -159.2994384765625, "logps/rejected": -240.3362274169922, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.474536895751953, "rewards/margins": 7.848850727081299, "rewards/rejected": -14.323387145996094, "step": 2531 }, { "epoch": 4.36, "learning_rate": 2.865490862728432e-07, "logits/chosen": -2.1470556259155273, "logits/rejected": -2.0869081020355225, "logps/chosen": -132.62033081054688, "logps/rejected": -256.1195068359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.698694944381714, "rewards/margins": 12.457000732421875, "rewards/rejected": -16.155696868896484, "step": 2532 }, { "epoch": 4.36, "learning_rate": 2.864428389290268e-07, "logits/chosen": -1.8639402389526367, "logits/rejected": -2.030364513397217, "logps/chosen": -140.57742309570312, "logps/rejected": -214.06651306152344, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.716082572937012, "rewards/margins": 7.209816932678223, "rewards/rejected": -12.925899505615234, "step": 2533 }, { "epoch": 4.36, "learning_rate": 2.8633659158521033e-07, "logits/chosen": -1.9976046085357666, "logits/rejected": -1.8569345474243164, "logps/chosen": -152.703369140625, "logps/rejected": -265.1969909667969, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.946671485900879, "rewards/margins": 13.225685119628906, "rewards/rejected": -18.1723575592041, "step": 2534 }, { "epoch": 4.36, "learning_rate": 2.86230344241394e-07, "logits/chosen": -1.8690954446792603, "logits/rejected": -2.0408222675323486, "logps/chosen": -171.66104125976562, "logps/rejected": -289.9903564453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.660646438598633, "rewards/margins": 13.012609481811523, "rewards/rejected": -19.673255920410156, "step": 2535 }, { "epoch": 4.36, "learning_rate": 2.8612409689757753e-07, "logits/chosen": -2.174715757369995, "logits/rejected": -1.4573798179626465, "logps/chosen": -153.72833251953125, "logps/rejected": -264.2215576171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.343571662902832, "rewards/margins": 13.905618667602539, "rewards/rejected": -19.249191284179688, "step": 2536 }, { "epoch": 4.37, "learning_rate": 2.860178495537611e-07, "logits/chosen": -2.0517308712005615, "logits/rejected": -2.2915637493133545, "logps/chosen": -137.4862060546875, "logps/rejected": -275.52581787109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.211367130279541, "rewards/margins": 12.337181091308594, "rewards/rejected": -16.54854965209961, "step": 2537 }, { "epoch": 4.37, "learning_rate": 2.859116022099448e-07, "logits/chosen": -1.6880367994308472, "logits/rejected": -2.0672988891601562, "logps/chosen": -128.6338348388672, "logps/rejected": -268.3788757324219, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -4.4524736404418945, "rewards/margins": 12.096710205078125, "rewards/rejected": -16.549182891845703, "step": 2538 }, { "epoch": 4.37, "learning_rate": 2.858053548661283e-07, "logits/chosen": -2.0808541774749756, "logits/rejected": -1.89842689037323, "logps/chosen": -119.17544555664062, "logps/rejected": -250.90908813476562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.356220245361328, "rewards/margins": 12.923482894897461, "rewards/rejected": -17.279705047607422, "step": 2539 }, { "epoch": 4.37, "learning_rate": 2.856991075223119e-07, "logits/chosen": -2.0034852027893066, "logits/rejected": -1.9463547468185425, "logps/chosen": -120.31787872314453, "logps/rejected": -213.4351806640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.4260854721069336, "rewards/margins": 10.609800338745117, "rewards/rejected": -14.035886764526367, "step": 2540 }, { "epoch": 4.37, "learning_rate": 2.855928601784955e-07, "logits/chosen": -2.0052788257598877, "logits/rejected": -1.782137155532837, "logps/chosen": -149.60446166992188, "logps/rejected": -239.6378631591797, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.21369743347168, "rewards/margins": 9.478804588317871, "rewards/rejected": -15.692502975463867, "step": 2541 }, { "epoch": 4.38, "learning_rate": 2.854866128346791e-07, "logits/chosen": -1.8801078796386719, "logits/rejected": -2.067997932434082, "logps/chosen": -139.11297607421875, "logps/rejected": -271.33807373046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.968869209289551, "rewards/margins": 12.595719337463379, "rewards/rejected": -18.56458854675293, "step": 2542 }, { "epoch": 4.38, "learning_rate": 2.853803654908627e-07, "logits/chosen": -1.998067021369934, "logits/rejected": -1.7568957805633545, "logps/chosen": -132.4618377685547, "logps/rejected": -229.51834106445312, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.515755653381348, "rewards/margins": 10.954629898071289, "rewards/rejected": -15.470385551452637, "step": 2543 }, { "epoch": 4.38, "learning_rate": 2.852741181470463e-07, "logits/chosen": -2.0188887119293213, "logits/rejected": -2.125532627105713, "logps/chosen": -116.89990234375, "logps/rejected": -246.7264404296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.092111587524414, "rewards/margins": 12.698780059814453, "rewards/rejected": -16.790891647338867, "step": 2544 }, { "epoch": 4.38, "learning_rate": 2.851678708032299e-07, "logits/chosen": -2.027202844619751, "logits/rejected": -1.6504344940185547, "logps/chosen": -150.5048370361328, "logps/rejected": -256.67181396484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.617198467254639, "rewards/margins": 11.661652565002441, "rewards/rejected": -16.278850555419922, "step": 2545 }, { "epoch": 4.38, "learning_rate": 2.8506162345941346e-07, "logits/chosen": -1.4130032062530518, "logits/rejected": -2.223975658416748, "logps/chosen": -154.00714111328125, "logps/rejected": -296.7228088378906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.19122838973999, "rewards/margins": 12.862288475036621, "rewards/rejected": -20.053516387939453, "step": 2546 }, { "epoch": 4.38, "learning_rate": 2.849553761155971e-07, "logits/chosen": -1.9136834144592285, "logits/rejected": -2.040109157562256, "logps/chosen": -143.84063720703125, "logps/rejected": -290.29254150390625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -5.779117584228516, "rewards/margins": 13.071686744689941, "rewards/rejected": -18.85080337524414, "step": 2547 }, { "epoch": 4.39, "learning_rate": 2.848491287717807e-07, "logits/chosen": -2.1094069480895996, "logits/rejected": -1.9506430625915527, "logps/chosen": -156.47512817382812, "logps/rejected": -248.17092895507812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.769948959350586, "rewards/margins": 10.65541934967041, "rewards/rejected": -16.425369262695312, "step": 2548 }, { "epoch": 4.39, "learning_rate": 2.8474288142796426e-07, "logits/chosen": -2.0432188510894775, "logits/rejected": -2.100318670272827, "logps/chosen": -125.83636474609375, "logps/rejected": -275.0405578613281, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.057969093322754, "rewards/margins": 12.715087890625, "rewards/rejected": -16.77305793762207, "step": 2549 }, { "epoch": 4.39, "learning_rate": 2.846366340841479e-07, "logits/chosen": -1.915915608406067, "logits/rejected": -1.8602287769317627, "logps/chosen": -114.69932556152344, "logps/rejected": -223.1489715576172, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.427922248840332, "rewards/margins": 11.18237018585205, "rewards/rejected": -14.610292434692383, "step": 2550 }, { "epoch": 4.39, "learning_rate": 2.8453038674033146e-07, "logits/chosen": -2.058928966522217, "logits/rejected": -1.8325870037078857, "logps/chosen": -180.5150146484375, "logps/rejected": -238.63551330566406, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -8.48279857635498, "rewards/margins": 6.862127780914307, "rewards/rejected": -15.344926834106445, "step": 2551 }, { "epoch": 4.39, "learning_rate": 2.8442413939651506e-07, "logits/chosen": -2.0864038467407227, "logits/rejected": -1.8373712301254272, "logps/chosen": -126.23204803466797, "logps/rejected": -241.99774169921875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -4.297736644744873, "rewards/margins": 12.486106872558594, "rewards/rejected": -16.783843994140625, "step": 2552 }, { "epoch": 4.39, "learning_rate": 2.843178920526987e-07, "logits/chosen": -1.8276538848876953, "logits/rejected": -1.9517202377319336, "logps/chosen": -134.93084716796875, "logps/rejected": -255.35586547851562, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -6.090085983276367, "rewards/margins": 11.175681114196777, "rewards/rejected": -17.26576805114746, "step": 2553 }, { "epoch": 4.4, "learning_rate": 2.8421164470888225e-07, "logits/chosen": -1.8913285732269287, "logits/rejected": -2.108290195465088, "logps/chosen": -135.7935791015625, "logps/rejected": -265.9289245605469, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.023514747619629, "rewards/margins": 12.386129379272461, "rewards/rejected": -18.409643173217773, "step": 2554 }, { "epoch": 4.4, "learning_rate": 2.8410539736506585e-07, "logits/chosen": -2.079779624938965, "logits/rejected": -1.7913353443145752, "logps/chosen": -124.59185028076172, "logps/rejected": -230.999267578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.379359006881714, "rewards/margins": 13.069255828857422, "rewards/rejected": -16.44861602783203, "step": 2555 }, { "epoch": 4.4, "learning_rate": 2.8399915002124945e-07, "logits/chosen": -2.0400848388671875, "logits/rejected": -1.8836355209350586, "logps/chosen": -109.13688659667969, "logps/rejected": -242.51779174804688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.2089178562164307, "rewards/margins": 13.393940925598145, "rewards/rejected": -16.602859497070312, "step": 2556 }, { "epoch": 4.4, "learning_rate": 2.8389290267743305e-07, "logits/chosen": -2.2746782302856445, "logits/rejected": -1.6131575107574463, "logps/chosen": -177.23934936523438, "logps/rejected": -280.8975524902344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.760186672210693, "rewards/margins": 11.223716735839844, "rewards/rejected": -18.983903884887695, "step": 2557 }, { "epoch": 4.4, "learning_rate": 2.837866553336166e-07, "logits/chosen": -1.4523580074310303, "logits/rejected": -2.1378514766693115, "logps/chosen": -149.70657348632812, "logps/rejected": -281.85809326171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.760785102844238, "rewards/margins": 11.736291885375977, "rewards/rejected": -18.49707794189453, "step": 2558 }, { "epoch": 4.4, "learning_rate": 2.8368040798980025e-07, "logits/chosen": -1.8433630466461182, "logits/rejected": -2.105391263961792, "logps/chosen": -155.0065155029297, "logps/rejected": -312.7604064941406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.982755661010742, "rewards/margins": 14.456684112548828, "rewards/rejected": -20.43943977355957, "step": 2559 }, { "epoch": 4.41, "learning_rate": 2.8357416064598385e-07, "logits/chosen": -1.7712461948394775, "logits/rejected": -2.042046546936035, "logps/chosen": -161.90887451171875, "logps/rejected": -309.1562805175781, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -6.314163684844971, "rewards/margins": 14.19803237915039, "rewards/rejected": -20.512195587158203, "step": 2560 }, { "epoch": 4.41, "learning_rate": 2.8346791330216745e-07, "logits/chosen": -2.0213372707366943, "logits/rejected": -1.7006272077560425, "logps/chosen": -161.76437377929688, "logps/rejected": -251.70394897460938, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.072468280792236, "rewards/margins": 11.086244583129883, "rewards/rejected": -17.15871238708496, "step": 2561 }, { "epoch": 4.41, "learning_rate": 2.8336166595835105e-07, "logits/chosen": -2.095165967941284, "logits/rejected": -2.102339744567871, "logps/chosen": -119.33992004394531, "logps/rejected": -304.4215087890625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -5.012618541717529, "rewards/margins": 15.506182670593262, "rewards/rejected": -20.518800735473633, "step": 2562 }, { "epoch": 4.41, "learning_rate": 2.832554186145346e-07, "logits/chosen": -1.8313006162643433, "logits/rejected": -2.076746940612793, "logps/chosen": -118.23396301269531, "logps/rejected": -263.1672668457031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.765483856201172, "rewards/margins": 13.995555877685547, "rewards/rejected": -17.76103973388672, "step": 2563 }, { "epoch": 4.41, "learning_rate": 2.8314917127071824e-07, "logits/chosen": -1.9242407083511353, "logits/rejected": -1.9547698497772217, "logps/chosen": -144.9589385986328, "logps/rejected": -317.67706298828125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.058939456939697, "rewards/margins": 15.926036834716797, "rewards/rejected": -21.984975814819336, "step": 2564 }, { "epoch": 4.41, "learning_rate": 2.8304292392690184e-07, "logits/chosen": -1.9814622402191162, "logits/rejected": -2.1038708686828613, "logps/chosen": -170.44219970703125, "logps/rejected": -291.5667419433594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.00811243057251, "rewards/margins": 12.317570686340332, "rewards/rejected": -19.32568359375, "step": 2565 }, { "epoch": 4.42, "learning_rate": 2.829366765830854e-07, "logits/chosen": -2.0918073654174805, "logits/rejected": -2.0764288902282715, "logps/chosen": -142.02328491210938, "logps/rejected": -289.2406921386719, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.327544689178467, "rewards/margins": 14.27769660949707, "rewards/rejected": -20.605243682861328, "step": 2566 }, { "epoch": 4.42, "learning_rate": 2.8283042923926904e-07, "logits/chosen": -2.0273184776306152, "logits/rejected": -1.8119792938232422, "logps/chosen": -129.1085968017578, "logps/rejected": -234.66717529296875, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -3.932344913482666, "rewards/margins": 10.044711112976074, "rewards/rejected": -13.977056503295898, "step": 2567 }, { "epoch": 4.42, "learning_rate": 2.827241818954526e-07, "logits/chosen": -2.055851459503174, "logits/rejected": -2.0979349613189697, "logps/chosen": -149.87603759765625, "logps/rejected": -280.94708251953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.605684280395508, "rewards/margins": 13.316411972045898, "rewards/rejected": -18.922096252441406, "step": 2568 }, { "epoch": 4.42, "learning_rate": 2.826179345516362e-07, "logits/chosen": -2.0949881076812744, "logits/rejected": -2.1210339069366455, "logps/chosen": -126.7257080078125, "logps/rejected": -271.09637451171875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.7859840393066406, "rewards/margins": 13.21272087097168, "rewards/rejected": -16.99870491027832, "step": 2569 }, { "epoch": 4.42, "learning_rate": 2.8251168720781984e-07, "logits/chosen": -2.0037899017333984, "logits/rejected": -2.1863975524902344, "logps/chosen": -131.23171997070312, "logps/rejected": -242.37338256835938, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.529478073120117, "rewards/margins": 10.812612533569336, "rewards/rejected": -16.342090606689453, "step": 2570 }, { "epoch": 4.43, "learning_rate": 2.824054398640034e-07, "logits/chosen": -1.9370694160461426, "logits/rejected": -1.555909514427185, "logps/chosen": -184.8927764892578, "logps/rejected": -311.0061950683594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.801535129547119, "rewards/margins": 13.241108894348145, "rewards/rejected": -21.042644500732422, "step": 2571 }, { "epoch": 4.43, "learning_rate": 2.82299192520187e-07, "logits/chosen": -1.8895323276519775, "logits/rejected": -1.9294540882110596, "logps/chosen": -144.2119903564453, "logps/rejected": -254.6326141357422, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -7.216547966003418, "rewards/margins": 11.30400562286377, "rewards/rejected": -18.52055549621582, "step": 2572 }, { "epoch": 4.43, "learning_rate": 2.821929451763706e-07, "logits/chosen": -1.9745327234268188, "logits/rejected": -2.073831081390381, "logps/chosen": -148.64413452148438, "logps/rejected": -262.47601318359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.2226667404174805, "rewards/margins": 10.944744110107422, "rewards/rejected": -17.16741180419922, "step": 2573 }, { "epoch": 4.43, "learning_rate": 2.820866978325542e-07, "logits/chosen": -1.964972972869873, "logits/rejected": -1.992574691772461, "logps/chosen": -104.38318634033203, "logps/rejected": -202.58172607421875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.2359209060668945, "rewards/margins": 8.722535133361816, "rewards/rejected": -12.958456039428711, "step": 2574 }, { "epoch": 4.43, "learning_rate": 2.819804504887378e-07, "logits/chosen": -1.8073959350585938, "logits/rejected": -2.097686290740967, "logps/chosen": -126.32821655273438, "logps/rejected": -259.2762145996094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.283657073974609, "rewards/margins": 11.843046188354492, "rewards/rejected": -16.1267032623291, "step": 2575 }, { "epoch": 4.43, "learning_rate": 2.818742031449214e-07, "logits/chosen": -2.1109535694122314, "logits/rejected": -1.6438050270080566, "logps/chosen": -158.831787109375, "logps/rejected": -273.25274658203125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -6.346169471740723, "rewards/margins": 10.045969009399414, "rewards/rejected": -16.39213752746582, "step": 2576 }, { "epoch": 4.44, "learning_rate": 2.81767955801105e-07, "logits/chosen": -1.5430572032928467, "logits/rejected": -2.069571018218994, "logps/chosen": -135.67391967773438, "logps/rejected": -278.18505859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.015945911407471, "rewards/margins": 13.115169525146484, "rewards/rejected": -19.131114959716797, "step": 2577 }, { "epoch": 4.44, "learning_rate": 2.816617084572885e-07, "logits/chosen": -2.1798439025878906, "logits/rejected": -1.7971855401992798, "logps/chosen": -123.30473327636719, "logps/rejected": -247.5328369140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.418259143829346, "rewards/margins": 12.808040618896484, "rewards/rejected": -17.226299285888672, "step": 2578 }, { "epoch": 4.44, "learning_rate": 2.8155546111347217e-07, "logits/chosen": -1.8727664947509766, "logits/rejected": -2.06215763092041, "logps/chosen": -131.6510009765625, "logps/rejected": -315.1374816894531, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.625494956970215, "rewards/margins": 16.95732879638672, "rewards/rejected": -21.582822799682617, "step": 2579 }, { "epoch": 4.44, "learning_rate": 2.8144921376965577e-07, "logits/chosen": -1.5403249263763428, "logits/rejected": -2.0480122566223145, "logps/chosen": -133.39935302734375, "logps/rejected": -283.665283203125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -6.0862717628479, "rewards/margins": 12.73611068725586, "rewards/rejected": -18.8223819732666, "step": 2580 }, { "epoch": 4.44, "learning_rate": 2.813429664258393e-07, "logits/chosen": -1.8889480829238892, "logits/rejected": -2.1171741485595703, "logps/chosen": -150.23016357421875, "logps/rejected": -263.8712158203125, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -8.266283988952637, "rewards/margins": 9.645856857299805, "rewards/rejected": -17.912141799926758, "step": 2581 }, { "epoch": 4.44, "learning_rate": 2.8123671908202297e-07, "logits/chosen": -1.9083874225616455, "logits/rejected": -2.137765407562256, "logps/chosen": -102.67781066894531, "logps/rejected": -205.8055419921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.1699225902557373, "rewards/margins": 9.759586334228516, "rewards/rejected": -12.929509162902832, "step": 2582 }, { "epoch": 4.45, "learning_rate": 2.811304717382065e-07, "logits/chosen": -1.6929255723953247, "logits/rejected": -1.9927160739898682, "logps/chosen": -145.7010040283203, "logps/rejected": -266.228759765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.09061861038208, "rewards/margins": 10.709789276123047, "rewards/rejected": -17.80040740966797, "step": 2583 }, { "epoch": 4.45, "learning_rate": 2.810242243943901e-07, "logits/chosen": -1.964721441268921, "logits/rejected": -1.9045979976654053, "logps/chosen": -103.56900787353516, "logps/rejected": -256.60333251953125, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -4.028616428375244, "rewards/margins": 14.154693603515625, "rewards/rejected": -18.18330955505371, "step": 2584 }, { "epoch": 4.45, "learning_rate": 2.809179770505737e-07, "logits/chosen": -2.1982741355895996, "logits/rejected": -2.021097183227539, "logps/chosen": -156.43392944335938, "logps/rejected": -252.38621520996094, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -7.13238525390625, "rewards/margins": 10.21170425415039, "rewards/rejected": -17.34408950805664, "step": 2585 }, { "epoch": 4.45, "learning_rate": 2.808117297067573e-07, "logits/chosen": -1.6817177534103394, "logits/rejected": -2.0414533615112305, "logps/chosen": -101.87509155273438, "logps/rejected": -276.9922790527344, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.830150604248047, "rewards/margins": 14.850647926330566, "rewards/rejected": -18.680797576904297, "step": 2586 }, { "epoch": 4.45, "learning_rate": 2.807054823629409e-07, "logits/chosen": -1.7422313690185547, "logits/rejected": -1.94435453414917, "logps/chosen": -159.62213134765625, "logps/rejected": -277.0834045410156, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.167347431182861, "rewards/margins": 10.861980438232422, "rewards/rejected": -18.029327392578125, "step": 2587 }, { "epoch": 4.45, "learning_rate": 2.805992350191245e-07, "logits/chosen": -1.8712892532348633, "logits/rejected": -2.0706300735473633, "logps/chosen": -124.81211853027344, "logps/rejected": -253.20632934570312, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.083135604858398, "rewards/margins": 12.438032150268555, "rewards/rejected": -16.521167755126953, "step": 2588 }, { "epoch": 4.46, "learning_rate": 2.804929876753081e-07, "logits/chosen": -2.1389386653900146, "logits/rejected": -1.825620412826538, "logps/chosen": -113.68801879882812, "logps/rejected": -234.31396484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.2230772972106934, "rewards/margins": 12.747820854187012, "rewards/rejected": -15.970897674560547, "step": 2589 }, { "epoch": 4.46, "learning_rate": 2.8038674033149166e-07, "logits/chosen": -2.0674502849578857, "logits/rejected": -1.9383389949798584, "logps/chosen": -152.5761260986328, "logps/rejected": -256.5271301269531, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.0100860595703125, "rewards/margins": 11.774932861328125, "rewards/rejected": -17.785018920898438, "step": 2590 }, { "epoch": 4.46, "learning_rate": 2.802804929876753e-07, "logits/chosen": -1.8451342582702637, "logits/rejected": -2.169074773788452, "logps/chosen": -100.72028350830078, "logps/rejected": -239.90464782714844, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.172916889190674, "rewards/margins": 12.995159149169922, "rewards/rejected": -15.168076515197754, "step": 2591 }, { "epoch": 4.46, "learning_rate": 2.801742456438589e-07, "logits/chosen": -1.3551890850067139, "logits/rejected": -2.031484842300415, "logps/chosen": -167.33038330078125, "logps/rejected": -299.99755859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.883918762207031, "rewards/margins": 11.806962013244629, "rewards/rejected": -19.690879821777344, "step": 2592 }, { "epoch": 4.46, "learning_rate": 2.8006799830004245e-07, "logits/chosen": -1.8658642768859863, "logits/rejected": -1.9144692420959473, "logps/chosen": -118.97250366210938, "logps/rejected": -274.71685791015625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.944911003112793, "rewards/margins": 14.579724311828613, "rewards/rejected": -19.524635314941406, "step": 2593 }, { "epoch": 4.46, "learning_rate": 2.799617509562261e-07, "logits/chosen": -1.9681103229522705, "logits/rejected": -2.0774641036987305, "logps/chosen": -131.43768310546875, "logps/rejected": -281.633056640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.338314533233643, "rewards/margins": 14.069374084472656, "rewards/rejected": -19.40768814086914, "step": 2594 }, { "epoch": 4.47, "learning_rate": 2.7985550361240965e-07, "logits/chosen": -1.8953347206115723, "logits/rejected": -2.333599090576172, "logps/chosen": -154.83981323242188, "logps/rejected": -257.5467529296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.934107780456543, "rewards/margins": 11.322572708129883, "rewards/rejected": -17.256681442260742, "step": 2595 }, { "epoch": 4.47, "learning_rate": 2.7974925626859325e-07, "logits/chosen": -1.712682843208313, "logits/rejected": -1.9206528663635254, "logps/chosen": -106.09671020507812, "logps/rejected": -200.44296264648438, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.051198959350586, "rewards/margins": 9.385290145874023, "rewards/rejected": -12.43648910522461, "step": 2596 }, { "epoch": 4.47, "learning_rate": 2.796430089247769e-07, "logits/chosen": -1.9208712577819824, "logits/rejected": -2.083376407623291, "logps/chosen": -113.15312194824219, "logps/rejected": -247.97637939453125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -3.611577033996582, "rewards/margins": 12.380273818969727, "rewards/rejected": -15.991851806640625, "step": 2597 }, { "epoch": 4.47, "learning_rate": 2.7953676158096045e-07, "logits/chosen": -1.8514413833618164, "logits/rejected": -1.8934967517852783, "logps/chosen": -130.47998046875, "logps/rejected": -248.2315673828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.374736785888672, "rewards/margins": 11.259429931640625, "rewards/rejected": -16.634166717529297, "step": 2598 }, { "epoch": 4.47, "learning_rate": 2.7943051423714405e-07, "logits/chosen": -1.6996573209762573, "logits/rejected": -1.9708051681518555, "logps/chosen": -127.34205627441406, "logps/rejected": -269.0093688964844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.115859508514404, "rewards/margins": 12.273406982421875, "rewards/rejected": -17.389265060424805, "step": 2599 }, { "epoch": 4.48, "learning_rate": 2.7932426689332764e-07, "logits/chosen": -1.9231112003326416, "logits/rejected": -1.955392837524414, "logps/chosen": -144.18472290039062, "logps/rejected": -230.63558959960938, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.538667678833008, "rewards/margins": 8.4679594039917, "rewards/rejected": -17.00662612915039, "step": 2600 }, { "epoch": 4.48, "learning_rate": 2.7921801954951124e-07, "logits/chosen": -2.148416042327881, "logits/rejected": -1.8554550409317017, "logps/chosen": -140.41021728515625, "logps/rejected": -240.95892333984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.089689254760742, "rewards/margins": 10.801470756530762, "rewards/rejected": -15.89116096496582, "step": 2601 }, { "epoch": 4.48, "learning_rate": 2.7911177220569484e-07, "logits/chosen": -2.264420747756958, "logits/rejected": -1.971227765083313, "logps/chosen": -143.6090087890625, "logps/rejected": -280.184814453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.427013874053955, "rewards/margins": 14.790075302124023, "rewards/rejected": -20.21708869934082, "step": 2602 }, { "epoch": 4.48, "learning_rate": 2.7900552486187844e-07, "logits/chosen": -2.0995826721191406, "logits/rejected": -2.237931251525879, "logps/chosen": -161.9561767578125, "logps/rejected": -295.55889892578125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -8.067304611206055, "rewards/margins": 12.744811058044434, "rewards/rejected": -20.812116622924805, "step": 2603 }, { "epoch": 4.48, "learning_rate": 2.7889927751806204e-07, "logits/chosen": -1.9134602546691895, "logits/rejected": -1.825269103050232, "logps/chosen": -109.57588958740234, "logps/rejected": -230.1064453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.397539138793945, "rewards/margins": 11.715521812438965, "rewards/rejected": -16.113061904907227, "step": 2604 }, { "epoch": 4.48, "learning_rate": 2.7879303017424564e-07, "logits/chosen": -1.5966920852661133, "logits/rejected": -2.2140355110168457, "logps/chosen": -118.01412200927734, "logps/rejected": -279.3214416503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7786853313446045, "rewards/margins": 13.182324409484863, "rewards/rejected": -16.961009979248047, "step": 2605 }, { "epoch": 4.49, "learning_rate": 2.7868678283042924e-07, "logits/chosen": -1.8303617238998413, "logits/rejected": -2.1777777671813965, "logps/chosen": -122.361328125, "logps/rejected": -308.3139343261719, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.794499397277832, "rewards/margins": 15.907317161560059, "rewards/rejected": -19.70181655883789, "step": 2606 }, { "epoch": 4.49, "learning_rate": 2.7858053548661284e-07, "logits/chosen": -1.7896614074707031, "logits/rejected": -2.0322494506835938, "logps/chosen": -138.01246643066406, "logps/rejected": -320.41046142578125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.2823486328125, "rewards/margins": 14.735479354858398, "rewards/rejected": -21.01782989501953, "step": 2607 }, { "epoch": 4.49, "learning_rate": 2.7847428814279644e-07, "logits/chosen": -1.9271256923675537, "logits/rejected": -2.1392672061920166, "logps/chosen": -164.50991821289062, "logps/rejected": -262.83575439453125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -8.129761695861816, "rewards/margins": 9.674535751342773, "rewards/rejected": -17.804298400878906, "step": 2608 }, { "epoch": 4.49, "learning_rate": 2.7836804079898003e-07, "logits/chosen": -2.2919602394104004, "logits/rejected": -1.7424598932266235, "logps/chosen": -113.1609115600586, "logps/rejected": -204.41348266601562, "loss": 0.0895, "rewards/accuracies": 0.75, "rewards/chosen": -2.978240966796875, "rewards/margins": 10.961616516113281, "rewards/rejected": -13.939858436584473, "step": 2609 }, { "epoch": 4.49, "learning_rate": 2.782617934551636e-07, "logits/chosen": -2.2562193870544434, "logits/rejected": -2.009432792663574, "logps/chosen": -134.6588134765625, "logps/rejected": -225.3726806640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.143318176269531, "rewards/margins": 10.218942642211914, "rewards/rejected": -15.362260818481445, "step": 2610 }, { "epoch": 4.49, "learning_rate": 2.7815554611134723e-07, "logits/chosen": -1.4397341012954712, "logits/rejected": -1.9887702465057373, "logps/chosen": -133.7926025390625, "logps/rejected": -281.08758544921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.7279582023620605, "rewards/margins": 12.894268035888672, "rewards/rejected": -19.62222671508789, "step": 2611 }, { "epoch": 4.5, "learning_rate": 2.780492987675308e-07, "logits/chosen": -2.200319766998291, "logits/rejected": -1.6322637796401978, "logps/chosen": -157.39688110351562, "logps/rejected": -246.7306365966797, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.840193748474121, "rewards/margins": 11.24749755859375, "rewards/rejected": -17.087692260742188, "step": 2612 }, { "epoch": 4.5, "learning_rate": 2.779430514237144e-07, "logits/chosen": -1.961185336112976, "logits/rejected": -1.9617540836334229, "logps/chosen": -157.3904266357422, "logps/rejected": -316.4356689453125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -6.143916606903076, "rewards/margins": 15.156903266906738, "rewards/rejected": -21.300819396972656, "step": 2613 }, { "epoch": 4.5, "learning_rate": 2.7783680407989803e-07, "logits/chosen": -1.9868311882019043, "logits/rejected": -2.0561537742614746, "logps/chosen": -139.44570922851562, "logps/rejected": -269.064208984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.760173797607422, "rewards/margins": 13.554790496826172, "rewards/rejected": -18.314964294433594, "step": 2614 }, { "epoch": 4.5, "learning_rate": 2.777305567360816e-07, "logits/chosen": -2.035405397415161, "logits/rejected": -2.1439905166625977, "logps/chosen": -161.68804931640625, "logps/rejected": -268.5908508300781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.97704553604126, "rewards/margins": 11.304779052734375, "rewards/rejected": -17.28182601928711, "step": 2615 }, { "epoch": 4.5, "learning_rate": 2.776243093922652e-07, "logits/chosen": -1.6048798561096191, "logits/rejected": -1.9877533912658691, "logps/chosen": -100.85946655273438, "logps/rejected": -288.898193359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.0826406478881836, "rewards/margins": 16.86817169189453, "rewards/rejected": -19.95081329345703, "step": 2616 }, { "epoch": 4.5, "learning_rate": 2.7751806204844877e-07, "logits/chosen": -2.200255870819092, "logits/rejected": -1.9159520864486694, "logps/chosen": -142.2318878173828, "logps/rejected": -275.8578796386719, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -6.270297050476074, "rewards/margins": 13.412508964538574, "rewards/rejected": -19.68280601501465, "step": 2617 }, { "epoch": 4.51, "learning_rate": 2.7741181470463237e-07, "logits/chosen": -1.972912311553955, "logits/rejected": -1.579378366470337, "logps/chosen": -143.8651885986328, "logps/rejected": -243.35906982421875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -5.026383399963379, "rewards/margins": 11.590217590332031, "rewards/rejected": -16.616601943969727, "step": 2618 }, { "epoch": 4.51, "learning_rate": 2.7730556736081597e-07, "logits/chosen": -2.0462167263031006, "logits/rejected": -1.8322423696517944, "logps/chosen": -156.5526580810547, "logps/rejected": -240.95448303222656, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -5.563757419586182, "rewards/margins": 9.678824424743652, "rewards/rejected": -15.242582321166992, "step": 2619 }, { "epoch": 4.51, "learning_rate": 2.7719932001699957e-07, "logits/chosen": -1.8974800109863281, "logits/rejected": -2.096203088760376, "logps/chosen": -140.38687133789062, "logps/rejected": -265.4172668457031, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.718262195587158, "rewards/margins": 11.700692176818848, "rewards/rejected": -18.418954849243164, "step": 2620 }, { "epoch": 4.51, "learning_rate": 2.7709307267318317e-07, "logits/chosen": -2.081984519958496, "logits/rejected": -2.1882617473602295, "logps/chosen": -147.04217529296875, "logps/rejected": -254.90101623535156, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -6.674229621887207, "rewards/margins": 10.069574356079102, "rewards/rejected": -16.743804931640625, "step": 2621 }, { "epoch": 4.51, "learning_rate": 2.769868253293667e-07, "logits/chosen": -2.0498123168945312, "logits/rejected": -1.9556210041046143, "logps/chosen": -147.6562042236328, "logps/rejected": -285.3567810058594, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -5.1100640296936035, "rewards/margins": 15.09897232055664, "rewards/rejected": -20.20903778076172, "step": 2622 }, { "epoch": 4.51, "learning_rate": 2.7688057798555037e-07, "logits/chosen": -1.9616172313690186, "logits/rejected": -1.7235443592071533, "logps/chosen": -161.11642456054688, "logps/rejected": -264.6096496582031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.265419006347656, "rewards/margins": 10.971162796020508, "rewards/rejected": -19.236583709716797, "step": 2623 }, { "epoch": 4.52, "learning_rate": 2.7677433064173397e-07, "logits/chosen": -1.93365478515625, "logits/rejected": -2.1470394134521484, "logps/chosen": -182.77452087402344, "logps/rejected": -300.3008728027344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.162525177001953, "rewards/margins": 11.73057746887207, "rewards/rejected": -19.893102645874023, "step": 2624 }, { "epoch": 4.52, "learning_rate": 2.766680832979175e-07, "logits/chosen": -1.988746166229248, "logits/rejected": -1.8239351511001587, "logps/chosen": -156.83299255371094, "logps/rejected": -267.3650207519531, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -6.4783124923706055, "rewards/margins": 11.538076400756836, "rewards/rejected": -18.016387939453125, "step": 2625 }, { "epoch": 4.52, "learning_rate": 2.7656183595410116e-07, "logits/chosen": -1.9999914169311523, "logits/rejected": -2.1695141792297363, "logps/chosen": -202.03369140625, "logps/rejected": -298.52105712890625, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -10.29935073852539, "rewards/margins": 9.520004272460938, "rewards/rejected": -19.819355010986328, "step": 2626 }, { "epoch": 4.52, "learning_rate": 2.764555886102847e-07, "logits/chosen": -1.6908191442489624, "logits/rejected": -1.9506735801696777, "logps/chosen": -144.93524169921875, "logps/rejected": -279.50164794921875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -7.2388505935668945, "rewards/margins": 12.15884780883789, "rewards/rejected": -19.3976993560791, "step": 2627 }, { "epoch": 4.52, "learning_rate": 2.763493412664683e-07, "logits/chosen": -1.9557571411132812, "logits/rejected": -1.8401026725769043, "logps/chosen": -152.1585693359375, "logps/rejected": -244.077880859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.3299760818481445, "rewards/margins": 10.225032806396484, "rewards/rejected": -16.555007934570312, "step": 2628 }, { "epoch": 4.52, "learning_rate": 2.7624309392265196e-07, "logits/chosen": -2.0928122997283936, "logits/rejected": -2.1001510620117188, "logps/chosen": -164.12261962890625, "logps/rejected": -247.28121948242188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.879565715789795, "rewards/margins": 9.51313304901123, "rewards/rejected": -17.392698287963867, "step": 2629 }, { "epoch": 4.53, "learning_rate": 2.761368465788355e-07, "logits/chosen": -2.054319381713867, "logits/rejected": -2.221060276031494, "logps/chosen": -181.28697204589844, "logps/rejected": -280.61749267578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.906696319580078, "rewards/margins": 9.755385398864746, "rewards/rejected": -17.66208267211914, "step": 2630 }, { "epoch": 4.53, "learning_rate": 2.760305992350191e-07, "logits/chosen": -1.9577500820159912, "logits/rejected": -1.991211175918579, "logps/chosen": -141.1326446533203, "logps/rejected": -280.94622802734375, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -5.222556114196777, "rewards/margins": 13.754114151000977, "rewards/rejected": -18.976669311523438, "step": 2631 }, { "epoch": 4.53, "learning_rate": 2.759243518912027e-07, "logits/chosen": -1.939199447631836, "logits/rejected": -2.13222599029541, "logps/chosen": -126.34776306152344, "logps/rejected": -273.43841552734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.307368516921997, "rewards/margins": 13.659172058105469, "rewards/rejected": -16.966541290283203, "step": 2632 }, { "epoch": 4.53, "learning_rate": 2.758181045473863e-07, "logits/chosen": -2.038166046142578, "logits/rejected": -1.9225398302078247, "logps/chosen": -148.86785888671875, "logps/rejected": -268.6122741699219, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.48309850692749, "rewards/margins": 11.979225158691406, "rewards/rejected": -18.462324142456055, "step": 2633 }, { "epoch": 4.53, "learning_rate": 2.757118572035699e-07, "logits/chosen": -1.7728021144866943, "logits/rejected": -2.027919054031372, "logps/chosen": -117.20782470703125, "logps/rejected": -268.0565185546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.409855842590332, "rewards/margins": 13.863153457641602, "rewards/rejected": -17.273008346557617, "step": 2634 }, { "epoch": 4.54, "learning_rate": 2.756056098597535e-07, "logits/chosen": -1.7896835803985596, "logits/rejected": -1.9362714290618896, "logps/chosen": -145.18841552734375, "logps/rejected": -283.3200378417969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.9573469161987305, "rewards/margins": 13.11361026763916, "rewards/rejected": -19.07095718383789, "step": 2635 }, { "epoch": 4.54, "learning_rate": 2.754993625159371e-07, "logits/chosen": -1.824319839477539, "logits/rejected": -1.9516689777374268, "logps/chosen": -174.62509155273438, "logps/rejected": -285.1161804199219, "loss": 0.0721, "rewards/accuracies": 1.0, "rewards/chosen": -8.798941612243652, "rewards/margins": 9.114874839782715, "rewards/rejected": -17.913816452026367, "step": 2636 }, { "epoch": 4.54, "learning_rate": 2.7539311517212065e-07, "logits/chosen": -2.0451807975769043, "logits/rejected": -1.6874206066131592, "logps/chosen": -173.27801513671875, "logps/rejected": -247.73348999023438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.573809623718262, "rewards/margins": 10.567444801330566, "rewards/rejected": -17.141254425048828, "step": 2637 }, { "epoch": 4.54, "learning_rate": 2.752868678283043e-07, "logits/chosen": -2.0479073524475098, "logits/rejected": -2.104032278060913, "logps/chosen": -129.3793182373047, "logps/rejected": -319.32049560546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.212723731994629, "rewards/margins": 17.45960807800293, "rewards/rejected": -21.672332763671875, "step": 2638 }, { "epoch": 4.54, "learning_rate": 2.7518062048448784e-07, "logits/chosen": -1.581174373626709, "logits/rejected": -2.224081516265869, "logps/chosen": -106.30443572998047, "logps/rejected": -277.4696350097656, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -3.694641590118408, "rewards/margins": 14.27740478515625, "rewards/rejected": -17.9720458984375, "step": 2639 }, { "epoch": 4.54, "learning_rate": 2.7507437314067144e-07, "logits/chosen": -2.1146976947784424, "logits/rejected": -2.2572555541992188, "logps/chosen": -146.31265258789062, "logps/rejected": -300.0085144042969, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.097329616546631, "rewards/margins": 14.639358520507812, "rewards/rejected": -20.7366886138916, "step": 2640 }, { "epoch": 4.55, "learning_rate": 2.749681257968551e-07, "logits/chosen": -1.954843521118164, "logits/rejected": -1.9437642097473145, "logps/chosen": -106.47618865966797, "logps/rejected": -234.0509033203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.207485198974609, "rewards/margins": 12.53182601928711, "rewards/rejected": -16.73931121826172, "step": 2641 }, { "epoch": 4.55, "learning_rate": 2.7486187845303864e-07, "logits/chosen": -1.921523094177246, "logits/rejected": -1.7753841876983643, "logps/chosen": -133.86158752441406, "logps/rejected": -243.1389617919922, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -5.0698113441467285, "rewards/margins": 11.340559005737305, "rewards/rejected": -16.410369873046875, "step": 2642 }, { "epoch": 4.55, "learning_rate": 2.7475563110922224e-07, "logits/chosen": -2.164332389831543, "logits/rejected": -1.9017632007598877, "logps/chosen": -172.05043029785156, "logps/rejected": -266.6474304199219, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.240564346313477, "rewards/margins": 11.97999382019043, "rewards/rejected": -19.220558166503906, "step": 2643 }, { "epoch": 4.55, "learning_rate": 2.7464938376540584e-07, "logits/chosen": -2.110621690750122, "logits/rejected": -1.9401285648345947, "logps/chosen": -153.6819610595703, "logps/rejected": -249.18589782714844, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -6.440149307250977, "rewards/margins": 11.113554954528809, "rewards/rejected": -17.55370330810547, "step": 2644 }, { "epoch": 4.55, "learning_rate": 2.7454313642158944e-07, "logits/chosen": -2.036827325820923, "logits/rejected": -1.8944088220596313, "logps/chosen": -136.1639862060547, "logps/rejected": -267.9054870605469, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -5.900671482086182, "rewards/margins": 13.022042274475098, "rewards/rejected": -18.922714233398438, "step": 2645 }, { "epoch": 4.55, "learning_rate": 2.744368890777731e-07, "logits/chosen": -1.6606004238128662, "logits/rejected": -1.9158756732940674, "logps/chosen": -145.4109344482422, "logps/rejected": -301.43585205078125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -6.373993873596191, "rewards/margins": 14.408435821533203, "rewards/rejected": -20.782428741455078, "step": 2646 }, { "epoch": 4.56, "learning_rate": 2.7433064173395663e-07, "logits/chosen": -2.0384440422058105, "logits/rejected": -1.9671549797058105, "logps/chosen": -151.37075805664062, "logps/rejected": -250.6970977783203, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.720297336578369, "rewards/margins": 9.986193656921387, "rewards/rejected": -17.706491470336914, "step": 2647 }, { "epoch": 4.56, "learning_rate": 2.7422439439014023e-07, "logits/chosen": -1.9675931930541992, "logits/rejected": -2.2156333923339844, "logps/chosen": -132.6219482421875, "logps/rejected": -262.5018310546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.119839191436768, "rewards/margins": 11.437697410583496, "rewards/rejected": -16.557537078857422, "step": 2648 }, { "epoch": 4.56, "learning_rate": 2.7411814704632383e-07, "logits/chosen": -1.9674631357192993, "logits/rejected": -2.041666269302368, "logps/chosen": -125.794921875, "logps/rejected": -218.5325927734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.03081750869751, "rewards/margins": 9.069839477539062, "rewards/rejected": -14.100656509399414, "step": 2649 }, { "epoch": 4.56, "learning_rate": 2.7401189970250743e-07, "logits/chosen": -1.8109891414642334, "logits/rejected": -1.8026505708694458, "logps/chosen": -156.60897827148438, "logps/rejected": -286.6151428222656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.663417816162109, "rewards/margins": 13.585506439208984, "rewards/rejected": -20.248924255371094, "step": 2650 }, { "epoch": 4.56, "learning_rate": 2.7390565235869103e-07, "logits/chosen": -1.9769089221954346, "logits/rejected": -2.1450705528259277, "logps/chosen": -170.22885131835938, "logps/rejected": -270.742919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.933955192565918, "rewards/margins": 10.615028381347656, "rewards/rejected": -18.548982620239258, "step": 2651 }, { "epoch": 4.56, "learning_rate": 2.7379940501487463e-07, "logits/chosen": -2.0492515563964844, "logits/rejected": -1.8419556617736816, "logps/chosen": -150.02964782714844, "logps/rejected": -236.16339111328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.672863006591797, "rewards/margins": 8.71286678314209, "rewards/rejected": -15.38572883605957, "step": 2652 }, { "epoch": 4.57, "learning_rate": 2.7369315767105823e-07, "logits/chosen": -1.8899354934692383, "logits/rejected": -2.0389883518218994, "logps/chosen": -129.4612579345703, "logps/rejected": -245.02867126464844, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.501784324645996, "rewards/margins": 9.93047046661377, "rewards/rejected": -14.432254791259766, "step": 2653 }, { "epoch": 4.57, "learning_rate": 2.735869103272418e-07, "logits/chosen": -1.9328315258026123, "logits/rejected": -2.015113353729248, "logps/chosen": -124.29133605957031, "logps/rejected": -240.15975952148438, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -4.093634128570557, "rewards/margins": 11.575177192687988, "rewards/rejected": -15.668811798095703, "step": 2654 }, { "epoch": 4.57, "learning_rate": 2.734806629834254e-07, "logits/chosen": -2.119584321975708, "logits/rejected": -1.7697808742523193, "logps/chosen": -178.80564880371094, "logps/rejected": -277.691162109375, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -7.67371940612793, "rewards/margins": 11.738306045532227, "rewards/rejected": -19.412025451660156, "step": 2655 }, { "epoch": 4.57, "learning_rate": 2.73374415639609e-07, "logits/chosen": -1.9447362422943115, "logits/rejected": -2.311936855316162, "logps/chosen": -118.62228393554688, "logps/rejected": -260.1988525390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.926068067550659, "rewards/margins": 11.542773246765137, "rewards/rejected": -15.468841552734375, "step": 2656 }, { "epoch": 4.57, "learning_rate": 2.7326816829579257e-07, "logits/chosen": -2.0146427154541016, "logits/rejected": -2.0226340293884277, "logps/chosen": -121.9530029296875, "logps/rejected": -215.97146606445312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.565608501434326, "rewards/margins": 9.445220947265625, "rewards/rejected": -14.010828971862793, "step": 2657 }, { "epoch": 4.57, "learning_rate": 2.731619209519762e-07, "logits/chosen": -1.7615108489990234, "logits/rejected": -1.783033013343811, "logps/chosen": -126.63574981689453, "logps/rejected": -252.36117553710938, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.312653064727783, "rewards/margins": 12.059646606445312, "rewards/rejected": -17.372299194335938, "step": 2658 }, { "epoch": 4.58, "learning_rate": 2.7305567360815977e-07, "logits/chosen": -1.7998416423797607, "logits/rejected": -2.0184128284454346, "logps/chosen": -114.8475341796875, "logps/rejected": -265.9718017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.500597953796387, "rewards/margins": 14.878240585327148, "rewards/rejected": -19.37883949279785, "step": 2659 }, { "epoch": 4.58, "learning_rate": 2.7294942626434337e-07, "logits/chosen": -2.0658211708068848, "logits/rejected": -2.033006191253662, "logps/chosen": -145.0970458984375, "logps/rejected": -242.0736083984375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.685393333435059, "rewards/margins": 10.456193923950195, "rewards/rejected": -17.141586303710938, "step": 2660 }, { "epoch": 4.58, "learning_rate": 2.72843178920527e-07, "logits/chosen": -2.067058563232422, "logits/rejected": -1.3208388090133667, "logps/chosen": -143.67642211914062, "logps/rejected": -231.872802734375, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -5.408435344696045, "rewards/margins": 9.986071586608887, "rewards/rejected": -15.39450740814209, "step": 2661 }, { "epoch": 4.58, "learning_rate": 2.7273693157671056e-07, "logits/chosen": -1.9228395223617554, "logits/rejected": -2.081878900527954, "logps/chosen": -164.1063690185547, "logps/rejected": -248.3798370361328, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -7.360682964324951, "rewards/margins": 7.434726238250732, "rewards/rejected": -14.79541015625, "step": 2662 }, { "epoch": 4.58, "learning_rate": 2.7263068423289416e-07, "logits/chosen": -2.1255950927734375, "logits/rejected": -1.678823471069336, "logps/chosen": -133.4875030517578, "logps/rejected": -241.52987670898438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.158808708190918, "rewards/margins": 12.522674560546875, "rewards/rejected": -17.681482315063477, "step": 2663 }, { "epoch": 4.59, "learning_rate": 2.7252443688907776e-07, "logits/chosen": -1.8136600255966187, "logits/rejected": -2.1014630794525146, "logps/chosen": -126.56359100341797, "logps/rejected": -243.47549438476562, "loss": 0.0955, "rewards/accuracies": 1.0, "rewards/chosen": -5.542049407958984, "rewards/margins": 10.059295654296875, "rewards/rejected": -15.60134506225586, "step": 2664 }, { "epoch": 4.59, "learning_rate": 2.7241818954526136e-07, "logits/chosen": -1.446282148361206, "logits/rejected": -1.9919888973236084, "logps/chosen": -100.03976440429688, "logps/rejected": -264.8145751953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.369666576385498, "rewards/margins": 12.73234748840332, "rewards/rejected": -18.102014541625977, "step": 2665 }, { "epoch": 4.59, "learning_rate": 2.723119422014449e-07, "logits/chosen": -1.9110972881317139, "logits/rejected": -1.8840144872665405, "logps/chosen": -135.4020538330078, "logps/rejected": -233.03793334960938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.047850608825684, "rewards/margins": 10.467564582824707, "rewards/rejected": -14.515414237976074, "step": 2666 }, { "epoch": 4.59, "learning_rate": 2.7220569485762856e-07, "logits/chosen": -2.2133097648620605, "logits/rejected": -1.9945695400238037, "logps/chosen": -152.78330993652344, "logps/rejected": -295.903076171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.4583306312561035, "rewards/margins": 15.495626449584961, "rewards/rejected": -20.953956604003906, "step": 2667 }, { "epoch": 4.59, "learning_rate": 2.7209944751381216e-07, "logits/chosen": -2.0142955780029297, "logits/rejected": -1.7446949481964111, "logps/chosen": -157.29367065429688, "logps/rejected": -225.60902404785156, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -6.459124565124512, "rewards/margins": 9.42664909362793, "rewards/rejected": -15.885773658752441, "step": 2668 }, { "epoch": 4.59, "learning_rate": 2.719932001699957e-07, "logits/chosen": -1.8217198848724365, "logits/rejected": -2.011223077774048, "logps/chosen": -146.73434448242188, "logps/rejected": -312.9198913574219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.716800212860107, "rewards/margins": 17.392578125, "rewards/rejected": -23.109378814697266, "step": 2669 }, { "epoch": 4.6, "learning_rate": 2.7188695282617936e-07, "logits/chosen": -2.153520107269287, "logits/rejected": -2.0048880577087402, "logps/chosen": -136.55584716796875, "logps/rejected": -251.95144653320312, "loss": 0.1687, "rewards/accuracies": 1.0, "rewards/chosen": -6.165807247161865, "rewards/margins": 11.506704330444336, "rewards/rejected": -17.672510147094727, "step": 2670 }, { "epoch": 4.6, "learning_rate": 2.717807054823629e-07, "logits/chosen": -1.8087489604949951, "logits/rejected": -2.0713882446289062, "logps/chosen": -105.81574249267578, "logps/rejected": -230.07354736328125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.0332202911376953, "rewards/margins": 11.70938777923584, "rewards/rejected": -14.742609024047852, "step": 2671 }, { "epoch": 4.6, "learning_rate": 2.716744581385465e-07, "logits/chosen": -1.9308676719665527, "logits/rejected": -2.211799144744873, "logps/chosen": -107.25859069824219, "logps/rejected": -204.15769958496094, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.252820014953613, "rewards/margins": 8.814149856567383, "rewards/rejected": -13.066970825195312, "step": 2672 }, { "epoch": 4.6, "learning_rate": 2.7156821079473015e-07, "logits/chosen": -1.899910807609558, "logits/rejected": -2.0184333324432373, "logps/chosen": -122.20716094970703, "logps/rejected": -275.5130920410156, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -4.113717555999756, "rewards/margins": 14.327081680297852, "rewards/rejected": -18.440799713134766, "step": 2673 }, { "epoch": 4.6, "learning_rate": 2.714619634509137e-07, "logits/chosen": -1.735565185546875, "logits/rejected": -2.1202640533447266, "logps/chosen": -161.46734619140625, "logps/rejected": -294.6398620605469, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -8.538899421691895, "rewards/margins": 12.526934623718262, "rewards/rejected": -21.065834045410156, "step": 2674 }, { "epoch": 4.6, "learning_rate": 2.713557161070973e-07, "logits/chosen": -1.9716873168945312, "logits/rejected": -1.9671118259429932, "logps/chosen": -105.06438446044922, "logps/rejected": -231.17828369140625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.015380620956421, "rewards/margins": 12.51566219329834, "rewards/rejected": -15.53104305267334, "step": 2675 }, { "epoch": 4.61, "learning_rate": 2.712494687632809e-07, "logits/chosen": -1.84474515914917, "logits/rejected": -1.8515154123306274, "logps/chosen": -116.69389343261719, "logps/rejected": -252.01161193847656, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.383699893951416, "rewards/margins": 12.505694389343262, "rewards/rejected": -15.889395713806152, "step": 2676 }, { "epoch": 4.61, "learning_rate": 2.711432214194645e-07, "logits/chosen": -2.1448917388916016, "logits/rejected": -1.8962900638580322, "logps/chosen": -143.44664001464844, "logps/rejected": -260.9432373046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.811320781707764, "rewards/margins": 13.17848014831543, "rewards/rejected": -17.98980140686035, "step": 2677 }, { "epoch": 4.61, "learning_rate": 2.710369740756481e-07, "logits/chosen": -1.9801323413848877, "logits/rejected": -1.9073519706726074, "logps/chosen": -124.4266357421875, "logps/rejected": -240.01004028320312, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.811519622802734, "rewards/margins": 12.424564361572266, "rewards/rejected": -17.236083984375, "step": 2678 }, { "epoch": 4.61, "learning_rate": 2.709307267318317e-07, "logits/chosen": -1.9323430061340332, "logits/rejected": -1.8155734539031982, "logps/chosen": -180.45553588867188, "logps/rejected": -281.2259521484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.350264072418213, "rewards/margins": 11.412347793579102, "rewards/rejected": -18.762611389160156, "step": 2679 }, { "epoch": 4.61, "learning_rate": 2.708244793880153e-07, "logits/chosen": -2.0724124908447266, "logits/rejected": -1.7244436740875244, "logps/chosen": -122.64201354980469, "logps/rejected": -227.89100646972656, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6504673957824707, "rewards/margins": 11.60792350769043, "rewards/rejected": -15.258390426635742, "step": 2680 }, { "epoch": 4.61, "learning_rate": 2.7071823204419884e-07, "logits/chosen": -1.7175345420837402, "logits/rejected": -2.1523256301879883, "logps/chosen": -110.00162506103516, "logps/rejected": -257.0853576660156, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -3.8627920150756836, "rewards/margins": 12.706103324890137, "rewards/rejected": -16.56889533996582, "step": 2681 }, { "epoch": 4.62, "learning_rate": 2.706119847003825e-07, "logits/chosen": -1.7751193046569824, "logits/rejected": -2.181997299194336, "logps/chosen": -188.1739959716797, "logps/rejected": -357.93206787109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -9.393355369567871, "rewards/margins": 14.883888244628906, "rewards/rejected": -24.27724266052246, "step": 2682 }, { "epoch": 4.62, "learning_rate": 2.705057373565661e-07, "logits/chosen": -1.9338061809539795, "logits/rejected": -1.868151068687439, "logps/chosen": -115.65789031982422, "logps/rejected": -241.38038635253906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.4620490074157715, "rewards/margins": 12.82327938079834, "rewards/rejected": -16.285327911376953, "step": 2683 }, { "epoch": 4.62, "learning_rate": 2.7039949001274963e-07, "logits/chosen": -1.7910120487213135, "logits/rejected": -2.069140911102295, "logps/chosen": -135.28627014160156, "logps/rejected": -276.98419189453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.724132061004639, "rewards/margins": 12.556684494018555, "rewards/rejected": -18.28081703186035, "step": 2684 }, { "epoch": 4.62, "learning_rate": 2.702932426689333e-07, "logits/chosen": -2.085597038269043, "logits/rejected": -1.4925099611282349, "logps/chosen": -157.28280639648438, "logps/rejected": -221.33995056152344, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.470301628112793, "rewards/margins": 10.078617095947266, "rewards/rejected": -14.548917770385742, "step": 2685 }, { "epoch": 4.62, "learning_rate": 2.7018699532511683e-07, "logits/chosen": -2.00327205657959, "logits/rejected": -1.810571551322937, "logps/chosen": -138.6940460205078, "logps/rejected": -214.63568115234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.206053733825684, "rewards/margins": 8.102265357971191, "rewards/rejected": -13.308319091796875, "step": 2686 }, { "epoch": 4.62, "learning_rate": 2.700807479813005e-07, "logits/chosen": -2.1224653720855713, "logits/rejected": -2.0644869804382324, "logps/chosen": -153.91592407226562, "logps/rejected": -251.7987060546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.570552825927734, "rewards/margins": 11.232833862304688, "rewards/rejected": -16.803386688232422, "step": 2687 }, { "epoch": 4.63, "learning_rate": 2.699745006374841e-07, "logits/chosen": -2.017825126647949, "logits/rejected": -2.246145248413086, "logps/chosen": -127.98795318603516, "logps/rejected": -281.0344543457031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.870010852813721, "rewards/margins": 14.572784423828125, "rewards/rejected": -19.442794799804688, "step": 2688 }, { "epoch": 4.63, "learning_rate": 2.6986825329366763e-07, "logits/chosen": -2.3216376304626465, "logits/rejected": -1.9020456075668335, "logps/chosen": -140.7144775390625, "logps/rejected": -210.18763732910156, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.9285407066345215, "rewards/margins": 8.744670867919922, "rewards/rejected": -13.673212051391602, "step": 2689 }, { "epoch": 4.63, "learning_rate": 2.697620059498513e-07, "logits/chosen": -2.098694086074829, "logits/rejected": -1.6943978071212769, "logps/chosen": -177.23507690429688, "logps/rejected": -275.5085754394531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.962427139282227, "rewards/margins": 11.8165864944458, "rewards/rejected": -18.77901268005371, "step": 2690 }, { "epoch": 4.63, "learning_rate": 2.6965575860603483e-07, "logits/chosen": -2.1199698448181152, "logits/rejected": -1.8486099243164062, "logps/chosen": -137.99801635742188, "logps/rejected": -266.0281982421875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.002151012420654, "rewards/margins": 13.224237442016602, "rewards/rejected": -17.226388931274414, "step": 2691 }, { "epoch": 4.63, "learning_rate": 2.695495112622184e-07, "logits/chosen": -1.983482003211975, "logits/rejected": -2.0732650756835938, "logps/chosen": -112.76824951171875, "logps/rejected": -251.02613830566406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.2611210346221924, "rewards/margins": 12.714000701904297, "rewards/rejected": -15.975122451782227, "step": 2692 }, { "epoch": 4.64, "learning_rate": 2.694432639184021e-07, "logits/chosen": -2.094444990158081, "logits/rejected": -2.167579412460327, "logps/chosen": -144.47430419921875, "logps/rejected": -282.7381286621094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.0826263427734375, "rewards/margins": 11.117132186889648, "rewards/rejected": -17.199758529663086, "step": 2693 }, { "epoch": 4.64, "learning_rate": 2.693370165745856e-07, "logits/chosen": -2.0352749824523926, "logits/rejected": -1.9257760047912598, "logps/chosen": -120.05830383300781, "logps/rejected": -235.08029174804688, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -5.285333633422852, "rewards/margins": 11.18116569519043, "rewards/rejected": -16.46649932861328, "step": 2694 }, { "epoch": 4.64, "learning_rate": 2.692307692307692e-07, "logits/chosen": -2.032257318496704, "logits/rejected": -1.9449549913406372, "logps/chosen": -132.29440307617188, "logps/rejected": -256.7003479003906, "loss": 0.1161, "rewards/accuracies": 1.0, "rewards/chosen": -5.516889572143555, "rewards/margins": 12.489419937133789, "rewards/rejected": -18.006309509277344, "step": 2695 }, { "epoch": 4.64, "learning_rate": 2.691245218869528e-07, "logits/chosen": -2.0502610206604004, "logits/rejected": -1.8682622909545898, "logps/chosen": -157.78439331054688, "logps/rejected": -267.8739013671875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -7.6907758712768555, "rewards/margins": 12.541078567504883, "rewards/rejected": -20.231855392456055, "step": 2696 }, { "epoch": 4.64, "learning_rate": 2.690182745431364e-07, "logits/chosen": -1.9585853815078735, "logits/rejected": -1.7913618087768555, "logps/chosen": -113.22431182861328, "logps/rejected": -250.8793487548828, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.0684168338775635, "rewards/margins": 13.695778846740723, "rewards/rejected": -16.764196395874023, "step": 2697 }, { "epoch": 4.64, "learning_rate": 2.6891202719931997e-07, "logits/chosen": -2.1604537963867188, "logits/rejected": -1.679943561553955, "logps/chosen": -117.95375061035156, "logps/rejected": -228.25094604492188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.6476492881774902, "rewards/margins": 12.527129173278809, "rewards/rejected": -16.17477798461914, "step": 2698 }, { "epoch": 4.65, "learning_rate": 2.688057798555036e-07, "logits/chosen": -1.91403329372406, "logits/rejected": -1.835809588432312, "logps/chosen": -149.39166259765625, "logps/rejected": -247.31556701660156, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.4834136962890625, "rewards/margins": 10.195671081542969, "rewards/rejected": -15.679084777832031, "step": 2699 }, { "epoch": 4.65, "learning_rate": 2.686995325116872e-07, "logits/chosen": -1.604374885559082, "logits/rejected": -1.9968080520629883, "logps/chosen": -132.1942138671875, "logps/rejected": -275.20904541015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.269494533538818, "rewards/margins": 13.304073333740234, "rewards/rejected": -18.57356834411621, "step": 2700 }, { "epoch": 4.65, "learning_rate": 2.6859328516787076e-07, "logits/chosen": -1.8985674381256104, "logits/rejected": -1.7037088871002197, "logps/chosen": -109.59492492675781, "logps/rejected": -234.79371643066406, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -2.8696680068969727, "rewards/margins": 13.266040802001953, "rewards/rejected": -16.13570785522461, "step": 2701 }, { "epoch": 4.65, "learning_rate": 2.684870378240544e-07, "logits/chosen": -2.042890787124634, "logits/rejected": -2.180964946746826, "logps/chosen": -117.89765930175781, "logps/rejected": -275.45233154296875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -4.897900104522705, "rewards/margins": 13.902064323425293, "rewards/rejected": -18.799964904785156, "step": 2702 }, { "epoch": 4.65, "learning_rate": 2.6838079048023796e-07, "logits/chosen": -1.7994234561920166, "logits/rejected": -2.029733896255493, "logps/chosen": -132.33364868164062, "logps/rejected": -264.4222717285156, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.220032215118408, "rewards/margins": 11.202005386352539, "rewards/rejected": -16.42203712463379, "step": 2703 }, { "epoch": 4.65, "learning_rate": 2.6827454313642156e-07, "logits/chosen": -2.118028163909912, "logits/rejected": -1.8303956985473633, "logps/chosen": -196.6674041748047, "logps/rejected": -299.01025390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.375628471374512, "rewards/margins": 11.35059928894043, "rewards/rejected": -20.726226806640625, "step": 2704 }, { "epoch": 4.66, "learning_rate": 2.681682957926052e-07, "logits/chosen": -1.9007747173309326, "logits/rejected": -2.2485299110412598, "logps/chosen": -162.16184997558594, "logps/rejected": -286.5069580078125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -6.061338424682617, "rewards/margins": 11.39250373840332, "rewards/rejected": -17.453842163085938, "step": 2705 }, { "epoch": 4.66, "learning_rate": 2.6806204844878876e-07, "logits/chosen": -2.1718459129333496, "logits/rejected": -2.2190725803375244, "logps/chosen": -134.18785095214844, "logps/rejected": -241.84353637695312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.269268989562988, "rewards/margins": 9.930248260498047, "rewards/rejected": -16.19951820373535, "step": 2706 }, { "epoch": 4.66, "learning_rate": 2.6795580110497236e-07, "logits/chosen": -1.921142339706421, "logits/rejected": -1.735482931137085, "logps/chosen": -137.93472290039062, "logps/rejected": -258.86712646484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.937943458557129, "rewards/margins": 12.200555801391602, "rewards/rejected": -18.138500213623047, "step": 2707 }, { "epoch": 4.66, "learning_rate": 2.6784955376115596e-07, "logits/chosen": -1.9555768966674805, "logits/rejected": -2.004854679107666, "logps/chosen": -139.69961547851562, "logps/rejected": -256.1828308105469, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -6.626825332641602, "rewards/margins": 9.42347526550293, "rewards/rejected": -16.05030059814453, "step": 2708 }, { "epoch": 4.66, "learning_rate": 2.6774330641733955e-07, "logits/chosen": -2.0827431678771973, "logits/rejected": -2.1409530639648438, "logps/chosen": -166.68048095703125, "logps/rejected": -300.0635986328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.113890171051025, "rewards/margins": 13.527335166931152, "rewards/rejected": -19.641223907470703, "step": 2709 }, { "epoch": 4.66, "learning_rate": 2.6763705907352315e-07, "logits/chosen": -2.2175698280334473, "logits/rejected": -1.9882854223251343, "logps/chosen": -171.5674591064453, "logps/rejected": -274.07452392578125, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -6.715054988861084, "rewards/margins": 11.552715301513672, "rewards/rejected": -18.267770767211914, "step": 2710 }, { "epoch": 4.67, "learning_rate": 2.6753081172970675e-07, "logits/chosen": -1.8239725828170776, "logits/rejected": -2.0241427421569824, "logps/chosen": -159.6952667236328, "logps/rejected": -317.1060485839844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.492520332336426, "rewards/margins": 14.743925094604492, "rewards/rejected": -21.236446380615234, "step": 2711 }, { "epoch": 4.67, "learning_rate": 2.6742456438589035e-07, "logits/chosen": -2.0624256134033203, "logits/rejected": -1.935379981994629, "logps/chosen": -161.69146728515625, "logps/rejected": -248.91204833984375, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -7.176948547363281, "rewards/margins": 9.974638938903809, "rewards/rejected": -17.151586532592773, "step": 2712 }, { "epoch": 4.67, "learning_rate": 2.673183170420739e-07, "logits/chosen": -1.9444416761398315, "logits/rejected": -2.1463499069213867, "logps/chosen": -114.17522430419922, "logps/rejected": -246.46844482421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.27266788482666, "rewards/margins": 12.076772689819336, "rewards/rejected": -16.349441528320312, "step": 2713 }, { "epoch": 4.67, "learning_rate": 2.6721206969825755e-07, "logits/chosen": -2.017382860183716, "logits/rejected": -2.218074321746826, "logps/chosen": -162.890625, "logps/rejected": -264.2354736328125, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -6.990757942199707, "rewards/margins": 10.769559860229492, "rewards/rejected": -17.760318756103516, "step": 2714 }, { "epoch": 4.67, "learning_rate": 2.6710582235444115e-07, "logits/chosen": -1.947066307067871, "logits/rejected": -1.8658524751663208, "logps/chosen": -147.0270538330078, "logps/rejected": -255.27325439453125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -6.641728401184082, "rewards/margins": 10.8551025390625, "rewards/rejected": -17.4968318939209, "step": 2715 }, { "epoch": 4.67, "learning_rate": 2.669995750106247e-07, "logits/chosen": -2.175767421722412, "logits/rejected": -1.721675992012024, "logps/chosen": -139.5303497314453, "logps/rejected": -234.81265258789062, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -6.1391472816467285, "rewards/margins": 10.220181465148926, "rewards/rejected": -16.359329223632812, "step": 2716 }, { "epoch": 4.68, "learning_rate": 2.6689332766680835e-07, "logits/chosen": -2.0077056884765625, "logits/rejected": -2.122889280319214, "logps/chosen": -135.3450469970703, "logps/rejected": -241.23068237304688, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -5.088562488555908, "rewards/margins": 9.692822456359863, "rewards/rejected": -14.78138542175293, "step": 2717 }, { "epoch": 4.68, "learning_rate": 2.667870803229919e-07, "logits/chosen": -2.094265937805176, "logits/rejected": -2.058610439300537, "logps/chosen": -156.3363037109375, "logps/rejected": -251.42567443847656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -8.906691551208496, "rewards/margins": 8.957745552062988, "rewards/rejected": -17.864437103271484, "step": 2718 }, { "epoch": 4.68, "learning_rate": 2.666808329791755e-07, "logits/chosen": -2.1050822734832764, "logits/rejected": -1.7235426902770996, "logps/chosen": -107.86126708984375, "logps/rejected": -194.93826293945312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.9892642498016357, "rewards/margins": 9.727193832397461, "rewards/rejected": -12.71645736694336, "step": 2719 }, { "epoch": 4.68, "learning_rate": 2.6657458563535914e-07, "logits/chosen": -2.191770553588867, "logits/rejected": -1.7043834924697876, "logps/chosen": -176.43035888671875, "logps/rejected": -267.96234130859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -7.0772905349731445, "rewards/margins": 10.845283508300781, "rewards/rejected": -17.92257308959961, "step": 2720 }, { "epoch": 4.68, "learning_rate": 2.664683382915427e-07, "logits/chosen": -1.488320231437683, "logits/rejected": -2.021864891052246, "logps/chosen": -112.09660339355469, "logps/rejected": -307.61126708984375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.151058673858643, "rewards/margins": 16.968769073486328, "rewards/rejected": -21.119829177856445, "step": 2721 }, { "epoch": 4.69, "learning_rate": 2.663620909477263e-07, "logits/chosen": -2.0430173873901367, "logits/rejected": -1.9516823291778564, "logps/chosen": -122.84918212890625, "logps/rejected": -238.94544982910156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.522947788238525, "rewards/margins": 11.676889419555664, "rewards/rejected": -17.19983673095703, "step": 2722 }, { "epoch": 4.69, "learning_rate": 2.662558436039099e-07, "logits/chosen": -1.9030500650405884, "logits/rejected": -2.11923885345459, "logps/chosen": -151.55230712890625, "logps/rejected": -267.28656005859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.396944046020508, "rewards/margins": 10.651018142700195, "rewards/rejected": -17.047962188720703, "step": 2723 }, { "epoch": 4.69, "learning_rate": 2.661495962600935e-07, "logits/chosen": -1.9152920246124268, "logits/rejected": -1.9578351974487305, "logps/chosen": -153.6429443359375, "logps/rejected": -295.48004150390625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.04555606842041, "rewards/margins": 12.981534957885742, "rewards/rejected": -18.02709197998047, "step": 2724 }, { "epoch": 4.69, "learning_rate": 2.6604334891627703e-07, "logits/chosen": -1.8299295902252197, "logits/rejected": -2.194045066833496, "logps/chosen": -119.12879943847656, "logps/rejected": -267.7515563964844, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": -4.733334064483643, "rewards/margins": 12.916604042053223, "rewards/rejected": -17.649938583374023, "step": 2725 }, { "epoch": 4.69, "learning_rate": 2.659371015724607e-07, "logits/chosen": -2.0051326751708984, "logits/rejected": -1.9942398071289062, "logps/chosen": -135.48968505859375, "logps/rejected": -230.5699920654297, "loss": 0.0687, "rewards/accuracies": 0.75, "rewards/chosen": -6.753681659698486, "rewards/margins": 8.674722671508789, "rewards/rejected": -15.428404808044434, "step": 2726 }, { "epoch": 4.69, "learning_rate": 2.658308542286443e-07, "logits/chosen": -1.8825273513793945, "logits/rejected": -1.96242094039917, "logps/chosen": -168.678466796875, "logps/rejected": -290.93914794921875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.220557689666748, "rewards/margins": 11.497099876403809, "rewards/rejected": -17.7176570892334, "step": 2727 }, { "epoch": 4.7, "learning_rate": 2.6572460688482783e-07, "logits/chosen": -2.098341464996338, "logits/rejected": -2.182033061981201, "logps/chosen": -197.15907287597656, "logps/rejected": -304.1241455078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -10.52858829498291, "rewards/margins": 10.682010650634766, "rewards/rejected": -21.21059799194336, "step": 2728 }, { "epoch": 4.7, "learning_rate": 2.656183595410115e-07, "logits/chosen": -2.0189249515533447, "logits/rejected": -1.6602256298065186, "logps/chosen": -178.3004913330078, "logps/rejected": -309.05816650390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.649147987365723, "rewards/margins": 14.130228996276855, "rewards/rejected": -22.779375076293945, "step": 2729 }, { "epoch": 4.7, "learning_rate": 2.65512112197195e-07, "logits/chosen": -2.1054513454437256, "logits/rejected": -1.6291478872299194, "logps/chosen": -116.09979248046875, "logps/rejected": -274.2624816894531, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.781543254852295, "rewards/margins": 15.377908706665039, "rewards/rejected": -19.15945053100586, "step": 2730 }, { "epoch": 4.7, "learning_rate": 2.654058648533787e-07, "logits/chosen": -2.1165833473205566, "logits/rejected": -1.7340750694274902, "logps/chosen": -118.69549560546875, "logps/rejected": -275.86968994140625, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": -3.993239402770996, "rewards/margins": 15.9363374710083, "rewards/rejected": -19.929576873779297, "step": 2731 }, { "epoch": 4.7, "learning_rate": 2.652996175095623e-07, "logits/chosen": -2.142580986022949, "logits/rejected": -1.991262435913086, "logps/chosen": -167.8447265625, "logps/rejected": -276.47650146484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.017828464508057, "rewards/margins": 12.868526458740234, "rewards/rejected": -18.8863525390625, "step": 2732 }, { "epoch": 4.7, "learning_rate": 2.651933701657458e-07, "logits/chosen": -2.072005033493042, "logits/rejected": -1.8940303325653076, "logps/chosen": -147.42176818847656, "logps/rejected": -238.70960998535156, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.739014625549316, "rewards/margins": 8.623407363891602, "rewards/rejected": -14.362421989440918, "step": 2733 }, { "epoch": 4.71, "learning_rate": 2.650871228219295e-07, "logits/chosen": -2.281196117401123, "logits/rejected": -1.3632296323776245, "logps/chosen": -127.85359191894531, "logps/rejected": -196.17605590820312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.1629419326782227, "rewards/margins": 10.441896438598633, "rewards/rejected": -13.604839324951172, "step": 2734 }, { "epoch": 4.71, "learning_rate": 2.64980875478113e-07, "logits/chosen": -2.1913833618164062, "logits/rejected": -2.2548880577087402, "logps/chosen": -139.09414672851562, "logps/rejected": -292.57275390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.693967342376709, "rewards/margins": 15.207712173461914, "rewards/rejected": -20.90167999267578, "step": 2735 }, { "epoch": 4.71, "learning_rate": 2.648746281342966e-07, "logits/chosen": -2.0972352027893066, "logits/rejected": -2.203113079071045, "logps/chosen": -73.3901596069336, "logps/rejected": -249.60263061523438, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.0981837511062622, "rewards/margins": 16.742389678955078, "rewards/rejected": -17.840574264526367, "step": 2736 }, { "epoch": 4.71, "learning_rate": 2.6476838079048027e-07, "logits/chosen": -2.0689079761505127, "logits/rejected": -2.048642158508301, "logps/chosen": -113.31959533691406, "logps/rejected": -231.97625732421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.639585494995117, "rewards/margins": 11.547872543334961, "rewards/rejected": -14.187458992004395, "step": 2737 }, { "epoch": 4.71, "learning_rate": 2.646621334466638e-07, "logits/chosen": -1.8765374422073364, "logits/rejected": -1.8988280296325684, "logps/chosen": -129.074951171875, "logps/rejected": -234.21820068359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.436221122741699, "rewards/margins": 11.763257026672363, "rewards/rejected": -17.19947624206543, "step": 2738 }, { "epoch": 4.71, "learning_rate": 2.645558861028474e-07, "logits/chosen": -2.124211311340332, "logits/rejected": -1.9732224941253662, "logps/chosen": -167.3634033203125, "logps/rejected": -268.34014892578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.8171234130859375, "rewards/margins": 10.650843620300293, "rewards/rejected": -18.467967987060547, "step": 2739 }, { "epoch": 4.72, "learning_rate": 2.64449638759031e-07, "logits/chosen": -1.8413599729537964, "logits/rejected": -2.284669876098633, "logps/chosen": -113.2149429321289, "logps/rejected": -246.0528106689453, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -3.749051570892334, "rewards/margins": 11.436655044555664, "rewards/rejected": -15.185707092285156, "step": 2740 }, { "epoch": 4.72, "learning_rate": 2.643433914152146e-07, "logits/chosen": -1.963750958442688, "logits/rejected": -1.8855690956115723, "logps/chosen": -141.91546630859375, "logps/rejected": -214.380126953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.291935443878174, "rewards/margins": 9.730710983276367, "rewards/rejected": -14.022645950317383, "step": 2741 }, { "epoch": 4.72, "learning_rate": 2.642371440713982e-07, "logits/chosen": -1.960123062133789, "logits/rejected": -1.9745564460754395, "logps/chosen": -178.8067626953125, "logps/rejected": -260.0921936035156, "loss": 0.2273, "rewards/accuracies": 0.75, "rewards/chosen": -6.575587272644043, "rewards/margins": 9.062186241149902, "rewards/rejected": -15.637773513793945, "step": 2742 }, { "epoch": 4.72, "learning_rate": 2.641308967275818e-07, "logits/chosen": -1.9939631223678589, "logits/rejected": -2.135561227798462, "logps/chosen": -157.7957763671875, "logps/rejected": -260.5682067871094, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.267598628997803, "rewards/margins": 10.240945816040039, "rewards/rejected": -16.508546829223633, "step": 2743 }, { "epoch": 4.72, "learning_rate": 2.640246493837654e-07, "logits/chosen": -1.813794732093811, "logits/rejected": -1.872603416442871, "logps/chosen": -147.02117919921875, "logps/rejected": -316.3665466308594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.358736038208008, "rewards/margins": 15.888021469116211, "rewards/rejected": -23.24675750732422, "step": 2744 }, { "epoch": 4.72, "learning_rate": 2.6391840203994896e-07, "logits/chosen": -2.0545058250427246, "logits/rejected": -2.1741061210632324, "logps/chosen": -103.67294311523438, "logps/rejected": -253.32925415039062, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.0252256393432617, "rewards/margins": 13.821043968200684, "rewards/rejected": -16.846269607543945, "step": 2745 }, { "epoch": 4.73, "learning_rate": 2.638121546961326e-07, "logits/chosen": -2.2682154178619385, "logits/rejected": -1.5584452152252197, "logps/chosen": -143.2244873046875, "logps/rejected": -207.91587829589844, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.211037635803223, "rewards/margins": 9.652920722961426, "rewards/rejected": -13.863957405090332, "step": 2746 }, { "epoch": 4.73, "learning_rate": 2.637059073523162e-07, "logits/chosen": -2.125523090362549, "logits/rejected": -2.1508421897888184, "logps/chosen": -137.0603485107422, "logps/rejected": -204.54412841796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.271374225616455, "rewards/margins": 9.201873779296875, "rewards/rejected": -14.473247528076172, "step": 2747 }, { "epoch": 4.73, "learning_rate": 2.6359966000849975e-07, "logits/chosen": -2.268603563308716, "logits/rejected": -2.0291929244995117, "logps/chosen": -157.38282775878906, "logps/rejected": -288.63848876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.2151899337768555, "rewards/margins": 12.403108596801758, "rewards/rejected": -18.61829948425293, "step": 2748 }, { "epoch": 4.73, "learning_rate": 2.634934126646834e-07, "logits/chosen": -2.0968198776245117, "logits/rejected": -1.9446521997451782, "logps/chosen": -129.41696166992188, "logps/rejected": -211.99765014648438, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.438839912414551, "rewards/margins": 9.09351921081543, "rewards/rejected": -13.532360076904297, "step": 2749 }, { "epoch": 4.73, "learning_rate": 2.6338716532086695e-07, "logits/chosen": -2.2058167457580566, "logits/rejected": -2.071108102798462, "logps/chosen": -170.08267211914062, "logps/rejected": -250.7501220703125, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -5.3523736000061035, "rewards/margins": 10.229076385498047, "rewards/rejected": -15.581449508666992, "step": 2750 }, { "epoch": 4.73, "learning_rate": 2.6328091797705055e-07, "logits/chosen": -1.9789916276931763, "logits/rejected": -1.8967697620391846, "logps/chosen": -142.66465759277344, "logps/rejected": -273.8955078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.136981964111328, "rewards/margins": 13.247491836547852, "rewards/rejected": -19.384471893310547, "step": 2751 }, { "epoch": 4.74, "learning_rate": 2.631746706332342e-07, "logits/chosen": -1.9032548666000366, "logits/rejected": -1.8234105110168457, "logps/chosen": -140.16360473632812, "logps/rejected": -259.494140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.052398681640625, "rewards/margins": 14.23966121673584, "rewards/rejected": -19.29206085205078, "step": 2752 }, { "epoch": 4.74, "learning_rate": 2.6306842328941775e-07, "logits/chosen": -1.990303874015808, "logits/rejected": -2.081111431121826, "logps/chosen": -171.96969604492188, "logps/rejected": -266.20751953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.626532077789307, "rewards/margins": 9.489567756652832, "rewards/rejected": -17.116100311279297, "step": 2753 }, { "epoch": 4.74, "learning_rate": 2.6296217594560135e-07, "logits/chosen": -1.8623963594436646, "logits/rejected": -2.1620876789093018, "logps/chosen": -140.62570190429688, "logps/rejected": -277.5297546386719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.88062858581543, "rewards/margins": 11.529224395751953, "rewards/rejected": -17.409854888916016, "step": 2754 }, { "epoch": 4.74, "learning_rate": 2.6285592860178495e-07, "logits/chosen": -1.8459184169769287, "logits/rejected": -2.0135931968688965, "logps/chosen": -116.1392822265625, "logps/rejected": -269.173095703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.586890697479248, "rewards/margins": 14.293731689453125, "rewards/rejected": -17.88062286376953, "step": 2755 }, { "epoch": 4.74, "learning_rate": 2.6274968125796854e-07, "logits/chosen": -1.9034831523895264, "logits/rejected": -2.0559582710266113, "logps/chosen": -105.65335845947266, "logps/rejected": -238.40586853027344, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.259183645248413, "rewards/margins": 13.042591094970703, "rewards/rejected": -16.301774978637695, "step": 2756 }, { "epoch": 4.75, "learning_rate": 2.626434339141521e-07, "logits/chosen": -2.0875840187072754, "logits/rejected": -1.9501094818115234, "logps/chosen": -162.94418334960938, "logps/rejected": -244.11688232421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.831872940063477, "rewards/margins": 8.893529891967773, "rewards/rejected": -17.72540283203125, "step": 2757 }, { "epoch": 4.75, "learning_rate": 2.6253718657033574e-07, "logits/chosen": -2.039463758468628, "logits/rejected": -2.286820411682129, "logps/chosen": -121.4288101196289, "logps/rejected": -238.05758666992188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.038440704345703, "rewards/margins": 12.455324172973633, "rewards/rejected": -15.493765830993652, "step": 2758 }, { "epoch": 4.75, "learning_rate": 2.6243093922651934e-07, "logits/chosen": -1.8773998022079468, "logits/rejected": -2.1692380905151367, "logps/chosen": -118.05982971191406, "logps/rejected": -256.933349609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.197602272033691, "rewards/margins": 14.315399169921875, "rewards/rejected": -18.513002395629883, "step": 2759 }, { "epoch": 4.75, "learning_rate": 2.623246918827029e-07, "logits/chosen": -2.043320655822754, "logits/rejected": -2.132518768310547, "logps/chosen": -153.15626525878906, "logps/rejected": -289.173095703125, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -5.454593658447266, "rewards/margins": 12.816312789916992, "rewards/rejected": -18.27090835571289, "step": 2760 }, { "epoch": 4.75, "learning_rate": 2.6221844453888654e-07, "logits/chosen": -1.703155517578125, "logits/rejected": -2.0736851692199707, "logps/chosen": -174.51730346679688, "logps/rejected": -276.0461730957031, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.070779800415039, "rewards/margins": 10.291958808898926, "rewards/rejected": -18.36273956298828, "step": 2761 }, { "epoch": 4.75, "learning_rate": 2.621121971950701e-07, "logits/chosen": -1.9532612562179565, "logits/rejected": -1.7607524394989014, "logps/chosen": -139.28350830078125, "logps/rejected": -248.22265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.342388153076172, "rewards/margins": 12.718644142150879, "rewards/rejected": -18.061033248901367, "step": 2762 }, { "epoch": 4.76, "learning_rate": 2.620059498512537e-07, "logits/chosen": -2.004607677459717, "logits/rejected": -2.0286197662353516, "logps/chosen": -179.5618896484375, "logps/rejected": -253.30133056640625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -8.195525169372559, "rewards/margins": 8.7904634475708, "rewards/rejected": -16.98598861694336, "step": 2763 }, { "epoch": 4.76, "learning_rate": 2.6189970250743734e-07, "logits/chosen": -2.1734695434570312, "logits/rejected": -2.1488513946533203, "logps/chosen": -125.34613037109375, "logps/rejected": -264.8323059082031, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.645326137542725, "rewards/margins": 13.194375991821289, "rewards/rejected": -17.839702606201172, "step": 2764 }, { "epoch": 4.76, "learning_rate": 2.617934551636209e-07, "logits/chosen": -1.9955408573150635, "logits/rejected": -1.996778964996338, "logps/chosen": -147.9451446533203, "logps/rejected": -289.92059326171875, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -6.823573112487793, "rewards/margins": 13.79319953918457, "rewards/rejected": -20.61677360534668, "step": 2765 }, { "epoch": 4.76, "learning_rate": 2.616872078198045e-07, "logits/chosen": -1.760658621788025, "logits/rejected": -2.2887563705444336, "logps/chosen": -130.86834716796875, "logps/rejected": -245.02755737304688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.960172653198242, "rewards/margins": 10.758699417114258, "rewards/rejected": -15.7188720703125, "step": 2766 }, { "epoch": 4.76, "learning_rate": 2.615809604759881e-07, "logits/chosen": -1.9615728855133057, "logits/rejected": -1.9732791185379028, "logps/chosen": -158.97225952148438, "logps/rejected": -257.2919921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.967270374298096, "rewards/margins": 10.210997581481934, "rewards/rejected": -17.178268432617188, "step": 2767 }, { "epoch": 4.76, "learning_rate": 2.614747131321717e-07, "logits/chosen": -1.588073968887329, "logits/rejected": -2.0637459754943848, "logps/chosen": -130.08712768554688, "logps/rejected": -265.19659423828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.352591514587402, "rewards/margins": 12.418601989746094, "rewards/rejected": -16.771194458007812, "step": 2768 }, { "epoch": 4.77, "learning_rate": 2.613684657883553e-07, "logits/chosen": -2.0072269439697266, "logits/rejected": -2.2451794147491455, "logps/chosen": -111.57438659667969, "logps/rejected": -265.9734191894531, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.7540669441223145, "rewards/margins": 14.2536039352417, "rewards/rejected": -17.007671356201172, "step": 2769 }, { "epoch": 4.77, "learning_rate": 2.612622184445389e-07, "logits/chosen": -1.939496636390686, "logits/rejected": -1.7700679302215576, "logps/chosen": -124.17092895507812, "logps/rejected": -222.18666076660156, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.0882415771484375, "rewards/margins": 11.138937950134277, "rewards/rejected": -15.227179527282715, "step": 2770 }, { "epoch": 4.77, "learning_rate": 2.611559711007225e-07, "logits/chosen": -2.082730770111084, "logits/rejected": -1.8594623804092407, "logps/chosen": -152.27685546875, "logps/rejected": -250.07131958007812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.2807536125183105, "rewards/margins": 10.787052154541016, "rewards/rejected": -17.067806243896484, "step": 2771 }, { "epoch": 4.77, "learning_rate": 2.6104972375690607e-07, "logits/chosen": -2.0307729244232178, "logits/rejected": -2.127312660217285, "logps/chosen": -174.57302856445312, "logps/rejected": -295.33123779296875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.493841171264648, "rewards/margins": 12.429780960083008, "rewards/rejected": -18.923622131347656, "step": 2772 }, { "epoch": 4.77, "learning_rate": 2.6094347641308967e-07, "logits/chosen": -1.8957070112228394, "logits/rejected": -1.815651774406433, "logps/chosen": -139.86595153808594, "logps/rejected": -213.86355590820312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.41713285446167, "rewards/margins": 5.713579177856445, "rewards/rejected": -12.130712509155273, "step": 2773 }, { "epoch": 4.77, "learning_rate": 2.6083722906927327e-07, "logits/chosen": -1.8671848773956299, "logits/rejected": -2.0833630561828613, "logps/chosen": -136.7374267578125, "logps/rejected": -260.20111083984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.262881755828857, "rewards/margins": 11.70418930053711, "rewards/rejected": -16.967071533203125, "step": 2774 }, { "epoch": 4.78, "learning_rate": 2.6073098172545687e-07, "logits/chosen": -2.069349765777588, "logits/rejected": -2.103484869003296, "logps/chosen": -162.357421875, "logps/rejected": -267.8282470703125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -6.804072856903076, "rewards/margins": 10.798186302185059, "rewards/rejected": -17.602258682250977, "step": 2775 }, { "epoch": 4.78, "learning_rate": 2.6062473438164047e-07, "logits/chosen": -1.7814745903015137, "logits/rejected": -2.0362255573272705, "logps/chosen": -149.23825073242188, "logps/rejected": -290.5816345214844, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.4613471031188965, "rewards/margins": 13.368087768554688, "rewards/rejected": -19.82943344116211, "step": 2776 }, { "epoch": 4.78, "learning_rate": 2.60518487037824e-07, "logits/chosen": -2.2602996826171875, "logits/rejected": -2.073729991912842, "logps/chosen": -168.67303466796875, "logps/rejected": -257.79779052734375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -6.961647987365723, "rewards/margins": 11.023825645446777, "rewards/rejected": -17.9854736328125, "step": 2777 }, { "epoch": 4.78, "learning_rate": 2.6041223969400767e-07, "logits/chosen": -2.009329319000244, "logits/rejected": -2.0889320373535156, "logps/chosen": -164.15240478515625, "logps/rejected": -249.71022033691406, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.531913757324219, "rewards/margins": 8.837104797363281, "rewards/rejected": -17.3690185546875, "step": 2778 }, { "epoch": 4.78, "learning_rate": 2.603059923501912e-07, "logits/chosen": -1.9278721809387207, "logits/rejected": -1.7827560901641846, "logps/chosen": -139.6308135986328, "logps/rejected": -274.786865234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.750186443328857, "rewards/margins": 13.685895919799805, "rewards/rejected": -19.43608283996582, "step": 2779 }, { "epoch": 4.78, "learning_rate": 2.601997450063748e-07, "logits/chosen": -2.0539352893829346, "logits/rejected": -1.8573635816574097, "logps/chosen": -127.06146240234375, "logps/rejected": -224.51162719726562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.1040780544281006, "rewards/margins": 11.096953392028809, "rewards/rejected": -14.201031684875488, "step": 2780 }, { "epoch": 4.79, "learning_rate": 2.6009349766255846e-07, "logits/chosen": -1.9488669633865356, "logits/rejected": -1.8903374671936035, "logps/chosen": -152.12249755859375, "logps/rejected": -232.28330993652344, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.860742092132568, "rewards/margins": 9.085041046142578, "rewards/rejected": -15.945783615112305, "step": 2781 }, { "epoch": 4.79, "learning_rate": 2.59987250318742e-07, "logits/chosen": -1.4828815460205078, "logits/rejected": -2.096580982208252, "logps/chosen": -138.32485961914062, "logps/rejected": -262.7971496582031, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.821523189544678, "rewards/margins": 11.367725372314453, "rewards/rejected": -17.189247131347656, "step": 2782 }, { "epoch": 4.79, "learning_rate": 2.598810029749256e-07, "logits/chosen": -1.8626960515975952, "logits/rejected": -2.0621113777160645, "logps/chosen": -136.6329803466797, "logps/rejected": -280.5402526855469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.633668422698975, "rewards/margins": 12.41102409362793, "rewards/rejected": -19.044692993164062, "step": 2783 }, { "epoch": 4.79, "learning_rate": 2.597747556311092e-07, "logits/chosen": -1.6983987092971802, "logits/rejected": -1.8839902877807617, "logps/chosen": -158.58486938476562, "logps/rejected": -312.5716857910156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.083805084228516, "rewards/margins": 14.642013549804688, "rewards/rejected": -21.725818634033203, "step": 2784 }, { "epoch": 4.79, "learning_rate": 2.596685082872928e-07, "logits/chosen": -1.745854139328003, "logits/rejected": -2.2345376014709473, "logps/chosen": -108.85995483398438, "logps/rejected": -244.35678100585938, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.6887388229370117, "rewards/margins": 12.997108459472656, "rewards/rejected": -15.685846328735352, "step": 2785 }, { "epoch": 4.8, "learning_rate": 2.595622609434764e-07, "logits/chosen": -1.8926599025726318, "logits/rejected": -2.0133109092712402, "logps/chosen": -109.59996795654297, "logps/rejected": -238.41806030273438, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.502894401550293, "rewards/margins": 12.271202087402344, "rewards/rejected": -16.774097442626953, "step": 2786 }, { "epoch": 4.8, "learning_rate": 2.5945601359966e-07, "logits/chosen": -1.4792169332504272, "logits/rejected": -2.2304069995880127, "logps/chosen": -134.0445098876953, "logps/rejected": -312.57196044921875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.6611552238464355, "rewards/margins": 14.441551208496094, "rewards/rejected": -19.102706909179688, "step": 2787 }, { "epoch": 4.8, "learning_rate": 2.593497662558436e-07, "logits/chosen": -1.9200639724731445, "logits/rejected": -1.9324119091033936, "logps/chosen": -120.280517578125, "logps/rejected": -208.52920532226562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.7159814834594727, "rewards/margins": 8.300384521484375, "rewards/rejected": -12.016366004943848, "step": 2788 }, { "epoch": 4.8, "learning_rate": 2.5924351891202715e-07, "logits/chosen": -1.2807223796844482, "logits/rejected": -2.172734022140503, "logps/chosen": -97.38246154785156, "logps/rejected": -262.2222595214844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7548773288726807, "rewards/margins": 13.315546989440918, "rewards/rejected": -16.070425033569336, "step": 2789 }, { "epoch": 4.8, "learning_rate": 2.591372715682108e-07, "logits/chosen": -1.781484842300415, "logits/rejected": -1.9290566444396973, "logps/chosen": -101.39772033691406, "logps/rejected": -276.4649658203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.6011738777160645, "rewards/margins": 15.930739402770996, "rewards/rejected": -20.53191375732422, "step": 2790 }, { "epoch": 4.8, "learning_rate": 2.590310242243944e-07, "logits/chosen": -1.7982654571533203, "logits/rejected": -2.1049530506134033, "logps/chosen": -151.06900024414062, "logps/rejected": -295.0009765625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -6.304581642150879, "rewards/margins": 13.17203426361084, "rewards/rejected": -19.47661590576172, "step": 2791 }, { "epoch": 4.81, "learning_rate": 2.5892477688057795e-07, "logits/chosen": -1.6129655838012695, "logits/rejected": -1.980439305305481, "logps/chosen": -150.23365783691406, "logps/rejected": -300.994384765625, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -6.876035690307617, "rewards/margins": 13.598037719726562, "rewards/rejected": -20.47407341003418, "step": 2792 }, { "epoch": 4.81, "learning_rate": 2.588185295367616e-07, "logits/chosen": -1.9317631721496582, "logits/rejected": -1.9101085662841797, "logps/chosen": -154.2973175048828, "logps/rejected": -259.8447570800781, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -7.18361759185791, "rewards/margins": 9.421655654907227, "rewards/rejected": -16.605274200439453, "step": 2793 }, { "epoch": 4.81, "learning_rate": 2.5871228219294514e-07, "logits/chosen": -1.992821216583252, "logits/rejected": -1.5723789930343628, "logps/chosen": -187.62527465820312, "logps/rejected": -288.4224853515625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -8.446338653564453, "rewards/margins": 12.166128158569336, "rewards/rejected": -20.612464904785156, "step": 2794 }, { "epoch": 4.81, "learning_rate": 2.5860603484912874e-07, "logits/chosen": -2.0618419647216797, "logits/rejected": -1.996510624885559, "logps/chosen": -130.35623168945312, "logps/rejected": -288.9126281738281, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.359704971313477, "rewards/margins": 16.51921844482422, "rewards/rejected": -20.878921508789062, "step": 2795 }, { "epoch": 4.81, "learning_rate": 2.584997875053124e-07, "logits/chosen": -1.8691083192825317, "logits/rejected": -1.9893670082092285, "logps/chosen": -148.0125732421875, "logps/rejected": -258.08123779296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.132524013519287, "rewards/margins": 9.878376007080078, "rewards/rejected": -17.010900497436523, "step": 2796 }, { "epoch": 4.81, "learning_rate": 2.5839354016149594e-07, "logits/chosen": -2.015641689300537, "logits/rejected": -1.675333857536316, "logps/chosen": -178.89382934570312, "logps/rejected": -292.0050048828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.661238670349121, "rewards/margins": 12.47165584564209, "rewards/rejected": -21.13289451599121, "step": 2797 }, { "epoch": 4.82, "learning_rate": 2.5828729281767954e-07, "logits/chosen": -1.8438360691070557, "logits/rejected": -1.9742827415466309, "logps/chosen": -130.27297973632812, "logps/rejected": -246.90347290039062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.222330570220947, "rewards/margins": 10.514902114868164, "rewards/rejected": -16.737232208251953, "step": 2798 }, { "epoch": 4.82, "learning_rate": 2.5818104547386314e-07, "logits/chosen": -2.025806188583374, "logits/rejected": -1.5576541423797607, "logps/chosen": -176.17234802246094, "logps/rejected": -263.5052185058594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.292598724365234, "rewards/margins": 11.160362243652344, "rewards/rejected": -19.452960968017578, "step": 2799 }, { "epoch": 4.82, "learning_rate": 2.5807479813004674e-07, "logits/chosen": -2.010802984237671, "logits/rejected": -1.9517221450805664, "logps/chosen": -102.79820251464844, "logps/rejected": -251.3988800048828, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.286846876144409, "rewards/margins": 15.305426597595215, "rewards/rejected": -17.592273712158203, "step": 2800 }, { "epoch": 4.82, "learning_rate": 2.5796855078623034e-07, "logits/chosen": -1.6971564292907715, "logits/rejected": -1.7007620334625244, "logps/chosen": -134.01426696777344, "logps/rejected": -228.46803283691406, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.9831695556640625, "rewards/margins": 9.4744234085083, "rewards/rejected": -15.457592010498047, "step": 2801 }, { "epoch": 4.82, "learning_rate": 2.5786230344241393e-07, "logits/chosen": -1.8373862504959106, "logits/rejected": -1.9727144241333008, "logps/chosen": -119.60006713867188, "logps/rejected": -272.3977966308594, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.345320224761963, "rewards/margins": 13.645305633544922, "rewards/rejected": -17.990625381469727, "step": 2802 }, { "epoch": 4.82, "learning_rate": 2.5775605609859753e-07, "logits/chosen": -2.0052337646484375, "logits/rejected": -1.7752946615219116, "logps/chosen": -137.9716339111328, "logps/rejected": -279.7607421875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.228633403778076, "rewards/margins": 15.127483367919922, "rewards/rejected": -19.356115341186523, "step": 2803 }, { "epoch": 4.83, "learning_rate": 2.576498087547811e-07, "logits/chosen": -2.007903575897217, "logits/rejected": -1.9272328615188599, "logps/chosen": -134.17677307128906, "logps/rejected": -300.646484375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.622711658477783, "rewards/margins": 15.929536819458008, "rewards/rejected": -20.552248001098633, "step": 2804 }, { "epoch": 4.83, "learning_rate": 2.5754356141096473e-07, "logits/chosen": -2.0496578216552734, "logits/rejected": -1.8573524951934814, "logps/chosen": -159.89910888671875, "logps/rejected": -283.63604736328125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -5.918145656585693, "rewards/margins": 13.063776016235352, "rewards/rejected": -18.981922149658203, "step": 2805 }, { "epoch": 4.83, "learning_rate": 2.574373140671483e-07, "logits/chosen": -1.7770429849624634, "logits/rejected": -1.8808735609054565, "logps/chosen": -120.96556854248047, "logps/rejected": -310.7790832519531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.267009735107422, "rewards/margins": 18.0519962310791, "rewards/rejected": -22.319005966186523, "step": 2806 }, { "epoch": 4.83, "learning_rate": 2.573310667233319e-07, "logits/chosen": -1.7284257411956787, "logits/rejected": -1.9039827585220337, "logps/chosen": -166.932861328125, "logps/rejected": -303.89178466796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.735700607299805, "rewards/margins": 14.029167175292969, "rewards/rejected": -21.764867782592773, "step": 2807 }, { "epoch": 4.83, "learning_rate": 2.5722481937951553e-07, "logits/chosen": -1.8695017099380493, "logits/rejected": -1.9100704193115234, "logps/chosen": -129.79237365722656, "logps/rejected": -217.6770782470703, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -4.523972034454346, "rewards/margins": 8.685113906860352, "rewards/rejected": -13.209085464477539, "step": 2808 }, { "epoch": 4.83, "learning_rate": 2.571185720356991e-07, "logits/chosen": -1.745563268661499, "logits/rejected": -2.0286381244659424, "logps/chosen": -148.56625366210938, "logps/rejected": -307.04638671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.2819671630859375, "rewards/margins": 14.286543846130371, "rewards/rejected": -21.568511962890625, "step": 2809 }, { "epoch": 4.84, "learning_rate": 2.5701232469188267e-07, "logits/chosen": -1.5788817405700684, "logits/rejected": -1.9884510040283203, "logps/chosen": -121.51468658447266, "logps/rejected": -252.30076599121094, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -5.665923595428467, "rewards/margins": 11.305642127990723, "rewards/rejected": -16.97156524658203, "step": 2810 }, { "epoch": 4.84, "learning_rate": 2.5690607734806627e-07, "logits/chosen": -2.09615159034729, "logits/rejected": -1.848232626914978, "logps/chosen": -121.98050689697266, "logps/rejected": -248.78150939941406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.209692001342773, "rewards/margins": 10.962690353393555, "rewards/rejected": -16.172380447387695, "step": 2811 }, { "epoch": 4.84, "learning_rate": 2.5679983000424987e-07, "logits/chosen": -1.9064576625823975, "logits/rejected": -2.092519760131836, "logps/chosen": -140.951416015625, "logps/rejected": -249.6666259765625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -5.3560919761657715, "rewards/margins": 11.334794044494629, "rewards/rejected": -16.690885543823242, "step": 2812 }, { "epoch": 4.84, "learning_rate": 2.566935826604335e-07, "logits/chosen": -1.8772917985916138, "logits/rejected": -1.7923941612243652, "logps/chosen": -127.90596008300781, "logps/rejected": -214.491943359375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -5.183886528015137, "rewards/margins": 9.488953590393066, "rewards/rejected": -14.672840118408203, "step": 2813 }, { "epoch": 4.84, "learning_rate": 2.5658733531661707e-07, "logits/chosen": -1.9774808883666992, "logits/rejected": -1.815306305885315, "logps/chosen": -121.57191467285156, "logps/rejected": -222.60887145996094, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.336181163787842, "rewards/margins": 10.788161277770996, "rewards/rejected": -16.12434196472168, "step": 2814 }, { "epoch": 4.85, "learning_rate": 2.5648108797280067e-07, "logits/chosen": -1.805654525756836, "logits/rejected": -1.9496701955795288, "logps/chosen": -167.4473419189453, "logps/rejected": -258.7530517578125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -8.503101348876953, "rewards/margins": 9.971490859985352, "rewards/rejected": -18.474594116210938, "step": 2815 }, { "epoch": 4.85, "learning_rate": 2.5637484062898427e-07, "logits/chosen": -1.9447938203811646, "logits/rejected": -2.0102405548095703, "logps/chosen": -159.49880981445312, "logps/rejected": -280.6517333984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.635310173034668, "rewards/margins": 11.269854545593262, "rewards/rejected": -18.90516471862793, "step": 2816 }, { "epoch": 4.85, "learning_rate": 2.5626859328516787e-07, "logits/chosen": -2.193027973175049, "logits/rejected": -1.7232201099395752, "logps/chosen": -157.08941650390625, "logps/rejected": -263.2068176269531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.046455383300781, "rewards/margins": 13.098044395446777, "rewards/rejected": -19.144498825073242, "step": 2817 }, { "epoch": 4.85, "learning_rate": 2.5616234594135146e-07, "logits/chosen": -1.8520756959915161, "logits/rejected": -1.9872925281524658, "logps/chosen": -143.96665954589844, "logps/rejected": -322.0653076171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.713782787322998, "rewards/margins": 16.276920318603516, "rewards/rejected": -21.990703582763672, "step": 2818 }, { "epoch": 4.85, "learning_rate": 2.5605609859753506e-07, "logits/chosen": -1.8786077499389648, "logits/rejected": -2.038605213165283, "logps/chosen": -114.16807556152344, "logps/rejected": -244.21743774414062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.828518390655518, "rewards/margins": 12.985225677490234, "rewards/rejected": -17.813743591308594, "step": 2819 }, { "epoch": 4.85, "learning_rate": 2.5594985125371866e-07, "logits/chosen": -2.029719352722168, "logits/rejected": -2.205583333969116, "logps/chosen": -133.73898315429688, "logps/rejected": -292.0040283203125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.601160049438477, "rewards/margins": 16.023603439331055, "rewards/rejected": -20.62476348876953, "step": 2820 }, { "epoch": 4.86, "learning_rate": 2.558436039099022e-07, "logits/chosen": -1.8448054790496826, "logits/rejected": -1.9442062377929688, "logps/chosen": -144.7635498046875, "logps/rejected": -272.0259704589844, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -6.662275791168213, "rewards/margins": 11.838356971740723, "rewards/rejected": -18.500633239746094, "step": 2821 }, { "epoch": 4.86, "learning_rate": 2.5573735656608586e-07, "logits/chosen": -1.8765649795532227, "logits/rejected": -1.9605255126953125, "logps/chosen": -125.91017150878906, "logps/rejected": -264.8731994628906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.41389274597168, "rewards/margins": 13.163956642150879, "rewards/rejected": -17.577850341796875, "step": 2822 }, { "epoch": 4.86, "learning_rate": 2.5563110922226946e-07, "logits/chosen": -1.6697139739990234, "logits/rejected": -1.7633345127105713, "logps/chosen": -197.74288940429688, "logps/rejected": -299.1212158203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.352119445800781, "rewards/margins": 10.366825103759766, "rewards/rejected": -19.718944549560547, "step": 2823 }, { "epoch": 4.86, "learning_rate": 2.55524861878453e-07, "logits/chosen": -1.8550509214401245, "logits/rejected": -1.9783360958099365, "logps/chosen": -155.35855102539062, "logps/rejected": -315.109619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.000879287719727, "rewards/margins": 15.718525886535645, "rewards/rejected": -22.719406127929688, "step": 2824 }, { "epoch": 4.86, "learning_rate": 2.5541861453463666e-07, "logits/chosen": -1.8594435453414917, "logits/rejected": -1.953956127166748, "logps/chosen": -150.28140258789062, "logps/rejected": -301.88787841796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.87204647064209, "rewards/margins": 15.490825653076172, "rewards/rejected": -21.362873077392578, "step": 2825 }, { "epoch": 4.86, "learning_rate": 2.553123671908202e-07, "logits/chosen": -1.5050921440124512, "logits/rejected": -2.086892604827881, "logps/chosen": -147.4493408203125, "logps/rejected": -281.2018737792969, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.175688743591309, "rewards/margins": 11.693511009216309, "rewards/rejected": -18.869199752807617, "step": 2826 }, { "epoch": 4.87, "learning_rate": 2.552061198470038e-07, "logits/chosen": -1.8115949630737305, "logits/rejected": -1.917409896850586, "logps/chosen": -133.64695739746094, "logps/rejected": -251.2688446044922, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.2314839363098145, "rewards/margins": 11.155473709106445, "rewards/rejected": -16.386959075927734, "step": 2827 }, { "epoch": 4.87, "learning_rate": 2.5509987250318745e-07, "logits/chosen": -1.7185190916061401, "logits/rejected": -2.168365716934204, "logps/chosen": -137.8020782470703, "logps/rejected": -287.66461181640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.561840057373047, "rewards/margins": 12.864666938781738, "rewards/rejected": -18.4265079498291, "step": 2828 }, { "epoch": 4.87, "learning_rate": 2.54993625159371e-07, "logits/chosen": -2.0721986293792725, "logits/rejected": -2.0106122493743896, "logps/chosen": -147.02293395996094, "logps/rejected": -244.36727905273438, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.842290878295898, "rewards/margins": 10.023855209350586, "rewards/rejected": -15.866146087646484, "step": 2829 }, { "epoch": 4.87, "learning_rate": 2.548873778155546e-07, "logits/chosen": -1.7512598037719727, "logits/rejected": -2.145597457885742, "logps/chosen": -162.72909545898438, "logps/rejected": -313.0533142089844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.828214168548584, "rewards/margins": 14.119840621948242, "rewards/rejected": -20.948055267333984, "step": 2830 }, { "epoch": 4.87, "learning_rate": 2.547811304717382e-07, "logits/chosen": -1.5808145999908447, "logits/rejected": -2.3041465282440186, "logps/chosen": -147.68968200683594, "logps/rejected": -292.17303466796875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -7.9041748046875, "rewards/margins": 11.652437210083008, "rewards/rejected": -19.556612014770508, "step": 2831 }, { "epoch": 4.87, "learning_rate": 2.546748831279218e-07, "logits/chosen": -1.7483179569244385, "logits/rejected": -2.1460158824920654, "logps/chosen": -132.0443115234375, "logps/rejected": -290.75384521484375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.436643123626709, "rewards/margins": 14.127437591552734, "rewards/rejected": -19.5640811920166, "step": 2832 }, { "epoch": 4.88, "learning_rate": 2.5456863578410534e-07, "logits/chosen": -2.126657247543335, "logits/rejected": -1.8095654249191284, "logps/chosen": -124.31294250488281, "logps/rejected": -252.29258728027344, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.103187084197998, "rewards/margins": 13.209623336791992, "rewards/rejected": -18.31281089782715, "step": 2833 }, { "epoch": 4.88, "learning_rate": 2.54462388440289e-07, "logits/chosen": -1.9221575260162354, "logits/rejected": -1.6980183124542236, "logps/chosen": -167.6566162109375, "logps/rejected": -266.97198486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.170440673828125, "rewards/margins": 10.832245826721191, "rewards/rejected": -18.002687454223633, "step": 2834 }, { "epoch": 4.88, "learning_rate": 2.543561410964726e-07, "logits/chosen": -2.1624467372894287, "logits/rejected": -1.8564832210540771, "logps/chosen": -143.3609619140625, "logps/rejected": -261.7593078613281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.901399612426758, "rewards/margins": 12.003459930419922, "rewards/rejected": -18.904857635498047, "step": 2835 }, { "epoch": 4.88, "learning_rate": 2.5424989375265614e-07, "logits/chosen": -1.945865511894226, "logits/rejected": -1.847092866897583, "logps/chosen": -132.8150177001953, "logps/rejected": -263.3357238769531, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.003694534301758, "rewards/margins": 13.427011489868164, "rewards/rejected": -18.430706024169922, "step": 2836 }, { "epoch": 4.88, "learning_rate": 2.541436464088398e-07, "logits/chosen": -1.7463455200195312, "logits/rejected": -2.233250617980957, "logps/chosen": -124.4206314086914, "logps/rejected": -269.307861328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.271828651428223, "rewards/margins": 12.043354034423828, "rewards/rejected": -17.315183639526367, "step": 2837 }, { "epoch": 4.88, "learning_rate": 2.5403739906502334e-07, "logits/chosen": -1.5836801528930664, "logits/rejected": -1.7517406940460205, "logps/chosen": -145.22216796875, "logps/rejected": -281.10711669921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.435775279998779, "rewards/margins": 12.972688674926758, "rewards/rejected": -19.408464431762695, "step": 2838 }, { "epoch": 4.89, "learning_rate": 2.5393115172120694e-07, "logits/chosen": -2.04888653755188, "logits/rejected": -1.9037573337554932, "logps/chosen": -157.72996520996094, "logps/rejected": -240.525634765625, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -6.985180854797363, "rewards/margins": 8.666101455688477, "rewards/rejected": -15.651283264160156, "step": 2839 }, { "epoch": 4.89, "learning_rate": 2.538249043773906e-07, "logits/chosen": -1.7767523527145386, "logits/rejected": -1.4217004776000977, "logps/chosen": -176.88446044921875, "logps/rejected": -300.66571044921875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.763400554656982, "rewards/margins": 13.883316993713379, "rewards/rejected": -20.646718978881836, "step": 2840 }, { "epoch": 4.89, "learning_rate": 2.5371865703357413e-07, "logits/chosen": -2.0971107482910156, "logits/rejected": -1.9583877325057983, "logps/chosen": -175.2191925048828, "logps/rejected": -274.71337890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.996434211730957, "rewards/margins": 10.284045219421387, "rewards/rejected": -17.280479431152344, "step": 2841 }, { "epoch": 4.89, "learning_rate": 2.5361240968975773e-07, "logits/chosen": -1.894753098487854, "logits/rejected": -1.8032889366149902, "logps/chosen": -187.38760375976562, "logps/rejected": -300.2742614746094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.177302360534668, "rewards/margins": 11.249807357788086, "rewards/rejected": -20.42711067199707, "step": 2842 }, { "epoch": 4.89, "learning_rate": 2.5350616234594133e-07, "logits/chosen": -2.030578851699829, "logits/rejected": -1.8128509521484375, "logps/chosen": -135.62991333007812, "logps/rejected": -270.2891845703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.91214656829834, "rewards/margins": 15.414068222045898, "rewards/rejected": -21.326217651367188, "step": 2843 }, { "epoch": 4.9, "learning_rate": 2.5339991500212493e-07, "logits/chosen": -1.3804360628128052, "logits/rejected": -2.142544746398926, "logps/chosen": -138.0723419189453, "logps/rejected": -298.11322021484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.555575847625732, "rewards/margins": 13.778053283691406, "rewards/rejected": -21.333629608154297, "step": 2844 }, { "epoch": 4.9, "learning_rate": 2.5329366765830853e-07, "logits/chosen": -1.9559873342514038, "logits/rejected": -1.9250209331512451, "logps/chosen": -133.49288940429688, "logps/rejected": -246.22573852539062, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.4811854362487793, "rewards/margins": 11.118353843688965, "rewards/rejected": -14.599539756774902, "step": 2845 }, { "epoch": 4.9, "learning_rate": 2.5318742031449213e-07, "logits/chosen": -2.099454879760742, "logits/rejected": -1.9331800937652588, "logps/chosen": -104.2186050415039, "logps/rejected": -246.18194580078125, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -3.563988208770752, "rewards/margins": 14.330757141113281, "rewards/rejected": -17.894744873046875, "step": 2846 }, { "epoch": 4.9, "learning_rate": 2.530811729706757e-07, "logits/chosen": -1.8638101816177368, "logits/rejected": -2.101116418838501, "logps/chosen": -139.03062438964844, "logps/rejected": -249.1029815673828, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.51155948638916, "rewards/margins": 10.972254753112793, "rewards/rejected": -15.483814239501953, "step": 2847 }, { "epoch": 4.9, "learning_rate": 2.5297492562685927e-07, "logits/chosen": -1.6035213470458984, "logits/rejected": -1.9735993146896362, "logps/chosen": -126.4669418334961, "logps/rejected": -297.8193054199219, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.777828216552734, "rewards/margins": 14.927813529968262, "rewards/rejected": -19.70564079284668, "step": 2848 }, { "epoch": 4.9, "learning_rate": 2.528686782830429e-07, "logits/chosen": -2.2347376346588135, "logits/rejected": -1.9631798267364502, "logps/chosen": -109.33126068115234, "logps/rejected": -267.32891845703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.05526065826416, "rewards/margins": 16.391597747802734, "rewards/rejected": -19.44685935974121, "step": 2849 }, { "epoch": 4.91, "learning_rate": 2.527624309392265e-07, "logits/chosen": -2.0371413230895996, "logits/rejected": -2.1867828369140625, "logps/chosen": -173.39089965820312, "logps/rejected": -270.70599365234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.699662208557129, "rewards/margins": 9.745479583740234, "rewards/rejected": -17.445140838623047, "step": 2850 }, { "epoch": 4.91, "learning_rate": 2.5265618359541007e-07, "logits/chosen": -2.0953798294067383, "logits/rejected": -1.8230276107788086, "logps/chosen": -173.15492248535156, "logps/rejected": -274.8208923339844, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -8.087231636047363, "rewards/margins": 11.133259773254395, "rewards/rejected": -19.22049331665039, "step": 2851 }, { "epoch": 4.91, "learning_rate": 2.525499362515937e-07, "logits/chosen": -1.849827766418457, "logits/rejected": -2.184176445007324, "logps/chosen": -140.73292541503906, "logps/rejected": -271.27813720703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.583203315734863, "rewards/margins": 12.001200675964355, "rewards/rejected": -17.58440399169922, "step": 2852 }, { "epoch": 4.91, "learning_rate": 2.5244368890777727e-07, "logits/chosen": -2.039609670639038, "logits/rejected": -2.046945095062256, "logps/chosen": -135.47772216796875, "logps/rejected": -256.5037536621094, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -4.705132007598877, "rewards/margins": 11.628819465637207, "rewards/rejected": -16.333951950073242, "step": 2853 }, { "epoch": 4.91, "learning_rate": 2.5233744156396087e-07, "logits/chosen": -2.2347867488861084, "logits/rejected": -1.8711824417114258, "logps/chosen": -130.90386962890625, "logps/rejected": -239.03094482421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.706541061401367, "rewards/margins": 12.290685653686523, "rewards/rejected": -17.99722671508789, "step": 2854 }, { "epoch": 4.91, "learning_rate": 2.522311942201445e-07, "logits/chosen": -1.9075896739959717, "logits/rejected": -2.142697334289551, "logps/chosen": -114.72116088867188, "logps/rejected": -279.3973083496094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.168768882751465, "rewards/margins": 15.342657089233398, "rewards/rejected": -19.511425018310547, "step": 2855 }, { "epoch": 4.92, "learning_rate": 2.5212494687632806e-07, "logits/chosen": -1.9395921230316162, "logits/rejected": -1.8171886205673218, "logps/chosen": -171.0816192626953, "logps/rejected": -277.6062316894531, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -6.813554763793945, "rewards/margins": 11.45283317565918, "rewards/rejected": -18.266387939453125, "step": 2856 }, { "epoch": 4.92, "learning_rate": 2.520186995325117e-07, "logits/chosen": -2.1656594276428223, "logits/rejected": -1.8618371486663818, "logps/chosen": -161.19720458984375, "logps/rejected": -264.65435791015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.521031379699707, "rewards/margins": 12.737873077392578, "rewards/rejected": -19.25890350341797, "step": 2857 }, { "epoch": 4.92, "learning_rate": 2.5191245218869526e-07, "logits/chosen": -1.801679015159607, "logits/rejected": -2.2910354137420654, "logps/chosen": -103.63737487792969, "logps/rejected": -248.98388671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.21030330657959, "rewards/margins": 12.723695755004883, "rewards/rejected": -15.933998107910156, "step": 2858 }, { "epoch": 4.92, "learning_rate": 2.5180620484487886e-07, "logits/chosen": -2.25923752784729, "logits/rejected": -1.9882471561431885, "logps/chosen": -132.74603271484375, "logps/rejected": -232.2513427734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.221652984619141, "rewards/margins": 11.020479202270508, "rewards/rejected": -16.24213218688965, "step": 2859 }, { "epoch": 4.92, "learning_rate": 2.516999575010625e-07, "logits/chosen": -1.9841803312301636, "logits/rejected": -2.1202268600463867, "logps/chosen": -143.85963439941406, "logps/rejected": -291.2183837890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.914463996887207, "rewards/margins": 14.39075756072998, "rewards/rejected": -20.305221557617188, "step": 2860 }, { "epoch": 4.92, "learning_rate": 2.5159371015724606e-07, "logits/chosen": -2.006936550140381, "logits/rejected": -2.156811237335205, "logps/chosen": -144.05397033691406, "logps/rejected": -334.5611877441406, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -5.859553337097168, "rewards/margins": 17.796043395996094, "rewards/rejected": -23.655595779418945, "step": 2861 }, { "epoch": 4.93, "learning_rate": 2.5148746281342966e-07, "logits/chosen": -2.0216023921966553, "logits/rejected": -2.285615921020508, "logps/chosen": -137.33485412597656, "logps/rejected": -269.40911865234375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -5.320112228393555, "rewards/margins": 13.293412208557129, "rewards/rejected": -18.613525390625, "step": 2862 }, { "epoch": 4.93, "learning_rate": 2.5138121546961326e-07, "logits/chosen": -1.9126662015914917, "logits/rejected": -1.8955047130584717, "logps/chosen": -143.9814910888672, "logps/rejected": -263.2618103027344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.7707672119140625, "rewards/margins": 11.532279968261719, "rewards/rejected": -16.30304718017578, "step": 2863 }, { "epoch": 4.93, "learning_rate": 2.5127496812579685e-07, "logits/chosen": -2.1433792114257812, "logits/rejected": -2.115734815597534, "logps/chosen": -101.36125183105469, "logps/rejected": -286.134521484375, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -1.8560888767242432, "rewards/margins": 17.286115646362305, "rewards/rejected": -19.14220428466797, "step": 2864 }, { "epoch": 4.93, "learning_rate": 2.511687207819804e-07, "logits/chosen": -1.8938900232315063, "logits/rejected": -1.6280077695846558, "logps/chosen": -133.51705932617188, "logps/rejected": -284.79327392578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.132190704345703, "rewards/margins": 15.760403633117676, "rewards/rejected": -20.892595291137695, "step": 2865 }, { "epoch": 4.93, "learning_rate": 2.5106247343816405e-07, "logits/chosen": -1.8010011911392212, "logits/rejected": -2.0187158584594727, "logps/chosen": -166.65907287597656, "logps/rejected": -261.901123046875, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": -7.721988201141357, "rewards/margins": 9.238513946533203, "rewards/rejected": -16.96050262451172, "step": 2866 }, { "epoch": 4.93, "learning_rate": 2.5095622609434765e-07, "logits/chosen": -1.6976048946380615, "logits/rejected": -1.978849172592163, "logps/chosen": -143.0804443359375, "logps/rejected": -284.1902160644531, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.274082183837891, "rewards/margins": 12.735793113708496, "rewards/rejected": -19.009876251220703, "step": 2867 }, { "epoch": 4.94, "learning_rate": 2.508499787505312e-07, "logits/chosen": -1.563955307006836, "logits/rejected": -2.1057374477386475, "logps/chosen": -141.6823272705078, "logps/rejected": -302.06243896484375, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -6.301192760467529, "rewards/margins": 14.407707214355469, "rewards/rejected": -20.708900451660156, "step": 2868 }, { "epoch": 4.94, "learning_rate": 2.5074373140671485e-07, "logits/chosen": -2.0944175720214844, "logits/rejected": -2.1133785247802734, "logps/chosen": -142.13262939453125, "logps/rejected": -251.58749389648438, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.754604339599609, "rewards/margins": 10.550487518310547, "rewards/rejected": -16.305091857910156, "step": 2869 }, { "epoch": 4.94, "learning_rate": 2.506374840628984e-07, "logits/chosen": -1.5801231861114502, "logits/rejected": -2.313570261001587, "logps/chosen": -120.44389343261719, "logps/rejected": -257.81976318359375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -5.365596771240234, "rewards/margins": 10.858301162719727, "rewards/rejected": -16.22389793395996, "step": 2870 }, { "epoch": 4.94, "learning_rate": 2.50531236719082e-07, "logits/chosen": -1.8447914123535156, "logits/rejected": -1.8839423656463623, "logps/chosen": -124.49077606201172, "logps/rejected": -247.10113525390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.363531589508057, "rewards/margins": 12.554415702819824, "rewards/rejected": -16.91794776916504, "step": 2871 }, { "epoch": 4.94, "learning_rate": 2.5042498937526565e-07, "logits/chosen": -2.01113224029541, "logits/rejected": -1.5830410718917847, "logps/chosen": -146.38796997070312, "logps/rejected": -277.19384765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.3280534744262695, "rewards/margins": 14.91161060333252, "rewards/rejected": -20.239662170410156, "step": 2872 }, { "epoch": 4.94, "learning_rate": 2.503187420314492e-07, "logits/chosen": -1.8089509010314941, "logits/rejected": -2.0521254539489746, "logps/chosen": -112.8727798461914, "logps/rejected": -274.16064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.675690174102783, "rewards/margins": 15.248680114746094, "rewards/rejected": -18.92436981201172, "step": 2873 }, { "epoch": 4.95, "learning_rate": 2.502124946876328e-07, "logits/chosen": -2.017308235168457, "logits/rejected": -1.5223313570022583, "logps/chosen": -134.951171875, "logps/rejected": -219.18836975097656, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -4.686033248901367, "rewards/margins": 10.04974365234375, "rewards/rejected": -14.7357759475708, "step": 2874 }, { "epoch": 4.95, "learning_rate": 2.501062473438164e-07, "logits/chosen": -1.8332645893096924, "logits/rejected": -1.7884143590927124, "logps/chosen": -199.7036895751953, "logps/rejected": -321.4771728515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.006587028503418, "rewards/margins": 14.241891860961914, "rewards/rejected": -23.248477935791016, "step": 2875 }, { "epoch": 4.95, "learning_rate": 2.5e-07, "logits/chosen": -2.123032569885254, "logits/rejected": -1.952341079711914, "logps/chosen": -128.0360870361328, "logps/rejected": -255.01063537597656, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -4.2148756980896, "rewards/margins": 13.553290367126465, "rewards/rejected": -17.768165588378906, "step": 2876 }, { "epoch": 4.95, "learning_rate": 2.498937526561836e-07, "logits/chosen": -1.6980257034301758, "logits/rejected": -2.1602683067321777, "logps/chosen": -129.4802703857422, "logps/rejected": -313.16259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.6264753341674805, "rewards/margins": 15.28305435180664, "rewards/rejected": -20.909528732299805, "step": 2877 }, { "epoch": 4.95, "learning_rate": 2.497875053123672e-07, "logits/chosen": -1.7815790176391602, "logits/rejected": -1.9858782291412354, "logps/chosen": -116.72372436523438, "logps/rejected": -281.3907775878906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.61991024017334, "rewards/margins": 16.192413330078125, "rewards/rejected": -19.81232452392578, "step": 2878 }, { "epoch": 4.96, "learning_rate": 2.496812579685508e-07, "logits/chosen": -2.15509033203125, "logits/rejected": -1.9724581241607666, "logps/chosen": -153.5326385498047, "logps/rejected": -287.9627380371094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.7051849365234375, "rewards/margins": 13.599645614624023, "rewards/rejected": -19.304832458496094, "step": 2879 }, { "epoch": 4.96, "learning_rate": 2.495750106247344e-07, "logits/chosen": -2.036726474761963, "logits/rejected": -1.881269931793213, "logps/chosen": -117.8032455444336, "logps/rejected": -260.4586486816406, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -3.2651114463806152, "rewards/margins": 13.330192565917969, "rewards/rejected": -16.59530258178711, "step": 2880 }, { "epoch": 4.96, "learning_rate": 2.4946876328091793e-07, "logits/chosen": -1.688959002494812, "logits/rejected": -2.038403034210205, "logps/chosen": -129.79458618164062, "logps/rejected": -292.71002197265625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -6.45730447769165, "rewards/margins": 13.203669548034668, "rewards/rejected": -19.660974502563477, "step": 2881 }, { "epoch": 4.96, "learning_rate": 2.493625159371016e-07, "logits/chosen": -2.023557186126709, "logits/rejected": -2.253836154937744, "logps/chosen": -139.572509765625, "logps/rejected": -295.4026184082031, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -4.980332374572754, "rewards/margins": 15.068572998046875, "rewards/rejected": -20.048904418945312, "step": 2882 }, { "epoch": 4.96, "learning_rate": 2.492562685932852e-07, "logits/chosen": -2.021571159362793, "logits/rejected": -1.699755072593689, "logps/chosen": -140.88572692871094, "logps/rejected": -252.19520568847656, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.101515769958496, "rewards/margins": 12.16303825378418, "rewards/rejected": -17.264554977416992, "step": 2883 }, { "epoch": 4.96, "learning_rate": 2.4915002124946873e-07, "logits/chosen": -1.6248888969421387, "logits/rejected": -2.188142776489258, "logps/chosen": -151.70144653320312, "logps/rejected": -311.08233642578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.539835453033447, "rewards/margins": 13.149903297424316, "rewards/rejected": -19.689739227294922, "step": 2884 }, { "epoch": 4.97, "learning_rate": 2.490437739056523e-07, "logits/chosen": -2.041942596435547, "logits/rejected": -1.9688786268234253, "logps/chosen": -166.15164184570312, "logps/rejected": -265.86676025390625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.287126064300537, "rewards/margins": 11.775443077087402, "rewards/rejected": -18.06256866455078, "step": 2885 }, { "epoch": 4.97, "learning_rate": 2.489375265618359e-07, "logits/chosen": -2.042677402496338, "logits/rejected": -2.15962815284729, "logps/chosen": -138.12948608398438, "logps/rejected": -288.7339782714844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.4094319343566895, "rewards/margins": 13.618598937988281, "rewards/rejected": -19.028030395507812, "step": 2886 }, { "epoch": 4.97, "learning_rate": 2.488312792180196e-07, "logits/chosen": -1.7073038816452026, "logits/rejected": -1.979407548904419, "logps/chosen": -160.12698364257812, "logps/rejected": -302.98822021484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.114090442657471, "rewards/margins": 12.659403800964355, "rewards/rejected": -18.773494720458984, "step": 2887 }, { "epoch": 4.97, "learning_rate": 2.487250318742031e-07, "logits/chosen": -2.0369620323181152, "logits/rejected": -2.083449602127075, "logps/chosen": -149.08981323242188, "logps/rejected": -266.173583984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.531621932983398, "rewards/margins": 11.97645378112793, "rewards/rejected": -18.508075714111328, "step": 2888 }, { "epoch": 4.97, "learning_rate": 2.486187845303867e-07, "logits/chosen": -1.7719178199768066, "logits/rejected": -2.0972464084625244, "logps/chosen": -137.68380737304688, "logps/rejected": -289.8346252441406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.379350662231445, "rewards/margins": 13.647841453552246, "rewards/rejected": -20.027193069458008, "step": 2889 }, { "epoch": 4.97, "learning_rate": 2.485125371865703e-07, "logits/chosen": -1.8538525104522705, "logits/rejected": -1.7643741369247437, "logps/chosen": -166.11878967285156, "logps/rejected": -257.3101806640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.15422248840332, "rewards/margins": 10.376235961914062, "rewards/rejected": -18.530460357666016, "step": 2890 }, { "epoch": 4.98, "learning_rate": 2.484062898427539e-07, "logits/chosen": -1.7850761413574219, "logits/rejected": -2.043442964553833, "logps/chosen": -146.81224060058594, "logps/rejected": -256.9666748046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.188957214355469, "rewards/margins": 10.223715782165527, "rewards/rejected": -17.412673950195312, "step": 2891 }, { "epoch": 4.98, "learning_rate": 2.483000424989375e-07, "logits/chosen": -1.9749119281768799, "logits/rejected": -1.8699870109558105, "logps/chosen": -157.18212890625, "logps/rejected": -264.1999206542969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.617771148681641, "rewards/margins": 10.786273002624512, "rewards/rejected": -18.404043197631836, "step": 2892 }, { "epoch": 4.98, "learning_rate": 2.481937951551211e-07, "logits/chosen": -2.0559444427490234, "logits/rejected": -1.989444375038147, "logps/chosen": -162.427734375, "logps/rejected": -233.39382934570312, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -7.549221038818359, "rewards/margins": 8.888587951660156, "rewards/rejected": -16.437807083129883, "step": 2893 }, { "epoch": 4.98, "learning_rate": 2.480875478113047e-07, "logits/chosen": -1.9965541362762451, "logits/rejected": -1.9620405435562134, "logps/chosen": -157.989501953125, "logps/rejected": -253.66842651367188, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -7.232119560241699, "rewards/margins": 9.892172813415527, "rewards/rejected": -17.124290466308594, "step": 2894 }, { "epoch": 4.98, "learning_rate": 2.479813004674883e-07, "logits/chosen": -2.0872244834899902, "logits/rejected": -1.831887125968933, "logps/chosen": -136.2733917236328, "logps/rejected": -231.02171325683594, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -5.992909908294678, "rewards/margins": 9.336532592773438, "rewards/rejected": -15.329442024230957, "step": 2895 }, { "epoch": 4.98, "learning_rate": 2.478750531236719e-07, "logits/chosen": -1.9992954730987549, "logits/rejected": -2.0403687953948975, "logps/chosen": -152.463623046875, "logps/rejected": -268.5257873535156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.380317211151123, "rewards/margins": 11.543310165405273, "rewards/rejected": -18.923627853393555, "step": 2896 }, { "epoch": 4.99, "learning_rate": 2.4776880577985546e-07, "logits/chosen": -1.9939172267913818, "logits/rejected": -2.0724868774414062, "logps/chosen": -155.36224365234375, "logps/rejected": -285.8258056640625, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -7.113039016723633, "rewards/margins": 12.74354362487793, "rewards/rejected": -19.856582641601562, "step": 2897 }, { "epoch": 4.99, "learning_rate": 2.476625584360391e-07, "logits/chosen": -1.841396450996399, "logits/rejected": -1.9181050062179565, "logps/chosen": -186.87850952148438, "logps/rejected": -296.420166015625, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -10.963088989257812, "rewards/margins": 9.831924438476562, "rewards/rejected": -20.795013427734375, "step": 2898 }, { "epoch": 4.99, "learning_rate": 2.475563110922227e-07, "logits/chosen": -1.661186933517456, "logits/rejected": -2.154733657836914, "logps/chosen": -115.75666809082031, "logps/rejected": -269.7496643066406, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -4.679167747497559, "rewards/margins": 14.309757232666016, "rewards/rejected": -18.98892593383789, "step": 2899 }, { "epoch": 4.99, "learning_rate": 2.4745006374840626e-07, "logits/chosen": -2.0676159858703613, "logits/rejected": -1.923432469367981, "logps/chosen": -144.3881378173828, "logps/rejected": -254.65603637695312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.964487075805664, "rewards/margins": 11.01809024810791, "rewards/rejected": -16.982576370239258, "step": 2900 }, { "epoch": 4.99, "learning_rate": 2.4734381640458986e-07, "logits/chosen": -1.8140027523040771, "logits/rejected": -1.7587250471115112, "logps/chosen": -140.4825439453125, "logps/rejected": -268.58172607421875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -5.804687023162842, "rewards/margins": 13.326096534729004, "rewards/rejected": -19.13078498840332, "step": 2901 }, { "epoch": 4.99, "learning_rate": 2.4723756906077345e-07, "logits/chosen": -1.9864075183868408, "logits/rejected": -2.051506519317627, "logps/chosen": -114.66675567626953, "logps/rejected": -232.97100830078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.537203073501587, "rewards/margins": 13.039335250854492, "rewards/rejected": -16.576539993286133, "step": 2902 }, { "epoch": 5.0, "learning_rate": 2.4713132171695705e-07, "logits/chosen": -1.9930918216705322, "logits/rejected": -2.112719774246216, "logps/chosen": -155.46316528320312, "logps/rejected": -288.5775146484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.932356834411621, "rewards/margins": 14.019353866577148, "rewards/rejected": -18.951709747314453, "step": 2903 }, { "epoch": 5.0, "learning_rate": 2.4702507437314065e-07, "logits/chosen": -2.086977958679199, "logits/rejected": -2.157219409942627, "logps/chosen": -152.82521057128906, "logps/rejected": -275.2181701660156, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -8.043237686157227, "rewards/margins": 11.287923812866211, "rewards/rejected": -19.331161499023438, "step": 2904 }, { "epoch": 5.0, "learning_rate": 2.4691882702932425e-07, "logits/chosen": -1.6701412200927734, "logits/rejected": -2.006619930267334, "logps/chosen": -137.79087829589844, "logps/rejected": -282.14410400390625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.854092121124268, "rewards/margins": 12.78837776184082, "rewards/rejected": -18.64246940612793, "step": 2905 }, { "epoch": 5.0, "learning_rate": 2.4681257968550785e-07, "logits/chosen": -1.8434638977050781, "logits/rejected": -2.139659881591797, "logps/chosen": -138.61038208007812, "logps/rejected": -287.88702392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.358231544494629, "rewards/margins": 13.753793716430664, "rewards/rejected": -19.11202621459961, "step": 2906 }, { "epoch": 5.0, "learning_rate": 2.4670633234169145e-07, "logits/chosen": -2.0265238285064697, "logits/rejected": -2.2044739723205566, "logps/chosen": -129.8271484375, "logps/rejected": -277.4444580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.641862869262695, "rewards/margins": 12.841761589050293, "rewards/rejected": -18.483625411987305, "step": 2907 }, { "epoch": 5.01, "learning_rate": 2.4660008499787505e-07, "logits/chosen": -1.8551685810089111, "logits/rejected": -1.7865972518920898, "logps/chosen": -160.49066162109375, "logps/rejected": -287.3567199707031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.6237969398498535, "rewards/margins": 11.449434280395508, "rewards/rejected": -18.073230743408203, "step": 2908 }, { "epoch": 5.01, "learning_rate": 2.4649383765405865e-07, "logits/chosen": -1.8574209213256836, "logits/rejected": -1.976161003112793, "logps/chosen": -132.21624755859375, "logps/rejected": -298.34326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.68284797668457, "rewards/margins": 15.481616973876953, "rewards/rejected": -21.164466857910156, "step": 2909 }, { "epoch": 5.01, "learning_rate": 2.4638759031024225e-07, "logits/chosen": -1.8874775171279907, "logits/rejected": -2.1385436058044434, "logps/chosen": -136.36965942382812, "logps/rejected": -280.6715393066406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.453640460968018, "rewards/margins": 12.24703598022461, "rewards/rejected": -18.70067596435547, "step": 2910 }, { "epoch": 5.01, "learning_rate": 2.4628134296642584e-07, "logits/chosen": -1.8259540796279907, "logits/rejected": -2.008993148803711, "logps/chosen": -114.2210693359375, "logps/rejected": -254.3001708984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.095064163208008, "rewards/margins": 12.773998260498047, "rewards/rejected": -17.869062423706055, "step": 2911 }, { "epoch": 5.01, "learning_rate": 2.4617509562260944e-07, "logits/chosen": -2.122241497039795, "logits/rejected": -1.984809160232544, "logps/chosen": -153.4862518310547, "logps/rejected": -289.1067199707031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.178449630737305, "rewards/margins": 12.30514144897461, "rewards/rejected": -18.483592987060547, "step": 2912 }, { "epoch": 5.01, "learning_rate": 2.46068848278793e-07, "logits/chosen": -1.9954397678375244, "logits/rejected": -1.9737532138824463, "logps/chosen": -155.70924377441406, "logps/rejected": -259.6205749511719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.223086357116699, "rewards/margins": 10.077486991882324, "rewards/rejected": -17.300573348999023, "step": 2913 }, { "epoch": 5.02, "learning_rate": 2.4596260093497664e-07, "logits/chosen": -2.054614543914795, "logits/rejected": -2.2087438106536865, "logps/chosen": -156.21641540527344, "logps/rejected": -319.7223815917969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.321749687194824, "rewards/margins": 16.426851272583008, "rewards/rejected": -21.748600006103516, "step": 2914 }, { "epoch": 5.02, "learning_rate": 2.4585635359116024e-07, "logits/chosen": -1.7526848316192627, "logits/rejected": -2.1632068157196045, "logps/chosen": -112.6348648071289, "logps/rejected": -236.49969482421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.692798137664795, "rewards/margins": 11.714828491210938, "rewards/rejected": -16.40762710571289, "step": 2915 }, { "epoch": 5.02, "learning_rate": 2.457501062473438e-07, "logits/chosen": -2.088625431060791, "logits/rejected": -2.1202540397644043, "logps/chosen": -143.10150146484375, "logps/rejected": -253.89907836914062, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.93690299987793, "rewards/margins": 11.893169403076172, "rewards/rejected": -17.830074310302734, "step": 2916 }, { "epoch": 5.02, "learning_rate": 2.456438589035274e-07, "logits/chosen": -1.6746572256088257, "logits/rejected": -2.1820802688598633, "logps/chosen": -129.7283477783203, "logps/rejected": -287.8896789550781, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.297687530517578, "rewards/margins": 14.351295471191406, "rewards/rejected": -18.648983001708984, "step": 2917 }, { "epoch": 5.02, "learning_rate": 2.45537611559711e-07, "logits/chosen": -1.8366897106170654, "logits/rejected": -2.027893543243408, "logps/chosen": -137.81271362304688, "logps/rejected": -291.69183349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.660367965698242, "rewards/margins": 13.804374694824219, "rewards/rejected": -21.46474266052246, "step": 2918 }, { "epoch": 5.02, "learning_rate": 2.454313642158946e-07, "logits/chosen": -1.9501643180847168, "logits/rejected": -2.150404691696167, "logps/chosen": -121.07571411132812, "logps/rejected": -261.73138427734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.403875350952148, "rewards/margins": 13.938039779663086, "rewards/rejected": -18.341915130615234, "step": 2919 }, { "epoch": 5.03, "learning_rate": 2.453251168720782e-07, "logits/chosen": -1.5854876041412354, "logits/rejected": -1.986097812652588, "logps/chosen": -116.07056427001953, "logps/rejected": -281.55242919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.2586989402771, "rewards/margins": 15.144853591918945, "rewards/rejected": -20.403553009033203, "step": 2920 }, { "epoch": 5.03, "learning_rate": 2.452188695282618e-07, "logits/chosen": -2.05987548828125, "logits/rejected": -1.4421730041503906, "logps/chosen": -114.68391418457031, "logps/rejected": -251.6352081298828, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.684236526489258, "rewards/margins": 14.7291259765625, "rewards/rejected": -17.413362503051758, "step": 2921 }, { "epoch": 5.03, "learning_rate": 2.451126221844454e-07, "logits/chosen": -2.0392534732818604, "logits/rejected": -2.3131775856018066, "logps/chosen": -173.7549285888672, "logps/rejected": -312.7613830566406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.558865547180176, "rewards/margins": 13.865767478942871, "rewards/rejected": -20.424633026123047, "step": 2922 }, { "epoch": 5.03, "learning_rate": 2.45006374840629e-07, "logits/chosen": -2.2422678470611572, "logits/rejected": -2.103449821472168, "logps/chosen": -135.8878173828125, "logps/rejected": -276.2587585449219, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -5.195713996887207, "rewards/margins": 12.222480773925781, "rewards/rejected": -17.418195724487305, "step": 2923 }, { "epoch": 5.03, "learning_rate": 2.449001274968126e-07, "logits/chosen": -2.0835070610046387, "logits/rejected": -2.0369772911071777, "logps/chosen": -163.7894287109375, "logps/rejected": -269.91070556640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.857205390930176, "rewards/margins": 11.583051681518555, "rewards/rejected": -19.440258026123047, "step": 2924 }, { "epoch": 5.03, "learning_rate": 2.447938801529962e-07, "logits/chosen": -1.7294315099716187, "logits/rejected": -1.9383176565170288, "logps/chosen": -116.56452178955078, "logps/rejected": -285.2322692871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.029542922973633, "rewards/margins": 16.395992279052734, "rewards/rejected": -20.425533294677734, "step": 2925 }, { "epoch": 5.04, "learning_rate": 2.446876328091798e-07, "logits/chosen": -1.894425868988037, "logits/rejected": -1.8735392093658447, "logps/chosen": -148.6852569580078, "logps/rejected": -308.62005615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.959092140197754, "rewards/margins": 15.4818115234375, "rewards/rejected": -21.44090461730957, "step": 2926 }, { "epoch": 5.04, "learning_rate": 2.445813854653634e-07, "logits/chosen": -1.9387245178222656, "logits/rejected": -1.903930902481079, "logps/chosen": -114.24655151367188, "logps/rejected": -219.35647583007812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.097341537475586, "rewards/margins": 10.736416816711426, "rewards/rejected": -14.833758354187012, "step": 2927 }, { "epoch": 5.04, "learning_rate": 2.444751381215469e-07, "logits/chosen": -1.9178162813186646, "logits/rejected": -2.110161542892456, "logps/chosen": -118.58736419677734, "logps/rejected": -249.59112548828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.491161346435547, "rewards/margins": 11.122588157653809, "rewards/rejected": -15.613748550415039, "step": 2928 }, { "epoch": 5.04, "learning_rate": 2.443688907777305e-07, "logits/chosen": -1.8879468441009521, "logits/rejected": -1.9926022291183472, "logps/chosen": -129.12408447265625, "logps/rejected": -261.2416687011719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.153092384338379, "rewards/margins": 13.42205810546875, "rewards/rejected": -17.575149536132812, "step": 2929 }, { "epoch": 5.04, "learning_rate": 2.4426264343391417e-07, "logits/chosen": -1.652083158493042, "logits/rejected": -2.019564151763916, "logps/chosen": -137.6114044189453, "logps/rejected": -298.0748291015625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.608301162719727, "rewards/margins": 13.08155632019043, "rewards/rejected": -19.689857482910156, "step": 2930 }, { "epoch": 5.04, "learning_rate": 2.4415639609009777e-07, "logits/chosen": -2.1254754066467285, "logits/rejected": -1.8286235332489014, "logps/chosen": -146.01084899902344, "logps/rejected": -231.9991912841797, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.097970008850098, "rewards/margins": 10.256571769714355, "rewards/rejected": -16.354543685913086, "step": 2931 }, { "epoch": 5.05, "learning_rate": 2.440501487462813e-07, "logits/chosen": -2.002742052078247, "logits/rejected": -1.8874711990356445, "logps/chosen": -133.65118408203125, "logps/rejected": -283.31536865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.452912330627441, "rewards/margins": 15.967968940734863, "rewards/rejected": -20.420881271362305, "step": 2932 }, { "epoch": 5.05, "learning_rate": 2.439439014024649e-07, "logits/chosen": -1.8217376470565796, "logits/rejected": -1.4602077007293701, "logps/chosen": -120.11833190917969, "logps/rejected": -201.91891479492188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.778212070465088, "rewards/margins": 9.397860527038574, "rewards/rejected": -14.176072120666504, "step": 2933 }, { "epoch": 5.05, "learning_rate": 2.438376540586485e-07, "logits/chosen": -2.006042957305908, "logits/rejected": -2.116992235183716, "logps/chosen": -156.11634826660156, "logps/rejected": -303.43341064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.35840368270874, "rewards/margins": 14.696603775024414, "rewards/rejected": -20.05500602722168, "step": 2934 }, { "epoch": 5.05, "learning_rate": 2.437314067148321e-07, "logits/chosen": -1.7097142934799194, "logits/rejected": -2.1152517795562744, "logps/chosen": -124.90887451171875, "logps/rejected": -255.18626403808594, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.9908246994018555, "rewards/margins": 12.565929412841797, "rewards/rejected": -16.55675506591797, "step": 2935 }, { "epoch": 5.05, "learning_rate": 2.436251593710157e-07, "logits/chosen": -1.6994789838790894, "logits/rejected": -1.9317963123321533, "logps/chosen": -121.260986328125, "logps/rejected": -252.4855499267578, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.453664779663086, "rewards/margins": 12.496857643127441, "rewards/rejected": -17.950523376464844, "step": 2936 }, { "epoch": 5.06, "learning_rate": 2.435189120271993e-07, "logits/chosen": -1.8015295267105103, "logits/rejected": -1.7017414569854736, "logps/chosen": -145.88690185546875, "logps/rejected": -287.05706787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.980611801147461, "rewards/margins": 13.889065742492676, "rewards/rejected": -19.869678497314453, "step": 2937 }, { "epoch": 5.06, "learning_rate": 2.434126646833829e-07, "logits/chosen": -1.9771500825881958, "logits/rejected": -1.8119550943374634, "logps/chosen": -171.4832763671875, "logps/rejected": -289.96673583984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.637650012969971, "rewards/margins": 12.385353088378906, "rewards/rejected": -20.02300262451172, "step": 2938 }, { "epoch": 5.06, "learning_rate": 2.433064173395665e-07, "logits/chosen": -1.855946660041809, "logits/rejected": -1.878061294555664, "logps/chosen": -161.93850708007812, "logps/rejected": -280.44305419921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.669880390167236, "rewards/margins": 12.103910446166992, "rewards/rejected": -18.77379035949707, "step": 2939 }, { "epoch": 5.06, "learning_rate": 2.432001699957501e-07, "logits/chosen": -2.2887773513793945, "logits/rejected": -1.7976006269454956, "logps/chosen": -114.71408081054688, "logps/rejected": -239.02685546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.1217968463897705, "rewards/margins": 14.075007438659668, "rewards/rejected": -17.19680404663086, "step": 2940 }, { "epoch": 5.06, "learning_rate": 2.430939226519337e-07, "logits/chosen": -1.999446153640747, "logits/rejected": -1.8462153673171997, "logps/chosen": -176.3123016357422, "logps/rejected": -287.50640869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.472011566162109, "rewards/margins": 13.178248405456543, "rewards/rejected": -19.650259017944336, "step": 2941 }, { "epoch": 5.06, "learning_rate": 2.429876753081173e-07, "logits/chosen": -1.6245625019073486, "logits/rejected": -2.1720871925354004, "logps/chosen": -142.9151611328125, "logps/rejected": -276.62286376953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.752242088317871, "rewards/margins": 11.949041366577148, "rewards/rejected": -18.701282501220703, "step": 2942 }, { "epoch": 5.07, "learning_rate": 2.428814279643009e-07, "logits/chosen": -1.9607138633728027, "logits/rejected": -1.932887315750122, "logps/chosen": -116.0040283203125, "logps/rejected": -230.4880828857422, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.535323619842529, "rewards/margins": 10.680227279663086, "rewards/rejected": -16.21554946899414, "step": 2943 }, { "epoch": 5.07, "learning_rate": 2.4277518062048445e-07, "logits/chosen": -1.9764751195907593, "logits/rejected": -1.9876368045806885, "logps/chosen": -153.2281494140625, "logps/rejected": -287.060791015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.653498649597168, "rewards/margins": 13.9267578125, "rewards/rejected": -20.58025550842285, "step": 2944 }, { "epoch": 5.07, "learning_rate": 2.4266893327666805e-07, "logits/chosen": -2.0656354427337646, "logits/rejected": -1.9407657384872437, "logps/chosen": -153.32528686523438, "logps/rejected": -291.2879638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.030450820922852, "rewards/margins": 13.950454711914062, "rewards/rejected": -19.980905532836914, "step": 2945 }, { "epoch": 5.07, "learning_rate": 2.425626859328517e-07, "logits/chosen": -2.1032087802886963, "logits/rejected": -2.0598697662353516, "logps/chosen": -128.3248291015625, "logps/rejected": -257.394775390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.448786735534668, "rewards/margins": 12.805413246154785, "rewards/rejected": -18.254199981689453, "step": 2946 }, { "epoch": 5.07, "learning_rate": 2.4245643858903525e-07, "logits/chosen": -1.4821362495422363, "logits/rejected": -2.075688362121582, "logps/chosen": -180.8423309326172, "logps/rejected": -317.8009948730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.79613208770752, "rewards/margins": 11.301459312438965, "rewards/rejected": -21.097591400146484, "step": 2947 }, { "epoch": 5.07, "learning_rate": 2.4235019124521884e-07, "logits/chosen": -1.7018816471099854, "logits/rejected": -1.9739491939544678, "logps/chosen": -141.2209014892578, "logps/rejected": -295.072509765625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -6.0038533210754395, "rewards/margins": 13.482760429382324, "rewards/rejected": -19.486614227294922, "step": 2948 }, { "epoch": 5.08, "learning_rate": 2.4224394390140244e-07, "logits/chosen": -2.059126377105713, "logits/rejected": -2.252196788787842, "logps/chosen": -117.65544128417969, "logps/rejected": -282.196044921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3910293579101562, "rewards/margins": 15.332508087158203, "rewards/rejected": -18.72353744506836, "step": 2949 }, { "epoch": 5.08, "learning_rate": 2.4213769655758604e-07, "logits/chosen": -1.7641501426696777, "logits/rejected": -1.9402430057525635, "logps/chosen": -169.12030029296875, "logps/rejected": -304.12835693359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.748703002929688, "rewards/margins": 13.000726699829102, "rewards/rejected": -21.749427795410156, "step": 2950 }, { "epoch": 5.08, "learning_rate": 2.4203144921376964e-07, "logits/chosen": -1.9769953489303589, "logits/rejected": -2.0812342166900635, "logps/chosen": -140.63674926757812, "logps/rejected": -304.78594970703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.306299686431885, "rewards/margins": 16.3048095703125, "rewards/rejected": -22.611108779907227, "step": 2951 }, { "epoch": 5.08, "learning_rate": 2.4192520186995324e-07, "logits/chosen": -1.888671636581421, "logits/rejected": -2.0072524547576904, "logps/chosen": -142.56004333496094, "logps/rejected": -332.1352844238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.113227367401123, "rewards/margins": 18.281654357910156, "rewards/rejected": -25.394882202148438, "step": 2952 }, { "epoch": 5.08, "learning_rate": 2.4181895452613684e-07, "logits/chosen": -1.6196335554122925, "logits/rejected": -2.0384833812713623, "logps/chosen": -114.85623168945312, "logps/rejected": -249.6420440673828, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.863973617553711, "rewards/margins": 11.662911415100098, "rewards/rejected": -16.526884078979492, "step": 2953 }, { "epoch": 5.08, "learning_rate": 2.4171270718232044e-07, "logits/chosen": -2.2059810161590576, "logits/rejected": -1.7754417657852173, "logps/chosen": -147.4945068359375, "logps/rejected": -261.4339904785156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.244601249694824, "rewards/margins": 12.23588752746582, "rewards/rejected": -17.48048973083496, "step": 2954 }, { "epoch": 5.09, "learning_rate": 2.4160645983850404e-07, "logits/chosen": -1.6820499897003174, "logits/rejected": -1.892586350440979, "logps/chosen": -112.7157974243164, "logps/rejected": -294.3074951171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.6813645362854, "rewards/margins": 16.785003662109375, "rewards/rejected": -21.46636962890625, "step": 2955 }, { "epoch": 5.09, "learning_rate": 2.4150021249468764e-07, "logits/chosen": -1.809744119644165, "logits/rejected": -1.8900752067565918, "logps/chosen": -115.35643005371094, "logps/rejected": -259.8822021484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.290682792663574, "rewards/margins": 13.371963500976562, "rewards/rejected": -17.662647247314453, "step": 2956 }, { "epoch": 5.09, "learning_rate": 2.4139396515087124e-07, "logits/chosen": -1.999780535697937, "logits/rejected": -1.498867392539978, "logps/chosen": -198.20819091796875, "logps/rejected": -303.48101806640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.751175880432129, "rewards/margins": 12.537402153015137, "rewards/rejected": -22.288578033447266, "step": 2957 }, { "epoch": 5.09, "learning_rate": 2.4128771780705483e-07, "logits/chosen": -1.9045372009277344, "logits/rejected": -1.9155735969543457, "logps/chosen": -174.10276794433594, "logps/rejected": -309.7454528808594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.700812339782715, "rewards/margins": 14.01253890991211, "rewards/rejected": -21.71335220336914, "step": 2958 }, { "epoch": 5.09, "learning_rate": 2.4118147046323843e-07, "logits/chosen": -2.25451922416687, "logits/rejected": -2.091174364089966, "logps/chosen": -145.3292999267578, "logps/rejected": -283.31158447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.358508110046387, "rewards/margins": 13.769126892089844, "rewards/rejected": -19.127635955810547, "step": 2959 }, { "epoch": 5.09, "learning_rate": 2.41075223119422e-07, "logits/chosen": -2.062030792236328, "logits/rejected": -1.760538101196289, "logps/chosen": -167.2672882080078, "logps/rejected": -255.6068115234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.5063347816467285, "rewards/margins": 10.813560485839844, "rewards/rejected": -17.319894790649414, "step": 2960 }, { "epoch": 5.1, "learning_rate": 2.409689757756056e-07, "logits/chosen": -1.9981071949005127, "logits/rejected": -2.0625617504119873, "logps/chosen": -144.36065673828125, "logps/rejected": -251.35324096679688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.615340709686279, "rewards/margins": 10.45992660522461, "rewards/rejected": -16.075267791748047, "step": 2961 }, { "epoch": 5.1, "learning_rate": 2.408627284317892e-07, "logits/chosen": -2.134197235107422, "logits/rejected": -1.6526708602905273, "logps/chosen": -125.00045776367188, "logps/rejected": -222.13145446777344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.376962900161743, "rewards/margins": 13.125874519348145, "rewards/rejected": -16.502838134765625, "step": 2962 }, { "epoch": 5.1, "learning_rate": 2.407564810879728e-07, "logits/chosen": -1.850163459777832, "logits/rejected": -1.8531594276428223, "logps/chosen": -158.22535705566406, "logps/rejected": -257.5146179199219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.224693298339844, "rewards/margins": 9.909061431884766, "rewards/rejected": -18.13375473022461, "step": 2963 }, { "epoch": 5.1, "learning_rate": 2.406502337441564e-07, "logits/chosen": -1.7875607013702393, "logits/rejected": -2.13307523727417, "logps/chosen": -137.10252380371094, "logps/rejected": -309.15020751953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.584885120391846, "rewards/margins": 16.224925994873047, "rewards/rejected": -21.809810638427734, "step": 2964 }, { "epoch": 5.1, "learning_rate": 2.4054398640033997e-07, "logits/chosen": -1.9111162424087524, "logits/rejected": -1.9861148595809937, "logps/chosen": -138.84556579589844, "logps/rejected": -285.623046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.931210517883301, "rewards/margins": 14.753544807434082, "rewards/rejected": -20.684755325317383, "step": 2965 }, { "epoch": 5.1, "learning_rate": 2.4043773905652357e-07, "logits/chosen": -1.8452200889587402, "logits/rejected": -1.6370258331298828, "logps/chosen": -175.36001586914062, "logps/rejected": -286.31500244140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.273648262023926, "rewards/margins": 12.094263076782227, "rewards/rejected": -20.367910385131836, "step": 2966 }, { "epoch": 5.11, "learning_rate": 2.4033149171270717e-07, "logits/chosen": -1.9141623973846436, "logits/rejected": -1.8927454948425293, "logps/chosen": -136.99444580078125, "logps/rejected": -251.34048461914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.957876205444336, "rewards/margins": 11.273950576782227, "rewards/rejected": -17.231826782226562, "step": 2967 }, { "epoch": 5.11, "learning_rate": 2.4022524436889077e-07, "logits/chosen": -2.0710020065307617, "logits/rejected": -1.4526214599609375, "logps/chosen": -162.49951171875, "logps/rejected": -302.34326171875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -6.153738975524902, "rewards/margins": 16.518352508544922, "rewards/rejected": -22.67209243774414, "step": 2968 }, { "epoch": 5.11, "learning_rate": 2.4011899702507437e-07, "logits/chosen": -1.8012433052062988, "logits/rejected": -2.06784725189209, "logps/chosen": -137.85682678222656, "logps/rejected": -302.50250244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.207849979400635, "rewards/margins": 14.441555976867676, "rewards/rejected": -20.64940643310547, "step": 2969 }, { "epoch": 5.11, "learning_rate": 2.4001274968125797e-07, "logits/chosen": -2.0409042835235596, "logits/rejected": -2.097626209259033, "logps/chosen": -159.22218322753906, "logps/rejected": -265.7147216796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.2143707275390625, "rewards/margins": 10.218140602111816, "rewards/rejected": -16.432510375976562, "step": 2970 }, { "epoch": 5.11, "learning_rate": 2.3990650233744157e-07, "logits/chosen": -1.880042314529419, "logits/rejected": -1.8791377544403076, "logps/chosen": -123.67550659179688, "logps/rejected": -285.231689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.020718574523926, "rewards/margins": 16.410186767578125, "rewards/rejected": -20.430904388427734, "step": 2971 }, { "epoch": 5.12, "learning_rate": 2.3980025499362517e-07, "logits/chosen": -1.7966400384902954, "logits/rejected": -1.9969316720962524, "logps/chosen": -132.82058715820312, "logps/rejected": -263.8417663574219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.853982448577881, "rewards/margins": 12.495678901672363, "rewards/rejected": -17.349660873413086, "step": 2972 }, { "epoch": 5.12, "learning_rate": 2.3969400764980876e-07, "logits/chosen": -1.157863736152649, "logits/rejected": -2.2109129428863525, "logps/chosen": -117.66201782226562, "logps/rejected": -384.8197326660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.867321491241455, "rewards/margins": 19.181203842163086, "rewards/rejected": -24.048524856567383, "step": 2973 }, { "epoch": 5.12, "learning_rate": 2.3958776030599236e-07, "logits/chosen": -1.8313437700271606, "logits/rejected": -1.7573864459991455, "logps/chosen": -168.7274627685547, "logps/rejected": -283.545654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.188663482666016, "rewards/margins": 11.100920677185059, "rewards/rejected": -18.289583206176758, "step": 2974 }, { "epoch": 5.12, "learning_rate": 2.3948151296217596e-07, "logits/chosen": -1.6595263481140137, "logits/rejected": -2.0913562774658203, "logps/chosen": -162.0412139892578, "logps/rejected": -298.77630615234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.693415641784668, "rewards/margins": 12.173506736755371, "rewards/rejected": -19.86692237854004, "step": 2975 }, { "epoch": 5.12, "learning_rate": 2.393752656183595e-07, "logits/chosen": -2.054107666015625, "logits/rejected": -1.9210911989212036, "logps/chosen": -174.43731689453125, "logps/rejected": -303.6465759277344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.915552139282227, "rewards/margins": 14.617273330688477, "rewards/rejected": -21.532825469970703, "step": 2976 }, { "epoch": 5.12, "learning_rate": 2.392690182745431e-07, "logits/chosen": -1.9807655811309814, "logits/rejected": -1.9796241521835327, "logps/chosen": -157.2502899169922, "logps/rejected": -303.3751525878906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.249765872955322, "rewards/margins": 14.872835159301758, "rewards/rejected": -21.122602462768555, "step": 2977 }, { "epoch": 5.13, "learning_rate": 2.391627709307267e-07, "logits/chosen": -1.8816509246826172, "logits/rejected": -1.7686790227890015, "logps/chosen": -134.66094970703125, "logps/rejected": -283.1697998046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.788479804992676, "rewards/margins": 13.921652793884277, "rewards/rejected": -19.71013069152832, "step": 2978 }, { "epoch": 5.13, "learning_rate": 2.390565235869103e-07, "logits/chosen": -2.0311179161071777, "logits/rejected": -1.8278117179870605, "logps/chosen": -148.68130493164062, "logps/rejected": -232.86785888671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.223972320556641, "rewards/margins": 9.669282913208008, "rewards/rejected": -15.893256187438965, "step": 2979 }, { "epoch": 5.13, "learning_rate": 2.389502762430939e-07, "logits/chosen": -1.6103811264038086, "logits/rejected": -2.005063056945801, "logps/chosen": -123.04365539550781, "logps/rejected": -289.32452392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.249196529388428, "rewards/margins": 14.495259284973145, "rewards/rejected": -20.744457244873047, "step": 2980 }, { "epoch": 5.13, "learning_rate": 2.388440288992775e-07, "logits/chosen": -1.9879850149154663, "logits/rejected": -1.6521012783050537, "logps/chosen": -167.60208129882812, "logps/rejected": -271.1695251464844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.3589935302734375, "rewards/margins": 11.591741561889648, "rewards/rejected": -17.950735092163086, "step": 2981 }, { "epoch": 5.13, "learning_rate": 2.387377815554611e-07, "logits/chosen": -1.7446262836456299, "logits/rejected": -2.0718674659729004, "logps/chosen": -120.92471313476562, "logps/rejected": -298.46832275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.133722305297852, "rewards/margins": 15.527080535888672, "rewards/rejected": -20.660802841186523, "step": 2982 }, { "epoch": 5.13, "learning_rate": 2.386315342116447e-07, "logits/chosen": -1.9561656713485718, "logits/rejected": -2.0246315002441406, "logps/chosen": -171.13302612304688, "logps/rejected": -347.33721923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.94391393661499, "rewards/margins": 16.615772247314453, "rewards/rejected": -24.5596866607666, "step": 2983 }, { "epoch": 5.14, "learning_rate": 2.385252868678283e-07, "logits/chosen": -1.7328537702560425, "logits/rejected": -1.8774034976959229, "logps/chosen": -110.82977294921875, "logps/rejected": -227.74725341796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.2015295028686523, "rewards/margins": 11.391974449157715, "rewards/rejected": -14.593503952026367, "step": 2984 }, { "epoch": 5.14, "learning_rate": 2.384190395240119e-07, "logits/chosen": -1.834000587463379, "logits/rejected": -1.9204246997833252, "logps/chosen": -145.38302612304688, "logps/rejected": -289.8179016113281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.002507209777832, "rewards/margins": 14.414175033569336, "rewards/rejected": -20.41668128967285, "step": 2985 }, { "epoch": 5.14, "learning_rate": 2.3831279218019547e-07, "logits/chosen": -1.7496907711029053, "logits/rejected": -1.7303746938705444, "logps/chosen": -160.36102294921875, "logps/rejected": -270.8093566894531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.529852867126465, "rewards/margins": 11.286380767822266, "rewards/rejected": -19.816234588623047, "step": 2986 }, { "epoch": 5.14, "learning_rate": 2.382065448363791e-07, "logits/chosen": -1.6523373126983643, "logits/rejected": -1.861755609512329, "logps/chosen": -140.19789123535156, "logps/rejected": -315.242431640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.721425533294678, "rewards/margins": 16.892593383789062, "rewards/rejected": -22.6140193939209, "step": 2987 }, { "epoch": 5.14, "learning_rate": 2.3810029749256267e-07, "logits/chosen": -1.8856275081634521, "logits/rejected": -1.6831845045089722, "logps/chosen": -138.5012664794922, "logps/rejected": -284.0602722167969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.845170497894287, "rewards/margins": 15.269001960754395, "rewards/rejected": -21.114171981811523, "step": 2988 }, { "epoch": 5.14, "learning_rate": 2.3799405014874627e-07, "logits/chosen": -1.9397698640823364, "logits/rejected": -2.006772518157959, "logps/chosen": -168.3777618408203, "logps/rejected": -329.2552185058594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.298008918762207, "rewards/margins": 16.106048583984375, "rewards/rejected": -24.4040584564209, "step": 2989 }, { "epoch": 5.15, "learning_rate": 2.3788780280492987e-07, "logits/chosen": -1.8576905727386475, "logits/rejected": -2.0754213333129883, "logps/chosen": -141.00347900390625, "logps/rejected": -301.7977294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.651786804199219, "rewards/margins": 13.502931594848633, "rewards/rejected": -18.15471839904785, "step": 2990 }, { "epoch": 5.15, "learning_rate": 2.3778155546111344e-07, "logits/chosen": -1.9867360591888428, "logits/rejected": -1.8582932949066162, "logps/chosen": -170.52488708496094, "logps/rejected": -307.2984313964844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.560423851013184, "rewards/margins": 12.67971420288086, "rewards/rejected": -21.24013900756836, "step": 2991 }, { "epoch": 5.15, "learning_rate": 2.3767530811729706e-07, "logits/chosen": -2.0085270404815674, "logits/rejected": -1.7443426847457886, "logps/chosen": -156.54714965820312, "logps/rejected": -258.34820556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.760735034942627, "rewards/margins": 11.108274459838867, "rewards/rejected": -17.869009017944336, "step": 2992 }, { "epoch": 5.15, "learning_rate": 2.3756906077348066e-07, "logits/chosen": -1.7643781900405884, "logits/rejected": -1.81471586227417, "logps/chosen": -133.0749053955078, "logps/rejected": -272.98712158203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.599724292755127, "rewards/margins": 12.66315746307373, "rewards/rejected": -18.262882232666016, "step": 2993 }, { "epoch": 5.15, "learning_rate": 2.3746281342966426e-07, "logits/chosen": -1.7492696046829224, "logits/rejected": -1.8496856689453125, "logps/chosen": -140.45179748535156, "logps/rejected": -284.9178771972656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.175840377807617, "rewards/margins": 13.29169750213623, "rewards/rejected": -19.46753692626953, "step": 2994 }, { "epoch": 5.15, "learning_rate": 2.3735656608584783e-07, "logits/chosen": -1.7530708312988281, "logits/rejected": -2.0997562408447266, "logps/chosen": -142.5994873046875, "logps/rejected": -304.306640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.814718723297119, "rewards/margins": 15.249720573425293, "rewards/rejected": -21.06443977355957, "step": 2995 }, { "epoch": 5.16, "learning_rate": 2.3725031874203143e-07, "logits/chosen": -1.9132988452911377, "logits/rejected": -1.9289913177490234, "logps/chosen": -189.04893493652344, "logps/rejected": -298.4505310058594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.621696472167969, "rewards/margins": 10.483484268188477, "rewards/rejected": -20.105180740356445, "step": 2996 }, { "epoch": 5.16, "learning_rate": 2.3714407139821503e-07, "logits/chosen": -1.9503923654556274, "logits/rejected": -1.9347641468048096, "logps/chosen": -153.72666931152344, "logps/rejected": -295.98065185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.22207498550415, "rewards/margins": 14.199821472167969, "rewards/rejected": -20.421897888183594, "step": 2997 }, { "epoch": 5.16, "learning_rate": 2.3703782405439863e-07, "logits/chosen": -2.0859174728393555, "logits/rejected": -1.6212100982666016, "logps/chosen": -140.10198974609375, "logps/rejected": -278.394287109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.175042152404785, "rewards/margins": 14.130423545837402, "rewards/rejected": -20.305465698242188, "step": 2998 }, { "epoch": 5.16, "learning_rate": 2.3693157671058223e-07, "logits/chosen": -2.1730806827545166, "logits/rejected": -1.9900436401367188, "logps/chosen": -170.05892944335938, "logps/rejected": -285.5736083984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.697836399078369, "rewards/margins": 12.09813117980957, "rewards/rejected": -18.795969009399414, "step": 2999 }, { "epoch": 5.16, "learning_rate": 2.3682532936676583e-07, "logits/chosen": -1.9828944206237793, "logits/rejected": -2.031752586364746, "logps/chosen": -165.23953247070312, "logps/rejected": -274.88958740234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.269837379455566, "rewards/margins": 12.04426383972168, "rewards/rejected": -18.314102172851562, "step": 3000 }, { "epoch": 5.17, "learning_rate": 2.367190820229494e-07, "logits/chosen": -1.7214395999908447, "logits/rejected": -2.056873083114624, "logps/chosen": -113.23912811279297, "logps/rejected": -251.38235473632812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.169595241546631, "rewards/margins": 13.40407943725586, "rewards/rejected": -17.573673248291016, "step": 3001 }, { "epoch": 5.17, "learning_rate": 2.36612834679133e-07, "logits/chosen": -1.831705093383789, "logits/rejected": -1.9426281452178955, "logps/chosen": -143.677978515625, "logps/rejected": -323.2685546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.456342697143555, "rewards/margins": 16.089191436767578, "rewards/rejected": -22.5455322265625, "step": 3002 }, { "epoch": 5.17, "learning_rate": 2.3650658733531663e-07, "logits/chosen": -1.950871467590332, "logits/rejected": -1.973872184753418, "logps/chosen": -143.41571044921875, "logps/rejected": -313.9106140136719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.747913837432861, "rewards/margins": 16.268827438354492, "rewards/rejected": -21.016742706298828, "step": 3003 }, { "epoch": 5.17, "learning_rate": 2.364003399915002e-07, "logits/chosen": -1.82236909866333, "logits/rejected": -1.6663373708724976, "logps/chosen": -131.99453735351562, "logps/rejected": -268.08966064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.689847469329834, "rewards/margins": 12.942414283752441, "rewards/rejected": -18.63226318359375, "step": 3004 }, { "epoch": 5.17, "learning_rate": 2.362940926476838e-07, "logits/chosen": -1.7028017044067383, "logits/rejected": -2.0091373920440674, "logps/chosen": -165.9170379638672, "logps/rejected": -301.7669372558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.772154331207275, "rewards/margins": 13.754443168640137, "rewards/rejected": -20.52659797668457, "step": 3005 }, { "epoch": 5.17, "learning_rate": 2.361878453038674e-07, "logits/chosen": -1.7771539688110352, "logits/rejected": -1.7491496801376343, "logps/chosen": -143.70492553710938, "logps/rejected": -257.6663513183594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.823620796203613, "rewards/margins": 11.727560043334961, "rewards/rejected": -19.55118179321289, "step": 3006 }, { "epoch": 5.18, "learning_rate": 2.3608159796005097e-07, "logits/chosen": -1.8675847053527832, "logits/rejected": -1.9580700397491455, "logps/chosen": -200.50167846679688, "logps/rejected": -301.210693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.377416610717773, "rewards/margins": 11.043707847595215, "rewards/rejected": -19.421123504638672, "step": 3007 }, { "epoch": 5.18, "learning_rate": 2.359753506162346e-07, "logits/chosen": -1.8313400745391846, "logits/rejected": -1.8783740997314453, "logps/chosen": -91.84629821777344, "logps/rejected": -234.5408477783203, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2728328704833984, "rewards/margins": 13.940423011779785, "rewards/rejected": -16.2132568359375, "step": 3008 }, { "epoch": 5.18, "learning_rate": 2.358691032724182e-07, "logits/chosen": -1.9294955730438232, "logits/rejected": -1.6503524780273438, "logps/chosen": -116.70561218261719, "logps/rejected": -230.76150512695312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.362531661987305, "rewards/margins": 11.824010848999023, "rewards/rejected": -16.186542510986328, "step": 3009 }, { "epoch": 5.18, "learning_rate": 2.3576285592860177e-07, "logits/chosen": -1.8556091785430908, "logits/rejected": -1.6073927879333496, "logps/chosen": -126.46790313720703, "logps/rejected": -301.10540771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.20878791809082, "rewards/margins": 16.7963924407959, "rewards/rejected": -22.00518035888672, "step": 3010 }, { "epoch": 5.18, "learning_rate": 2.3565660858478536e-07, "logits/chosen": -1.9560816287994385, "logits/rejected": -1.9033358097076416, "logps/chosen": -146.8652801513672, "logps/rejected": -263.27203369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.925585746765137, "rewards/margins": 11.503572463989258, "rewards/rejected": -17.429157257080078, "step": 3011 }, { "epoch": 5.18, "learning_rate": 2.3555036124096896e-07, "logits/chosen": -1.4858348369598389, "logits/rejected": -1.974951982498169, "logps/chosen": -155.1088409423828, "logps/rejected": -311.6733093261719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.164201736450195, "rewards/margins": 13.737531661987305, "rewards/rejected": -21.901735305786133, "step": 3012 }, { "epoch": 5.19, "learning_rate": 2.3544411389715256e-07, "logits/chosen": -2.018860340118408, "logits/rejected": -1.9854334592819214, "logps/chosen": -112.09996795654297, "logps/rejected": -272.21636962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4473061561584473, "rewards/margins": 15.583152770996094, "rewards/rejected": -19.030458450317383, "step": 3013 }, { "epoch": 5.19, "learning_rate": 2.3533786655333616e-07, "logits/chosen": -2.03599214553833, "logits/rejected": -2.0173940658569336, "logps/chosen": -123.80816650390625, "logps/rejected": -248.35006713867188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.019747734069824, "rewards/margins": 12.809062957763672, "rewards/rejected": -17.828811645507812, "step": 3014 }, { "epoch": 5.19, "learning_rate": 2.3523161920951976e-07, "logits/chosen": -1.8895454406738281, "logits/rejected": -2.0534369945526123, "logps/chosen": -159.81954956054688, "logps/rejected": -317.05780029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.552815914154053, "rewards/margins": 14.27973747253418, "rewards/rejected": -20.83255386352539, "step": 3015 }, { "epoch": 5.19, "learning_rate": 2.3512537186570336e-07, "logits/chosen": -1.8717637062072754, "logits/rejected": -1.903814673423767, "logps/chosen": -122.91575622558594, "logps/rejected": -301.1086730957031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.3826568126678467, "rewards/margins": 18.045698165893555, "rewards/rejected": -21.42835235595703, "step": 3016 }, { "epoch": 5.19, "learning_rate": 2.3501912452188693e-07, "logits/chosen": -1.949163794517517, "logits/rejected": -1.744295358657837, "logps/chosen": -152.006103515625, "logps/rejected": -300.85540771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.87216854095459, "rewards/margins": 14.820732116699219, "rewards/rejected": -20.692901611328125, "step": 3017 }, { "epoch": 5.19, "learning_rate": 2.3491287717807053e-07, "logits/chosen": -1.9653385877609253, "logits/rejected": -1.7412124872207642, "logps/chosen": -140.26617431640625, "logps/rejected": -267.5777587890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.6547417640686035, "rewards/margins": 13.953508377075195, "rewards/rejected": -18.608251571655273, "step": 3018 }, { "epoch": 5.2, "learning_rate": 2.3480662983425416e-07, "logits/chosen": -1.7258410453796387, "logits/rejected": -1.995882511138916, "logps/chosen": -133.0359344482422, "logps/rejected": -269.598388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.838204860687256, "rewards/margins": 11.955026626586914, "rewards/rejected": -17.793231964111328, "step": 3019 }, { "epoch": 5.2, "learning_rate": 2.3470038249043773e-07, "logits/chosen": -1.7344056367874146, "logits/rejected": -1.7967702150344849, "logps/chosen": -141.7810516357422, "logps/rejected": -276.562744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.108436107635498, "rewards/margins": 12.270727157592773, "rewards/rejected": -18.37916374206543, "step": 3020 }, { "epoch": 5.2, "learning_rate": 2.3459413514662133e-07, "logits/chosen": -1.733196496963501, "logits/rejected": -2.0772435665130615, "logps/chosen": -135.8207244873047, "logps/rejected": -256.02313232421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.860219955444336, "rewards/margins": 10.903389930725098, "rewards/rejected": -16.763608932495117, "step": 3021 }, { "epoch": 5.2, "learning_rate": 2.3448788780280493e-07, "logits/chosen": -1.8585121631622314, "logits/rejected": -2.101309061050415, "logps/chosen": -180.58364868164062, "logps/rejected": -332.3584289550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.476887702941895, "rewards/margins": 14.858989715576172, "rewards/rejected": -24.33587646484375, "step": 3022 }, { "epoch": 5.2, "learning_rate": 2.343816404589885e-07, "logits/chosen": -1.823763132095337, "logits/rejected": -1.9697425365447998, "logps/chosen": -123.19705963134766, "logps/rejected": -254.1773681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.664271831512451, "rewards/margins": 12.747661590576172, "rewards/rejected": -16.41193199157715, "step": 3023 }, { "epoch": 5.2, "learning_rate": 2.342753931151721e-07, "logits/chosen": -1.5497701168060303, "logits/rejected": -2.2119264602661133, "logps/chosen": -150.1474151611328, "logps/rejected": -288.18841552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.471602439880371, "rewards/margins": 11.963239669799805, "rewards/rejected": -19.434843063354492, "step": 3024 }, { "epoch": 5.21, "learning_rate": 2.3416914577135572e-07, "logits/chosen": -2.0802392959594727, "logits/rejected": -1.5687495470046997, "logps/chosen": -168.67007446289062, "logps/rejected": -243.31736755371094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.257794380187988, "rewards/margins": 9.073996543884277, "rewards/rejected": -16.331790924072266, "step": 3025 }, { "epoch": 5.21, "learning_rate": 2.340628984275393e-07, "logits/chosen": -1.8141075372695923, "logits/rejected": -1.9248789548873901, "logps/chosen": -151.35154724121094, "logps/rejected": -317.4068603515625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.080484390258789, "rewards/margins": 16.179180145263672, "rewards/rejected": -22.25966453552246, "step": 3026 }, { "epoch": 5.21, "learning_rate": 2.339566510837229e-07, "logits/chosen": -1.708970069885254, "logits/rejected": -1.9142370223999023, "logps/chosen": -112.10026550292969, "logps/rejected": -258.932373046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.154748439788818, "rewards/margins": 14.320172309875488, "rewards/rejected": -18.47492218017578, "step": 3027 }, { "epoch": 5.21, "learning_rate": 2.338504037399065e-07, "logits/chosen": -1.9531382322311401, "logits/rejected": -1.702129602432251, "logps/chosen": -150.1632843017578, "logps/rejected": -257.731689453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.4132466316223145, "rewards/margins": 11.468441009521484, "rewards/rejected": -17.88168716430664, "step": 3028 }, { "epoch": 5.21, "learning_rate": 2.3374415639609006e-07, "logits/chosen": -2.0083069801330566, "logits/rejected": -1.9283052682876587, "logps/chosen": -127.60884857177734, "logps/rejected": -287.236572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.977736949920654, "rewards/margins": 15.117568016052246, "rewards/rejected": -21.095304489135742, "step": 3029 }, { "epoch": 5.22, "learning_rate": 2.336379090522737e-07, "logits/chosen": -1.3839799165725708, "logits/rejected": -1.9458863735198975, "logps/chosen": -118.97421264648438, "logps/rejected": -270.4382629394531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.441396713256836, "rewards/margins": 13.488779067993164, "rewards/rejected": -18.93017578125, "step": 3030 }, { "epoch": 5.22, "learning_rate": 2.335316617084573e-07, "logits/chosen": -1.9832897186279297, "logits/rejected": -2.0467004776000977, "logps/chosen": -108.84812927246094, "logps/rejected": -224.818115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.713217258453369, "rewards/margins": 11.047277450561523, "rewards/rejected": -14.760496139526367, "step": 3031 }, { "epoch": 5.22, "learning_rate": 2.3342541436464086e-07, "logits/chosen": -1.6663026809692383, "logits/rejected": -1.7487730979919434, "logps/chosen": -175.88949584960938, "logps/rejected": -294.09637451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.518646240234375, "rewards/margins": 11.935420989990234, "rewards/rejected": -21.45406723022461, "step": 3032 }, { "epoch": 5.22, "learning_rate": 2.3331916702082446e-07, "logits/chosen": -1.5396935939788818, "logits/rejected": -2.060257911682129, "logps/chosen": -127.13516235351562, "logps/rejected": -302.6020202636719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.303182125091553, "rewards/margins": 14.432587623596191, "rewards/rejected": -19.735769271850586, "step": 3033 }, { "epoch": 5.22, "learning_rate": 2.3321291967700806e-07, "logits/chosen": -1.9382171630859375, "logits/rejected": -1.8997974395751953, "logps/chosen": -131.67715454101562, "logps/rejected": -225.4281768798828, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.456480503082275, "rewards/margins": 9.898099899291992, "rewards/rejected": -15.354580879211426, "step": 3034 }, { "epoch": 5.22, "learning_rate": 2.3310667233319168e-07, "logits/chosen": -1.8053796291351318, "logits/rejected": -2.093461751937866, "logps/chosen": -110.56816101074219, "logps/rejected": -289.1156921386719, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.817905426025391, "rewards/margins": 15.801552772521973, "rewards/rejected": -20.619457244873047, "step": 3035 }, { "epoch": 5.23, "learning_rate": 2.3300042498937526e-07, "logits/chosen": -1.9379907846450806, "logits/rejected": -1.9359079599380493, "logps/chosen": -134.00506591796875, "logps/rejected": -330.9836730957031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.2897162437438965, "rewards/margins": 18.368896484375, "rewards/rejected": -24.658611297607422, "step": 3036 }, { "epoch": 5.23, "learning_rate": 2.3289417764555886e-07, "logits/chosen": -1.961991310119629, "logits/rejected": -1.8580347299575806, "logps/chosen": -187.2231903076172, "logps/rejected": -325.75213623046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.619965553283691, "rewards/margins": 14.08510684967041, "rewards/rejected": -22.70507049560547, "step": 3037 }, { "epoch": 5.23, "learning_rate": 2.3278793030174245e-07, "logits/chosen": -1.997213363647461, "logits/rejected": -1.7875847816467285, "logps/chosen": -171.40585327148438, "logps/rejected": -301.07818603515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.876051425933838, "rewards/margins": 13.405790328979492, "rewards/rejected": -20.281841278076172, "step": 3038 }, { "epoch": 5.23, "learning_rate": 2.3268168295792603e-07, "logits/chosen": -1.6361072063446045, "logits/rejected": -2.092193841934204, "logps/chosen": -132.58200073242188, "logps/rejected": -300.02117919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.7481489181518555, "rewards/margins": 15.342550277709961, "rewards/rejected": -20.0906982421875, "step": 3039 }, { "epoch": 5.23, "learning_rate": 2.3257543561410963e-07, "logits/chosen": -1.6313116550445557, "logits/rejected": -1.897863507270813, "logps/chosen": -88.46693420410156, "logps/rejected": -236.34449768066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.684793472290039, "rewards/margins": 13.686124801635742, "rewards/rejected": -17.37091827392578, "step": 3040 }, { "epoch": 5.23, "learning_rate": 2.3246918827029325e-07, "logits/chosen": -1.662408709526062, "logits/rejected": -2.015528917312622, "logps/chosen": -132.2455291748047, "logps/rejected": -273.19854736328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.553768157958984, "rewards/margins": 13.507083892822266, "rewards/rejected": -18.060850143432617, "step": 3041 }, { "epoch": 5.24, "learning_rate": 2.3236294092647682e-07, "logits/chosen": -2.055872917175293, "logits/rejected": -1.6886436939239502, "logps/chosen": -190.24627685546875, "logps/rejected": -305.6994323730469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.7605719566345215, "rewards/margins": 14.511548042297363, "rewards/rejected": -22.272119522094727, "step": 3042 }, { "epoch": 5.24, "learning_rate": 2.3225669358266042e-07, "logits/chosen": -1.8923234939575195, "logits/rejected": -1.796806812286377, "logps/chosen": -142.174072265625, "logps/rejected": -285.8128662109375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -5.883618354797363, "rewards/margins": 15.079586029052734, "rewards/rejected": -20.963205337524414, "step": 3043 }, { "epoch": 5.24, "learning_rate": 2.3215044623884402e-07, "logits/chosen": -1.7399917840957642, "logits/rejected": -2.104754686355591, "logps/chosen": -120.0306167602539, "logps/rejected": -269.8756408691406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9257872104644775, "rewards/margins": 15.63322639465332, "rewards/rejected": -19.55901527404785, "step": 3044 }, { "epoch": 5.24, "learning_rate": 2.320441988950276e-07, "logits/chosen": -1.8214912414550781, "logits/rejected": -1.9020602703094482, "logps/chosen": -151.23391723632812, "logps/rejected": -309.01898193359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.652775764465332, "rewards/margins": 13.804570198059082, "rewards/rejected": -20.457345962524414, "step": 3045 }, { "epoch": 5.24, "learning_rate": 2.3193795155121122e-07, "logits/chosen": -1.9556827545166016, "logits/rejected": -1.7247040271759033, "logps/chosen": -189.7586669921875, "logps/rejected": -288.8469543457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.894058227539062, "rewards/margins": 10.453252792358398, "rewards/rejected": -19.347309112548828, "step": 3046 }, { "epoch": 5.24, "learning_rate": 2.3183170420739482e-07, "logits/chosen": -1.7929000854492188, "logits/rejected": -1.940803050994873, "logps/chosen": -155.81939697265625, "logps/rejected": -287.766845703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.812749862670898, "rewards/margins": 13.279215812683105, "rewards/rejected": -20.091964721679688, "step": 3047 }, { "epoch": 5.25, "learning_rate": 2.317254568635784e-07, "logits/chosen": -1.9948341846466064, "logits/rejected": -1.449829339981079, "logps/chosen": -161.64617919921875, "logps/rejected": -283.2466735839844, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.820730686187744, "rewards/margins": 13.058223724365234, "rewards/rejected": -20.87895393371582, "step": 3048 }, { "epoch": 5.25, "learning_rate": 2.31619209519762e-07, "logits/chosen": -1.7218997478485107, "logits/rejected": -1.7928240299224854, "logps/chosen": -173.60275268554688, "logps/rejected": -292.27215576171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -9.055804252624512, "rewards/margins": 11.682821273803711, "rewards/rejected": -20.738624572753906, "step": 3049 }, { "epoch": 5.25, "learning_rate": 2.315129621759456e-07, "logits/chosen": -1.8422964811325073, "logits/rejected": -1.9606940746307373, "logps/chosen": -127.14741516113281, "logps/rejected": -262.1267395019531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.0260162353515625, "rewards/margins": 12.336609840393066, "rewards/rejected": -16.362627029418945, "step": 3050 }, { "epoch": 5.25, "learning_rate": 2.3140671483212916e-07, "logits/chosen": -1.7297883033752441, "logits/rejected": -2.113629102706909, "logps/chosen": -155.02975463867188, "logps/rejected": -297.2823486328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.21755838394165, "rewards/margins": 13.10468864440918, "rewards/rejected": -20.322246551513672, "step": 3051 }, { "epoch": 5.25, "learning_rate": 2.3130046748831279e-07, "logits/chosen": -1.8354058265686035, "logits/rejected": -1.9988371133804321, "logps/chosen": -138.885498046875, "logps/rejected": -284.53009033203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.7471923828125, "rewards/margins": 14.437472343444824, "rewards/rejected": -20.184663772583008, "step": 3052 }, { "epoch": 5.25, "learning_rate": 2.3119422014449639e-07, "logits/chosen": -1.8213508129119873, "logits/rejected": -1.7652794122695923, "logps/chosen": -124.60091400146484, "logps/rejected": -258.4493408203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.289084434509277, "rewards/margins": 12.992959022521973, "rewards/rejected": -17.28204345703125, "step": 3053 }, { "epoch": 5.26, "learning_rate": 2.3108797280067996e-07, "logits/chosen": -1.9543604850769043, "logits/rejected": -1.8157727718353271, "logps/chosen": -178.72927856445312, "logps/rejected": -316.861572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.284256935119629, "rewards/margins": 14.323530197143555, "rewards/rejected": -21.6077880859375, "step": 3054 }, { "epoch": 5.26, "learning_rate": 2.3098172545686356e-07, "logits/chosen": -1.7643638849258423, "logits/rejected": -1.9790055751800537, "logps/chosen": -127.723876953125, "logps/rejected": -287.457275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.456906795501709, "rewards/margins": 15.219690322875977, "rewards/rejected": -19.676597595214844, "step": 3055 }, { "epoch": 5.26, "learning_rate": 2.3087547811304716e-07, "logits/chosen": -1.736331820487976, "logits/rejected": -2.0187060832977295, "logps/chosen": -131.24081420898438, "logps/rejected": -322.0757751464844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.7908735275268555, "rewards/margins": 16.200143814086914, "rewards/rejected": -20.991016387939453, "step": 3056 }, { "epoch": 5.26, "learning_rate": 2.3076923076923078e-07, "logits/chosen": -1.8669341802597046, "logits/rejected": -1.479429006576538, "logps/chosen": -152.15032958984375, "logps/rejected": -247.69635009765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.229454517364502, "rewards/margins": 10.361148834228516, "rewards/rejected": -17.59060287475586, "step": 3057 }, { "epoch": 5.26, "learning_rate": 2.3066298342541435e-07, "logits/chosen": -1.7398955821990967, "logits/rejected": -2.0115911960601807, "logps/chosen": -160.26025390625, "logps/rejected": -301.7603759765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.113336563110352, "rewards/margins": 14.381242752075195, "rewards/rejected": -20.494579315185547, "step": 3058 }, { "epoch": 5.27, "learning_rate": 2.3055673608159795e-07, "logits/chosen": -2.049262285232544, "logits/rejected": -1.9586249589920044, "logps/chosen": -167.62619018554688, "logps/rejected": -312.6477966308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.563434600830078, "rewards/margins": 14.449405670166016, "rewards/rejected": -22.012840270996094, "step": 3059 }, { "epoch": 5.27, "learning_rate": 2.3045048873778155e-07, "logits/chosen": -1.6869499683380127, "logits/rejected": -1.9141499996185303, "logps/chosen": -146.40493774414062, "logps/rejected": -270.4045715332031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.138361930847168, "rewards/margins": 12.230807304382324, "rewards/rejected": -19.369169235229492, "step": 3060 }, { "epoch": 5.27, "learning_rate": 2.3034424139396512e-07, "logits/chosen": -1.6166067123413086, "logits/rejected": -2.0671825408935547, "logps/chosen": -134.79078674316406, "logps/rejected": -274.7637634277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.651644706726074, "rewards/margins": 12.483003616333008, "rewards/rejected": -18.134647369384766, "step": 3061 }, { "epoch": 5.27, "learning_rate": 2.3023799405014875e-07, "logits/chosen": -1.8947460651397705, "logits/rejected": -2.05916690826416, "logps/chosen": -110.21368408203125, "logps/rejected": -285.3495788574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.014020919799805, "rewards/margins": 15.245640754699707, "rewards/rejected": -20.259662628173828, "step": 3062 }, { "epoch": 5.27, "learning_rate": 2.3013174670633235e-07, "logits/chosen": -1.991652011871338, "logits/rejected": -2.0256261825561523, "logps/chosen": -154.6181640625, "logps/rejected": -289.46282958984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.10917854309082, "rewards/margins": 14.341080665588379, "rewards/rejected": -19.450260162353516, "step": 3063 }, { "epoch": 5.27, "learning_rate": 2.3002549936251592e-07, "logits/chosen": -1.7886998653411865, "logits/rejected": -1.7122260332107544, "logps/chosen": -113.24828338623047, "logps/rejected": -264.4062805175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.1246466636657715, "rewards/margins": 16.067834854125977, "rewards/rejected": -20.192481994628906, "step": 3064 }, { "epoch": 5.28, "learning_rate": 2.2991925201869952e-07, "logits/chosen": -2.204010248184204, "logits/rejected": -2.046079397201538, "logps/chosen": -127.56272888183594, "logps/rejected": -308.2913513183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.753112316131592, "rewards/margins": 17.599863052368164, "rewards/rejected": -22.352975845336914, "step": 3065 }, { "epoch": 5.28, "learning_rate": 2.2981300467488312e-07, "logits/chosen": -1.7128738164901733, "logits/rejected": -1.788745403289795, "logps/chosen": -141.27581787109375, "logps/rejected": -220.5253143310547, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.490504741668701, "rewards/margins": 8.40157699584961, "rewards/rejected": -14.892080307006836, "step": 3066 }, { "epoch": 5.28, "learning_rate": 2.297067573310667e-07, "logits/chosen": -1.8042545318603516, "logits/rejected": -1.855027675628662, "logps/chosen": -159.6019744873047, "logps/rejected": -270.25531005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.349637508392334, "rewards/margins": 11.093463897705078, "rewards/rejected": -18.44310188293457, "step": 3067 }, { "epoch": 5.28, "learning_rate": 2.2960050998725032e-07, "logits/chosen": -2.0390896797180176, "logits/rejected": -1.3294910192489624, "logps/chosen": -149.73196411132812, "logps/rejected": -245.4655303955078, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.101628303527832, "rewards/margins": 12.415769577026367, "rewards/rejected": -18.517398834228516, "step": 3068 }, { "epoch": 5.28, "learning_rate": 2.2949426264343392e-07, "logits/chosen": -1.7603063583374023, "logits/rejected": -1.8667256832122803, "logps/chosen": -178.9120330810547, "logps/rejected": -295.06396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.589463233947754, "rewards/margins": 10.567623138427734, "rewards/rejected": -19.157085418701172, "step": 3069 }, { "epoch": 5.28, "learning_rate": 2.293880152996175e-07, "logits/chosen": -1.8915143013000488, "logits/rejected": -1.8886494636535645, "logps/chosen": -125.69342041015625, "logps/rejected": -355.75604248046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.060786247253418, "rewards/margins": 21.315744400024414, "rewards/rejected": -26.37653160095215, "step": 3070 }, { "epoch": 5.29, "learning_rate": 2.2928176795580109e-07, "logits/chosen": -1.9221339225769043, "logits/rejected": -1.7898547649383545, "logps/chosen": -179.01528930664062, "logps/rejected": -314.2656555175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.437934875488281, "rewards/margins": 13.873566627502441, "rewards/rejected": -22.311500549316406, "step": 3071 }, { "epoch": 5.29, "learning_rate": 2.2917552061198469e-07, "logits/chosen": -2.029172420501709, "logits/rejected": -1.8231890201568604, "logps/chosen": -160.653076171875, "logps/rejected": -301.363525390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.3994574546813965, "rewards/margins": 15.298931121826172, "rewards/rejected": -20.698387145996094, "step": 3072 }, { "epoch": 5.29, "learning_rate": 2.2906927326816828e-07, "logits/chosen": -1.807847261428833, "logits/rejected": -1.9672831296920776, "logps/chosen": -143.96185302734375, "logps/rejected": -290.7276611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.658138275146484, "rewards/margins": 13.559854507446289, "rewards/rejected": -19.21799087524414, "step": 3073 }, { "epoch": 5.29, "learning_rate": 2.2896302592435188e-07, "logits/chosen": -2.05458927154541, "logits/rejected": -1.9878573417663574, "logps/chosen": -147.83551025390625, "logps/rejected": -255.4879150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.524254322052002, "rewards/margins": 11.968090057373047, "rewards/rejected": -17.49234390258789, "step": 3074 }, { "epoch": 5.29, "learning_rate": 2.2885677858053548e-07, "logits/chosen": -2.155545711517334, "logits/rejected": -1.4677371978759766, "logps/chosen": -150.05374145507812, "logps/rejected": -271.9063720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.624264717102051, "rewards/margins": 14.812399864196777, "rewards/rejected": -19.436664581298828, "step": 3075 }, { "epoch": 5.29, "learning_rate": 2.2875053123671908e-07, "logits/chosen": -1.6482738256454468, "logits/rejected": -1.8496062755584717, "logps/chosen": -159.43203735351562, "logps/rejected": -303.69915771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.992550849914551, "rewards/margins": 13.40738296508789, "rewards/rejected": -21.399934768676758, "step": 3076 }, { "epoch": 5.3, "learning_rate": 2.2864428389290265e-07, "logits/chosen": -1.7400892972946167, "logits/rejected": -1.995298147201538, "logps/chosen": -133.77630615234375, "logps/rejected": -300.8763732910156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.807653427124023, "rewards/margins": 15.793004989624023, "rewards/rejected": -20.600658416748047, "step": 3077 }, { "epoch": 5.3, "learning_rate": 2.2853803654908628e-07, "logits/chosen": -1.5820391178131104, "logits/rejected": -1.95248544216156, "logps/chosen": -143.47750854492188, "logps/rejected": -316.7316589355469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.395259380340576, "rewards/margins": 15.804830551147461, "rewards/rejected": -21.200090408325195, "step": 3078 }, { "epoch": 5.3, "learning_rate": 2.2843178920526988e-07, "logits/chosen": -1.5457106828689575, "logits/rejected": -1.9744817018508911, "logps/chosen": -133.04551696777344, "logps/rejected": -306.11639404296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.912478446960449, "rewards/margins": 15.342605590820312, "rewards/rejected": -21.255083084106445, "step": 3079 }, { "epoch": 5.3, "learning_rate": 2.2832554186145345e-07, "logits/chosen": -1.9432659149169922, "logits/rejected": -2.0326716899871826, "logps/chosen": -133.1251220703125, "logps/rejected": -248.33197021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.6140031814575195, "rewards/margins": 10.484634399414062, "rewards/rejected": -16.098636627197266, "step": 3080 }, { "epoch": 5.3, "learning_rate": 2.2821929451763705e-07, "logits/chosen": -1.8431096076965332, "logits/rejected": -1.8633321523666382, "logps/chosen": -153.71505737304688, "logps/rejected": -309.720458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.05894947052002, "rewards/margins": 14.825908660888672, "rewards/rejected": -22.884859085083008, "step": 3081 }, { "epoch": 5.3, "learning_rate": 2.2811304717382065e-07, "logits/chosen": -1.7858643531799316, "logits/rejected": -1.7326648235321045, "logps/chosen": -147.3937225341797, "logps/rejected": -252.11697387695312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.195295810699463, "rewards/margins": 10.473002433776855, "rewards/rejected": -17.668298721313477, "step": 3082 }, { "epoch": 5.31, "learning_rate": 2.2800679983000422e-07, "logits/chosen": -1.9303703308105469, "logits/rejected": -1.9749125242233276, "logps/chosen": -125.66925811767578, "logps/rejected": -225.62620544433594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.237357139587402, "rewards/margins": 10.133890151977539, "rewards/rejected": -15.371248245239258, "step": 3083 }, { "epoch": 5.31, "learning_rate": 2.2790055248618785e-07, "logits/chosen": -1.197432518005371, "logits/rejected": -1.989394187927246, "logps/chosen": -147.04319763183594, "logps/rejected": -295.390380859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.318070888519287, "rewards/margins": 13.100743293762207, "rewards/rejected": -20.418813705444336, "step": 3084 }, { "epoch": 5.31, "learning_rate": 2.2779430514237144e-07, "logits/chosen": -1.836946964263916, "logits/rejected": -1.7074785232543945, "logps/chosen": -188.23681640625, "logps/rejected": -276.9261169433594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.431037902832031, "rewards/margins": 10.073237419128418, "rewards/rejected": -19.504274368286133, "step": 3085 }, { "epoch": 5.31, "learning_rate": 2.2768805779855502e-07, "logits/chosen": -1.7436656951904297, "logits/rejected": -1.8767614364624023, "logps/chosen": -99.22296142578125, "logps/rejected": -256.7596130371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0552897453308105, "rewards/margins": 15.702611923217773, "rewards/rejected": -18.757902145385742, "step": 3086 }, { "epoch": 5.31, "learning_rate": 2.2758181045473862e-07, "logits/chosen": -2.150587797164917, "logits/rejected": -2.012144088745117, "logps/chosen": -177.83355712890625, "logps/rejected": -292.6025390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.4255876541137695, "rewards/margins": 11.753105163574219, "rewards/rejected": -19.178691864013672, "step": 3087 }, { "epoch": 5.31, "learning_rate": 2.2747556311092221e-07, "logits/chosen": -1.5946613550186157, "logits/rejected": -2.0991525650024414, "logps/chosen": -170.1899871826172, "logps/rejected": -299.4714660644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.831140041351318, "rewards/margins": 12.844160079956055, "rewards/rejected": -20.6752986907959, "step": 3088 }, { "epoch": 5.32, "learning_rate": 2.2736931576710581e-07, "logits/chosen": -1.6625745296478271, "logits/rejected": -1.9630857706069946, "logps/chosen": -117.33770751953125, "logps/rejected": -297.99407958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1413140296936035, "rewards/margins": 16.34450340270996, "rewards/rejected": -21.485816955566406, "step": 3089 }, { "epoch": 5.32, "learning_rate": 2.272630684232894e-07, "logits/chosen": -1.888136863708496, "logits/rejected": -1.6971389055252075, "logps/chosen": -139.57171630859375, "logps/rejected": -257.0776672363281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.299453258514404, "rewards/margins": 11.877957344055176, "rewards/rejected": -19.177410125732422, "step": 3090 }, { "epoch": 5.32, "learning_rate": 2.27156821079473e-07, "logits/chosen": -1.8104207515716553, "logits/rejected": -2.0403709411621094, "logps/chosen": -147.8704071044922, "logps/rejected": -264.1530456542969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.0155029296875, "rewards/margins": 12.427446365356445, "rewards/rejected": -18.442949295043945, "step": 3091 }, { "epoch": 5.32, "learning_rate": 2.2705057373565658e-07, "logits/chosen": -1.755650520324707, "logits/rejected": -1.98467218875885, "logps/chosen": -147.41574096679688, "logps/rejected": -272.07891845703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.9623236656188965, "rewards/margins": 10.858881950378418, "rewards/rejected": -18.821205139160156, "step": 3092 }, { "epoch": 5.32, "learning_rate": 2.2694432639184018e-07, "logits/chosen": -1.749325156211853, "logits/rejected": -1.9436967372894287, "logps/chosen": -143.5575408935547, "logps/rejected": -321.49505615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.203094005584717, "rewards/margins": 14.911158561706543, "rewards/rejected": -21.1142520904541, "step": 3093 }, { "epoch": 5.33, "learning_rate": 2.2683807904802378e-07, "logits/chosen": -1.5659712553024292, "logits/rejected": -1.9174737930297852, "logps/chosen": -162.56500244140625, "logps/rejected": -343.5448303222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.256371974945068, "rewards/margins": 17.991044998168945, "rewards/rejected": -25.247417449951172, "step": 3094 }, { "epoch": 5.33, "learning_rate": 2.2673183170420738e-07, "logits/chosen": -1.679699420928955, "logits/rejected": -2.017836809158325, "logps/chosen": -107.08883666992188, "logps/rejected": -260.05816650390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.275871992111206, "rewards/margins": 14.269651412963867, "rewards/rejected": -17.54552459716797, "step": 3095 }, { "epoch": 5.33, "learning_rate": 2.2662558436039098e-07, "logits/chosen": -1.4862385988235474, "logits/rejected": -1.9804248809814453, "logps/chosen": -139.94766235351562, "logps/rejected": -289.2088623046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.7985053062438965, "rewards/margins": 14.156159400939941, "rewards/rejected": -19.95466423034668, "step": 3096 }, { "epoch": 5.33, "learning_rate": 2.2651933701657458e-07, "logits/chosen": -1.637572169303894, "logits/rejected": -1.968087911605835, "logps/chosen": -162.95013427734375, "logps/rejected": -337.17547607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.441096305847168, "rewards/margins": 16.113880157470703, "rewards/rejected": -24.554977416992188, "step": 3097 }, { "epoch": 5.33, "learning_rate": 2.2641308967275818e-07, "logits/chosen": -1.8067554235458374, "logits/rejected": -1.7021647691726685, "logps/chosen": -111.7017822265625, "logps/rejected": -243.7063751220703, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.06001615524292, "rewards/margins": 14.234622955322266, "rewards/rejected": -17.294639587402344, "step": 3098 }, { "epoch": 5.33, "learning_rate": 2.2630684232894175e-07, "logits/chosen": -2.1388683319091797, "logits/rejected": -1.858830451965332, "logps/chosen": -149.10975646972656, "logps/rejected": -277.483154296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.723221778869629, "rewards/margins": 14.93878173828125, "rewards/rejected": -20.662004470825195, "step": 3099 }, { "epoch": 5.34, "learning_rate": 2.2620059498512538e-07, "logits/chosen": -1.756942629814148, "logits/rejected": -1.835118293762207, "logps/chosen": -119.06884765625, "logps/rejected": -281.568115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.145259857177734, "rewards/margins": 15.252864837646484, "rewards/rejected": -19.39812469482422, "step": 3100 }, { "epoch": 5.34, "learning_rate": 2.2609434764130897e-07, "logits/chosen": -1.9332952499389648, "logits/rejected": -1.9395257234573364, "logps/chosen": -142.0472412109375, "logps/rejected": -286.44189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.059970855712891, "rewards/margins": 14.039300918579102, "rewards/rejected": -20.099271774291992, "step": 3101 }, { "epoch": 5.34, "learning_rate": 2.2598810029749255e-07, "logits/chosen": -2.002603054046631, "logits/rejected": -1.7867965698242188, "logps/chosen": -94.74247741699219, "logps/rejected": -222.09152221679688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9482083320617676, "rewards/margins": 12.492526054382324, "rewards/rejected": -16.44073486328125, "step": 3102 }, { "epoch": 5.34, "learning_rate": 2.2588185295367615e-07, "logits/chosen": -1.885148286819458, "logits/rejected": -1.953305959701538, "logps/chosen": -137.74453735351562, "logps/rejected": -261.072021484375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.288819313049316, "rewards/margins": 11.79152774810791, "rewards/rejected": -17.080345153808594, "step": 3103 }, { "epoch": 5.34, "learning_rate": 2.2577560560985974e-07, "logits/chosen": -1.9295196533203125, "logits/rejected": -1.698885202407837, "logps/chosen": -98.29141235351562, "logps/rejected": -243.80801391601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.909980535507202, "rewards/margins": 13.718803405761719, "rewards/rejected": -17.6287841796875, "step": 3104 }, { "epoch": 5.34, "learning_rate": 2.2566935826604334e-07, "logits/chosen": -1.5329256057739258, "logits/rejected": -1.8206264972686768, "logps/chosen": -158.08901977539062, "logps/rejected": -324.1402893066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.42809534072876, "rewards/margins": 14.848111152648926, "rewards/rejected": -22.276206970214844, "step": 3105 }, { "epoch": 5.35, "learning_rate": 2.2556311092222694e-07, "logits/chosen": -1.8363478183746338, "logits/rejected": -1.8797599077224731, "logps/chosen": -132.66143798828125, "logps/rejected": -284.68109130859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.973385810852051, "rewards/margins": 15.421828269958496, "rewards/rejected": -20.395214080810547, "step": 3106 }, { "epoch": 5.35, "learning_rate": 2.2545686357841054e-07, "logits/chosen": -1.7595616579055786, "logits/rejected": -1.9533090591430664, "logps/chosen": -150.50393676757812, "logps/rejected": -257.5505065917969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.211265563964844, "rewards/margins": 10.136012077331543, "rewards/rejected": -17.34727668762207, "step": 3107 }, { "epoch": 5.35, "learning_rate": 2.2535061623459411e-07, "logits/chosen": -1.5084621906280518, "logits/rejected": -1.9225690364837646, "logps/chosen": -159.68447875976562, "logps/rejected": -299.41265869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.642269611358643, "rewards/margins": 12.296245574951172, "rewards/rejected": -19.938514709472656, "step": 3108 }, { "epoch": 5.35, "learning_rate": 2.252443688907777e-07, "logits/chosen": -1.7430434226989746, "logits/rejected": -1.7492324113845825, "logps/chosen": -153.63702392578125, "logps/rejected": -266.99932861328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.206759452819824, "rewards/margins": 11.995238304138184, "rewards/rejected": -19.201997756958008, "step": 3109 }, { "epoch": 5.35, "learning_rate": 2.251381215469613e-07, "logits/chosen": -1.8395079374313354, "logits/rejected": -1.9006394147872925, "logps/chosen": -126.99087524414062, "logps/rejected": -308.0126953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.990999937057495, "rewards/margins": 16.9064998626709, "rewards/rejected": -20.897499084472656, "step": 3110 }, { "epoch": 5.35, "learning_rate": 2.250318742031449e-07, "logits/chosen": -1.9027116298675537, "logits/rejected": -1.7011268138885498, "logps/chosen": -140.82090759277344, "logps/rejected": -268.095458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.362127304077148, "rewards/margins": 14.295426368713379, "rewards/rejected": -19.657554626464844, "step": 3111 }, { "epoch": 5.36, "learning_rate": 2.249256268593285e-07, "logits/chosen": -1.698112964630127, "logits/rejected": -1.8926094770431519, "logps/chosen": -107.29705047607422, "logps/rejected": -248.20516967773438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.379688739776611, "rewards/margins": 13.149528503417969, "rewards/rejected": -17.529216766357422, "step": 3112 }, { "epoch": 5.36, "learning_rate": 2.248193795155121e-07, "logits/chosen": -1.8511333465576172, "logits/rejected": -2.042160987854004, "logps/chosen": -112.4508056640625, "logps/rejected": -284.329833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.278889179229736, "rewards/margins": 16.229026794433594, "rewards/rejected": -20.507915496826172, "step": 3113 }, { "epoch": 5.36, "learning_rate": 2.2471313217169568e-07, "logits/chosen": -1.8218095302581787, "logits/rejected": -1.35793137550354, "logps/chosen": -125.38677215576172, "logps/rejected": -239.84243774414062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.813877105712891, "rewards/margins": 11.411924362182617, "rewards/rejected": -17.22580337524414, "step": 3114 }, { "epoch": 5.36, "learning_rate": 2.2460688482787928e-07, "logits/chosen": -1.7466645240783691, "logits/rejected": -1.7847118377685547, "logps/chosen": -188.8447265625, "logps/rejected": -364.533203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.311723709106445, "rewards/margins": 17.065235137939453, "rewards/rejected": -27.37696075439453, "step": 3115 }, { "epoch": 5.36, "learning_rate": 2.245006374840629e-07, "logits/chosen": -1.5247788429260254, "logits/rejected": -2.132284164428711, "logps/chosen": -131.4318389892578, "logps/rejected": -338.49267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.418778419494629, "rewards/margins": 18.175312042236328, "rewards/rejected": -23.59408950805664, "step": 3116 }, { "epoch": 5.36, "learning_rate": 2.2439439014024648e-07, "logits/chosen": -2.1747379302978516, "logits/rejected": -2.1177940368652344, "logps/chosen": -148.79473876953125, "logps/rejected": -286.7766418457031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.245092391967773, "rewards/margins": 13.648153305053711, "rewards/rejected": -20.893245697021484, "step": 3117 }, { "epoch": 5.37, "learning_rate": 2.2428814279643008e-07, "logits/chosen": -1.7290273904800415, "logits/rejected": -2.1079206466674805, "logps/chosen": -122.17430114746094, "logps/rejected": -295.0968322753906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.159682750701904, "rewards/margins": 15.5360689163208, "rewards/rejected": -21.695751190185547, "step": 3118 }, { "epoch": 5.37, "learning_rate": 2.2418189545261367e-07, "logits/chosen": -1.8536770343780518, "logits/rejected": -1.5815259218215942, "logps/chosen": -155.64520263671875, "logps/rejected": -282.6091003417969, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.9229021072387695, "rewards/margins": 13.332844734191895, "rewards/rejected": -20.255746841430664, "step": 3119 }, { "epoch": 5.37, "learning_rate": 2.2407564810879727e-07, "logits/chosen": -1.776992917060852, "logits/rejected": -1.6832482814788818, "logps/chosen": -192.13050842285156, "logps/rejected": -354.08233642578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.1057710647583, "rewards/margins": 16.886478424072266, "rewards/rejected": -25.99224853515625, "step": 3120 }, { "epoch": 5.37, "learning_rate": 2.2396940076498085e-07, "logits/chosen": -1.849747896194458, "logits/rejected": -1.8218504190444946, "logps/chosen": -144.61184692382812, "logps/rejected": -260.5394287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.780836582183838, "rewards/margins": 11.932289123535156, "rewards/rejected": -18.71312713623047, "step": 3121 }, { "epoch": 5.37, "learning_rate": 2.2386315342116447e-07, "logits/chosen": -2.0348854064941406, "logits/rejected": -1.902981162071228, "logps/chosen": -176.13998413085938, "logps/rejected": -297.0821533203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.730895042419434, "rewards/margins": 12.129341125488281, "rewards/rejected": -19.8602352142334, "step": 3122 }, { "epoch": 5.38, "learning_rate": 2.2375690607734807e-07, "logits/chosen": -1.829807162284851, "logits/rejected": -2.117898464202881, "logps/chosen": -121.83697509765625, "logps/rejected": -284.255615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.021164894104004, "rewards/margins": 14.981745719909668, "rewards/rejected": -20.002910614013672, "step": 3123 }, { "epoch": 5.38, "learning_rate": 2.2365065873353164e-07, "logits/chosen": -1.9846400022506714, "logits/rejected": -2.0050253868103027, "logps/chosen": -168.1082763671875, "logps/rejected": -320.0431823730469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.806005477905273, "rewards/margins": 14.73485279083252, "rewards/rejected": -22.540857315063477, "step": 3124 }, { "epoch": 5.38, "learning_rate": 2.2354441138971524e-07, "logits/chosen": -1.5689032077789307, "logits/rejected": -1.828008770942688, "logps/chosen": -138.15872192382812, "logps/rejected": -316.8389892578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.161396503448486, "rewards/margins": 16.84207534790039, "rewards/rejected": -23.00347328186035, "step": 3125 }, { "epoch": 5.38, "learning_rate": 2.2343816404589884e-07, "logits/chosen": -1.722171664237976, "logits/rejected": -2.008432626724243, "logps/chosen": -167.18690490722656, "logps/rejected": -342.21917724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2540812492370605, "rewards/margins": 16.46034049987793, "rewards/rejected": -23.71442222595215, "step": 3126 }, { "epoch": 5.38, "learning_rate": 2.2333191670208244e-07, "logits/chosen": -1.9140979051589966, "logits/rejected": -1.8731515407562256, "logps/chosen": -143.2040557861328, "logps/rejected": -263.4169616699219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.4426045417785645, "rewards/margins": 11.81320571899414, "rewards/rejected": -17.255809783935547, "step": 3127 }, { "epoch": 5.38, "learning_rate": 2.2322566935826604e-07, "logits/chosen": -1.9471564292907715, "logits/rejected": -2.0820398330688477, "logps/chosen": -113.71973419189453, "logps/rejected": -284.6634521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8658738136291504, "rewards/margins": 16.158973693847656, "rewards/rejected": -20.024845123291016, "step": 3128 }, { "epoch": 5.39, "learning_rate": 2.2311942201444964e-07, "logits/chosen": -2.021686553955078, "logits/rejected": -1.8578686714172363, "logps/chosen": -194.26979064941406, "logps/rejected": -303.4630432128906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.60644245147705, "rewards/margins": 11.885984420776367, "rewards/rejected": -21.492427825927734, "step": 3129 }, { "epoch": 5.39, "learning_rate": 2.230131746706332e-07, "logits/chosen": -2.1450095176696777, "logits/rejected": -1.5755667686462402, "logps/chosen": -205.19491577148438, "logps/rejected": -304.21246337890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.301713943481445, "rewards/margins": 11.856415748596191, "rewards/rejected": -21.158130645751953, "step": 3130 }, { "epoch": 5.39, "learning_rate": 2.229069273268168e-07, "logits/chosen": -1.8916347026824951, "logits/rejected": -1.9689103364944458, "logps/chosen": -152.30287170410156, "logps/rejected": -333.3814697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.653239727020264, "rewards/margins": 16.79655647277832, "rewards/rejected": -21.44979476928711, "step": 3131 }, { "epoch": 5.39, "learning_rate": 2.2280067998300043e-07, "logits/chosen": -2.2731587886810303, "logits/rejected": -1.8363182544708252, "logps/chosen": -177.5804443359375, "logps/rejected": -292.89031982421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.439995765686035, "rewards/margins": 13.140623092651367, "rewards/rejected": -20.58061981201172, "step": 3132 }, { "epoch": 5.39, "learning_rate": 2.22694432639184e-07, "logits/chosen": -1.8207941055297852, "logits/rejected": -1.6984931230545044, "logps/chosen": -142.30322265625, "logps/rejected": -302.4699401855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.538998603820801, "rewards/margins": 15.65847396850586, "rewards/rejected": -21.197473526000977, "step": 3133 }, { "epoch": 5.39, "learning_rate": 2.225881852953676e-07, "logits/chosen": -1.579143762588501, "logits/rejected": -2.077075481414795, "logps/chosen": -162.88485717773438, "logps/rejected": -308.4357604980469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.34937858581543, "rewards/margins": 11.668652534484863, "rewards/rejected": -20.01803207397461, "step": 3134 }, { "epoch": 5.4, "learning_rate": 2.224819379515512e-07, "logits/chosen": -1.594923734664917, "logits/rejected": -2.0466012954711914, "logps/chosen": -166.59259033203125, "logps/rejected": -288.5339050292969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.041622161865234, "rewards/margins": 10.766022682189941, "rewards/rejected": -18.80764389038086, "step": 3135 }, { "epoch": 5.4, "learning_rate": 2.2237569060773478e-07, "logits/chosen": -1.8177626132965088, "logits/rejected": -2.0306448936462402, "logps/chosen": -191.70562744140625, "logps/rejected": -309.1379699707031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.871992111206055, "rewards/margins": 12.362842559814453, "rewards/rejected": -22.234832763671875, "step": 3136 }, { "epoch": 5.4, "learning_rate": 2.2226944326391838e-07, "logits/chosen": -1.93369460105896, "logits/rejected": -1.9311535358428955, "logps/chosen": -143.7947998046875, "logps/rejected": -290.7330322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.593366622924805, "rewards/margins": 15.503429412841797, "rewards/rejected": -21.0967960357666, "step": 3137 }, { "epoch": 5.4, "learning_rate": 2.22163195920102e-07, "logits/chosen": -2.018005847930908, "logits/rejected": -1.7595932483673096, "logps/chosen": -153.48941040039062, "logps/rejected": -315.9073486328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.515279769897461, "rewards/margins": 17.191343307495117, "rewards/rejected": -23.706623077392578, "step": 3138 }, { "epoch": 5.4, "learning_rate": 2.220569485762856e-07, "logits/chosen": -2.0382094383239746, "logits/rejected": -1.6733214855194092, "logps/chosen": -151.9879608154297, "logps/rejected": -281.79986572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.0530877113342285, "rewards/margins": 14.464916229248047, "rewards/rejected": -20.518003463745117, "step": 3139 }, { "epoch": 5.4, "learning_rate": 2.2195070123246917e-07, "logits/chosen": -1.9362152814865112, "logits/rejected": -1.7597845792770386, "logps/chosen": -168.63987731933594, "logps/rejected": -306.42236328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.2112016677856445, "rewards/margins": 14.029787063598633, "rewards/rejected": -20.24098777770996, "step": 3140 }, { "epoch": 5.41, "learning_rate": 2.2184445388865277e-07, "logits/chosen": -1.7741574048995972, "logits/rejected": -1.7352371215820312, "logps/chosen": -121.09716033935547, "logps/rejected": -244.689208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.010416030883789, "rewards/margins": 11.481003761291504, "rewards/rejected": -16.491418838500977, "step": 3141 }, { "epoch": 5.41, "learning_rate": 2.2173820654483637e-07, "logits/chosen": -1.4142539501190186, "logits/rejected": -1.7629363536834717, "logps/chosen": -123.94895935058594, "logps/rejected": -273.910888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.528130531311035, "rewards/margins": 14.36422348022461, "rewards/rejected": -19.89235496520996, "step": 3142 }, { "epoch": 5.41, "learning_rate": 2.2163195920101997e-07, "logits/chosen": -1.9375689029693604, "logits/rejected": -1.9951127767562866, "logps/chosen": -192.44302368164062, "logps/rejected": -310.2738037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.444648742675781, "rewards/margins": 12.973856925964355, "rewards/rejected": -21.41850471496582, "step": 3143 }, { "epoch": 5.41, "learning_rate": 2.2152571185720357e-07, "logits/chosen": -1.7255198955535889, "logits/rejected": -2.01815128326416, "logps/chosen": -146.61045837402344, "logps/rejected": -287.8861083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.681950569152832, "rewards/margins": 14.105712890625, "rewards/rejected": -18.787662506103516, "step": 3144 }, { "epoch": 5.41, "learning_rate": 2.2141946451338717e-07, "logits/chosen": -2.169532537460327, "logits/rejected": -2.0376479625701904, "logps/chosen": -149.11805725097656, "logps/rejected": -320.6923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.615950584411621, "rewards/margins": 16.38714027404785, "rewards/rejected": -22.00309181213379, "step": 3145 }, { "epoch": 5.41, "learning_rate": 2.2131321716957074e-07, "logits/chosen": -1.9186261892318726, "logits/rejected": -2.117379665374756, "logps/chosen": -161.2353515625, "logps/rejected": -278.9854431152344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.199155807495117, "rewards/margins": 12.287694931030273, "rewards/rejected": -18.48685073852539, "step": 3146 }, { "epoch": 5.42, "learning_rate": 2.2120696982575434e-07, "logits/chosen": -1.5370897054672241, "logits/rejected": -1.5513668060302734, "logps/chosen": -127.91191864013672, "logps/rejected": -242.81373596191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.671425819396973, "rewards/margins": 11.54153823852539, "rewards/rejected": -17.212963104248047, "step": 3147 }, { "epoch": 5.42, "learning_rate": 2.2110072248193796e-07, "logits/chosen": -1.7646194696426392, "logits/rejected": -1.9789984226226807, "logps/chosen": -129.49757385253906, "logps/rejected": -230.17742919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.0365376472473145, "rewards/margins": 10.4442138671875, "rewards/rejected": -17.480751037597656, "step": 3148 }, { "epoch": 5.42, "learning_rate": 2.2099447513812154e-07, "logits/chosen": -1.904752492904663, "logits/rejected": -2.063148021697998, "logps/chosen": -165.30213928222656, "logps/rejected": -323.7644958496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.3577880859375, "rewards/margins": 15.617972373962402, "rewards/rejected": -23.975759506225586, "step": 3149 }, { "epoch": 5.42, "learning_rate": 2.2088822779430513e-07, "logits/chosen": -1.9446417093276978, "logits/rejected": -2.0444369316101074, "logps/chosen": -139.5073699951172, "logps/rejected": -253.529296875, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -6.695865631103516, "rewards/margins": 10.286968231201172, "rewards/rejected": -16.982833862304688, "step": 3150 }, { "epoch": 5.42, "learning_rate": 2.2078198045048873e-07, "logits/chosen": -1.9270687103271484, "logits/rejected": -2.0398459434509277, "logps/chosen": -136.90614318847656, "logps/rejected": -313.41961669921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.859428405761719, "rewards/margins": 15.064095497131348, "rewards/rejected": -20.923524856567383, "step": 3151 }, { "epoch": 5.43, "learning_rate": 2.206757331066723e-07, "logits/chosen": -1.7722718715667725, "logits/rejected": -2.0690808296203613, "logps/chosen": -186.1407928466797, "logps/rejected": -306.5541076660156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.849417686462402, "rewards/margins": 10.042657852172852, "rewards/rejected": -18.892074584960938, "step": 3152 }, { "epoch": 5.43, "learning_rate": 2.205694857628559e-07, "logits/chosen": -1.6015410423278809, "logits/rejected": -2.11907958984375, "logps/chosen": -129.999755859375, "logps/rejected": -281.2840270996094, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.032685279846191, "rewards/margins": 13.001412391662598, "rewards/rejected": -18.034095764160156, "step": 3153 }, { "epoch": 5.43, "learning_rate": 2.2046323841903953e-07, "logits/chosen": -2.0949878692626953, "logits/rejected": -2.1015968322753906, "logps/chosen": -153.62864685058594, "logps/rejected": -257.93756103515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.031984329223633, "rewards/margins": 10.559789657592773, "rewards/rejected": -17.591773986816406, "step": 3154 }, { "epoch": 5.43, "learning_rate": 2.203569910752231e-07, "logits/chosen": -1.6971499919891357, "logits/rejected": -1.9602701663970947, "logps/chosen": -145.0244598388672, "logps/rejected": -276.9139404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.790083885192871, "rewards/margins": 12.340935707092285, "rewards/rejected": -20.13102149963379, "step": 3155 }, { "epoch": 5.43, "learning_rate": 2.202507437314067e-07, "logits/chosen": -1.6825796365737915, "logits/rejected": -1.8845696449279785, "logps/chosen": -144.3638458251953, "logps/rejected": -309.4263916015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.202328681945801, "rewards/margins": 15.108173370361328, "rewards/rejected": -22.310503005981445, "step": 3156 }, { "epoch": 5.43, "learning_rate": 2.201444963875903e-07, "logits/chosen": -1.7498695850372314, "logits/rejected": -1.719796895980835, "logps/chosen": -158.46237182617188, "logps/rejected": -281.6934509277344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.052334308624268, "rewards/margins": 11.681278228759766, "rewards/rejected": -18.733612060546875, "step": 3157 }, { "epoch": 5.44, "learning_rate": 2.2003824904377387e-07, "logits/chosen": -1.596630573272705, "logits/rejected": -2.12449312210083, "logps/chosen": -113.33257293701172, "logps/rejected": -309.47076416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6354525089263916, "rewards/margins": 18.14570426940918, "rewards/rejected": -21.781158447265625, "step": 3158 }, { "epoch": 5.44, "learning_rate": 2.199320016999575e-07, "logits/chosen": -1.8566313982009888, "logits/rejected": -1.9815399646759033, "logps/chosen": -187.30079650878906, "logps/rejected": -306.0569152832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.916450500488281, "rewards/margins": 12.12319564819336, "rewards/rejected": -21.03964614868164, "step": 3159 }, { "epoch": 5.44, "learning_rate": 2.198257543561411e-07, "logits/chosen": -1.9432694911956787, "logits/rejected": -1.8784971237182617, "logps/chosen": -160.3899688720703, "logps/rejected": -262.385986328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.5391716957092285, "rewards/margins": 10.63629150390625, "rewards/rejected": -16.17546272277832, "step": 3160 }, { "epoch": 5.44, "learning_rate": 2.197195070123247e-07, "logits/chosen": -1.6394933462142944, "logits/rejected": -2.330522060394287, "logps/chosen": -142.90087890625, "logps/rejected": -298.4557800292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.994387626647949, "rewards/margins": 14.269410133361816, "rewards/rejected": -20.263797760009766, "step": 3161 }, { "epoch": 5.44, "learning_rate": 2.1961325966850827e-07, "logits/chosen": -1.4534196853637695, "logits/rejected": -2.133498430252075, "logps/chosen": -91.13313293457031, "logps/rejected": -296.4799499511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.474539041519165, "rewards/margins": 18.462257385253906, "rewards/rejected": -20.936798095703125, "step": 3162 }, { "epoch": 5.44, "learning_rate": 2.1950701232469187e-07, "logits/chosen": -2.0039706230163574, "logits/rejected": -1.5999658107757568, "logps/chosen": -138.9867401123047, "logps/rejected": -268.915771484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.067291259765625, "rewards/margins": 14.187174797058105, "rewards/rejected": -19.254467010498047, "step": 3163 }, { "epoch": 5.45, "learning_rate": 2.194007649808755e-07, "logits/chosen": -2.110382318496704, "logits/rejected": -1.7776775360107422, "logps/chosen": -134.9442138671875, "logps/rejected": -224.96224975585938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.303257465362549, "rewards/margins": 9.956050872802734, "rewards/rejected": -15.259307861328125, "step": 3164 }, { "epoch": 5.45, "learning_rate": 2.1929451763705907e-07, "logits/chosen": -1.8927162885665894, "logits/rejected": -1.8523669242858887, "logps/chosen": -186.00576782226562, "logps/rejected": -309.3597717285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.446846008300781, "rewards/margins": 14.922959327697754, "rewards/rejected": -21.36980438232422, "step": 3165 }, { "epoch": 5.45, "learning_rate": 2.1918827029324266e-07, "logits/chosen": -1.7551754713058472, "logits/rejected": -1.9280261993408203, "logps/chosen": -159.887939453125, "logps/rejected": -322.11602783203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.796111106872559, "rewards/margins": 15.429214477539062, "rewards/rejected": -21.225326538085938, "step": 3166 }, { "epoch": 5.45, "learning_rate": 2.1908202294942626e-07, "logits/chosen": -1.7120035886764526, "logits/rejected": -1.9129009246826172, "logps/chosen": -116.59320068359375, "logps/rejected": -278.9957275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.105064392089844, "rewards/margins": 15.282943725585938, "rewards/rejected": -19.38800811767578, "step": 3167 }, { "epoch": 5.45, "learning_rate": 2.1897577560560984e-07, "logits/chosen": -1.9003925323486328, "logits/rejected": -1.7136445045471191, "logps/chosen": -175.2487335205078, "logps/rejected": -277.837646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.399168014526367, "rewards/margins": 11.448610305786133, "rewards/rejected": -19.847780227661133, "step": 3168 }, { "epoch": 5.45, "learning_rate": 2.1886952826179343e-07, "logits/chosen": -1.9266386032104492, "logits/rejected": -1.7101683616638184, "logps/chosen": -100.17090606689453, "logps/rejected": -228.95919799804688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9131269454956055, "rewards/margins": 14.118911743164062, "rewards/rejected": -18.032039642333984, "step": 3169 }, { "epoch": 5.46, "learning_rate": 2.1876328091797706e-07, "logits/chosen": -1.937480092048645, "logits/rejected": -1.7523332834243774, "logps/chosen": -196.61219787597656, "logps/rejected": -333.76922607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.424845695495605, "rewards/margins": 13.401524543762207, "rewards/rejected": -23.82636833190918, "step": 3170 }, { "epoch": 5.46, "learning_rate": 2.1865703357416063e-07, "logits/chosen": -1.7640008926391602, "logits/rejected": -2.0085158348083496, "logps/chosen": -141.7305450439453, "logps/rejected": -281.626708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.648868560791016, "rewards/margins": 13.279144287109375, "rewards/rejected": -19.928010940551758, "step": 3171 }, { "epoch": 5.46, "learning_rate": 2.1855078623034423e-07, "logits/chosen": -1.6762837171554565, "logits/rejected": -1.6202607154846191, "logps/chosen": -119.80840301513672, "logps/rejected": -260.9976806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.189392566680908, "rewards/margins": 12.215036392211914, "rewards/rejected": -18.404428482055664, "step": 3172 }, { "epoch": 5.46, "learning_rate": 2.1844453888652783e-07, "logits/chosen": -2.017853260040283, "logits/rejected": -1.7816368341445923, "logps/chosen": -146.74722290039062, "logps/rejected": -287.58782958984375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -5.536623001098633, "rewards/margins": 14.817420959472656, "rewards/rejected": -20.354042053222656, "step": 3173 }, { "epoch": 5.46, "learning_rate": 2.183382915427114e-07, "logits/chosen": -1.7977819442749023, "logits/rejected": -1.9350008964538574, "logps/chosen": -119.82223510742188, "logps/rejected": -302.10009765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.204113483428955, "rewards/margins": 16.525827407836914, "rewards/rejected": -19.729942321777344, "step": 3174 }, { "epoch": 5.46, "learning_rate": 2.1823204419889503e-07, "logits/chosen": -2.0133309364318848, "logits/rejected": -2.1482462882995605, "logps/chosen": -144.67739868164062, "logps/rejected": -295.544921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.729177474975586, "rewards/margins": 14.092694282531738, "rewards/rejected": -20.82187271118164, "step": 3175 }, { "epoch": 5.47, "learning_rate": 2.1812579685507863e-07, "logits/chosen": -2.0272586345672607, "logits/rejected": -1.768902063369751, "logps/chosen": -145.69406127929688, "logps/rejected": -269.0284423828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.408358573913574, "rewards/margins": 12.957073211669922, "rewards/rejected": -19.36543083190918, "step": 3176 }, { "epoch": 5.47, "learning_rate": 2.180195495112622e-07, "logits/chosen": -2.1358249187469482, "logits/rejected": -2.051241159439087, "logps/chosen": -141.85769653320312, "logps/rejected": -249.2509307861328, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.870115280151367, "rewards/margins": 11.052757263183594, "rewards/rejected": -17.92287254333496, "step": 3177 }, { "epoch": 5.47, "learning_rate": 2.179133021674458e-07, "logits/chosen": -2.1404194831848145, "logits/rejected": -1.8940718173980713, "logps/chosen": -167.64361572265625, "logps/rejected": -322.4163818359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.850230693817139, "rewards/margins": 14.731847763061523, "rewards/rejected": -22.582077026367188, "step": 3178 }, { "epoch": 5.47, "learning_rate": 2.178070548236294e-07, "logits/chosen": -1.692527413368225, "logits/rejected": -1.9932903051376343, "logps/chosen": -155.17898559570312, "logps/rejected": -323.27740478515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.69224739074707, "rewards/margins": 15.729304313659668, "rewards/rejected": -22.421552658081055, "step": 3179 }, { "epoch": 5.47, "learning_rate": 2.1770080747981297e-07, "logits/chosen": -1.7050329446792603, "logits/rejected": -1.8072052001953125, "logps/chosen": -154.68067932128906, "logps/rejected": -289.7035827636719, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.448400974273682, "rewards/margins": 13.437507629394531, "rewards/rejected": -20.885910034179688, "step": 3180 }, { "epoch": 5.48, "learning_rate": 2.175945601359966e-07, "logits/chosen": -1.9317514896392822, "logits/rejected": -1.5643807649612427, "logps/chosen": -135.05474853515625, "logps/rejected": -342.1269226074219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8693971633911133, "rewards/margins": 22.494388580322266, "rewards/rejected": -26.363786697387695, "step": 3181 }, { "epoch": 5.48, "learning_rate": 2.174883127921802e-07, "logits/chosen": -2.119896411895752, "logits/rejected": -1.888977289199829, "logps/chosen": -145.43844604492188, "logps/rejected": -267.4757080078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.986374855041504, "rewards/margins": 13.872727394104004, "rewards/rejected": -19.859102249145508, "step": 3182 }, { "epoch": 5.48, "learning_rate": 2.173820654483638e-07, "logits/chosen": -1.913942813873291, "logits/rejected": -1.6998199224472046, "logps/chosen": -141.8682861328125, "logps/rejected": -288.3936767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.604781150817871, "rewards/margins": 15.500327110290527, "rewards/rejected": -22.1051082611084, "step": 3183 }, { "epoch": 5.48, "learning_rate": 2.1727581810454737e-07, "logits/chosen": -1.6633539199829102, "logits/rejected": -1.991815447807312, "logps/chosen": -134.46371459960938, "logps/rejected": -232.0272674560547, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.9054388999938965, "rewards/margins": 9.478691101074219, "rewards/rejected": -16.384130477905273, "step": 3184 }, { "epoch": 5.48, "learning_rate": 2.1716957076073096e-07, "logits/chosen": -1.9735933542251587, "logits/rejected": -1.9299514293670654, "logps/chosen": -157.31703186035156, "logps/rejected": -302.94378662109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.870356559753418, "rewards/margins": 14.777881622314453, "rewards/rejected": -20.648239135742188, "step": 3185 }, { "epoch": 5.48, "learning_rate": 2.170633234169146e-07, "logits/chosen": -1.8954377174377441, "logits/rejected": -1.9602971076965332, "logps/chosen": -181.1857147216797, "logps/rejected": -284.59661865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.374792098999023, "rewards/margins": 10.739091873168945, "rewards/rejected": -20.113882064819336, "step": 3186 }, { "epoch": 5.49, "learning_rate": 2.1695707607309816e-07, "logits/chosen": -1.8706103563308716, "logits/rejected": -1.5585757493972778, "logps/chosen": -153.75732421875, "logps/rejected": -304.66339111328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.515194892883301, "rewards/margins": 15.088008880615234, "rewards/rejected": -21.60320472717285, "step": 3187 }, { "epoch": 5.49, "learning_rate": 2.1685082872928176e-07, "logits/chosen": -1.8225469589233398, "logits/rejected": -2.0837063789367676, "logps/chosen": -152.5930938720703, "logps/rejected": -302.83966064453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.531585693359375, "rewards/margins": 14.816978454589844, "rewards/rejected": -22.34856605529785, "step": 3188 }, { "epoch": 5.49, "learning_rate": 2.1674458138546536e-07, "logits/chosen": -1.8277573585510254, "logits/rejected": -1.9873236417770386, "logps/chosen": -153.120849609375, "logps/rejected": -337.0594482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.607328414916992, "rewards/margins": 18.88716697692871, "rewards/rejected": -24.494495391845703, "step": 3189 }, { "epoch": 5.49, "learning_rate": 2.1663833404164893e-07, "logits/chosen": -1.81316339969635, "logits/rejected": -2.007133960723877, "logps/chosen": -163.94297790527344, "logps/rejected": -294.1089172363281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.195849418640137, "rewards/margins": 13.691585540771484, "rewards/rejected": -20.887434005737305, "step": 3190 }, { "epoch": 5.49, "learning_rate": 2.1653208669783256e-07, "logits/chosen": -1.9563871622085571, "logits/rejected": -1.7121680974960327, "logps/chosen": -165.1893310546875, "logps/rejected": -289.67791748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.737720489501953, "rewards/margins": 11.993061065673828, "rewards/rejected": -20.73078155517578, "step": 3191 }, { "epoch": 5.49, "learning_rate": 2.1642583935401616e-07, "logits/chosen": -1.6576809883117676, "logits/rejected": -2.0879781246185303, "logps/chosen": -154.95291137695312, "logps/rejected": -322.2140808105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.853307723999023, "rewards/margins": 15.01285171508789, "rewards/rejected": -22.866161346435547, "step": 3192 }, { "epoch": 5.5, "learning_rate": 2.1631959201019973e-07, "logits/chosen": -2.0565333366394043, "logits/rejected": -1.7807413339614868, "logps/chosen": -138.45260620117188, "logps/rejected": -283.9274597167969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.99208927154541, "rewards/margins": 14.521745681762695, "rewards/rejected": -20.51383399963379, "step": 3193 }, { "epoch": 5.5, "learning_rate": 2.1621334466638333e-07, "logits/chosen": -1.7730844020843506, "logits/rejected": -1.8369100093841553, "logps/chosen": -108.41842651367188, "logps/rejected": -308.0651550292969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.653843402862549, "rewards/margins": 18.851049423217773, "rewards/rejected": -23.504892349243164, "step": 3194 }, { "epoch": 5.5, "learning_rate": 2.1610709732256693e-07, "logits/chosen": -1.5598394870758057, "logits/rejected": -1.9258538484573364, "logps/chosen": -136.81381225585938, "logps/rejected": -262.9710693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.350366115570068, "rewards/margins": 12.30721664428711, "rewards/rejected": -18.657583236694336, "step": 3195 }, { "epoch": 5.5, "learning_rate": 2.160008499787505e-07, "logits/chosen": -1.2928295135498047, "logits/rejected": -2.095905303955078, "logps/chosen": -159.80662536621094, "logps/rejected": -278.5296630859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.0644612312316895, "rewards/margins": 11.239890098571777, "rewards/rejected": -18.304351806640625, "step": 3196 }, { "epoch": 5.5, "learning_rate": 2.1589460263493412e-07, "logits/chosen": -2.0041027069091797, "logits/rejected": -1.8541781902313232, "logps/chosen": -167.1494903564453, "logps/rejected": -328.11419677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.403304100036621, "rewards/margins": 16.6019344329834, "rewards/rejected": -24.005239486694336, "step": 3197 }, { "epoch": 5.5, "learning_rate": 2.1578835529111772e-07, "logits/chosen": -1.9218227863311768, "logits/rejected": -1.586743712425232, "logps/chosen": -164.55999755859375, "logps/rejected": -313.51385498046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -8.446444511413574, "rewards/margins": 13.93448543548584, "rewards/rejected": -22.380931854248047, "step": 3198 }, { "epoch": 5.51, "learning_rate": 2.156821079473013e-07, "logits/chosen": -2.144360065460205, "logits/rejected": -1.8068735599517822, "logps/chosen": -207.0072479248047, "logps/rejected": -331.49822998046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.243727684020996, "rewards/margins": 13.310396194458008, "rewards/rejected": -23.554122924804688, "step": 3199 }, { "epoch": 5.51, "learning_rate": 2.155758606034849e-07, "logits/chosen": -2.1221024990081787, "logits/rejected": -1.8250617980957031, "logps/chosen": -172.81759643554688, "logps/rejected": -334.05389404296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.161171913146973, "rewards/margins": 17.4157657623291, "rewards/rejected": -23.57693862915039, "step": 3200 }, { "epoch": 5.51, "learning_rate": 2.154696132596685e-07, "logits/chosen": -1.9997832775115967, "logits/rejected": -1.7685301303863525, "logps/chosen": -164.06085205078125, "logps/rejected": -280.4283447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.941529273986816, "rewards/margins": 12.639381408691406, "rewards/rejected": -20.580909729003906, "step": 3201 }, { "epoch": 5.51, "learning_rate": 2.1536336591585212e-07, "logits/chosen": -1.875966191291809, "logits/rejected": -1.9553793668746948, "logps/chosen": -185.56649780273438, "logps/rejected": -326.48529052734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -11.230690002441406, "rewards/margins": 12.405828475952148, "rewards/rejected": -23.636520385742188, "step": 3202 }, { "epoch": 5.51, "learning_rate": 2.152571185720357e-07, "logits/chosen": -1.5309669971466064, "logits/rejected": -2.0644588470458984, "logps/chosen": -138.78746032714844, "logps/rejected": -285.1512145996094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.35435152053833, "rewards/margins": 12.014472007751465, "rewards/rejected": -18.368824005126953, "step": 3203 }, { "epoch": 5.51, "learning_rate": 2.151508712282193e-07, "logits/chosen": -1.4738399982452393, "logits/rejected": -1.750154972076416, "logps/chosen": -189.08065795898438, "logps/rejected": -318.8064270019531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.215892791748047, "rewards/margins": 13.17060661315918, "rewards/rejected": -22.386499404907227, "step": 3204 }, { "epoch": 5.52, "learning_rate": 2.150446238844029e-07, "logits/chosen": -1.9864873886108398, "logits/rejected": -1.7554349899291992, "logps/chosen": -158.1236572265625, "logps/rejected": -286.5382080078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.38525915145874, "rewards/margins": 13.428512573242188, "rewards/rejected": -19.813772201538086, "step": 3205 }, { "epoch": 5.52, "learning_rate": 2.1493837654058646e-07, "logits/chosen": -1.8547672033309937, "logits/rejected": -2.128330707550049, "logps/chosen": -101.58671569824219, "logps/rejected": -259.9477233886719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.702983856201172, "rewards/margins": 15.132335662841797, "rewards/rejected": -17.835317611694336, "step": 3206 }, { "epoch": 5.52, "learning_rate": 2.1483212919677006e-07, "logits/chosen": -1.65566086769104, "logits/rejected": -2.0823981761932373, "logps/chosen": -160.10304260253906, "logps/rejected": -314.045654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.640678405761719, "rewards/margins": 14.181730270385742, "rewards/rejected": -21.822406768798828, "step": 3207 }, { "epoch": 5.52, "learning_rate": 2.1472588185295369e-07, "logits/chosen": -2.1369428634643555, "logits/rejected": -1.8959925174713135, "logps/chosen": -177.17010498046875, "logps/rejected": -287.4367980957031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.457152366638184, "rewards/margins": 11.557340621948242, "rewards/rejected": -18.014493942260742, "step": 3208 }, { "epoch": 5.52, "learning_rate": 2.1461963450913726e-07, "logits/chosen": -2.0994842052459717, "logits/rejected": -1.6350672245025635, "logps/chosen": -162.33245849609375, "logps/rejected": -254.22390747070312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.322432518005371, "rewards/margins": 11.31340217590332, "rewards/rejected": -17.635833740234375, "step": 3209 }, { "epoch": 5.52, "learning_rate": 2.1451338716532086e-07, "logits/chosen": -2.0387020111083984, "logits/rejected": -1.9629786014556885, "logps/chosen": -182.13453674316406, "logps/rejected": -307.40753173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.537969589233398, "rewards/margins": 12.244400024414062, "rewards/rejected": -20.78236961364746, "step": 3210 }, { "epoch": 5.53, "learning_rate": 2.1440713982150446e-07, "logits/chosen": -2.2579431533813477, "logits/rejected": -1.617436408996582, "logps/chosen": -155.7837371826172, "logps/rejected": -267.41143798828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.84030294418335, "rewards/margins": 11.807012557983398, "rewards/rejected": -18.647315979003906, "step": 3211 }, { "epoch": 5.53, "learning_rate": 2.1430089247768803e-07, "logits/chosen": -2.207134246826172, "logits/rejected": -1.403205156326294, "logps/chosen": -171.4623260498047, "logps/rejected": -292.8128662109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.269700050354004, "rewards/margins": 13.860459327697754, "rewards/rejected": -21.130159378051758, "step": 3212 }, { "epoch": 5.53, "learning_rate": 2.1419464513387165e-07, "logits/chosen": -1.677773118019104, "logits/rejected": -1.8752098083496094, "logps/chosen": -140.85617065429688, "logps/rejected": -256.76116943359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.635019302368164, "rewards/margins": 10.170312881469727, "rewards/rejected": -16.80533218383789, "step": 3213 }, { "epoch": 5.53, "learning_rate": 2.1408839779005525e-07, "logits/chosen": -2.1264705657958984, "logits/rejected": -1.9878100156784058, "logps/chosen": -148.92909240722656, "logps/rejected": -291.4259338378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5231404304504395, "rewards/margins": 13.835304260253906, "rewards/rejected": -19.358444213867188, "step": 3214 }, { "epoch": 5.53, "learning_rate": 2.1398215044623883e-07, "logits/chosen": -1.5944828987121582, "logits/rejected": -2.03171706199646, "logps/chosen": -183.42369079589844, "logps/rejected": -361.1317138671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.756540298461914, "rewards/margins": 15.79461669921875, "rewards/rejected": -24.551158905029297, "step": 3215 }, { "epoch": 5.54, "learning_rate": 2.1387590310242242e-07, "logits/chosen": -1.7163540124893188, "logits/rejected": -1.8561913967132568, "logps/chosen": -143.13246154785156, "logps/rejected": -260.84332275390625, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -5.788857460021973, "rewards/margins": 11.393173217773438, "rewards/rejected": -17.182029724121094, "step": 3216 }, { "epoch": 5.54, "learning_rate": 2.1376965575860602e-07, "logits/chosen": -1.788095474243164, "logits/rejected": -1.9825689792633057, "logps/chosen": -159.24615478515625, "logps/rejected": -325.870849609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.577138423919678, "rewards/margins": 16.324674606323242, "rewards/rejected": -23.901813507080078, "step": 3217 }, { "epoch": 5.54, "learning_rate": 2.136634084147896e-07, "logits/chosen": -2.210895538330078, "logits/rejected": -1.8378220796585083, "logps/chosen": -175.68043518066406, "logps/rejected": -281.44305419921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.341087818145752, "rewards/margins": 12.845130920410156, "rewards/rejected": -18.18621826171875, "step": 3218 }, { "epoch": 5.54, "learning_rate": 2.1355716107097322e-07, "logits/chosen": -1.5702571868896484, "logits/rejected": -1.875833511352539, "logps/chosen": -120.06318664550781, "logps/rejected": -280.7981872558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.317139625549316, "rewards/margins": 15.214865684509277, "rewards/rejected": -20.532005310058594, "step": 3219 }, { "epoch": 5.54, "learning_rate": 2.1345091372715682e-07, "logits/chosen": -1.6537857055664062, "logits/rejected": -1.7901362180709839, "logps/chosen": -156.115234375, "logps/rejected": -301.65216064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.808900833129883, "rewards/margins": 13.57606315612793, "rewards/rejected": -20.384963989257812, "step": 3220 }, { "epoch": 5.54, "learning_rate": 2.133446663833404e-07, "logits/chosen": -1.9772368669509888, "logits/rejected": -2.0258948802948, "logps/chosen": -159.6542510986328, "logps/rejected": -294.6138916015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.144514083862305, "rewards/margins": 13.395357131958008, "rewards/rejected": -21.539873123168945, "step": 3221 }, { "epoch": 5.55, "learning_rate": 2.13238419039524e-07, "logits/chosen": -1.6420204639434814, "logits/rejected": -2.014049530029297, "logps/chosen": -174.4840087890625, "logps/rejected": -327.6398620605469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.113428115844727, "rewards/margins": 13.982433319091797, "rewards/rejected": -22.095861434936523, "step": 3222 }, { "epoch": 5.55, "learning_rate": 2.131321716957076e-07, "logits/chosen": -1.8141322135925293, "logits/rejected": -1.974187970161438, "logps/chosen": -176.5231475830078, "logps/rejected": -325.67803955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.79238224029541, "rewards/margins": 14.116425514221191, "rewards/rejected": -22.908809661865234, "step": 3223 }, { "epoch": 5.55, "learning_rate": 2.1302592435189122e-07, "logits/chosen": -1.907529592514038, "logits/rejected": -1.8084843158721924, "logps/chosen": -152.7115020751953, "logps/rejected": -312.9856262207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.9325456619262695, "rewards/margins": 14.977855682373047, "rewards/rejected": -20.910400390625, "step": 3224 }, { "epoch": 5.55, "learning_rate": 2.129196770080748e-07, "logits/chosen": -1.8123860359191895, "logits/rejected": -1.891719102859497, "logps/chosen": -185.6465301513672, "logps/rejected": -374.45709228515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.267030715942383, "rewards/margins": 16.446517944335938, "rewards/rejected": -26.71354866027832, "step": 3225 }, { "epoch": 5.55, "learning_rate": 2.128134296642584e-07, "logits/chosen": -1.901611566543579, "logits/rejected": -2.015322685241699, "logps/chosen": -111.3126449584961, "logps/rejected": -297.09429931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8621299266815186, "rewards/margins": 17.411968231201172, "rewards/rejected": -21.274097442626953, "step": 3226 }, { "epoch": 5.55, "learning_rate": 2.1270718232044199e-07, "logits/chosen": -1.8062660694122314, "logits/rejected": -1.6373244524002075, "logps/chosen": -166.9457550048828, "logps/rejected": -281.879150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.9797868728637695, "rewards/margins": 11.31298828125, "rewards/rejected": -19.292774200439453, "step": 3227 }, { "epoch": 5.56, "learning_rate": 2.1260093497662556e-07, "logits/chosen": -2.1493358612060547, "logits/rejected": -1.8889093399047852, "logps/chosen": -152.97610473632812, "logps/rejected": -287.88519287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.048852920532227, "rewards/margins": 14.773756980895996, "rewards/rejected": -20.82261085510254, "step": 3228 }, { "epoch": 5.56, "learning_rate": 2.1249468763280918e-07, "logits/chosen": -1.6611943244934082, "logits/rejected": -1.9821512699127197, "logps/chosen": -153.64222717285156, "logps/rejected": -349.3637390136719, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.3401689529418945, "rewards/margins": 18.504514694213867, "rewards/rejected": -25.844682693481445, "step": 3229 }, { "epoch": 5.56, "learning_rate": 2.1238844028899278e-07, "logits/chosen": -2.0553929805755615, "logits/rejected": -1.8561758995056152, "logps/chosen": -153.6519317626953, "logps/rejected": -265.50567626953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.5295634269714355, "rewards/margins": 11.404699325561523, "rewards/rejected": -17.934263229370117, "step": 3230 }, { "epoch": 5.56, "learning_rate": 2.1228219294517635e-07, "logits/chosen": -1.73860502243042, "logits/rejected": -1.9476194381713867, "logps/chosen": -191.81903076171875, "logps/rejected": -313.7579650878906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.056524276733398, "rewards/margins": 12.12352466583252, "rewards/rejected": -23.180049896240234, "step": 3231 }, { "epoch": 5.56, "learning_rate": 2.1217594560135995e-07, "logits/chosen": -1.7632198333740234, "logits/rejected": -1.842449426651001, "logps/chosen": -176.96807861328125, "logps/rejected": -294.78790283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.035593032836914, "rewards/margins": 10.997255325317383, "rewards/rejected": -19.032848358154297, "step": 3232 }, { "epoch": 5.56, "learning_rate": 2.1206969825754355e-07, "logits/chosen": -1.761888027191162, "logits/rejected": -2.1960089206695557, "logps/chosen": -130.38424682617188, "logps/rejected": -293.67529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.405966758728027, "rewards/margins": 15.038175582885742, "rewards/rejected": -20.444141387939453, "step": 3233 }, { "epoch": 5.57, "learning_rate": 2.1196345091372713e-07, "logits/chosen": -1.7650487422943115, "logits/rejected": -2.172874689102173, "logps/chosen": -134.32293701171875, "logps/rejected": -323.2501220703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.717147350311279, "rewards/margins": 16.1467227935791, "rewards/rejected": -21.86387062072754, "step": 3234 }, { "epoch": 5.57, "learning_rate": 2.1185720356991075e-07, "logits/chosen": -1.8965857028961182, "logits/rejected": -2.082227945327759, "logps/chosen": -150.61444091796875, "logps/rejected": -281.8070373535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.077154636383057, "rewards/margins": 14.707136154174805, "rewards/rejected": -20.784290313720703, "step": 3235 }, { "epoch": 5.57, "learning_rate": 2.1175095622609435e-07, "logits/chosen": -1.6188321113586426, "logits/rejected": -2.1070504188537598, "logps/chosen": -182.96115112304688, "logps/rejected": -337.23699951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.205009460449219, "rewards/margins": 15.445535659790039, "rewards/rejected": -22.65054702758789, "step": 3236 }, { "epoch": 5.57, "learning_rate": 2.1164470888227792e-07, "logits/chosen": -2.056579113006592, "logits/rejected": -1.9864838123321533, "logps/chosen": -168.0880126953125, "logps/rejected": -260.5057373046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.662026405334473, "rewards/margins": 9.998023986816406, "rewards/rejected": -18.660051345825195, "step": 3237 }, { "epoch": 5.57, "learning_rate": 2.1153846153846152e-07, "logits/chosen": -1.863612413406372, "logits/rejected": -2.0364041328430176, "logps/chosen": -175.66575622558594, "logps/rejected": -335.898681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.226600646972656, "rewards/margins": 16.243684768676758, "rewards/rejected": -24.470285415649414, "step": 3238 }, { "epoch": 5.57, "learning_rate": 2.1143221419464512e-07, "logits/chosen": -1.7783880233764648, "logits/rejected": -2.040741443634033, "logps/chosen": -129.6102294921875, "logps/rejected": -305.7590637207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.776677131652832, "rewards/margins": 15.05537223815918, "rewards/rejected": -20.832050323486328, "step": 3239 }, { "epoch": 5.58, "learning_rate": 2.1132596685082872e-07, "logits/chosen": -1.9039678573608398, "logits/rejected": -2.0749197006225586, "logps/chosen": -165.25567626953125, "logps/rejected": -319.8013000488281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.068546295166016, "rewards/margins": 14.70700740814209, "rewards/rejected": -21.77555274963379, "step": 3240 }, { "epoch": 5.58, "learning_rate": 2.1121971950701232e-07, "logits/chosen": -1.7904572486877441, "logits/rejected": -2.086660861968994, "logps/chosen": -180.9200439453125, "logps/rejected": -310.6736755371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.3898344039917, "rewards/margins": 11.244580268859863, "rewards/rejected": -21.63441276550293, "step": 3241 }, { "epoch": 5.58, "learning_rate": 2.1111347216319592e-07, "logits/chosen": -1.6191575527191162, "logits/rejected": -2.0617220401763916, "logps/chosen": -152.922119140625, "logps/rejected": -312.5723571777344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.082512855529785, "rewards/margins": 14.82046890258789, "rewards/rejected": -21.902982711791992, "step": 3242 }, { "epoch": 5.58, "learning_rate": 2.110072248193795e-07, "logits/chosen": -2.0266621112823486, "logits/rejected": -1.76507568359375, "logps/chosen": -144.9654083251953, "logps/rejected": -280.91375732421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.233143329620361, "rewards/margins": 13.773366928100586, "rewards/rejected": -20.006511688232422, "step": 3243 }, { "epoch": 5.58, "learning_rate": 2.109009774755631e-07, "logits/chosen": -1.6462067365646362, "logits/rejected": -2.123321533203125, "logps/chosen": -213.47344970703125, "logps/rejected": -334.0902099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.52562427520752, "rewards/margins": 10.938674926757812, "rewards/rejected": -21.464298248291016, "step": 3244 }, { "epoch": 5.59, "learning_rate": 2.107947301317467e-07, "logits/chosen": -1.7585296630859375, "logits/rejected": -1.9971455335617065, "logps/chosen": -184.9752960205078, "logps/rejected": -345.511474609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.43497371673584, "rewards/margins": 14.95963191986084, "rewards/rejected": -25.39460563659668, "step": 3245 }, { "epoch": 5.59, "learning_rate": 2.106884827879303e-07, "logits/chosen": -1.6559220552444458, "logits/rejected": -1.8492093086242676, "logps/chosen": -154.38380432128906, "logps/rejected": -325.13641357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.173062324523926, "rewards/margins": 16.698972702026367, "rewards/rejected": -23.872034072875977, "step": 3246 }, { "epoch": 5.59, "learning_rate": 2.1058223544411388e-07, "logits/chosen": -2.111128568649292, "logits/rejected": -1.3291277885437012, "logps/chosen": -169.212646484375, "logps/rejected": -244.51092529296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.79256534576416, "rewards/margins": 11.773017883300781, "rewards/rejected": -17.565582275390625, "step": 3247 }, { "epoch": 5.59, "learning_rate": 2.1047598810029748e-07, "logits/chosen": -1.7563047409057617, "logits/rejected": -1.9983386993408203, "logps/chosen": -124.49913024902344, "logps/rejected": -285.10784912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.612150192260742, "rewards/margins": 15.658331871032715, "rewards/rejected": -21.27048110961914, "step": 3248 }, { "epoch": 5.59, "learning_rate": 2.1036974075648108e-07, "logits/chosen": -1.9850043058395386, "logits/rejected": -1.9382449388504028, "logps/chosen": -167.5595245361328, "logps/rejected": -327.73565673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.117481231689453, "rewards/margins": 16.385990142822266, "rewards/rejected": -24.50347137451172, "step": 3249 }, { "epoch": 5.59, "learning_rate": 2.1026349341266465e-07, "logits/chosen": -2.0978903770446777, "logits/rejected": -1.8106436729431152, "logps/chosen": -172.14752197265625, "logps/rejected": -268.6871032714844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.565364837646484, "rewards/margins": 11.248635292053223, "rewards/rejected": -19.81399917602539, "step": 3250 }, { "epoch": 5.6, "learning_rate": 2.1015724606884828e-07, "logits/chosen": -2.013791084289551, "logits/rejected": -2.120784282684326, "logps/chosen": -138.42428588867188, "logps/rejected": -295.4610595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.144400596618652, "rewards/margins": 14.86524486541748, "rewards/rejected": -21.0096435546875, "step": 3251 }, { "epoch": 5.6, "learning_rate": 2.1005099872503188e-07, "logits/chosen": -2.022010087966919, "logits/rejected": -1.9497110843658447, "logps/chosen": -156.61964416503906, "logps/rejected": -268.77960205078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.319369316101074, "rewards/margins": 12.55018138885498, "rewards/rejected": -17.869550704956055, "step": 3252 }, { "epoch": 5.6, "learning_rate": 2.0994475138121545e-07, "logits/chosen": -1.8179545402526855, "logits/rejected": -2.16353702545166, "logps/chosen": -131.71893310546875, "logps/rejected": -338.1400451660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.492173194885254, "rewards/margins": 18.718345642089844, "rewards/rejected": -24.21051788330078, "step": 3253 }, { "epoch": 5.6, "learning_rate": 2.0983850403739905e-07, "logits/chosen": -1.7292027473449707, "logits/rejected": -1.9621391296386719, "logps/chosen": -131.3051300048828, "logps/rejected": -321.60369873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.119619846343994, "rewards/margins": 17.537979125976562, "rewards/rejected": -22.65760040283203, "step": 3254 }, { "epoch": 5.6, "learning_rate": 2.0973225669358265e-07, "logits/chosen": -1.6807942390441895, "logits/rejected": -1.980743646621704, "logps/chosen": -123.4222412109375, "logps/rejected": -256.75836181640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.856342792510986, "rewards/margins": 12.43576431274414, "rewards/rejected": -17.29210662841797, "step": 3255 }, { "epoch": 5.6, "learning_rate": 2.0962600934976625e-07, "logits/chosen": -2.113740921020508, "logits/rejected": -2.073861598968506, "logps/chosen": -131.55062866210938, "logps/rejected": -293.87591552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.652102470397949, "rewards/margins": 14.348753929138184, "rewards/rejected": -20.000856399536133, "step": 3256 }, { "epoch": 5.61, "learning_rate": 2.0951976200594985e-07, "logits/chosen": -1.9109336137771606, "logits/rejected": -1.930504560470581, "logps/chosen": -119.37574768066406, "logps/rejected": -278.0733642578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.952821731567383, "rewards/margins": 15.684865951538086, "rewards/rejected": -20.63768768310547, "step": 3257 }, { "epoch": 5.61, "learning_rate": 2.0941351466213345e-07, "logits/chosen": -1.876044511795044, "logits/rejected": -1.797865390777588, "logps/chosen": -156.02020263671875, "logps/rejected": -285.0809326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.696843147277832, "rewards/margins": 13.72839069366455, "rewards/rejected": -20.425233840942383, "step": 3258 }, { "epoch": 5.61, "learning_rate": 2.0930726731831702e-07, "logits/chosen": -1.8078420162200928, "logits/rejected": -1.9764487743377686, "logps/chosen": -186.90049743652344, "logps/rejected": -316.0804443359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.244152069091797, "rewards/margins": 14.162792205810547, "rewards/rejected": -22.406944274902344, "step": 3259 }, { "epoch": 5.61, "learning_rate": 2.0920101997450062e-07, "logits/chosen": -1.6476653814315796, "logits/rejected": -1.9913591146469116, "logps/chosen": -182.83944702148438, "logps/rejected": -336.26251220703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -9.7801513671875, "rewards/margins": 14.122435569763184, "rewards/rejected": -23.902585983276367, "step": 3260 }, { "epoch": 5.61, "learning_rate": 2.0909477263068424e-07, "logits/chosen": -1.933609962463379, "logits/rejected": -1.9367718696594238, "logps/chosen": -187.4707794189453, "logps/rejected": -339.2627868652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.225902557373047, "rewards/margins": 15.113724708557129, "rewards/rejected": -23.339628219604492, "step": 3261 }, { "epoch": 5.61, "learning_rate": 2.0898852528686781e-07, "logits/chosen": -1.262335181236267, "logits/rejected": -2.046480417251587, "logps/chosen": -144.6371612548828, "logps/rejected": -322.4789733886719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.73492956161499, "rewards/margins": 15.710929870605469, "rewards/rejected": -23.445859909057617, "step": 3262 }, { "epoch": 5.62, "learning_rate": 2.0888227794305141e-07, "logits/chosen": -1.9143378734588623, "logits/rejected": -2.205721139907837, "logps/chosen": -164.5731201171875, "logps/rejected": -344.2900390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -8.802769660949707, "rewards/margins": 16.707870483398438, "rewards/rejected": -25.51064109802246, "step": 3263 }, { "epoch": 5.62, "learning_rate": 2.08776030599235e-07, "logits/chosen": -2.075636386871338, "logits/rejected": -1.840657114982605, "logps/chosen": -139.5049285888672, "logps/rejected": -273.60845947265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.103654861450195, "rewards/margins": 12.484382629394531, "rewards/rejected": -18.588037490844727, "step": 3264 }, { "epoch": 5.62, "learning_rate": 2.086697832554186e-07, "logits/chosen": -1.7217036485671997, "logits/rejected": -2.0885419845581055, "logps/chosen": -161.05419921875, "logps/rejected": -274.8562316894531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.870450973510742, "rewards/margins": 11.986688613891602, "rewards/rejected": -18.857139587402344, "step": 3265 }, { "epoch": 5.62, "learning_rate": 2.0856353591160218e-07, "logits/chosen": -2.073143720626831, "logits/rejected": -1.9014254808425903, "logps/chosen": -141.5640411376953, "logps/rejected": -297.9645080566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.530937671661377, "rewards/margins": 14.998083114624023, "rewards/rejected": -19.529022216796875, "step": 3266 }, { "epoch": 5.62, "learning_rate": 2.084572885677858e-07, "logits/chosen": -1.891493320465088, "logits/rejected": -1.8512325286865234, "logps/chosen": -173.97763061523438, "logps/rejected": -332.38250732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.599494934082031, "rewards/margins": 15.365392684936523, "rewards/rejected": -21.964889526367188, "step": 3267 }, { "epoch": 5.62, "learning_rate": 2.083510412239694e-07, "logits/chosen": -1.7542606592178345, "logits/rejected": -1.9180779457092285, "logps/chosen": -158.0545654296875, "logps/rejected": -297.53515625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.973278045654297, "rewards/margins": 14.01490592956543, "rewards/rejected": -20.98818588256836, "step": 3268 }, { "epoch": 5.63, "learning_rate": 2.0824479388015298e-07, "logits/chosen": -1.9557361602783203, "logits/rejected": -1.688924789428711, "logps/chosen": -144.99600219726562, "logps/rejected": -280.3312072753906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.548398971557617, "rewards/margins": 13.780051231384277, "rewards/rejected": -19.328449249267578, "step": 3269 }, { "epoch": 5.63, "learning_rate": 2.0813854653633658e-07, "logits/chosen": -1.9251247644424438, "logits/rejected": -1.7784000635147095, "logps/chosen": -168.3917236328125, "logps/rejected": -330.1892395019531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.56792688369751, "rewards/margins": 15.056938171386719, "rewards/rejected": -22.62486457824707, "step": 3270 }, { "epoch": 5.63, "learning_rate": 2.0803229919252018e-07, "logits/chosen": -1.7698909044265747, "logits/rejected": -1.7743303775787354, "logps/chosen": -123.5206527709961, "logps/rejected": -307.65289306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.722422122955322, "rewards/margins": 17.754972457885742, "rewards/rejected": -22.477394104003906, "step": 3271 }, { "epoch": 5.63, "learning_rate": 2.0792605184870378e-07, "logits/chosen": -1.9202144145965576, "logits/rejected": -1.9447416067123413, "logps/chosen": -149.1820068359375, "logps/rejected": -360.09283447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.299093246459961, "rewards/margins": 19.953298568725586, "rewards/rejected": -26.252391815185547, "step": 3272 }, { "epoch": 5.63, "learning_rate": 2.0781980450488738e-07, "logits/chosen": -1.7850675582885742, "logits/rejected": -2.0570106506347656, "logps/chosen": -127.00813293457031, "logps/rejected": -305.50445556640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.471241474151611, "rewards/margins": 16.341552734375, "rewards/rejected": -21.812795639038086, "step": 3273 }, { "epoch": 5.64, "learning_rate": 2.0771355716107098e-07, "logits/chosen": -2.0179824829101562, "logits/rejected": -1.8199219703674316, "logps/chosen": -158.3243408203125, "logps/rejected": -295.3258972167969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.522502899169922, "rewards/margins": 14.425969123840332, "rewards/rejected": -21.94847297668457, "step": 3274 }, { "epoch": 5.64, "learning_rate": 2.0760730981725455e-07, "logits/chosen": -1.8632755279541016, "logits/rejected": -1.9321446418762207, "logps/chosen": -166.17526245117188, "logps/rejected": -292.4542236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.312439918518066, "rewards/margins": 12.883668899536133, "rewards/rejected": -21.196109771728516, "step": 3275 }, { "epoch": 5.64, "learning_rate": 2.0750106247343815e-07, "logits/chosen": -1.6694319248199463, "logits/rejected": -1.9259238243103027, "logps/chosen": -117.53398132324219, "logps/rejected": -266.7049255371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.664386749267578, "rewards/margins": 13.825772285461426, "rewards/rejected": -19.490158081054688, "step": 3276 }, { "epoch": 5.64, "learning_rate": 2.0739481512962175e-07, "logits/chosen": -2.066896438598633, "logits/rejected": -1.9761290550231934, "logps/chosen": -184.4097442626953, "logps/rejected": -349.8161315917969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.4460248947143555, "rewards/margins": 16.23169708251953, "rewards/rejected": -23.67772102355957, "step": 3277 }, { "epoch": 5.64, "learning_rate": 2.0728856778580534e-07, "logits/chosen": -1.5064325332641602, "logits/rejected": -2.0084266662597656, "logps/chosen": -154.20001220703125, "logps/rejected": -329.08184814453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.066652774810791, "rewards/margins": 15.33763313293457, "rewards/rejected": -22.404287338256836, "step": 3278 }, { "epoch": 5.64, "learning_rate": 2.0718232044198894e-07, "logits/chosen": -2.0139541625976562, "logits/rejected": -1.835386872291565, "logps/chosen": -190.37460327148438, "logps/rejected": -359.92254638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.113893508911133, "rewards/margins": 17.679773330688477, "rewards/rejected": -27.79366683959961, "step": 3279 }, { "epoch": 5.65, "learning_rate": 2.0707607309817254e-07, "logits/chosen": -2.048582077026367, "logits/rejected": -1.3782203197479248, "logps/chosen": -174.72024536132812, "logps/rejected": -279.7952880859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.960539817810059, "rewards/margins": 12.965073585510254, "rewards/rejected": -19.925613403320312, "step": 3280 }, { "epoch": 5.65, "learning_rate": 2.0696982575435611e-07, "logits/chosen": -1.6352765560150146, "logits/rejected": -2.0385918617248535, "logps/chosen": -121.29484558105469, "logps/rejected": -303.17059326171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.43775749206543, "rewards/margins": 15.714082717895508, "rewards/rejected": -21.151840209960938, "step": 3281 }, { "epoch": 5.65, "learning_rate": 2.0686357841053971e-07, "logits/chosen": -1.7140417098999023, "logits/rejected": -1.8654825687408447, "logps/chosen": -147.30520629882812, "logps/rejected": -314.72332763671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.340602874755859, "rewards/margins": 15.708438873291016, "rewards/rejected": -22.049041748046875, "step": 3282 }, { "epoch": 5.65, "learning_rate": 2.0675733106672334e-07, "logits/chosen": -1.9096970558166504, "logits/rejected": -1.5697760581970215, "logps/chosen": -172.84616088867188, "logps/rejected": -296.5683898925781, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -7.163252830505371, "rewards/margins": 13.1321382522583, "rewards/rejected": -20.295391082763672, "step": 3283 }, { "epoch": 5.65, "learning_rate": 2.066510837229069e-07, "logits/chosen": -1.5181422233581543, "logits/rejected": -2.135446071624756, "logps/chosen": -123.28214263916016, "logps/rejected": -330.4319763183594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.934420108795166, "rewards/margins": 17.920761108398438, "rewards/rejected": -22.855180740356445, "step": 3284 }, { "epoch": 5.65, "learning_rate": 2.065448363790905e-07, "logits/chosen": -1.9926934242248535, "logits/rejected": -1.6356087923049927, "logps/chosen": -138.07699584960938, "logps/rejected": -299.93426513671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.656418800354004, "rewards/margins": 16.430429458618164, "rewards/rejected": -23.08684730529785, "step": 3285 }, { "epoch": 5.66, "learning_rate": 2.064385890352741e-07, "logits/chosen": -2.0910346508026123, "logits/rejected": -1.9327629804611206, "logps/chosen": -178.12643432617188, "logps/rejected": -307.748779296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.355321884155273, "rewards/margins": 13.706785202026367, "rewards/rejected": -22.06210708618164, "step": 3286 }, { "epoch": 5.66, "learning_rate": 2.063323416914577e-07, "logits/chosen": -1.563215732574463, "logits/rejected": -2.116312026977539, "logps/chosen": -100.85618591308594, "logps/rejected": -249.64955139160156, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.8795876502990723, "rewards/margins": 13.800032615661621, "rewards/rejected": -17.67961883544922, "step": 3287 }, { "epoch": 5.66, "learning_rate": 2.062260943476413e-07, "logits/chosen": -2.0147223472595215, "logits/rejected": -1.7825502157211304, "logps/chosen": -152.92642211914062, "logps/rejected": -284.13330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.693927764892578, "rewards/margins": 14.873464584350586, "rewards/rejected": -21.567392349243164, "step": 3288 }, { "epoch": 5.66, "learning_rate": 2.061198470038249e-07, "logits/chosen": -1.8868913650512695, "logits/rejected": -1.648514986038208, "logps/chosen": -217.94281005859375, "logps/rejected": -369.87164306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.884929656982422, "rewards/margins": 15.48964786529541, "rewards/rejected": -29.374576568603516, "step": 3289 }, { "epoch": 5.66, "learning_rate": 2.060135996600085e-07, "logits/chosen": -1.8907761573791504, "logits/rejected": -2.013054370880127, "logps/chosen": -170.62075805664062, "logps/rejected": -332.408447265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.1270599365234375, "rewards/margins": 16.55036163330078, "rewards/rejected": -23.67742156982422, "step": 3290 }, { "epoch": 5.66, "learning_rate": 2.0590735231619208e-07, "logits/chosen": -1.943386435508728, "logits/rejected": -1.7611651420593262, "logps/chosen": -195.41952514648438, "logps/rejected": -323.746337890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.971071243286133, "rewards/margins": 14.064452171325684, "rewards/rejected": -25.035524368286133, "step": 3291 }, { "epoch": 5.67, "learning_rate": 2.0580110497237568e-07, "logits/chosen": -1.9111100435256958, "logits/rejected": -2.0430383682250977, "logps/chosen": -137.25051879882812, "logps/rejected": -304.28564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.965021133422852, "rewards/margins": 15.539759635925293, "rewards/rejected": -21.50478172302246, "step": 3292 }, { "epoch": 5.67, "learning_rate": 2.0569485762855928e-07, "logits/chosen": -2.0629401206970215, "logits/rejected": -1.7480520009994507, "logps/chosen": -144.40635681152344, "logps/rejected": -266.74456787109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.085881233215332, "rewards/margins": 12.177663803100586, "rewards/rejected": -19.2635440826416, "step": 3293 }, { "epoch": 5.67, "learning_rate": 2.0558861028474287e-07, "logits/chosen": -1.7682936191558838, "logits/rejected": -1.6000142097473145, "logps/chosen": -159.3955078125, "logps/rejected": -274.8077697753906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.629621505737305, "rewards/margins": 12.93155288696289, "rewards/rejected": -19.561174392700195, "step": 3294 }, { "epoch": 5.67, "learning_rate": 2.0548236294092647e-07, "logits/chosen": -1.7096028327941895, "logits/rejected": -1.8551228046417236, "logps/chosen": -151.21253967285156, "logps/rejected": -328.7333679199219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.733748912811279, "rewards/margins": 15.96346664428711, "rewards/rejected": -23.697216033935547, "step": 3295 }, { "epoch": 5.67, "learning_rate": 2.0537611559711007e-07, "logits/chosen": -1.5327178239822388, "logits/rejected": -1.7408241033554077, "logps/chosen": -116.06239318847656, "logps/rejected": -300.7091979980469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.80623722076416, "rewards/margins": 16.072263717651367, "rewards/rejected": -20.878501892089844, "step": 3296 }, { "epoch": 5.67, "learning_rate": 2.0526986825329364e-07, "logits/chosen": -1.7640098333358765, "logits/rejected": -2.0303473472595215, "logps/chosen": -121.54277801513672, "logps/rejected": -252.55386352539062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.33708381652832, "rewards/margins": 12.997823715209961, "rewards/rejected": -18.33490753173828, "step": 3297 }, { "epoch": 5.68, "learning_rate": 2.0516362090947724e-07, "logits/chosen": -1.9024943113327026, "logits/rejected": -1.9382431507110596, "logps/chosen": -159.39549255371094, "logps/rejected": -331.07354736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.092484474182129, "rewards/margins": 17.80921173095703, "rewards/rejected": -24.901695251464844, "step": 3298 }, { "epoch": 5.68, "learning_rate": 2.0505737356566087e-07, "logits/chosen": -1.406109094619751, "logits/rejected": -2.223088502883911, "logps/chosen": -138.634521484375, "logps/rejected": -303.96917724609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.594274520874023, "rewards/margins": 14.79697036743164, "rewards/rejected": -21.391244888305664, "step": 3299 }, { "epoch": 5.68, "learning_rate": 2.0495112622184444e-07, "logits/chosen": -2.0274219512939453, "logits/rejected": -1.7765220403671265, "logps/chosen": -138.43576049804688, "logps/rejected": -262.030517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.195992469787598, "rewards/margins": 13.573163986206055, "rewards/rejected": -17.76915740966797, "step": 3300 }, { "epoch": 5.68, "learning_rate": 2.0484487887802804e-07, "logits/chosen": -1.9439105987548828, "logits/rejected": -1.8652136325836182, "logps/chosen": -142.76766967773438, "logps/rejected": -309.3554992675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.341048240661621, "rewards/margins": 17.743621826171875, "rewards/rejected": -23.084671020507812, "step": 3301 }, { "epoch": 5.68, "learning_rate": 2.0473863153421164e-07, "logits/chosen": -1.859457015991211, "logits/rejected": -1.9125381708145142, "logps/chosen": -95.08146667480469, "logps/rejected": -237.15484619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4180169105529785, "rewards/margins": 13.674910545349121, "rewards/rejected": -16.092927932739258, "step": 3302 }, { "epoch": 5.69, "learning_rate": 2.046323841903952e-07, "logits/chosen": -1.9052938222885132, "logits/rejected": -2.1345818042755127, "logps/chosen": -154.5875244140625, "logps/rejected": -298.8111572265625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -6.7123918533325195, "rewards/margins": 13.113382339477539, "rewards/rejected": -19.825775146484375, "step": 3303 }, { "epoch": 5.69, "learning_rate": 2.045261368465788e-07, "logits/chosen": -2.0437545776367188, "logits/rejected": -1.918955683708191, "logps/chosen": -147.82284545898438, "logps/rejected": -291.0999450683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.435306549072266, "rewards/margins": 13.968816757202148, "rewards/rejected": -21.404125213623047, "step": 3304 }, { "epoch": 5.69, "learning_rate": 2.0441988950276244e-07, "logits/chosen": -1.9206866025924683, "logits/rejected": -1.5639796257019043, "logps/chosen": -177.98251342773438, "logps/rejected": -277.0548095703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.236194610595703, "rewards/margins": 10.847168922424316, "rewards/rejected": -20.083362579345703, "step": 3305 }, { "epoch": 5.69, "learning_rate": 2.04313642158946e-07, "logits/chosen": -1.9951223134994507, "logits/rejected": -1.7695505619049072, "logps/chosen": -163.11465454101562, "logps/rejected": -306.3931579589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.820509910583496, "rewards/margins": 15.46034049987793, "rewards/rejected": -22.280851364135742, "step": 3306 }, { "epoch": 5.69, "learning_rate": 2.042073948151296e-07, "logits/chosen": -1.7567206621170044, "logits/rejected": -1.5939689874649048, "logps/chosen": -183.4469757080078, "logps/rejected": -273.4739990234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.324579238891602, "rewards/margins": 10.828559875488281, "rewards/rejected": -20.153139114379883, "step": 3307 }, { "epoch": 5.69, "learning_rate": 2.041011474713132e-07, "logits/chosen": -1.539646863937378, "logits/rejected": -1.9106769561767578, "logps/chosen": -165.8121795654297, "logps/rejected": -326.2350769042969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.277490615844727, "rewards/margins": 13.81946849822998, "rewards/rejected": -22.09695816040039, "step": 3308 }, { "epoch": 5.7, "learning_rate": 2.039949001274968e-07, "logits/chosen": -1.5588691234588623, "logits/rejected": -1.9341233968734741, "logps/chosen": -159.32823181152344, "logps/rejected": -275.9462890625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -6.677565574645996, "rewards/margins": 10.589282035827637, "rewards/rejected": -17.266847610473633, "step": 3309 }, { "epoch": 5.7, "learning_rate": 2.038886527836804e-07, "logits/chosen": -2.0986294746398926, "logits/rejected": -1.8152238130569458, "logps/chosen": -139.50537109375, "logps/rejected": -248.14627075195312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.2562255859375, "rewards/margins": 11.242963790893555, "rewards/rejected": -17.499189376831055, "step": 3310 }, { "epoch": 5.7, "learning_rate": 2.03782405439864e-07, "logits/chosen": -2.1698453426361084, "logits/rejected": -1.507552981376648, "logps/chosen": -165.63296508789062, "logps/rejected": -298.9764404296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.561572074890137, "rewards/margins": 14.068503379821777, "rewards/rejected": -22.630077362060547, "step": 3311 }, { "epoch": 5.7, "learning_rate": 2.036761580960476e-07, "logits/chosen": -2.0012924671173096, "logits/rejected": -2.0244317054748535, "logps/chosen": -150.80067443847656, "logps/rejected": -287.4134216308594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.3699493408203125, "rewards/margins": 13.258180618286133, "rewards/rejected": -20.628129959106445, "step": 3312 }, { "epoch": 5.7, "learning_rate": 2.0356991075223117e-07, "logits/chosen": -1.5819287300109863, "logits/rejected": -2.0199778079986572, "logps/chosen": -120.31800842285156, "logps/rejected": -308.34356689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.669408798217773, "rewards/margins": 16.651714324951172, "rewards/rejected": -21.321125030517578, "step": 3313 }, { "epoch": 5.7, "learning_rate": 2.0346366340841477e-07, "logits/chosen": -1.9233062267303467, "logits/rejected": -1.9336391687393188, "logps/chosen": -190.5757293701172, "logps/rejected": -297.19232177734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -9.90923023223877, "rewards/margins": 10.998468399047852, "rewards/rejected": -20.907697677612305, "step": 3314 }, { "epoch": 5.71, "learning_rate": 2.033574160645984e-07, "logits/chosen": -1.9816317558288574, "logits/rejected": -1.889035701751709, "logps/chosen": -155.27464294433594, "logps/rejected": -347.66778564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.6488752365112305, "rewards/margins": 18.283889770507812, "rewards/rejected": -25.932764053344727, "step": 3315 }, { "epoch": 5.71, "learning_rate": 2.0325116872078197e-07, "logits/chosen": -1.9935194253921509, "logits/rejected": -1.9983580112457275, "logps/chosen": -201.47406005859375, "logps/rejected": -299.2627258300781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.592510223388672, "rewards/margins": 10.452022552490234, "rewards/rejected": -20.044532775878906, "step": 3316 }, { "epoch": 5.71, "learning_rate": 2.0314492137696557e-07, "logits/chosen": -1.9320662021636963, "logits/rejected": -1.8094067573547363, "logps/chosen": -118.26271057128906, "logps/rejected": -284.00555419921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.2285308837890625, "rewards/margins": 16.717208862304688, "rewards/rejected": -20.94573974609375, "step": 3317 }, { "epoch": 5.71, "learning_rate": 2.0303867403314917e-07, "logits/chosen": -1.6511231660842896, "logits/rejected": -1.7585749626159668, "logps/chosen": -166.37930297851562, "logps/rejected": -279.1322021484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.126081466674805, "rewards/margins": 12.222993850708008, "rewards/rejected": -19.349075317382812, "step": 3318 }, { "epoch": 5.71, "learning_rate": 2.0293242668933274e-07, "logits/chosen": -1.6105738878250122, "logits/rejected": -1.8539209365844727, "logps/chosen": -160.60072326660156, "logps/rejected": -310.1310119628906, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -7.285401344299316, "rewards/margins": 14.444258689880371, "rewards/rejected": -21.729660034179688, "step": 3319 }, { "epoch": 5.71, "learning_rate": 2.0282617934551634e-07, "logits/chosen": -1.7965013980865479, "logits/rejected": -2.0133631229400635, "logps/chosen": -163.70257568359375, "logps/rejected": -285.52685546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.932652473449707, "rewards/margins": 12.9795503616333, "rewards/rejected": -20.912202835083008, "step": 3320 }, { "epoch": 5.72, "learning_rate": 2.0271993200169996e-07, "logits/chosen": -1.9010483026504517, "logits/rejected": -1.741044282913208, "logps/chosen": -141.5223388671875, "logps/rejected": -308.9141845703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.938231945037842, "rewards/margins": 17.740718841552734, "rewards/rejected": -22.678951263427734, "step": 3321 }, { "epoch": 5.72, "learning_rate": 2.0261368465788354e-07, "logits/chosen": -1.9092516899108887, "logits/rejected": -1.8656615018844604, "logps/chosen": -152.65858459472656, "logps/rejected": -268.3322448730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8549652099609375, "rewards/margins": 12.244831085205078, "rewards/rejected": -19.099796295166016, "step": 3322 }, { "epoch": 5.72, "learning_rate": 2.0250743731406714e-07, "logits/chosen": -1.6504077911376953, "logits/rejected": -1.7353613376617432, "logps/chosen": -140.30076599121094, "logps/rejected": -286.4268798828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.244480133056641, "rewards/margins": 14.328908920288086, "rewards/rejected": -20.573389053344727, "step": 3323 }, { "epoch": 5.72, "learning_rate": 2.0240118997025074e-07, "logits/chosen": -1.754317283630371, "logits/rejected": -1.942941665649414, "logps/chosen": -170.86386108398438, "logps/rejected": -299.2106628417969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.650030136108398, "rewards/margins": 13.148794174194336, "rewards/rejected": -20.798824310302734, "step": 3324 }, { "epoch": 5.72, "learning_rate": 2.022949426264343e-07, "logits/chosen": -1.7928863763809204, "logits/rejected": -1.8096349239349365, "logps/chosen": -173.96324157714844, "logps/rejected": -291.08990478515625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.361141204833984, "rewards/margins": 11.686746597290039, "rewards/rejected": -20.047887802124023, "step": 3325 }, { "epoch": 5.72, "learning_rate": 2.0218869528261793e-07, "logits/chosen": -1.9203743934631348, "logits/rejected": -2.008885145187378, "logps/chosen": -174.93614196777344, "logps/rejected": -303.99713134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.587508201599121, "rewards/margins": 12.997323036193848, "rewards/rejected": -21.5848331451416, "step": 3326 }, { "epoch": 5.73, "learning_rate": 2.0208244793880153e-07, "logits/chosen": -1.6969105005264282, "logits/rejected": -2.000666856765747, "logps/chosen": -155.11538696289062, "logps/rejected": -307.3875732421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.603132247924805, "rewards/margins": 14.665218353271484, "rewards/rejected": -22.26835060119629, "step": 3327 }, { "epoch": 5.73, "learning_rate": 2.0197620059498513e-07, "logits/chosen": -1.6375727653503418, "logits/rejected": -1.462329626083374, "logps/chosen": -149.88641357421875, "logps/rejected": -240.32937622070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5293450355529785, "rewards/margins": 10.877461433410645, "rewards/rejected": -16.40680503845215, "step": 3328 }, { "epoch": 5.73, "learning_rate": 2.018699532511687e-07, "logits/chosen": -1.8087888956069946, "logits/rejected": -2.0159752368927, "logps/chosen": -121.92774200439453, "logps/rejected": -311.8898010253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2208333015441895, "rewards/margins": 17.049135208129883, "rewards/rejected": -22.269968032836914, "step": 3329 }, { "epoch": 5.73, "learning_rate": 2.017637059073523e-07, "logits/chosen": -1.7353383302688599, "logits/rejected": -1.8267643451690674, "logps/chosen": -127.54289245605469, "logps/rejected": -279.06060791015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.235291481018066, "rewards/margins": 14.39105224609375, "rewards/rejected": -19.626344680786133, "step": 3330 }, { "epoch": 5.73, "learning_rate": 2.0165745856353593e-07, "logits/chosen": -1.6695427894592285, "logits/rejected": -1.8439595699310303, "logps/chosen": -111.42388916015625, "logps/rejected": -299.6439514160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0762057304382324, "rewards/margins": 17.575992584228516, "rewards/rejected": -20.652196884155273, "step": 3331 }, { "epoch": 5.73, "learning_rate": 2.015512112197195e-07, "logits/chosen": -1.928104281425476, "logits/rejected": -1.6601760387420654, "logps/chosen": -162.023681640625, "logps/rejected": -293.97808837890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.619399070739746, "rewards/margins": 13.214241027832031, "rewards/rejected": -20.83363914489746, "step": 3332 }, { "epoch": 5.74, "learning_rate": 2.014449638759031e-07, "logits/chosen": -1.939391851425171, "logits/rejected": -1.753697156906128, "logps/chosen": -175.3560791015625, "logps/rejected": -308.43048095703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.543478012084961, "rewards/margins": 12.991109848022461, "rewards/rejected": -21.534587860107422, "step": 3333 }, { "epoch": 5.74, "learning_rate": 2.013387165320867e-07, "logits/chosen": -2.0630900859832764, "logits/rejected": -2.054056406021118, "logps/chosen": -145.90928649902344, "logps/rejected": -310.2729797363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6173882484436035, "rewards/margins": 16.401092529296875, "rewards/rejected": -23.01848030090332, "step": 3334 }, { "epoch": 5.74, "learning_rate": 2.0123246918827027e-07, "logits/chosen": -1.489855170249939, "logits/rejected": -1.676954984664917, "logps/chosen": -166.08221435546875, "logps/rejected": -297.33563232421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.220592498779297, "rewards/margins": 12.215327262878418, "rewards/rejected": -21.43592071533203, "step": 3335 }, { "epoch": 5.74, "learning_rate": 2.0112622184445387e-07, "logits/chosen": -2.0134286880493164, "logits/rejected": -1.9071812629699707, "logps/chosen": -162.80752563476562, "logps/rejected": -295.6025695800781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.886995792388916, "rewards/margins": 14.711891174316406, "rewards/rejected": -21.598886489868164, "step": 3336 }, { "epoch": 5.74, "learning_rate": 2.010199745006375e-07, "logits/chosen": -2.0276875495910645, "logits/rejected": -1.8679356575012207, "logps/chosen": -138.3529510498047, "logps/rejected": -255.32107543945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.6145853996276855, "rewards/margins": 12.067808151245117, "rewards/rejected": -17.68239402770996, "step": 3337 }, { "epoch": 5.75, "learning_rate": 2.0091372715682107e-07, "logits/chosen": -1.8644248247146606, "logits/rejected": -1.9320257902145386, "logps/chosen": -147.52618408203125, "logps/rejected": -321.25634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.872931957244873, "rewards/margins": 16.58514404296875, "rewards/rejected": -22.458078384399414, "step": 3338 }, { "epoch": 5.75, "learning_rate": 2.0080747981300467e-07, "logits/chosen": -1.6350420713424683, "logits/rejected": -2.0753159523010254, "logps/chosen": -163.82839965820312, "logps/rejected": -306.9857177734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.1227388381958, "rewards/margins": 12.755025863647461, "rewards/rejected": -20.877763748168945, "step": 3339 }, { "epoch": 5.75, "learning_rate": 2.0070123246918826e-07, "logits/chosen": -2.134948968887329, "logits/rejected": -1.8018200397491455, "logps/chosen": -162.8809814453125, "logps/rejected": -305.3445129394531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.449192523956299, "rewards/margins": 14.932265281677246, "rewards/rejected": -21.381458282470703, "step": 3340 }, { "epoch": 5.75, "learning_rate": 2.0059498512537184e-07, "logits/chosen": -1.6437852382659912, "logits/rejected": -2.112433671951294, "logps/chosen": -126.61643981933594, "logps/rejected": -312.2010192871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.940423965454102, "rewards/margins": 14.855512619018555, "rewards/rejected": -19.79593849182129, "step": 3341 }, { "epoch": 5.75, "learning_rate": 2.0048873778155546e-07, "logits/chosen": -1.778968334197998, "logits/rejected": -1.9534828662872314, "logps/chosen": -166.09652709960938, "logps/rejected": -320.06005859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.980487823486328, "rewards/margins": 14.898303031921387, "rewards/rejected": -22.87879180908203, "step": 3342 }, { "epoch": 5.75, "learning_rate": 2.0038249043773906e-07, "logits/chosen": -1.8343547582626343, "logits/rejected": -1.986363410949707, "logps/chosen": -171.83926391601562, "logps/rejected": -330.4082336425781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.453513145446777, "rewards/margins": 14.817313194274902, "rewards/rejected": -23.27082633972168, "step": 3343 }, { "epoch": 5.76, "learning_rate": 2.0027624309392263e-07, "logits/chosen": -1.5812649726867676, "logits/rejected": -2.063206195831299, "logps/chosen": -107.15864562988281, "logps/rejected": -276.7699279785156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.395732879638672, "rewards/margins": 14.933109283447266, "rewards/rejected": -19.328842163085938, "step": 3344 }, { "epoch": 5.76, "learning_rate": 2.0016999575010623e-07, "logits/chosen": -1.687164068222046, "logits/rejected": -1.8860751390457153, "logps/chosen": -146.4180145263672, "logps/rejected": -239.63992309570312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.494873523712158, "rewards/margins": 9.257017135620117, "rewards/rejected": -16.751890182495117, "step": 3345 }, { "epoch": 5.76, "learning_rate": 2.0006374840628983e-07, "logits/chosen": -1.7898056507110596, "logits/rejected": -1.9054630994796753, "logps/chosen": -149.32423400878906, "logps/rejected": -322.24224853515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.995251655578613, "rewards/margins": 16.754756927490234, "rewards/rejected": -23.75000762939453, "step": 3346 }, { "epoch": 5.76, "learning_rate": 1.999575010624734e-07, "logits/chosen": -2.045276403427124, "logits/rejected": -1.5791257619857788, "logps/chosen": -141.6253662109375, "logps/rejected": -233.038330078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.292624473571777, "rewards/margins": 10.627910614013672, "rewards/rejected": -17.920536041259766, "step": 3347 }, { "epoch": 5.76, "learning_rate": 1.9985125371865703e-07, "logits/chosen": -2.0640757083892822, "logits/rejected": -1.5974786281585693, "logps/chosen": -162.6812286376953, "logps/rejected": -289.63446044921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.575357437133789, "rewards/margins": 13.362323760986328, "rewards/rejected": -20.937681198120117, "step": 3348 }, { "epoch": 5.76, "learning_rate": 1.9974500637484063e-07, "logits/chosen": -1.9134843349456787, "logits/rejected": -1.9693825244903564, "logps/chosen": -136.77105712890625, "logps/rejected": -273.85845947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.176148891448975, "rewards/margins": 14.194060325622559, "rewards/rejected": -20.370210647583008, "step": 3349 }, { "epoch": 5.77, "learning_rate": 1.9963875903102423e-07, "logits/chosen": -1.9927561283111572, "logits/rejected": -2.0452141761779785, "logps/chosen": -127.89698791503906, "logps/rejected": -312.4224853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.526395797729492, "rewards/margins": 17.65179443359375, "rewards/rejected": -22.178192138671875, "step": 3350 }, { "epoch": 5.77, "learning_rate": 1.995325116872078e-07, "logits/chosen": -1.9130778312683105, "logits/rejected": -1.6566694974899292, "logps/chosen": -161.3302764892578, "logps/rejected": -255.6957244873047, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -6.9151716232299805, "rewards/margins": 11.008529663085938, "rewards/rejected": -17.923702239990234, "step": 3351 }, { "epoch": 5.77, "learning_rate": 1.994262643433914e-07, "logits/chosen": -1.7422966957092285, "logits/rejected": -1.873288869857788, "logps/chosen": -146.74420166015625, "logps/rejected": -277.7437744140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.328670978546143, "rewards/margins": 12.911776542663574, "rewards/rejected": -19.240447998046875, "step": 3352 }, { "epoch": 5.77, "learning_rate": 1.9932001699957502e-07, "logits/chosen": -1.87396240234375, "logits/rejected": -1.9717634916305542, "logps/chosen": -141.78933715820312, "logps/rejected": -288.7646484375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -5.737312316894531, "rewards/margins": 14.857418060302734, "rewards/rejected": -20.594730377197266, "step": 3353 }, { "epoch": 5.77, "learning_rate": 1.992137696557586e-07, "logits/chosen": -1.5768537521362305, "logits/rejected": -1.6340818405151367, "logps/chosen": -116.32954406738281, "logps/rejected": -326.81982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.927288055419922, "rewards/margins": 20.322006225585938, "rewards/rejected": -24.249292373657227, "step": 3354 }, { "epoch": 5.77, "learning_rate": 1.991075223119422e-07, "logits/chosen": -1.8878366947174072, "logits/rejected": -1.6271445751190186, "logps/chosen": -191.70587158203125, "logps/rejected": -323.7630615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.216200828552246, "rewards/margins": 13.341014862060547, "rewards/rejected": -22.55721664428711, "step": 3355 }, { "epoch": 5.78, "learning_rate": 1.990012749681258e-07, "logits/chosen": -1.5822288990020752, "logits/rejected": -1.9105513095855713, "logps/chosen": -150.30560302734375, "logps/rejected": -288.0726623535156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.404943466186523, "rewards/margins": 14.019058227539062, "rewards/rejected": -20.424001693725586, "step": 3356 }, { "epoch": 5.78, "learning_rate": 1.9889502762430937e-07, "logits/chosen": -1.9589753150939941, "logits/rejected": -1.8232450485229492, "logps/chosen": -128.28762817382812, "logps/rejected": -319.09674072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.929634094238281, "rewards/margins": 18.478439331054688, "rewards/rejected": -23.4080753326416, "step": 3357 }, { "epoch": 5.78, "learning_rate": 1.98788780280493e-07, "logits/chosen": -1.8460068702697754, "logits/rejected": -1.9863924980163574, "logps/chosen": -94.19038391113281, "logps/rejected": -240.31295776367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4757940769195557, "rewards/margins": 14.943694114685059, "rewards/rejected": -17.41948890686035, "step": 3358 }, { "epoch": 5.78, "learning_rate": 1.986825329366766e-07, "logits/chosen": -1.703319787979126, "logits/rejected": -1.6620938777923584, "logps/chosen": -150.66409301757812, "logps/rejected": -242.08480834960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.544732093811035, "rewards/margins": 9.992931365966797, "rewards/rejected": -16.53766441345215, "step": 3359 }, { "epoch": 5.78, "learning_rate": 1.9857628559286016e-07, "logits/chosen": -2.21809983253479, "logits/rejected": -1.6897536516189575, "logps/chosen": -149.67654418945312, "logps/rejected": -270.8016357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.818813800811768, "rewards/margins": 14.90542984008789, "rewards/rejected": -19.7242431640625, "step": 3360 }, { "epoch": 5.78, "learning_rate": 1.9847003824904376e-07, "logits/chosen": -1.5976985692977905, "logits/rejected": -1.829890251159668, "logps/chosen": -123.3387680053711, "logps/rejected": -305.9539794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.3715362548828125, "rewards/margins": 14.673887252807617, "rewards/rejected": -19.045425415039062, "step": 3361 }, { "epoch": 5.79, "learning_rate": 1.9836379090522736e-07, "logits/chosen": -1.8860349655151367, "logits/rejected": -2.052903890609741, "logps/chosen": -141.1900634765625, "logps/rejected": -284.00262451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.113125801086426, "rewards/margins": 14.953231811523438, "rewards/rejected": -21.06635856628418, "step": 3362 }, { "epoch": 5.79, "learning_rate": 1.9825754356141093e-07, "logits/chosen": -1.953705906867981, "logits/rejected": -2.0123865604400635, "logps/chosen": -142.2468719482422, "logps/rejected": -274.97332763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.338432788848877, "rewards/margins": 13.366005897521973, "rewards/rejected": -18.704439163208008, "step": 3363 }, { "epoch": 5.79, "learning_rate": 1.9815129621759456e-07, "logits/chosen": -1.8010863065719604, "logits/rejected": -1.596529483795166, "logps/chosen": -136.62152099609375, "logps/rejected": -308.605224609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.204339027404785, "rewards/margins": 17.291332244873047, "rewards/rejected": -23.495670318603516, "step": 3364 }, { "epoch": 5.79, "learning_rate": 1.9804504887377816e-07, "logits/chosen": -1.7985539436340332, "logits/rejected": -2.0836894512176514, "logps/chosen": -139.45541381835938, "logps/rejected": -298.23785400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.012441635131836, "rewards/margins": 15.39578628540039, "rewards/rejected": -20.40822982788086, "step": 3365 }, { "epoch": 5.79, "learning_rate": 1.9793880152996173e-07, "logits/chosen": -1.1803470849990845, "logits/rejected": -2.1838769912719727, "logps/chosen": -125.11947631835938, "logps/rejected": -341.5793762207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.272710800170898, "rewards/margins": 15.378620147705078, "rewards/rejected": -19.651330947875977, "step": 3366 }, { "epoch": 5.8, "learning_rate": 1.9783255418614533e-07, "logits/chosen": -1.7526524066925049, "logits/rejected": -1.568594217300415, "logps/chosen": -169.7439727783203, "logps/rejected": -273.7462158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.771082401275635, "rewards/margins": 11.346569061279297, "rewards/rejected": -19.117650985717773, "step": 3367 }, { "epoch": 5.8, "learning_rate": 1.9772630684232893e-07, "logits/chosen": -2.083451509475708, "logits/rejected": -1.8677388429641724, "logps/chosen": -137.8817901611328, "logps/rejected": -302.7040710449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.232468605041504, "rewards/margins": 16.841108322143555, "rewards/rejected": -22.073577880859375, "step": 3368 }, { "epoch": 5.8, "learning_rate": 1.9762005949851253e-07, "logits/chosen": -2.0755794048309326, "logits/rejected": -2.0978617668151855, "logps/chosen": -93.10964965820312, "logps/rejected": -242.28619384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5712289810180664, "rewards/margins": 14.801970481872559, "rewards/rejected": -17.373199462890625, "step": 3369 }, { "epoch": 5.8, "learning_rate": 1.9751381215469613e-07, "logits/chosen": -1.3525413274765015, "logits/rejected": -1.9315977096557617, "logps/chosen": -122.53375244140625, "logps/rejected": -272.9310302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.403604507446289, "rewards/margins": 14.899632453918457, "rewards/rejected": -20.30323600769043, "step": 3370 }, { "epoch": 5.8, "learning_rate": 1.9740756481087972e-07, "logits/chosen": -1.8748745918273926, "logits/rejected": -2.013918876647949, "logps/chosen": -126.14652252197266, "logps/rejected": -339.2872009277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.772761344909668, "rewards/margins": 20.449451446533203, "rewards/rejected": -25.222211837768555, "step": 3371 }, { "epoch": 5.8, "learning_rate": 1.9730131746706332e-07, "logits/chosen": -1.8135104179382324, "logits/rejected": -1.7960078716278076, "logps/chosen": -149.72412109375, "logps/rejected": -294.10223388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6610260009765625, "rewards/margins": 14.764504432678223, "rewards/rejected": -21.42552947998047, "step": 3372 }, { "epoch": 5.81, "learning_rate": 1.971950701232469e-07, "logits/chosen": -1.7949118614196777, "logits/rejected": -2.286299705505371, "logps/chosen": -126.08526611328125, "logps/rejected": -259.065673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.751986503601074, "rewards/margins": 13.036252975463867, "rewards/rejected": -17.788240432739258, "step": 3373 }, { "epoch": 5.81, "learning_rate": 1.970888227794305e-07, "logits/chosen": -2.048322916030884, "logits/rejected": -1.6780328750610352, "logps/chosen": -153.07769775390625, "logps/rejected": -260.2350158691406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.391639709472656, "rewards/margins": 13.090521812438965, "rewards/rejected": -17.482162475585938, "step": 3374 }, { "epoch": 5.81, "learning_rate": 1.9698257543561412e-07, "logits/chosen": -2.024563789367676, "logits/rejected": -1.7672011852264404, "logps/chosen": -175.11288452148438, "logps/rejected": -309.0635070800781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.74188232421875, "rewards/margins": 14.613204956054688, "rewards/rejected": -20.355087280273438, "step": 3375 }, { "epoch": 5.81, "learning_rate": 1.968763280917977e-07, "logits/chosen": -1.8905062675476074, "logits/rejected": -1.8917258977890015, "logps/chosen": -168.62890625, "logps/rejected": -286.64971923828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.323729991912842, "rewards/margins": 11.234766006469727, "rewards/rejected": -18.558496475219727, "step": 3376 }, { "epoch": 5.81, "learning_rate": 1.967700807479813e-07, "logits/chosen": -1.9968615770339966, "logits/rejected": -1.6464135646820068, "logps/chosen": -180.36964416503906, "logps/rejected": -291.62255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.129715919494629, "rewards/margins": 10.913569450378418, "rewards/rejected": -20.043285369873047, "step": 3377 }, { "epoch": 5.81, "learning_rate": 1.966638334041649e-07, "logits/chosen": -1.829569697380066, "logits/rejected": -1.6952379941940308, "logps/chosen": -176.546142578125, "logps/rejected": -337.7303161621094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.57126522064209, "rewards/margins": 16.091384887695312, "rewards/rejected": -24.662649154663086, "step": 3378 }, { "epoch": 5.82, "learning_rate": 1.9655758606034846e-07, "logits/chosen": -2.0117311477661133, "logits/rejected": -1.6764345169067383, "logps/chosen": -181.66021728515625, "logps/rejected": -278.3053283691406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.052406311035156, "rewards/margins": 10.483305931091309, "rewards/rejected": -19.53571319580078, "step": 3379 }, { "epoch": 5.82, "learning_rate": 1.964513387165321e-07, "logits/chosen": -1.8564255237579346, "logits/rejected": -1.4381916522979736, "logps/chosen": -138.5318603515625, "logps/rejected": -224.29122924804688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.443010330200195, "rewards/margins": 11.16573429107666, "rewards/rejected": -16.60874366760254, "step": 3380 }, { "epoch": 5.82, "learning_rate": 1.963450913727157e-07, "logits/chosen": -1.7300076484680176, "logits/rejected": -1.9758950471878052, "logps/chosen": -124.11894226074219, "logps/rejected": -261.19970703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.547415733337402, "rewards/margins": 13.220332145690918, "rewards/rejected": -19.76774787902832, "step": 3381 }, { "epoch": 5.82, "learning_rate": 1.9623884402889926e-07, "logits/chosen": -1.5974830389022827, "logits/rejected": -1.9214098453521729, "logps/chosen": -140.0588836669922, "logps/rejected": -279.20611572265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.989042282104492, "rewards/margins": 12.371101379394531, "rewards/rejected": -18.360143661499023, "step": 3382 }, { "epoch": 5.82, "learning_rate": 1.9613259668508286e-07, "logits/chosen": -1.8843486309051514, "logits/rejected": -1.87747061252594, "logps/chosen": -172.2245635986328, "logps/rejected": -304.27734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.657916069030762, "rewards/margins": 13.330987930297852, "rewards/rejected": -19.988903045654297, "step": 3383 }, { "epoch": 5.82, "learning_rate": 1.9602634934126646e-07, "logits/chosen": -1.753465175628662, "logits/rejected": -1.9374648332595825, "logps/chosen": -133.8312225341797, "logps/rejected": -310.09991455078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.365311622619629, "rewards/margins": 16.387805938720703, "rewards/rejected": -20.753116607666016, "step": 3384 }, { "epoch": 5.83, "learning_rate": 1.9592010199745006e-07, "logits/chosen": -1.9714689254760742, "logits/rejected": -1.8256415128707886, "logps/chosen": -179.21812438964844, "logps/rejected": -348.23480224609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.148157119750977, "rewards/margins": 17.72071075439453, "rewards/rejected": -24.868867874145508, "step": 3385 }, { "epoch": 5.83, "learning_rate": 1.9581385465363366e-07, "logits/chosen": -1.5471413135528564, "logits/rejected": -1.9569501876831055, "logps/chosen": -143.19496154785156, "logps/rejected": -321.43060302734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.24964714050293, "rewards/margins": 15.564620018005371, "rewards/rejected": -20.814268112182617, "step": 3386 }, { "epoch": 5.83, "learning_rate": 1.9570760730981725e-07, "logits/chosen": -1.915390968322754, "logits/rejected": -1.595927357673645, "logps/chosen": -187.30551147460938, "logps/rejected": -288.4401550292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.056062698364258, "rewards/margins": 12.445355415344238, "rewards/rejected": -21.50141716003418, "step": 3387 }, { "epoch": 5.83, "learning_rate": 1.9560135996600083e-07, "logits/chosen": -2.092663288116455, "logits/rejected": -1.9723856449127197, "logps/chosen": -151.0303955078125, "logps/rejected": -308.32476806640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.241058349609375, "rewards/margins": 16.41686248779297, "rewards/rejected": -22.657920837402344, "step": 3388 }, { "epoch": 5.83, "learning_rate": 1.9549511262218443e-07, "logits/chosen": -1.9371119737625122, "logits/rejected": -1.9322824478149414, "logps/chosen": -166.53038024902344, "logps/rejected": -288.1849670410156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.114083290100098, "rewards/margins": 12.35295581817627, "rewards/rejected": -18.467039108276367, "step": 3389 }, { "epoch": 5.83, "learning_rate": 1.9538886527836802e-07, "logits/chosen": -2.016997814178467, "logits/rejected": -1.8842248916625977, "logps/chosen": -127.0338134765625, "logps/rejected": -259.21453857421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.709047317504883, "rewards/margins": 14.148118019104004, "rewards/rejected": -18.857166290283203, "step": 3390 }, { "epoch": 5.84, "learning_rate": 1.9528261793455165e-07, "logits/chosen": -1.916956901550293, "logits/rejected": -1.951694369316101, "logps/chosen": -159.03086853027344, "logps/rejected": -315.6100158691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.127361297607422, "rewards/margins": 15.330877304077148, "rewards/rejected": -22.458236694335938, "step": 3391 }, { "epoch": 5.84, "learning_rate": 1.9517637059073522e-07, "logits/chosen": -1.9691429138183594, "logits/rejected": -2.0692851543426514, "logps/chosen": -131.00479125976562, "logps/rejected": -306.6905212402344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.474649429321289, "rewards/margins": 17.61561393737793, "rewards/rejected": -22.09026336669922, "step": 3392 }, { "epoch": 5.84, "learning_rate": 1.9507012324691882e-07, "logits/chosen": -1.5558103322982788, "logits/rejected": -2.058953046798706, "logps/chosen": -179.7420654296875, "logps/rejected": -297.45892333984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.964296340942383, "rewards/margins": 11.042436599731445, "rewards/rejected": -20.006732940673828, "step": 3393 }, { "epoch": 5.84, "learning_rate": 1.9496387590310242e-07, "logits/chosen": -2.0664193630218506, "logits/rejected": -1.6436659097671509, "logps/chosen": -132.75271606445312, "logps/rejected": -244.11605834960938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.596457481384277, "rewards/margins": 12.808501243591309, "rewards/rejected": -18.404958724975586, "step": 3394 }, { "epoch": 5.84, "learning_rate": 1.94857628559286e-07, "logits/chosen": -1.801356315612793, "logits/rejected": -2.1092209815979004, "logps/chosen": -189.04638671875, "logps/rejected": -332.36651611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.38838005065918, "rewards/margins": 13.174745559692383, "rewards/rejected": -22.563127517700195, "step": 3395 }, { "epoch": 5.85, "learning_rate": 1.9475138121546962e-07, "logits/chosen": -1.8234357833862305, "logits/rejected": -2.037764310836792, "logps/chosen": -153.25189208984375, "logps/rejected": -313.5325927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.970909118652344, "rewards/margins": 16.355224609375, "rewards/rejected": -21.326135635375977, "step": 3396 }, { "epoch": 5.85, "learning_rate": 1.9464513387165322e-07, "logits/chosen": -1.7423731088638306, "logits/rejected": -1.8269091844558716, "logps/chosen": -209.4601287841797, "logps/rejected": -328.0032043457031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -11.061657905578613, "rewards/margins": 11.911169052124023, "rewards/rejected": -22.972827911376953, "step": 3397 }, { "epoch": 5.85, "learning_rate": 1.945388865278368e-07, "logits/chosen": -1.8938096761703491, "logits/rejected": -1.8121321201324463, "logps/chosen": -161.25381469726562, "logps/rejected": -264.7394104003906, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -8.662866592407227, "rewards/margins": 10.775328636169434, "rewards/rejected": -19.438196182250977, "step": 3398 }, { "epoch": 5.85, "learning_rate": 1.944326391840204e-07, "logits/chosen": -2.0925076007843018, "logits/rejected": -1.5582414865493774, "logps/chosen": -160.4825439453125, "logps/rejected": -277.85235595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8947434425354, "rewards/margins": 12.815255165100098, "rewards/rejected": -19.709999084472656, "step": 3399 }, { "epoch": 5.85, "learning_rate": 1.94326391840204e-07, "logits/chosen": -1.6489856243133545, "logits/rejected": -2.056373119354248, "logps/chosen": -154.6685791015625, "logps/rejected": -294.75189208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.639815330505371, "rewards/margins": 12.467196464538574, "rewards/rejected": -21.107011795043945, "step": 3400 }, { "epoch": 5.85, "learning_rate": 1.9422014449638756e-07, "logits/chosen": -1.974524736404419, "logits/rejected": -2.0528459548950195, "logps/chosen": -166.35617065429688, "logps/rejected": -347.3636474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.145036697387695, "rewards/margins": 16.564559936523438, "rewards/rejected": -24.709596633911133, "step": 3401 }, { "epoch": 5.86, "learning_rate": 1.9411389715257118e-07, "logits/chosen": -1.833787202835083, "logits/rejected": -1.8902422189712524, "logps/chosen": -168.2284698486328, "logps/rejected": -319.91595458984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.311039924621582, "rewards/margins": 15.692837715148926, "rewards/rejected": -24.00387954711914, "step": 3402 }, { "epoch": 5.86, "learning_rate": 1.9400764980875478e-07, "logits/chosen": -1.8602098226547241, "logits/rejected": -1.9593396186828613, "logps/chosen": -158.7462921142578, "logps/rejected": -337.5994873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.341730117797852, "rewards/margins": 16.38129425048828, "rewards/rejected": -23.723024368286133, "step": 3403 }, { "epoch": 5.86, "learning_rate": 1.9390140246493836e-07, "logits/chosen": -1.858567237854004, "logits/rejected": -1.6765773296356201, "logps/chosen": -144.0306396484375, "logps/rejected": -245.2152099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.915383815765381, "rewards/margins": 12.098051071166992, "rewards/rejected": -17.01343536376953, "step": 3404 }, { "epoch": 5.86, "learning_rate": 1.9379515512112196e-07, "logits/chosen": -1.8473490476608276, "logits/rejected": -1.9932177066802979, "logps/chosen": -163.5708770751953, "logps/rejected": -311.86376953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.785927772521973, "rewards/margins": 14.339045524597168, "rewards/rejected": -22.12497329711914, "step": 3405 }, { "epoch": 5.86, "learning_rate": 1.9368890777730555e-07, "logits/chosen": -1.8642901182174683, "logits/rejected": -2.045628070831299, "logps/chosen": -151.12037658691406, "logps/rejected": -302.1148681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.366523742675781, "rewards/margins": 14.876468658447266, "rewards/rejected": -20.242992401123047, "step": 3406 }, { "epoch": 5.86, "learning_rate": 1.9358266043348915e-07, "logits/chosen": -1.520264983177185, "logits/rejected": -1.8854825496673584, "logps/chosen": -123.69580841064453, "logps/rejected": -274.50238037109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.250740051269531, "rewards/margins": 13.840630531311035, "rewards/rejected": -19.091371536254883, "step": 3407 }, { "epoch": 5.87, "learning_rate": 1.9347641308967275e-07, "logits/chosen": -1.8475618362426758, "logits/rejected": -1.943190336227417, "logps/chosen": -172.90235900878906, "logps/rejected": -334.2737731933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.373908996582031, "rewards/margins": 16.192577362060547, "rewards/rejected": -24.566486358642578, "step": 3408 }, { "epoch": 5.87, "learning_rate": 1.9337016574585635e-07, "logits/chosen": -1.9691494703292847, "logits/rejected": -1.9653480052947998, "logps/chosen": -147.03529357910156, "logps/rejected": -336.2674865722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.7697038650512695, "rewards/margins": 17.7500057220459, "rewards/rejected": -24.519710540771484, "step": 3409 }, { "epoch": 5.87, "learning_rate": 1.9326391840203992e-07, "logits/chosen": -1.491797924041748, "logits/rejected": -1.6235325336456299, "logps/chosen": -169.5386505126953, "logps/rejected": -301.65277099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.148069381713867, "rewards/margins": 13.010637283325195, "rewards/rejected": -22.158706665039062, "step": 3410 }, { "epoch": 5.87, "learning_rate": 1.9315767105822352e-07, "logits/chosen": -1.9848403930664062, "logits/rejected": -1.7245155572891235, "logps/chosen": -135.84683227539062, "logps/rejected": -276.1451721191406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.427559852600098, "rewards/margins": 15.395214080810547, "rewards/rejected": -20.82277488708496, "step": 3411 }, { "epoch": 5.87, "learning_rate": 1.9305142371440715e-07, "logits/chosen": -1.757813811302185, "logits/rejected": -1.9985675811767578, "logps/chosen": -172.40582275390625, "logps/rejected": -322.39813232421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.024515151977539, "rewards/margins": 13.586542129516602, "rewards/rejected": -22.61105728149414, "step": 3412 }, { "epoch": 5.87, "learning_rate": 1.9294517637059075e-07, "logits/chosen": -1.7335339784622192, "logits/rejected": -2.007786989212036, "logps/chosen": -147.37191772460938, "logps/rejected": -308.8665466308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.78496789932251, "rewards/margins": 15.762819290161133, "rewards/rejected": -21.547786712646484, "step": 3413 }, { "epoch": 5.88, "learning_rate": 1.9283892902677432e-07, "logits/chosen": -1.913128137588501, "logits/rejected": -2.050118923187256, "logps/chosen": -140.2239532470703, "logps/rejected": -278.4390869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.521119594573975, "rewards/margins": 12.480392456054688, "rewards/rejected": -19.00151252746582, "step": 3414 }, { "epoch": 5.88, "learning_rate": 1.9273268168295792e-07, "logits/chosen": -1.6795803308486938, "logits/rejected": -2.1593267917633057, "logps/chosen": -137.42837524414062, "logps/rejected": -331.2801208496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.853635787963867, "rewards/margins": 17.7374324798584, "rewards/rejected": -23.5910701751709, "step": 3415 }, { "epoch": 5.88, "learning_rate": 1.9262643433914152e-07, "logits/chosen": -1.6998803615570068, "logits/rejected": -2.0677857398986816, "logps/chosen": -123.08828735351562, "logps/rejected": -300.01483154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.199774265289307, "rewards/margins": 15.918084144592285, "rewards/rejected": -20.117856979370117, "step": 3416 }, { "epoch": 5.88, "learning_rate": 1.925201869953251e-07, "logits/chosen": -1.551161527633667, "logits/rejected": -1.8616939783096313, "logps/chosen": -135.0902099609375, "logps/rejected": -298.3009948730469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.39902400970459, "rewards/margins": 14.516706466674805, "rewards/rejected": -20.91573143005371, "step": 3417 }, { "epoch": 5.88, "learning_rate": 1.9241393965150871e-07, "logits/chosen": -1.9882323741912842, "logits/rejected": -1.8725416660308838, "logps/chosen": -120.4812240600586, "logps/rejected": -308.57159423828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.8063764572143555, "rewards/margins": 18.910842895507812, "rewards/rejected": -22.717220306396484, "step": 3418 }, { "epoch": 5.88, "learning_rate": 1.9230769230769231e-07, "logits/chosen": -2.0889339447021484, "logits/rejected": -1.8673691749572754, "logps/chosen": -143.3260498046875, "logps/rejected": -314.319091796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.9374284744262695, "rewards/margins": 18.468944549560547, "rewards/rejected": -23.406373977661133, "step": 3419 }, { "epoch": 5.89, "learning_rate": 1.9220144496387589e-07, "logits/chosen": -1.8303511142730713, "logits/rejected": -1.6833909749984741, "logps/chosen": -146.37010192871094, "logps/rejected": -315.76287841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.342619895935059, "rewards/margins": 16.482641220092773, "rewards/rejected": -22.825260162353516, "step": 3420 }, { "epoch": 5.89, "learning_rate": 1.9209519762005948e-07, "logits/chosen": -1.9122555255889893, "logits/rejected": -1.8826459646224976, "logps/chosen": -121.27444458007812, "logps/rejected": -272.7298889160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2484130859375, "rewards/margins": 15.215267181396484, "rewards/rejected": -20.463680267333984, "step": 3421 }, { "epoch": 5.89, "learning_rate": 1.9198895027624308e-07, "logits/chosen": -1.8064582347869873, "logits/rejected": -1.778477430343628, "logps/chosen": -116.80725860595703, "logps/rejected": -252.95297241210938, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.021565914154053, "rewards/margins": 13.537019729614258, "rewards/rejected": -18.55858612060547, "step": 3422 }, { "epoch": 5.89, "learning_rate": 1.9188270293242668e-07, "logits/chosen": -1.7394979000091553, "logits/rejected": -1.7582893371582031, "logps/chosen": -149.54470825195312, "logps/rejected": -282.260009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.98993444442749, "rewards/margins": 14.777186393737793, "rewards/rejected": -20.767120361328125, "step": 3423 }, { "epoch": 5.89, "learning_rate": 1.9177645558861028e-07, "logits/chosen": -1.6046054363250732, "logits/rejected": -1.9447343349456787, "logps/chosen": -145.77952575683594, "logps/rejected": -344.80572509765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.653275966644287, "rewards/margins": 18.9060001373291, "rewards/rejected": -24.559274673461914, "step": 3424 }, { "epoch": 5.9, "learning_rate": 1.9167020824479388e-07, "logits/chosen": -1.696367621421814, "logits/rejected": -1.9900047779083252, "logps/chosen": -158.29180908203125, "logps/rejected": -338.2042236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.4974894523620605, "rewards/margins": 16.522825241088867, "rewards/rejected": -24.020315170288086, "step": 3425 }, { "epoch": 5.9, "learning_rate": 1.9156396090097745e-07, "logits/chosen": -1.7954195737838745, "logits/rejected": -1.9371678829193115, "logps/chosen": -163.54156494140625, "logps/rejected": -304.9471130371094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.450658798217773, "rewards/margins": 13.306463241577148, "rewards/rejected": -21.757122039794922, "step": 3426 }, { "epoch": 5.9, "learning_rate": 1.9145771355716105e-07, "logits/chosen": -1.3973244428634644, "logits/rejected": -1.7811301946640015, "logps/chosen": -116.82743835449219, "logps/rejected": -262.9415588378906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.325974464416504, "rewards/margins": 13.596075057983398, "rewards/rejected": -18.92205047607422, "step": 3427 }, { "epoch": 5.9, "learning_rate": 1.9135146621334468e-07, "logits/chosen": -1.6739580631256104, "logits/rejected": -1.8924286365509033, "logps/chosen": -162.02670288085938, "logps/rejected": -314.1481018066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.804055213928223, "rewards/margins": 13.967689514160156, "rewards/rejected": -21.771745681762695, "step": 3428 }, { "epoch": 5.9, "learning_rate": 1.9124521886952825e-07, "logits/chosen": -1.8977128267288208, "logits/rejected": -1.996636152267456, "logps/chosen": -176.76197814941406, "logps/rejected": -323.65655517578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.959128379821777, "rewards/margins": 13.890978813171387, "rewards/rejected": -22.850107192993164, "step": 3429 }, { "epoch": 5.9, "learning_rate": 1.9113897152571185e-07, "logits/chosen": -1.883433222770691, "logits/rejected": -1.9295939207077026, "logps/chosen": -165.53411865234375, "logps/rejected": -335.0948791503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.899019718170166, "rewards/margins": 16.905826568603516, "rewards/rejected": -24.804845809936523, "step": 3430 }, { "epoch": 5.91, "learning_rate": 1.9103272418189545e-07, "logits/chosen": -1.7458546161651611, "logits/rejected": -1.882056474685669, "logps/chosen": -120.45922088623047, "logps/rejected": -251.2001495361328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.498107433319092, "rewards/margins": 12.342552185058594, "rewards/rejected": -16.840660095214844, "step": 3431 }, { "epoch": 5.91, "learning_rate": 1.9092647683807902e-07, "logits/chosen": -1.9765160083770752, "logits/rejected": -1.565696358680725, "logps/chosen": -181.6778564453125, "logps/rejected": -256.7481994628906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.384223937988281, "rewards/margins": 9.386555671691895, "rewards/rejected": -17.77077865600586, "step": 3432 }, { "epoch": 5.91, "learning_rate": 1.9082022949426262e-07, "logits/chosen": -1.7751832008361816, "logits/rejected": -1.9127223491668701, "logps/chosen": -193.46371459960938, "logps/rejected": -309.66998291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.998990058898926, "rewards/margins": 12.713356018066406, "rewards/rejected": -21.712345123291016, "step": 3433 }, { "epoch": 5.91, "learning_rate": 1.9071398215044624e-07, "logits/chosen": -1.4401251077651978, "logits/rejected": -1.7489328384399414, "logps/chosen": -139.10751342773438, "logps/rejected": -254.85211181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.934237480163574, "rewards/margins": 10.639784812927246, "rewards/rejected": -17.57402229309082, "step": 3434 }, { "epoch": 5.91, "learning_rate": 1.9060773480662984e-07, "logits/chosen": -1.6273969411849976, "logits/rejected": -1.9270497560501099, "logps/chosen": -161.962158203125, "logps/rejected": -350.7449951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.742058753967285, "rewards/margins": 16.06593894958496, "rewards/rejected": -23.80799674987793, "step": 3435 }, { "epoch": 5.91, "learning_rate": 1.9050148746281342e-07, "logits/chosen": -1.4354900121688843, "logits/rejected": -2.1069889068603516, "logps/chosen": -122.19007873535156, "logps/rejected": -325.8780517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2100830078125, "rewards/margins": 18.075401306152344, "rewards/rejected": -23.285484313964844, "step": 3436 }, { "epoch": 5.92, "learning_rate": 1.9039524011899701e-07, "logits/chosen": -1.9450143575668335, "logits/rejected": -1.7875306606292725, "logps/chosen": -133.75436401367188, "logps/rejected": -326.0663757324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3900551795959473, "rewards/margins": 20.006637573242188, "rewards/rejected": -23.396692276000977, "step": 3437 }, { "epoch": 5.92, "learning_rate": 1.902889927751806e-07, "logits/chosen": -1.904250144958496, "logits/rejected": -2.015035390853882, "logps/chosen": -163.6380615234375, "logps/rejected": -312.4535827636719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.865486145019531, "rewards/margins": 13.637456893920898, "rewards/rejected": -21.50294303894043, "step": 3438 }, { "epoch": 5.92, "learning_rate": 1.901827454313642e-07, "logits/chosen": -1.696815848350525, "logits/rejected": -1.7071236371994019, "logps/chosen": -203.34780883789062, "logps/rejected": -303.27490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.623363494873047, "rewards/margins": 11.397443771362305, "rewards/rejected": -21.02080726623535, "step": 3439 }, { "epoch": 5.92, "learning_rate": 1.900764980875478e-07, "logits/chosen": -1.7707425355911255, "logits/rejected": -1.606794834136963, "logps/chosen": -148.6147003173828, "logps/rejected": -248.62222290039062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.792333126068115, "rewards/margins": 10.814481735229492, "rewards/rejected": -18.606815338134766, "step": 3440 }, { "epoch": 5.92, "learning_rate": 1.899702507437314e-07, "logits/chosen": -1.7683730125427246, "logits/rejected": -2.019041061401367, "logps/chosen": -129.94248962402344, "logps/rejected": -298.34869384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3016557693481445, "rewards/margins": 14.401111602783203, "rewards/rejected": -20.702768325805664, "step": 3441 }, { "epoch": 5.92, "learning_rate": 1.8986400339991498e-07, "logits/chosen": -1.8874444961547852, "logits/rejected": -1.6932002305984497, "logps/chosen": -150.78085327148438, "logps/rejected": -311.59527587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.110036849975586, "rewards/margins": 15.98403549194336, "rewards/rejected": -23.094072341918945, "step": 3442 }, { "epoch": 5.93, "learning_rate": 1.8975775605609858e-07, "logits/chosen": -1.5805387496948242, "logits/rejected": -1.9291890859603882, "logps/chosen": -140.53648376464844, "logps/rejected": -268.1379089355469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.439891338348389, "rewards/margins": 12.428942680358887, "rewards/rejected": -18.868833541870117, "step": 3443 }, { "epoch": 5.93, "learning_rate": 1.896515087122822e-07, "logits/chosen": -2.0075981616973877, "logits/rejected": -1.6525359153747559, "logps/chosen": -135.06724548339844, "logps/rejected": -270.14459228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.592069149017334, "rewards/margins": 15.63347339630127, "rewards/rejected": -20.225543975830078, "step": 3444 }, { "epoch": 5.93, "learning_rate": 1.8954526136846578e-07, "logits/chosen": -1.3884912729263306, "logits/rejected": -1.9410521984100342, "logps/chosen": -167.63250732421875, "logps/rejected": -332.42486572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.087031364440918, "rewards/margins": 13.476041793823242, "rewards/rejected": -24.563074111938477, "step": 3445 }, { "epoch": 5.93, "learning_rate": 1.8943901402464938e-07, "logits/chosen": -1.713512897491455, "logits/rejected": -1.7977420091629028, "logps/chosen": -155.468994140625, "logps/rejected": -259.7372131347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.366532325744629, "rewards/margins": 10.972357749938965, "rewards/rejected": -19.338890075683594, "step": 3446 }, { "epoch": 5.93, "learning_rate": 1.8933276668083298e-07, "logits/chosen": -1.8015216588974, "logits/rejected": -1.7510696649551392, "logps/chosen": -145.314697265625, "logps/rejected": -281.8786315917969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.079624176025391, "rewards/margins": 13.824973106384277, "rewards/rejected": -20.904598236083984, "step": 3447 }, { "epoch": 5.93, "learning_rate": 1.8922651933701655e-07, "logits/chosen": -1.9036356210708618, "logits/rejected": -1.589418649673462, "logps/chosen": -181.7638397216797, "logps/rejected": -302.9779052734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.7370829582214355, "rewards/margins": 15.07487964630127, "rewards/rejected": -22.81196403503418, "step": 3448 }, { "epoch": 5.94, "learning_rate": 1.8912027199320015e-07, "logits/chosen": -1.9569324254989624, "logits/rejected": -1.9110727310180664, "logps/chosen": -156.82960510253906, "logps/rejected": -293.86077880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.295586585998535, "rewards/margins": 13.370099067687988, "rewards/rejected": -21.66568374633789, "step": 3449 }, { "epoch": 5.94, "learning_rate": 1.8901402464938377e-07, "logits/chosen": -1.9526314735412598, "logits/rejected": -1.5626325607299805, "logps/chosen": -165.35476684570312, "logps/rejected": -313.1158142089844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.180520534515381, "rewards/margins": 16.62910270690918, "rewards/rejected": -22.80962371826172, "step": 3450 }, { "epoch": 5.94, "learning_rate": 1.8890777730556735e-07, "logits/chosen": -1.822243094444275, "logits/rejected": -1.9671919345855713, "logps/chosen": -150.55055236816406, "logps/rejected": -323.9040832519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.238814353942871, "rewards/margins": 17.344512939453125, "rewards/rejected": -23.583328247070312, "step": 3451 }, { "epoch": 5.94, "learning_rate": 1.8880152996175094e-07, "logits/chosen": -1.7895344495773315, "logits/rejected": -1.9650745391845703, "logps/chosen": -139.88815307617188, "logps/rejected": -300.3345947265625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -6.502352237701416, "rewards/margins": 14.382807731628418, "rewards/rejected": -20.885160446166992, "step": 3452 }, { "epoch": 5.94, "learning_rate": 1.8869528261793454e-07, "logits/chosen": -1.9411892890930176, "logits/rejected": -2.0164284706115723, "logps/chosen": -127.35620880126953, "logps/rejected": -279.40948486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.3896403312683105, "rewards/margins": 14.804869651794434, "rewards/rejected": -20.194509506225586, "step": 3453 }, { "epoch": 5.94, "learning_rate": 1.8858903527411814e-07, "logits/chosen": -1.8493142127990723, "logits/rejected": -1.6780149936676025, "logps/chosen": -175.998779296875, "logps/rejected": -327.9735107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.043317794799805, "rewards/margins": 15.264849662780762, "rewards/rejected": -23.308168411254883, "step": 3454 }, { "epoch": 5.95, "learning_rate": 1.8848278793030174e-07, "logits/chosen": -1.922811508178711, "logits/rejected": -2.0385847091674805, "logps/chosen": -167.07107543945312, "logps/rejected": -289.79412841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.503322124481201, "rewards/margins": 12.954336166381836, "rewards/rejected": -20.457660675048828, "step": 3455 }, { "epoch": 5.95, "learning_rate": 1.8837654058648534e-07, "logits/chosen": -2.161369562149048, "logits/rejected": -1.7930271625518799, "logps/chosen": -197.44972229003906, "logps/rejected": -320.1935729980469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.690041542053223, "rewards/margins": 13.44711971282959, "rewards/rejected": -23.137163162231445, "step": 3456 }, { "epoch": 5.95, "learning_rate": 1.8827029324266894e-07, "logits/chosen": -1.5664231777191162, "logits/rejected": -1.9162174463272095, "logps/chosen": -144.42193603515625, "logps/rejected": -318.87811279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.432197093963623, "rewards/margins": 15.940959930419922, "rewards/rejected": -22.373157501220703, "step": 3457 }, { "epoch": 5.95, "learning_rate": 1.881640458988525e-07, "logits/chosen": -1.8267669677734375, "logits/rejected": -2.0009326934814453, "logps/chosen": -154.2295379638672, "logps/rejected": -318.9149169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.64271354675293, "rewards/margins": 16.328685760498047, "rewards/rejected": -22.971397399902344, "step": 3458 }, { "epoch": 5.95, "learning_rate": 1.880577985550361e-07, "logits/chosen": -1.588024377822876, "logits/rejected": -1.8950886726379395, "logps/chosen": -176.19708251953125, "logps/rejected": -313.204833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.171200275421143, "rewards/margins": 13.071569442749023, "rewards/rejected": -20.24277114868164, "step": 3459 }, { "epoch": 5.96, "learning_rate": 1.879515512112197e-07, "logits/chosen": -1.885246753692627, "logits/rejected": -1.9436073303222656, "logps/chosen": -133.55255126953125, "logps/rejected": -291.13787841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.308993339538574, "rewards/margins": 15.831697463989258, "rewards/rejected": -21.14069175720215, "step": 3460 }, { "epoch": 5.96, "learning_rate": 1.878453038674033e-07, "logits/chosen": -1.9041543006896973, "logits/rejected": -1.7902857065200806, "logps/chosen": -167.40158081054688, "logps/rejected": -299.8486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.598158359527588, "rewards/margins": 13.2855806350708, "rewards/rejected": -20.883739471435547, "step": 3461 }, { "epoch": 5.96, "learning_rate": 1.877390565235869e-07, "logits/chosen": -1.4657281637191772, "logits/rejected": -1.914049506187439, "logps/chosen": -146.9667510986328, "logps/rejected": -261.7105712890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.062095642089844, "rewards/margins": 11.25372314453125, "rewards/rejected": -18.31581687927246, "step": 3462 }, { "epoch": 5.96, "learning_rate": 1.876328091797705e-07, "logits/chosen": -1.9003660678863525, "logits/rejected": -2.003108024597168, "logps/chosen": -136.5827178955078, "logps/rejected": -264.79766845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.713015079498291, "rewards/margins": 13.768939971923828, "rewards/rejected": -19.48195457458496, "step": 3463 }, { "epoch": 5.96, "learning_rate": 1.8752656183595408e-07, "logits/chosen": -1.9426379203796387, "logits/rejected": -1.9246411323547363, "logps/chosen": -142.7276153564453, "logps/rejected": -289.2493896484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.496393203735352, "rewards/margins": 13.37939453125, "rewards/rejected": -19.87578582763672, "step": 3464 }, { "epoch": 5.96, "learning_rate": 1.8742031449213768e-07, "logits/chosen": -1.9899401664733887, "logits/rejected": -1.2773274183273315, "logps/chosen": -178.67465209960938, "logps/rejected": -288.2690124511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.714353561401367, "rewards/margins": 13.300172805786133, "rewards/rejected": -21.0145263671875, "step": 3465 }, { "epoch": 5.97, "learning_rate": 1.873140671483213e-07, "logits/chosen": -1.6218065023422241, "logits/rejected": -1.8987786769866943, "logps/chosen": -139.06851196289062, "logps/rejected": -270.736328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.835318088531494, "rewards/margins": 12.02261734008789, "rewards/rejected": -16.85793685913086, "step": 3466 }, { "epoch": 5.97, "learning_rate": 1.8720781980450488e-07, "logits/chosen": -1.9099491834640503, "logits/rejected": -1.9396432638168335, "logps/chosen": -120.97023010253906, "logps/rejected": -290.9444580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4734644889831543, "rewards/margins": 16.69720458984375, "rewards/rejected": -20.170669555664062, "step": 3467 }, { "epoch": 5.97, "learning_rate": 1.8710157246068847e-07, "logits/chosen": -1.924118161201477, "logits/rejected": -1.9774396419525146, "logps/chosen": -124.45713806152344, "logps/rejected": -267.0993957519531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.0821123123168945, "rewards/margins": 13.012454986572266, "rewards/rejected": -18.094568252563477, "step": 3468 }, { "epoch": 5.97, "learning_rate": 1.8699532511687207e-07, "logits/chosen": -2.09263014793396, "logits/rejected": -1.9578303098678589, "logps/chosen": -137.64480590820312, "logps/rejected": -321.70343017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.115903854370117, "rewards/margins": 17.32363510131836, "rewards/rejected": -23.439537048339844, "step": 3469 }, { "epoch": 5.97, "learning_rate": 1.8688907777305565e-07, "logits/chosen": -2.0806798934936523, "logits/rejected": -1.5691204071044922, "logps/chosen": -143.90023803710938, "logps/rejected": -283.71099853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.0060133934021, "rewards/margins": 14.922637939453125, "rewards/rejected": -20.928651809692383, "step": 3470 }, { "epoch": 5.97, "learning_rate": 1.8678283042923924e-07, "logits/chosen": -1.5863295793533325, "logits/rejected": -1.7450261116027832, "logps/chosen": -117.0341567993164, "logps/rejected": -310.56890869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.8167009353637695, "rewards/margins": 17.21601104736328, "rewards/rejected": -22.032712936401367, "step": 3471 }, { "epoch": 5.98, "learning_rate": 1.8667658308542287e-07, "logits/chosen": -1.9451735019683838, "logits/rejected": -1.6895177364349365, "logps/chosen": -145.88002014160156, "logps/rejected": -309.0619201660156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.020820140838623, "rewards/margins": 16.81434440612793, "rewards/rejected": -23.835163116455078, "step": 3472 }, { "epoch": 5.98, "learning_rate": 1.8657033574160644e-07, "logits/chosen": -1.908710241317749, "logits/rejected": -1.8519840240478516, "logps/chosen": -169.37913513183594, "logps/rejected": -274.9324951171875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.541983604431152, "rewards/margins": 11.013307571411133, "rewards/rejected": -18.55529022216797, "step": 3473 }, { "epoch": 5.98, "learning_rate": 1.8646408839779004e-07, "logits/chosen": -1.8413591384887695, "logits/rejected": -1.8621572256088257, "logps/chosen": -117.88794708251953, "logps/rejected": -276.33758544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.83192253112793, "rewards/margins": 15.852222442626953, "rewards/rejected": -20.684144973754883, "step": 3474 }, { "epoch": 5.98, "learning_rate": 1.8635784105397364e-07, "logits/chosen": -1.9748823642730713, "logits/rejected": -1.7996333837509155, "logps/chosen": -160.37034606933594, "logps/rejected": -274.5837097167969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.992342948913574, "rewards/margins": 12.475919723510742, "rewards/rejected": -18.46826171875, "step": 3475 }, { "epoch": 5.98, "learning_rate": 1.8625159371015724e-07, "logits/chosen": -1.8290138244628906, "logits/rejected": -1.5870492458343506, "logps/chosen": -157.30401611328125, "logps/rejected": -294.1305847167969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.519156455993652, "rewards/margins": 16.16259002685547, "rewards/rejected": -21.681745529174805, "step": 3476 }, { "epoch": 5.98, "learning_rate": 1.8614534636634084e-07, "logits/chosen": -1.8182518482208252, "logits/rejected": -1.6117709875106812, "logps/chosen": -127.54200744628906, "logps/rejected": -265.23284912109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.46561336517334, "rewards/margins": 14.823139190673828, "rewards/rejected": -19.288753509521484, "step": 3477 }, { "epoch": 5.99, "learning_rate": 1.8603909902252444e-07, "logits/chosen": -1.8972290754318237, "logits/rejected": -1.8916807174682617, "logps/chosen": -170.05340576171875, "logps/rejected": -310.21661376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.610754013061523, "rewards/margins": 13.895146369934082, "rewards/rejected": -21.50589942932129, "step": 3478 }, { "epoch": 5.99, "learning_rate": 1.8593285167870804e-07, "logits/chosen": -1.9881080389022827, "logits/rejected": -2.0800559520721436, "logps/chosen": -151.33383178710938, "logps/rejected": -297.3880615234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.494235515594482, "rewards/margins": 13.96572208404541, "rewards/rejected": -20.459957122802734, "step": 3479 }, { "epoch": 5.99, "learning_rate": 1.858266043348916e-07, "logits/chosen": -1.7206780910491943, "logits/rejected": -1.8014194965362549, "logps/chosen": -121.80433654785156, "logps/rejected": -285.2010192871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.818511486053467, "rewards/margins": 16.24677276611328, "rewards/rejected": -21.065284729003906, "step": 3480 }, { "epoch": 5.99, "learning_rate": 1.857203569910752e-07, "logits/chosen": -1.5872020721435547, "logits/rejected": -1.8199231624603271, "logps/chosen": -166.87472534179688, "logps/rejected": -288.71923828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.80141544342041, "rewards/margins": 10.038318634033203, "rewards/rejected": -17.83973503112793, "step": 3481 }, { "epoch": 5.99, "learning_rate": 1.8561410964725883e-07, "logits/chosen": -2.192796230316162, "logits/rejected": -1.8099757432937622, "logps/chosen": -125.31493377685547, "logps/rejected": -247.34262084960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.206439971923828, "rewards/margins": 14.08722972869873, "rewards/rejected": -18.293670654296875, "step": 3482 }, { "epoch": 5.99, "learning_rate": 1.855078623034424e-07, "logits/chosen": -1.4574089050292969, "logits/rejected": -1.9000515937805176, "logps/chosen": -119.28030395507812, "logps/rejected": -273.564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.774319648742676, "rewards/margins": 13.728083610534668, "rewards/rejected": -18.502403259277344, "step": 3483 }, { "epoch": 6.0, "learning_rate": 1.85401614959626e-07, "logits/chosen": -1.8572804927825928, "logits/rejected": -1.956397533416748, "logps/chosen": -179.64317321777344, "logps/rejected": -302.60040283203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.551912307739258, "rewards/margins": 12.255744934082031, "rewards/rejected": -20.80765724182129, "step": 3484 }, { "epoch": 6.0, "learning_rate": 1.852953676158096e-07, "logits/chosen": -1.761658787727356, "logits/rejected": -1.8385488986968994, "logps/chosen": -142.87933349609375, "logps/rejected": -288.7216796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.583041667938232, "rewards/margins": 16.008440017700195, "rewards/rejected": -21.59148406982422, "step": 3485 }, { "epoch": 6.0, "learning_rate": 1.8518912027199317e-07, "logits/chosen": -1.714184284210205, "logits/rejected": -1.8440903425216675, "logps/chosen": -148.63365173339844, "logps/rejected": -303.9117736816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.455625534057617, "rewards/margins": 14.739095687866211, "rewards/rejected": -22.194721221923828, "step": 3486 }, { "epoch": 6.0, "learning_rate": 1.8508287292817677e-07, "logits/chosen": -1.7365760803222656, "logits/rejected": -2.00895619392395, "logps/chosen": -132.5209197998047, "logps/rejected": -295.5106201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.110718250274658, "rewards/margins": 17.011775970458984, "rewards/rejected": -21.122495651245117, "step": 3487 }, { "epoch": 6.0, "learning_rate": 1.849766255843604e-07, "logits/chosen": -1.4471452236175537, "logits/rejected": -1.9497220516204834, "logps/chosen": -114.72709655761719, "logps/rejected": -300.77838134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.163184642791748, "rewards/margins": 15.601703643798828, "rewards/rejected": -20.764888763427734, "step": 3488 }, { "epoch": 6.01, "learning_rate": 1.8487037824054397e-07, "logits/chosen": -1.752176284790039, "logits/rejected": -1.9149720668792725, "logps/chosen": -146.91201782226562, "logps/rejected": -284.0757751464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.562460899353027, "rewards/margins": 11.992109298706055, "rewards/rejected": -18.554569244384766, "step": 3489 }, { "epoch": 6.01, "learning_rate": 1.8476413089672757e-07, "logits/chosen": -1.558695673942566, "logits/rejected": -1.8921990394592285, "logps/chosen": -173.3812713623047, "logps/rejected": -364.47576904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.893923282623291, "rewards/margins": 17.55176544189453, "rewards/rejected": -25.445688247680664, "step": 3490 }, { "epoch": 6.01, "learning_rate": 1.8465788355291117e-07, "logits/chosen": -1.7316914796829224, "logits/rejected": -2.0303993225097656, "logps/chosen": -212.73568725585938, "logps/rejected": -319.65460205078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -12.672628402709961, "rewards/margins": 10.536459922790527, "rewards/rejected": -23.209089279174805, "step": 3491 }, { "epoch": 6.01, "learning_rate": 1.8455163620909474e-07, "logits/chosen": -1.8200981616973877, "logits/rejected": -1.5972347259521484, "logps/chosen": -171.5273895263672, "logps/rejected": -310.93634033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.187250137329102, "rewards/margins": 14.744290351867676, "rewards/rejected": -22.93153953552246, "step": 3492 }, { "epoch": 6.01, "learning_rate": 1.8444538886527837e-07, "logits/chosen": -1.916696310043335, "logits/rejected": -1.876778244972229, "logps/chosen": -202.7694854736328, "logps/rejected": -317.3804626464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.069467544555664, "rewards/margins": 10.916131973266602, "rewards/rejected": -21.985599517822266, "step": 3493 }, { "epoch": 6.01, "learning_rate": 1.8433914152146197e-07, "logits/chosen": -1.8880581855773926, "logits/rejected": -1.7213084697723389, "logps/chosen": -159.22792053222656, "logps/rejected": -278.320068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.009450912475586, "rewards/margins": 14.733274459838867, "rewards/rejected": -20.742727279663086, "step": 3494 }, { "epoch": 6.02, "learning_rate": 1.8423289417764554e-07, "logits/chosen": -1.8012112379074097, "logits/rejected": -1.8600373268127441, "logps/chosen": -169.29640197753906, "logps/rejected": -305.5643005371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.402276992797852, "rewards/margins": 14.5591459274292, "rewards/rejected": -21.961423873901367, "step": 3495 }, { "epoch": 6.02, "learning_rate": 1.8412664683382914e-07, "logits/chosen": -1.8904671669006348, "logits/rejected": -1.9960331916809082, "logps/chosen": -127.62020111083984, "logps/rejected": -248.23350524902344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.987610340118408, "rewards/margins": 12.8079833984375, "rewards/rejected": -18.795595169067383, "step": 3496 }, { "epoch": 6.02, "learning_rate": 1.8402039949001274e-07, "logits/chosen": -1.3384917974472046, "logits/rejected": -2.0009236335754395, "logps/chosen": -135.42355346679688, "logps/rejected": -317.8072204589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.279372215270996, "rewards/margins": 14.701745986938477, "rewards/rejected": -20.98111915588379, "step": 3497 }, { "epoch": 6.02, "learning_rate": 1.8391415214619636e-07, "logits/chosen": -1.8430529832839966, "logits/rejected": -1.9897723197937012, "logps/chosen": -147.2863311767578, "logps/rejected": -311.28363037109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.714462757110596, "rewards/margins": 16.43266487121582, "rewards/rejected": -23.14712905883789, "step": 3498 }, { "epoch": 6.02, "learning_rate": 1.8380790480237993e-07, "logits/chosen": -1.6739389896392822, "logits/rejected": -1.9280086755752563, "logps/chosen": -204.12606811523438, "logps/rejected": -343.6843566894531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.623186111450195, "rewards/margins": 13.600271224975586, "rewards/rejected": -25.22345733642578, "step": 3499 }, { "epoch": 6.02, "learning_rate": 1.8370165745856353e-07, "logits/chosen": -1.8676838874816895, "logits/rejected": -1.966949224472046, "logps/chosen": -161.51388549804688, "logps/rejected": -299.60931396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.653458118438721, "rewards/margins": 13.760196685791016, "rewards/rejected": -20.41365623474121, "step": 3500 }, { "epoch": 6.03, "learning_rate": 1.8359541011474713e-07, "logits/chosen": -1.599461317062378, "logits/rejected": -2.0010921955108643, "logps/chosen": -155.5577392578125, "logps/rejected": -314.6439208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.333889007568359, "rewards/margins": 14.52260971069336, "rewards/rejected": -21.856496810913086, "step": 3501 }, { "epoch": 6.03, "learning_rate": 1.834891627709307e-07, "logits/chosen": -1.6443349123001099, "logits/rejected": -1.7960914373397827, "logps/chosen": -136.4986572265625, "logps/rejected": -287.89825439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.364353656768799, "rewards/margins": 15.514503479003906, "rewards/rejected": -19.878856658935547, "step": 3502 }, { "epoch": 6.03, "learning_rate": 1.833829154271143e-07, "logits/chosen": -1.6963276863098145, "logits/rejected": -2.008742570877075, "logps/chosen": -161.4628448486328, "logps/rejected": -330.92010498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.771549224853516, "rewards/margins": 14.258962631225586, "rewards/rejected": -21.0305118560791, "step": 3503 }, { "epoch": 6.03, "learning_rate": 1.8327666808329793e-07, "logits/chosen": -1.956380844116211, "logits/rejected": -1.9403469562530518, "logps/chosen": -161.8611297607422, "logps/rejected": -290.17901611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.120253562927246, "rewards/margins": 13.889490127563477, "rewards/rejected": -21.00974464416504, "step": 3504 }, { "epoch": 6.03, "learning_rate": 1.831704207394815e-07, "logits/chosen": -1.3790369033813477, "logits/rejected": -1.9285629987716675, "logps/chosen": -135.71717834472656, "logps/rejected": -309.3314208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.862094879150391, "rewards/margins": 15.44792366027832, "rewards/rejected": -22.31001853942871, "step": 3505 }, { "epoch": 6.03, "learning_rate": 1.830641733956651e-07, "logits/chosen": -1.927825927734375, "logits/rejected": -2.1560516357421875, "logps/chosen": -138.78271484375, "logps/rejected": -338.3594970703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.200331211090088, "rewards/margins": 19.060346603393555, "rewards/rejected": -24.260677337646484, "step": 3506 }, { "epoch": 6.04, "learning_rate": 1.829579260518487e-07, "logits/chosen": -2.05808162689209, "logits/rejected": -1.8062081336975098, "logps/chosen": -137.89437866210938, "logps/rejected": -286.615478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3522467613220215, "rewards/margins": 15.099525451660156, "rewards/rejected": -20.45177459716797, "step": 3507 }, { "epoch": 6.04, "learning_rate": 1.8285167870803227e-07, "logits/chosen": -1.8413888216018677, "logits/rejected": -1.2415069341659546, "logps/chosen": -161.32281494140625, "logps/rejected": -274.5538330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.527029037475586, "rewards/margins": 13.062186241149902, "rewards/rejected": -19.589216232299805, "step": 3508 }, { "epoch": 6.04, "learning_rate": 1.827454313642159e-07, "logits/chosen": -1.930410623550415, "logits/rejected": -1.9553215503692627, "logps/chosen": -145.17340087890625, "logps/rejected": -301.6575012207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.848147392272949, "rewards/margins": 13.635013580322266, "rewards/rejected": -19.48316192626953, "step": 3509 }, { "epoch": 6.04, "learning_rate": 1.826391840203995e-07, "logits/chosen": -1.7669106721878052, "logits/rejected": -2.031514883041382, "logps/chosen": -136.66053771972656, "logps/rejected": -319.1383056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.352434158325195, "rewards/margins": 16.93777084350586, "rewards/rejected": -22.290203094482422, "step": 3510 }, { "epoch": 6.04, "learning_rate": 1.8253293667658307e-07, "logits/chosen": -1.6702678203582764, "logits/rejected": -1.8775900602340698, "logps/chosen": -157.0296630859375, "logps/rejected": -302.25128173828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.503191947937012, "rewards/margins": 13.974749565124512, "rewards/rejected": -21.47793960571289, "step": 3511 }, { "epoch": 6.04, "learning_rate": 1.8242668933276667e-07, "logits/chosen": -1.8881101608276367, "logits/rejected": -1.9133130311965942, "logps/chosen": -141.4768829345703, "logps/rejected": -297.5341491699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.364040374755859, "rewards/margins": 15.678831100463867, "rewards/rejected": -22.042869567871094, "step": 3512 }, { "epoch": 6.05, "learning_rate": 1.8232044198895027e-07, "logits/chosen": -1.7561640739440918, "logits/rejected": -1.6907342672348022, "logps/chosen": -117.50068664550781, "logps/rejected": -243.73587036132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.476593971252441, "rewards/margins": 11.85534381866455, "rewards/rejected": -17.331937789916992, "step": 3513 }, { "epoch": 6.05, "learning_rate": 1.8221419464513384e-07, "logits/chosen": -1.886142373085022, "logits/rejected": -2.065883159637451, "logps/chosen": -166.11932373046875, "logps/rejected": -279.2250061035156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.974565505981445, "rewards/margins": 12.95822811126709, "rewards/rejected": -18.93279266357422, "step": 3514 }, { "epoch": 6.05, "learning_rate": 1.8210794730131746e-07, "logits/chosen": -1.8244668245315552, "logits/rejected": -1.6958277225494385, "logps/chosen": -137.47796630859375, "logps/rejected": -229.24961853027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.090785980224609, "rewards/margins": 11.134882926940918, "rewards/rejected": -16.225669860839844, "step": 3515 }, { "epoch": 6.05, "learning_rate": 1.8200169995750106e-07, "logits/chosen": -1.9174691438674927, "logits/rejected": -1.626218318939209, "logps/chosen": -192.089599609375, "logps/rejected": -319.8537902832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.138014793395996, "rewards/margins": 14.347146034240723, "rewards/rejected": -22.48516082763672, "step": 3516 }, { "epoch": 6.05, "learning_rate": 1.8189545261368466e-07, "logits/chosen": -2.121687889099121, "logits/rejected": -1.573392629623413, "logps/chosen": -141.06814575195312, "logps/rejected": -293.82293701171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.693862438201904, "rewards/margins": 16.554492950439453, "rewards/rejected": -21.248353958129883, "step": 3517 }, { "epoch": 6.06, "learning_rate": 1.8178920526986823e-07, "logits/chosen": -1.8276002407073975, "logits/rejected": -1.6859385967254639, "logps/chosen": -159.08673095703125, "logps/rejected": -328.3586120605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.731744766235352, "rewards/margins": 16.516971588134766, "rewards/rejected": -24.248716354370117, "step": 3518 }, { "epoch": 6.06, "learning_rate": 1.8168295792605183e-07, "logits/chosen": -1.9727977514266968, "logits/rejected": -1.8452515602111816, "logps/chosen": -128.24217224121094, "logps/rejected": -268.9118957519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.54356575012207, "rewards/margins": 13.888345718383789, "rewards/rejected": -19.43191146850586, "step": 3519 }, { "epoch": 6.06, "learning_rate": 1.8157671058223546e-07, "logits/chosen": -1.859499454498291, "logits/rejected": -2.084683418273926, "logps/chosen": -126.71585845947266, "logps/rejected": -318.4197998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.133458614349365, "rewards/margins": 18.37334442138672, "rewards/rejected": -22.506803512573242, "step": 3520 }, { "epoch": 6.06, "learning_rate": 1.8147046323841903e-07, "logits/chosen": -1.8148540258407593, "logits/rejected": -1.8249361515045166, "logps/chosen": -116.54533386230469, "logps/rejected": -239.85684204101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.454811096191406, "rewards/margins": 10.924886703491211, "rewards/rejected": -15.379697799682617, "step": 3521 }, { "epoch": 6.06, "learning_rate": 1.8136421589460263e-07, "logits/chosen": -1.8248426914215088, "logits/rejected": -1.6724474430084229, "logps/chosen": -151.92340087890625, "logps/rejected": -269.3431396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.414088249206543, "rewards/margins": 12.495838165283203, "rewards/rejected": -19.909927368164062, "step": 3522 }, { "epoch": 6.06, "learning_rate": 1.8125796855078623e-07, "logits/chosen": -1.707310676574707, "logits/rejected": -1.7700880765914917, "logps/chosen": -186.82156372070312, "logps/rejected": -320.0859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.534741401672363, "rewards/margins": 14.908625602722168, "rewards/rejected": -23.44336700439453, "step": 3523 }, { "epoch": 6.07, "learning_rate": 1.811517212069698e-07, "logits/chosen": -2.0161499977111816, "logits/rejected": -1.9563965797424316, "logps/chosen": -130.34873962402344, "logps/rejected": -276.1839599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.069519519805908, "rewards/margins": 14.83206558227539, "rewards/rejected": -19.90158462524414, "step": 3524 }, { "epoch": 6.07, "learning_rate": 1.8104547386315343e-07, "logits/chosen": -1.8049579858779907, "logits/rejected": -1.8563710451126099, "logps/chosen": -121.76539611816406, "logps/rejected": -243.04371643066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.749138832092285, "rewards/margins": 11.581087112426758, "rewards/rejected": -16.33022689819336, "step": 3525 }, { "epoch": 6.07, "learning_rate": 1.8093922651933703e-07, "logits/chosen": -1.979271411895752, "logits/rejected": -1.9017620086669922, "logps/chosen": -143.761474609375, "logps/rejected": -302.92657470703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.088404178619385, "rewards/margins": 12.861177444458008, "rewards/rejected": -18.949581146240234, "step": 3526 }, { "epoch": 6.07, "learning_rate": 1.808329791755206e-07, "logits/chosen": -1.897747278213501, "logits/rejected": -1.5581064224243164, "logps/chosen": -156.69740295410156, "logps/rejected": -276.38031005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.7278642654418945, "rewards/margins": 12.744267463684082, "rewards/rejected": -20.472131729125977, "step": 3527 }, { "epoch": 6.07, "learning_rate": 1.807267318317042e-07, "logits/chosen": -2.0860209465026855, "logits/rejected": -1.950488567352295, "logps/chosen": -159.49639892578125, "logps/rejected": -295.2599182128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.848345756530762, "rewards/margins": 13.553056716918945, "rewards/rejected": -20.40140151977539, "step": 3528 }, { "epoch": 6.07, "learning_rate": 1.806204844878878e-07, "logits/chosen": -2.142190933227539, "logits/rejected": -1.7004762887954712, "logps/chosen": -153.34075927734375, "logps/rejected": -278.8282775878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.788505554199219, "rewards/margins": 13.886991500854492, "rewards/rejected": -19.67549705505371, "step": 3529 }, { "epoch": 6.08, "learning_rate": 1.8051423714407137e-07, "logits/chosen": -1.6513745784759521, "logits/rejected": -1.910380244255066, "logps/chosen": -178.92335510253906, "logps/rejected": -339.57232666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.245081901550293, "rewards/margins": 15.723999977111816, "rewards/rejected": -23.969079971313477, "step": 3530 }, { "epoch": 6.08, "learning_rate": 1.80407989800255e-07, "logits/chosen": -1.8479864597320557, "logits/rejected": -1.7766306400299072, "logps/chosen": -144.996337890625, "logps/rejected": -274.82635498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.530893802642822, "rewards/margins": 13.090262413024902, "rewards/rejected": -20.62115478515625, "step": 3531 }, { "epoch": 6.08, "learning_rate": 1.803017424564386e-07, "logits/chosen": -1.8493494987487793, "logits/rejected": -1.6991078853607178, "logps/chosen": -125.62543487548828, "logps/rejected": -268.8350830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9283528327941895, "rewards/margins": 15.658723831176758, "rewards/rejected": -20.58707618713379, "step": 3532 }, { "epoch": 6.08, "learning_rate": 1.8019549511262216e-07, "logits/chosen": -1.8914225101470947, "logits/rejected": -1.86016047000885, "logps/chosen": -146.66134643554688, "logps/rejected": -311.5433349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.998972415924072, "rewards/margins": 16.465354919433594, "rewards/rejected": -22.464326858520508, "step": 3533 }, { "epoch": 6.08, "learning_rate": 1.8008924776880576e-07, "logits/chosen": -1.9802615642547607, "logits/rejected": -1.8876773118972778, "logps/chosen": -162.94691467285156, "logps/rejected": -310.6195068359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.764040946960449, "rewards/margins": 14.850412368774414, "rewards/rejected": -21.614452362060547, "step": 3534 }, { "epoch": 6.08, "learning_rate": 1.7998300042498936e-07, "logits/chosen": -1.8961973190307617, "logits/rejected": -1.8604028224945068, "logps/chosen": -125.67771911621094, "logps/rejected": -295.77911376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.719706058502197, "rewards/margins": 15.819604873657227, "rewards/rejected": -21.5393123626709, "step": 3535 }, { "epoch": 6.09, "learning_rate": 1.7987675308117296e-07, "logits/chosen": -2.033601760864258, "logits/rejected": -1.868173599243164, "logps/chosen": -192.23440551757812, "logps/rejected": -296.70806884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.917672157287598, "rewards/margins": 15.450934410095215, "rewards/rejected": -21.368606567382812, "step": 3536 }, { "epoch": 6.09, "learning_rate": 1.7977050573735656e-07, "logits/chosen": -2.1167564392089844, "logits/rejected": -1.9277111291885376, "logps/chosen": -155.5382537841797, "logps/rejected": -272.393310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.920613765716553, "rewards/margins": 12.261360168457031, "rewards/rejected": -19.18197250366211, "step": 3537 }, { "epoch": 6.09, "learning_rate": 1.7966425839354016e-07, "logits/chosen": -1.8826658725738525, "logits/rejected": -1.75518798828125, "logps/chosen": -213.09251403808594, "logps/rejected": -355.8689880371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.148146629333496, "rewards/margins": 13.477141380310059, "rewards/rejected": -24.625288009643555, "step": 3538 }, { "epoch": 6.09, "learning_rate": 1.7955801104972376e-07, "logits/chosen": -1.831947922706604, "logits/rejected": -2.0079562664031982, "logps/chosen": -149.624267578125, "logps/rejected": -320.2272644042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.292252540588379, "rewards/margins": 15.447835922241211, "rewards/rejected": -22.740087509155273, "step": 3539 }, { "epoch": 6.09, "learning_rate": 1.7945176370590733e-07, "logits/chosen": -2.0016493797302246, "logits/rejected": -1.7224057912826538, "logps/chosen": -120.83306884765625, "logps/rejected": -275.9959411621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.266033172607422, "rewards/margins": 15.691615104675293, "rewards/rejected": -19.95764923095703, "step": 3540 }, { "epoch": 6.09, "learning_rate": 1.7934551636209096e-07, "logits/chosen": -1.7275960445404053, "logits/rejected": -1.9118962287902832, "logps/chosen": -149.7021026611328, "logps/rejected": -300.4211120605469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.11973762512207, "rewards/margins": 14.309571266174316, "rewards/rejected": -20.429309844970703, "step": 3541 }, { "epoch": 6.1, "learning_rate": 1.7923926901827455e-07, "logits/chosen": -1.8075716495513916, "logits/rejected": -1.7698434591293335, "logps/chosen": -134.2022705078125, "logps/rejected": -257.1697998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3408427238464355, "rewards/margins": 12.278848648071289, "rewards/rejected": -18.619691848754883, "step": 3542 }, { "epoch": 6.1, "learning_rate": 1.7913302167445813e-07, "logits/chosen": -1.9544124603271484, "logits/rejected": -1.7340707778930664, "logps/chosen": -149.22398376464844, "logps/rejected": -280.18548583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.886190414428711, "rewards/margins": 14.201719284057617, "rewards/rejected": -20.087909698486328, "step": 3543 }, { "epoch": 6.1, "learning_rate": 1.7902677433064173e-07, "logits/chosen": -1.9020808935165405, "logits/rejected": -2.014883279800415, "logps/chosen": -126.07243347167969, "logps/rejected": -342.9026794433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1196136474609375, "rewards/margins": 19.991443634033203, "rewards/rejected": -25.11105728149414, "step": 3544 }, { "epoch": 6.1, "learning_rate": 1.7892052698682532e-07, "logits/chosen": -1.7822972536087036, "logits/rejected": -1.7325507402420044, "logps/chosen": -191.1316375732422, "logps/rejected": -320.81011962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.04995059967041, "rewards/margins": 13.192770004272461, "rewards/rejected": -22.242719650268555, "step": 3545 }, { "epoch": 6.1, "learning_rate": 1.788142796430089e-07, "logits/chosen": -1.6422309875488281, "logits/rejected": -1.9279673099517822, "logps/chosen": -144.9483642578125, "logps/rejected": -310.2499694824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.579883575439453, "rewards/margins": 14.133415222167969, "rewards/rejected": -20.713298797607422, "step": 3546 }, { "epoch": 6.1, "learning_rate": 1.7870803229919252e-07, "logits/chosen": -1.6952447891235352, "logits/rejected": -1.8503940105438232, "logps/chosen": -144.65890502929688, "logps/rejected": -279.0031433105469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.482744216918945, "rewards/margins": 12.72252368927002, "rewards/rejected": -20.20526885986328, "step": 3547 }, { "epoch": 6.11, "learning_rate": 1.7860178495537612e-07, "logits/chosen": -1.9962859153747559, "logits/rejected": -1.8583916425704956, "logps/chosen": -148.60894775390625, "logps/rejected": -275.64404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.361559867858887, "rewards/margins": 12.256416320800781, "rewards/rejected": -19.617977142333984, "step": 3548 }, { "epoch": 6.11, "learning_rate": 1.784955376115597e-07, "logits/chosen": -1.8300611972808838, "logits/rejected": -1.5734233856201172, "logps/chosen": -202.8037567138672, "logps/rejected": -332.62200927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.66775131225586, "rewards/margins": 14.672271728515625, "rewards/rejected": -25.340023040771484, "step": 3549 }, { "epoch": 6.11, "learning_rate": 1.783892902677433e-07, "logits/chosen": -2.011870861053467, "logits/rejected": -1.7483241558074951, "logps/chosen": -175.271484375, "logps/rejected": -325.81005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.06915283203125, "rewards/margins": 16.67251968383789, "rewards/rejected": -24.741674423217773, "step": 3550 }, { "epoch": 6.11, "learning_rate": 1.782830429239269e-07, "logits/chosen": -1.7348804473876953, "logits/rejected": -1.9921841621398926, "logps/chosen": -140.79039001464844, "logps/rejected": -282.4957275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.013228416442871, "rewards/margins": 14.146220207214355, "rewards/rejected": -20.159446716308594, "step": 3551 }, { "epoch": 6.11, "learning_rate": 1.781767955801105e-07, "logits/chosen": -1.993567943572998, "logits/rejected": -1.5709795951843262, "logps/chosen": -156.35931396484375, "logps/rejected": -284.0205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.499836444854736, "rewards/margins": 13.604440689086914, "rewards/rejected": -20.104276657104492, "step": 3552 }, { "epoch": 6.12, "learning_rate": 1.780705482362941e-07, "logits/chosen": -1.8037662506103516, "logits/rejected": -1.9903572797775269, "logps/chosen": -120.24080657958984, "logps/rejected": -267.0952453613281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.625569820404053, "rewards/margins": 14.360834121704102, "rewards/rejected": -19.986404418945312, "step": 3553 }, { "epoch": 6.12, "learning_rate": 1.779643008924777e-07, "logits/chosen": -1.7938344478607178, "logits/rejected": -1.8705835342407227, "logps/chosen": -167.01138305664062, "logps/rejected": -326.4114990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.837747573852539, "rewards/margins": 15.339815139770508, "rewards/rejected": -22.177562713623047, "step": 3554 }, { "epoch": 6.12, "learning_rate": 1.7785805354866126e-07, "logits/chosen": -1.7039620876312256, "logits/rejected": -1.7669185400009155, "logps/chosen": -107.0674057006836, "logps/rejected": -266.9783020019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.288652420043945, "rewards/margins": 16.080032348632812, "rewards/rejected": -20.368684768676758, "step": 3555 }, { "epoch": 6.12, "learning_rate": 1.7775180620484486e-07, "logits/chosen": -1.2754840850830078, "logits/rejected": -1.9468202590942383, "logps/chosen": -152.29296875, "logps/rejected": -363.74371337890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.749612808227539, "rewards/margins": 17.53601837158203, "rewards/rejected": -24.28563117980957, "step": 3556 }, { "epoch": 6.12, "learning_rate": 1.7764555886102846e-07, "logits/chosen": -1.9446635246276855, "logits/rejected": -1.5506747961044312, "logps/chosen": -147.81704711914062, "logps/rejected": -319.950927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.54945182800293, "rewards/margins": 18.43061637878418, "rewards/rejected": -23.98006820678711, "step": 3557 }, { "epoch": 6.12, "learning_rate": 1.7753931151721206e-07, "logits/chosen": -1.6057798862457275, "logits/rejected": -1.8447601795196533, "logps/chosen": -147.4608154296875, "logps/rejected": -274.0770568847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.135092735290527, "rewards/margins": 11.956512451171875, "rewards/rejected": -19.09160614013672, "step": 3558 }, { "epoch": 6.13, "learning_rate": 1.7743306417339566e-07, "logits/chosen": -2.01587176322937, "logits/rejected": -1.673628568649292, "logps/chosen": -125.52777099609375, "logps/rejected": -226.30908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4303297996521, "rewards/margins": 11.290632247924805, "rewards/rejected": -16.720964431762695, "step": 3559 }, { "epoch": 6.13, "learning_rate": 1.7732681682957926e-07, "logits/chosen": -1.6921029090881348, "logits/rejected": -1.6017780303955078, "logps/chosen": -186.68951416015625, "logps/rejected": -308.3343505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.959728240966797, "rewards/margins": 12.316770553588867, "rewards/rejected": -22.276500701904297, "step": 3560 }, { "epoch": 6.13, "learning_rate": 1.7722056948576285e-07, "logits/chosen": -1.959838628768921, "logits/rejected": -1.8314454555511475, "logps/chosen": -164.81008911132812, "logps/rejected": -291.72039794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.490603446960449, "rewards/margins": 13.361299514770508, "rewards/rejected": -20.851903915405273, "step": 3561 }, { "epoch": 6.13, "learning_rate": 1.7711432214194643e-07, "logits/chosen": -1.7145546674728394, "logits/rejected": -1.8283129930496216, "logps/chosen": -152.86041259765625, "logps/rejected": -292.35296630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6444244384765625, "rewards/margins": 13.697855949401855, "rewards/rejected": -20.342281341552734, "step": 3562 }, { "epoch": 6.13, "learning_rate": 1.7700807479813005e-07, "logits/chosen": -1.9957513809204102, "logits/rejected": -1.8953208923339844, "logps/chosen": -127.42144775390625, "logps/rejected": -285.998291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.902853012084961, "rewards/margins": 16.126365661621094, "rewards/rejected": -21.029220581054688, "step": 3563 }, { "epoch": 6.13, "learning_rate": 1.7690182745431365e-07, "logits/chosen": -1.8026237487792969, "logits/rejected": -2.0609130859375, "logps/chosen": -162.7066650390625, "logps/rejected": -301.9599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.921054363250732, "rewards/margins": 13.736143112182617, "rewards/rejected": -21.657197952270508, "step": 3564 }, { "epoch": 6.14, "learning_rate": 1.7679558011049722e-07, "logits/chosen": -1.838035225868225, "logits/rejected": -1.7967000007629395, "logps/chosen": -127.59864807128906, "logps/rejected": -279.25177001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.579345703125, "rewards/margins": 14.474668502807617, "rewards/rejected": -19.054014205932617, "step": 3565 }, { "epoch": 6.14, "learning_rate": 1.7668933276668082e-07, "logits/chosen": -1.664224624633789, "logits/rejected": -2.150878429412842, "logps/chosen": -169.1292266845703, "logps/rejected": -319.9992980957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.710917472839355, "rewards/margins": 12.500088691711426, "rewards/rejected": -22.21100616455078, "step": 3566 }, { "epoch": 6.14, "learning_rate": 1.7658308542286442e-07, "logits/chosen": -1.7389030456542969, "logits/rejected": -1.7823991775512695, "logps/chosen": -201.863525390625, "logps/rejected": -377.33203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.036641120910645, "rewards/margins": 16.789440155029297, "rewards/rejected": -26.826082229614258, "step": 3567 }, { "epoch": 6.14, "learning_rate": 1.7647683807904802e-07, "logits/chosen": -1.8729772567749023, "logits/rejected": -1.9140630960464478, "logps/chosen": -111.46845245361328, "logps/rejected": -305.1249694824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.753472805023193, "rewards/margins": 17.477338790893555, "rewards/rejected": -22.230812072753906, "step": 3568 }, { "epoch": 6.14, "learning_rate": 1.7637059073523162e-07, "logits/chosen": -1.6800180673599243, "logits/rejected": -1.522606611251831, "logps/chosen": -145.46090698242188, "logps/rejected": -324.42022705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.631738662719727, "rewards/margins": 17.385143280029297, "rewards/rejected": -23.016883850097656, "step": 3569 }, { "epoch": 6.14, "learning_rate": 1.7626434339141522e-07, "logits/chosen": -1.7793867588043213, "logits/rejected": -1.7741578817367554, "logps/chosen": -154.35202026367188, "logps/rejected": -287.4495849609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.431857109069824, "rewards/margins": 13.21098518371582, "rewards/rejected": -21.64284324645996, "step": 3570 }, { "epoch": 6.15, "learning_rate": 1.761580960475988e-07, "logits/chosen": -1.8365845680236816, "logits/rejected": -1.9337589740753174, "logps/chosen": -178.287353515625, "logps/rejected": -327.816162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.614789009094238, "rewards/margins": 14.795013427734375, "rewards/rejected": -22.40980339050293, "step": 3571 }, { "epoch": 6.15, "learning_rate": 1.760518487037824e-07, "logits/chosen": -1.8562791347503662, "logits/rejected": -1.539615273475647, "logps/chosen": -160.762451171875, "logps/rejected": -312.80194091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.980952262878418, "rewards/margins": 17.797842025756836, "rewards/rejected": -22.778793334960938, "step": 3572 }, { "epoch": 6.15, "learning_rate": 1.75945601359966e-07, "logits/chosen": -1.9428658485412598, "logits/rejected": -1.7121058702468872, "logps/chosen": -180.22152709960938, "logps/rejected": -319.6671142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.425853729248047, "rewards/margins": 14.614640235900879, "rewards/rejected": -23.040494918823242, "step": 3573 }, { "epoch": 6.15, "learning_rate": 1.758393540161496e-07, "logits/chosen": -1.947952389717102, "logits/rejected": -1.8091678619384766, "logps/chosen": -162.6780548095703, "logps/rejected": -290.896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.900793075561523, "rewards/margins": 12.83117961883545, "rewards/rejected": -20.73197364807129, "step": 3574 }, { "epoch": 6.15, "learning_rate": 1.7573310667233319e-07, "logits/chosen": -1.623626708984375, "logits/rejected": -2.045788049697876, "logps/chosen": -156.33013916015625, "logps/rejected": -347.23876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.132033348083496, "rewards/margins": 18.749998092651367, "rewards/rejected": -25.882030487060547, "step": 3575 }, { "epoch": 6.15, "learning_rate": 1.7562685932851679e-07, "logits/chosen": -1.2815148830413818, "logits/rejected": -1.8932111263275146, "logps/chosen": -154.38385009765625, "logps/rejected": -308.13995361328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.700139999389648, "rewards/margins": 13.19234848022461, "rewards/rejected": -20.892488479614258, "step": 3576 }, { "epoch": 6.16, "learning_rate": 1.7552061198470036e-07, "logits/chosen": -1.8924260139465332, "logits/rejected": -1.611717939376831, "logps/chosen": -191.00067138671875, "logps/rejected": -338.95050048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.449397087097168, "rewards/margins": 15.176673889160156, "rewards/rejected": -24.62607192993164, "step": 3577 }, { "epoch": 6.16, "learning_rate": 1.7541436464088396e-07, "logits/chosen": -1.7486306428909302, "logits/rejected": -1.779695749282837, "logps/chosen": -150.76864624023438, "logps/rejected": -312.68157958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.89179801940918, "rewards/margins": 15.249532699584961, "rewards/rejected": -21.141328811645508, "step": 3578 }, { "epoch": 6.16, "learning_rate": 1.7530811729706758e-07, "logits/chosen": -1.7263758182525635, "logits/rejected": -1.9696576595306396, "logps/chosen": -126.47409057617188, "logps/rejected": -282.15594482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.767651557922363, "rewards/margins": 14.080314636230469, "rewards/rejected": -18.847965240478516, "step": 3579 }, { "epoch": 6.16, "learning_rate": 1.7520186995325118e-07, "logits/chosen": -1.7603704929351807, "logits/rejected": -1.9285364151000977, "logps/chosen": -173.67832946777344, "logps/rejected": -333.67401123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.613486289978027, "rewards/margins": 15.790321350097656, "rewards/rejected": -24.40380859375, "step": 3580 }, { "epoch": 6.16, "learning_rate": 1.7509562260943475e-07, "logits/chosen": -1.5812345743179321, "logits/rejected": -1.8392115831375122, "logps/chosen": -145.39942932128906, "logps/rejected": -321.2510070800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.028414726257324, "rewards/margins": 16.757076263427734, "rewards/rejected": -23.785491943359375, "step": 3581 }, { "epoch": 6.17, "learning_rate": 1.7498937526561835e-07, "logits/chosen": -1.9826874732971191, "logits/rejected": -1.7227879762649536, "logps/chosen": -165.56419372558594, "logps/rejected": -306.3433532714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.888317108154297, "rewards/margins": 14.63582992553711, "rewards/rejected": -21.52414894104004, "step": 3582 }, { "epoch": 6.17, "learning_rate": 1.7488312792180195e-07, "logits/chosen": -1.5896494388580322, "logits/rejected": -1.792246699333191, "logps/chosen": -158.44638061523438, "logps/rejected": -328.4355163574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.349897384643555, "rewards/margins": 14.508886337280273, "rewards/rejected": -22.858783721923828, "step": 3583 }, { "epoch": 6.17, "learning_rate": 1.7477688057798552e-07, "logits/chosen": -1.7684261798858643, "logits/rejected": -1.9517300128936768, "logps/chosen": -163.60406494140625, "logps/rejected": -289.1882019042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.677599906921387, "rewards/margins": 11.364435195922852, "rewards/rejected": -19.042036056518555, "step": 3584 }, { "epoch": 6.17, "learning_rate": 1.7467063323416915e-07, "logits/chosen": -1.59517240524292, "logits/rejected": -1.9908504486083984, "logps/chosen": -150.23812866210938, "logps/rejected": -328.67822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.423498630523682, "rewards/margins": 15.419167518615723, "rewards/rejected": -22.842666625976562, "step": 3585 }, { "epoch": 6.17, "learning_rate": 1.7456438589035275e-07, "logits/chosen": -1.7145252227783203, "logits/rejected": -1.9264109134674072, "logps/chosen": -175.1247100830078, "logps/rejected": -314.90234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.17625617980957, "rewards/margins": 14.044459342956543, "rewards/rejected": -22.220714569091797, "step": 3586 }, { "epoch": 6.17, "learning_rate": 1.7445813854653632e-07, "logits/chosen": -1.5800378322601318, "logits/rejected": -2.0106871128082275, "logps/chosen": -150.681396484375, "logps/rejected": -303.07220458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.945987701416016, "rewards/margins": 14.826862335205078, "rewards/rejected": -21.772850036621094, "step": 3587 }, { "epoch": 6.18, "learning_rate": 1.7435189120271992e-07, "logits/chosen": -1.5664560794830322, "logits/rejected": -2.095823287963867, "logps/chosen": -100.62549591064453, "logps/rejected": -301.251220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.025116443634033, "rewards/margins": 17.32794189453125, "rewards/rejected": -21.353057861328125, "step": 3588 }, { "epoch": 6.18, "learning_rate": 1.7424564385890352e-07, "logits/chosen": -1.7851269245147705, "logits/rejected": -1.7487399578094482, "logps/chosen": -169.78781127929688, "logps/rejected": -299.8822021484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.74729061126709, "rewards/margins": 13.03745174407959, "rewards/rejected": -20.78474235534668, "step": 3589 }, { "epoch": 6.18, "learning_rate": 1.7413939651508712e-07, "logits/chosen": -1.6366245746612549, "logits/rejected": -1.987788438796997, "logps/chosen": -126.92799377441406, "logps/rejected": -312.6800537109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.713776588439941, "rewards/margins": 18.132617950439453, "rewards/rejected": -22.84639549255371, "step": 3590 }, { "epoch": 6.18, "learning_rate": 1.7403314917127072e-07, "logits/chosen": -1.7713031768798828, "logits/rejected": -1.615432858467102, "logps/chosen": -154.86541748046875, "logps/rejected": -277.7679138183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.359339714050293, "rewards/margins": 14.051820755004883, "rewards/rejected": -20.41115951538086, "step": 3591 }, { "epoch": 6.18, "learning_rate": 1.7392690182745431e-07, "logits/chosen": -2.0300464630126953, "logits/rejected": -1.9038586616516113, "logps/chosen": -133.66571044921875, "logps/rejected": -287.0254821777344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.614308834075928, "rewards/margins": 15.117963790893555, "rewards/rejected": -19.732271194458008, "step": 3592 }, { "epoch": 6.18, "learning_rate": 1.738206544836379e-07, "logits/chosen": -2.01762056350708, "logits/rejected": -1.6768572330474854, "logps/chosen": -131.96981811523438, "logps/rejected": -259.4749450683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.7590484619140625, "rewards/margins": 13.876380920410156, "rewards/rejected": -20.63542938232422, "step": 3593 }, { "epoch": 6.19, "learning_rate": 1.7371440713982149e-07, "logits/chosen": -1.697143793106079, "logits/rejected": -1.997199535369873, "logps/chosen": -140.30027770996094, "logps/rejected": -332.1519470214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.13795280456543, "rewards/margins": 16.916791915893555, "rewards/rejected": -23.054744720458984, "step": 3594 }, { "epoch": 6.19, "learning_rate": 1.736081597960051e-07, "logits/chosen": -1.909367322921753, "logits/rejected": -1.881874680519104, "logps/chosen": -161.12750244140625, "logps/rejected": -316.4680480957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.591542720794678, "rewards/margins": 15.711996078491211, "rewards/rejected": -22.303537368774414, "step": 3595 }, { "epoch": 6.19, "learning_rate": 1.7350191245218868e-07, "logits/chosen": -2.118074417114258, "logits/rejected": -2.0681474208831787, "logps/chosen": -168.56077575683594, "logps/rejected": -319.13592529296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.966730117797852, "rewards/margins": 14.61681079864502, "rewards/rejected": -21.583541870117188, "step": 3596 }, { "epoch": 6.19, "learning_rate": 1.7339566510837228e-07, "logits/chosen": -1.6810219287872314, "logits/rejected": -1.8270646333694458, "logps/chosen": -179.65386962890625, "logps/rejected": -312.0948791503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.385721206665039, "rewards/margins": 13.50295639038086, "rewards/rejected": -21.8886775970459, "step": 3597 }, { "epoch": 6.19, "learning_rate": 1.7328941776455588e-07, "logits/chosen": -1.7264878749847412, "logits/rejected": -1.8841707706451416, "logps/chosen": -118.2472915649414, "logps/rejected": -298.410400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.705548286437988, "rewards/margins": 17.04859733581543, "rewards/rejected": -21.754146575927734, "step": 3598 }, { "epoch": 6.19, "learning_rate": 1.7318317042073945e-07, "logits/chosen": -2.0248258113861084, "logits/rejected": -1.7078640460968018, "logps/chosen": -185.91787719726562, "logps/rejected": -312.80767822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.579425811767578, "rewards/margins": 13.151988983154297, "rewards/rejected": -21.731414794921875, "step": 3599 }, { "epoch": 6.2, "learning_rate": 1.7307692307692305e-07, "logits/chosen": -1.9905496835708618, "logits/rejected": -1.9086041450500488, "logps/chosen": -169.38824462890625, "logps/rejected": -308.475830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.005698204040527, "rewards/margins": 15.737041473388672, "rewards/rejected": -22.742740631103516, "step": 3600 }, { "epoch": 6.2, "learning_rate": 1.7297067573310668e-07, "logits/chosen": -1.7997478246688843, "logits/rejected": -1.8082153797149658, "logps/chosen": -160.35581970214844, "logps/rejected": -299.3276062011719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.80876350402832, "rewards/margins": 13.846614837646484, "rewards/rejected": -22.655378341674805, "step": 3601 }, { "epoch": 6.2, "learning_rate": 1.7286442838929028e-07, "logits/chosen": -1.7121623754501343, "logits/rejected": -2.155510187149048, "logps/chosen": -136.09957885742188, "logps/rejected": -298.5418395996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.591695785522461, "rewards/margins": 14.000774383544922, "rewards/rejected": -20.592470169067383, "step": 3602 }, { "epoch": 6.2, "learning_rate": 1.7275818104547385e-07, "logits/chosen": -1.5144246816635132, "logits/rejected": -2.036874294281006, "logps/chosen": -166.8876495361328, "logps/rejected": -353.0946350097656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.028754234313965, "rewards/margins": 16.26165771484375, "rewards/rejected": -24.2904109954834, "step": 3603 }, { "epoch": 6.2, "learning_rate": 1.7265193370165745e-07, "logits/chosen": -1.8215276002883911, "logits/rejected": -1.8124009370803833, "logps/chosen": -171.22909545898438, "logps/rejected": -330.6578369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.080458641052246, "rewards/margins": 15.231779098510742, "rewards/rejected": -24.312236785888672, "step": 3604 }, { "epoch": 6.2, "learning_rate": 1.7254568635784105e-07, "logits/chosen": -2.0308945178985596, "logits/rejected": -1.650516152381897, "logps/chosen": -150.90487670898438, "logps/rejected": -294.6698913574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.8534393310546875, "rewards/margins": 15.748958587646484, "rewards/rejected": -21.602397918701172, "step": 3605 }, { "epoch": 6.21, "learning_rate": 1.7243943901402465e-07, "logits/chosen": -1.7450120449066162, "logits/rejected": -1.8312654495239258, "logps/chosen": -184.0142364501953, "logps/rejected": -288.3387451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.922614097595215, "rewards/margins": 11.429699897766113, "rewards/rejected": -20.352313995361328, "step": 3606 }, { "epoch": 6.21, "learning_rate": 1.7233319167020825e-07, "logits/chosen": -1.7040603160858154, "logits/rejected": -1.5216584205627441, "logps/chosen": -109.56456756591797, "logps/rejected": -205.72686767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.624959945678711, "rewards/margins": 10.953615188598633, "rewards/rejected": -14.578575134277344, "step": 3607 }, { "epoch": 6.21, "learning_rate": 1.7222694432639184e-07, "logits/chosen": -1.6111891269683838, "logits/rejected": -1.977855920791626, "logps/chosen": -186.03961181640625, "logps/rejected": -315.7349853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.186474800109863, "rewards/margins": 11.212847709655762, "rewards/rejected": -22.399322509765625, "step": 3608 }, { "epoch": 6.21, "learning_rate": 1.7212069698257542e-07, "logits/chosen": -1.4336092472076416, "logits/rejected": -1.948815107345581, "logps/chosen": -109.46397399902344, "logps/rejected": -240.2144317626953, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.687078475952148, "rewards/margins": 12.40768814086914, "rewards/rejected": -17.09476661682129, "step": 3609 }, { "epoch": 6.21, "learning_rate": 1.7201444963875902e-07, "logits/chosen": -1.7059288024902344, "logits/rejected": -2.0503556728363037, "logps/chosen": -201.5580596923828, "logps/rejected": -368.8213806152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.630678176879883, "rewards/margins": 15.733144760131836, "rewards/rejected": -26.36382293701172, "step": 3610 }, { "epoch": 6.22, "learning_rate": 1.7190820229494264e-07, "logits/chosen": -1.7540779113769531, "logits/rejected": -1.9745534658432007, "logps/chosen": -127.23918151855469, "logps/rejected": -300.51214599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.286025524139404, "rewards/margins": 16.38741683959961, "rewards/rejected": -21.673442840576172, "step": 3611 }, { "epoch": 6.22, "learning_rate": 1.718019549511262e-07, "logits/chosen": -1.8174538612365723, "logits/rejected": -1.9402287006378174, "logps/chosen": -124.08390045166016, "logps/rejected": -292.3865051269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.602487564086914, "rewards/margins": 16.88131332397461, "rewards/rejected": -21.483802795410156, "step": 3612 }, { "epoch": 6.22, "learning_rate": 1.716957076073098e-07, "logits/chosen": -1.9894933700561523, "logits/rejected": -1.8781083822250366, "logps/chosen": -150.218505859375, "logps/rejected": -261.7203674316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.344194412231445, "rewards/margins": 11.973529815673828, "rewards/rejected": -18.31772232055664, "step": 3613 }, { "epoch": 6.22, "learning_rate": 1.715894602634934e-07, "logits/chosen": -1.7251633405685425, "logits/rejected": -2.131855010986328, "logps/chosen": -133.166015625, "logps/rejected": -295.7925109863281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.488278865814209, "rewards/margins": 15.086634635925293, "rewards/rejected": -20.574913024902344, "step": 3614 }, { "epoch": 6.22, "learning_rate": 1.7148321291967698e-07, "logits/chosen": -1.8932678699493408, "logits/rejected": -1.8629841804504395, "logps/chosen": -118.12966918945312, "logps/rejected": -269.3159484863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.1230926513671875, "rewards/margins": 15.128832817077637, "rewards/rejected": -19.25192642211914, "step": 3615 }, { "epoch": 6.22, "learning_rate": 1.7137696557586058e-07, "logits/chosen": -1.6624274253845215, "logits/rejected": -1.8628274202346802, "logps/chosen": -139.3284912109375, "logps/rejected": -297.931884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.614804267883301, "rewards/margins": 15.425813674926758, "rewards/rejected": -22.040618896484375, "step": 3616 }, { "epoch": 6.23, "learning_rate": 1.712707182320442e-07, "logits/chosen": -1.9380838871002197, "logits/rejected": -1.9279217720031738, "logps/chosen": -151.4057159423828, "logps/rejected": -281.11517333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.148541450500488, "rewards/margins": 14.120195388793945, "rewards/rejected": -19.268735885620117, "step": 3617 }, { "epoch": 6.23, "learning_rate": 1.7116447088822778e-07, "logits/chosen": -1.9235527515411377, "logits/rejected": -1.6264485120773315, "logps/chosen": -176.48081970214844, "logps/rejected": -307.786376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.477836608886719, "rewards/margins": 16.073421478271484, "rewards/rejected": -23.551258087158203, "step": 3618 }, { "epoch": 6.23, "learning_rate": 1.7105822354441138e-07, "logits/chosen": -1.7119207382202148, "logits/rejected": -1.9927353858947754, "logps/chosen": -125.3554458618164, "logps/rejected": -314.20843505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.045372486114502, "rewards/margins": 18.351951599121094, "rewards/rejected": -23.397323608398438, "step": 3619 }, { "epoch": 6.23, "learning_rate": 1.7095197620059498e-07, "logits/chosen": -1.880993127822876, "logits/rejected": -1.8064625263214111, "logps/chosen": -160.7458038330078, "logps/rejected": -281.93218994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.379951477050781, "rewards/margins": 12.915756225585938, "rewards/rejected": -20.29570770263672, "step": 3620 }, { "epoch": 6.23, "learning_rate": 1.7084572885677855e-07, "logits/chosen": -2.037307024002075, "logits/rejected": -1.895000696182251, "logps/chosen": -129.7042236328125, "logps/rejected": -293.9335021972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.250675201416016, "rewards/margins": 17.33824348449707, "rewards/rejected": -21.58892059326172, "step": 3621 }, { "epoch": 6.23, "learning_rate": 1.7073948151296218e-07, "logits/chosen": -2.1455953121185303, "logits/rejected": -1.782432198524475, "logps/chosen": -151.14642333984375, "logps/rejected": -290.8287048339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.671718597412109, "rewards/margins": 15.25450325012207, "rewards/rejected": -21.92622184753418, "step": 3622 }, { "epoch": 6.24, "learning_rate": 1.7063323416914577e-07, "logits/chosen": -1.981567621231079, "logits/rejected": -1.6961021423339844, "logps/chosen": -169.791748046875, "logps/rejected": -299.3260498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.46115779876709, "rewards/margins": 13.967451095581055, "rewards/rejected": -20.42860984802246, "step": 3623 }, { "epoch": 6.24, "learning_rate": 1.7052698682532937e-07, "logits/chosen": -1.8680369853973389, "logits/rejected": -1.6685919761657715, "logps/chosen": -184.93441772460938, "logps/rejected": -327.3998107910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.337116718292236, "rewards/margins": 16.215564727783203, "rewards/rejected": -23.55268096923828, "step": 3624 }, { "epoch": 6.24, "learning_rate": 1.7042073948151295e-07, "logits/chosen": -2.0039310455322266, "logits/rejected": -1.70183265209198, "logps/chosen": -196.2467041015625, "logps/rejected": -319.01318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.449522018432617, "rewards/margins": 14.569196701049805, "rewards/rejected": -24.018718719482422, "step": 3625 }, { "epoch": 6.24, "learning_rate": 1.7031449213769654e-07, "logits/chosen": -1.7430274486541748, "logits/rejected": -2.1112821102142334, "logps/chosen": -109.6280288696289, "logps/rejected": -325.75103759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.998013734817505, "rewards/margins": 20.464324951171875, "rewards/rejected": -24.462339401245117, "step": 3626 }, { "epoch": 6.24, "learning_rate": 1.7020824479388014e-07, "logits/chosen": -1.4558367729187012, "logits/rejected": -2.1215946674346924, "logps/chosen": -120.0242919921875, "logps/rejected": -341.03839111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.366557598114014, "rewards/margins": 20.116451263427734, "rewards/rejected": -24.483009338378906, "step": 3627 }, { "epoch": 6.24, "learning_rate": 1.7010199745006374e-07, "logits/chosen": -1.9146249294281006, "logits/rejected": -2.23531174659729, "logps/chosen": -129.85826110839844, "logps/rejected": -307.2867431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.414552688598633, "rewards/margins": 17.2064208984375, "rewards/rejected": -21.620973587036133, "step": 3628 }, { "epoch": 6.25, "learning_rate": 1.6999575010624734e-07, "logits/chosen": -1.6776623725891113, "logits/rejected": -1.9567145109176636, "logps/chosen": -170.08453369140625, "logps/rejected": -324.3038635253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.883708000183105, "rewards/margins": 14.243175506591797, "rewards/rejected": -23.12688446044922, "step": 3629 }, { "epoch": 6.25, "learning_rate": 1.6988950276243094e-07, "logits/chosen": -1.638914942741394, "logits/rejected": -1.7994002103805542, "logps/chosen": -139.860107421875, "logps/rejected": -331.97161865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.643074035644531, "rewards/margins": 17.733001708984375, "rewards/rejected": -24.37607765197754, "step": 3630 }, { "epoch": 6.25, "learning_rate": 1.697832554186145e-07, "logits/chosen": -2.0862350463867188, "logits/rejected": -1.908483862876892, "logps/chosen": -161.4282684326172, "logps/rejected": -299.17498779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.6890339851379395, "rewards/margins": 13.445894241333008, "rewards/rejected": -19.13492774963379, "step": 3631 }, { "epoch": 6.25, "learning_rate": 1.696770080747981e-07, "logits/chosen": -1.9421299695968628, "logits/rejected": -1.6469736099243164, "logps/chosen": -93.32112884521484, "logps/rejected": -221.1745147705078, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8642187118530273, "rewards/margins": 12.905599594116211, "rewards/rejected": -15.769820213317871, "step": 3632 }, { "epoch": 6.25, "learning_rate": 1.6957076073098174e-07, "logits/chosen": -1.7610344886779785, "logits/rejected": -2.045170783996582, "logps/chosen": -160.5462646484375, "logps/rejected": -314.1334228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.837680816650391, "rewards/margins": 16.06066131591797, "rewards/rejected": -21.89834213256836, "step": 3633 }, { "epoch": 6.25, "learning_rate": 1.694645133871653e-07, "logits/chosen": -1.746948480606079, "logits/rejected": -1.9355273246765137, "logps/chosen": -143.1880340576172, "logps/rejected": -306.3843688964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.997862815856934, "rewards/margins": 15.514668464660645, "rewards/rejected": -22.51253318786621, "step": 3634 }, { "epoch": 6.26, "learning_rate": 1.693582660433489e-07, "logits/chosen": -1.9134567975997925, "logits/rejected": -2.0169289112091064, "logps/chosen": -195.8520050048828, "logps/rejected": -366.0119323730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.849014282226562, "rewards/margins": 16.778480529785156, "rewards/rejected": -28.62749481201172, "step": 3635 }, { "epoch": 6.26, "learning_rate": 1.692520186995325e-07, "logits/chosen": -2.0410308837890625, "logits/rejected": -1.8286418914794922, "logps/chosen": -204.17588806152344, "logps/rejected": -306.9975280761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.18952465057373, "rewards/margins": 11.983013153076172, "rewards/rejected": -22.17253875732422, "step": 3636 }, { "epoch": 6.26, "learning_rate": 1.6914577135571608e-07, "logits/chosen": -2.0795183181762695, "logits/rejected": -1.660689353942871, "logps/chosen": -157.62387084960938, "logps/rejected": -298.406005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.986738681793213, "rewards/margins": 15.408697128295898, "rewards/rejected": -21.395435333251953, "step": 3637 }, { "epoch": 6.26, "learning_rate": 1.690395240118997e-07, "logits/chosen": -1.923008680343628, "logits/rejected": -1.9624080657958984, "logps/chosen": -153.22332763671875, "logps/rejected": -275.73077392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.44461727142334, "rewards/margins": 11.059529304504395, "rewards/rejected": -18.504146575927734, "step": 3638 }, { "epoch": 6.26, "learning_rate": 1.689332766680833e-07, "logits/chosen": -1.705514907836914, "logits/rejected": -1.5700656175613403, "logps/chosen": -148.70294189453125, "logps/rejected": -280.020751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.670302391052246, "rewards/margins": 13.074344635009766, "rewards/rejected": -19.744646072387695, "step": 3639 }, { "epoch": 6.27, "learning_rate": 1.6882702932426688e-07, "logits/chosen": -1.9360594749450684, "logits/rejected": -1.6932591199874878, "logps/chosen": -154.95297241210938, "logps/rejected": -289.10540771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.94011926651001, "rewards/margins": 14.166933059692383, "rewards/rejected": -20.107051849365234, "step": 3640 }, { "epoch": 6.27, "learning_rate": 1.6872078198045048e-07, "logits/chosen": -1.5057106018066406, "logits/rejected": -1.8630993366241455, "logps/chosen": -121.13175201416016, "logps/rejected": -255.47586059570312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.544196605682373, "rewards/margins": 13.520458221435547, "rewards/rejected": -19.064655303955078, "step": 3641 }, { "epoch": 6.27, "learning_rate": 1.6861453463663407e-07, "logits/chosen": -1.8655369281768799, "logits/rejected": -1.9508299827575684, "logps/chosen": -162.79666137695312, "logps/rejected": -290.755859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.313477039337158, "rewards/margins": 13.408645629882812, "rewards/rejected": -20.722122192382812, "step": 3642 }, { "epoch": 6.27, "learning_rate": 1.6850828729281767e-07, "logits/chosen": -1.8915016651153564, "logits/rejected": -1.6962504386901855, "logps/chosen": -141.3970184326172, "logps/rejected": -261.1792907714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.199692249298096, "rewards/margins": 12.125682830810547, "rewards/rejected": -17.325376510620117, "step": 3643 }, { "epoch": 6.27, "learning_rate": 1.6840203994900127e-07, "logits/chosen": -2.0569400787353516, "logits/rejected": -1.8453339338302612, "logps/chosen": -155.6585693359375, "logps/rejected": -298.82940673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.930690765380859, "rewards/margins": 13.070151329040527, "rewards/rejected": -21.00084114074707, "step": 3644 }, { "epoch": 6.27, "learning_rate": 1.6829579260518487e-07, "logits/chosen": -1.6400163173675537, "logits/rejected": -1.4736342430114746, "logps/chosen": -117.05911254882812, "logps/rejected": -254.27847290039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.889535427093506, "rewards/margins": 14.382713317871094, "rewards/rejected": -18.272249221801758, "step": 3645 }, { "epoch": 6.28, "learning_rate": 1.6818954526136847e-07, "logits/chosen": -1.8256638050079346, "logits/rejected": -1.8007457256317139, "logps/chosen": -166.58067321777344, "logps/rejected": -372.6131286621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.633111476898193, "rewards/margins": 20.328670501708984, "rewards/rejected": -27.961782455444336, "step": 3646 }, { "epoch": 6.28, "learning_rate": 1.6808329791755204e-07, "logits/chosen": -1.874461054801941, "logits/rejected": -1.852031946182251, "logps/chosen": -150.50025939941406, "logps/rejected": -305.59112548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.071966171264648, "rewards/margins": 16.190385818481445, "rewards/rejected": -22.262351989746094, "step": 3647 }, { "epoch": 6.28, "learning_rate": 1.6797705057373564e-07, "logits/chosen": -1.6789510250091553, "logits/rejected": -1.924325942993164, "logps/chosen": -181.55343627929688, "logps/rejected": -316.2665710449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2965989112854, "rewards/margins": 14.052932739257812, "rewards/rejected": -21.349531173706055, "step": 3648 }, { "epoch": 6.28, "learning_rate": 1.6787080322991927e-07, "logits/chosen": -1.7395440340042114, "logits/rejected": -1.79009211063385, "logps/chosen": -157.25473022460938, "logps/rejected": -297.8634948730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.892579078674316, "rewards/margins": 13.611953735351562, "rewards/rejected": -20.504531860351562, "step": 3649 }, { "epoch": 6.28, "learning_rate": 1.6776455588610284e-07, "logits/chosen": -1.5921205282211304, "logits/rejected": -2.0703225135803223, "logps/chosen": -165.78384399414062, "logps/rejected": -428.5692138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.754071235656738, "rewards/margins": 23.45296287536621, "rewards/rejected": -31.207035064697266, "step": 3650 }, { "epoch": 6.28, "learning_rate": 1.6765830854228644e-07, "logits/chosen": -1.4423105716705322, "logits/rejected": -2.0651402473449707, "logps/chosen": -164.61126708984375, "logps/rejected": -308.2421569824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.400056838989258, "rewards/margins": 12.316511154174805, "rewards/rejected": -20.716567993164062, "step": 3651 }, { "epoch": 6.29, "learning_rate": 1.6755206119847004e-07, "logits/chosen": -1.8337419033050537, "logits/rejected": -1.7779620885849, "logps/chosen": -152.7305908203125, "logps/rejected": -294.57623291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.628942489624023, "rewards/margins": 14.871589660644531, "rewards/rejected": -20.500534057617188, "step": 3652 }, { "epoch": 6.29, "learning_rate": 1.674458138546536e-07, "logits/chosen": -1.8926007747650146, "logits/rejected": -1.9352916479110718, "logps/chosen": -110.79824829101562, "logps/rejected": -263.2906494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4993855953216553, "rewards/margins": 15.559142112731934, "rewards/rejected": -19.05852699279785, "step": 3653 }, { "epoch": 6.29, "learning_rate": 1.673395665108372e-07, "logits/chosen": -1.8532145023345947, "logits/rejected": -1.6905131340026855, "logps/chosen": -178.95700073242188, "logps/rejected": -327.4033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.15037727355957, "rewards/margins": 15.208725929260254, "rewards/rejected": -23.359102249145508, "step": 3654 }, { "epoch": 6.29, "learning_rate": 1.6723331916702083e-07, "logits/chosen": -1.9386351108551025, "logits/rejected": -1.8894579410552979, "logps/chosen": -126.13638305664062, "logps/rejected": -274.56829833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.403158187866211, "rewards/margins": 15.040590286254883, "rewards/rejected": -19.443748474121094, "step": 3655 }, { "epoch": 6.29, "learning_rate": 1.671270718232044e-07, "logits/chosen": -1.7192237377166748, "logits/rejected": -1.6802340745925903, "logps/chosen": -143.83584594726562, "logps/rejected": -274.73883056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.629489421844482, "rewards/margins": 13.384634017944336, "rewards/rejected": -20.014123916625977, "step": 3656 }, { "epoch": 6.29, "learning_rate": 1.67020824479388e-07, "logits/chosen": -1.9878864288330078, "logits/rejected": -1.7668583393096924, "logps/chosen": -158.162353515625, "logps/rejected": -388.90283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.790876865386963, "rewards/margins": 22.32068634033203, "rewards/rejected": -29.111560821533203, "step": 3657 }, { "epoch": 6.3, "learning_rate": 1.669145771355716e-07, "logits/chosen": -1.8557679653167725, "logits/rejected": -1.833620548248291, "logps/chosen": -140.97872924804688, "logps/rejected": -276.4839172363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.103300094604492, "rewards/margins": 15.616652488708496, "rewards/rejected": -19.719953536987305, "step": 3658 }, { "epoch": 6.3, "learning_rate": 1.6680832979175518e-07, "logits/chosen": -1.5338798761367798, "logits/rejected": -2.095863103866577, "logps/chosen": -135.5195770263672, "logps/rejected": -303.16552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.242110729217529, "rewards/margins": 16.36932373046875, "rewards/rejected": -21.611434936523438, "step": 3659 }, { "epoch": 6.3, "learning_rate": 1.667020824479388e-07, "logits/chosen": -1.857898235321045, "logits/rejected": -1.9129276275634766, "logps/chosen": -146.52120971679688, "logps/rejected": -311.81854248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2356767654418945, "rewards/margins": 15.951936721801758, "rewards/rejected": -22.18761444091797, "step": 3660 }, { "epoch": 6.3, "learning_rate": 1.665958351041224e-07, "logits/chosen": -1.7905274629592896, "logits/rejected": -1.9932795763015747, "logps/chosen": -166.72117614746094, "logps/rejected": -328.11492919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.640476703643799, "rewards/margins": 15.376873970031738, "rewards/rejected": -23.017351150512695, "step": 3661 }, { "epoch": 6.3, "learning_rate": 1.6648958776030597e-07, "logits/chosen": -1.9236668348312378, "logits/rejected": -2.0174055099487305, "logps/chosen": -148.0619354248047, "logps/rejected": -307.1138916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.152508735656738, "rewards/margins": 15.565460205078125, "rewards/rejected": -20.71796989440918, "step": 3662 }, { "epoch": 6.3, "learning_rate": 1.6638334041648957e-07, "logits/chosen": -1.8664132356643677, "logits/rejected": -2.034949541091919, "logps/chosen": -137.77359008789062, "logps/rejected": -319.7947998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8057174682617188, "rewards/margins": 18.253957748413086, "rewards/rejected": -22.059673309326172, "step": 3663 }, { "epoch": 6.31, "learning_rate": 1.6627709307267317e-07, "logits/chosen": -1.8196420669555664, "logits/rejected": -1.484218716621399, "logps/chosen": -139.89974975585938, "logps/rejected": -276.8719787597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.331697463989258, "rewards/margins": 14.308576583862305, "rewards/rejected": -20.640274047851562, "step": 3664 }, { "epoch": 6.31, "learning_rate": 1.661708457288568e-07, "logits/chosen": -1.9384407997131348, "logits/rejected": -1.5632730722427368, "logps/chosen": -131.045166015625, "logps/rejected": -219.0231170654297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.371814727783203, "rewards/margins": 12.001201629638672, "rewards/rejected": -16.373016357421875, "step": 3665 }, { "epoch": 6.31, "learning_rate": 1.6606459838504037e-07, "logits/chosen": -1.956777572631836, "logits/rejected": -1.774163842201233, "logps/chosen": -146.69491577148438, "logps/rejected": -283.7464294433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.725406646728516, "rewards/margins": 15.069759368896484, "rewards/rejected": -20.795166015625, "step": 3666 }, { "epoch": 6.31, "learning_rate": 1.6595835104122397e-07, "logits/chosen": -1.444209337234497, "logits/rejected": -2.040928363800049, "logps/chosen": -165.44515991210938, "logps/rejected": -319.299560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.597489356994629, "rewards/margins": 12.994359016418457, "rewards/rejected": -22.591846466064453, "step": 3667 }, { "epoch": 6.31, "learning_rate": 1.6585210369740757e-07, "logits/chosen": -2.0965075492858887, "logits/rejected": -1.382237434387207, "logps/chosen": -167.2019500732422, "logps/rejected": -283.6007385253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.71175479888916, "rewards/margins": 14.358922958374023, "rewards/rejected": -21.0706787109375, "step": 3668 }, { "epoch": 6.31, "learning_rate": 1.6574585635359114e-07, "logits/chosen": -1.5921156406402588, "logits/rejected": -1.9070520401000977, "logps/chosen": -149.33419799804688, "logps/rejected": -298.7157897949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.904109001159668, "rewards/margins": 14.403545379638672, "rewards/rejected": -21.30765151977539, "step": 3669 }, { "epoch": 6.32, "learning_rate": 1.6563960900977474e-07, "logits/chosen": -1.8895896673202515, "logits/rejected": -1.9443106651306152, "logps/chosen": -153.05691528320312, "logps/rejected": -305.9220886230469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.431218147277832, "rewards/margins": 14.398933410644531, "rewards/rejected": -21.830150604248047, "step": 3670 }, { "epoch": 6.32, "learning_rate": 1.6553336166595836e-07, "logits/chosen": -1.774352788925171, "logits/rejected": -1.990707278251648, "logps/chosen": -142.09376525878906, "logps/rejected": -305.8459167480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.983514308929443, "rewards/margins": 17.661949157714844, "rewards/rejected": -23.645465850830078, "step": 3671 }, { "epoch": 6.32, "learning_rate": 1.6542711432214194e-07, "logits/chosen": -1.6570543050765991, "logits/rejected": -1.744581937789917, "logps/chosen": -169.06805419921875, "logps/rejected": -293.24749755859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.973174095153809, "rewards/margins": 11.756475448608398, "rewards/rejected": -19.72964859008789, "step": 3672 }, { "epoch": 6.32, "learning_rate": 1.6532086697832553e-07, "logits/chosen": -2.0457587242126465, "logits/rejected": -1.478656530380249, "logps/chosen": -194.88330078125, "logps/rejected": -290.799560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.682804107666016, "rewards/margins": 12.118191719055176, "rewards/rejected": -21.800994873046875, "step": 3673 }, { "epoch": 6.32, "learning_rate": 1.6521461963450913e-07, "logits/chosen": -1.8131837844848633, "logits/rejected": -1.9173212051391602, "logps/chosen": -165.5764923095703, "logps/rejected": -287.9974060058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.558318614959717, "rewards/margins": 13.281740188598633, "rewards/rejected": -20.840057373046875, "step": 3674 }, { "epoch": 6.33, "learning_rate": 1.651083722906927e-07, "logits/chosen": -1.8995580673217773, "logits/rejected": -1.9966397285461426, "logps/chosen": -163.49002075195312, "logps/rejected": -280.4017333984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.405105113983154, "rewards/margins": 11.526517868041992, "rewards/rejected": -18.931621551513672, "step": 3675 }, { "epoch": 6.33, "learning_rate": 1.6500212494687633e-07, "logits/chosen": -1.5658257007598877, "logits/rejected": -1.8227269649505615, "logps/chosen": -179.62957763671875, "logps/rejected": -305.1781005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.55166244506836, "rewards/margins": 11.777731895446777, "rewards/rejected": -21.329395294189453, "step": 3676 }, { "epoch": 6.33, "learning_rate": 1.6489587760305993e-07, "logits/chosen": -1.5357505083084106, "logits/rejected": -1.9173156023025513, "logps/chosen": -142.93063354492188, "logps/rejected": -305.3023681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.948231220245361, "rewards/margins": 13.707890510559082, "rewards/rejected": -20.6561222076416, "step": 3677 }, { "epoch": 6.33, "learning_rate": 1.647896302592435e-07, "logits/chosen": -1.8871634006500244, "logits/rejected": -1.717734456062317, "logps/chosen": -143.10256958007812, "logps/rejected": -306.2276611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.937325477600098, "rewards/margins": 15.704717636108398, "rewards/rejected": -21.64204216003418, "step": 3678 }, { "epoch": 6.33, "learning_rate": 1.646833829154271e-07, "logits/chosen": -1.7955271005630493, "logits/rejected": -1.7809512615203857, "logps/chosen": -157.7172088623047, "logps/rejected": -351.20745849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.160246849060059, "rewards/margins": 18.782188415527344, "rewards/rejected": -25.942434310913086, "step": 3679 }, { "epoch": 6.33, "learning_rate": 1.645771355716107e-07, "logits/chosen": -1.5556236505508423, "logits/rejected": -1.9667553901672363, "logps/chosen": -212.2086639404297, "logps/rejected": -314.4215087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.938774108886719, "rewards/margins": 11.122729301452637, "rewards/rejected": -23.061504364013672, "step": 3680 }, { "epoch": 6.34, "learning_rate": 1.6447088822779427e-07, "logits/chosen": -1.720217227935791, "logits/rejected": -2.0087931156158447, "logps/chosen": -110.20457458496094, "logps/rejected": -286.935791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.66768217086792, "rewards/margins": 16.677671432495117, "rewards/rejected": -20.345354080200195, "step": 3681 }, { "epoch": 6.34, "learning_rate": 1.643646408839779e-07, "logits/chosen": -1.889115810394287, "logits/rejected": -1.7300559282302856, "logps/chosen": -169.05360412597656, "logps/rejected": -286.4237365722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.620633125305176, "rewards/margins": 11.221956253051758, "rewards/rejected": -19.84259033203125, "step": 3682 }, { "epoch": 6.34, "learning_rate": 1.642583935401615e-07, "logits/chosen": -1.8641884326934814, "logits/rejected": -1.8907467126846313, "logps/chosen": -120.13372802734375, "logps/rejected": -304.8451232910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.363350868225098, "rewards/margins": 18.597673416137695, "rewards/rejected": -22.961021423339844, "step": 3683 }, { "epoch": 6.34, "learning_rate": 1.6415214619634507e-07, "logits/chosen": -1.9629967212677002, "logits/rejected": -1.4319251775741577, "logps/chosen": -154.402099609375, "logps/rejected": -297.533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.703892230987549, "rewards/margins": 15.347101211547852, "rewards/rejected": -22.050994873046875, "step": 3684 }, { "epoch": 6.34, "learning_rate": 1.6404589885252867e-07, "logits/chosen": -2.0421438217163086, "logits/rejected": -1.8119691610336304, "logps/chosen": -158.3919677734375, "logps/rejected": -326.29150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.437554359436035, "rewards/margins": 16.666521072387695, "rewards/rejected": -24.104076385498047, "step": 3685 }, { "epoch": 6.34, "learning_rate": 1.6393965150871227e-07, "logits/chosen": -1.9765492677688599, "logits/rejected": -1.7840981483459473, "logps/chosen": -141.27102661132812, "logps/rejected": -294.01507568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.292323112487793, "rewards/margins": 16.220218658447266, "rewards/rejected": -21.512540817260742, "step": 3686 }, { "epoch": 6.35, "learning_rate": 1.638334041648959e-07, "logits/chosen": -2.0520272254943848, "logits/rejected": -1.979860782623291, "logps/chosen": -144.10012817382812, "logps/rejected": -269.2308044433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7181267738342285, "rewards/margins": 12.711641311645508, "rewards/rejected": -18.429767608642578, "step": 3687 }, { "epoch": 6.35, "learning_rate": 1.6372715682107947e-07, "logits/chosen": -1.5358095169067383, "logits/rejected": -1.8009016513824463, "logps/chosen": -101.02871704101562, "logps/rejected": -312.1435546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9977006912231445, "rewards/margins": 20.165363311767578, "rewards/rejected": -23.163061141967773, "step": 3688 }, { "epoch": 6.35, "learning_rate": 1.6362090947726306e-07, "logits/chosen": -1.8065348863601685, "logits/rejected": -1.8081796169281006, "logps/chosen": -160.51812744140625, "logps/rejected": -299.6624755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.544600963592529, "rewards/margins": 14.245617866516113, "rewards/rejected": -20.790218353271484, "step": 3689 }, { "epoch": 6.35, "learning_rate": 1.6351466213344666e-07, "logits/chosen": -1.9009382724761963, "logits/rejected": -1.7403346300125122, "logps/chosen": -163.9866943359375, "logps/rejected": -283.22784423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.737917900085449, "rewards/margins": 13.322901725769043, "rewards/rejected": -21.060819625854492, "step": 3690 }, { "epoch": 6.35, "learning_rate": 1.6340841478963024e-07, "logits/chosen": -1.5081470012664795, "logits/rejected": -2.0774128437042236, "logps/chosen": -177.438232421875, "logps/rejected": -377.6678771972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.415327072143555, "rewards/margins": 17.899057388305664, "rewards/rejected": -27.31438446044922, "step": 3691 }, { "epoch": 6.35, "learning_rate": 1.6330216744581386e-07, "logits/chosen": -1.9833283424377441, "logits/rejected": -1.9166944026947021, "logps/chosen": -216.8834686279297, "logps/rejected": -343.6471862792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.00012493133545, "rewards/margins": 12.8861665725708, "rewards/rejected": -24.88629150390625, "step": 3692 }, { "epoch": 6.36, "learning_rate": 1.6319592010199746e-07, "logits/chosen": -1.9478031396865845, "logits/rejected": -1.5466735363006592, "logps/chosen": -173.86770629882812, "logps/rejected": -303.2900695800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.311384677886963, "rewards/margins": 14.431177139282227, "rewards/rejected": -21.742563247680664, "step": 3693 }, { "epoch": 6.36, "learning_rate": 1.6308967275818103e-07, "logits/chosen": -1.94175386428833, "logits/rejected": -1.8843045234680176, "logps/chosen": -154.60069274902344, "logps/rejected": -306.49676513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0484161376953125, "rewards/margins": 14.89548110961914, "rewards/rejected": -20.943897247314453, "step": 3694 }, { "epoch": 6.36, "learning_rate": 1.6298342541436463e-07, "logits/chosen": -1.6837128400802612, "logits/rejected": -1.7057737112045288, "logps/chosen": -176.6358642578125, "logps/rejected": -314.631103515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.22453784942627, "rewards/margins": 14.343437194824219, "rewards/rejected": -23.567974090576172, "step": 3695 }, { "epoch": 6.36, "learning_rate": 1.6287717807054823e-07, "logits/chosen": -2.1031556129455566, "logits/rejected": -1.8601990938186646, "logps/chosen": -184.20208740234375, "logps/rejected": -267.9450378417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.881128311157227, "rewards/margins": 10.180717468261719, "rewards/rejected": -19.061845779418945, "step": 3696 }, { "epoch": 6.36, "learning_rate": 1.627709307267318e-07, "logits/chosen": -1.6097254753112793, "logits/rejected": -1.7632453441619873, "logps/chosen": -201.49069213867188, "logps/rejected": -339.9501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.858346939086914, "rewards/margins": 12.711029052734375, "rewards/rejected": -24.56937599182129, "step": 3697 }, { "epoch": 6.36, "learning_rate": 1.6266468338291543e-07, "logits/chosen": -1.9025410413742065, "logits/rejected": -1.6031047105789185, "logps/chosen": -155.3414306640625, "logps/rejected": -308.6575927734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.328288555145264, "rewards/margins": 16.88545799255371, "rewards/rejected": -23.2137451171875, "step": 3698 }, { "epoch": 6.37, "learning_rate": 1.6255843603909903e-07, "logits/chosen": -1.843241572380066, "logits/rejected": -1.4592512845993042, "logps/chosen": -132.48561096191406, "logps/rejected": -249.12283325195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.251853942871094, "rewards/margins": 12.950845718383789, "rewards/rejected": -19.20269775390625, "step": 3699 }, { "epoch": 6.37, "learning_rate": 1.624521886952826e-07, "logits/chosen": -1.9291861057281494, "logits/rejected": -1.6885350942611694, "logps/chosen": -108.05857849121094, "logps/rejected": -236.83168029785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5972046852111816, "rewards/margins": 13.260086059570312, "rewards/rejected": -16.857290267944336, "step": 3700 }, { "epoch": 6.37, "learning_rate": 1.623459413514662e-07, "logits/chosen": -1.6740742921829224, "logits/rejected": -2.0970957279205322, "logps/chosen": -153.35926818847656, "logps/rejected": -332.7676696777344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.596587181091309, "rewards/margins": 16.254772186279297, "rewards/rejected": -23.851360321044922, "step": 3701 }, { "epoch": 6.37, "learning_rate": 1.622396940076498e-07, "logits/chosen": -1.7522846460342407, "logits/rejected": -2.1434826850891113, "logps/chosen": -118.31820678710938, "logps/rejected": -271.60455322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.132885932922363, "rewards/margins": 13.71929931640625, "rewards/rejected": -17.852184295654297, "step": 3702 }, { "epoch": 6.37, "learning_rate": 1.621334466638334e-07, "logits/chosen": -1.82332181930542, "logits/rejected": -1.9571149349212646, "logps/chosen": -119.98359680175781, "logps/rejected": -270.8816833496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.0409722328186035, "rewards/margins": 13.65460205078125, "rewards/rejected": -18.695575714111328, "step": 3703 }, { "epoch": 6.38, "learning_rate": 1.62027199320017e-07, "logits/chosen": -2.0070700645446777, "logits/rejected": -1.5174639225006104, "logps/chosen": -165.64407348632812, "logps/rejected": -322.58526611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.858065128326416, "rewards/margins": 16.164304733276367, "rewards/rejected": -23.022371292114258, "step": 3704 }, { "epoch": 6.38, "learning_rate": 1.619209519762006e-07, "logits/chosen": -1.6015286445617676, "logits/rejected": -1.8940777778625488, "logps/chosen": -163.41761779785156, "logps/rejected": -330.3587646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.705504417419434, "rewards/margins": 14.238365173339844, "rewards/rejected": -23.94386863708496, "step": 3705 }, { "epoch": 6.38, "learning_rate": 1.618147046323842e-07, "logits/chosen": -2.0075442790985107, "logits/rejected": -2.0113589763641357, "logps/chosen": -126.31949615478516, "logps/rejected": -254.5177764892578, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.7476983070373535, "rewards/margins": 12.980191230773926, "rewards/rejected": -17.727890014648438, "step": 3706 }, { "epoch": 6.38, "learning_rate": 1.6170845728856776e-07, "logits/chosen": -1.6719269752502441, "logits/rejected": -1.9089672565460205, "logps/chosen": -165.54794311523438, "logps/rejected": -322.00537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.157770156860352, "rewards/margins": 14.231250762939453, "rewards/rejected": -22.389022827148438, "step": 3707 }, { "epoch": 6.38, "learning_rate": 1.616022099447514e-07, "logits/chosen": -1.725003957748413, "logits/rejected": -1.951742172241211, "logps/chosen": -136.11740112304688, "logps/rejected": -298.4801025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.269401550292969, "rewards/margins": 13.96285343170166, "rewards/rejected": -21.232254028320312, "step": 3708 }, { "epoch": 6.38, "learning_rate": 1.61495962600935e-07, "logits/chosen": -1.9814825057983398, "logits/rejected": -1.8524121046066284, "logps/chosen": -157.06448364257812, "logps/rejected": -276.0231018066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.194835662841797, "rewards/margins": 11.75878620147705, "rewards/rejected": -19.95362091064453, "step": 3709 }, { "epoch": 6.39, "learning_rate": 1.6138971525711856e-07, "logits/chosen": -1.590984582901001, "logits/rejected": -2.058197259902954, "logps/chosen": -167.06802368164062, "logps/rejected": -341.36785888671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.204215049743652, "rewards/margins": 14.228647232055664, "rewards/rejected": -22.432863235473633, "step": 3710 }, { "epoch": 6.39, "learning_rate": 1.6128346791330216e-07, "logits/chosen": -1.897176742553711, "logits/rejected": -1.9492192268371582, "logps/chosen": -159.609130859375, "logps/rejected": -333.7960205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.095284461975098, "rewards/margins": 16.295698165893555, "rewards/rejected": -23.39098358154297, "step": 3711 }, { "epoch": 6.39, "learning_rate": 1.6117722056948576e-07, "logits/chosen": -1.6867268085479736, "logits/rejected": -1.8679109811782837, "logps/chosen": -128.75196838378906, "logps/rejected": -317.85162353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.966513156890869, "rewards/margins": 18.188629150390625, "rewards/rejected": -24.155141830444336, "step": 3712 }, { "epoch": 6.39, "learning_rate": 1.6107097322566933e-07, "logits/chosen": -1.977012276649475, "logits/rejected": -2.0219194889068604, "logps/chosen": -114.3305435180664, "logps/rejected": -263.93780517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.048921585083008, "rewards/margins": 13.830023765563965, "rewards/rejected": -17.878944396972656, "step": 3713 }, { "epoch": 6.39, "learning_rate": 1.6096472588185296e-07, "logits/chosen": -1.7799859046936035, "logits/rejected": -1.7515053749084473, "logps/chosen": -165.79861450195312, "logps/rejected": -372.45013427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.278247833251953, "rewards/margins": 19.160795211791992, "rewards/rejected": -27.439041137695312, "step": 3714 }, { "epoch": 6.39, "learning_rate": 1.6085847853803656e-07, "logits/chosen": -1.9037449359893799, "logits/rejected": -1.7497870922088623, "logps/chosen": -155.1295166015625, "logps/rejected": -291.5312194824219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.495931625366211, "rewards/margins": 13.537856101989746, "rewards/rejected": -20.03378677368164, "step": 3715 }, { "epoch": 6.4, "learning_rate": 1.6075223119422013e-07, "logits/chosen": -1.6243582963943481, "logits/rejected": -1.9699733257293701, "logps/chosen": -139.3788604736328, "logps/rejected": -286.9765319824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.095663070678711, "rewards/margins": 13.687926292419434, "rewards/rejected": -19.783588409423828, "step": 3716 }, { "epoch": 6.4, "learning_rate": 1.6064598385040373e-07, "logits/chosen": -1.6615076065063477, "logits/rejected": -1.6985851526260376, "logps/chosen": -180.84405517578125, "logps/rejected": -331.4339294433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.019824981689453, "rewards/margins": 15.332096099853516, "rewards/rejected": -24.35192108154297, "step": 3717 }, { "epoch": 6.4, "learning_rate": 1.6053973650658733e-07, "logits/chosen": -1.5610898733139038, "logits/rejected": -1.9116332530975342, "logps/chosen": -102.50660705566406, "logps/rejected": -227.0217742919922, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.139589309692383, "rewards/margins": 11.815950393676758, "rewards/rejected": -15.955541610717773, "step": 3718 }, { "epoch": 6.4, "learning_rate": 1.6043348916277093e-07, "logits/chosen": -1.790271520614624, "logits/rejected": -2.0069265365600586, "logps/chosen": -210.55108642578125, "logps/rejected": -362.8183288574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.679788589477539, "rewards/margins": 13.63178825378418, "rewards/rejected": -26.31157684326172, "step": 3719 }, { "epoch": 6.4, "learning_rate": 1.6032724181895452e-07, "logits/chosen": -1.9688869714736938, "logits/rejected": -1.803232192993164, "logps/chosen": -131.95217895507812, "logps/rejected": -314.50390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.624115943908691, "rewards/margins": 17.637882232666016, "rewards/rejected": -23.261999130249023, "step": 3720 }, { "epoch": 6.4, "learning_rate": 1.6022099447513812e-07, "logits/chosen": -1.6924394369125366, "logits/rejected": -2.096092462539673, "logps/chosen": -167.5156707763672, "logps/rejected": -327.1459045410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.97386360168457, "rewards/margins": 13.380615234375, "rewards/rejected": -22.354476928710938, "step": 3721 }, { "epoch": 6.41, "learning_rate": 1.601147471313217e-07, "logits/chosen": -1.8382141590118408, "logits/rejected": -1.752894401550293, "logps/chosen": -170.80545043945312, "logps/rejected": -304.61676025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.818730354309082, "rewards/margins": 14.072135925292969, "rewards/rejected": -20.890865325927734, "step": 3722 }, { "epoch": 6.41, "learning_rate": 1.600084997875053e-07, "logits/chosen": -1.898046612739563, "logits/rejected": -2.0772109031677246, "logps/chosen": -139.7475128173828, "logps/rejected": -277.26580810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.028153419494629, "rewards/margins": 14.156956672668457, "rewards/rejected": -21.185110092163086, "step": 3723 }, { "epoch": 6.41, "learning_rate": 1.5990225244368892e-07, "logits/chosen": -1.631790280342102, "logits/rejected": -1.9724771976470947, "logps/chosen": -139.5448760986328, "logps/rejected": -334.9282531738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.902887344360352, "rewards/margins": 18.711576461791992, "rewards/rejected": -24.614463806152344, "step": 3724 }, { "epoch": 6.41, "learning_rate": 1.597960050998725e-07, "logits/chosen": -1.833470106124878, "logits/rejected": -1.6526694297790527, "logps/chosen": -190.9788818359375, "logps/rejected": -324.228271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.945328712463379, "rewards/margins": 13.736120223999023, "rewards/rejected": -24.68144989013672, "step": 3725 }, { "epoch": 6.41, "learning_rate": 1.596897577560561e-07, "logits/chosen": -1.7562947273254395, "logits/rejected": -2.0894885063171387, "logps/chosen": -144.04095458984375, "logps/rejected": -339.9700622558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.083023548126221, "rewards/margins": 17.626964569091797, "rewards/rejected": -23.70998764038086, "step": 3726 }, { "epoch": 6.41, "learning_rate": 1.595835104122397e-07, "logits/chosen": -1.7342171669006348, "logits/rejected": -1.8780720233917236, "logps/chosen": -128.53985595703125, "logps/rejected": -365.439208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.765375137329102, "rewards/margins": 22.701183319091797, "rewards/rejected": -27.4665584564209, "step": 3727 }, { "epoch": 6.42, "learning_rate": 1.594772630684233e-07, "logits/chosen": -1.7995750904083252, "logits/rejected": -2.0264196395874023, "logps/chosen": -111.72291564941406, "logps/rejected": -293.8431396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4993338584899902, "rewards/margins": 16.851341247558594, "rewards/rejected": -20.350675582885742, "step": 3728 }, { "epoch": 6.42, "learning_rate": 1.5937101572460686e-07, "logits/chosen": -1.9065001010894775, "logits/rejected": -1.7227565050125122, "logps/chosen": -147.09921264648438, "logps/rejected": -290.5155029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.934914588928223, "rewards/margins": 14.55823040008545, "rewards/rejected": -20.493144989013672, "step": 3729 }, { "epoch": 6.42, "learning_rate": 1.5926476838079049e-07, "logits/chosen": -1.8439388275146484, "logits/rejected": -1.813459873199463, "logps/chosen": -161.98492431640625, "logps/rejected": -295.8457336425781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.34295129776001, "rewards/margins": 13.88882064819336, "rewards/rejected": -20.23177146911621, "step": 3730 }, { "epoch": 6.42, "learning_rate": 1.5915852103697409e-07, "logits/chosen": -1.379380464553833, "logits/rejected": -1.6286423206329346, "logps/chosen": -142.7450714111328, "logps/rejected": -317.51275634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.929939270019531, "rewards/margins": 15.366034507751465, "rewards/rejected": -22.295974731445312, "step": 3731 }, { "epoch": 6.42, "learning_rate": 1.5905227369315766e-07, "logits/chosen": -1.7777681350708008, "logits/rejected": -1.5831276178359985, "logps/chosen": -160.55416870117188, "logps/rejected": -316.9223327636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.528254508972168, "rewards/margins": 18.095481872558594, "rewards/rejected": -24.623737335205078, "step": 3732 }, { "epoch": 6.43, "learning_rate": 1.5894602634934126e-07, "logits/chosen": -1.8914458751678467, "logits/rejected": -1.6858232021331787, "logps/chosen": -168.44857788085938, "logps/rejected": -303.7978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.724576950073242, "rewards/margins": 13.549147605895996, "rewards/rejected": -22.273725509643555, "step": 3733 }, { "epoch": 6.43, "learning_rate": 1.5883977900552486e-07, "logits/chosen": -1.8283110857009888, "logits/rejected": -1.7739747762680054, "logps/chosen": -162.25421142578125, "logps/rejected": -317.74652099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.706127166748047, "rewards/margins": 15.21200942993164, "rewards/rejected": -24.918136596679688, "step": 3734 }, { "epoch": 6.43, "learning_rate": 1.5873353166170845e-07, "logits/chosen": -1.6818220615386963, "logits/rejected": -2.005108118057251, "logps/chosen": -165.91036987304688, "logps/rejected": -322.73193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.577474594116211, "rewards/margins": 14.375486373901367, "rewards/rejected": -20.952960968017578, "step": 3735 }, { "epoch": 6.43, "learning_rate": 1.5862728431789205e-07, "logits/chosen": -2.011871576309204, "logits/rejected": -1.8566817045211792, "logps/chosen": -155.12020874023438, "logps/rejected": -295.612548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.761430263519287, "rewards/margins": 14.386300086975098, "rewards/rejected": -20.147729873657227, "step": 3736 }, { "epoch": 6.43, "learning_rate": 1.5852103697407565e-07, "logits/chosen": -1.898589849472046, "logits/rejected": -1.8459416627883911, "logps/chosen": -137.5910186767578, "logps/rejected": -275.77178955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.820096969604492, "rewards/margins": 13.402435302734375, "rewards/rejected": -19.2225341796875, "step": 3737 }, { "epoch": 6.43, "learning_rate": 1.5841478963025922e-07, "logits/chosen": -1.6112629175186157, "logits/rejected": -2.003896713256836, "logps/chosen": -136.31459045410156, "logps/rejected": -304.162841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.743854999542236, "rewards/margins": 16.312273025512695, "rewards/rejected": -21.056127548217773, "step": 3738 }, { "epoch": 6.44, "learning_rate": 1.5830854228644282e-07, "logits/chosen": -1.8322020769119263, "logits/rejected": -2.1412458419799805, "logps/chosen": -130.74229431152344, "logps/rejected": -292.2980651855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.330376148223877, "rewards/margins": 14.8050537109375, "rewards/rejected": -21.13543128967285, "step": 3739 }, { "epoch": 6.44, "learning_rate": 1.5820229494262642e-07, "logits/chosen": -1.7177298069000244, "logits/rejected": -2.035020589828491, "logps/chosen": -138.50076293945312, "logps/rejected": -325.4703369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.924671649932861, "rewards/margins": 18.007564544677734, "rewards/rejected": -23.932235717773438, "step": 3740 }, { "epoch": 6.44, "learning_rate": 1.5809604759881002e-07, "logits/chosen": -1.985059380531311, "logits/rejected": -1.8957610130310059, "logps/chosen": -153.08302307128906, "logps/rejected": -346.74493408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.10901403427124, "rewards/margins": 18.853897094726562, "rewards/rejected": -25.962909698486328, "step": 3741 }, { "epoch": 6.44, "learning_rate": 1.5798980025499362e-07, "logits/chosen": -1.6880438327789307, "logits/rejected": -1.8188953399658203, "logps/chosen": -148.2880859375, "logps/rejected": -278.5320129394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.118803977966309, "rewards/margins": 13.290807723999023, "rewards/rejected": -19.40961265563965, "step": 3742 }, { "epoch": 6.44, "learning_rate": 1.5788355291117722e-07, "logits/chosen": -1.9871554374694824, "logits/rejected": -1.2914857864379883, "logps/chosen": -152.25009155273438, "logps/rejected": -250.58335876464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.481054782867432, "rewards/margins": 12.8798828125, "rewards/rejected": -19.360937118530273, "step": 3743 }, { "epoch": 6.44, "learning_rate": 1.577773055673608e-07, "logits/chosen": -1.9876941442489624, "logits/rejected": -1.962186574935913, "logps/chosen": -155.60757446289062, "logps/rejected": -339.42193603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7845940589904785, "rewards/margins": 17.58935546875, "rewards/rejected": -23.37394905090332, "step": 3744 }, { "epoch": 6.45, "learning_rate": 1.576710582235444e-07, "logits/chosen": -1.532494068145752, "logits/rejected": -1.6529643535614014, "logps/chosen": -228.39451599121094, "logps/rejected": -340.62896728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.690320014953613, "rewards/margins": 12.413556098937988, "rewards/rejected": -25.1038761138916, "step": 3745 }, { "epoch": 6.45, "learning_rate": 1.5756481087972802e-07, "logits/chosen": -2.077554702758789, "logits/rejected": -2.2296102046966553, "logps/chosen": -154.0550537109375, "logps/rejected": -284.9548645019531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.254664421081543, "rewards/margins": 11.37317943572998, "rewards/rejected": -18.627843856811523, "step": 3746 }, { "epoch": 6.45, "learning_rate": 1.574585635359116e-07, "logits/chosen": -2.053374767303467, "logits/rejected": -1.6420748233795166, "logps/chosen": -174.5145263671875, "logps/rejected": -338.5791320800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.61149787902832, "rewards/margins": 16.685340881347656, "rewards/rejected": -25.29684066772461, "step": 3747 }, { "epoch": 6.45, "learning_rate": 1.573523161920952e-07, "logits/chosen": -1.7286343574523926, "logits/rejected": -1.8576796054840088, "logps/chosen": -138.3453826904297, "logps/rejected": -311.0410461425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.851860046386719, "rewards/margins": 15.185503959655762, "rewards/rejected": -22.037364959716797, "step": 3748 }, { "epoch": 6.45, "learning_rate": 1.5724606884827879e-07, "logits/chosen": -1.3340339660644531, "logits/rejected": -2.0073294639587402, "logps/chosen": -171.43505859375, "logps/rejected": -342.0897216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.524470329284668, "rewards/margins": 14.563234329223633, "rewards/rejected": -24.087705612182617, "step": 3749 }, { "epoch": 6.45, "learning_rate": 1.5713982150446239e-07, "logits/chosen": -1.6917833089828491, "logits/rejected": -2.084742546081543, "logps/chosen": -170.60736083984375, "logps/rejected": -311.3800964355469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.827595710754395, "rewards/margins": 12.879768371582031, "rewards/rejected": -21.70736312866211, "step": 3750 }, { "epoch": 6.46, "learning_rate": 1.5703357416064596e-07, "logits/chosen": -1.7222702503204346, "logits/rejected": -1.8256057500839233, "logps/chosen": -198.18203735351562, "logps/rejected": -297.7052307128906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.853293418884277, "rewards/margins": 10.197378158569336, "rewards/rejected": -21.05067253112793, "step": 3751 }, { "epoch": 6.46, "learning_rate": 1.5692732681682958e-07, "logits/chosen": -1.792115569114685, "logits/rejected": -1.8855853080749512, "logps/chosen": -161.6842803955078, "logps/rejected": -267.013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.94064998626709, "rewards/margins": 12.930740356445312, "rewards/rejected": -18.871389389038086, "step": 3752 }, { "epoch": 6.46, "learning_rate": 1.5682107947301318e-07, "logits/chosen": -1.8135838508605957, "logits/rejected": -1.9089215993881226, "logps/chosen": -150.37037658691406, "logps/rejected": -324.08526611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.226820945739746, "rewards/margins": 16.901809692382812, "rewards/rejected": -22.128629684448242, "step": 3753 }, { "epoch": 6.46, "learning_rate": 1.5671483212919675e-07, "logits/chosen": -2.0489907264709473, "logits/rejected": -1.8724464178085327, "logps/chosen": -207.87197875976562, "logps/rejected": -346.8650817871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.428070068359375, "rewards/margins": 15.144826889038086, "rewards/rejected": -25.57289695739746, "step": 3754 }, { "epoch": 6.46, "learning_rate": 1.5660858478538035e-07, "logits/chosen": -1.938796877861023, "logits/rejected": -1.8052524328231812, "logps/chosen": -122.94618225097656, "logps/rejected": -308.8607482910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.87641716003418, "rewards/margins": 17.795560836791992, "rewards/rejected": -22.67197608947754, "step": 3755 }, { "epoch": 6.46, "learning_rate": 1.5650233744156395e-07, "logits/chosen": -1.7451783418655396, "logits/rejected": -1.9267650842666626, "logps/chosen": -141.81971740722656, "logps/rejected": -267.15191650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.6729254722595215, "rewards/margins": 10.495232582092285, "rewards/rejected": -18.16815757751465, "step": 3756 }, { "epoch": 6.47, "learning_rate": 1.5639609009774755e-07, "logits/chosen": -2.085477352142334, "logits/rejected": -1.8454420566558838, "logps/chosen": -122.18478393554688, "logps/rejected": -282.5423583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9455742835998535, "rewards/margins": 16.008066177368164, "rewards/rejected": -19.95363998413086, "step": 3757 }, { "epoch": 6.47, "learning_rate": 1.5628984275393115e-07, "logits/chosen": -1.6784045696258545, "logits/rejected": -2.0387730598449707, "logps/chosen": -158.94309997558594, "logps/rejected": -350.67486572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.739045143127441, "rewards/margins": 17.862537384033203, "rewards/rejected": -26.60158348083496, "step": 3758 }, { "epoch": 6.47, "learning_rate": 1.5618359541011475e-07, "logits/chosen": -2.06166672706604, "logits/rejected": -1.9212400913238525, "logps/chosen": -150.2002410888672, "logps/rejected": -311.44635009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.849721908569336, "rewards/margins": 14.853703498840332, "rewards/rejected": -21.703426361083984, "step": 3759 }, { "epoch": 6.47, "learning_rate": 1.5607734806629832e-07, "logits/chosen": -1.8886001110076904, "logits/rejected": -1.6873714923858643, "logps/chosen": -137.5455780029297, "logps/rejected": -259.5997314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.925065994262695, "rewards/margins": 13.301695823669434, "rewards/rejected": -19.226762771606445, "step": 3760 }, { "epoch": 6.47, "learning_rate": 1.5597110072248192e-07, "logits/chosen": -1.8919434547424316, "logits/rejected": -2.0385947227478027, "logps/chosen": -143.77833557128906, "logps/rejected": -334.3155517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.178590774536133, "rewards/margins": 18.056697845458984, "rewards/rejected": -25.23529052734375, "step": 3761 }, { "epoch": 6.48, "learning_rate": 1.5586485337866555e-07, "logits/chosen": -1.735645055770874, "logits/rejected": -1.7841284275054932, "logps/chosen": -143.96534729003906, "logps/rejected": -338.55902099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.079098701477051, "rewards/margins": 18.54269027709961, "rewards/rejected": -25.621788024902344, "step": 3762 }, { "epoch": 6.48, "learning_rate": 1.5575860603484912e-07, "logits/chosen": -1.9759242534637451, "logits/rejected": -1.8961056470870972, "logps/chosen": -162.93429565429688, "logps/rejected": -312.43017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.321734428405762, "rewards/margins": 13.182458877563477, "rewards/rejected": -21.504192352294922, "step": 3763 }, { "epoch": 6.48, "learning_rate": 1.5565235869103272e-07, "logits/chosen": -1.565094232559204, "logits/rejected": -1.8298641443252563, "logps/chosen": -155.73587036132812, "logps/rejected": -273.9256591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.016133308410645, "rewards/margins": 11.458578109741211, "rewards/rejected": -19.474712371826172, "step": 3764 }, { "epoch": 6.48, "learning_rate": 1.5554611134721632e-07, "logits/chosen": -1.701033353805542, "logits/rejected": -1.8643049001693726, "logps/chosen": -177.96633911132812, "logps/rejected": -303.7814636230469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.774683952331543, "rewards/margins": 11.817126274108887, "rewards/rejected": -20.591808319091797, "step": 3765 }, { "epoch": 6.48, "learning_rate": 1.554398640033999e-07, "logits/chosen": -1.853570580482483, "logits/rejected": -1.7931276559829712, "logps/chosen": -178.1682891845703, "logps/rejected": -335.8438720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.546252250671387, "rewards/margins": 15.040597915649414, "rewards/rejected": -23.586851119995117, "step": 3766 }, { "epoch": 6.48, "learning_rate": 1.553336166595835e-07, "logits/chosen": -1.7506380081176758, "logits/rejected": -1.8425965309143066, "logps/chosen": -173.3890380859375, "logps/rejected": -296.6063232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.679204940795898, "rewards/margins": 12.406057357788086, "rewards/rejected": -21.085262298583984, "step": 3767 }, { "epoch": 6.49, "learning_rate": 1.552273693157671e-07, "logits/chosen": -1.811540961265564, "logits/rejected": -1.9731849431991577, "logps/chosen": -167.82679748535156, "logps/rejected": -343.1434631347656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.16698169708252, "rewards/margins": 15.919893264770508, "rewards/rejected": -24.086875915527344, "step": 3768 }, { "epoch": 6.49, "learning_rate": 1.551211219719507e-07, "logits/chosen": -1.7891706228256226, "logits/rejected": -1.7724021673202515, "logps/chosen": -170.81430053710938, "logps/rejected": -345.0194396972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.952876091003418, "rewards/margins": 17.063800811767578, "rewards/rejected": -25.01667594909668, "step": 3769 }, { "epoch": 6.49, "learning_rate": 1.5501487462813428e-07, "logits/chosen": -1.7751628160476685, "logits/rejected": -1.9670813083648682, "logps/chosen": -176.77133178710938, "logps/rejected": -341.58599853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.19007682800293, "rewards/margins": 16.67438507080078, "rewards/rejected": -24.864459991455078, "step": 3770 }, { "epoch": 6.49, "learning_rate": 1.5490862728431788e-07, "logits/chosen": -1.5243051052093506, "logits/rejected": -2.0351486206054688, "logps/chosen": -178.42782592773438, "logps/rejected": -340.89208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.192466735839844, "rewards/margins": 15.875884056091309, "rewards/rejected": -25.068349838256836, "step": 3771 }, { "epoch": 6.49, "learning_rate": 1.5480237994050148e-07, "logits/chosen": -1.9731943607330322, "logits/rejected": -1.9052376747131348, "logps/chosen": -162.42115783691406, "logps/rejected": -317.82012939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.214221000671387, "rewards/margins": 15.85348129272461, "rewards/rejected": -24.067703247070312, "step": 3772 }, { "epoch": 6.49, "learning_rate": 1.5469613259668508e-07, "logits/chosen": -1.8118581771850586, "logits/rejected": -1.9169931411743164, "logps/chosen": -167.70343017578125, "logps/rejected": -325.4949035644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.889588356018066, "rewards/margins": 14.286233901977539, "rewards/rejected": -22.17582130432129, "step": 3773 }, { "epoch": 6.5, "learning_rate": 1.5458988525286868e-07, "logits/chosen": -1.849571943283081, "logits/rejected": -1.8502060174942017, "logps/chosen": -134.51303100585938, "logps/rejected": -287.9407043457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.703293800354004, "rewards/margins": 14.903234481811523, "rewards/rejected": -20.606525421142578, "step": 3774 }, { "epoch": 6.5, "learning_rate": 1.5448363790905228e-07, "logits/chosen": -1.9617786407470703, "logits/rejected": -1.771411657333374, "logps/chosen": -167.9648895263672, "logps/rejected": -352.3319091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.736564636230469, "rewards/margins": 18.241912841796875, "rewards/rejected": -25.978477478027344, "step": 3775 }, { "epoch": 6.5, "learning_rate": 1.5437739056523585e-07, "logits/chosen": -2.093564987182617, "logits/rejected": -1.9428491592407227, "logps/chosen": -157.90879821777344, "logps/rejected": -298.2532653808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.67756462097168, "rewards/margins": 15.261119842529297, "rewards/rejected": -22.938682556152344, "step": 3776 }, { "epoch": 6.5, "learning_rate": 1.5427114322141945e-07, "logits/chosen": -1.9064021110534668, "logits/rejected": -1.9466910362243652, "logps/chosen": -115.33517456054688, "logps/rejected": -252.5205535888672, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.8818511962890625, "rewards/margins": 13.637711524963379, "rewards/rejected": -18.519561767578125, "step": 3777 }, { "epoch": 6.5, "learning_rate": 1.5416489587760308e-07, "logits/chosen": -2.0944912433624268, "logits/rejected": -1.8295094966888428, "logps/chosen": -158.6456298828125, "logps/rejected": -342.59454345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.752109527587891, "rewards/margins": 20.258495330810547, "rewards/rejected": -26.010602951049805, "step": 3778 }, { "epoch": 6.5, "learning_rate": 1.5405864853378665e-07, "logits/chosen": -2.0532243251800537, "logits/rejected": -2.05312442779541, "logps/chosen": -137.56398010253906, "logps/rejected": -245.11819458007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.065675735473633, "rewards/margins": 10.502434730529785, "rewards/rejected": -17.5681095123291, "step": 3779 }, { "epoch": 6.51, "learning_rate": 1.5395240118997025e-07, "logits/chosen": -2.1203842163085938, "logits/rejected": -1.9621245861053467, "logps/chosen": -132.418701171875, "logps/rejected": -274.368408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0964674949646, "rewards/margins": 14.033965110778809, "rewards/rejected": -20.13043212890625, "step": 3780 }, { "epoch": 6.51, "learning_rate": 1.5384615384615385e-07, "logits/chosen": -1.8105894327163696, "logits/rejected": -1.9695199728012085, "logps/chosen": -159.79116821289062, "logps/rejected": -331.23797607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.284266948699951, "rewards/margins": 16.962051391601562, "rewards/rejected": -23.24631690979004, "step": 3781 }, { "epoch": 6.51, "learning_rate": 1.5373990650233742e-07, "logits/chosen": -1.8656843900680542, "logits/rejected": -1.9218487739562988, "logps/chosen": -176.5359344482422, "logps/rejected": -332.1059265136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.639872550964355, "rewards/margins": 15.118060111999512, "rewards/rejected": -23.757932662963867, "step": 3782 }, { "epoch": 6.51, "learning_rate": 1.5363365915852102e-07, "logits/chosen": -1.5465967655181885, "logits/rejected": -1.7396225929260254, "logps/chosen": -170.23313903808594, "logps/rejected": -326.8877868652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.33352279663086, "rewards/margins": 15.697094917297363, "rewards/rejected": -25.03061866760254, "step": 3783 }, { "epoch": 6.51, "learning_rate": 1.5352741181470464e-07, "logits/chosen": -2.061600685119629, "logits/rejected": -1.503223180770874, "logps/chosen": -186.68634033203125, "logps/rejected": -328.17626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.422419548034668, "rewards/margins": 15.437143325805664, "rewards/rejected": -24.859561920166016, "step": 3784 }, { "epoch": 6.51, "learning_rate": 1.5342116447088821e-07, "logits/chosen": -1.914982557296753, "logits/rejected": -1.7240639925003052, "logps/chosen": -152.7692413330078, "logps/rejected": -319.9366455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.583868026733398, "rewards/margins": 16.635513305664062, "rewards/rejected": -23.219383239746094, "step": 3785 }, { "epoch": 6.52, "learning_rate": 1.5331491712707181e-07, "logits/chosen": -2.002955913543701, "logits/rejected": -1.6799367666244507, "logps/chosen": -164.8629608154297, "logps/rejected": -261.24554443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.203409194946289, "rewards/margins": 12.537384033203125, "rewards/rejected": -19.740793228149414, "step": 3786 }, { "epoch": 6.52, "learning_rate": 1.532086697832554e-07, "logits/chosen": -1.4613709449768066, "logits/rejected": -1.8878529071807861, "logps/chosen": -129.9447479248047, "logps/rejected": -282.9460144042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7663469314575195, "rewards/margins": 14.647689819335938, "rewards/rejected": -20.414037704467773, "step": 3787 }, { "epoch": 6.52, "learning_rate": 1.5310242243943898e-07, "logits/chosen": -1.8102035522460938, "logits/rejected": -1.6953017711639404, "logps/chosen": -194.2067413330078, "logps/rejected": -333.4393005371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.640889167785645, "rewards/margins": 12.968334197998047, "rewards/rejected": -23.609224319458008, "step": 3788 }, { "epoch": 6.52, "learning_rate": 1.529961750956226e-07, "logits/chosen": -1.4354265928268433, "logits/rejected": -1.9568653106689453, "logps/chosen": -153.5790557861328, "logps/rejected": -314.61279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.015949726104736, "rewards/margins": 14.370976448059082, "rewards/rejected": -21.38692855834961, "step": 3789 }, { "epoch": 6.52, "learning_rate": 1.528899277518062e-07, "logits/chosen": -1.7264739274978638, "logits/rejected": -2.050029754638672, "logps/chosen": -135.4510498046875, "logps/rejected": -298.4390869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.9769392013549805, "rewards/margins": 15.430630683898926, "rewards/rejected": -21.407569885253906, "step": 3790 }, { "epoch": 6.52, "learning_rate": 1.527836804079898e-07, "logits/chosen": -1.9372166395187378, "logits/rejected": -1.5959749221801758, "logps/chosen": -167.7484130859375, "logps/rejected": -319.3196105957031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.890655994415283, "rewards/margins": 15.939277648925781, "rewards/rejected": -23.829933166503906, "step": 3791 }, { "epoch": 6.53, "learning_rate": 1.5267743306417338e-07, "logits/chosen": -1.8209211826324463, "logits/rejected": -1.811666488647461, "logps/chosen": -131.08055114746094, "logps/rejected": -287.2608642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.192259788513184, "rewards/margins": 14.601676940917969, "rewards/rejected": -21.793935775756836, "step": 3792 }, { "epoch": 6.53, "learning_rate": 1.5257118572035698e-07, "logits/chosen": -1.8441765308380127, "logits/rejected": -1.5471670627593994, "logps/chosen": -133.30581665039062, "logps/rejected": -239.77291870117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4320573806762695, "rewards/margins": 14.011923789978027, "rewards/rejected": -17.443981170654297, "step": 3793 }, { "epoch": 6.53, "learning_rate": 1.524649383765406e-07, "logits/chosen": -1.9503557682037354, "logits/rejected": -1.3960857391357422, "logps/chosen": -168.358642578125, "logps/rejected": -291.09503173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.099347114562988, "rewards/margins": 13.92347526550293, "rewards/rejected": -21.0228214263916, "step": 3794 }, { "epoch": 6.53, "learning_rate": 1.5235869103272418e-07, "logits/chosen": -1.7556912899017334, "logits/rejected": -1.8553842306137085, "logps/chosen": -150.59429931640625, "logps/rejected": -313.9855651855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.208643913269043, "rewards/margins": 15.395991325378418, "rewards/rejected": -22.60463523864746, "step": 3795 }, { "epoch": 6.53, "learning_rate": 1.5225244368890778e-07, "logits/chosen": -1.4757734537124634, "logits/rejected": -2.008718252182007, "logps/chosen": -166.8770751953125, "logps/rejected": -331.6300964355469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.947242736816406, "rewards/margins": 15.843435287475586, "rewards/rejected": -22.790678024291992, "step": 3796 }, { "epoch": 6.54, "learning_rate": 1.5214619634509137e-07, "logits/chosen": -1.9754595756530762, "logits/rejected": -1.935562014579773, "logps/chosen": -151.1075439453125, "logps/rejected": -254.46536254882812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.508789539337158, "rewards/margins": 11.967873573303223, "rewards/rejected": -18.476661682128906, "step": 3797 }, { "epoch": 6.54, "learning_rate": 1.5203994900127495e-07, "logits/chosen": -1.884942889213562, "logits/rejected": -2.016842842102051, "logps/chosen": -149.6968994140625, "logps/rejected": -323.408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1357035636901855, "rewards/margins": 15.66689682006836, "rewards/rejected": -21.802600860595703, "step": 3798 }, { "epoch": 6.54, "learning_rate": 1.5193370165745855e-07, "logits/chosen": -1.7900491952896118, "logits/rejected": -1.4842268228530884, "logps/chosen": -149.974609375, "logps/rejected": -350.9758605957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.196859836578369, "rewards/margins": 19.42129898071289, "rewards/rejected": -25.6181583404541, "step": 3799 }, { "epoch": 6.54, "learning_rate": 1.5182745431364217e-07, "logits/chosen": -1.6968936920166016, "logits/rejected": -1.921439290046692, "logps/chosen": -189.88336181640625, "logps/rejected": -325.83306884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.281941413879395, "rewards/margins": 14.091196060180664, "rewards/rejected": -23.373138427734375, "step": 3800 }, { "epoch": 6.54, "learning_rate": 1.5172120696982574e-07, "logits/chosen": -1.722409963607788, "logits/rejected": -1.768988847732544, "logps/chosen": -133.88916015625, "logps/rejected": -285.29339599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.767608642578125, "rewards/margins": 15.207161903381348, "rewards/rejected": -19.97477149963379, "step": 3801 }, { "epoch": 6.54, "learning_rate": 1.5161495962600934e-07, "logits/chosen": -1.7395063638687134, "logits/rejected": -1.9645776748657227, "logps/chosen": -103.19235229492188, "logps/rejected": -264.2242431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.797778606414795, "rewards/margins": 15.133052825927734, "rewards/rejected": -19.930831909179688, "step": 3802 }, { "epoch": 6.55, "learning_rate": 1.5150871228219294e-07, "logits/chosen": -1.861077070236206, "logits/rejected": -1.828937292098999, "logps/chosen": -200.8246307373047, "logps/rejected": -324.8077087402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.080190658569336, "rewards/margins": 12.034127235412598, "rewards/rejected": -22.11431884765625, "step": 3803 }, { "epoch": 6.55, "learning_rate": 1.5140246493837651e-07, "logits/chosen": -1.9033334255218506, "logits/rejected": -1.8979898691177368, "logps/chosen": -162.11434936523438, "logps/rejected": -315.08050537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.431461334228516, "rewards/margins": 14.657166481018066, "rewards/rejected": -22.0886287689209, "step": 3804 }, { "epoch": 6.55, "learning_rate": 1.5129621759456014e-07, "logits/chosen": -1.9304277896881104, "logits/rejected": -1.8108464479446411, "logps/chosen": -139.38619995117188, "logps/rejected": -317.84405517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.546285152435303, "rewards/margins": 18.56743049621582, "rewards/rejected": -24.11371612548828, "step": 3805 }, { "epoch": 6.55, "learning_rate": 1.5118997025074374e-07, "logits/chosen": -1.7209341526031494, "logits/rejected": -1.8141944408416748, "logps/chosen": -165.1376190185547, "logps/rejected": -310.60498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.947957038879395, "rewards/margins": 13.255471229553223, "rewards/rejected": -22.20343017578125, "step": 3806 }, { "epoch": 6.55, "learning_rate": 1.510837229069273e-07, "logits/chosen": -2.038724660873413, "logits/rejected": -1.5267030000686646, "logps/chosen": -177.2650146484375, "logps/rejected": -270.64117431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7786545753479, "rewards/margins": 11.165812492370605, "rewards/rejected": -18.944467544555664, "step": 3807 }, { "epoch": 6.55, "learning_rate": 1.509774755631109e-07, "logits/chosen": -1.8824892044067383, "logits/rejected": -1.9651012420654297, "logps/chosen": -140.28872680664062, "logps/rejected": -290.43804931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.38153076171875, "rewards/margins": 14.574647903442383, "rewards/rejected": -19.956178665161133, "step": 3808 }, { "epoch": 6.56, "learning_rate": 1.508712282192945e-07, "logits/chosen": -1.8199992179870605, "logits/rejected": -1.9177031517028809, "logps/chosen": -141.2329864501953, "logps/rejected": -284.76995849609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.236767768859863, "rewards/margins": 13.389249801635742, "rewards/rejected": -20.626018524169922, "step": 3809 }, { "epoch": 6.56, "learning_rate": 1.5076498087547808e-07, "logits/chosen": -1.8783831596374512, "logits/rejected": -1.784337043762207, "logps/chosen": -156.9212188720703, "logps/rejected": -287.53179931640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.070052146911621, "rewards/margins": 13.59318733215332, "rewards/rejected": -22.663238525390625, "step": 3810 }, { "epoch": 6.56, "learning_rate": 1.506587335316617e-07, "logits/chosen": -1.74385666847229, "logits/rejected": -1.438474416732788, "logps/chosen": -153.55055236816406, "logps/rejected": -279.255615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.027122497558594, "rewards/margins": 13.466155052185059, "rewards/rejected": -20.49327850341797, "step": 3811 }, { "epoch": 6.56, "learning_rate": 1.505524861878453e-07, "logits/chosen": -1.6185357570648193, "logits/rejected": -1.9092882871627808, "logps/chosen": -163.55226135253906, "logps/rejected": -319.29052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.493307113647461, "rewards/margins": 14.66657543182373, "rewards/rejected": -22.159881591796875, "step": 3812 }, { "epoch": 6.56, "learning_rate": 1.504462388440289e-07, "logits/chosen": -1.9717481136322021, "logits/rejected": -1.7497153282165527, "logps/chosen": -139.6015625, "logps/rejected": -315.3453369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.541781425476074, "rewards/margins": 17.81764793395996, "rewards/rejected": -22.35942840576172, "step": 3813 }, { "epoch": 6.56, "learning_rate": 1.5033999150021248e-07, "logits/chosen": -1.879584789276123, "logits/rejected": -1.8609304428100586, "logps/chosen": -165.6461181640625, "logps/rejected": -278.1844482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.437723159790039, "rewards/margins": 11.943446159362793, "rewards/rejected": -19.381168365478516, "step": 3814 }, { "epoch": 6.57, "learning_rate": 1.5023374415639608e-07, "logits/chosen": -1.9656747579574585, "logits/rejected": -1.9023014307022095, "logps/chosen": -119.46099853515625, "logps/rejected": -294.8580017089844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.001336574554443, "rewards/margins": 17.367454528808594, "rewards/rejected": -21.368791580200195, "step": 3815 }, { "epoch": 6.57, "learning_rate": 1.501274968125797e-07, "logits/chosen": -1.4886014461517334, "logits/rejected": -1.9131855964660645, "logps/chosen": -133.8804931640625, "logps/rejected": -337.82220458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.399298667907715, "rewards/margins": 18.163230895996094, "rewards/rejected": -23.562530517578125, "step": 3816 }, { "epoch": 6.57, "learning_rate": 1.5002124946876327e-07, "logits/chosen": -1.8126945495605469, "logits/rejected": -2.0989577770233154, "logps/chosen": -146.18951416015625, "logps/rejected": -291.686767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.732566833496094, "rewards/margins": 14.026894569396973, "rewards/rejected": -21.75946044921875, "step": 3817 }, { "epoch": 6.57, "learning_rate": 1.4991500212494687e-07, "logits/chosen": -2.0057787895202637, "logits/rejected": -1.9946002960205078, "logps/chosen": -157.08702087402344, "logps/rejected": -350.32745361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.557821273803711, "rewards/margins": 17.895660400390625, "rewards/rejected": -24.453481674194336, "step": 3818 }, { "epoch": 6.57, "learning_rate": 1.4980875478113047e-07, "logits/chosen": -1.9159038066864014, "logits/rejected": -2.098120927810669, "logps/chosen": -180.53369140625, "logps/rejected": -339.80059814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.964316368103027, "rewards/margins": 15.06649112701416, "rewards/rejected": -23.030805587768555, "step": 3819 }, { "epoch": 6.57, "learning_rate": 1.4970250743731404e-07, "logits/chosen": -1.9936838150024414, "logits/rejected": -1.935096025466919, "logps/chosen": -97.64385986328125, "logps/rejected": -244.85171508789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4345755577087402, "rewards/margins": 14.84443473815918, "rewards/rejected": -17.279010772705078, "step": 3820 }, { "epoch": 6.58, "learning_rate": 1.4959626009349767e-07, "logits/chosen": -1.5438671112060547, "logits/rejected": -2.038106918334961, "logps/chosen": -154.78146362304688, "logps/rejected": -297.59014892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.566131114959717, "rewards/margins": 14.54214096069336, "rewards/rejected": -21.108272552490234, "step": 3821 }, { "epoch": 6.58, "learning_rate": 1.4949001274968127e-07, "logits/chosen": -1.5955356359481812, "logits/rejected": -1.9080009460449219, "logps/chosen": -127.53668975830078, "logps/rejected": -284.4786071777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.451140880584717, "rewards/margins": 14.524984359741211, "rewards/rejected": -19.976125717163086, "step": 3822 }, { "epoch": 6.58, "learning_rate": 1.4938376540586484e-07, "logits/chosen": -1.7953016757965088, "logits/rejected": -2.1028389930725098, "logps/chosen": -155.5535430908203, "logps/rejected": -301.3667297363281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.319034576416016, "rewards/margins": 14.653451919555664, "rewards/rejected": -21.972484588623047, "step": 3823 }, { "epoch": 6.58, "learning_rate": 1.4927751806204844e-07, "logits/chosen": -1.7458653450012207, "logits/rejected": -1.7193481922149658, "logps/chosen": -174.84617614746094, "logps/rejected": -315.87823486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.755584716796875, "rewards/margins": 14.365636825561523, "rewards/rejected": -23.1212215423584, "step": 3824 }, { "epoch": 6.58, "learning_rate": 1.4917127071823204e-07, "logits/chosen": -1.852701187133789, "logits/rejected": -2.0834851264953613, "logps/chosen": -178.4456787109375, "logps/rejected": -315.49383544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.972834587097168, "rewards/margins": 13.379842758178711, "rewards/rejected": -20.352676391601562, "step": 3825 }, { "epoch": 6.59, "learning_rate": 1.490650233744156e-07, "logits/chosen": -1.8586831092834473, "logits/rejected": -1.7749203443527222, "logps/chosen": -139.61172485351562, "logps/rejected": -259.8647766113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.9873857498168945, "rewards/margins": 12.696394920349121, "rewards/rejected": -19.683780670166016, "step": 3826 }, { "epoch": 6.59, "learning_rate": 1.4895877603059924e-07, "logits/chosen": -1.8478344678878784, "logits/rejected": -1.7720023393630981, "logps/chosen": -192.45889282226562, "logps/rejected": -324.064208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.880518913269043, "rewards/margins": 14.990927696228027, "rewards/rejected": -23.87144660949707, "step": 3827 }, { "epoch": 6.59, "learning_rate": 1.4885252868678283e-07, "logits/chosen": -1.8137662410736084, "logits/rejected": -1.952340841293335, "logps/chosen": -155.63290405273438, "logps/rejected": -304.68951416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.779342651367188, "rewards/margins": 14.306114196777344, "rewards/rejected": -23.08545684814453, "step": 3828 }, { "epoch": 6.59, "learning_rate": 1.487462813429664e-07, "logits/chosen": -1.802453637123108, "logits/rejected": -1.7701303958892822, "logps/chosen": -146.00396728515625, "logps/rejected": -320.8516540527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.466030120849609, "rewards/margins": 16.26795196533203, "rewards/rejected": -22.733983993530273, "step": 3829 }, { "epoch": 6.59, "learning_rate": 1.4864003399915e-07, "logits/chosen": -1.7195441722869873, "logits/rejected": -1.8649965524673462, "logps/chosen": -97.84548950195312, "logps/rejected": -277.3459777832031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3475475311279297, "rewards/margins": 17.48281478881836, "rewards/rejected": -20.830360412597656, "step": 3830 }, { "epoch": 6.59, "learning_rate": 1.485337866553336e-07, "logits/chosen": -1.758752465248108, "logits/rejected": -1.8673155307769775, "logps/chosen": -195.27081298828125, "logps/rejected": -321.92510986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.272406578063965, "rewards/margins": 13.459346771240234, "rewards/rejected": -23.731754302978516, "step": 3831 }, { "epoch": 6.6, "learning_rate": 1.4842753931151723e-07, "logits/chosen": -1.8716630935668945, "logits/rejected": -2.016094207763672, "logps/chosen": -173.44078063964844, "logps/rejected": -308.6259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.098198890686035, "rewards/margins": 13.698602676391602, "rewards/rejected": -21.79680061340332, "step": 3832 }, { "epoch": 6.6, "learning_rate": 1.483212919677008e-07, "logits/chosen": -1.8197908401489258, "logits/rejected": -1.830101728439331, "logps/chosen": -201.501220703125, "logps/rejected": -357.17950439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.975046157836914, "rewards/margins": 14.449053764343262, "rewards/rejected": -24.42409896850586, "step": 3833 }, { "epoch": 6.6, "learning_rate": 1.482150446238844e-07, "logits/chosen": -1.7067978382110596, "logits/rejected": -1.8339695930480957, "logps/chosen": -120.78175354003906, "logps/rejected": -300.01800537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.072377681732178, "rewards/margins": 16.5180606842041, "rewards/rejected": -20.590438842773438, "step": 3834 }, { "epoch": 6.6, "learning_rate": 1.48108797280068e-07, "logits/chosen": -1.9400138854980469, "logits/rejected": -1.6939163208007812, "logps/chosen": -175.1653289794922, "logps/rejected": -352.741943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.56118631362915, "rewards/margins": 18.966815948486328, "rewards/rejected": -26.52800178527832, "step": 3835 }, { "epoch": 6.6, "learning_rate": 1.4800254993625157e-07, "logits/chosen": -1.5450026988983154, "logits/rejected": -1.7552803754806519, "logps/chosen": -123.30553436279297, "logps/rejected": -307.6388244628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.183900356292725, "rewards/margins": 15.693159103393555, "rewards/rejected": -20.877059936523438, "step": 3836 }, { "epoch": 6.6, "learning_rate": 1.4789630259243517e-07, "logits/chosen": -1.7800686359405518, "logits/rejected": -1.7451331615447998, "logps/chosen": -167.3730926513672, "logps/rejected": -297.4588317871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.527043342590332, "rewards/margins": 14.183525085449219, "rewards/rejected": -22.710569381713867, "step": 3837 }, { "epoch": 6.61, "learning_rate": 1.477900552486188e-07, "logits/chosen": -1.45075523853302, "logits/rejected": -1.9770243167877197, "logps/chosen": -146.00498962402344, "logps/rejected": -302.4970397949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.66406774520874, "rewards/margins": 14.162450790405273, "rewards/rejected": -21.82651710510254, "step": 3838 }, { "epoch": 6.61, "learning_rate": 1.4768380790480237e-07, "logits/chosen": -1.8149555921554565, "logits/rejected": -1.8910212516784668, "logps/chosen": -136.47720336914062, "logps/rejected": -314.3356628417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.26377534866333, "rewards/margins": 17.970966339111328, "rewards/rejected": -23.2347412109375, "step": 3839 }, { "epoch": 6.61, "learning_rate": 1.4757756056098597e-07, "logits/chosen": -1.7670084238052368, "logits/rejected": -1.8226028680801392, "logps/chosen": -150.28558349609375, "logps/rejected": -298.6530456542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.139233589172363, "rewards/margins": 14.81197738647461, "rewards/rejected": -20.95121192932129, "step": 3840 }, { "epoch": 6.61, "learning_rate": 1.4747131321716957e-07, "logits/chosen": -1.9387086629867554, "logits/rejected": -1.8846938610076904, "logps/chosen": -147.23670959472656, "logps/rejected": -312.3927001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.683199882507324, "rewards/margins": 17.153987884521484, "rewards/rejected": -23.837186813354492, "step": 3841 }, { "epoch": 6.61, "learning_rate": 1.4736506587335314e-07, "logits/chosen": -1.749151587486267, "logits/rejected": -1.9084240198135376, "logps/chosen": -172.513671875, "logps/rejected": -315.57281494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.630707740783691, "rewards/margins": 13.415594100952148, "rewards/rejected": -21.046302795410156, "step": 3842 }, { "epoch": 6.61, "learning_rate": 1.4725881852953677e-07, "logits/chosen": -1.9925341606140137, "logits/rejected": -1.8240209817886353, "logps/chosen": -96.34687042236328, "logps/rejected": -248.44668579101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.44728946685791, "rewards/margins": 15.821486473083496, "rewards/rejected": -20.268775939941406, "step": 3843 }, { "epoch": 6.62, "learning_rate": 1.4715257118572036e-07, "logits/chosen": -2.116837978363037, "logits/rejected": -1.8747068643569946, "logps/chosen": -110.85200500488281, "logps/rejected": -296.73114013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4377477169036865, "rewards/margins": 19.522014617919922, "rewards/rejected": -22.959762573242188, "step": 3844 }, { "epoch": 6.62, "learning_rate": 1.4704632384190394e-07, "logits/chosen": -1.762368083000183, "logits/rejected": -1.7837753295898438, "logps/chosen": -125.0169677734375, "logps/rejected": -298.0060119628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.425931930541992, "rewards/margins": 15.825798034667969, "rewards/rejected": -21.251731872558594, "step": 3845 }, { "epoch": 6.62, "learning_rate": 1.4694007649808754e-07, "logits/chosen": -1.8016456365585327, "logits/rejected": -1.6738836765289307, "logps/chosen": -126.88420104980469, "logps/rejected": -265.7961120605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.26544189453125, "rewards/margins": 14.219825744628906, "rewards/rejected": -19.485267639160156, "step": 3846 }, { "epoch": 6.62, "learning_rate": 1.4683382915427113e-07, "logits/chosen": -1.6684097051620483, "logits/rejected": -2.0433173179626465, "logps/chosen": -141.27528381347656, "logps/rejected": -319.30255126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.557873249053955, "rewards/margins": 17.25116539001465, "rewards/rejected": -23.809040069580078, "step": 3847 }, { "epoch": 6.62, "learning_rate": 1.467275818104547e-07, "logits/chosen": -1.9875452518463135, "logits/rejected": -1.7658405303955078, "logps/chosen": -175.7563934326172, "logps/rejected": -315.2546691894531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.16140079498291, "rewards/margins": 15.312751770019531, "rewards/rejected": -22.474151611328125, "step": 3848 }, { "epoch": 6.62, "learning_rate": 1.4662133446663833e-07, "logits/chosen": -2.0002052783966064, "logits/rejected": -1.6792386770248413, "logps/chosen": -119.57234954833984, "logps/rejected": -259.65240478515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.498401165008545, "rewards/margins": 14.464312553405762, "rewards/rejected": -17.96271324157715, "step": 3849 }, { "epoch": 6.63, "learning_rate": 1.4651508712282193e-07, "logits/chosen": -1.6399246454238892, "logits/rejected": -1.7380897998809814, "logps/chosen": -97.95860290527344, "logps/rejected": -279.5908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.541594982147217, "rewards/margins": 16.813230514526367, "rewards/rejected": -21.354825973510742, "step": 3850 }, { "epoch": 6.63, "learning_rate": 1.464088397790055e-07, "logits/chosen": -1.6903393268585205, "logits/rejected": -1.9257748126983643, "logps/chosen": -166.7381591796875, "logps/rejected": -308.97613525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.366775989532471, "rewards/margins": 14.15093994140625, "rewards/rejected": -21.517715454101562, "step": 3851 }, { "epoch": 6.63, "learning_rate": 1.463025924351891e-07, "logits/chosen": -1.5474629402160645, "logits/rejected": -1.9831901788711548, "logps/chosen": -180.46456909179688, "logps/rejected": -290.8027648925781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.946003913879395, "rewards/margins": 11.149718284606934, "rewards/rejected": -20.095722198486328, "step": 3852 }, { "epoch": 6.63, "learning_rate": 1.461963450913727e-07, "logits/chosen": -1.758757472038269, "logits/rejected": -1.842761516571045, "logps/chosen": -169.3289031982422, "logps/rejected": -366.7631530761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.400404930114746, "rewards/margins": 18.038082122802734, "rewards/rejected": -26.438488006591797, "step": 3853 }, { "epoch": 6.63, "learning_rate": 1.4609009774755633e-07, "logits/chosen": -1.7028874158859253, "logits/rejected": -1.8587831258773804, "logps/chosen": -137.27981567382812, "logps/rejected": -304.4716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.403285980224609, "rewards/margins": 16.60853385925293, "rewards/rejected": -23.01181983947754, "step": 3854 }, { "epoch": 6.64, "learning_rate": 1.459838504037399e-07, "logits/chosen": -1.6633737087249756, "logits/rejected": -1.9362831115722656, "logps/chosen": -161.4290008544922, "logps/rejected": -308.39813232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.728602409362793, "rewards/margins": 13.839256286621094, "rewards/rejected": -21.56785774230957, "step": 3855 }, { "epoch": 6.64, "learning_rate": 1.458776030599235e-07, "logits/chosen": -1.9422178268432617, "logits/rejected": -1.8481101989746094, "logps/chosen": -158.91372680664062, "logps/rejected": -290.5239562988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.297013759613037, "rewards/margins": 14.196611404418945, "rewards/rejected": -20.49362564086914, "step": 3856 }, { "epoch": 6.64, "learning_rate": 1.457713557161071e-07, "logits/chosen": -1.5259861946105957, "logits/rejected": -1.8617932796478271, "logps/chosen": -144.82315063476562, "logps/rejected": -347.5381164550781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.411073684692383, "rewards/margins": 16.705432891845703, "rewards/rejected": -24.11650848388672, "step": 3857 }, { "epoch": 6.64, "learning_rate": 1.4566510837229067e-07, "logits/chosen": -1.770727515220642, "logits/rejected": -1.5360438823699951, "logps/chosen": -184.3271942138672, "logps/rejected": -319.7657165527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.918527603149414, "rewards/margins": 14.664443969726562, "rewards/rejected": -23.582971572875977, "step": 3858 }, { "epoch": 6.64, "learning_rate": 1.455588610284743e-07, "logits/chosen": -1.8267371654510498, "logits/rejected": -1.956864595413208, "logps/chosen": -144.36277770996094, "logps/rejected": -317.16302490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.878533840179443, "rewards/margins": 15.836494445800781, "rewards/rejected": -22.71502685546875, "step": 3859 }, { "epoch": 6.64, "learning_rate": 1.454526136846579e-07, "logits/chosen": -1.8741421699523926, "logits/rejected": -1.3068482875823975, "logps/chosen": -200.0347137451172, "logps/rejected": -296.5461120605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.918240547180176, "rewards/margins": 11.45672607421875, "rewards/rejected": -21.374967575073242, "step": 3860 }, { "epoch": 6.65, "learning_rate": 1.4534636634084147e-07, "logits/chosen": -1.7919591665267944, "logits/rejected": -1.6312600374221802, "logps/chosen": -136.39019775390625, "logps/rejected": -275.0960693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.243191242218018, "rewards/margins": 15.537769317626953, "rewards/rejected": -20.780960083007812, "step": 3861 }, { "epoch": 6.65, "learning_rate": 1.4524011899702507e-07, "logits/chosen": -1.9157036542892456, "logits/rejected": -1.9621527194976807, "logps/chosen": -130.58883666992188, "logps/rejected": -310.6326904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.401886463165283, "rewards/margins": 17.200416564941406, "rewards/rejected": -23.602306365966797, "step": 3862 }, { "epoch": 6.65, "learning_rate": 1.4513387165320866e-07, "logits/chosen": -1.8131670951843262, "logits/rejected": -1.798876166343689, "logps/chosen": -179.84030151367188, "logps/rejected": -364.7511901855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.529701232910156, "rewards/margins": 17.980249404907227, "rewards/rejected": -27.509950637817383, "step": 3863 }, { "epoch": 6.65, "learning_rate": 1.4502762430939224e-07, "logits/chosen": -1.9312891960144043, "logits/rejected": -1.8809651136398315, "logps/chosen": -127.590087890625, "logps/rejected": -281.9412536621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4121599197387695, "rewards/margins": 15.500687599182129, "rewards/rejected": -20.9128475189209, "step": 3864 }, { "epoch": 6.65, "learning_rate": 1.4492137696557586e-07, "logits/chosen": -1.8107566833496094, "logits/rejected": -1.6854047775268555, "logps/chosen": -115.2686538696289, "logps/rejected": -228.6651611328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.1386213302612305, "rewards/margins": 12.104655265808105, "rewards/rejected": -16.24327850341797, "step": 3865 }, { "epoch": 6.65, "learning_rate": 1.4481512962175946e-07, "logits/chosen": -1.8177733421325684, "logits/rejected": -1.8402800559997559, "logps/chosen": -167.79025268554688, "logps/rejected": -286.9315185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.968788146972656, "rewards/margins": 11.864654541015625, "rewards/rejected": -20.83344268798828, "step": 3866 }, { "epoch": 6.66, "learning_rate": 1.4470888227794303e-07, "logits/chosen": -1.9031254053115845, "logits/rejected": -1.9080255031585693, "logps/chosen": -155.25164794921875, "logps/rejected": -335.6918029785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.497346878051758, "rewards/margins": 16.10181427001953, "rewards/rejected": -24.59916114807129, "step": 3867 }, { "epoch": 6.66, "learning_rate": 1.4460263493412663e-07, "logits/chosen": -1.607987403869629, "logits/rejected": -1.904771089553833, "logps/chosen": -165.40554809570312, "logps/rejected": -305.74603271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.100330352783203, "rewards/margins": 13.923781394958496, "rewards/rejected": -22.024112701416016, "step": 3868 }, { "epoch": 6.66, "learning_rate": 1.4449638759031023e-07, "logits/chosen": -1.901336669921875, "logits/rejected": -1.7828816175460815, "logps/chosen": -183.20220947265625, "logps/rejected": -326.0805358886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.34304428100586, "rewards/margins": 15.187774658203125, "rewards/rejected": -24.530820846557617, "step": 3869 }, { "epoch": 6.66, "learning_rate": 1.4439014024649383e-07, "logits/chosen": -1.741668462753296, "logits/rejected": -1.7202966213226318, "logps/chosen": -153.97933959960938, "logps/rejected": -307.8394775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.631204605102539, "rewards/margins": 13.94774341583252, "rewards/rejected": -22.578948974609375, "step": 3870 }, { "epoch": 6.66, "learning_rate": 1.4428389290267743e-07, "logits/chosen": -1.9541616439819336, "logits/rejected": -1.7442777156829834, "logps/chosen": -165.31907653808594, "logps/rejected": -314.23406982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.582402229309082, "rewards/margins": 15.284031867980957, "rewards/rejected": -22.86643409729004, "step": 3871 }, { "epoch": 6.66, "learning_rate": 1.4417764555886103e-07, "logits/chosen": -1.9547522068023682, "logits/rejected": -1.831868052482605, "logps/chosen": -149.38555908203125, "logps/rejected": -303.4208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.498334884643555, "rewards/margins": 15.308135986328125, "rewards/rejected": -20.80647087097168, "step": 3872 }, { "epoch": 6.67, "learning_rate": 1.440713982150446e-07, "logits/chosen": -1.7125847339630127, "logits/rejected": -2.086162805557251, "logps/chosen": -141.62722778320312, "logps/rejected": -296.44287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.445432186126709, "rewards/margins": 14.905811309814453, "rewards/rejected": -20.351242065429688, "step": 3873 }, { "epoch": 6.67, "learning_rate": 1.439651508712282e-07, "logits/chosen": -2.1025094985961914, "logits/rejected": -1.9586795568466187, "logps/chosen": -132.365966796875, "logps/rejected": -272.9750061035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.1629815101623535, "rewards/margins": 14.685099601745605, "rewards/rejected": -18.84808349609375, "step": 3874 }, { "epoch": 6.67, "learning_rate": 1.4385890352741182e-07, "logits/chosen": -1.7102131843566895, "logits/rejected": -1.77226984500885, "logps/chosen": -125.40813446044922, "logps/rejected": -271.7608642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.781110763549805, "rewards/margins": 13.430341720581055, "rewards/rejected": -19.21145248413086, "step": 3875 }, { "epoch": 6.67, "learning_rate": 1.4375265618359542e-07, "logits/chosen": -1.8189566135406494, "logits/rejected": -1.805132508277893, "logps/chosen": -107.25312805175781, "logps/rejected": -271.96441650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.81296443939209, "rewards/margins": 16.40359878540039, "rewards/rejected": -19.216562271118164, "step": 3876 }, { "epoch": 6.67, "learning_rate": 1.43646408839779e-07, "logits/chosen": -1.9743835926055908, "logits/rejected": -1.6003656387329102, "logps/chosen": -189.7769775390625, "logps/rejected": -288.8812561035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.888002395629883, "rewards/margins": 10.94931411743164, "rewards/rejected": -19.837316513061523, "step": 3877 }, { "epoch": 6.67, "learning_rate": 1.435401614959626e-07, "logits/chosen": -1.3727822303771973, "logits/rejected": -1.8679587841033936, "logps/chosen": -193.84149169921875, "logps/rejected": -363.1521301269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.202563285827637, "rewards/margins": 14.607623100280762, "rewards/rejected": -26.8101863861084, "step": 3878 }, { "epoch": 6.68, "learning_rate": 1.434339141521462e-07, "logits/chosen": -1.625157356262207, "logits/rejected": -1.9421954154968262, "logps/chosen": -164.88873291015625, "logps/rejected": -299.48822021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.510619163513184, "rewards/margins": 10.900766372680664, "rewards/rejected": -19.41138458251953, "step": 3879 }, { "epoch": 6.68, "learning_rate": 1.4332766680832977e-07, "logits/chosen": -1.688284158706665, "logits/rejected": -1.8229031562805176, "logps/chosen": -122.3329849243164, "logps/rejected": -316.31683349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.502161979675293, "rewards/margins": 17.642742156982422, "rewards/rejected": -22.1449031829834, "step": 3880 }, { "epoch": 6.68, "learning_rate": 1.432214194645134e-07, "logits/chosen": -1.970546841621399, "logits/rejected": -2.021383762359619, "logps/chosen": -157.02371215820312, "logps/rejected": -325.8288879394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.658108711242676, "rewards/margins": 16.1306095123291, "rewards/rejected": -22.78871726989746, "step": 3881 }, { "epoch": 6.68, "learning_rate": 1.43115172120697e-07, "logits/chosen": -2.040764331817627, "logits/rejected": -1.5298757553100586, "logps/chosen": -163.0901641845703, "logps/rejected": -285.48876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.95510196685791, "rewards/margins": 16.32648468017578, "rewards/rejected": -21.281587600708008, "step": 3882 }, { "epoch": 6.68, "learning_rate": 1.4300892477688056e-07, "logits/chosen": -1.5308881998062134, "logits/rejected": -2.0627551078796387, "logps/chosen": -147.96255493164062, "logps/rejected": -314.0545654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.6941752433776855, "rewards/margins": 14.794303894042969, "rewards/rejected": -22.488481521606445, "step": 3883 }, { "epoch": 6.69, "learning_rate": 1.4290267743306416e-07, "logits/chosen": -1.7555153369903564, "logits/rejected": -1.9397807121276855, "logps/chosen": -198.93255615234375, "logps/rejected": -309.8221130371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.387046813964844, "rewards/margins": 10.933698654174805, "rewards/rejected": -21.32074546813965, "step": 3884 }, { "epoch": 6.69, "learning_rate": 1.4279643008924776e-07, "logits/chosen": -2.0489184856414795, "logits/rejected": -1.627936601638794, "logps/chosen": -178.08045959472656, "logps/rejected": -353.05517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.59736156463623, "rewards/margins": 18.556676864624023, "rewards/rejected": -27.154037475585938, "step": 3885 }, { "epoch": 6.69, "learning_rate": 1.4269018274543136e-07, "logits/chosen": -2.0198025703430176, "logits/rejected": -1.6694765090942383, "logps/chosen": -140.621337890625, "logps/rejected": -295.6834411621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.830695629119873, "rewards/margins": 17.080642700195312, "rewards/rejected": -21.911340713500977, "step": 3886 }, { "epoch": 6.69, "learning_rate": 1.4258393540161496e-07, "logits/chosen": -1.7587676048278809, "logits/rejected": -1.9904944896697998, "logps/chosen": -167.88612365722656, "logps/rejected": -317.3594970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.867382049560547, "rewards/margins": 13.512378692626953, "rewards/rejected": -22.3797607421875, "step": 3887 }, { "epoch": 6.69, "learning_rate": 1.4247768805779856e-07, "logits/chosen": -1.993733286857605, "logits/rejected": -2.0249807834625244, "logps/chosen": -169.8813018798828, "logps/rejected": -312.0744323730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.041853904724121, "rewards/margins": 15.11397933959961, "rewards/rejected": -22.155834197998047, "step": 3888 }, { "epoch": 6.69, "learning_rate": 1.4237144071398213e-07, "logits/chosen": -2.001115322113037, "logits/rejected": -1.599130630493164, "logps/chosen": -163.054443359375, "logps/rejected": -290.2981262207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.1412763595581055, "rewards/margins": 14.456231117248535, "rewards/rejected": -21.59750747680664, "step": 3889 }, { "epoch": 6.7, "learning_rate": 1.4226519337016573e-07, "logits/chosen": -1.7779521942138672, "logits/rejected": -1.9354339838027954, "logps/chosen": -150.4872589111328, "logps/rejected": -314.59259033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.981928825378418, "rewards/margins": 16.21014976501465, "rewards/rejected": -23.192079544067383, "step": 3890 }, { "epoch": 6.7, "learning_rate": 1.4215894602634935e-07, "logits/chosen": -1.7456839084625244, "logits/rejected": -1.7318289279937744, "logps/chosen": -162.001220703125, "logps/rejected": -352.721923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.58901309967041, "rewards/margins": 19.804840087890625, "rewards/rejected": -27.39385414123535, "step": 3891 }, { "epoch": 6.7, "learning_rate": 1.4205269868253293e-07, "logits/chosen": -2.043318748474121, "logits/rejected": -2.020127296447754, "logps/chosen": -149.4325714111328, "logps/rejected": -343.587890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.220563888549805, "rewards/margins": 17.644710540771484, "rewards/rejected": -23.86527442932129, "step": 3892 }, { "epoch": 6.7, "learning_rate": 1.4194645133871653e-07, "logits/chosen": -2.1087982654571533, "logits/rejected": -1.8198268413543701, "logps/chosen": -164.15975952148438, "logps/rejected": -362.539306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.167220115661621, "rewards/margins": 21.46407699584961, "rewards/rejected": -27.63129425048828, "step": 3893 }, { "epoch": 6.7, "learning_rate": 1.4184020399490012e-07, "logits/chosen": -1.690611720085144, "logits/rejected": -1.9321454763412476, "logps/chosen": -133.70013427734375, "logps/rejected": -341.5626220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.82487678527832, "rewards/margins": 20.09729766845703, "rewards/rejected": -25.922176361083984, "step": 3894 }, { "epoch": 6.7, "learning_rate": 1.4173395665108372e-07, "logits/chosen": -1.745054841041565, "logits/rejected": -1.8295001983642578, "logps/chosen": -157.2815704345703, "logps/rejected": -312.386962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.793981075286865, "rewards/margins": 16.43475341796875, "rewards/rejected": -24.228736877441406, "step": 3895 }, { "epoch": 6.71, "learning_rate": 1.416277093072673e-07, "logits/chosen": -2.045579195022583, "logits/rejected": -1.7861813306808472, "logps/chosen": -135.232177734375, "logps/rejected": -266.6810302734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.306760787963867, "rewards/margins": 14.564558029174805, "rewards/rejected": -18.871318817138672, "step": 3896 }, { "epoch": 6.71, "learning_rate": 1.4152146196345092e-07, "logits/chosen": -1.6900792121887207, "logits/rejected": -1.9257330894470215, "logps/chosen": -176.99899291992188, "logps/rejected": -341.7073974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.13650894165039, "rewards/margins": 15.279373168945312, "rewards/rejected": -24.415882110595703, "step": 3897 }, { "epoch": 6.71, "learning_rate": 1.4141521461963452e-07, "logits/chosen": -1.7058627605438232, "logits/rejected": -1.6412144899368286, "logps/chosen": -134.7718048095703, "logps/rejected": -289.72564697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.145112991333008, "rewards/margins": 16.49690818786621, "rewards/rejected": -21.64202308654785, "step": 3898 }, { "epoch": 6.71, "learning_rate": 1.413089672758181e-07, "logits/chosen": -1.4190430641174316, "logits/rejected": -1.9552757740020752, "logps/chosen": -150.1043701171875, "logps/rejected": -438.37493896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.433082580566406, "rewards/margins": 25.013927459716797, "rewards/rejected": -32.4470100402832, "step": 3899 }, { "epoch": 6.71, "learning_rate": 1.412027199320017e-07, "logits/chosen": -1.8466482162475586, "logits/rejected": -1.772402286529541, "logps/chosen": -178.1678924560547, "logps/rejected": -288.87542724609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.049046516418457, "rewards/margins": 11.744278907775879, "rewards/rejected": -19.793325424194336, "step": 3900 }, { "epoch": 6.71, "learning_rate": 1.410964725881853e-07, "logits/chosen": -1.8358708620071411, "logits/rejected": -1.9110994338989258, "logps/chosen": -177.76101684570312, "logps/rejected": -312.79150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.360234260559082, "rewards/margins": 13.542963027954102, "rewards/rejected": -22.903196334838867, "step": 3901 }, { "epoch": 6.72, "learning_rate": 1.409902252443689e-07, "logits/chosen": -1.7261130809783936, "logits/rejected": -1.828827977180481, "logps/chosen": -156.98416137695312, "logps/rejected": -320.8468933105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.518433570861816, "rewards/margins": 16.41797637939453, "rewards/rejected": -23.93640899658203, "step": 3902 }, { "epoch": 6.72, "learning_rate": 1.408839779005525e-07, "logits/chosen": -1.836970567703247, "logits/rejected": -1.95664644241333, "logps/chosen": -211.01486206054688, "logps/rejected": -333.659912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.109467506408691, "rewards/margins": 11.845853805541992, "rewards/rejected": -24.955320358276367, "step": 3903 }, { "epoch": 6.72, "learning_rate": 1.4077773055673609e-07, "logits/chosen": -1.9922122955322266, "logits/rejected": -1.9471185207366943, "logps/chosen": -154.4166717529297, "logps/rejected": -282.5394592285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.976905345916748, "rewards/margins": 12.34906005859375, "rewards/rejected": -20.325965881347656, "step": 3904 }, { "epoch": 6.72, "learning_rate": 1.4067148321291966e-07, "logits/chosen": -1.4915109872817993, "logits/rejected": -2.1701180934906006, "logps/chosen": -132.9722900390625, "logps/rejected": -306.5601501464844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.5453996658325195, "rewards/margins": 15.449602127075195, "rewards/rejected": -20.9950008392334, "step": 3905 }, { "epoch": 6.72, "learning_rate": 1.4056523586910326e-07, "logits/chosen": -1.9842376708984375, "logits/rejected": -1.5535893440246582, "logps/chosen": -170.01107788085938, "logps/rejected": -276.89093017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.343472480773926, "rewards/margins": 12.94860553741455, "rewards/rejected": -20.292078018188477, "step": 3906 }, { "epoch": 6.72, "learning_rate": 1.4045898852528686e-07, "logits/chosen": -1.70866060256958, "logits/rejected": -2.117806911468506, "logps/chosen": -130.23623657226562, "logps/rejected": -321.64697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.886379718780518, "rewards/margins": 18.398357391357422, "rewards/rejected": -24.28473663330078, "step": 3907 }, { "epoch": 6.73, "learning_rate": 1.4035274118147046e-07, "logits/chosen": -1.8665745258331299, "logits/rejected": -1.8793290853500366, "logps/chosen": -138.159423828125, "logps/rejected": -304.02667236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.599024295806885, "rewards/margins": 15.994433403015137, "rewards/rejected": -20.593456268310547, "step": 3908 }, { "epoch": 6.73, "learning_rate": 1.4024649383765405e-07, "logits/chosen": -1.989683747291565, "logits/rejected": -1.8005056381225586, "logps/chosen": -175.14199829101562, "logps/rejected": -268.65802001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.164312362670898, "rewards/margins": 10.09394645690918, "rewards/rejected": -18.258258819580078, "step": 3909 }, { "epoch": 6.73, "learning_rate": 1.4014024649383765e-07, "logits/chosen": -1.7420068979263306, "logits/rejected": -1.6856440305709839, "logps/chosen": -143.03533935546875, "logps/rejected": -299.86676025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.466806411743164, "rewards/margins": 15.194767951965332, "rewards/rejected": -22.66157341003418, "step": 3910 }, { "epoch": 6.73, "learning_rate": 1.4003399915002123e-07, "logits/chosen": -1.8201260566711426, "logits/rejected": -2.1478424072265625, "logps/chosen": -166.65768432617188, "logps/rejected": -310.2220458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.78635025024414, "rewards/margins": 12.323896408081055, "rewards/rejected": -21.110246658325195, "step": 3911 }, { "epoch": 6.73, "learning_rate": 1.3992775180620483e-07, "logits/chosen": -1.8081088066101074, "logits/rejected": -1.7413368225097656, "logps/chosen": -124.8834228515625, "logps/rejected": -280.957763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.255460262298584, "rewards/margins": 15.595455169677734, "rewards/rejected": -20.850914001464844, "step": 3912 }, { "epoch": 6.73, "learning_rate": 1.3982150446238845e-07, "logits/chosen": -1.7582366466522217, "logits/rejected": -2.041177749633789, "logps/chosen": -159.55160522460938, "logps/rejected": -299.5721130371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.292845726013184, "rewards/margins": 11.43014907836914, "rewards/rejected": -20.72299575805664, "step": 3913 }, { "epoch": 6.74, "learning_rate": 1.3971525711857202e-07, "logits/chosen": -1.775436282157898, "logits/rejected": -1.8803589344024658, "logps/chosen": -156.20498657226562, "logps/rejected": -308.4870300292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7417426109313965, "rewards/margins": 16.779699325561523, "rewards/rejected": -21.521442413330078, "step": 3914 }, { "epoch": 6.74, "learning_rate": 1.3960900977475562e-07, "logits/chosen": -2.0269246101379395, "logits/rejected": -1.8207042217254639, "logps/chosen": -190.9939422607422, "logps/rejected": -300.9674072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.099930763244629, "rewards/margins": 13.721343994140625, "rewards/rejected": -22.821273803710938, "step": 3915 }, { "epoch": 6.74, "learning_rate": 1.3950276243093922e-07, "logits/chosen": -1.850272536277771, "logits/rejected": -1.8771452903747559, "logps/chosen": -110.81696319580078, "logps/rejected": -245.32766723632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.405810832977295, "rewards/margins": 13.073875427246094, "rewards/rejected": -17.479686737060547, "step": 3916 }, { "epoch": 6.74, "learning_rate": 1.3939651508712282e-07, "logits/chosen": -2.0099053382873535, "logits/rejected": -1.7797281742095947, "logps/chosen": -177.37905883789062, "logps/rejected": -332.6241760253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.173452377319336, "rewards/margins": 16.790748596191406, "rewards/rejected": -24.964200973510742, "step": 3917 }, { "epoch": 6.74, "learning_rate": 1.3929026774330642e-07, "logits/chosen": -1.9051891565322876, "logits/rejected": -1.6385283470153809, "logps/chosen": -106.52728271484375, "logps/rejected": -289.4524230957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9665744304656982, "rewards/margins": 18.87050437927246, "rewards/rejected": -21.837078094482422, "step": 3918 }, { "epoch": 6.75, "learning_rate": 1.3918402039949002e-07, "logits/chosen": -1.8629169464111328, "logits/rejected": -1.807594656944275, "logps/chosen": -161.77435302734375, "logps/rejected": -298.8271789550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.388298511505127, "rewards/margins": 14.601604461669922, "rewards/rejected": -21.98990249633789, "step": 3919 }, { "epoch": 6.75, "learning_rate": 1.3907777305567362e-07, "logits/chosen": -1.939396858215332, "logits/rejected": -1.3102436065673828, "logps/chosen": -179.99240112304688, "logps/rejected": -271.93560791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0686845779418945, "rewards/margins": 12.094969749450684, "rewards/rejected": -19.163654327392578, "step": 3920 }, { "epoch": 6.75, "learning_rate": 1.389715257118572e-07, "logits/chosen": -1.2480733394622803, "logits/rejected": -2.0820376873016357, "logps/chosen": -150.03321838378906, "logps/rejected": -383.10174560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.058406829833984, "rewards/margins": 19.62436866760254, "rewards/rejected": -25.68277359008789, "step": 3921 }, { "epoch": 6.75, "learning_rate": 1.388652783680408e-07, "logits/chosen": -1.6130772829055786, "logits/rejected": -1.8952529430389404, "logps/chosen": -145.92713928222656, "logps/rejected": -314.15081787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.797900676727295, "rewards/margins": 16.095327377319336, "rewards/rejected": -21.89322853088379, "step": 3922 }, { "epoch": 6.75, "learning_rate": 1.3875903102422439e-07, "logits/chosen": -1.7681735754013062, "logits/rejected": -1.899357557296753, "logps/chosen": -120.97978973388672, "logps/rejected": -288.761474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.423251152038574, "rewards/margins": 16.428340911865234, "rewards/rejected": -21.851593017578125, "step": 3923 }, { "epoch": 6.75, "learning_rate": 1.3865278368040799e-07, "logits/chosen": -1.668302297592163, "logits/rejected": -1.8235207796096802, "logps/chosen": -126.45805358886719, "logps/rejected": -313.4305419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.266865253448486, "rewards/margins": 17.368696212768555, "rewards/rejected": -22.635560989379883, "step": 3924 }, { "epoch": 6.76, "learning_rate": 1.3854653633659158e-07, "logits/chosen": -1.935335397720337, "logits/rejected": -1.5983712673187256, "logps/chosen": -157.47463989257812, "logps/rejected": -300.0401611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8795599937438965, "rewards/margins": 15.341135025024414, "rewards/rejected": -22.22069549560547, "step": 3925 }, { "epoch": 6.76, "learning_rate": 1.3844028899277518e-07, "logits/chosen": -1.8336269855499268, "logits/rejected": -1.965179681777954, "logps/chosen": -141.45553588867188, "logps/rejected": -285.07501220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3182454109191895, "rewards/margins": 14.21236515045166, "rewards/rejected": -20.530609130859375, "step": 3926 }, { "epoch": 6.76, "learning_rate": 1.3833404164895876e-07, "logits/chosen": -1.5661184787750244, "logits/rejected": -1.8291263580322266, "logps/chosen": -157.8226776123047, "logps/rejected": -272.62249755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.623245716094971, "rewards/margins": 11.722146987915039, "rewards/rejected": -17.345394134521484, "step": 3927 }, { "epoch": 6.76, "learning_rate": 1.3822779430514235e-07, "logits/chosen": -1.8047492504119873, "logits/rejected": -1.823267936706543, "logps/chosen": -172.80606079101562, "logps/rejected": -304.3944091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.410157203674316, "rewards/margins": 13.589359283447266, "rewards/rejected": -21.9995174407959, "step": 3928 }, { "epoch": 6.76, "learning_rate": 1.3812154696132598e-07, "logits/chosen": -1.624759554862976, "logits/rejected": -2.0007059574127197, "logps/chosen": -114.75592041015625, "logps/rejected": -247.97348022460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.163191318511963, "rewards/margins": 12.652726173400879, "rewards/rejected": -17.81591796875, "step": 3929 }, { "epoch": 6.76, "learning_rate": 1.3801529961750955e-07, "logits/chosen": -1.7179921865463257, "logits/rejected": -2.0170164108276367, "logps/chosen": -119.2354965209961, "logps/rejected": -300.93804931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.738315582275391, "rewards/margins": 15.834330558776855, "rewards/rejected": -20.572647094726562, "step": 3930 }, { "epoch": 6.77, "learning_rate": 1.3790905227369315e-07, "logits/chosen": -1.955236554145813, "logits/rejected": -1.8364849090576172, "logps/chosen": -144.88217163085938, "logps/rejected": -363.34234619140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.001183986663818, "rewards/margins": 21.754127502441406, "rewards/rejected": -26.755313873291016, "step": 3931 }, { "epoch": 6.77, "learning_rate": 1.3780280492987675e-07, "logits/chosen": -1.4310023784637451, "logits/rejected": -2.0532288551330566, "logps/chosen": -156.47012329101562, "logps/rejected": -340.35467529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.368230819702148, "rewards/margins": 17.934226989746094, "rewards/rejected": -24.302457809448242, "step": 3932 }, { "epoch": 6.77, "learning_rate": 1.3769655758606032e-07, "logits/chosen": -1.8633705377578735, "logits/rejected": -1.8871574401855469, "logps/chosen": -147.4263153076172, "logps/rejected": -260.6129150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.698263168334961, "rewards/margins": 11.55809497833252, "rewards/rejected": -18.256357192993164, "step": 3933 }, { "epoch": 6.77, "learning_rate": 1.3759031024224392e-07, "logits/chosen": -1.9399051666259766, "logits/rejected": -1.5205893516540527, "logps/chosen": -155.0525360107422, "logps/rejected": -298.98248291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.561844348907471, "rewards/margins": 15.828699111938477, "rewards/rejected": -22.390544891357422, "step": 3934 }, { "epoch": 6.77, "learning_rate": 1.3748406289842755e-07, "logits/chosen": -1.8545167446136475, "logits/rejected": -1.8699002265930176, "logps/chosen": -165.551513671875, "logps/rejected": -338.0093994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.250999450683594, "rewards/margins": 15.245129585266113, "rewards/rejected": -23.49612808227539, "step": 3935 }, { "epoch": 6.77, "learning_rate": 1.3737781555461112e-07, "logits/chosen": -1.745068073272705, "logits/rejected": -1.936873197555542, "logps/chosen": -113.08751678466797, "logps/rejected": -313.7537536621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.134026050567627, "rewards/margins": 18.870471954345703, "rewards/rejected": -23.004497528076172, "step": 3936 }, { "epoch": 6.78, "learning_rate": 1.3727156821079472e-07, "logits/chosen": -1.8227636814117432, "logits/rejected": -1.936672568321228, "logps/chosen": -160.91860961914062, "logps/rejected": -327.3911437988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7923688888549805, "rewards/margins": 17.23365592956543, "rewards/rejected": -23.026023864746094, "step": 3937 }, { "epoch": 6.78, "learning_rate": 1.3716532086697832e-07, "logits/chosen": -2.0480523109436035, "logits/rejected": -1.8923927545547485, "logps/chosen": -164.68858337402344, "logps/rejected": -287.25006103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.111842155456543, "rewards/margins": 12.998682022094727, "rewards/rejected": -21.110525131225586, "step": 3938 }, { "epoch": 6.78, "learning_rate": 1.3705907352316192e-07, "logits/chosen": -1.9882750511169434, "logits/rejected": -1.5289231538772583, "logps/chosen": -167.2982177734375, "logps/rejected": -264.5364990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.353434562683105, "rewards/margins": 11.735637664794922, "rewards/rejected": -20.08907127380371, "step": 3939 }, { "epoch": 6.78, "learning_rate": 1.3695282617934551e-07, "logits/chosen": -1.8154481649398804, "logits/rejected": -1.9995102882385254, "logps/chosen": -142.31468200683594, "logps/rejected": -275.2169189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.348801612854004, "rewards/margins": 14.098762512207031, "rewards/rejected": -20.44756507873535, "step": 3940 }, { "epoch": 6.78, "learning_rate": 1.3684657883552911e-07, "logits/chosen": -1.8235726356506348, "logits/rejected": -1.7898861169815063, "logps/chosen": -129.83096313476562, "logps/rejected": -297.0778503417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.05625057220459, "rewards/margins": 16.885520935058594, "rewards/rejected": -21.9417724609375, "step": 3941 }, { "epoch": 6.78, "learning_rate": 1.367403314917127e-07, "logits/chosen": -1.9523794651031494, "logits/rejected": -1.7879563570022583, "logps/chosen": -146.4216766357422, "logps/rejected": -249.51426696777344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.317336082458496, "rewards/margins": 10.407388687133789, "rewards/rejected": -16.7247257232666, "step": 3942 }, { "epoch": 6.79, "learning_rate": 1.3663408414789629e-07, "logits/chosen": -1.8563213348388672, "logits/rejected": -1.6573607921600342, "logps/chosen": -135.47366333007812, "logps/rejected": -291.91497802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.421782970428467, "rewards/margins": 15.604973793029785, "rewards/rejected": -22.026756286621094, "step": 3943 }, { "epoch": 6.79, "learning_rate": 1.3652783680407988e-07, "logits/chosen": -2.0338850021362305, "logits/rejected": -1.7416791915893555, "logps/chosen": -158.0017852783203, "logps/rejected": -279.18927001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.848683834075928, "rewards/margins": 14.629714965820312, "rewards/rejected": -21.478397369384766, "step": 3944 }, { "epoch": 6.79, "learning_rate": 1.364215894602635e-07, "logits/chosen": -1.6285147666931152, "logits/rejected": -1.7418251037597656, "logps/chosen": -157.81019592285156, "logps/rejected": -273.3448486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.713475227355957, "rewards/margins": 11.006513595581055, "rewards/rejected": -19.719989776611328, "step": 3945 }, { "epoch": 6.79, "learning_rate": 1.3631534211644708e-07, "logits/chosen": -1.575814127922058, "logits/rejected": -1.7985154390335083, "logps/chosen": -181.61207580566406, "logps/rejected": -305.0993957519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.780062675476074, "rewards/margins": 12.446420669555664, "rewards/rejected": -20.226482391357422, "step": 3946 }, { "epoch": 6.79, "learning_rate": 1.3620909477263068e-07, "logits/chosen": -1.8264636993408203, "logits/rejected": -1.9180078506469727, "logps/chosen": -198.299072265625, "logps/rejected": -378.8949890136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.028400421142578, "rewards/margins": 17.656539916992188, "rewards/rejected": -27.684940338134766, "step": 3947 }, { "epoch": 6.8, "learning_rate": 1.3610284742881428e-07, "logits/chosen": -1.7973995208740234, "logits/rejected": -1.696443796157837, "logps/chosen": -149.38360595703125, "logps/rejected": -318.55157470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.912684440612793, "rewards/margins": 16.425479888916016, "rewards/rejected": -23.338165283203125, "step": 3948 }, { "epoch": 6.8, "learning_rate": 1.3599660008499785e-07, "logits/chosen": -1.9433748722076416, "logits/rejected": -1.5012589693069458, "logps/chosen": -136.95680236816406, "logps/rejected": -289.109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.084401607513428, "rewards/margins": 15.686713218688965, "rewards/rejected": -20.771114349365234, "step": 3949 }, { "epoch": 6.8, "learning_rate": 1.3589035274118145e-07, "logits/chosen": -1.7668695449829102, "logits/rejected": -1.9806643724441528, "logps/chosen": -112.79329681396484, "logps/rejected": -303.4931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.690122604370117, "rewards/margins": 18.125951766967773, "rewards/rejected": -21.81607437133789, "step": 3950 }, { "epoch": 6.8, "learning_rate": 1.3578410539736508e-07, "logits/chosen": -1.4529693126678467, "logits/rejected": -1.8873683214187622, "logps/chosen": -144.47520446777344, "logps/rejected": -312.68951416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.728495121002197, "rewards/margins": 15.027810096740723, "rewards/rejected": -21.756305694580078, "step": 3951 }, { "epoch": 6.8, "learning_rate": 1.3567785805354865e-07, "logits/chosen": -1.6634180545806885, "logits/rejected": -1.6703757047653198, "logps/chosen": -128.7159423828125, "logps/rejected": -295.7445068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.042267322540283, "rewards/margins": 16.36243438720703, "rewards/rejected": -22.404701232910156, "step": 3952 }, { "epoch": 6.8, "learning_rate": 1.3557161070973225e-07, "logits/chosen": -1.4996799230575562, "logits/rejected": -2.0196609497070312, "logps/chosen": -136.74368286132812, "logps/rejected": -282.9751892089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.807908535003662, "rewards/margins": 12.780580520629883, "rewards/rejected": -19.588489532470703, "step": 3953 }, { "epoch": 6.81, "learning_rate": 1.3546536336591585e-07, "logits/chosen": -1.418217420578003, "logits/rejected": -1.9656877517700195, "logps/chosen": -144.76766967773438, "logps/rejected": -356.59454345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.180630207061768, "rewards/margins": 18.971471786499023, "rewards/rejected": -25.152099609375, "step": 3954 }, { "epoch": 6.81, "learning_rate": 1.3535911602209942e-07, "logits/chosen": -1.5194568634033203, "logits/rejected": -1.9560248851776123, "logps/chosen": -161.466796875, "logps/rejected": -308.4075927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.957252502441406, "rewards/margins": 13.433505058288574, "rewards/rejected": -20.390758514404297, "step": 3955 }, { "epoch": 6.81, "learning_rate": 1.3525286867828304e-07, "logits/chosen": -1.7670488357543945, "logits/rejected": -1.447181224822998, "logps/chosen": -94.89689636230469, "logps/rejected": -241.7836456298828, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.188505172729492, "rewards/margins": 14.600397109985352, "rewards/rejected": -18.788902282714844, "step": 3956 }, { "epoch": 6.81, "learning_rate": 1.3514662133446664e-07, "logits/chosen": -2.069888114929199, "logits/rejected": -1.7256144285202026, "logps/chosen": -150.27838134765625, "logps/rejected": -275.2917785644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.115683078765869, "rewards/margins": 14.097772598266602, "rewards/rejected": -20.213457107543945, "step": 3957 }, { "epoch": 6.81, "learning_rate": 1.3504037399065024e-07, "logits/chosen": -1.7674944400787354, "logits/rejected": -1.6705589294433594, "logps/chosen": -179.67779541015625, "logps/rejected": -356.0590515136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.484228134155273, "rewards/margins": 17.30904769897461, "rewards/rejected": -26.793275833129883, "step": 3958 }, { "epoch": 6.81, "learning_rate": 1.3493412664683381e-07, "logits/chosen": -1.5470917224884033, "logits/rejected": -2.1402769088745117, "logps/chosen": -153.8769989013672, "logps/rejected": -331.8412780761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.905706882476807, "rewards/margins": 15.303239822387695, "rewards/rejected": -22.208946228027344, "step": 3959 }, { "epoch": 6.82, "learning_rate": 1.3482787930301741e-07, "logits/chosen": -1.8782166242599487, "logits/rejected": -2.0226731300354004, "logps/chosen": -132.9429168701172, "logps/rejected": -253.11962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.949882984161377, "rewards/margins": 11.87571907043457, "rewards/rejected": -17.825603485107422, "step": 3960 }, { "epoch": 6.82, "learning_rate": 1.3472163195920104e-07, "logits/chosen": -1.6596362590789795, "logits/rejected": -1.8296926021575928, "logps/chosen": -168.58663940429688, "logps/rejected": -318.4245300292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.201229095458984, "rewards/margins": 13.797178268432617, "rewards/rejected": -21.9984073638916, "step": 3961 }, { "epoch": 6.82, "learning_rate": 1.346153846153846e-07, "logits/chosen": -1.3229173421859741, "logits/rejected": -1.9534006118774414, "logps/chosen": -173.72430419921875, "logps/rejected": -368.53302001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.500277519226074, "rewards/margins": 18.443513870239258, "rewards/rejected": -27.94379234313965, "step": 3962 }, { "epoch": 6.82, "learning_rate": 1.345091372715682e-07, "logits/chosen": -1.6932573318481445, "logits/rejected": -1.5186711549758911, "logps/chosen": -150.2294158935547, "logps/rejected": -307.20977783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.84269905090332, "rewards/margins": 16.827917098999023, "rewards/rejected": -23.670616149902344, "step": 3963 }, { "epoch": 6.82, "learning_rate": 1.344028899277518e-07, "logits/chosen": -1.4801015853881836, "logits/rejected": -1.921978235244751, "logps/chosen": -146.7539520263672, "logps/rejected": -294.0718994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.889110565185547, "rewards/margins": 14.442931175231934, "rewards/rejected": -20.332042694091797, "step": 3964 }, { "epoch": 6.82, "learning_rate": 1.3429664258393538e-07, "logits/chosen": -1.7433011531829834, "logits/rejected": -1.8013958930969238, "logps/chosen": -199.15557861328125, "logps/rejected": -364.41851806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.4061918258667, "rewards/margins": 17.421001434326172, "rewards/rejected": -26.827192306518555, "step": 3965 }, { "epoch": 6.83, "learning_rate": 1.3419039524011898e-07, "logits/chosen": -1.853839635848999, "logits/rejected": -1.7063941955566406, "logps/chosen": -195.91162109375, "logps/rejected": -322.6895446777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.37588119506836, "rewards/margins": 15.451478958129883, "rewards/rejected": -23.827360153198242, "step": 3966 }, { "epoch": 6.83, "learning_rate": 1.340841478963026e-07, "logits/chosen": -1.8468329906463623, "logits/rejected": -1.8477022647857666, "logps/chosen": -123.43408203125, "logps/rejected": -243.75596618652344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.872913837432861, "rewards/margins": 12.654014587402344, "rewards/rejected": -18.526927947998047, "step": 3967 }, { "epoch": 6.83, "learning_rate": 1.3397790055248618e-07, "logits/chosen": -1.8139607906341553, "logits/rejected": -1.3961570262908936, "logps/chosen": -168.8732147216797, "logps/rejected": -287.2303161621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.771263122558594, "rewards/margins": 12.142963409423828, "rewards/rejected": -20.914228439331055, "step": 3968 }, { "epoch": 6.83, "learning_rate": 1.3387165320866978e-07, "logits/chosen": -1.7554981708526611, "logits/rejected": -1.8394341468811035, "logps/chosen": -138.70892333984375, "logps/rejected": -294.0148010253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.377121448516846, "rewards/margins": 14.69312858581543, "rewards/rejected": -22.07025146484375, "step": 3969 }, { "epoch": 6.83, "learning_rate": 1.3376540586485338e-07, "logits/chosen": -1.8592143058776855, "logits/rejected": -1.7545545101165771, "logps/chosen": -133.2879180908203, "logps/rejected": -295.75579833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.180060386657715, "rewards/margins": 15.818592071533203, "rewards/rejected": -21.9986515045166, "step": 3970 }, { "epoch": 6.83, "learning_rate": 1.3365915852103695e-07, "logits/chosen": -2.0878334045410156, "logits/rejected": -1.8746932744979858, "logps/chosen": -125.99591827392578, "logps/rejected": -251.37266540527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.706106662750244, "rewards/margins": 13.119572639465332, "rewards/rejected": -17.825679779052734, "step": 3971 }, { "epoch": 6.84, "learning_rate": 1.3355291117722057e-07, "logits/chosen": -1.9880900382995605, "logits/rejected": -1.7288973331451416, "logps/chosen": -137.51092529296875, "logps/rejected": -300.0923767089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.905036926269531, "rewards/margins": 16.32357406616211, "rewards/rejected": -21.22861099243164, "step": 3972 }, { "epoch": 6.84, "learning_rate": 1.3344666383340417e-07, "logits/chosen": -1.8742443323135376, "logits/rejected": -1.8730173110961914, "logps/chosen": -199.00967407226562, "logps/rejected": -343.1559143066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.855422973632812, "rewards/margins": 15.023964881896973, "rewards/rejected": -24.8793888092041, "step": 3973 }, { "epoch": 6.84, "learning_rate": 1.3334041648958775e-07, "logits/chosen": -1.876767635345459, "logits/rejected": -1.8432611227035522, "logps/chosen": -126.66204833984375, "logps/rejected": -261.23687744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.589531898498535, "rewards/margins": 13.197232246398926, "rewards/rejected": -18.786766052246094, "step": 3974 }, { "epoch": 6.84, "learning_rate": 1.3323416914577134e-07, "logits/chosen": -1.8555169105529785, "logits/rejected": -2.064007043838501, "logps/chosen": -160.3218994140625, "logps/rejected": -327.7179260253906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.034891128540039, "rewards/margins": 16.53448486328125, "rewards/rejected": -24.569374084472656, "step": 3975 }, { "epoch": 6.84, "learning_rate": 1.3312792180195494e-07, "logits/chosen": -1.732093095779419, "logits/rejected": -2.084768056869507, "logps/chosen": -166.21168518066406, "logps/rejected": -318.21160888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.701869010925293, "rewards/margins": 13.925971031188965, "rewards/rejected": -21.627840042114258, "step": 3976 }, { "epoch": 6.85, "learning_rate": 1.3302167445813852e-07, "logits/chosen": -1.7962348461151123, "logits/rejected": -1.8393218517303467, "logps/chosen": -183.22674560546875, "logps/rejected": -298.697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.194683074951172, "rewards/margins": 13.691590309143066, "rewards/rejected": -21.886272430419922, "step": 3977 }, { "epoch": 6.85, "learning_rate": 1.3291542711432214e-07, "logits/chosen": -1.9248309135437012, "logits/rejected": -1.5925614833831787, "logps/chosen": -169.5068359375, "logps/rejected": -324.6204528808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.526904106140137, "rewards/margins": 16.402809143066406, "rewards/rejected": -24.929712295532227, "step": 3978 }, { "epoch": 6.85, "learning_rate": 1.3280917977050574e-07, "logits/chosen": -1.9615422487258911, "logits/rejected": -1.858471155166626, "logps/chosen": -161.45668029785156, "logps/rejected": -305.80340576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.632468223571777, "rewards/margins": 13.783238410949707, "rewards/rejected": -22.415706634521484, "step": 3979 }, { "epoch": 6.85, "learning_rate": 1.3270293242668934e-07, "logits/chosen": -1.9983770847320557, "logits/rejected": -1.9130834341049194, "logps/chosen": -140.83973693847656, "logps/rejected": -289.58837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.376644134521484, "rewards/margins": 14.894120216369629, "rewards/rejected": -21.270763397216797, "step": 3980 }, { "epoch": 6.85, "learning_rate": 1.325966850828729e-07, "logits/chosen": -1.9522011280059814, "logits/rejected": -1.9617303609848022, "logps/chosen": -156.22113037109375, "logps/rejected": -312.82281494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.137155532836914, "rewards/margins": 15.543219566345215, "rewards/rejected": -22.680376052856445, "step": 3981 }, { "epoch": 6.85, "learning_rate": 1.324904377390565e-07, "logits/chosen": -1.4852045774459839, "logits/rejected": -2.1301512718200684, "logps/chosen": -109.04246520996094, "logps/rejected": -334.5028381347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.915163040161133, "rewards/margins": 20.510887145996094, "rewards/rejected": -24.426048278808594, "step": 3982 }, { "epoch": 6.86, "learning_rate": 1.3238419039524014e-07, "logits/chosen": -1.952907919883728, "logits/rejected": -1.5109046697616577, "logps/chosen": -150.3527069091797, "logps/rejected": -264.22601318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.158025741577148, "rewards/margins": 13.439815521240234, "rewards/rejected": -19.597841262817383, "step": 3983 }, { "epoch": 6.86, "learning_rate": 1.322779430514237e-07, "logits/chosen": -1.7950811386108398, "logits/rejected": -1.7714422941207886, "logps/chosen": -174.81198120117188, "logps/rejected": -319.35308837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.451706886291504, "rewards/margins": 14.674148559570312, "rewards/rejected": -23.125856399536133, "step": 3984 }, { "epoch": 6.86, "learning_rate": 1.321716957076073e-07, "logits/chosen": -1.958756446838379, "logits/rejected": -1.927173376083374, "logps/chosen": -144.22119140625, "logps/rejected": -298.386474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.902670383453369, "rewards/margins": 14.766183853149414, "rewards/rejected": -19.668853759765625, "step": 3985 }, { "epoch": 6.86, "learning_rate": 1.320654483637909e-07, "logits/chosen": -1.5493823289871216, "logits/rejected": -1.5830923318862915, "logps/chosen": -153.29623413085938, "logps/rejected": -257.5013732910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.283597946166992, "rewards/margins": 11.383722305297852, "rewards/rejected": -18.667320251464844, "step": 3986 }, { "epoch": 6.86, "learning_rate": 1.3195920101997448e-07, "logits/chosen": -1.876554012298584, "logits/rejected": -1.8060768842697144, "logps/chosen": -117.50418090820312, "logps/rejected": -314.3466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.343936920166016, "rewards/margins": 19.608963012695312, "rewards/rejected": -23.952899932861328, "step": 3987 }, { "epoch": 6.86, "learning_rate": 1.318529536761581e-07, "logits/chosen": -1.7625489234924316, "logits/rejected": -1.9021809101104736, "logps/chosen": -153.38755798339844, "logps/rejected": -329.4358215332031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.251774311065674, "rewards/margins": 17.50113296508789, "rewards/rejected": -24.752906799316406, "step": 3988 }, { "epoch": 6.87, "learning_rate": 1.317467063323417e-07, "logits/chosen": -1.5856306552886963, "logits/rejected": -1.9048445224761963, "logps/chosen": -170.16055297851562, "logps/rejected": -303.8369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.504122257232666, "rewards/margins": 13.195928573608398, "rewards/rejected": -20.70005226135254, "step": 3989 }, { "epoch": 6.87, "learning_rate": 1.3164045898852527e-07, "logits/chosen": -2.006531238555908, "logits/rejected": -1.9430222511291504, "logps/chosen": -166.58514404296875, "logps/rejected": -347.93695068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.943882942199707, "rewards/margins": 17.035234451293945, "rewards/rejected": -25.97911834716797, "step": 3990 }, { "epoch": 6.87, "learning_rate": 1.3153421164470887e-07, "logits/chosen": -1.8930566310882568, "logits/rejected": -1.9227224588394165, "logps/chosen": -164.61386108398438, "logps/rejected": -311.0932312011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.562591552734375, "rewards/margins": 14.128053665161133, "rewards/rejected": -21.690645217895508, "step": 3991 }, { "epoch": 6.87, "learning_rate": 1.3142796430089247e-07, "logits/chosen": -1.7508360147476196, "logits/rejected": -1.8333373069763184, "logps/chosen": -154.19569396972656, "logps/rejected": -311.14154052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.524919509887695, "rewards/margins": 15.869450569152832, "rewards/rejected": -23.394371032714844, "step": 3992 }, { "epoch": 6.87, "learning_rate": 1.3132171695707604e-07, "logits/chosen": -1.8943512439727783, "logits/rejected": -1.908017635345459, "logps/chosen": -191.95127868652344, "logps/rejected": -342.90972900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.76314926147461, "rewards/margins": 15.612283706665039, "rewards/rejected": -25.375431060791016, "step": 3993 }, { "epoch": 6.87, "learning_rate": 1.3121546961325967e-07, "logits/chosen": -2.0804522037506104, "logits/rejected": -1.5858608484268188, "logps/chosen": -161.7511444091797, "logps/rejected": -288.4918212890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.259704113006592, "rewards/margins": 13.643571853637695, "rewards/rejected": -20.903276443481445, "step": 3994 }, { "epoch": 6.88, "learning_rate": 1.3110922226944327e-07, "logits/chosen": -1.6675095558166504, "logits/rejected": -1.9197636842727661, "logps/chosen": -134.52322387695312, "logps/rejected": -267.5541076660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.26696252822876, "rewards/margins": 13.281970977783203, "rewards/rejected": -18.548934936523438, "step": 3995 }, { "epoch": 6.88, "learning_rate": 1.3100297492562684e-07, "logits/chosen": -2.014922857284546, "logits/rejected": -1.965701937675476, "logps/chosen": -165.36566162109375, "logps/rejected": -347.01031494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.758754253387451, "rewards/margins": 15.779916763305664, "rewards/rejected": -23.538671493530273, "step": 3996 }, { "epoch": 6.88, "learning_rate": 1.3089672758181044e-07, "logits/chosen": -1.8626083135604858, "logits/rejected": -1.9845027923583984, "logps/chosen": -195.84774780273438, "logps/rejected": -315.7400817871094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.15499496459961, "rewards/margins": 12.09422779083252, "rewards/rejected": -23.249221801757812, "step": 3997 }, { "epoch": 6.88, "learning_rate": 1.3079048023799404e-07, "logits/chosen": -1.7704813480377197, "logits/rejected": -1.9066009521484375, "logps/chosen": -150.01380920410156, "logps/rejected": -317.7267761230469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.566721439361572, "rewards/margins": 16.72603416442871, "rewards/rejected": -23.292755126953125, "step": 3998 }, { "epoch": 6.88, "learning_rate": 1.3068423289417764e-07, "logits/chosen": -1.7855808734893799, "logits/rejected": -1.9360615015029907, "logps/chosen": -170.61456298828125, "logps/rejected": -389.0513000488281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6398701667785645, "rewards/margins": 19.559045791625977, "rewards/rejected": -26.19891357421875, "step": 3999 }, { "epoch": 6.88, "learning_rate": 1.3057798555036124e-07, "logits/chosen": -1.9822328090667725, "logits/rejected": -1.6058921813964844, "logps/chosen": -187.8903045654297, "logps/rejected": -324.5997314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.474100112915039, "rewards/margins": 14.105504989624023, "rewards/rejected": -22.579605102539062, "step": 4000 }, { "epoch": 6.89, "learning_rate": 1.3047173820654484e-07, "logits/chosen": -1.834306240081787, "logits/rejected": -1.9489765167236328, "logps/chosen": -106.9759521484375, "logps/rejected": -259.8554382324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.691932439804077, "rewards/margins": 15.261493682861328, "rewards/rejected": -18.95342445373535, "step": 4001 }, { "epoch": 6.89, "learning_rate": 1.3036549086272844e-07, "logits/chosen": -2.0406367778778076, "logits/rejected": -1.6142500638961792, "logps/chosen": -149.6391143798828, "logps/rejected": -283.7540283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.803326606750488, "rewards/margins": 14.315823554992676, "rewards/rejected": -20.119150161743164, "step": 4002 }, { "epoch": 6.89, "learning_rate": 1.30259243518912e-07, "logits/chosen": -1.4361567497253418, "logits/rejected": -2.016294240951538, "logps/chosen": -116.416015625, "logps/rejected": -298.0169677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.420241832733154, "rewards/margins": 16.1468448638916, "rewards/rejected": -20.56708526611328, "step": 4003 }, { "epoch": 6.89, "learning_rate": 1.301529961750956e-07, "logits/chosen": -1.854629397392273, "logits/rejected": -1.7143645286560059, "logps/chosen": -160.51904296875, "logps/rejected": -266.22589111328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.3176093101501465, "rewards/margins": 11.479220390319824, "rewards/rejected": -17.796829223632812, "step": 4004 }, { "epoch": 6.89, "learning_rate": 1.3004674883127923e-07, "logits/chosen": -1.5736331939697266, "logits/rejected": -1.9910368919372559, "logps/chosen": -167.75608825683594, "logps/rejected": -355.09222412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.980073928833008, "rewards/margins": 17.769100189208984, "rewards/rejected": -26.74917221069336, "step": 4005 }, { "epoch": 6.9, "learning_rate": 1.299405014874628e-07, "logits/chosen": -1.8335328102111816, "logits/rejected": -1.4613893032073975, "logps/chosen": -159.06527709960938, "logps/rejected": -290.5552978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.380687713623047, "rewards/margins": 13.465213775634766, "rewards/rejected": -20.845901489257812, "step": 4006 }, { "epoch": 6.9, "learning_rate": 1.298342541436464e-07, "logits/chosen": -1.9082415103912354, "logits/rejected": -2.088160753250122, "logps/chosen": -198.87184143066406, "logps/rejected": -302.72418212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.980249404907227, "rewards/margins": 11.393119812011719, "rewards/rejected": -21.373371124267578, "step": 4007 }, { "epoch": 6.9, "learning_rate": 1.2972800679983e-07, "logits/chosen": -1.9205915927886963, "logits/rejected": -1.9841657876968384, "logps/chosen": -128.72483825683594, "logps/rejected": -334.57470703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.684047222137451, "rewards/margins": 19.46710777282715, "rewards/rejected": -25.151153564453125, "step": 4008 }, { "epoch": 6.9, "learning_rate": 1.2962175945601357e-07, "logits/chosen": -1.8834869861602783, "logits/rejected": -2.0747792720794678, "logps/chosen": -158.66510009765625, "logps/rejected": -297.42431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.605611324310303, "rewards/margins": 13.043100357055664, "rewards/rejected": -20.648712158203125, "step": 4009 }, { "epoch": 6.9, "learning_rate": 1.295155121121972e-07, "logits/chosen": -1.7864763736724854, "logits/rejected": -1.9706172943115234, "logps/chosen": -146.32763671875, "logps/rejected": -338.630615234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.263912200927734, "rewards/margins": 18.828641891479492, "rewards/rejected": -25.092552185058594, "step": 4010 }, { "epoch": 6.9, "learning_rate": 1.294092647683808e-07, "logits/chosen": -1.7448421716690063, "logits/rejected": -1.6965820789337158, "logps/chosen": -130.5435333251953, "logps/rejected": -263.05230712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.317746162414551, "rewards/margins": 14.141931533813477, "rewards/rejected": -19.459678649902344, "step": 4011 }, { "epoch": 6.91, "learning_rate": 1.2930301742456437e-07, "logits/chosen": -1.7173652648925781, "logits/rejected": -2.0277647972106934, "logps/chosen": -160.40704345703125, "logps/rejected": -342.078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.489125728607178, "rewards/margins": 16.876853942871094, "rewards/rejected": -24.36598014831543, "step": 4012 }, { "epoch": 6.91, "learning_rate": 1.2919677008074797e-07, "logits/chosen": -1.7858401536941528, "logits/rejected": -2.017726421356201, "logps/chosen": -183.17718505859375, "logps/rejected": -353.1061096191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.768882751464844, "rewards/margins": 16.568876266479492, "rewards/rejected": -25.337759017944336, "step": 4013 }, { "epoch": 6.91, "learning_rate": 1.2909052273693157e-07, "logits/chosen": -1.8132368326187134, "logits/rejected": -1.8357584476470947, "logps/chosen": -172.59963989257812, "logps/rejected": -330.3821716308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.560477256774902, "rewards/margins": 16.09434700012207, "rewards/rejected": -24.654823303222656, "step": 4014 }, { "epoch": 6.91, "learning_rate": 1.2898427539311517e-07, "logits/chosen": -1.8472399711608887, "logits/rejected": -1.8272900581359863, "logps/chosen": -146.5548553466797, "logps/rejected": -313.2503967285156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.332025527954102, "rewards/margins": 16.9540958404541, "rewards/rejected": -23.286121368408203, "step": 4015 }, { "epoch": 6.91, "learning_rate": 1.2887802804929877e-07, "logits/chosen": -1.9589900970458984, "logits/rejected": -2.072314500808716, "logps/chosen": -158.4385986328125, "logps/rejected": -301.8123779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.923778533935547, "rewards/margins": 13.658746719360352, "rewards/rejected": -21.5825252532959, "step": 4016 }, { "epoch": 6.91, "learning_rate": 1.2877178070548237e-07, "logits/chosen": -2.028825283050537, "logits/rejected": -1.9266893863677979, "logps/chosen": -142.631591796875, "logps/rejected": -323.19329833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.949680328369141, "rewards/margins": 18.46402359008789, "rewards/rejected": -25.41370391845703, "step": 4017 }, { "epoch": 6.92, "learning_rate": 1.2866553336166594e-07, "logits/chosen": -1.7820022106170654, "logits/rejected": -2.1551835536956787, "logps/chosen": -180.10223388671875, "logps/rejected": -358.22418212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.4570951461792, "rewards/margins": 16.678089141845703, "rewards/rejected": -25.13518524169922, "step": 4018 }, { "epoch": 6.92, "learning_rate": 1.2855928601784954e-07, "logits/chosen": -1.8812233209609985, "logits/rejected": -1.6920195817947388, "logps/chosen": -197.7139892578125, "logps/rejected": -326.52252197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.102810859680176, "rewards/margins": 13.290483474731445, "rewards/rejected": -24.393295288085938, "step": 4019 }, { "epoch": 6.92, "learning_rate": 1.2845303867403314e-07, "logits/chosen": -2.06807279586792, "logits/rejected": -1.5790131092071533, "logps/chosen": -159.41995239257812, "logps/rejected": -345.1516418457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.795242786407471, "rewards/margins": 19.612150192260742, "rewards/rejected": -26.407394409179688, "step": 4020 }, { "epoch": 6.92, "learning_rate": 1.2834679133021676e-07, "logits/chosen": -1.5258686542510986, "logits/rejected": -1.8586517572402954, "logps/chosen": -146.80838012695312, "logps/rejected": -312.76031494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.778565406799316, "rewards/margins": 14.674355506896973, "rewards/rejected": -21.45292091369629, "step": 4021 }, { "epoch": 6.92, "learning_rate": 1.2824054398640033e-07, "logits/chosen": -1.8426645994186401, "logits/rejected": -2.037172555923462, "logps/chosen": -111.7149887084961, "logps/rejected": -279.3241882324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2877349853515625, "rewards/margins": 15.551058769226074, "rewards/rejected": -20.83879280090332, "step": 4022 }, { "epoch": 6.92, "learning_rate": 1.2813429664258393e-07, "logits/chosen": -1.410644769668579, "logits/rejected": -1.9918761253356934, "logps/chosen": -130.0044403076172, "logps/rejected": -314.7421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.407031536102295, "rewards/margins": 16.111011505126953, "rewards/rejected": -22.518043518066406, "step": 4023 }, { "epoch": 6.93, "learning_rate": 1.2802804929876753e-07, "logits/chosen": -1.83876371383667, "logits/rejected": -1.6503074169158936, "logps/chosen": -144.49972534179688, "logps/rejected": -304.0703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.332805633544922, "rewards/margins": 16.048376083374023, "rewards/rejected": -22.381181716918945, "step": 4024 }, { "epoch": 6.93, "learning_rate": 1.279218019549511e-07, "logits/chosen": -1.9270671606063843, "logits/rejected": -1.9374773502349854, "logps/chosen": -165.34378051757812, "logps/rejected": -323.8232116699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.430939674377441, "rewards/margins": 15.869626998901367, "rewards/rejected": -23.300567626953125, "step": 4025 }, { "epoch": 6.93, "learning_rate": 1.2781555461113473e-07, "logits/chosen": -1.8187127113342285, "logits/rejected": -1.860337495803833, "logps/chosen": -107.2831039428711, "logps/rejected": -248.4108428955078, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5038604736328125, "rewards/margins": 14.349096298217773, "rewards/rejected": -18.85295867919922, "step": 4026 }, { "epoch": 6.93, "learning_rate": 1.2770930726731833e-07, "logits/chosen": -1.7293832302093506, "logits/rejected": -2.0242185592651367, "logps/chosen": -131.03936767578125, "logps/rejected": -293.1983947753906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.124111652374268, "rewards/margins": 15.380683898925781, "rewards/rejected": -21.50479507446289, "step": 4027 }, { "epoch": 6.93, "learning_rate": 1.276030599235019e-07, "logits/chosen": -1.8662543296813965, "logits/rejected": -1.7922589778900146, "logps/chosen": -139.93609619140625, "logps/rejected": -269.0790710449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.905084133148193, "rewards/margins": 14.312337875366211, "rewards/rejected": -19.217422485351562, "step": 4028 }, { "epoch": 6.93, "learning_rate": 1.274968125796855e-07, "logits/chosen": -1.923098087310791, "logits/rejected": -2.2355871200561523, "logps/chosen": -167.4531707763672, "logps/rejected": -343.2916259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.442473411560059, "rewards/margins": 16.56972885131836, "rewards/rejected": -24.01219940185547, "step": 4029 }, { "epoch": 6.94, "learning_rate": 1.273905652358691e-07, "logits/chosen": -1.8068084716796875, "logits/rejected": -1.8575770854949951, "logps/chosen": -214.84588623046875, "logps/rejected": -335.7244567871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.145797729492188, "rewards/margins": 12.079402923583984, "rewards/rejected": -23.225200653076172, "step": 4030 }, { "epoch": 6.94, "learning_rate": 1.2728431789205267e-07, "logits/chosen": -1.9128645658493042, "logits/rejected": -1.9286211729049683, "logps/chosen": -209.0743408203125, "logps/rejected": -325.7926025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.46208381652832, "rewards/margins": 12.239286422729492, "rewards/rejected": -23.701370239257812, "step": 4031 }, { "epoch": 6.94, "learning_rate": 1.271780705482363e-07, "logits/chosen": -1.6338574886322021, "logits/rejected": -1.9579191207885742, "logps/chosen": -125.00090026855469, "logps/rejected": -276.64276123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.093801498413086, "rewards/margins": 15.100625038146973, "rewards/rejected": -19.194427490234375, "step": 4032 }, { "epoch": 6.94, "learning_rate": 1.270718232044199e-07, "logits/chosen": -1.4571750164031982, "logits/rejected": -1.868828296661377, "logps/chosen": -129.30224609375, "logps/rejected": -271.0364685058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5157976150512695, "rewards/margins": 13.938358306884766, "rewards/rejected": -20.45415687561035, "step": 4033 }, { "epoch": 6.94, "learning_rate": 1.2696557586060347e-07, "logits/chosen": -2.0802013874053955, "logits/rejected": -2.001055955886841, "logps/chosen": -169.97503662109375, "logps/rejected": -345.79046630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.94100284576416, "rewards/margins": 18.453018188476562, "rewards/rejected": -25.39402198791504, "step": 4034 }, { "epoch": 6.94, "learning_rate": 1.2685932851678707e-07, "logits/chosen": -1.4893356561660767, "logits/rejected": -1.8340367078781128, "logps/chosen": -179.5675811767578, "logps/rejected": -348.84259033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.654760360717773, "rewards/margins": 16.984594345092773, "rewards/rejected": -25.639352798461914, "step": 4035 }, { "epoch": 6.95, "learning_rate": 1.2675308117297067e-07, "logits/chosen": -1.8457386493682861, "logits/rejected": -1.6546156406402588, "logps/chosen": -149.17782592773438, "logps/rejected": -285.5895690917969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.001502990722656, "rewards/margins": 13.684832572937012, "rewards/rejected": -20.68633460998535, "step": 4036 }, { "epoch": 6.95, "learning_rate": 1.2664683382915426e-07, "logits/chosen": -1.8584623336791992, "logits/rejected": -1.77292799949646, "logps/chosen": -151.58065795898438, "logps/rejected": -317.11279296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.604764461517334, "rewards/margins": 15.924652099609375, "rewards/rejected": -22.529417037963867, "step": 4037 }, { "epoch": 6.95, "learning_rate": 1.2654058648533786e-07, "logits/chosen": -1.7063043117523193, "logits/rejected": -2.0777435302734375, "logps/chosen": -169.51625061035156, "logps/rejected": -367.10125732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.413125991821289, "rewards/margins": 17.5853271484375, "rewards/rejected": -26.998455047607422, "step": 4038 }, { "epoch": 6.95, "learning_rate": 1.2643433914152146e-07, "logits/chosen": -1.7133880853652954, "logits/rejected": -2.131932258605957, "logps/chosen": -167.67691040039062, "logps/rejected": -357.92327880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.231719970703125, "rewards/margins": 16.625591278076172, "rewards/rejected": -24.857309341430664, "step": 4039 }, { "epoch": 6.95, "learning_rate": 1.2632809179770503e-07, "logits/chosen": -1.6990876197814941, "logits/rejected": -1.4415220022201538, "logps/chosen": -186.47032165527344, "logps/rejected": -332.3487548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.3426513671875, "rewards/margins": 15.573912620544434, "rewards/rejected": -24.916566848754883, "step": 4040 }, { "epoch": 6.96, "learning_rate": 1.2622184445388863e-07, "logits/chosen": -1.9033857583999634, "logits/rejected": -1.7566654682159424, "logps/chosen": -136.99703979492188, "logps/rejected": -281.95159912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6030683517456055, "rewards/margins": 14.881963729858398, "rewards/rejected": -21.485031127929688, "step": 4041 }, { "epoch": 6.96, "learning_rate": 1.2611559711007226e-07, "logits/chosen": -1.7804186344146729, "logits/rejected": -1.7048168182373047, "logps/chosen": -161.36451721191406, "logps/rejected": -346.4676513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.532899856567383, "rewards/margins": 18.899681091308594, "rewards/rejected": -27.43258285522461, "step": 4042 }, { "epoch": 6.96, "learning_rate": 1.2600934976625586e-07, "logits/chosen": -1.9635614156723022, "logits/rejected": -1.732924461364746, "logps/chosen": -149.4958038330078, "logps/rejected": -278.3595886230469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.321340560913086, "rewards/margins": 13.663715362548828, "rewards/rejected": -20.985055923461914, "step": 4043 }, { "epoch": 6.96, "learning_rate": 1.2590310242243943e-07, "logits/chosen": -1.9165806770324707, "logits/rejected": -1.909930944442749, "logps/chosen": -134.91354370117188, "logps/rejected": -253.57884216308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.166210174560547, "rewards/margins": 12.528590202331543, "rewards/rejected": -18.694799423217773, "step": 4044 }, { "epoch": 6.96, "learning_rate": 1.2579685507862303e-07, "logits/chosen": -1.9566254615783691, "logits/rejected": -1.9895899295806885, "logps/chosen": -176.73916625976562, "logps/rejected": -319.86749267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.657417297363281, "rewards/margins": 14.255698204040527, "rewards/rejected": -22.913114547729492, "step": 4045 }, { "epoch": 6.96, "learning_rate": 1.2569060773480663e-07, "logits/chosen": -2.018061876296997, "logits/rejected": -1.8582305908203125, "logps/chosen": -154.13153076171875, "logps/rejected": -315.67742919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.802457809448242, "rewards/margins": 15.540441513061523, "rewards/rejected": -23.3429012298584, "step": 4046 }, { "epoch": 6.97, "learning_rate": 1.255843603909902e-07, "logits/chosen": -1.9188423156738281, "logits/rejected": -1.5762391090393066, "logps/chosen": -181.32916259765625, "logps/rejected": -315.1136474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.439170837402344, "rewards/margins": 14.241119384765625, "rewards/rejected": -23.68029022216797, "step": 4047 }, { "epoch": 6.97, "learning_rate": 1.2547811304717383e-07, "logits/chosen": -1.9085947275161743, "logits/rejected": -1.7670702934265137, "logps/chosen": -165.6200408935547, "logps/rejected": -344.52777099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.106566429138184, "rewards/margins": 18.040130615234375, "rewards/rejected": -26.146697998046875, "step": 4048 }, { "epoch": 6.97, "learning_rate": 1.2537186570335742e-07, "logits/chosen": -1.783672571182251, "logits/rejected": -2.0074195861816406, "logps/chosen": -151.07681274414062, "logps/rejected": -328.8209228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.543305397033691, "rewards/margins": 15.174615859985352, "rewards/rejected": -22.71792221069336, "step": 4049 }, { "epoch": 6.97, "learning_rate": 1.25265618359541e-07, "logits/chosen": -1.6493706703186035, "logits/rejected": -1.9147489070892334, "logps/chosen": -164.72808837890625, "logps/rejected": -340.20672607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.9792280197143555, "rewards/margins": 16.041975021362305, "rewards/rejected": -24.021203994750977, "step": 4050 }, { "epoch": 6.97, "learning_rate": 1.251593710157246e-07, "logits/chosen": -1.7423596382141113, "logits/rejected": -1.8982163667678833, "logps/chosen": -157.64398193359375, "logps/rejected": -341.8113708496094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.500771522521973, "rewards/margins": 17.038009643554688, "rewards/rejected": -25.538780212402344, "step": 4051 }, { "epoch": 6.97, "learning_rate": 1.250531236719082e-07, "logits/chosen": -1.9611893892288208, "logits/rejected": -1.6654934883117676, "logps/chosen": -186.01998901367188, "logps/rejected": -342.5219421386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.140503883361816, "rewards/margins": 16.253999710083008, "rewards/rejected": -24.394502639770508, "step": 4052 }, { "epoch": 6.98, "learning_rate": 1.249468763280918e-07, "logits/chosen": -1.7296786308288574, "logits/rejected": -1.916196584701538, "logps/chosen": -122.66893005371094, "logps/rejected": -300.684326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7891740798950195, "rewards/margins": 16.555036544799805, "rewards/rejected": -20.34421157836914, "step": 4053 }, { "epoch": 6.98, "learning_rate": 1.248406289842754e-07, "logits/chosen": -1.7829885482788086, "logits/rejected": -1.80322265625, "logps/chosen": -168.30743408203125, "logps/rejected": -328.68389892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2595319747924805, "rewards/margins": 17.231666564941406, "rewards/rejected": -24.491199493408203, "step": 4054 }, { "epoch": 6.98, "learning_rate": 1.2473438164045897e-07, "logits/chosen": -1.898701786994934, "logits/rejected": -1.6578614711761475, "logps/chosen": -164.53765869140625, "logps/rejected": -303.45318603515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.06452751159668, "rewards/margins": 14.511510848999023, "rewards/rejected": -23.576038360595703, "step": 4055 }, { "epoch": 6.98, "learning_rate": 1.246281342966426e-07, "logits/chosen": -1.5104421377182007, "logits/rejected": -1.963013768196106, "logps/chosen": -145.11216735839844, "logps/rejected": -258.13568115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.215404510498047, "rewards/margins": 12.080680847167969, "rewards/rejected": -18.296085357666016, "step": 4056 }, { "epoch": 6.98, "learning_rate": 1.2452188695282616e-07, "logits/chosen": -1.7118303775787354, "logits/rejected": -2.0567526817321777, "logps/chosen": -137.43931579589844, "logps/rejected": -309.024169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.761366844177246, "rewards/margins": 16.072965621948242, "rewards/rejected": -22.834331512451172, "step": 4057 }, { "epoch": 6.98, "learning_rate": 1.244156396090098e-07, "logits/chosen": -2.007256269454956, "logits/rejected": -1.6685423851013184, "logps/chosen": -195.4418487548828, "logps/rejected": -318.50640869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.405791282653809, "rewards/margins": 13.207649230957031, "rewards/rejected": -22.613441467285156, "step": 4058 }, { "epoch": 6.99, "learning_rate": 1.2430939226519336e-07, "logits/chosen": -1.7923178672790527, "logits/rejected": -1.6859674453735352, "logps/chosen": -106.28965759277344, "logps/rejected": -268.3639221191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.757568836212158, "rewards/margins": 16.0145320892334, "rewards/rejected": -20.772098541259766, "step": 4059 }, { "epoch": 6.99, "learning_rate": 1.2420314492137696e-07, "logits/chosen": -1.7208770513534546, "logits/rejected": -1.9185500144958496, "logps/chosen": -139.4622802734375, "logps/rejected": -283.9122009277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.096644878387451, "rewards/margins": 12.946856498718262, "rewards/rejected": -19.043502807617188, "step": 4060 }, { "epoch": 6.99, "learning_rate": 1.2409689757756056e-07, "logits/chosen": -1.7461364269256592, "logits/rejected": -1.8615902662277222, "logps/chosen": -123.1646728515625, "logps/rejected": -310.8313293457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.924393653869629, "rewards/margins": 17.23209571838379, "rewards/rejected": -23.1564884185791, "step": 4061 }, { "epoch": 6.99, "learning_rate": 1.2399065023374416e-07, "logits/chosen": -1.6714638471603394, "logits/rejected": -1.7355599403381348, "logps/chosen": -160.0801239013672, "logps/rejected": -315.9891662597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.337278366088867, "rewards/margins": 14.509560585021973, "rewards/rejected": -22.846839904785156, "step": 4062 }, { "epoch": 6.99, "learning_rate": 1.2388440288992773e-07, "logits/chosen": -1.8077954053878784, "logits/rejected": -1.9899647235870361, "logps/chosen": -165.59756469726562, "logps/rejected": -282.382080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.012505531311035, "rewards/margins": 10.352923393249512, "rewards/rejected": -20.36543083190918, "step": 4063 }, { "epoch": 6.99, "learning_rate": 1.2377815554611136e-07, "logits/chosen": -1.950260877609253, "logits/rejected": -1.9156477451324463, "logps/chosen": -190.7567901611328, "logps/rejected": -349.09808349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.591079711914062, "rewards/margins": 15.432920455932617, "rewards/rejected": -25.02400016784668, "step": 4064 }, { "epoch": 7.0, "learning_rate": 1.2367190820229493e-07, "logits/chosen": -1.799363136291504, "logits/rejected": -1.8393385410308838, "logps/chosen": -175.18177795410156, "logps/rejected": -326.55584716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.089515686035156, "rewards/margins": 14.639213562011719, "rewards/rejected": -24.728731155395508, "step": 4065 }, { "epoch": 7.0, "learning_rate": 1.2356566085847853e-07, "logits/chosen": -1.9077680110931396, "logits/rejected": -1.6969743967056274, "logps/chosen": -172.59861755371094, "logps/rejected": -273.2867431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.872020244598389, "rewards/margins": 12.016966819763184, "rewards/rejected": -19.888986587524414, "step": 4066 }, { "epoch": 7.0, "learning_rate": 1.2345941351466213e-07, "logits/chosen": -1.6616435050964355, "logits/rejected": -1.9953337907791138, "logps/chosen": -171.6529083251953, "logps/rejected": -326.55316162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.949921607971191, "rewards/margins": 13.910757064819336, "rewards/rejected": -22.860679626464844, "step": 4067 }, { "epoch": 7.0, "learning_rate": 1.2335316617084572e-07, "logits/chosen": -1.8009222745895386, "logits/rejected": -1.8088219165802002, "logps/chosen": -114.01708221435547, "logps/rejected": -264.5318298339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7290167808532715, "rewards/margins": 14.332023620605469, "rewards/rejected": -19.061038970947266, "step": 4068 }, { "epoch": 7.0, "learning_rate": 1.2324691882702932e-07, "logits/chosen": -1.7995038032531738, "logits/rejected": -1.8407373428344727, "logps/chosen": -153.62245178222656, "logps/rejected": -274.544677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.358753204345703, "rewards/margins": 12.082036018371582, "rewards/rejected": -19.44078826904297, "step": 4069 }, { "epoch": 7.01, "learning_rate": 1.2314067148321292e-07, "logits/chosen": -1.6972002983093262, "logits/rejected": -1.819016933441162, "logps/chosen": -177.07745361328125, "logps/rejected": -383.44439697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.027115821838379, "rewards/margins": 20.34107208251953, "rewards/rejected": -28.368188858032227, "step": 4070 }, { "epoch": 7.01, "learning_rate": 1.230344241393965e-07, "logits/chosen": -1.8481042385101318, "logits/rejected": -1.8564342260360718, "logps/chosen": -136.54092407226562, "logps/rejected": -316.2490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.793382167816162, "rewards/margins": 16.730257034301758, "rewards/rejected": -21.523639678955078, "step": 4071 }, { "epoch": 7.01, "learning_rate": 1.2292817679558012e-07, "logits/chosen": -2.1195433139801025, "logits/rejected": -1.806544542312622, "logps/chosen": -128.97579956054688, "logps/rejected": -264.0536193847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4162397384643555, "rewards/margins": 15.593986511230469, "rewards/rejected": -20.01022720336914, "step": 4072 }, { "epoch": 7.01, "learning_rate": 1.228219294517637e-07, "logits/chosen": -1.5357816219329834, "logits/rejected": -1.7877423763275146, "logps/chosen": -163.06375122070312, "logps/rejected": -330.6788330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.805276870727539, "rewards/margins": 16.529150009155273, "rewards/rejected": -25.33442497253418, "step": 4073 }, { "epoch": 7.01, "learning_rate": 1.227156821079473e-07, "logits/chosen": -1.4728765487670898, "logits/rejected": -1.8692094087600708, "logps/chosen": -118.51786804199219, "logps/rejected": -299.6016845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.217543601989746, "rewards/margins": 16.317363739013672, "rewards/rejected": -20.5349063873291, "step": 4074 }, { "epoch": 7.01, "learning_rate": 1.226094347641309e-07, "logits/chosen": -1.8788223266601562, "logits/rejected": -2.038209915161133, "logps/chosen": -115.74462127685547, "logps/rejected": -305.607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.654541015625, "rewards/margins": 17.094417572021484, "rewards/rejected": -20.74895668029785, "step": 4075 }, { "epoch": 7.02, "learning_rate": 1.225031874203145e-07, "logits/chosen": -1.8477206230163574, "logits/rejected": -2.011828660964966, "logps/chosen": -112.26155090332031, "logps/rejected": -262.5419006347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.822168827056885, "rewards/margins": 13.741596221923828, "rewards/rejected": -18.563766479492188, "step": 4076 }, { "epoch": 7.02, "learning_rate": 1.223969400764981e-07, "logits/chosen": -1.792149305343628, "logits/rejected": -1.9808857440948486, "logps/chosen": -168.6016082763672, "logps/rejected": -395.28179931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.844354629516602, "rewards/margins": 20.285547256469727, "rewards/rejected": -28.129899978637695, "step": 4077 }, { "epoch": 7.02, "learning_rate": 1.222906927326817e-07, "logits/chosen": -1.748199462890625, "logits/rejected": -1.609243631362915, "logps/chosen": -134.41580200195312, "logps/rejected": -237.3260498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.461108207702637, "rewards/margins": 11.597940444946289, "rewards/rejected": -18.059049606323242, "step": 4078 }, { "epoch": 7.02, "learning_rate": 1.2218444538886526e-07, "logits/chosen": -1.8759019374847412, "logits/rejected": -1.87762451171875, "logps/chosen": -110.60000610351562, "logps/rejected": -311.8599853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.625361919403076, "rewards/margins": 19.294015884399414, "rewards/rejected": -23.91937828063965, "step": 4079 }, { "epoch": 7.02, "learning_rate": 1.2207819804504888e-07, "logits/chosen": -1.7461397647857666, "logits/rejected": -1.7412465810775757, "logps/chosen": -175.40480041503906, "logps/rejected": -290.5166320800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.313006401062012, "rewards/margins": 11.530742645263672, "rewards/rejected": -20.84375, "step": 4080 }, { "epoch": 7.02, "learning_rate": 1.2197195070123246e-07, "logits/chosen": -1.851942777633667, "logits/rejected": -1.6985478401184082, "logps/chosen": -149.78277587890625, "logps/rejected": -340.367431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.920706748962402, "rewards/margins": 18.108257293701172, "rewards/rejected": -25.028963088989258, "step": 4081 }, { "epoch": 7.03, "learning_rate": 1.2186570335741606e-07, "logits/chosen": -1.77371084690094, "logits/rejected": -1.8590071201324463, "logps/chosen": -161.07550048828125, "logps/rejected": -344.96148681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.024836540222168, "rewards/margins": 16.502365112304688, "rewards/rejected": -24.527202606201172, "step": 4082 }, { "epoch": 7.03, "learning_rate": 1.2175945601359965e-07, "logits/chosen": -1.614931344985962, "logits/rejected": -1.8682270050048828, "logps/chosen": -139.6140899658203, "logps/rejected": -271.1040954589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.799132347106934, "rewards/margins": 12.355573654174805, "rewards/rejected": -19.154706954956055, "step": 4083 }, { "epoch": 7.03, "learning_rate": 1.2165320866978325e-07, "logits/chosen": -1.6303441524505615, "logits/rejected": -1.9018254280090332, "logps/chosen": -126.64466857910156, "logps/rejected": -313.326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1633758544921875, "rewards/margins": 17.01537322998047, "rewards/rejected": -22.178749084472656, "step": 4084 }, { "epoch": 7.03, "learning_rate": 1.2154696132596685e-07, "logits/chosen": -1.6960567235946655, "logits/rejected": -1.8609952926635742, "logps/chosen": -178.77752685546875, "logps/rejected": -299.27459716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.138110160827637, "rewards/margins": 11.416839599609375, "rewards/rejected": -20.554950714111328, "step": 4085 }, { "epoch": 7.03, "learning_rate": 1.2144071398215045e-07, "logits/chosen": -1.9138879776000977, "logits/rejected": -1.9236717224121094, "logps/chosen": -148.40243530273438, "logps/rejected": -324.5174560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.313779354095459, "rewards/margins": 16.57321548461914, "rewards/rejected": -22.886995315551758, "step": 4086 }, { "epoch": 7.03, "learning_rate": 1.2133446663833402e-07, "logits/chosen": -2.003139019012451, "logits/rejected": -1.7314047813415527, "logps/chosen": -209.53147888183594, "logps/rejected": -369.46490478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.472212791442871, "rewards/margins": 15.071951866149902, "rewards/rejected": -25.544166564941406, "step": 4087 }, { "epoch": 7.04, "learning_rate": 1.2122821929451762e-07, "logits/chosen": -1.7254016399383545, "logits/rejected": -1.8674352169036865, "logps/chosen": -124.86221313476562, "logps/rejected": -271.7960205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6835618019104, "rewards/margins": 14.494131088256836, "rewards/rejected": -19.177692413330078, "step": 4088 }, { "epoch": 7.04, "learning_rate": 1.2112197195070122e-07, "logits/chosen": -1.7772760391235352, "logits/rejected": -1.8513383865356445, "logps/chosen": -169.82472229003906, "logps/rejected": -339.0567626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.44191837310791, "rewards/margins": 16.926469802856445, "rewards/rejected": -24.36838722229004, "step": 4089 }, { "epoch": 7.04, "learning_rate": 1.2101572460688482e-07, "logits/chosen": -1.5443122386932373, "logits/rejected": -1.8442792892456055, "logps/chosen": -128.97503662109375, "logps/rejected": -311.8905029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.14772367477417, "rewards/margins": 16.458120346069336, "rewards/rejected": -22.605844497680664, "step": 4090 }, { "epoch": 7.04, "learning_rate": 1.2090947726306842e-07, "logits/chosen": -2.0711002349853516, "logits/rejected": -1.9540433883666992, "logps/chosen": -159.38912963867188, "logps/rejected": -291.36541748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.966570854187012, "rewards/margins": 13.38662052154541, "rewards/rejected": -20.353191375732422, "step": 4091 }, { "epoch": 7.04, "learning_rate": 1.2080322991925202e-07, "logits/chosen": -1.8951094150543213, "logits/rejected": -1.9035604000091553, "logps/chosen": -167.42465209960938, "logps/rejected": -333.49896240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.953163146972656, "rewards/margins": 16.71918487548828, "rewards/rejected": -24.672348022460938, "step": 4092 }, { "epoch": 7.04, "learning_rate": 1.2069698257543562e-07, "logits/chosen": -1.6390600204467773, "logits/rejected": -1.9862680435180664, "logps/chosen": -162.207763671875, "logps/rejected": -323.003662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.784289360046387, "rewards/margins": 15.816947937011719, "rewards/rejected": -22.601238250732422, "step": 4093 }, { "epoch": 7.05, "learning_rate": 1.2059073523161922e-07, "logits/chosen": -2.077526092529297, "logits/rejected": -1.4818978309631348, "logps/chosen": -183.33123779296875, "logps/rejected": -287.03668212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.019791603088379, "rewards/margins": 12.847651481628418, "rewards/rejected": -21.867443084716797, "step": 4094 }, { "epoch": 7.05, "learning_rate": 1.204844878878028e-07, "logits/chosen": -1.7215182781219482, "logits/rejected": -1.8215034008026123, "logps/chosen": -136.54315185546875, "logps/rejected": -305.9322509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.992610931396484, "rewards/margins": 17.14033317565918, "rewards/rejected": -23.132944107055664, "step": 4095 }, { "epoch": 7.05, "learning_rate": 1.203782405439864e-07, "logits/chosen": -1.6847196817398071, "logits/rejected": -1.886139154434204, "logps/chosen": -175.42054748535156, "logps/rejected": -314.0546569824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.906944274902344, "rewards/margins": 12.527806282043457, "rewards/rejected": -21.434749603271484, "step": 4096 }, { "epoch": 7.05, "learning_rate": 1.2027199320016999e-07, "logits/chosen": -1.796431303024292, "logits/rejected": -1.9236879348754883, "logps/chosen": -150.81597900390625, "logps/rejected": -328.863037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.215161323547363, "rewards/margins": 16.668519973754883, "rewards/rejected": -23.883682250976562, "step": 4097 }, { "epoch": 7.05, "learning_rate": 1.2016574585635359e-07, "logits/chosen": -1.5570836067199707, "logits/rejected": -1.8067513704299927, "logps/chosen": -120.42401123046875, "logps/rejected": -268.3751525878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.478829860687256, "rewards/margins": 14.289597511291504, "rewards/rejected": -19.768428802490234, "step": 4098 }, { "epoch": 7.06, "learning_rate": 1.2005949851253718e-07, "logits/chosen": -1.7523651123046875, "logits/rejected": -1.4097117185592651, "logps/chosen": -195.05209350585938, "logps/rejected": -292.3487854003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.189760208129883, "rewards/margins": 11.391090393066406, "rewards/rejected": -21.580848693847656, "step": 4099 }, { "epoch": 7.06, "learning_rate": 1.1995325116872078e-07, "logits/chosen": -1.9811538457870483, "logits/rejected": -1.6635277271270752, "logps/chosen": -156.3804931640625, "logps/rejected": -323.9275207519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.301939964294434, "rewards/margins": 17.385812759399414, "rewards/rejected": -24.687753677368164, "step": 4100 }, { "epoch": 7.06, "learning_rate": 1.1984700382490438e-07, "logits/chosen": -1.901143193244934, "logits/rejected": -1.68581223487854, "logps/chosen": -120.07295989990234, "logps/rejected": -268.4960632324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.279551029205322, "rewards/margins": 14.458213806152344, "rewards/rejected": -19.737764358520508, "step": 4101 }, { "epoch": 7.06, "learning_rate": 1.1974075648108798e-07, "logits/chosen": -1.586127519607544, "logits/rejected": -2.043753147125244, "logps/chosen": -141.5743408203125, "logps/rejected": -325.312744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.732011795043945, "rewards/margins": 15.405837059020996, "rewards/rejected": -22.137849807739258, "step": 4102 }, { "epoch": 7.06, "learning_rate": 1.1963450913727155e-07, "logits/chosen": -1.9280579090118408, "logits/rejected": -1.9431216716766357, "logps/chosen": -133.7845001220703, "logps/rejected": -319.62762451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.588761806488037, "rewards/margins": 18.810611724853516, "rewards/rejected": -24.39937400817871, "step": 4103 }, { "epoch": 7.06, "learning_rate": 1.1952826179345515e-07, "logits/chosen": -1.990452766418457, "logits/rejected": -1.7946901321411133, "logps/chosen": -146.7669219970703, "logps/rejected": -260.55743408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4928131103515625, "rewards/margins": 12.756135940551758, "rewards/rejected": -18.248950958251953, "step": 4104 }, { "epoch": 7.07, "learning_rate": 1.1942201444963875e-07, "logits/chosen": -1.6771584749221802, "logits/rejected": -1.890680193901062, "logps/chosen": -158.04666137695312, "logps/rejected": -300.96630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.428326606750488, "rewards/margins": 13.716193199157715, "rewards/rejected": -22.144519805908203, "step": 4105 }, { "epoch": 7.07, "learning_rate": 1.1931576710582235e-07, "logits/chosen": -1.789298176765442, "logits/rejected": -1.8458664417266846, "logps/chosen": -151.09280395507812, "logps/rejected": -329.5096130371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.58373498916626, "rewards/margins": 16.334810256958008, "rewards/rejected": -22.91854476928711, "step": 4106 }, { "epoch": 7.07, "learning_rate": 1.1920951976200595e-07, "logits/chosen": -1.8656561374664307, "logits/rejected": -1.9082509279251099, "logps/chosen": -194.58212280273438, "logps/rejected": -311.9691162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.003137588500977, "rewards/margins": 11.855829238891602, "rewards/rejected": -21.858966827392578, "step": 4107 }, { "epoch": 7.07, "learning_rate": 1.1910327241818955e-07, "logits/chosen": -2.070756435394287, "logits/rejected": -1.759021282196045, "logps/chosen": -185.57522583007812, "logps/rejected": -303.6258850097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.186182975769043, "rewards/margins": 12.753914833068848, "rewards/rejected": -20.94009780883789, "step": 4108 }, { "epoch": 7.07, "learning_rate": 1.1899702507437313e-07, "logits/chosen": -1.891280174255371, "logits/rejected": -2.1758830547332764, "logps/chosen": -154.7162628173828, "logps/rejected": -325.8967590332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.379299163818359, "rewards/margins": 16.345535278320312, "rewards/rejected": -23.724836349487305, "step": 4109 }, { "epoch": 7.07, "learning_rate": 1.1889077773055672e-07, "logits/chosen": -1.7859528064727783, "logits/rejected": -1.7074453830718994, "logps/chosen": -146.05043029785156, "logps/rejected": -278.6737060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.819496154785156, "rewards/margins": 14.621406555175781, "rewards/rejected": -20.440902709960938, "step": 4110 }, { "epoch": 7.08, "learning_rate": 1.1878453038674033e-07, "logits/chosen": -1.5889188051223755, "logits/rejected": -2.0237925052642822, "logps/chosen": -143.2648468017578, "logps/rejected": -288.31884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.382641792297363, "rewards/margins": 12.330633163452148, "rewards/rejected": -18.713275909423828, "step": 4111 }, { "epoch": 7.08, "learning_rate": 1.1867828304292392e-07, "logits/chosen": -1.6999437808990479, "logits/rejected": -1.9052711725234985, "logps/chosen": -156.90179443359375, "logps/rejected": -310.4801025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.100481986999512, "rewards/margins": 14.864066123962402, "rewards/rejected": -21.964548110961914, "step": 4112 }, { "epoch": 7.08, "learning_rate": 1.1857203569910752e-07, "logits/chosen": -1.8317872285842896, "logits/rejected": -1.7794640064239502, "logps/chosen": -147.02459716796875, "logps/rejected": -298.89715576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.834249496459961, "rewards/margins": 14.904717445373535, "rewards/rejected": -20.738967895507812, "step": 4113 }, { "epoch": 7.08, "learning_rate": 1.1846578835529112e-07, "logits/chosen": -1.606309413909912, "logits/rejected": -1.7250182628631592, "logps/chosen": -169.7151641845703, "logps/rejected": -325.398193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.974296569824219, "rewards/margins": 14.804073333740234, "rewards/rejected": -21.778369903564453, "step": 4114 }, { "epoch": 7.08, "learning_rate": 1.183595410114747e-07, "logits/chosen": -1.8735640048980713, "logits/rejected": -1.7941029071807861, "logps/chosen": -173.92828369140625, "logps/rejected": -314.1425476074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.038590908050537, "rewards/margins": 15.48198127746582, "rewards/rejected": -22.520572662353516, "step": 4115 }, { "epoch": 7.08, "learning_rate": 1.1825329366765831e-07, "logits/chosen": -1.946155309677124, "logits/rejected": -1.9067513942718506, "logps/chosen": -158.064453125, "logps/rejected": -311.69390869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.884378433227539, "rewards/margins": 15.664461135864258, "rewards/rejected": -22.54884147644043, "step": 4116 }, { "epoch": 7.09, "learning_rate": 1.181470463238419e-07, "logits/chosen": -1.9634475708007812, "logits/rejected": -1.776071548461914, "logps/chosen": -110.25482940673828, "logps/rejected": -282.0274963378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.921473264694214, "rewards/margins": 17.4093017578125, "rewards/rejected": -21.33077621459961, "step": 4117 }, { "epoch": 7.09, "learning_rate": 1.1804079898002548e-07, "logits/chosen": -1.9830594062805176, "logits/rejected": -1.9791994094848633, "logps/chosen": -160.78939819335938, "logps/rejected": -309.4407653808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.87552547454834, "rewards/margins": 15.07980728149414, "rewards/rejected": -22.955333709716797, "step": 4118 }, { "epoch": 7.09, "learning_rate": 1.179345516362091e-07, "logits/chosen": -1.6891849040985107, "logits/rejected": -1.7163670063018799, "logps/chosen": -161.69541931152344, "logps/rejected": -290.36419677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.891960144042969, "rewards/margins": 13.148089408874512, "rewards/rejected": -22.040048599243164, "step": 4119 }, { "epoch": 7.09, "learning_rate": 1.1782830429239268e-07, "logits/chosen": -1.9047088623046875, "logits/rejected": -1.736498475074768, "logps/chosen": -191.95846557617188, "logps/rejected": -310.2487487792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.721409797668457, "rewards/margins": 13.531532287597656, "rewards/rejected": -22.25294303894043, "step": 4120 }, { "epoch": 7.09, "learning_rate": 1.1772205694857628e-07, "logits/chosen": -1.8813250064849854, "logits/rejected": -1.6736946105957031, "logps/chosen": -194.65501403808594, "logps/rejected": -319.46075439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.973834991455078, "rewards/margins": 13.649884223937988, "rewards/rejected": -23.62371826171875, "step": 4121 }, { "epoch": 7.09, "learning_rate": 1.1761580960475988e-07, "logits/chosen": -1.5896309614181519, "logits/rejected": -2.1051714420318604, "logps/chosen": -148.5911865234375, "logps/rejected": -316.0360107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.24812650680542, "rewards/margins": 16.058719635009766, "rewards/rejected": -22.306846618652344, "step": 4122 }, { "epoch": 7.1, "learning_rate": 1.1750956226094347e-07, "logits/chosen": -2.0836117267608643, "logits/rejected": -1.6404497623443604, "logps/chosen": -166.89915466308594, "logps/rejected": -323.2539367675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.134267807006836, "rewards/margins": 17.764358520507812, "rewards/rejected": -24.89862632751465, "step": 4123 }, { "epoch": 7.1, "learning_rate": 1.1740331491712708e-07, "logits/chosen": -1.7993652820587158, "logits/rejected": -1.8612267971038818, "logps/chosen": -143.3022003173828, "logps/rejected": -348.00164794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.696504592895508, "rewards/margins": 19.4053897857666, "rewards/rejected": -25.101896286010742, "step": 4124 }, { "epoch": 7.1, "learning_rate": 1.1729706757331066e-07, "logits/chosen": -1.8328527212142944, "logits/rejected": -1.916359543800354, "logps/chosen": -168.66946411132812, "logps/rejected": -324.11260986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.645435333251953, "rewards/margins": 15.4461669921875, "rewards/rejected": -22.091602325439453, "step": 4125 }, { "epoch": 7.1, "learning_rate": 1.1719082022949425e-07, "logits/chosen": -1.581918478012085, "logits/rejected": -1.8308050632476807, "logps/chosen": -172.6789093017578, "logps/rejected": -317.69671630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.221216201782227, "rewards/margins": 15.353826522827148, "rewards/rejected": -23.575042724609375, "step": 4126 }, { "epoch": 7.1, "learning_rate": 1.1708457288567786e-07, "logits/chosen": -1.8354884386062622, "logits/rejected": -1.980467438697815, "logps/chosen": -173.49234008789062, "logps/rejected": -335.98370361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.642083168029785, "rewards/margins": 15.373018264770508, "rewards/rejected": -24.01510238647461, "step": 4127 }, { "epoch": 7.1, "learning_rate": 1.1697832554186145e-07, "logits/chosen": -1.7085354328155518, "logits/rejected": -1.9451053142547607, "logps/chosen": -182.51205444335938, "logps/rejected": -380.9930419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.922272682189941, "rewards/margins": 19.202089309692383, "rewards/rejected": -28.12436294555664, "step": 4128 }, { "epoch": 7.11, "learning_rate": 1.1687207819804503e-07, "logits/chosen": -1.354146957397461, "logits/rejected": -1.7824475765228271, "logps/chosen": -161.18484497070312, "logps/rejected": -286.37933349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.206209182739258, "rewards/margins": 11.31983470916748, "rewards/rejected": -19.526044845581055, "step": 4129 }, { "epoch": 7.11, "learning_rate": 1.1676583085422864e-07, "logits/chosen": -1.6414101123809814, "logits/rejected": -1.7851914167404175, "logps/chosen": -139.77659606933594, "logps/rejected": -254.39132690429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.509111404418945, "rewards/margins": 11.417476654052734, "rewards/rejected": -16.92658805847168, "step": 4130 }, { "epoch": 7.11, "learning_rate": 1.1665958351041223e-07, "logits/chosen": -1.7062256336212158, "logits/rejected": -2.111957550048828, "logps/chosen": -129.34498596191406, "logps/rejected": -287.31207275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.283962249755859, "rewards/margins": 14.136202812194824, "rewards/rejected": -20.420164108276367, "step": 4131 }, { "epoch": 7.11, "learning_rate": 1.1655333616659584e-07, "logits/chosen": -1.7818528413772583, "logits/rejected": -2.038499116897583, "logps/chosen": -133.08218383789062, "logps/rejected": -296.778076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0809197425842285, "rewards/margins": 13.573298454284668, "rewards/rejected": -19.654216766357422, "step": 4132 }, { "epoch": 7.11, "learning_rate": 1.1644708882277943e-07, "logits/chosen": -1.759170413017273, "logits/rejected": -1.8100159168243408, "logps/chosen": -153.9273681640625, "logps/rejected": -323.49365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.183016300201416, "rewards/margins": 16.453630447387695, "rewards/rejected": -22.636648178100586, "step": 4133 }, { "epoch": 7.12, "learning_rate": 1.1634084147896301e-07, "logits/chosen": -1.85459303855896, "logits/rejected": -1.8356420993804932, "logps/chosen": -129.7150421142578, "logps/rejected": -289.98046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.859162330627441, "rewards/margins": 16.077756881713867, "rewards/rejected": -20.936920166015625, "step": 4134 }, { "epoch": 7.12, "learning_rate": 1.1623459413514663e-07, "logits/chosen": -1.7411136627197266, "logits/rejected": -2.0963149070739746, "logps/chosen": -142.1843719482422, "logps/rejected": -295.805419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.552273750305176, "rewards/margins": 14.34614372253418, "rewards/rejected": -20.898418426513672, "step": 4135 }, { "epoch": 7.12, "learning_rate": 1.1612834679133021e-07, "logits/chosen": -1.9535443782806396, "logits/rejected": -1.7050046920776367, "logps/chosen": -147.27113342285156, "logps/rejected": -301.56622314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.334227561950684, "rewards/margins": 16.46820831298828, "rewards/rejected": -22.80243492126465, "step": 4136 }, { "epoch": 7.12, "learning_rate": 1.160220994475138e-07, "logits/chosen": -1.782970666885376, "logits/rejected": -1.7612345218658447, "logps/chosen": -174.76156616210938, "logps/rejected": -273.46051025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.157549858093262, "rewards/margins": 11.032621383666992, "rewards/rejected": -19.190170288085938, "step": 4137 }, { "epoch": 7.12, "learning_rate": 1.1591585210369741e-07, "logits/chosen": -1.664230227470398, "logits/rejected": -2.016204357147217, "logps/chosen": -117.81620788574219, "logps/rejected": -292.7412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.373170852661133, "rewards/margins": 15.8844633102417, "rewards/rejected": -20.257633209228516, "step": 4138 }, { "epoch": 7.12, "learning_rate": 1.15809604759881e-07, "logits/chosen": -2.0438945293426514, "logits/rejected": -1.7208703756332397, "logps/chosen": -189.74349975585938, "logps/rejected": -324.6978759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.871821403503418, "rewards/margins": 14.168322563171387, "rewards/rejected": -24.040142059326172, "step": 4139 }, { "epoch": 7.13, "learning_rate": 1.1570335741606458e-07, "logits/chosen": -1.746856689453125, "logits/rejected": -1.4254045486450195, "logps/chosen": -176.11676025390625, "logps/rejected": -278.99267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.50560474395752, "rewards/margins": 11.403463363647461, "rewards/rejected": -19.909069061279297, "step": 4140 }, { "epoch": 7.13, "learning_rate": 1.1559711007224819e-07, "logits/chosen": -1.5254497528076172, "logits/rejected": -1.9934954643249512, "logps/chosen": -147.2913055419922, "logps/rejected": -359.44921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.533692359924316, "rewards/margins": 18.88050651550293, "rewards/rejected": -26.414199829101562, "step": 4141 }, { "epoch": 7.13, "learning_rate": 1.1549086272843178e-07, "logits/chosen": -1.5291826725006104, "logits/rejected": -1.9323893785476685, "logps/chosen": -142.4783935546875, "logps/rejected": -343.1567077636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.593029975891113, "rewards/margins": 18.782224655151367, "rewards/rejected": -25.375255584716797, "step": 4142 }, { "epoch": 7.13, "learning_rate": 1.1538461538461539e-07, "logits/chosen": -1.9087220430374146, "logits/rejected": -1.735547423362732, "logps/chosen": -155.63772583007812, "logps/rejected": -296.4082336425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.690618515014648, "rewards/margins": 14.972763061523438, "rewards/rejected": -22.66338348388672, "step": 4143 }, { "epoch": 7.13, "learning_rate": 1.1527836804079898e-07, "logits/chosen": -1.8498650789260864, "logits/rejected": -1.5596529245376587, "logps/chosen": -156.91656494140625, "logps/rejected": -303.2763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.068965911865234, "rewards/margins": 14.392049789428711, "rewards/rejected": -21.461017608642578, "step": 4144 }, { "epoch": 7.13, "learning_rate": 1.1517212069698256e-07, "logits/chosen": -2.001826286315918, "logits/rejected": -1.6854984760284424, "logps/chosen": -144.1327362060547, "logps/rejected": -279.2810363769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.124337673187256, "rewards/margins": 14.766971588134766, "rewards/rejected": -20.89130973815918, "step": 4145 }, { "epoch": 7.14, "learning_rate": 1.1506587335316617e-07, "logits/chosen": -1.9750120639801025, "logits/rejected": -1.8714661598205566, "logps/chosen": -179.47047424316406, "logps/rejected": -330.89874267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.6253767013549805, "rewards/margins": 14.421954154968262, "rewards/rejected": -22.04732894897461, "step": 4146 }, { "epoch": 7.14, "learning_rate": 1.1495962600934976e-07, "logits/chosen": -1.9395545721054077, "logits/rejected": -1.9713408946990967, "logps/chosen": -163.13331604003906, "logps/rejected": -314.1056823730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7878875732421875, "rewards/margins": 14.342538833618164, "rewards/rejected": -22.13042640686035, "step": 4147 }, { "epoch": 7.14, "learning_rate": 1.1485337866553335e-07, "logits/chosen": -1.8694003820419312, "logits/rejected": -1.9724220037460327, "logps/chosen": -148.5321044921875, "logps/rejected": -304.57830810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.671808242797852, "rewards/margins": 15.59093189239502, "rewards/rejected": -22.262739181518555, "step": 4148 }, { "epoch": 7.14, "learning_rate": 1.1474713132171696e-07, "logits/chosen": -1.8163986206054688, "logits/rejected": -1.966264247894287, "logps/chosen": -180.1752166748047, "logps/rejected": -344.82757568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.37729024887085, "rewards/margins": 17.65118408203125, "rewards/rejected": -25.028474807739258, "step": 4149 }, { "epoch": 7.14, "learning_rate": 1.1464088397790054e-07, "logits/chosen": -1.889737844467163, "logits/rejected": -1.894357681274414, "logps/chosen": -149.35247802734375, "logps/rejected": -313.357666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.997073650360107, "rewards/margins": 15.210482597351074, "rewards/rejected": -22.207557678222656, "step": 4150 }, { "epoch": 7.14, "learning_rate": 1.1453463663408414e-07, "logits/chosen": -1.892179012298584, "logits/rejected": -1.7319910526275635, "logps/chosen": -152.11700439453125, "logps/rejected": -295.083251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.364155769348145, "rewards/margins": 15.075223922729492, "rewards/rejected": -23.43937873840332, "step": 4151 }, { "epoch": 7.15, "learning_rate": 1.1442838929026774e-07, "logits/chosen": -1.988772988319397, "logits/rejected": -2.11881685256958, "logps/chosen": -192.85775756835938, "logps/rejected": -307.04937744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.390256881713867, "rewards/margins": 11.126043319702148, "rewards/rejected": -21.516300201416016, "step": 4152 }, { "epoch": 7.15, "learning_rate": 1.1432214194645133e-07, "logits/chosen": -1.8390512466430664, "logits/rejected": -1.6936112642288208, "logps/chosen": -180.20010375976562, "logps/rejected": -334.4305419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.762701034545898, "rewards/margins": 15.306568145751953, "rewards/rejected": -24.06926918029785, "step": 4153 }, { "epoch": 7.15, "learning_rate": 1.1421589460263494e-07, "logits/chosen": -2.014575481414795, "logits/rejected": -1.8036465644836426, "logps/chosen": -172.8621826171875, "logps/rejected": -312.67498779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.075039863586426, "rewards/margins": 13.669393539428711, "rewards/rejected": -20.74443244934082, "step": 4154 }, { "epoch": 7.15, "learning_rate": 1.1410964725881852e-07, "logits/chosen": -1.5654947757720947, "logits/rejected": -2.0333707332611084, "logps/chosen": -119.94007110595703, "logps/rejected": -293.9117126464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.213028430938721, "rewards/margins": 15.425531387329102, "rewards/rejected": -20.638559341430664, "step": 4155 }, { "epoch": 7.15, "learning_rate": 1.1400339991500211e-07, "logits/chosen": -1.7105053663253784, "logits/rejected": -1.7910654544830322, "logps/chosen": -161.33648681640625, "logps/rejected": -298.9041748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.767850875854492, "rewards/margins": 13.858717918395996, "rewards/rejected": -20.626569747924805, "step": 4156 }, { "epoch": 7.15, "learning_rate": 1.1389715257118572e-07, "logits/chosen": -1.6731289625167847, "logits/rejected": -2.1087403297424316, "logps/chosen": -151.05545043945312, "logps/rejected": -314.9150085449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.034917831420898, "rewards/margins": 15.751544952392578, "rewards/rejected": -22.786460876464844, "step": 4157 }, { "epoch": 7.16, "learning_rate": 1.1379090522736931e-07, "logits/chosen": -2.0273332595825195, "logits/rejected": -1.619035243988037, "logps/chosen": -150.31613159179688, "logps/rejected": -269.1494445800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.820431232452393, "rewards/margins": 12.66928482055664, "rewards/rejected": -18.489715576171875, "step": 4158 }, { "epoch": 7.16, "learning_rate": 1.1368465788355291e-07, "logits/chosen": -2.0722882747650146, "logits/rejected": -1.9450953006744385, "logps/chosen": -130.51882934570312, "logps/rejected": -276.5107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.720789432525635, "rewards/margins": 14.198098182678223, "rewards/rejected": -18.918888092041016, "step": 4159 }, { "epoch": 7.16, "learning_rate": 1.135784105397365e-07, "logits/chosen": -1.6363255977630615, "logits/rejected": -1.9334288835525513, "logps/chosen": -179.92721557617188, "logps/rejected": -342.7340087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.300786972045898, "rewards/margins": 15.335310935974121, "rewards/rejected": -24.636098861694336, "step": 4160 }, { "epoch": 7.16, "learning_rate": 1.1347216319592009e-07, "logits/chosen": -1.8334347009658813, "logits/rejected": -1.7860608100891113, "logps/chosen": -168.77847290039062, "logps/rejected": -313.4688720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.069561004638672, "rewards/margins": 14.944074630737305, "rewards/rejected": -24.013633728027344, "step": 4161 }, { "epoch": 7.16, "learning_rate": 1.1336591585210369e-07, "logits/chosen": -1.811843752861023, "logits/rejected": -1.739483118057251, "logps/chosen": -144.36378479003906, "logps/rejected": -278.8480529785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.092928409576416, "rewards/margins": 13.099302291870117, "rewards/rejected": -19.192232131958008, "step": 4162 }, { "epoch": 7.17, "learning_rate": 1.1325966850828729e-07, "logits/chosen": -1.9425926208496094, "logits/rejected": -2.019651412963867, "logps/chosen": -128.89830017089844, "logps/rejected": -320.1065368652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.72464656829834, "rewards/margins": 18.89116668701172, "rewards/rejected": -23.615812301635742, "step": 4163 }, { "epoch": 7.17, "learning_rate": 1.1315342116447087e-07, "logits/chosen": -1.700329303741455, "logits/rejected": -2.014824628829956, "logps/chosen": -143.468505859375, "logps/rejected": -290.77410888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5421366691589355, "rewards/margins": 14.668060302734375, "rewards/rejected": -20.21019744873047, "step": 4164 }, { "epoch": 7.17, "learning_rate": 1.1304717382065449e-07, "logits/chosen": -1.9555952548980713, "logits/rejected": -1.9188592433929443, "logps/chosen": -189.32717895507812, "logps/rejected": -323.45867919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.994939804077148, "rewards/margins": 13.472027778625488, "rewards/rejected": -22.466968536376953, "step": 4165 }, { "epoch": 7.17, "learning_rate": 1.1294092647683807e-07, "logits/chosen": -1.4620361328125, "logits/rejected": -1.9749348163604736, "logps/chosen": -171.29835510253906, "logps/rejected": -319.98388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.072595596313477, "rewards/margins": 13.856263160705566, "rewards/rejected": -20.92885971069336, "step": 4166 }, { "epoch": 7.17, "learning_rate": 1.1283467913302167e-07, "logits/chosen": -1.5441371202468872, "logits/rejected": -1.9629271030426025, "logps/chosen": -199.20584106445312, "logps/rejected": -326.05548095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.418621063232422, "rewards/margins": 10.983004570007324, "rewards/rejected": -22.401626586914062, "step": 4167 }, { "epoch": 7.17, "learning_rate": 1.1272843178920527e-07, "logits/chosen": -2.04901123046875, "logits/rejected": -1.8662022352218628, "logps/chosen": -186.0027618408203, "logps/rejected": -304.612060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.497698783874512, "rewards/margins": 13.040960311889648, "rewards/rejected": -21.538658142089844, "step": 4168 }, { "epoch": 7.18, "learning_rate": 1.1262218444538886e-07, "logits/chosen": -1.6426618099212646, "logits/rejected": -1.9676461219787598, "logps/chosen": -176.93798828125, "logps/rejected": -315.9859924316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.152523040771484, "rewards/margins": 13.863460540771484, "rewards/rejected": -22.01598358154297, "step": 4169 }, { "epoch": 7.18, "learning_rate": 1.1251593710157246e-07, "logits/chosen": -1.957140564918518, "logits/rejected": -1.8489521741867065, "logps/chosen": -223.32794189453125, "logps/rejected": -366.8688049316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.817987442016602, "rewards/margins": 15.782661437988281, "rewards/rejected": -26.600648880004883, "step": 4170 }, { "epoch": 7.18, "learning_rate": 1.1240968975775605e-07, "logits/chosen": -1.4684357643127441, "logits/rejected": -2.0232319831848145, "logps/chosen": -189.14463806152344, "logps/rejected": -350.6531982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.079462051391602, "rewards/margins": 12.961125373840332, "rewards/rejected": -24.04058837890625, "step": 4171 }, { "epoch": 7.18, "learning_rate": 1.1230344241393964e-07, "logits/chosen": -2.192134380340576, "logits/rejected": -1.8261033296585083, "logps/chosen": -174.55728149414062, "logps/rejected": -305.8863220214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.335568428039551, "rewards/margins": 14.823060989379883, "rewards/rejected": -21.158628463745117, "step": 4172 }, { "epoch": 7.18, "learning_rate": 1.1219719507012324e-07, "logits/chosen": -1.515707015991211, "logits/rejected": -1.7703883647918701, "logps/chosen": -162.56463623046875, "logps/rejected": -326.41900634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.693629741668701, "rewards/margins": 15.931257247924805, "rewards/rejected": -23.62488555908203, "step": 4173 }, { "epoch": 7.18, "learning_rate": 1.1209094772630684e-07, "logits/chosen": -2.120434522628784, "logits/rejected": -1.9593321084976196, "logps/chosen": -220.9830780029297, "logps/rejected": -352.71502685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.09656047821045, "rewards/margins": 13.88534164428711, "rewards/rejected": -24.981903076171875, "step": 4174 }, { "epoch": 7.19, "learning_rate": 1.1198470038249042e-07, "logits/chosen": -1.8012224435806274, "logits/rejected": -1.926134705543518, "logps/chosen": -185.53677368164062, "logps/rejected": -331.78802490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.185138702392578, "rewards/margins": 14.384942054748535, "rewards/rejected": -23.570079803466797, "step": 4175 }, { "epoch": 7.19, "learning_rate": 1.1187845303867404e-07, "logits/chosen": -1.7575175762176514, "logits/rejected": -2.117600202560425, "logps/chosen": -136.55886840820312, "logps/rejected": -273.0096435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.667051792144775, "rewards/margins": 12.23654556274414, "rewards/rejected": -17.903596878051758, "step": 4176 }, { "epoch": 7.19, "learning_rate": 1.1177220569485762e-07, "logits/chosen": -1.626326560974121, "logits/rejected": -1.7938427925109863, "logps/chosen": -143.8811492919922, "logps/rejected": -334.2219543457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.60212516784668, "rewards/margins": 16.440677642822266, "rewards/rejected": -23.042802810668945, "step": 4177 }, { "epoch": 7.19, "learning_rate": 1.1166595835104122e-07, "logits/chosen": -1.9334394931793213, "logits/rejected": -2.0131521224975586, "logps/chosen": -172.81887817382812, "logps/rejected": -298.81573486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.715885162353516, "rewards/margins": 13.422525405883789, "rewards/rejected": -21.138412475585938, "step": 4178 }, { "epoch": 7.19, "learning_rate": 1.1155971100722482e-07, "logits/chosen": -1.9028964042663574, "logits/rejected": -1.7558480501174927, "logps/chosen": -186.73611450195312, "logps/rejected": -316.955810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.51845932006836, "rewards/margins": 12.816568374633789, "rewards/rejected": -22.33502960205078, "step": 4179 }, { "epoch": 7.19, "learning_rate": 1.114534636634084e-07, "logits/chosen": -1.94523024559021, "logits/rejected": -1.8176406621932983, "logps/chosen": -150.12228393554688, "logps/rejected": -310.9710693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.48318338394165, "rewards/margins": 16.976032257080078, "rewards/rejected": -23.45921516418457, "step": 4180 }, { "epoch": 7.2, "learning_rate": 1.11347216319592e-07, "logits/chosen": -2.0317881107330322, "logits/rejected": -1.5289353132247925, "logps/chosen": -194.46188354492188, "logps/rejected": -326.36151123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.618919372558594, "rewards/margins": 15.49421215057373, "rewards/rejected": -25.113130569458008, "step": 4181 }, { "epoch": 7.2, "learning_rate": 1.112409689757756e-07, "logits/chosen": -2.1164138317108154, "logits/rejected": -1.9542129039764404, "logps/chosen": -131.98582458496094, "logps/rejected": -255.21827697753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.070493698120117, "rewards/margins": 13.264806747436523, "rewards/rejected": -18.33530044555664, "step": 4182 }, { "epoch": 7.2, "learning_rate": 1.1113472163195919e-07, "logits/chosen": -1.8998345136642456, "logits/rejected": -2.125190019607544, "logps/chosen": -124.0321044921875, "logps/rejected": -271.0279846191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1915202140808105, "rewards/margins": 13.343645095825195, "rewards/rejected": -18.535165786743164, "step": 4183 }, { "epoch": 7.2, "learning_rate": 1.110284742881428e-07, "logits/chosen": -1.757522702217102, "logits/rejected": -1.8374966382980347, "logps/chosen": -184.25341796875, "logps/rejected": -334.52362060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.266328811645508, "rewards/margins": 15.679980278015137, "rewards/rejected": -24.946308135986328, "step": 4184 }, { "epoch": 7.2, "learning_rate": 1.1092222694432639e-07, "logits/chosen": -2.1014647483825684, "logits/rejected": -1.8048996925354004, "logps/chosen": -179.1050262451172, "logps/rejected": -322.6668701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.883606910705566, "rewards/margins": 13.54163932800293, "rewards/rejected": -21.42524528503418, "step": 4185 }, { "epoch": 7.2, "learning_rate": 1.1081597960050998e-07, "logits/chosen": -2.1479432582855225, "logits/rejected": -1.6137728691101074, "logps/chosen": -185.17147827148438, "logps/rejected": -311.27227783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.652484893798828, "rewards/margins": 15.084918975830078, "rewards/rejected": -23.737403869628906, "step": 4186 }, { "epoch": 7.21, "learning_rate": 1.1070973225669358e-07, "logits/chosen": -1.7179471254348755, "logits/rejected": -1.792565107345581, "logps/chosen": -150.40625, "logps/rejected": -297.4444885253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2044997215271, "rewards/margins": 15.347530364990234, "rewards/rejected": -21.552030563354492, "step": 4187 }, { "epoch": 7.21, "learning_rate": 1.1060348491287717e-07, "logits/chosen": -1.7727575302124023, "logits/rejected": -1.838360071182251, "logps/chosen": -196.25949096679688, "logps/rejected": -347.32318115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.337815284729004, "rewards/margins": 16.042068481445312, "rewards/rejected": -26.379884719848633, "step": 4188 }, { "epoch": 7.21, "learning_rate": 1.1049723756906077e-07, "logits/chosen": -1.784234642982483, "logits/rejected": -1.834650993347168, "logps/chosen": -130.61732482910156, "logps/rejected": -292.0513916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2797956466674805, "rewards/margins": 15.14257526397705, "rewards/rejected": -20.42237091064453, "step": 4189 }, { "epoch": 7.21, "learning_rate": 1.1039099022524437e-07, "logits/chosen": -1.9927905797958374, "logits/rejected": -2.022190809249878, "logps/chosen": -118.97561645507812, "logps/rejected": -334.47760009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.349370956420898, "rewards/margins": 18.851354598999023, "rewards/rejected": -24.200725555419922, "step": 4190 }, { "epoch": 7.21, "learning_rate": 1.1028474288142795e-07, "logits/chosen": -1.7001336812973022, "logits/rejected": -1.7680935859680176, "logps/chosen": -118.83799743652344, "logps/rejected": -244.31866455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.509618282318115, "rewards/margins": 11.737635612487793, "rewards/rejected": -16.24725341796875, "step": 4191 }, { "epoch": 7.22, "learning_rate": 1.1017849553761155e-07, "logits/chosen": -1.8332641124725342, "logits/rejected": -1.9318947792053223, "logps/chosen": -154.27215576171875, "logps/rejected": -334.0543518066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.145847797393799, "rewards/margins": 15.319402694702148, "rewards/rejected": -22.46525001525879, "step": 4192 }, { "epoch": 7.22, "learning_rate": 1.1007224819379515e-07, "logits/chosen": -1.8284626007080078, "logits/rejected": -1.7446329593658447, "logps/chosen": -144.7616424560547, "logps/rejected": -342.61785888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.077625274658203, "rewards/margins": 19.65736198425293, "rewards/rejected": -25.734987258911133, "step": 4193 }, { "epoch": 7.22, "learning_rate": 1.0996600084997875e-07, "logits/chosen": -2.041821241378784, "logits/rejected": -1.6304078102111816, "logps/chosen": -148.603271484375, "logps/rejected": -325.2327575683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.570457458496094, "rewards/margins": 18.81566047668457, "rewards/rejected": -24.38611602783203, "step": 4194 }, { "epoch": 7.22, "learning_rate": 1.0985975350616235e-07, "logits/chosen": -1.9132086038589478, "logits/rejected": -1.8542582988739014, "logps/chosen": -154.5102996826172, "logps/rejected": -313.3956298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.180343151092529, "rewards/margins": 15.414358139038086, "rewards/rejected": -22.594703674316406, "step": 4195 }, { "epoch": 7.22, "learning_rate": 1.0975350616234593e-07, "logits/chosen": -1.7218291759490967, "logits/rejected": -1.753981113433838, "logps/chosen": -148.49542236328125, "logps/rejected": -329.9561767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.987083911895752, "rewards/margins": 17.351137161254883, "rewards/rejected": -24.338220596313477, "step": 4196 }, { "epoch": 7.22, "learning_rate": 1.0964725881852953e-07, "logits/chosen": -1.9170347452163696, "logits/rejected": -1.4999213218688965, "logps/chosen": -121.03389739990234, "logps/rejected": -225.81716918945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.121315956115723, "rewards/margins": 12.18541431427002, "rewards/rejected": -16.306730270385742, "step": 4197 }, { "epoch": 7.23, "learning_rate": 1.0954101147471313e-07, "logits/chosen": -2.0921099185943604, "logits/rejected": -1.7926228046417236, "logps/chosen": -132.3975830078125, "logps/rejected": -289.71728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.822521209716797, "rewards/margins": 16.942485809326172, "rewards/rejected": -21.7650089263916, "step": 4198 }, { "epoch": 7.23, "learning_rate": 1.0943476413089672e-07, "logits/chosen": -1.952242374420166, "logits/rejected": -1.9130274057388306, "logps/chosen": -168.7474365234375, "logps/rejected": -366.1177978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.098176956176758, "rewards/margins": 18.635589599609375, "rewards/rejected": -26.733766555786133, "step": 4199 }, { "epoch": 7.23, "learning_rate": 1.0932851678708032e-07, "logits/chosen": -1.921152114868164, "logits/rejected": -1.84000825881958, "logps/chosen": -167.14759826660156, "logps/rejected": -332.2467041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1910080909729, "rewards/margins": 17.800386428833008, "rewards/rejected": -24.99139404296875, "step": 4200 }, { "epoch": 7.23, "learning_rate": 1.0922226944326392e-07, "logits/chosen": -2.0129687786102295, "logits/rejected": -1.3487098217010498, "logps/chosen": -171.40765380859375, "logps/rejected": -286.23663330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.834756851196289, "rewards/margins": 14.355074882507324, "rewards/rejected": -22.189830780029297, "step": 4201 }, { "epoch": 7.23, "learning_rate": 1.0911602209944751e-07, "logits/chosen": -1.7179452180862427, "logits/rejected": -1.9783790111541748, "logps/chosen": -228.00103759765625, "logps/rejected": -356.60986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.026294708251953, "rewards/margins": 11.813360214233398, "rewards/rejected": -24.83965492248535, "step": 4202 }, { "epoch": 7.23, "learning_rate": 1.090097747556311e-07, "logits/chosen": -1.8713868856430054, "logits/rejected": -1.9887378215789795, "logps/chosen": -156.1214141845703, "logps/rejected": -285.3223876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.110938549041748, "rewards/margins": 12.715142250061035, "rewards/rejected": -19.826080322265625, "step": 4203 }, { "epoch": 7.24, "learning_rate": 1.089035274118147e-07, "logits/chosen": -1.86441171169281, "logits/rejected": -1.9949848651885986, "logps/chosen": -144.1641845703125, "logps/rejected": -309.0975646972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.423263072967529, "rewards/margins": 16.36166763305664, "rewards/rejected": -22.784929275512695, "step": 4204 }, { "epoch": 7.24, "learning_rate": 1.087972800679983e-07, "logits/chosen": -1.7258856296539307, "logits/rejected": -2.1027402877807617, "logps/chosen": -111.88945770263672, "logps/rejected": -294.13958740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.117913246154785, "rewards/margins": 17.14118003845215, "rewards/rejected": -21.259092330932617, "step": 4205 }, { "epoch": 7.24, "learning_rate": 1.086910327241819e-07, "logits/chosen": -1.9383078813552856, "logits/rejected": -2.063695192337036, "logps/chosen": -178.84169006347656, "logps/rejected": -311.0028076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.722552299499512, "rewards/margins": 13.378678321838379, "rewards/rejected": -22.10123062133789, "step": 4206 }, { "epoch": 7.24, "learning_rate": 1.0858478538036548e-07, "logits/chosen": -1.9194941520690918, "logits/rejected": -1.5129048824310303, "logps/chosen": -198.82679748535156, "logps/rejected": -308.1650085449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.165050506591797, "rewards/margins": 12.939902305603027, "rewards/rejected": -22.10495376586914, "step": 4207 }, { "epoch": 7.24, "learning_rate": 1.0847853803654908e-07, "logits/chosen": -2.011936664581299, "logits/rejected": -2.0383846759796143, "logps/chosen": -149.01698303222656, "logps/rejected": -335.2904968261719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.869933128356934, "rewards/margins": 17.885419845581055, "rewards/rejected": -24.755352020263672, "step": 4208 }, { "epoch": 7.24, "learning_rate": 1.0837229069273268e-07, "logits/chosen": -1.3669779300689697, "logits/rejected": -1.9059438705444336, "logps/chosen": -148.4781951904297, "logps/rejected": -331.3564758300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.722594738006592, "rewards/margins": 16.11787986755371, "rewards/rejected": -22.84047508239746, "step": 4209 }, { "epoch": 7.25, "learning_rate": 1.0826604334891628e-07, "logits/chosen": -1.5878666639328003, "logits/rejected": -1.8322595357894897, "logps/chosen": -111.56915283203125, "logps/rejected": -298.21673583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.832122564315796, "rewards/margins": 17.368343353271484, "rewards/rejected": -21.20046615600586, "step": 4210 }, { "epoch": 7.25, "learning_rate": 1.0815979600509986e-07, "logits/chosen": -1.9095611572265625, "logits/rejected": -1.7867364883422852, "logps/chosen": -169.59033203125, "logps/rejected": -347.7356262207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.52635383605957, "rewards/margins": 17.531736373901367, "rewards/rejected": -25.058090209960938, "step": 4211 }, { "epoch": 7.25, "learning_rate": 1.0805354866128346e-07, "logits/chosen": -1.9192931652069092, "logits/rejected": -1.801178216934204, "logps/chosen": -171.9347686767578, "logps/rejected": -312.6647644042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.765951633453369, "rewards/margins": 14.055570602416992, "rewards/rejected": -20.821521759033203, "step": 4212 }, { "epoch": 7.25, "learning_rate": 1.0794730131746706e-07, "logits/chosen": -1.9162625074386597, "logits/rejected": -1.8055355548858643, "logps/chosen": -164.04510498046875, "logps/rejected": -335.6961975097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.952739715576172, "rewards/margins": 18.145601272583008, "rewards/rejected": -25.09834098815918, "step": 4213 }, { "epoch": 7.25, "learning_rate": 1.0784105397365065e-07, "logits/chosen": -1.9235343933105469, "logits/rejected": -1.8620762825012207, "logps/chosen": -129.72262573242188, "logps/rejected": -268.44171142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.19327974319458, "rewards/margins": 13.989348411560059, "rewards/rejected": -20.182628631591797, "step": 4214 }, { "epoch": 7.25, "learning_rate": 1.0773480662983425e-07, "logits/chosen": -1.8945536613464355, "logits/rejected": -1.941659927368164, "logps/chosen": -188.40631103515625, "logps/rejected": -353.6317443847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.258753776550293, "rewards/margins": 16.173582077026367, "rewards/rejected": -25.432336807250977, "step": 4215 }, { "epoch": 7.26, "learning_rate": 1.0762855928601785e-07, "logits/chosen": -2.1556239128112793, "logits/rejected": -2.0144710540771484, "logps/chosen": -159.28030395507812, "logps/rejected": -318.6228332519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.503935813903809, "rewards/margins": 17.66257095336914, "rewards/rejected": -24.166507720947266, "step": 4216 }, { "epoch": 7.26, "learning_rate": 1.0752231194220144e-07, "logits/chosen": -1.7808315753936768, "logits/rejected": -1.8494887351989746, "logps/chosen": -149.9279022216797, "logps/rejected": -308.52398681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.382815361022949, "rewards/margins": 16.413057327270508, "rewards/rejected": -21.79587173461914, "step": 4217 }, { "epoch": 7.26, "learning_rate": 1.0741606459838503e-07, "logits/chosen": -1.8994617462158203, "logits/rejected": -1.5522969961166382, "logps/chosen": -176.84860229492188, "logps/rejected": -338.77691650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.624506950378418, "rewards/margins": 16.51202964782715, "rewards/rejected": -24.13653564453125, "step": 4218 }, { "epoch": 7.26, "learning_rate": 1.0730981725456863e-07, "logits/chosen": -1.9436105489730835, "logits/rejected": -1.9466756582260132, "logps/chosen": -145.6925048828125, "logps/rejected": -343.7386474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.158679485321045, "rewards/margins": 19.06156349182129, "rewards/rejected": -24.220243453979492, "step": 4219 }, { "epoch": 7.26, "learning_rate": 1.0720356991075223e-07, "logits/chosen": -1.696958065032959, "logits/rejected": -2.135179042816162, "logps/chosen": -104.98881530761719, "logps/rejected": -343.02398681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.828441858291626, "rewards/margins": 21.4886474609375, "rewards/rejected": -25.317089080810547, "step": 4220 }, { "epoch": 7.27, "learning_rate": 1.0709732256693583e-07, "logits/chosen": -1.5247782468795776, "logits/rejected": -1.9704678058624268, "logps/chosen": -169.04171752929688, "logps/rejected": -335.2403259277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.788801193237305, "rewards/margins": 15.161456108093262, "rewards/rejected": -23.950258255004883, "step": 4221 }, { "epoch": 7.27, "learning_rate": 1.0699107522311941e-07, "logits/chosen": -1.6155054569244385, "logits/rejected": -1.8997387886047363, "logps/chosen": -196.337158203125, "logps/rejected": -321.06402587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.811025619506836, "rewards/margins": 12.54052734375, "rewards/rejected": -23.351552963256836, "step": 4222 }, { "epoch": 7.27, "learning_rate": 1.0688482787930301e-07, "logits/chosen": -1.3263037204742432, "logits/rejected": -2.063274621963501, "logps/chosen": -128.76121520996094, "logps/rejected": -357.38946533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.829012870788574, "rewards/margins": 18.974931716918945, "rewards/rejected": -24.803943634033203, "step": 4223 }, { "epoch": 7.27, "learning_rate": 1.0677858053548661e-07, "logits/chosen": -1.871671199798584, "logits/rejected": -1.70265793800354, "logps/chosen": -162.27810668945312, "logps/rejected": -297.92279052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.005016326904297, "rewards/margins": 14.69833755493164, "rewards/rejected": -21.703353881835938, "step": 4224 }, { "epoch": 7.27, "learning_rate": 1.066723331916702e-07, "logits/chosen": -1.9333468675613403, "logits/rejected": -1.752382755279541, "logps/chosen": -168.0811309814453, "logps/rejected": -291.6039123535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.26465129852295, "rewards/margins": 12.263965606689453, "rewards/rejected": -20.528615951538086, "step": 4225 }, { "epoch": 7.27, "learning_rate": 1.065660858478538e-07, "logits/chosen": -1.9332609176635742, "logits/rejected": -1.8463621139526367, "logps/chosen": -198.86911010742188, "logps/rejected": -318.1959228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.929801940917969, "rewards/margins": 12.020064353942871, "rewards/rejected": -22.949867248535156, "step": 4226 }, { "epoch": 7.28, "learning_rate": 1.064598385040374e-07, "logits/chosen": -1.7320001125335693, "logits/rejected": -2.057621479034424, "logps/chosen": -160.80987548828125, "logps/rejected": -325.30877685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.37060022354126, "rewards/margins": 15.846715927124023, "rewards/rejected": -23.217315673828125, "step": 4227 }, { "epoch": 7.28, "learning_rate": 1.0635359116022099e-07, "logits/chosen": -1.5998995304107666, "logits/rejected": -1.673933744430542, "logps/chosen": -182.4958953857422, "logps/rejected": -343.56732177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.213223457336426, "rewards/margins": 15.521757125854492, "rewards/rejected": -24.7349796295166, "step": 4228 }, { "epoch": 7.28, "learning_rate": 1.0624734381640459e-07, "logits/chosen": -1.95241379737854, "logits/rejected": -1.8612741231918335, "logps/chosen": -152.04531860351562, "logps/rejected": -298.6555480957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.946681976318359, "rewards/margins": 15.176581382751465, "rewards/rejected": -21.12326431274414, "step": 4229 }, { "epoch": 7.28, "learning_rate": 1.0614109647258818e-07, "logits/chosen": -1.8520492315292358, "logits/rejected": -2.0654757022857666, "logps/chosen": -104.18013000488281, "logps/rejected": -290.3702087402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.800149440765381, "rewards/margins": 16.417152404785156, "rewards/rejected": -20.217302322387695, "step": 4230 }, { "epoch": 7.28, "learning_rate": 1.0603484912877178e-07, "logits/chosen": -1.93587327003479, "logits/rejected": -1.9704930782318115, "logps/chosen": -165.38717651367188, "logps/rejected": -307.4688720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.51181697845459, "rewards/margins": 14.535646438598633, "rewards/rejected": -21.04746437072754, "step": 4231 }, { "epoch": 7.28, "learning_rate": 1.0592860178495538e-07, "logits/chosen": -1.8530503511428833, "logits/rejected": -1.6869711875915527, "logps/chosen": -161.13156127929688, "logps/rejected": -328.26861572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.515256881713867, "rewards/margins": 17.622249603271484, "rewards/rejected": -24.13750648498535, "step": 4232 }, { "epoch": 7.29, "learning_rate": 1.0582235444113896e-07, "logits/chosen": -2.1230688095092773, "logits/rejected": -1.9085136651992798, "logps/chosen": -200.34246826171875, "logps/rejected": -352.1662292480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.04670524597168, "rewards/margins": 16.599056243896484, "rewards/rejected": -26.64575958251953, "step": 4233 }, { "epoch": 7.29, "learning_rate": 1.0571610709732256e-07, "logits/chosen": -1.627201795578003, "logits/rejected": -1.7622736692428589, "logps/chosen": -155.7958221435547, "logps/rejected": -302.45697021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.601925849914551, "rewards/margins": 14.365405082702637, "rewards/rejected": -21.967330932617188, "step": 4234 }, { "epoch": 7.29, "learning_rate": 1.0560985975350616e-07, "logits/chosen": -1.7351558208465576, "logits/rejected": -1.4677294492721558, "logps/chosen": -171.8113250732422, "logps/rejected": -306.9189758300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.068727970123291, "rewards/margins": 15.767019271850586, "rewards/rejected": -22.83574676513672, "step": 4235 }, { "epoch": 7.29, "learning_rate": 1.0550361240968974e-07, "logits/chosen": -1.7956206798553467, "logits/rejected": -1.809584140777588, "logps/chosen": -175.97451782226562, "logps/rejected": -301.2159118652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.755415439605713, "rewards/margins": 12.796987533569336, "rewards/rejected": -20.55240249633789, "step": 4236 }, { "epoch": 7.29, "learning_rate": 1.0539736506587336e-07, "logits/chosen": -1.8322349786758423, "logits/rejected": -1.9668505191802979, "logps/chosen": -143.23106384277344, "logps/rejected": -328.8093566894531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.584782600402832, "rewards/margins": 18.013957977294922, "rewards/rejected": -23.598743438720703, "step": 4237 }, { "epoch": 7.29, "learning_rate": 1.0529111772205694e-07, "logits/chosen": -1.765758752822876, "logits/rejected": -1.641296625137329, "logps/chosen": -165.62493896484375, "logps/rejected": -310.4107971191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.260815620422363, "rewards/margins": 13.69851016998291, "rewards/rejected": -21.959325790405273, "step": 4238 }, { "epoch": 7.3, "learning_rate": 1.0518487037824054e-07, "logits/chosen": -1.5813755989074707, "logits/rejected": -1.841971516609192, "logps/chosen": -120.8790054321289, "logps/rejected": -325.92724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.989400863647461, "rewards/margins": 19.746997833251953, "rewards/rejected": -24.736398696899414, "step": 4239 }, { "epoch": 7.3, "learning_rate": 1.0507862303442414e-07, "logits/chosen": -1.9110934734344482, "logits/rejected": -1.8454375267028809, "logps/chosen": -179.79356384277344, "logps/rejected": -295.50030517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.190925598144531, "rewards/margins": 12.12305736541748, "rewards/rejected": -21.313982009887695, "step": 4240 }, { "epoch": 7.3, "learning_rate": 1.0497237569060773e-07, "logits/chosen": -1.7961374521255493, "logits/rejected": -1.7218858003616333, "logps/chosen": -128.3602294921875, "logps/rejected": -324.54022216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.829282283782959, "rewards/margins": 19.77890396118164, "rewards/rejected": -25.608186721801758, "step": 4241 }, { "epoch": 7.3, "learning_rate": 1.0486612834679132e-07, "logits/chosen": -1.5533299446105957, "logits/rejected": -2.0037271976470947, "logps/chosen": -190.29330444335938, "logps/rejected": -326.66717529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.560842514038086, "rewards/margins": 13.392585754394531, "rewards/rejected": -22.953428268432617, "step": 4242 }, { "epoch": 7.3, "learning_rate": 1.0475988100297492e-07, "logits/chosen": -2.003978729248047, "logits/rejected": -1.8089500665664673, "logps/chosen": -191.33709716796875, "logps/rejected": -301.5245056152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.556720733642578, "rewards/margins": 12.812564849853516, "rewards/rejected": -21.369285583496094, "step": 4243 }, { "epoch": 7.3, "learning_rate": 1.0465363365915851e-07, "logits/chosen": -1.8278474807739258, "logits/rejected": -1.9401795864105225, "logps/chosen": -175.82009887695312, "logps/rejected": -323.0337829589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.652043342590332, "rewards/margins": 15.04632568359375, "rewards/rejected": -22.6983699798584, "step": 4244 }, { "epoch": 7.31, "learning_rate": 1.0454738631534212e-07, "logits/chosen": -2.025063991546631, "logits/rejected": -1.5400068759918213, "logps/chosen": -183.35546875, "logps/rejected": -345.3576965332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.343786239624023, "rewards/margins": 16.654129028320312, "rewards/rejected": -25.997915267944336, "step": 4245 }, { "epoch": 7.31, "learning_rate": 1.0444113897152571e-07, "logits/chosen": -1.5905189514160156, "logits/rejected": -2.0008981227874756, "logps/chosen": -133.8677978515625, "logps/rejected": -311.9962158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.72430419921875, "rewards/margins": 16.109331130981445, "rewards/rejected": -21.833635330200195, "step": 4246 }, { "epoch": 7.31, "learning_rate": 1.043348916277093e-07, "logits/chosen": -1.6098849773406982, "logits/rejected": -1.8802666664123535, "logps/chosen": -137.0103759765625, "logps/rejected": -323.500732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.589969158172607, "rewards/margins": 17.06163787841797, "rewards/rejected": -23.651607513427734, "step": 4247 }, { "epoch": 7.31, "learning_rate": 1.042286442838929e-07, "logits/chosen": -1.524577260017395, "logits/rejected": -1.67872953414917, "logps/chosen": -154.4159393310547, "logps/rejected": -334.35272216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.952930450439453, "rewards/margins": 17.10601806640625, "rewards/rejected": -26.058948516845703, "step": 4248 }, { "epoch": 7.31, "learning_rate": 1.0412239694007649e-07, "logits/chosen": -1.871525526046753, "logits/rejected": -2.0607597827911377, "logps/chosen": -162.44403076171875, "logps/rejected": -293.9512023925781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.519180297851562, "rewards/margins": 12.562522888183594, "rewards/rejected": -21.081703186035156, "step": 4249 }, { "epoch": 7.31, "learning_rate": 1.0401614959626009e-07, "logits/chosen": -2.000741481781006, "logits/rejected": -1.836074948310852, "logps/chosen": -151.15830993652344, "logps/rejected": -312.5889587402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.520627021789551, "rewards/margins": 15.962961196899414, "rewards/rejected": -22.48358917236328, "step": 4250 }, { "epoch": 7.32, "learning_rate": 1.0390990225244369e-07, "logits/chosen": -1.8879821300506592, "logits/rejected": -1.8180007934570312, "logps/chosen": -198.21022033691406, "logps/rejected": -355.602783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.696127891540527, "rewards/margins": 15.483073234558105, "rewards/rejected": -25.179203033447266, "step": 4251 }, { "epoch": 7.32, "learning_rate": 1.0380365490862727e-07, "logits/chosen": -1.9524002075195312, "logits/rejected": -1.534356951713562, "logps/chosen": -178.48020935058594, "logps/rejected": -302.93560791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.15246295928955, "rewards/margins": 13.791584014892578, "rewards/rejected": -21.944046020507812, "step": 4252 }, { "epoch": 7.32, "learning_rate": 1.0369740756481087e-07, "logits/chosen": -1.7504851818084717, "logits/rejected": -2.092418909072876, "logps/chosen": -135.1858367919922, "logps/rejected": -283.70782470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4661335945129395, "rewards/margins": 13.367388725280762, "rewards/rejected": -18.83352279663086, "step": 4253 }, { "epoch": 7.32, "learning_rate": 1.0359116022099447e-07, "logits/chosen": -1.968867301940918, "logits/rejected": -1.790457010269165, "logps/chosen": -96.39173889160156, "logps/rejected": -264.8233947753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.280395030975342, "rewards/margins": 15.425516128540039, "rewards/rejected": -18.705909729003906, "step": 4254 }, { "epoch": 7.32, "learning_rate": 1.0348491287717806e-07, "logits/chosen": -1.940917730331421, "logits/rejected": -1.8839462995529175, "logps/chosen": -157.23544311523438, "logps/rejected": -289.5765075683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.57772159576416, "rewards/margins": 13.620345115661621, "rewards/rejected": -21.19806671142578, "step": 4255 }, { "epoch": 7.33, "learning_rate": 1.0337866553336167e-07, "logits/chosen": -1.7804512977600098, "logits/rejected": -1.7217921018600464, "logps/chosen": -169.65972900390625, "logps/rejected": -301.879638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.009908676147461, "rewards/margins": 12.524394989013672, "rewards/rejected": -20.534303665161133, "step": 4256 }, { "epoch": 7.33, "learning_rate": 1.0327241818954526e-07, "logits/chosen": -1.7878808975219727, "logits/rejected": -1.9374631643295288, "logps/chosen": -172.10633850097656, "logps/rejected": -330.66766357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.872349739074707, "rewards/margins": 17.01402473449707, "rewards/rejected": -24.88637351989746, "step": 4257 }, { "epoch": 7.33, "learning_rate": 1.0316617084572885e-07, "logits/chosen": -1.5158467292785645, "logits/rejected": -2.208909273147583, "logps/chosen": -195.31405639648438, "logps/rejected": -380.5855407714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.102073669433594, "rewards/margins": 16.527217864990234, "rewards/rejected": -26.629289627075195, "step": 4258 }, { "epoch": 7.33, "learning_rate": 1.0305992350191245e-07, "logits/chosen": -1.8037185668945312, "logits/rejected": -1.9306877851486206, "logps/chosen": -215.23727416992188, "logps/rejected": -353.2125244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.140630722045898, "rewards/margins": 14.736978530883789, "rewards/rejected": -24.877609252929688, "step": 4259 }, { "epoch": 7.33, "learning_rate": 1.0295367615809604e-07, "logits/chosen": -2.0206515789031982, "logits/rejected": -1.7829656600952148, "logps/chosen": -180.6494903564453, "logps/rejected": -313.342529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.38741397857666, "rewards/margins": 14.476532936096191, "rewards/rejected": -22.86394691467285, "step": 4260 }, { "epoch": 7.33, "learning_rate": 1.0284742881427964e-07, "logits/chosen": -1.984716773033142, "logits/rejected": -1.9481021165847778, "logps/chosen": -132.23741149902344, "logps/rejected": -273.2593994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5660400390625, "rewards/margins": 13.393390655517578, "rewards/rejected": -19.959430694580078, "step": 4261 }, { "epoch": 7.34, "learning_rate": 1.0274118147046324e-07, "logits/chosen": -1.6465075016021729, "logits/rejected": -1.5571142435073853, "logps/chosen": -129.03994750976562, "logps/rejected": -310.5579833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.719111442565918, "rewards/margins": 17.27938461303711, "rewards/rejected": -20.998497009277344, "step": 4262 }, { "epoch": 7.34, "learning_rate": 1.0263493412664682e-07, "logits/chosen": -1.6823350191116333, "logits/rejected": -1.767780065536499, "logps/chosen": -187.11083984375, "logps/rejected": -327.000244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.514809608459473, "rewards/margins": 13.030585289001465, "rewards/rejected": -23.545394897460938, "step": 4263 }, { "epoch": 7.34, "learning_rate": 1.0252868678283043e-07, "logits/chosen": -1.7380051612854004, "logits/rejected": -1.8569306135177612, "logps/chosen": -136.61378479003906, "logps/rejected": -259.91522216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.517343044281006, "rewards/margins": 11.876531600952148, "rewards/rejected": -17.393875122070312, "step": 4264 }, { "epoch": 7.34, "learning_rate": 1.0242243943901402e-07, "logits/chosen": -1.7422997951507568, "logits/rejected": -1.9081077575683594, "logps/chosen": -133.93206787109375, "logps/rejected": -264.410400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.961634635925293, "rewards/margins": 13.064192771911621, "rewards/rejected": -18.025829315185547, "step": 4265 }, { "epoch": 7.34, "learning_rate": 1.023161920951976e-07, "logits/chosen": -1.7695248126983643, "logits/rejected": -1.9191831350326538, "logps/chosen": -153.449462890625, "logps/rejected": -314.559814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.651199817657471, "rewards/margins": 15.732458114624023, "rewards/rejected": -22.383657455444336, "step": 4266 }, { "epoch": 7.34, "learning_rate": 1.0220994475138122e-07, "logits/chosen": -1.9591959714889526, "logits/rejected": -2.0308163166046143, "logps/chosen": -123.70880889892578, "logps/rejected": -365.0203857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.838535785675049, "rewards/margins": 22.03121566772461, "rewards/rejected": -26.8697509765625, "step": 4267 }, { "epoch": 7.35, "learning_rate": 1.021036974075648e-07, "logits/chosen": -1.9051711559295654, "logits/rejected": -1.9648756980895996, "logps/chosen": -167.38861083984375, "logps/rejected": -292.07135009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.61271858215332, "rewards/margins": 12.407736778259277, "rewards/rejected": -21.02045440673828, "step": 4268 }, { "epoch": 7.35, "learning_rate": 1.019974500637484e-07, "logits/chosen": -1.8128536939620972, "logits/rejected": -1.8967427015304565, "logps/chosen": -131.07252502441406, "logps/rejected": -312.464111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.235989093780518, "rewards/margins": 16.439598083496094, "rewards/rejected": -20.67559051513672, "step": 4269 }, { "epoch": 7.35, "learning_rate": 1.01891202719932e-07, "logits/chosen": -1.6869045495986938, "logits/rejected": -2.0302951335906982, "logps/chosen": -119.12855529785156, "logps/rejected": -272.7388000488281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7094011306762695, "rewards/margins": 15.409856796264648, "rewards/rejected": -20.119258880615234, "step": 4270 }, { "epoch": 7.35, "learning_rate": 1.0178495537611559e-07, "logits/chosen": -1.8148329257965088, "logits/rejected": -1.7455965280532837, "logps/chosen": -162.37875366210938, "logps/rejected": -320.0328369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.172239303588867, "rewards/margins": 15.955968856811523, "rewards/rejected": -24.12820816040039, "step": 4271 }, { "epoch": 7.35, "learning_rate": 1.016787080322992e-07, "logits/chosen": -1.688297986984253, "logits/rejected": -1.683722734451294, "logps/chosen": -146.70309448242188, "logps/rejected": -281.3109130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.025156021118164, "rewards/margins": 12.742044448852539, "rewards/rejected": -20.767200469970703, "step": 4272 }, { "epoch": 7.35, "learning_rate": 1.0157246068848278e-07, "logits/chosen": -1.8025612831115723, "logits/rejected": -1.9072877168655396, "logps/chosen": -197.53335571289062, "logps/rejected": -322.005615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.191895484924316, "rewards/margins": 11.727531433105469, "rewards/rejected": -20.91942596435547, "step": 4273 }, { "epoch": 7.36, "learning_rate": 1.0146621334466637e-07, "logits/chosen": -1.72611403465271, "logits/rejected": -1.792165756225586, "logps/chosen": -168.6755828857422, "logps/rejected": -343.58795166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.239760398864746, "rewards/margins": 16.762149810791016, "rewards/rejected": -25.001909255981445, "step": 4274 }, { "epoch": 7.36, "learning_rate": 1.0135996600084998e-07, "logits/chosen": -1.8091942071914673, "logits/rejected": -1.9302878379821777, "logps/chosen": -136.8037109375, "logps/rejected": -288.5117492675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.874856472015381, "rewards/margins": 15.405241966247559, "rewards/rejected": -22.28009796142578, "step": 4275 }, { "epoch": 7.36, "learning_rate": 1.0125371865703357e-07, "logits/chosen": -1.7486873865127563, "logits/rejected": -1.8457050323486328, "logps/chosen": -160.29788208007812, "logps/rejected": -339.85797119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.133520126342773, "rewards/margins": 17.23724365234375, "rewards/rejected": -25.370765686035156, "step": 4276 }, { "epoch": 7.36, "learning_rate": 1.0114747131321715e-07, "logits/chosen": -1.771176815032959, "logits/rejected": -1.4748711585998535, "logps/chosen": -135.19528198242188, "logps/rejected": -270.03387451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.320027828216553, "rewards/margins": 13.850761413574219, "rewards/rejected": -18.17078971862793, "step": 4277 }, { "epoch": 7.36, "learning_rate": 1.0104122396940077e-07, "logits/chosen": -1.6904996633529663, "logits/rejected": -2.048154354095459, "logps/chosen": -175.60557556152344, "logps/rejected": -349.34307861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.976296424865723, "rewards/margins": 14.629508018493652, "rewards/rejected": -23.605804443359375, "step": 4278 }, { "epoch": 7.36, "learning_rate": 1.0093497662558435e-07, "logits/chosen": -1.6466573476791382, "logits/rejected": -2.0084381103515625, "logps/chosen": -121.44418334960938, "logps/rejected": -315.87994384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.934684753417969, "rewards/margins": 16.582046508789062, "rewards/rejected": -22.51673126220703, "step": 4279 }, { "epoch": 7.37, "learning_rate": 1.0082872928176796e-07, "logits/chosen": -1.5352134704589844, "logits/rejected": -1.9182486534118652, "logps/chosen": -135.87698364257812, "logps/rejected": -285.16510009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.361708641052246, "rewards/margins": 13.207937240600586, "rewards/rejected": -19.569644927978516, "step": 4280 }, { "epoch": 7.37, "learning_rate": 1.0072248193795155e-07, "logits/chosen": -1.9952707290649414, "logits/rejected": -1.7059835195541382, "logps/chosen": -161.0230255126953, "logps/rejected": -266.3264465332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.70355749130249, "rewards/margins": 11.892657279968262, "rewards/rejected": -18.596214294433594, "step": 4281 }, { "epoch": 7.37, "learning_rate": 1.0061623459413514e-07, "logits/chosen": -1.6714568138122559, "logits/rejected": -2.1518149375915527, "logps/chosen": -134.9416961669922, "logps/rejected": -338.33660888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.300589561462402, "rewards/margins": 17.841474533081055, "rewards/rejected": -23.14206314086914, "step": 4282 }, { "epoch": 7.37, "learning_rate": 1.0050998725031875e-07, "logits/chosen": -2.1126887798309326, "logits/rejected": -2.090827703475952, "logps/chosen": -163.5390625, "logps/rejected": -331.0322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7088727951049805, "rewards/margins": 15.866556167602539, "rewards/rejected": -23.575429916381836, "step": 4283 }, { "epoch": 7.37, "learning_rate": 1.0040373990650233e-07, "logits/chosen": -1.7002795934677124, "logits/rejected": -2.1367905139923096, "logps/chosen": -136.58517456054688, "logps/rejected": -289.3587646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7162556648254395, "rewards/margins": 14.092323303222656, "rewards/rejected": -18.808578491210938, "step": 4284 }, { "epoch": 7.38, "learning_rate": 1.0029749256268592e-07, "logits/chosen": -1.4972715377807617, "logits/rejected": -1.9286437034606934, "logps/chosen": -110.81036376953125, "logps/rejected": -275.1739807128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9433531761169434, "rewards/margins": 16.63348388671875, "rewards/rejected": -20.57683753967285, "step": 4285 }, { "epoch": 7.38, "learning_rate": 1.0019124521886953e-07, "logits/chosen": -1.8561220169067383, "logits/rejected": -1.902355670928955, "logps/chosen": -146.52818298339844, "logps/rejected": -299.4031982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.573478698730469, "rewards/margins": 15.35222339630127, "rewards/rejected": -21.925703048706055, "step": 4286 }, { "epoch": 7.38, "learning_rate": 1.0008499787505312e-07, "logits/chosen": -1.8185802698135376, "logits/rejected": -1.6078219413757324, "logps/chosen": -154.94351196289062, "logps/rejected": -308.8060607910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.108559608459473, "rewards/margins": 16.265535354614258, "rewards/rejected": -23.374095916748047, "step": 4287 }, { "epoch": 7.38, "learning_rate": 9.99787505312367e-08, "logits/chosen": -1.941329836845398, "logits/rejected": -1.5325692892074585, "logps/chosen": -144.0014190673828, "logps/rejected": -348.5321044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.94936466217041, "rewards/margins": 18.414403915405273, "rewards/rejected": -25.36376953125, "step": 4288 }, { "epoch": 7.38, "learning_rate": 9.987250318742031e-08, "logits/chosen": -1.554648518562317, "logits/rejected": -2.0139801502227783, "logps/chosen": -166.7895965576172, "logps/rejected": -356.4661865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.522187232971191, "rewards/margins": 16.971078872680664, "rewards/rejected": -24.493267059326172, "step": 4289 }, { "epoch": 7.38, "learning_rate": 9.97662558436039e-08, "logits/chosen": -1.9751056432724, "logits/rejected": -1.8441832065582275, "logps/chosen": -180.87765502929688, "logps/rejected": -326.8773193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.490007400512695, "rewards/margins": 15.36625862121582, "rewards/rejected": -23.856266021728516, "step": 4290 }, { "epoch": 7.39, "learning_rate": 9.966000849978751e-08, "logits/chosen": -1.9238476753234863, "logits/rejected": -1.9088088274002075, "logps/chosen": -123.91117095947266, "logps/rejected": -281.005615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.599100112915039, "rewards/margins": 14.887774467468262, "rewards/rejected": -20.486873626708984, "step": 4291 }, { "epoch": 7.39, "learning_rate": 9.95537611559711e-08, "logits/chosen": -1.531548261642456, "logits/rejected": -2.0672953128814697, "logps/chosen": -120.58277893066406, "logps/rejected": -301.59368896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.610313415527344, "rewards/margins": 16.404273986816406, "rewards/rejected": -21.014589309692383, "step": 4292 }, { "epoch": 7.39, "learning_rate": 9.944751381215468e-08, "logits/chosen": -1.7028930187225342, "logits/rejected": -2.016007423400879, "logps/chosen": -187.39041137695312, "logps/rejected": -335.2641296386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.169978141784668, "rewards/margins": 15.581071853637695, "rewards/rejected": -24.751049041748047, "step": 4293 }, { "epoch": 7.39, "learning_rate": 9.93412664683383e-08, "logits/chosen": -1.8270853757858276, "logits/rejected": -1.8112797737121582, "logps/chosen": -163.7813720703125, "logps/rejected": -291.88580322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7029805183410645, "rewards/margins": 12.388389587402344, "rewards/rejected": -20.09136962890625, "step": 4294 }, { "epoch": 7.39, "learning_rate": 9.923501912452188e-08, "logits/chosen": -1.6557697057724, "logits/rejected": -2.0201594829559326, "logps/chosen": -96.56129455566406, "logps/rejected": -308.01470947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1462490558624268, "rewards/margins": 18.333887100219727, "rewards/rejected": -21.48013687133789, "step": 4295 }, { "epoch": 7.39, "learning_rate": 9.912877178070547e-08, "logits/chosen": -1.6624810695648193, "logits/rejected": -1.983689546585083, "logps/chosen": -151.58096313476562, "logps/rejected": -340.0014953613281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.943689346313477, "rewards/margins": 16.25452423095703, "rewards/rejected": -23.198213577270508, "step": 4296 }, { "epoch": 7.4, "learning_rate": 9.902252443688908e-08, "logits/chosen": -2.1501212120056152, "logits/rejected": -2.112659454345703, "logps/chosen": -153.45750427246094, "logps/rejected": -288.7198486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.5662384033203125, "rewards/margins": 14.587265014648438, "rewards/rejected": -20.15350341796875, "step": 4297 }, { "epoch": 7.4, "learning_rate": 9.891627709307266e-08, "logits/chosen": -1.8261630535125732, "logits/rejected": -1.8264240026474, "logps/chosen": -225.0052947998047, "logps/rejected": -351.85687255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.664390563964844, "rewards/margins": 12.16942310333252, "rewards/rejected": -24.833812713623047, "step": 4298 }, { "epoch": 7.4, "learning_rate": 9.881002974925626e-08, "logits/chosen": -1.5771535634994507, "logits/rejected": -1.9267170429229736, "logps/chosen": -147.53424072265625, "logps/rejected": -298.60223388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.767656326293945, "rewards/margins": 13.931835174560547, "rewards/rejected": -20.699491500854492, "step": 4299 }, { "epoch": 7.4, "learning_rate": 9.870378240543986e-08, "logits/chosen": -1.6551141738891602, "logits/rejected": -2.0950100421905518, "logps/chosen": -160.94073486328125, "logps/rejected": -318.7061767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.169672966003418, "rewards/margins": 15.745392799377441, "rewards/rejected": -22.91506576538086, "step": 4300 }, { "epoch": 7.4, "learning_rate": 9.859753506162345e-08, "logits/chosen": -1.7210325002670288, "logits/rejected": -1.9776684045791626, "logps/chosen": -166.67193603515625, "logps/rejected": -358.79913330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.614259719848633, "rewards/margins": 17.99645233154297, "rewards/rejected": -25.6107120513916, "step": 4301 }, { "epoch": 7.4, "learning_rate": 9.849128771780706e-08, "logits/chosen": -2.123227119445801, "logits/rejected": -1.8007910251617432, "logps/chosen": -195.75135803222656, "logps/rejected": -310.38427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.167448043823242, "rewards/margins": 13.659612655639648, "rewards/rejected": -21.82706069946289, "step": 4302 }, { "epoch": 7.41, "learning_rate": 9.838504037399065e-08, "logits/chosen": -2.1504898071289062, "logits/rejected": -1.8446531295776367, "logps/chosen": -114.12625122070312, "logps/rejected": -223.481689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1394248008728027, "rewards/margins": 12.9478120803833, "rewards/rejected": -16.087236404418945, "step": 4303 }, { "epoch": 7.41, "learning_rate": 9.827879303017423e-08, "logits/chosen": -1.956207036972046, "logits/rejected": -1.9670181274414062, "logps/chosen": -140.75662231445312, "logps/rejected": -277.38250732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.871915817260742, "rewards/margins": 13.582263946533203, "rewards/rejected": -19.454179763793945, "step": 4304 }, { "epoch": 7.41, "learning_rate": 9.817254568635784e-08, "logits/chosen": -1.806365728378296, "logits/rejected": -2.008909225463867, "logps/chosen": -123.01345825195312, "logps/rejected": -253.04052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.1782941818237305, "rewards/margins": 11.286752700805664, "rewards/rejected": -15.465045928955078, "step": 4305 }, { "epoch": 7.41, "learning_rate": 9.806629834254143e-08, "logits/chosen": -1.9384167194366455, "logits/rejected": -1.7231757640838623, "logps/chosen": -123.99365234375, "logps/rejected": -274.6077575683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.344738006591797, "rewards/margins": 15.173471450805664, "rewards/rejected": -20.51820945739746, "step": 4306 }, { "epoch": 7.41, "learning_rate": 9.796005099872503e-08, "logits/chosen": -1.9387497901916504, "logits/rejected": -1.6295087337493896, "logps/chosen": -127.8946762084961, "logps/rejected": -264.6640930175781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.048614025115967, "rewards/margins": 14.950037956237793, "rewards/rejected": -19.9986515045166, "step": 4307 }, { "epoch": 7.41, "learning_rate": 9.785380365490863e-08, "logits/chosen": -1.6388318538665771, "logits/rejected": -2.0036139488220215, "logps/chosen": -107.01081848144531, "logps/rejected": -336.1696472167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.978015899658203, "rewards/margins": 20.034915924072266, "rewards/rejected": -23.01293182373047, "step": 4308 }, { "epoch": 7.42, "learning_rate": 9.774755631109221e-08, "logits/chosen": -1.5760425329208374, "logits/rejected": -1.9929442405700684, "logps/chosen": -179.21961975097656, "logps/rejected": -391.2591857910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.990745544433594, "rewards/margins": 18.891014099121094, "rewards/rejected": -28.881757736206055, "step": 4309 }, { "epoch": 7.42, "learning_rate": 9.764130896727582e-08, "logits/chosen": -2.0661611557006836, "logits/rejected": -1.5966384410858154, "logps/chosen": -155.42059326171875, "logps/rejected": -291.77178955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.760592937469482, "rewards/margins": 17.63070297241211, "rewards/rejected": -22.391294479370117, "step": 4310 }, { "epoch": 7.42, "learning_rate": 9.753506162345941e-08, "logits/chosen": -1.575115442276001, "logits/rejected": -1.8366230726242065, "logps/chosen": -138.99899291992188, "logps/rejected": -313.797119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6743059158325195, "rewards/margins": 16.33582305908203, "rewards/rejected": -23.010128021240234, "step": 4311 }, { "epoch": 7.42, "learning_rate": 9.7428814279643e-08, "logits/chosen": -1.5700691938400269, "logits/rejected": -2.0930252075195312, "logps/chosen": -142.3750457763672, "logps/rejected": -368.71844482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.932289123535156, "rewards/margins": 20.181957244873047, "rewards/rejected": -26.114246368408203, "step": 4312 }, { "epoch": 7.42, "learning_rate": 9.732256693582661e-08, "logits/chosen": -1.6628634929656982, "logits/rejected": -1.9255702495574951, "logps/chosen": -147.50479125976562, "logps/rejected": -291.6001281738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.72548770904541, "rewards/margins": 14.34299087524414, "rewards/rejected": -21.068479537963867, "step": 4313 }, { "epoch": 7.43, "learning_rate": 9.72163195920102e-08, "logits/chosen": -1.9313589334487915, "logits/rejected": -2.0064034461975098, "logps/chosen": -134.9071502685547, "logps/rejected": -281.00909423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.750391960144043, "rewards/margins": 13.53853702545166, "rewards/rejected": -19.288928985595703, "step": 4314 }, { "epoch": 7.43, "learning_rate": 9.711007224819378e-08, "logits/chosen": -1.8233544826507568, "logits/rejected": -1.8901240825653076, "logps/chosen": -165.6756134033203, "logps/rejected": -309.6630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.951037406921387, "rewards/margins": 15.038599967956543, "rewards/rejected": -22.98963737487793, "step": 4315 }, { "epoch": 7.43, "learning_rate": 9.700382490437739e-08, "logits/chosen": -1.6883220672607422, "logits/rejected": -1.7615770101547241, "logps/chosen": -123.51251220703125, "logps/rejected": -315.41619873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.791060447692871, "rewards/margins": 16.79148292541504, "rewards/rejected": -21.582544326782227, "step": 4316 }, { "epoch": 7.43, "learning_rate": 9.689757756056098e-08, "logits/chosen": -2.0076358318328857, "logits/rejected": -2.039407730102539, "logps/chosen": -168.0078887939453, "logps/rejected": -287.8694152832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.808098793029785, "rewards/margins": 12.798168182373047, "rewards/rejected": -20.606266021728516, "step": 4317 }, { "epoch": 7.43, "learning_rate": 9.679133021674458e-08, "logits/chosen": -1.5693597793579102, "logits/rejected": -2.0300796031951904, "logps/chosen": -155.26422119140625, "logps/rejected": -343.31475830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3570733070373535, "rewards/margins": 16.81574821472168, "rewards/rejected": -24.172819137573242, "step": 4318 }, { "epoch": 7.43, "learning_rate": 9.668508287292818e-08, "logits/chosen": -1.781465768814087, "logits/rejected": -1.8589037656784058, "logps/chosen": -129.66893005371094, "logps/rejected": -295.81329345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.316486358642578, "rewards/margins": 16.161907196044922, "rewards/rejected": -21.4783935546875, "step": 4319 }, { "epoch": 7.44, "learning_rate": 9.657883552911176e-08, "logits/chosen": -1.5780808925628662, "logits/rejected": -2.050095558166504, "logps/chosen": -115.73908996582031, "logps/rejected": -281.91583251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.65021276473999, "rewards/margins": 16.03881072998047, "rewards/rejected": -20.689023971557617, "step": 4320 }, { "epoch": 7.44, "learning_rate": 9.647258818529537e-08, "logits/chosen": -1.643543004989624, "logits/rejected": -1.9941502809524536, "logps/chosen": -160.91000366210938, "logps/rejected": -318.87615966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.158350944519043, "rewards/margins": 14.423892974853516, "rewards/rejected": -22.582242965698242, "step": 4321 }, { "epoch": 7.44, "learning_rate": 9.636634084147896e-08, "logits/chosen": -1.6924571990966797, "logits/rejected": -2.064892292022705, "logps/chosen": -124.18132781982422, "logps/rejected": -330.992919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.061367988586426, "rewards/margins": 19.305614471435547, "rewards/rejected": -24.366981506347656, "step": 4322 }, { "epoch": 7.44, "learning_rate": 9.626009349766254e-08, "logits/chosen": -1.888946533203125, "logits/rejected": -2.0317869186401367, "logps/chosen": -120.36204528808594, "logps/rejected": -299.28167724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.684284210205078, "rewards/margins": 17.05414581298828, "rewards/rejected": -21.73843002319336, "step": 4323 }, { "epoch": 7.44, "learning_rate": 9.615384615384616e-08, "logits/chosen": -1.9155755043029785, "logits/rejected": -1.855726718902588, "logps/chosen": -157.47598266601562, "logps/rejected": -297.6932067871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2647833824157715, "rewards/margins": 14.36336612701416, "rewards/rejected": -20.628150939941406, "step": 4324 }, { "epoch": 7.44, "learning_rate": 9.604759881002974e-08, "logits/chosen": -1.84989333152771, "logits/rejected": -1.7257726192474365, "logps/chosen": -150.0186767578125, "logps/rejected": -323.5579833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.658817291259766, "rewards/margins": 17.768152236938477, "rewards/rejected": -24.426971435546875, "step": 4325 }, { "epoch": 7.45, "learning_rate": 9.594135146621334e-08, "logits/chosen": -1.9718899726867676, "logits/rejected": -1.5325467586517334, "logps/chosen": -119.43144989013672, "logps/rejected": -266.36187744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.320438385009766, "rewards/margins": 15.452418327331543, "rewards/rejected": -19.772857666015625, "step": 4326 }, { "epoch": 7.45, "learning_rate": 9.583510412239694e-08, "logits/chosen": -1.7746305465698242, "logits/rejected": -1.7908546924591064, "logps/chosen": -155.93560791015625, "logps/rejected": -309.2933349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.752436637878418, "rewards/margins": 16.008460998535156, "rewards/rejected": -22.760896682739258, "step": 4327 }, { "epoch": 7.45, "learning_rate": 9.572885677858053e-08, "logits/chosen": -2.166916847229004, "logits/rejected": -1.8610000610351562, "logps/chosen": -128.9141387939453, "logps/rejected": -260.552490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.416460037231445, "rewards/margins": 12.895116806030273, "rewards/rejected": -17.31157684326172, "step": 4328 }, { "epoch": 7.45, "learning_rate": 9.562260943476412e-08, "logits/chosen": -1.997880458831787, "logits/rejected": -2.062350034713745, "logps/chosen": -132.4285888671875, "logps/rejected": -344.1539001464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7029242515563965, "rewards/margins": 21.14619255065918, "rewards/rejected": -26.849119186401367, "step": 4329 }, { "epoch": 7.45, "learning_rate": 9.551636209094772e-08, "logits/chosen": -1.9720230102539062, "logits/rejected": -1.7852866649627686, "logps/chosen": -163.81907653808594, "logps/rejected": -308.4720153808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.679644584655762, "rewards/margins": 15.393693923950195, "rewards/rejected": -23.07333755493164, "step": 4330 }, { "epoch": 7.45, "learning_rate": 9.541011474713131e-08, "logits/chosen": -1.7987264394760132, "logits/rejected": -2.1631579399108887, "logps/chosen": -146.17489624023438, "logps/rejected": -266.41845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.96375036239624, "rewards/margins": 11.293943405151367, "rewards/rejected": -18.257694244384766, "step": 4331 }, { "epoch": 7.46, "learning_rate": 9.530386740331492e-08, "logits/chosen": -1.878917932510376, "logits/rejected": -1.8395847082138062, "logps/chosen": -130.55926513671875, "logps/rejected": -248.84503173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2630770206451416, "rewards/margins": 13.896899223327637, "rewards/rejected": -17.159975051879883, "step": 4332 }, { "epoch": 7.46, "learning_rate": 9.519762005949851e-08, "logits/chosen": -1.7313873767852783, "logits/rejected": -2.022280693054199, "logps/chosen": -153.9132843017578, "logps/rejected": -353.55755615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.548612594604492, "rewards/margins": 19.234716415405273, "rewards/rejected": -25.783329010009766, "step": 4333 }, { "epoch": 7.46, "learning_rate": 9.50913727156821e-08, "logits/chosen": -1.6815500259399414, "logits/rejected": -1.7119203805923462, "logps/chosen": -147.97055053710938, "logps/rejected": -345.4552001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.673996925354004, "rewards/margins": 19.266849517822266, "rewards/rejected": -25.940845489501953, "step": 4334 }, { "epoch": 7.46, "learning_rate": 9.49851253718657e-08, "logits/chosen": -1.989440679550171, "logits/rejected": -1.6832116842269897, "logps/chosen": -158.44683837890625, "logps/rejected": -301.0521545410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.423059463500977, "rewards/margins": 15.117602348327637, "rewards/rejected": -22.54066276550293, "step": 4335 }, { "epoch": 7.46, "learning_rate": 9.487887802804929e-08, "logits/chosen": -1.9152277708053589, "logits/rejected": -1.9952428340911865, "logps/chosen": -139.79135131835938, "logps/rejected": -312.8245544433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.32445764541626, "rewards/margins": 16.70635414123535, "rewards/rejected": -23.030811309814453, "step": 4336 }, { "epoch": 7.46, "learning_rate": 9.477263068423289e-08, "logits/chosen": -1.6829205751419067, "logits/rejected": -1.9057416915893555, "logps/chosen": -146.7041015625, "logps/rejected": -301.80731201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.100841999053955, "rewards/margins": 14.877439498901367, "rewards/rejected": -20.978281021118164, "step": 4337 }, { "epoch": 7.47, "learning_rate": 9.466638334041649e-08, "logits/chosen": -1.6129810810089111, "logits/rejected": -1.8663698434829712, "logps/chosen": -177.4634246826172, "logps/rejected": -338.80096435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.985123634338379, "rewards/margins": 15.83927059173584, "rewards/rejected": -24.824392318725586, "step": 4338 }, { "epoch": 7.47, "learning_rate": 9.456013599660007e-08, "logits/chosen": -1.9929249286651611, "logits/rejected": -1.4278373718261719, "logps/chosen": -139.44601440429688, "logps/rejected": -259.4288330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.130369663238525, "rewards/margins": 13.584710121154785, "rewards/rejected": -19.71508026123047, "step": 4339 }, { "epoch": 7.47, "learning_rate": 9.445388865278367e-08, "logits/chosen": -2.0100016593933105, "logits/rejected": -1.7129184007644653, "logps/chosen": -169.42430114746094, "logps/rejected": -299.260009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.635322570800781, "rewards/margins": 14.020601272583008, "rewards/rejected": -20.65592384338379, "step": 4340 }, { "epoch": 7.47, "learning_rate": 9.434764130896727e-08, "logits/chosen": -2.12178373336792, "logits/rejected": -1.9523650407791138, "logps/chosen": -141.2916259765625, "logps/rejected": -265.9647216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.053490161895752, "rewards/margins": 13.639106750488281, "rewards/rejected": -19.692598342895508, "step": 4341 }, { "epoch": 7.47, "learning_rate": 9.424139396515087e-08, "logits/chosen": -1.7266099452972412, "logits/rejected": -2.0066897869110107, "logps/chosen": -175.3670654296875, "logps/rejected": -351.0312194824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.641371726989746, "rewards/margins": 17.107677459716797, "rewards/rejected": -24.74905014038086, "step": 4342 }, { "epoch": 7.48, "learning_rate": 9.413514662133447e-08, "logits/chosen": -1.833519458770752, "logits/rejected": -1.7868776321411133, "logps/chosen": -195.37265014648438, "logps/rejected": -303.0694885253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.069076538085938, "rewards/margins": 13.47883415222168, "rewards/rejected": -21.54791259765625, "step": 4343 }, { "epoch": 7.48, "learning_rate": 9.402889927751806e-08, "logits/chosen": -1.847358226776123, "logits/rejected": -2.0236692428588867, "logps/chosen": -145.6642303466797, "logps/rejected": -321.14892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.998527526855469, "rewards/margins": 16.326129913330078, "rewards/rejected": -22.324655532836914, "step": 4344 }, { "epoch": 7.48, "learning_rate": 9.392265193370165e-08, "logits/chosen": -1.581687331199646, "logits/rejected": -1.881199598312378, "logps/chosen": -164.27960205078125, "logps/rejected": -348.05633544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.453963279724121, "rewards/margins": 17.43109130859375, "rewards/rejected": -25.885055541992188, "step": 4345 }, { "epoch": 7.48, "learning_rate": 9.381640458988525e-08, "logits/chosen": -1.514889121055603, "logits/rejected": -1.969824194908142, "logps/chosen": -115.92469787597656, "logps/rejected": -345.46868896484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.136937141418457, "rewards/margins": 20.13453483581543, "rewards/rejected": -26.271472930908203, "step": 4346 }, { "epoch": 7.48, "learning_rate": 9.371015724606884e-08, "logits/chosen": -1.5514401197433472, "logits/rejected": -1.7885067462921143, "logps/chosen": -148.87661743164062, "logps/rejected": -323.9982604980469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.822298049926758, "rewards/margins": 15.798507690429688, "rewards/rejected": -23.620805740356445, "step": 4347 }, { "epoch": 7.48, "learning_rate": 9.360390990225244e-08, "logits/chosen": -1.8840205669403076, "logits/rejected": -1.483189344406128, "logps/chosen": -161.78994750976562, "logps/rejected": -290.219482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.446513175964355, "rewards/margins": 13.50411319732666, "rewards/rejected": -21.950626373291016, "step": 4348 }, { "epoch": 7.49, "learning_rate": 9.349766255843604e-08, "logits/chosen": -1.8427469730377197, "logits/rejected": -1.921515941619873, "logps/chosen": -157.7451934814453, "logps/rejected": -357.55670166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.83424711227417, "rewards/margins": 19.134761810302734, "rewards/rejected": -25.969009399414062, "step": 4349 }, { "epoch": 7.49, "learning_rate": 9.339141521461962e-08, "logits/chosen": -1.8843005895614624, "logits/rejected": -1.61931574344635, "logps/chosen": -164.71768188476562, "logps/rejected": -329.343017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.041952133178711, "rewards/margins": 15.04505443572998, "rewards/rejected": -23.087007522583008, "step": 4350 }, { "epoch": 7.49, "learning_rate": 9.328516787080322e-08, "logits/chosen": -1.562805414199829, "logits/rejected": -2.114968776702881, "logps/chosen": -165.4364776611328, "logps/rejected": -379.4632873535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.635906219482422, "rewards/margins": 17.577903747558594, "rewards/rejected": -25.213809967041016, "step": 4351 }, { "epoch": 7.49, "learning_rate": 9.317892052698682e-08, "logits/chosen": -1.664880394935608, "logits/rejected": -1.8561222553253174, "logps/chosen": -173.88467407226562, "logps/rejected": -347.7198181152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.032995223999023, "rewards/margins": 16.426231384277344, "rewards/rejected": -24.459226608276367, "step": 4352 }, { "epoch": 7.49, "learning_rate": 9.307267318317042e-08, "logits/chosen": -1.6602318286895752, "logits/rejected": -1.7581095695495605, "logps/chosen": -185.81494140625, "logps/rejected": -360.8477478027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.27774429321289, "rewards/margins": 18.766223907470703, "rewards/rejected": -27.043968200683594, "step": 4353 }, { "epoch": 7.49, "learning_rate": 9.296642583935402e-08, "logits/chosen": -1.813485860824585, "logits/rejected": -1.8726458549499512, "logps/chosen": -165.50653076171875, "logps/rejected": -334.0609130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.310686111450195, "rewards/margins": 17.17789077758789, "rewards/rejected": -25.488576889038086, "step": 4354 }, { "epoch": 7.5, "learning_rate": 9.28601784955376e-08, "logits/chosen": -1.9712560176849365, "logits/rejected": -1.8081268072128296, "logps/chosen": -191.97547912597656, "logps/rejected": -372.14794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.095511436462402, "rewards/margins": 18.004409790039062, "rewards/rejected": -28.09992218017578, "step": 4355 }, { "epoch": 7.5, "learning_rate": 9.27539311517212e-08, "logits/chosen": -1.8334920406341553, "logits/rejected": -2.017754554748535, "logps/chosen": -177.73886108398438, "logps/rejected": -292.9517517089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.500362396240234, "rewards/margins": 11.760087966918945, "rewards/rejected": -20.260448455810547, "step": 4356 }, { "epoch": 7.5, "learning_rate": 9.26476838079048e-08, "logits/chosen": -1.5592886209487915, "logits/rejected": -1.6289008855819702, "logps/chosen": -187.28248596191406, "logps/rejected": -362.00262451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.964254379272461, "rewards/margins": 17.105619430541992, "rewards/rejected": -27.069873809814453, "step": 4357 }, { "epoch": 7.5, "learning_rate": 9.254143646408839e-08, "logits/chosen": -1.829124927520752, "logits/rejected": -1.360572338104248, "logps/chosen": -100.07369995117188, "logps/rejected": -249.36073303222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.258622646331787, "rewards/margins": 14.821977615356445, "rewards/rejected": -19.08060073852539, "step": 4358 }, { "epoch": 7.5, "learning_rate": 9.243518912027199e-08, "logits/chosen": -1.8396515846252441, "logits/rejected": -1.80893874168396, "logps/chosen": -134.74185180664062, "logps/rejected": -297.14288330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.047333717346191, "rewards/margins": 16.351308822631836, "rewards/rejected": -21.398643493652344, "step": 4359 }, { "epoch": 7.5, "learning_rate": 9.232894177645558e-08, "logits/chosen": -2.057283639907837, "logits/rejected": -2.055814266204834, "logps/chosen": -178.80213928222656, "logps/rejected": -306.820068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.794034957885742, "rewards/margins": 13.253013610839844, "rewards/rejected": -22.047048568725586, "step": 4360 }, { "epoch": 7.51, "learning_rate": 9.222269443263918e-08, "logits/chosen": -1.718842625617981, "logits/rejected": -2.1328444480895996, "logps/chosen": -121.791748046875, "logps/rejected": -328.0064697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.087728500366211, "rewards/margins": 18.581941604614258, "rewards/rejected": -23.66967010498047, "step": 4361 }, { "epoch": 7.51, "learning_rate": 9.211644708882277e-08, "logits/chosen": -1.8826931715011597, "logits/rejected": -1.7642573118209839, "logps/chosen": -144.60989379882812, "logps/rejected": -271.247314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.891064643859863, "rewards/margins": 12.206770896911621, "rewards/rejected": -18.097835540771484, "step": 4362 }, { "epoch": 7.51, "learning_rate": 9.201019974500637e-08, "logits/chosen": -2.1187593936920166, "logits/rejected": -2.0219945907592773, "logps/chosen": -118.99305725097656, "logps/rejected": -316.02008056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.078855037689209, "rewards/margins": 17.81479263305664, "rewards/rejected": -21.89365005493164, "step": 4363 }, { "epoch": 7.51, "learning_rate": 9.190395240118997e-08, "logits/chosen": -1.5989794731140137, "logits/rejected": -1.9309697151184082, "logps/chosen": -142.9752197265625, "logps/rejected": -319.37506103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.401771545410156, "rewards/margins": 17.29694175720215, "rewards/rejected": -22.698715209960938, "step": 4364 }, { "epoch": 7.51, "learning_rate": 9.179770505737357e-08, "logits/chosen": -1.4392986297607422, "logits/rejected": -1.9536941051483154, "logps/chosen": -201.6778564453125, "logps/rejected": -353.0574951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.3607759475708, "rewards/margins": 14.49473762512207, "rewards/rejected": -23.855510711669922, "step": 4365 }, { "epoch": 7.51, "learning_rate": 9.169145771355715e-08, "logits/chosen": -1.6089940071105957, "logits/rejected": -1.9860563278198242, "logps/chosen": -154.27822875976562, "logps/rejected": -297.6187744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.769843101501465, "rewards/margins": 15.26887321472168, "rewards/rejected": -23.03871726989746, "step": 4366 }, { "epoch": 7.52, "learning_rate": 9.158521036974075e-08, "logits/chosen": -1.5857502222061157, "logits/rejected": -1.7938045263290405, "logps/chosen": -128.01712036132812, "logps/rejected": -311.1813659667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.208493709564209, "rewards/margins": 17.460079193115234, "rewards/rejected": -23.66857147216797, "step": 4367 }, { "epoch": 7.52, "learning_rate": 9.147896302592435e-08, "logits/chosen": -1.6184883117675781, "logits/rejected": -1.8935195207595825, "logps/chosen": -168.515625, "logps/rejected": -366.93658447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.871773719787598, "rewards/margins": 16.691364288330078, "rewards/rejected": -26.563140869140625, "step": 4368 }, { "epoch": 7.52, "learning_rate": 9.137271568210795e-08, "logits/chosen": -1.7407687902450562, "logits/rejected": -1.8959119319915771, "logps/chosen": -134.65676879882812, "logps/rejected": -274.79376220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.622416019439697, "rewards/margins": 14.894408226013184, "rewards/rejected": -19.51682472229004, "step": 4369 }, { "epoch": 7.52, "learning_rate": 9.126646833829153e-08, "logits/chosen": -1.6818822622299194, "logits/rejected": -1.8182169198989868, "logps/chosen": -142.28659057617188, "logps/rejected": -352.6138610839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.625966548919678, "rewards/margins": 19.71210479736328, "rewards/rejected": -25.338069915771484, "step": 4370 }, { "epoch": 7.52, "learning_rate": 9.116022099447513e-08, "logits/chosen": -1.671422004699707, "logits/rejected": -1.8115146160125732, "logps/chosen": -121.3857650756836, "logps/rejected": -267.6141357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.402052879333496, "rewards/margins": 14.306079864501953, "rewards/rejected": -18.708133697509766, "step": 4371 }, { "epoch": 7.52, "learning_rate": 9.105397365065873e-08, "logits/chosen": -1.7430979013442993, "logits/rejected": -2.121668577194214, "logps/chosen": -155.80487060546875, "logps/rejected": -329.668212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.037527084350586, "rewards/margins": 16.030183792114258, "rewards/rejected": -24.067710876464844, "step": 4372 }, { "epoch": 7.53, "learning_rate": 9.094772630684233e-08, "logits/chosen": -1.8037091493606567, "logits/rejected": -1.9619375467300415, "logps/chosen": -139.7791748046875, "logps/rejected": -302.2548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1812639236450195, "rewards/margins": 15.701634407043457, "rewards/rejected": -21.88290023803711, "step": 4373 }, { "epoch": 7.53, "learning_rate": 9.084147896302592e-08, "logits/chosen": -1.7448493242263794, "logits/rejected": -1.5911887884140015, "logps/chosen": -161.7898406982422, "logps/rejected": -312.736572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.153586387634277, "rewards/margins": 15.023822784423828, "rewards/rejected": -23.177406311035156, "step": 4374 }, { "epoch": 7.53, "learning_rate": 9.073523161920952e-08, "logits/chosen": -1.9003955125808716, "logits/rejected": -1.867286205291748, "logps/chosen": -154.33290100097656, "logps/rejected": -287.22601318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.549370288848877, "rewards/margins": 13.444143295288086, "rewards/rejected": -19.993515014648438, "step": 4375 }, { "epoch": 7.53, "learning_rate": 9.062898427539311e-08, "logits/chosen": -1.612663984298706, "logits/rejected": -1.5649101734161377, "logps/chosen": -167.0753173828125, "logps/rejected": -367.2189025878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.828625679016113, "rewards/margins": 20.189720153808594, "rewards/rejected": -26.01834487915039, "step": 4376 }, { "epoch": 7.53, "learning_rate": 9.052273693157671e-08, "logits/chosen": -2.0184736251831055, "logits/rejected": -1.606913685798645, "logps/chosen": -222.56626892089844, "logps/rejected": -359.23748779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.881362915039062, "rewards/margins": 14.877788543701172, "rewards/rejected": -25.759151458740234, "step": 4377 }, { "epoch": 7.54, "learning_rate": 9.04164895877603e-08, "logits/chosen": -1.5286797285079956, "logits/rejected": -1.980791449546814, "logps/chosen": -123.34235382080078, "logps/rejected": -283.1840515136719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.08225679397583, "rewards/margins": 15.232522964477539, "rewards/rejected": -19.314781188964844, "step": 4378 }, { "epoch": 7.54, "learning_rate": 9.03102422439439e-08, "logits/chosen": -1.9425805807113647, "logits/rejected": -1.8891695737838745, "logps/chosen": -140.51910400390625, "logps/rejected": -296.282958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5450029373168945, "rewards/margins": 14.157017707824707, "rewards/rejected": -20.7020206451416, "step": 4379 }, { "epoch": 7.54, "learning_rate": 9.02039949001275e-08, "logits/chosen": -1.896727442741394, "logits/rejected": -2.238947868347168, "logps/chosen": -154.17965698242188, "logps/rejected": -301.17987060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3614044189453125, "rewards/margins": 13.52884292602539, "rewards/rejected": -20.890247344970703, "step": 4380 }, { "epoch": 7.54, "learning_rate": 9.009774755631108e-08, "logits/chosen": -1.8500392436981201, "logits/rejected": -2.0289793014526367, "logps/chosen": -154.51162719726562, "logps/rejected": -313.1893310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.515233993530273, "rewards/margins": 17.04937744140625, "rewards/rejected": -23.564611434936523, "step": 4381 }, { "epoch": 7.54, "learning_rate": 8.999150021249468e-08, "logits/chosen": -1.8593063354492188, "logits/rejected": -1.9777647256851196, "logps/chosen": -172.2540740966797, "logps/rejected": -349.4947814941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.029022216796875, "rewards/margins": 18.095531463623047, "rewards/rejected": -26.124553680419922, "step": 4382 }, { "epoch": 7.54, "learning_rate": 8.988525286867828e-08, "logits/chosen": -1.5371873378753662, "logits/rejected": -2.0478460788726807, "logps/chosen": -181.02000427246094, "logps/rejected": -353.18792724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.809026718139648, "rewards/margins": 15.913082122802734, "rewards/rejected": -23.722110748291016, "step": 4383 }, { "epoch": 7.55, "learning_rate": 8.977900552486188e-08, "logits/chosen": -1.8795963525772095, "logits/rejected": -1.8789640665054321, "logps/chosen": -198.88671875, "logps/rejected": -331.15924072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.951468467712402, "rewards/margins": 12.523906707763672, "rewards/rejected": -22.475374221801758, "step": 4384 }, { "epoch": 7.55, "learning_rate": 8.967275818104548e-08, "logits/chosen": -1.5368123054504395, "logits/rejected": -2.015254497528076, "logps/chosen": -165.4751739501953, "logps/rejected": -321.03460693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.521356582641602, "rewards/margins": 14.158060073852539, "rewards/rejected": -22.67941665649414, "step": 4385 }, { "epoch": 7.55, "learning_rate": 8.956651083722906e-08, "logits/chosen": -1.661646842956543, "logits/rejected": -1.8618052005767822, "logps/chosen": -168.3193359375, "logps/rejected": -369.10821533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.8237886428833, "rewards/margins": 17.00801658630371, "rewards/rejected": -26.831806182861328, "step": 4386 }, { "epoch": 7.55, "learning_rate": 8.946026349341266e-08, "logits/chosen": -1.5744500160217285, "logits/rejected": -1.6821306943893433, "logps/chosen": -185.32064819335938, "logps/rejected": -316.40509033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.56282901763916, "rewards/margins": 12.913928985595703, "rewards/rejected": -22.47675895690918, "step": 4387 }, { "epoch": 7.55, "learning_rate": 8.935401614959626e-08, "logits/chosen": -1.985126256942749, "logits/rejected": -1.444353461265564, "logps/chosen": -183.29881286621094, "logps/rejected": -288.941162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.792656898498535, "rewards/margins": 12.070855140686035, "rewards/rejected": -21.863513946533203, "step": 4388 }, { "epoch": 7.55, "learning_rate": 8.924776880577985e-08, "logits/chosen": -1.7835745811462402, "logits/rejected": -1.745216965675354, "logps/chosen": -177.69920349121094, "logps/rejected": -291.9251708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.085302352905273, "rewards/margins": 12.081418991088867, "rewards/rejected": -22.166719436645508, "step": 4389 }, { "epoch": 7.56, "learning_rate": 8.914152146196345e-08, "logits/chosen": -1.420276403427124, "logits/rejected": -2.1021604537963867, "logps/chosen": -231.31224060058594, "logps/rejected": -416.7508239746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.922107696533203, "rewards/margins": 16.053707122802734, "rewards/rejected": -28.975814819335938, "step": 4390 }, { "epoch": 7.56, "learning_rate": 8.903527411814704e-08, "logits/chosen": -1.8462586402893066, "logits/rejected": -1.7520500421524048, "logps/chosen": -194.00709533691406, "logps/rejected": -285.35894775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.306177139282227, "rewards/margins": 11.120075225830078, "rewards/rejected": -21.426252365112305, "step": 4391 }, { "epoch": 7.56, "learning_rate": 8.892902677433063e-08, "logits/chosen": -2.1284446716308594, "logits/rejected": -1.7061171531677246, "logps/chosen": -159.43167114257812, "logps/rejected": -293.41949462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.974261283874512, "rewards/margins": 14.331014633178711, "rewards/rejected": -21.30527687072754, "step": 4392 }, { "epoch": 7.56, "learning_rate": 8.882277943051423e-08, "logits/chosen": -1.8958911895751953, "logits/rejected": -2.0184171199798584, "logps/chosen": -147.39813232421875, "logps/rejected": -313.6521911621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.019946575164795, "rewards/margins": 15.838470458984375, "rewards/rejected": -22.858417510986328, "step": 4393 }, { "epoch": 7.56, "learning_rate": 8.871653208669783e-08, "logits/chosen": -2.1552066802978516, "logits/rejected": -1.9306288957595825, "logps/chosen": -171.01788330078125, "logps/rejected": -307.64215087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.695512771606445, "rewards/margins": 14.425130844116211, "rewards/rejected": -21.120643615722656, "step": 4394 }, { "epoch": 7.56, "learning_rate": 8.861028474288143e-08, "logits/chosen": -1.9582267999649048, "logits/rejected": -2.013370990753174, "logps/chosen": -121.24578857421875, "logps/rejected": -266.6868896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.429140567779541, "rewards/margins": 15.069391250610352, "rewards/rejected": -20.498531341552734, "step": 4395 }, { "epoch": 7.57, "learning_rate": 8.850403739906503e-08, "logits/chosen": -2.1268558502197266, "logits/rejected": -1.497464895248413, "logps/chosen": -202.7239990234375, "logps/rejected": -319.29583740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.72220516204834, "rewards/margins": 13.33001708984375, "rewards/rejected": -23.052223205566406, "step": 4396 }, { "epoch": 7.57, "learning_rate": 8.839779005524861e-08, "logits/chosen": -1.4132364988327026, "logits/rejected": -1.7869434356689453, "logps/chosen": -131.7386016845703, "logps/rejected": -282.6526184082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.647800445556641, "rewards/margins": 13.893818855285645, "rewards/rejected": -20.5416202545166, "step": 4397 }, { "epoch": 7.57, "learning_rate": 8.829154271143221e-08, "logits/chosen": -1.5410146713256836, "logits/rejected": -2.0014681816101074, "logps/chosen": -118.33067321777344, "logps/rejected": -313.4078369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.441481590270996, "rewards/margins": 17.967041015625, "rewards/rejected": -23.40852165222168, "step": 4398 }, { "epoch": 7.57, "learning_rate": 8.818529536761581e-08, "logits/chosen": -1.7265903949737549, "logits/rejected": -1.9853143692016602, "logps/chosen": -145.401611328125, "logps/rejected": -277.417724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.628472328186035, "rewards/margins": 13.262428283691406, "rewards/rejected": -19.890901565551758, "step": 4399 }, { "epoch": 7.57, "learning_rate": 8.80790480237994e-08, "logits/chosen": -1.918952465057373, "logits/rejected": -2.0914413928985596, "logps/chosen": -141.85292053222656, "logps/rejected": -329.0780944824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.845803737640381, "rewards/margins": 16.586118698120117, "rewards/rejected": -24.431922912597656, "step": 4400 }, { "epoch": 7.57, "learning_rate": 8.7972800679983e-08, "logits/chosen": -1.7001397609710693, "logits/rejected": -1.7942655086517334, "logps/chosen": -164.94168090820312, "logps/rejected": -284.7311096191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.34714126586914, "rewards/margins": 13.271681785583496, "rewards/rejected": -21.618824005126953, "step": 4401 }, { "epoch": 7.58, "learning_rate": 8.786655333616659e-08, "logits/chosen": -1.6646201610565186, "logits/rejected": -1.9950075149536133, "logps/chosen": -177.65963745117188, "logps/rejected": -324.0361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.911428928375244, "rewards/margins": 15.08027458190918, "rewards/rejected": -22.991703033447266, "step": 4402 }, { "epoch": 7.58, "learning_rate": 8.776030599235018e-08, "logits/chosen": -2.0517358779907227, "logits/rejected": -1.9254164695739746, "logps/chosen": -139.91372680664062, "logps/rejected": -287.8836669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.777436256408691, "rewards/margins": 14.323417663574219, "rewards/rejected": -20.100852966308594, "step": 4403 }, { "epoch": 7.58, "learning_rate": 8.765405864853379e-08, "logits/chosen": -1.9500441551208496, "logits/rejected": -1.7565455436706543, "logps/chosen": -171.344970703125, "logps/rejected": -283.5494079589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.786855697631836, "rewards/margins": 13.626445770263672, "rewards/rejected": -20.413301467895508, "step": 4404 }, { "epoch": 7.58, "learning_rate": 8.754781130471738e-08, "logits/chosen": -1.9408984184265137, "logits/rejected": -1.845747709274292, "logps/chosen": -205.89788818359375, "logps/rejected": -350.7314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.437761306762695, "rewards/margins": 15.944293022155762, "rewards/rejected": -25.382055282592773, "step": 4405 }, { "epoch": 7.58, "learning_rate": 8.744156396090098e-08, "logits/chosen": -1.9539237022399902, "logits/rejected": -1.6851807832717896, "logps/chosen": -179.01380920410156, "logps/rejected": -283.61798095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.154474258422852, "rewards/margins": 11.045812606811523, "rewards/rejected": -19.200286865234375, "step": 4406 }, { "epoch": 7.59, "learning_rate": 8.733531661708457e-08, "logits/chosen": -2.038370370864868, "logits/rejected": -1.7851545810699463, "logps/chosen": -187.9224853515625, "logps/rejected": -339.9468994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.232449531555176, "rewards/margins": 15.617935180664062, "rewards/rejected": -23.850387573242188, "step": 4407 }, { "epoch": 7.59, "learning_rate": 8.722906927326816e-08, "logits/chosen": -1.6306424140930176, "logits/rejected": -1.7138320207595825, "logps/chosen": -169.39849853515625, "logps/rejected": -303.21209716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.65727424621582, "rewards/margins": 14.15018081665039, "rewards/rejected": -21.807456970214844, "step": 4408 }, { "epoch": 7.59, "learning_rate": 8.712282192945176e-08, "logits/chosen": -1.6004533767700195, "logits/rejected": -1.8663861751556396, "logps/chosen": -171.0196075439453, "logps/rejected": -356.97174072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.952951908111572, "rewards/margins": 17.014461517333984, "rewards/rejected": -24.96741485595703, "step": 4409 }, { "epoch": 7.59, "learning_rate": 8.701657458563536e-08, "logits/chosen": -1.609731674194336, "logits/rejected": -2.0284364223480225, "logps/chosen": -111.0126953125, "logps/rejected": -301.1930847167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.957117557525635, "rewards/margins": 17.42655372619629, "rewards/rejected": -22.383670806884766, "step": 4410 }, { "epoch": 7.59, "learning_rate": 8.691032724181894e-08, "logits/chosen": -1.8047198057174683, "logits/rejected": -1.9121958017349243, "logps/chosen": -142.10504150390625, "logps/rejected": -303.5439758300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.10288667678833, "rewards/margins": 15.30578899383545, "rewards/rejected": -21.408674240112305, "step": 4411 }, { "epoch": 7.59, "learning_rate": 8.680407989800256e-08, "logits/chosen": -1.5151755809783936, "logits/rejected": -1.5079104900360107, "logps/chosen": -172.20901489257812, "logps/rejected": -346.3916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.42241382598877, "rewards/margins": 16.47795295715332, "rewards/rejected": -25.900367736816406, "step": 4412 }, { "epoch": 7.6, "learning_rate": 8.669783255418614e-08, "logits/chosen": -1.6591874361038208, "logits/rejected": -1.554003119468689, "logps/chosen": -125.88121795654297, "logps/rejected": -291.7986755371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.82551383972168, "rewards/margins": 16.468482971191406, "rewards/rejected": -21.29399871826172, "step": 4413 }, { "epoch": 7.6, "learning_rate": 8.659158521036973e-08, "logits/chosen": -1.7708780765533447, "logits/rejected": -1.5294243097305298, "logps/chosen": -184.1292266845703, "logps/rejected": -319.6777038574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.517132759094238, "rewards/margins": 14.379568099975586, "rewards/rejected": -21.896699905395508, "step": 4414 }, { "epoch": 7.6, "learning_rate": 8.648533786655334e-08, "logits/chosen": -1.8067671060562134, "logits/rejected": -1.9237960577011108, "logps/chosen": -160.3778533935547, "logps/rejected": -305.10394287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.912286758422852, "rewards/margins": 12.751506805419922, "rewards/rejected": -19.663793563842773, "step": 4415 }, { "epoch": 7.6, "learning_rate": 8.637909052273692e-08, "logits/chosen": -2.0382492542266846, "logits/rejected": -1.9355583190917969, "logps/chosen": -138.6981658935547, "logps/rejected": -296.8095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.532190799713135, "rewards/margins": 14.74361515045166, "rewards/rejected": -20.275806427001953, "step": 4416 }, { "epoch": 7.6, "learning_rate": 8.627284317892052e-08, "logits/chosen": -1.7764604091644287, "logits/rejected": -1.701546311378479, "logps/chosen": -167.30796813964844, "logps/rejected": -319.14208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.294748306274414, "rewards/margins": 14.503628730773926, "rewards/rejected": -22.798376083374023, "step": 4417 }, { "epoch": 7.6, "learning_rate": 8.616659583510412e-08, "logits/chosen": -2.0679941177368164, "logits/rejected": -1.380977749824524, "logps/chosen": -159.88079833984375, "logps/rejected": -239.24566650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.941086769104004, "rewards/margins": 11.317212104797363, "rewards/rejected": -17.258298873901367, "step": 4418 }, { "epoch": 7.61, "learning_rate": 8.606034849128771e-08, "logits/chosen": -1.8888843059539795, "logits/rejected": -1.863660216331482, "logps/chosen": -184.82424926757812, "logps/rejected": -303.505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.795825958251953, "rewards/margins": 13.457423210144043, "rewards/rejected": -23.253250122070312, "step": 4419 }, { "epoch": 7.61, "learning_rate": 8.595410114747132e-08, "logits/chosen": -1.923316478729248, "logits/rejected": -1.9406673908233643, "logps/chosen": -180.16976928710938, "logps/rejected": -345.4436950683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.164867401123047, "rewards/margins": 17.20134735107422, "rewards/rejected": -24.366214752197266, "step": 4420 }, { "epoch": 7.61, "learning_rate": 8.58478538036549e-08, "logits/chosen": -1.75021493434906, "logits/rejected": -1.6645872592926025, "logps/chosen": -147.4796142578125, "logps/rejected": -293.9479675292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.700423717498779, "rewards/margins": 14.55439567565918, "rewards/rejected": -21.254819869995117, "step": 4421 }, { "epoch": 7.61, "learning_rate": 8.574160645983849e-08, "logits/chosen": -1.929909110069275, "logits/rejected": -1.480221152305603, "logps/chosen": -149.29010009765625, "logps/rejected": -284.13482666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.347610950469971, "rewards/margins": 14.158349990844727, "rewards/rejected": -20.50596046447754, "step": 4422 }, { "epoch": 7.61, "learning_rate": 8.56353591160221e-08, "logits/chosen": -1.8208668231964111, "logits/rejected": -1.9408972263336182, "logps/chosen": -167.06222534179688, "logps/rejected": -325.65313720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.360527515411377, "rewards/margins": 16.199203491210938, "rewards/rejected": -23.559730529785156, "step": 4423 }, { "epoch": 7.61, "learning_rate": 8.552911177220569e-08, "logits/chosen": -1.6460613012313843, "logits/rejected": -1.796770453453064, "logps/chosen": -125.89126586914062, "logps/rejected": -275.95111083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.1583251953125, "rewards/margins": 14.514097213745117, "rewards/rejected": -18.67242431640625, "step": 4424 }, { "epoch": 7.62, "learning_rate": 8.542286442838928e-08, "logits/chosen": -1.9662755727767944, "logits/rejected": -1.7438156604766846, "logps/chosen": -167.80917358398438, "logps/rejected": -322.41619873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4609456062316895, "rewards/margins": 16.19097137451172, "rewards/rejected": -21.65191650390625, "step": 4425 }, { "epoch": 7.62, "learning_rate": 8.531661708457289e-08, "logits/chosen": -1.9668179750442505, "logits/rejected": -1.5484267473220825, "logps/chosen": -160.3814697265625, "logps/rejected": -283.0402526855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.240845680236816, "rewards/margins": 13.752425193786621, "rewards/rejected": -20.993270874023438, "step": 4426 }, { "epoch": 7.62, "learning_rate": 8.521036974075647e-08, "logits/chosen": -1.9271637201309204, "logits/rejected": -1.7581671476364136, "logps/chosen": -181.92996215820312, "logps/rejected": -296.44140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.754505157470703, "rewards/margins": 13.700129508972168, "rewards/rejected": -21.454633712768555, "step": 4427 }, { "epoch": 7.62, "learning_rate": 8.510412239694007e-08, "logits/chosen": -1.8558907508850098, "logits/rejected": -1.4745690822601318, "logps/chosen": -139.97225952148438, "logps/rejected": -301.6020812988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.990378379821777, "rewards/margins": 15.984277725219727, "rewards/rejected": -21.974655151367188, "step": 4428 }, { "epoch": 7.62, "learning_rate": 8.499787505312367e-08, "logits/chosen": -1.849669098854065, "logits/rejected": -1.5888559818267822, "logps/chosen": -151.8546142578125, "logps/rejected": -312.1965026855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.012376308441162, "rewards/margins": 16.7353515625, "rewards/rejected": -22.747726440429688, "step": 4429 }, { "epoch": 7.62, "learning_rate": 8.489162770930726e-08, "logits/chosen": -1.8468577861785889, "logits/rejected": -1.9674991369247437, "logps/chosen": -142.5309600830078, "logps/rejected": -336.7196960449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.525049209594727, "rewards/margins": 17.91063117980957, "rewards/rejected": -24.43568229675293, "step": 4430 }, { "epoch": 7.63, "learning_rate": 8.478538036549087e-08, "logits/chosen": -1.536733627319336, "logits/rejected": -1.8778184652328491, "logps/chosen": -141.8504638671875, "logps/rejected": -356.88629150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.981356620788574, "rewards/margins": 20.011150360107422, "rewards/rejected": -25.992507934570312, "step": 4431 }, { "epoch": 7.63, "learning_rate": 8.467913302167445e-08, "logits/chosen": -1.6013606786727905, "logits/rejected": -1.9890226125717163, "logps/chosen": -152.9278564453125, "logps/rejected": -333.78271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0274152755737305, "rewards/margins": 17.698413848876953, "rewards/rejected": -23.725830078125, "step": 4432 }, { "epoch": 7.63, "learning_rate": 8.457288567785804e-08, "logits/chosen": -1.7349767684936523, "logits/rejected": -1.743827223777771, "logps/chosen": -122.37086486816406, "logps/rejected": -289.8201904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.247710227966309, "rewards/margins": 15.906081199645996, "rewards/rejected": -21.153791427612305, "step": 4433 }, { "epoch": 7.63, "learning_rate": 8.446663833404165e-08, "logits/chosen": -1.898140549659729, "logits/rejected": -1.8179386854171753, "logps/chosen": -111.62977600097656, "logps/rejected": -268.61248779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.882589817047119, "rewards/margins": 14.552364349365234, "rewards/rejected": -18.434953689575195, "step": 4434 }, { "epoch": 7.63, "learning_rate": 8.436039099022524e-08, "logits/chosen": -1.627284049987793, "logits/rejected": -1.8987863063812256, "logps/chosen": -130.37515258789062, "logps/rejected": -269.7373352050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.412972450256348, "rewards/margins": 12.592884063720703, "rewards/rejected": -18.005857467651367, "step": 4435 }, { "epoch": 7.64, "learning_rate": 8.425414364640884e-08, "logits/chosen": -1.828209638595581, "logits/rejected": -1.7987076044082642, "logps/chosen": -138.9404296875, "logps/rejected": -284.5487060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2995381355285645, "rewards/margins": 14.604104042053223, "rewards/rejected": -20.903644561767578, "step": 4436 }, { "epoch": 7.64, "learning_rate": 8.414789630259244e-08, "logits/chosen": -1.8586632013320923, "logits/rejected": -1.7746424674987793, "logps/chosen": -150.80174255371094, "logps/rejected": -274.7845764160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.577098846435547, "rewards/margins": 12.872852325439453, "rewards/rejected": -20.449951171875, "step": 4437 }, { "epoch": 7.64, "learning_rate": 8.404164895877602e-08, "logits/chosen": -1.9652122259140015, "logits/rejected": -1.4761621952056885, "logps/chosen": -153.42132568359375, "logps/rejected": -272.4577331542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.288023948669434, "rewards/margins": 13.157743453979492, "rewards/rejected": -20.44576644897461, "step": 4438 }, { "epoch": 7.64, "learning_rate": 8.393540161495963e-08, "logits/chosen": -1.9561653137207031, "logits/rejected": -1.652529239654541, "logps/chosen": -224.48989868164062, "logps/rejected": -345.01593017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.756284713745117, "rewards/margins": 14.206342697143555, "rewards/rejected": -24.962627410888672, "step": 4439 }, { "epoch": 7.64, "learning_rate": 8.382915427114322e-08, "logits/chosen": -1.6794164180755615, "logits/rejected": -1.918704628944397, "logps/chosen": -183.89219665527344, "logps/rejected": -338.22906494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.798397064208984, "rewards/margins": 13.617644309997559, "rewards/rejected": -24.416040420532227, "step": 4440 }, { "epoch": 7.64, "learning_rate": 8.37229069273268e-08, "logits/chosen": -1.4957411289215088, "logits/rejected": -2.009769916534424, "logps/chosen": -132.199462890625, "logps/rejected": -315.9479675292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.388901233673096, "rewards/margins": 16.396480560302734, "rewards/rejected": -20.785381317138672, "step": 4441 }, { "epoch": 7.65, "learning_rate": 8.361665958351042e-08, "logits/chosen": -1.8287277221679688, "logits/rejected": -2.0120420455932617, "logps/chosen": -114.37158203125, "logps/rejected": -319.8544006347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.379680633544922, "rewards/margins": 19.120420455932617, "rewards/rejected": -24.500102996826172, "step": 4442 }, { "epoch": 7.65, "learning_rate": 8.3510412239694e-08, "logits/chosen": -1.6276650428771973, "logits/rejected": -1.9170174598693848, "logps/chosen": -173.4556884765625, "logps/rejected": -333.5653076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.476824760437012, "rewards/margins": 15.651739120483398, "rewards/rejected": -24.128564834594727, "step": 4443 }, { "epoch": 7.65, "learning_rate": 8.340416489587759e-08, "logits/chosen": -1.1597394943237305, "logits/rejected": -2.0527238845825195, "logps/chosen": -137.05491638183594, "logps/rejected": -314.7346496582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6959123611450195, "rewards/margins": 14.405120849609375, "rewards/rejected": -21.101032257080078, "step": 4444 }, { "epoch": 7.65, "learning_rate": 8.32979175520612e-08, "logits/chosen": -1.9185256958007812, "logits/rejected": -1.6031838655471802, "logps/chosen": -161.82546997070312, "logps/rejected": -284.38623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8649702072143555, "rewards/margins": 12.119134902954102, "rewards/rejected": -19.98410415649414, "step": 4445 }, { "epoch": 7.65, "learning_rate": 8.319167020824479e-08, "logits/chosen": -1.8817188739776611, "logits/rejected": -1.887373685836792, "logps/chosen": -142.77017211914062, "logps/rejected": -304.25274658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.005152702331543, "rewards/margins": 16.061986923217773, "rewards/rejected": -21.067138671875, "step": 4446 }, { "epoch": 7.65, "learning_rate": 8.30854228644284e-08, "logits/chosen": -2.0023136138916016, "logits/rejected": -1.9395933151245117, "logps/chosen": -147.22216796875, "logps/rejected": -318.3269958496094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.75666618347168, "rewards/margins": 15.836729049682617, "rewards/rejected": -21.593395233154297, "step": 4447 }, { "epoch": 7.66, "learning_rate": 8.297917552061198e-08, "logits/chosen": -1.9615275859832764, "logits/rejected": -1.978607177734375, "logps/chosen": -153.20684814453125, "logps/rejected": -321.6942138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.144563674926758, "rewards/margins": 15.465461730957031, "rewards/rejected": -21.61002540588379, "step": 4448 }, { "epoch": 7.66, "learning_rate": 8.287292817679557e-08, "logits/chosen": -1.7999486923217773, "logits/rejected": -2.003556966781616, "logps/chosen": -190.75250244140625, "logps/rejected": -371.3202209472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.10903549194336, "rewards/margins": 16.660236358642578, "rewards/rejected": -26.769271850585938, "step": 4449 }, { "epoch": 7.66, "learning_rate": 8.276668083297918e-08, "logits/chosen": -1.8399665355682373, "logits/rejected": -1.6598061323165894, "logps/chosen": -177.16041564941406, "logps/rejected": -305.37921142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.49880599975586, "rewards/margins": 14.010380744934082, "rewards/rejected": -23.509187698364258, "step": 4450 }, { "epoch": 7.66, "learning_rate": 8.266043348916277e-08, "logits/chosen": -1.9701768159866333, "logits/rejected": -1.7432576417922974, "logps/chosen": -152.5025634765625, "logps/rejected": -328.2004699707031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.426276683807373, "rewards/margins": 17.567567825317383, "rewards/rejected": -23.993844985961914, "step": 4451 }, { "epoch": 7.66, "learning_rate": 8.255418614534635e-08, "logits/chosen": -1.8500144481658936, "logits/rejected": -1.7910354137420654, "logps/chosen": -145.90350341796875, "logps/rejected": -277.33648681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.159568786621094, "rewards/margins": 12.616914749145508, "rewards/rejected": -20.7764835357666, "step": 4452 }, { "epoch": 7.66, "learning_rate": 8.244793880152997e-08, "logits/chosen": -1.7273802757263184, "logits/rejected": -2.0599803924560547, "logps/chosen": -129.134521484375, "logps/rejected": -333.11151123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.16353702545166, "rewards/margins": 18.703657150268555, "rewards/rejected": -24.86719512939453, "step": 4453 }, { "epoch": 7.67, "learning_rate": 8.234169145771355e-08, "logits/chosen": -1.5865026712417603, "logits/rejected": -1.9796990156173706, "logps/chosen": -166.77720642089844, "logps/rejected": -314.411376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.263602256774902, "rewards/margins": 14.985244750976562, "rewards/rejected": -23.24884796142578, "step": 4454 }, { "epoch": 7.67, "learning_rate": 8.223544411389714e-08, "logits/chosen": -1.8433164358139038, "logits/rejected": -1.7378041744232178, "logps/chosen": -146.67173767089844, "logps/rejected": -355.0517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.752736568450928, "rewards/margins": 20.560319900512695, "rewards/rejected": -26.31305694580078, "step": 4455 }, { "epoch": 7.67, "learning_rate": 8.212919677008075e-08, "logits/chosen": -1.6648520231246948, "logits/rejected": -1.8134236335754395, "logps/chosen": -162.22268676757812, "logps/rejected": -341.56451416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.35467529296875, "rewards/margins": 16.377248764038086, "rewards/rejected": -24.731924057006836, "step": 4456 }, { "epoch": 7.67, "learning_rate": 8.202294942626433e-08, "logits/chosen": -1.810748815536499, "logits/rejected": -1.8577240705490112, "logps/chosen": -163.5126190185547, "logps/rejected": -318.0506591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.6036376953125, "rewards/margins": 15.362150192260742, "rewards/rejected": -23.965787887573242, "step": 4457 }, { "epoch": 7.67, "learning_rate": 8.191670208244795e-08, "logits/chosen": -1.973806619644165, "logits/rejected": -1.7304866313934326, "logps/chosen": -110.91748809814453, "logps/rejected": -283.6995849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.43316650390625, "rewards/margins": 16.7052059173584, "rewards/rejected": -21.13837242126465, "step": 4458 }, { "epoch": 7.67, "learning_rate": 8.181045473863153e-08, "logits/chosen": -1.9760481119155884, "logits/rejected": -1.8821758031845093, "logps/chosen": -174.64794921875, "logps/rejected": -313.5708923339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.749126434326172, "rewards/margins": 14.627201080322266, "rewards/rejected": -23.37632942199707, "step": 4459 }, { "epoch": 7.68, "learning_rate": 8.170420739481512e-08, "logits/chosen": -1.4277502298355103, "logits/rejected": -1.894681692123413, "logps/chosen": -151.6336669921875, "logps/rejected": -344.28204345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.67646598815918, "rewards/margins": 17.88870620727539, "rewards/rejected": -24.565170288085938, "step": 4460 }, { "epoch": 7.68, "learning_rate": 8.159796005099873e-08, "logits/chosen": -1.6952884197235107, "logits/rejected": -1.8087489604949951, "logps/chosen": -151.42816162109375, "logps/rejected": -298.5918273925781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.340503692626953, "rewards/margins": 14.072946548461914, "rewards/rejected": -21.413450241088867, "step": 4461 }, { "epoch": 7.68, "learning_rate": 8.149171270718232e-08, "logits/chosen": -1.6871540546417236, "logits/rejected": -1.966581106185913, "logps/chosen": -137.55445861816406, "logps/rejected": -310.9290466308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.585519790649414, "rewards/margins": 15.810623168945312, "rewards/rejected": -22.396142959594727, "step": 4462 }, { "epoch": 7.68, "learning_rate": 8.13854653633659e-08, "logits/chosen": -1.9771504402160645, "logits/rejected": -1.5058494806289673, "logps/chosen": -125.40338897705078, "logps/rejected": -250.19114685058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.706140518188477, "rewards/margins": 13.333831787109375, "rewards/rejected": -19.03997230529785, "step": 4463 }, { "epoch": 7.68, "learning_rate": 8.127921801954951e-08, "logits/chosen": -1.3981688022613525, "logits/rejected": -1.8956090211868286, "logps/chosen": -181.37911987304688, "logps/rejected": -347.8822937011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.09211254119873, "rewards/margins": 16.18209457397461, "rewards/rejected": -26.27420425415039, "step": 4464 }, { "epoch": 7.69, "learning_rate": 8.11729706757331e-08, "logits/chosen": -1.9115839004516602, "logits/rejected": -1.657644510269165, "logps/chosen": -181.54852294921875, "logps/rejected": -312.19036865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.307967185974121, "rewards/margins": 14.994373321533203, "rewards/rejected": -23.30234146118164, "step": 4465 }, { "epoch": 7.69, "learning_rate": 8.10667233319167e-08, "logits/chosen": -1.9704580307006836, "logits/rejected": -2.0901315212249756, "logps/chosen": -147.5326385498047, "logps/rejected": -345.5299377441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.971643924713135, "rewards/margins": 18.58449935913086, "rewards/rejected": -24.556142807006836, "step": 4466 }, { "epoch": 7.69, "learning_rate": 8.09604759881003e-08, "logits/chosen": -1.9319349527359009, "logits/rejected": -1.9139735698699951, "logps/chosen": -167.95669555664062, "logps/rejected": -309.32354736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0772013664245605, "rewards/margins": 13.78276538848877, "rewards/rejected": -20.859966278076172, "step": 4467 }, { "epoch": 7.69, "learning_rate": 8.085422864428388e-08, "logits/chosen": -1.6928634643554688, "logits/rejected": -1.9354403018951416, "logps/chosen": -164.44161987304688, "logps/rejected": -350.72344970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.331500053405762, "rewards/margins": 16.452856063842773, "rewards/rejected": -24.78435707092285, "step": 4468 }, { "epoch": 7.69, "learning_rate": 8.07479813004675e-08, "logits/chosen": -1.758438229560852, "logits/rejected": -1.978464126586914, "logps/chosen": -157.239501953125, "logps/rejected": -327.8829650878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.238659381866455, "rewards/margins": 16.728673934936523, "rewards/rejected": -23.967334747314453, "step": 4469 }, { "epoch": 7.69, "learning_rate": 8.064173395665108e-08, "logits/chosen": -1.624568223953247, "logits/rejected": -1.7310937643051147, "logps/chosen": -165.48471069335938, "logps/rejected": -318.4130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.974566459655762, "rewards/margins": 15.283458709716797, "rewards/rejected": -22.258026123046875, "step": 4470 }, { "epoch": 7.7, "learning_rate": 8.053548661283467e-08, "logits/chosen": -1.5610121488571167, "logits/rejected": -1.8797167539596558, "logps/chosen": -158.59176635742188, "logps/rejected": -314.52374267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.434303283691406, "rewards/margins": 13.844294548034668, "rewards/rejected": -21.27859878540039, "step": 4471 }, { "epoch": 7.7, "learning_rate": 8.042923926901828e-08, "logits/chosen": -1.8563709259033203, "logits/rejected": -1.9427685737609863, "logps/chosen": -168.5410614013672, "logps/rejected": -298.0048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.637782573699951, "rewards/margins": 13.379751205444336, "rewards/rejected": -20.017534255981445, "step": 4472 }, { "epoch": 7.7, "learning_rate": 8.032299192520186e-08, "logits/chosen": -2.0689778327941895, "logits/rejected": -1.8416551351547241, "logps/chosen": -157.92486572265625, "logps/rejected": -309.36444091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.991626262664795, "rewards/margins": 16.89630889892578, "rewards/rejected": -22.887935638427734, "step": 4473 }, { "epoch": 7.7, "learning_rate": 8.021674458138546e-08, "logits/chosen": -1.6777006387710571, "logits/rejected": -1.7427570819854736, "logps/chosen": -185.85504150390625, "logps/rejected": -346.2898864746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.863450050354004, "rewards/margins": 15.813436508178711, "rewards/rejected": -25.67688751220703, "step": 4474 }, { "epoch": 7.7, "learning_rate": 8.011049723756906e-08, "logits/chosen": -1.9372339248657227, "logits/rejected": -1.584996223449707, "logps/chosen": -147.5303192138672, "logps/rejected": -292.8563537597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.215541362762451, "rewards/margins": 15.469032287597656, "rewards/rejected": -21.684574127197266, "step": 4475 }, { "epoch": 7.7, "learning_rate": 8.000424989375265e-08, "logits/chosen": -1.640601396560669, "logits/rejected": -2.07145094871521, "logps/chosen": -138.92588806152344, "logps/rejected": -343.31781005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.367818832397461, "rewards/margins": 17.935726165771484, "rewards/rejected": -24.303546905517578, "step": 4476 }, { "epoch": 7.71, "learning_rate": 7.989800254993625e-08, "logits/chosen": -1.5691511631011963, "logits/rejected": -1.7136108875274658, "logps/chosen": -188.57115173339844, "logps/rejected": -294.455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.38240909576416, "rewards/margins": 11.600870132446289, "rewards/rejected": -22.983278274536133, "step": 4477 }, { "epoch": 7.71, "learning_rate": 7.979175520611984e-08, "logits/chosen": -2.0674800872802734, "logits/rejected": -1.10525381565094, "logps/chosen": -206.60848999023438, "logps/rejected": -312.8837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.782464981079102, "rewards/margins": 13.681985855102539, "rewards/rejected": -23.46445083618164, "step": 4478 }, { "epoch": 7.71, "learning_rate": 7.968550786230343e-08, "logits/chosen": -1.888911485671997, "logits/rejected": -2.062896728515625, "logps/chosen": -140.0748291015625, "logps/rejected": -345.2618408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.909712791442871, "rewards/margins": 17.969905853271484, "rewards/rejected": -22.87961769104004, "step": 4479 }, { "epoch": 7.71, "learning_rate": 7.957926051848704e-08, "logits/chosen": -1.7507457733154297, "logits/rejected": -1.5135616064071655, "logps/chosen": -192.5986785888672, "logps/rejected": -316.6189270019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.851377487182617, "rewards/margins": 14.036587715148926, "rewards/rejected": -23.887964248657227, "step": 4480 }, { "epoch": 7.71, "learning_rate": 7.947301317467063e-08, "logits/chosen": -1.6464991569519043, "logits/rejected": -2.0383360385894775, "logps/chosen": -112.35052490234375, "logps/rejected": -313.12567138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.494008541107178, "rewards/margins": 18.29010009765625, "rewards/rejected": -22.784109115600586, "step": 4481 }, { "epoch": 7.71, "learning_rate": 7.936676583085423e-08, "logits/chosen": -1.715268611907959, "logits/rejected": -1.6846318244934082, "logps/chosen": -127.35154724121094, "logps/rejected": -293.6181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.686452388763428, "rewards/margins": 15.76632308959961, "rewards/rejected": -21.452774047851562, "step": 4482 }, { "epoch": 7.72, "learning_rate": 7.926051848703783e-08, "logits/chosen": -1.590339183807373, "logits/rejected": -1.7715617418289185, "logps/chosen": -132.92410278320312, "logps/rejected": -287.257568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.758309364318848, "rewards/margins": 15.154409408569336, "rewards/rejected": -21.912717819213867, "step": 4483 }, { "epoch": 7.72, "learning_rate": 7.915427114322141e-08, "logits/chosen": -2.047325372695923, "logits/rejected": -1.9706605672836304, "logps/chosen": -115.02690887451172, "logps/rejected": -280.62994384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.43105936050415, "rewards/margins": 16.1508731842041, "rewards/rejected": -20.581932067871094, "step": 4484 }, { "epoch": 7.72, "learning_rate": 7.904802379940501e-08, "logits/chosen": -1.936530590057373, "logits/rejected": -1.303276777267456, "logps/chosen": -152.35269165039062, "logps/rejected": -293.45391845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2392964363098145, "rewards/margins": 16.661041259765625, "rewards/rejected": -21.90033721923828, "step": 4485 }, { "epoch": 7.72, "learning_rate": 7.894177645558861e-08, "logits/chosen": -1.9869143962860107, "logits/rejected": -1.9870784282684326, "logps/chosen": -132.60501098632812, "logps/rejected": -328.28515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.201137065887451, "rewards/margins": 19.23471450805664, "rewards/rejected": -25.43585205078125, "step": 4486 }, { "epoch": 7.72, "learning_rate": 7.88355291117722e-08, "logits/chosen": -2.009145736694336, "logits/rejected": -1.6594843864440918, "logps/chosen": -193.29580688476562, "logps/rejected": -328.84503173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3696980476379395, "rewards/margins": 15.39504337310791, "rewards/rejected": -22.764741897583008, "step": 4487 }, { "epoch": 7.72, "learning_rate": 7.87292817679558e-08, "logits/chosen": -1.7364263534545898, "logits/rejected": -1.7801876068115234, "logps/chosen": -144.11178588867188, "logps/rejected": -274.8006591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.689786911010742, "rewards/margins": 13.736472129821777, "rewards/rejected": -19.426259994506836, "step": 4488 }, { "epoch": 7.73, "learning_rate": 7.862303442413939e-08, "logits/chosen": -1.8159654140472412, "logits/rejected": -2.0133819580078125, "logps/chosen": -162.91065979003906, "logps/rejected": -345.682373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.204368591308594, "rewards/margins": 16.060264587402344, "rewards/rejected": -24.264633178710938, "step": 4489 }, { "epoch": 7.73, "learning_rate": 7.851678708032298e-08, "logits/chosen": -2.0189976692199707, "logits/rejected": -1.7219009399414062, "logps/chosen": -173.6977081298828, "logps/rejected": -324.71234130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.338533401489258, "rewards/margins": 14.673851013183594, "rewards/rejected": -23.012386322021484, "step": 4490 }, { "epoch": 7.73, "learning_rate": 7.841053973650659e-08, "logits/chosen": -1.8893606662750244, "logits/rejected": -1.86002779006958, "logps/chosen": -165.3387451171875, "logps/rejected": -312.3420715332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.847371101379395, "rewards/margins": 15.33380126953125, "rewards/rejected": -24.181171417236328, "step": 4491 }, { "epoch": 7.73, "learning_rate": 7.830429239269018e-08, "logits/chosen": -1.8253717422485352, "logits/rejected": -1.8063344955444336, "logps/chosen": -137.72413635253906, "logps/rejected": -316.448486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.250155448913574, "rewards/margins": 17.52855110168457, "rewards/rejected": -22.778705596923828, "step": 4492 }, { "epoch": 7.73, "learning_rate": 7.819804504887378e-08, "logits/chosen": -2.0357582569122314, "logits/rejected": -1.723217487335205, "logps/chosen": -118.155517578125, "logps/rejected": -261.42694091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.785664081573486, "rewards/margins": 14.421228408813477, "rewards/rejected": -19.206892013549805, "step": 4493 }, { "epoch": 7.73, "learning_rate": 7.809179770505737e-08, "logits/chosen": -1.644869327545166, "logits/rejected": -1.7813067436218262, "logps/chosen": -149.28387451171875, "logps/rejected": -337.4681701660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.669581413269043, "rewards/margins": 18.179271697998047, "rewards/rejected": -24.848852157592773, "step": 4494 }, { "epoch": 7.74, "learning_rate": 7.798555036124096e-08, "logits/chosen": -1.6043403148651123, "logits/rejected": -1.380685567855835, "logps/chosen": -150.59075927734375, "logps/rejected": -269.92388916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.435464859008789, "rewards/margins": 11.489566802978516, "rewards/rejected": -18.925031661987305, "step": 4495 }, { "epoch": 7.74, "learning_rate": 7.787930301742456e-08, "logits/chosen": -1.532024621963501, "logits/rejected": -2.0654101371765137, "logps/chosen": -162.49798583984375, "logps/rejected": -323.9150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.022006034851074, "rewards/margins": 15.738231658935547, "rewards/rejected": -22.760236740112305, "step": 4496 }, { "epoch": 7.74, "learning_rate": 7.777305567360816e-08, "logits/chosen": -1.8105089664459229, "logits/rejected": -1.626947045326233, "logps/chosen": -186.55398559570312, "logps/rejected": -327.4395751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.900699615478516, "rewards/margins": 13.643814086914062, "rewards/rejected": -23.544513702392578, "step": 4497 }, { "epoch": 7.74, "learning_rate": 7.766680832979174e-08, "logits/chosen": -1.7810554504394531, "logits/rejected": -2.032860517501831, "logps/chosen": -147.45193481445312, "logps/rejected": -332.1982727050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.712945938110352, "rewards/margins": 16.57162857055664, "rewards/rejected": -23.284576416015625, "step": 4498 }, { "epoch": 7.74, "learning_rate": 7.756056098597536e-08, "logits/chosen": -1.5776904821395874, "logits/rejected": -1.7553938627243042, "logps/chosen": -201.19198608398438, "logps/rejected": -346.1451110839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.031002044677734, "rewards/margins": 12.103445053100586, "rewards/rejected": -23.134449005126953, "step": 4499 }, { "epoch": 7.75, "learning_rate": 7.745431364215894e-08, "logits/chosen": -1.7507832050323486, "logits/rejected": -1.81890869140625, "logps/chosen": -173.26296997070312, "logps/rejected": -310.839111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.421662330627441, "rewards/margins": 13.156208038330078, "rewards/rejected": -21.577869415283203, "step": 4500 }, { "epoch": 7.75, "learning_rate": 7.734806629834254e-08, "logits/chosen": -1.848036289215088, "logits/rejected": -1.586859941482544, "logps/chosen": -160.92208862304688, "logps/rejected": -289.974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.540290832519531, "rewards/margins": 14.188143730163574, "rewards/rejected": -21.728435516357422, "step": 4501 }, { "epoch": 7.75, "learning_rate": 7.724181895452614e-08, "logits/chosen": -1.9747002124786377, "logits/rejected": -1.6859242916107178, "logps/chosen": -152.8922576904297, "logps/rejected": -289.6454772949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.720002174377441, "rewards/margins": 13.43755054473877, "rewards/rejected": -21.15755271911621, "step": 4502 }, { "epoch": 7.75, "learning_rate": 7.713557161070972e-08, "logits/chosen": -1.6727677583694458, "logits/rejected": -1.8024001121520996, "logps/chosen": -148.43316650390625, "logps/rejected": -317.9285888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.947527885437012, "rewards/margins": 15.998332023620605, "rewards/rejected": -22.945859909057617, "step": 4503 }, { "epoch": 7.75, "learning_rate": 7.702932426689332e-08, "logits/chosen": -1.7828865051269531, "logits/rejected": -1.8180739879608154, "logps/chosen": -158.7404327392578, "logps/rejected": -309.90045166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.216896057128906, "rewards/margins": 15.547056198120117, "rewards/rejected": -22.763954162597656, "step": 4504 }, { "epoch": 7.75, "learning_rate": 7.692307692307692e-08, "logits/chosen": -1.7673509120941162, "logits/rejected": -1.969068169593811, "logps/chosen": -125.54818725585938, "logps/rejected": -289.97052001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.44843053817749, "rewards/margins": 15.124706268310547, "rewards/rejected": -19.573137283325195, "step": 4505 }, { "epoch": 7.76, "learning_rate": 7.681682957926051e-08, "logits/chosen": -1.6667240858078003, "logits/rejected": -1.904466152191162, "logps/chosen": -153.65769958496094, "logps/rejected": -307.30938720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.644826889038086, "rewards/margins": 14.497356414794922, "rewards/rejected": -21.14218521118164, "step": 4506 }, { "epoch": 7.76, "learning_rate": 7.671058223544411e-08, "logits/chosen": -1.8726862668991089, "logits/rejected": -1.7617892026901245, "logps/chosen": -164.46018981933594, "logps/rejected": -341.24127197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.192169189453125, "rewards/margins": 17.589160919189453, "rewards/rejected": -25.781328201293945, "step": 4507 }, { "epoch": 7.76, "learning_rate": 7.66043348916277e-08, "logits/chosen": -1.7974128723144531, "logits/rejected": -1.7818808555603027, "logps/chosen": -148.78558349609375, "logps/rejected": -348.7955322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.227199554443359, "rewards/margins": 19.180356979370117, "rewards/rejected": -24.407554626464844, "step": 4508 }, { "epoch": 7.76, "learning_rate": 7.64980875478113e-08, "logits/chosen": -1.7500218152999878, "logits/rejected": -2.0099029541015625, "logps/chosen": -145.46109008789062, "logps/rejected": -314.89459228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.837105751037598, "rewards/margins": 15.99176025390625, "rewards/rejected": -21.828866958618164, "step": 4509 }, { "epoch": 7.76, "learning_rate": 7.63918402039949e-08, "logits/chosen": -2.022675037384033, "logits/rejected": -1.8513319492340088, "logps/chosen": -126.31328582763672, "logps/rejected": -292.31787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.016690254211426, "rewards/margins": 16.6538028717041, "rewards/rejected": -21.670494079589844, "step": 4510 }, { "epoch": 7.76, "learning_rate": 7.628559286017849e-08, "logits/chosen": -1.8206706047058105, "logits/rejected": -1.7041170597076416, "logps/chosen": -198.65921020507812, "logps/rejected": -336.15631103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.675207138061523, "rewards/margins": 15.53095817565918, "rewards/rejected": -24.20616340637207, "step": 4511 }, { "epoch": 7.77, "learning_rate": 7.617934551636209e-08, "logits/chosen": -1.8831020593643188, "logits/rejected": -1.9212744235992432, "logps/chosen": -172.33114624023438, "logps/rejected": -349.23284912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.007205963134766, "rewards/margins": 17.389467239379883, "rewards/rejected": -25.39667320251465, "step": 4512 }, { "epoch": 7.77, "learning_rate": 7.607309817254569e-08, "logits/chosen": -1.941364049911499, "logits/rejected": -1.7951884269714355, "logps/chosen": -157.2660675048828, "logps/rejected": -275.6907043457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.7920732498168945, "rewards/margins": 13.711437225341797, "rewards/rejected": -20.503511428833008, "step": 4513 }, { "epoch": 7.77, "learning_rate": 7.596685082872927e-08, "logits/chosen": -1.8631477355957031, "logits/rejected": -1.694916844367981, "logps/chosen": -147.32264709472656, "logps/rejected": -284.0035705566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.497325897216797, "rewards/margins": 13.692680358886719, "rewards/rejected": -21.19000816345215, "step": 4514 }, { "epoch": 7.77, "learning_rate": 7.586060348491287e-08, "logits/chosen": -1.8377711772918701, "logits/rejected": -1.7342214584350586, "logps/chosen": -144.2791290283203, "logps/rejected": -241.08229064941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.207850456237793, "rewards/margins": 11.712848663330078, "rewards/rejected": -15.920698165893555, "step": 4515 }, { "epoch": 7.77, "learning_rate": 7.575435614109647e-08, "logits/chosen": -1.8346152305603027, "logits/rejected": -1.7983248233795166, "logps/chosen": -110.2823257446289, "logps/rejected": -281.0310974121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.694131851196289, "rewards/margins": 17.795026779174805, "rewards/rejected": -21.489158630371094, "step": 4516 }, { "epoch": 7.77, "learning_rate": 7.564810879728007e-08, "logits/chosen": -1.8277256488800049, "logits/rejected": -1.8290200233459473, "logps/chosen": -137.96339416503906, "logps/rejected": -296.1134033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.678159713745117, "rewards/margins": 16.111827850341797, "rewards/rejected": -21.789987564086914, "step": 4517 }, { "epoch": 7.78, "learning_rate": 7.554186145346366e-08, "logits/chosen": -1.8524340391159058, "logits/rejected": -1.6962237358093262, "logps/chosen": -178.95111083984375, "logps/rejected": -371.68414306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3603196144104, "rewards/margins": 19.065025329589844, "rewards/rejected": -26.425344467163086, "step": 4518 }, { "epoch": 7.78, "learning_rate": 7.543561410964725e-08, "logits/chosen": -1.8869025707244873, "logits/rejected": -1.9083213806152344, "logps/chosen": -131.16567993164062, "logps/rejected": -276.8055114746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.8005547523498535, "rewards/margins": 14.461289405822754, "rewards/rejected": -20.261844635009766, "step": 4519 }, { "epoch": 7.78, "learning_rate": 7.532936676583085e-08, "logits/chosen": -1.6723966598510742, "logits/rejected": -1.8934470415115356, "logps/chosen": -170.04107666015625, "logps/rejected": -330.179931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.355229377746582, "rewards/margins": 14.718563079833984, "rewards/rejected": -24.073793411254883, "step": 4520 }, { "epoch": 7.78, "learning_rate": 7.522311942201445e-08, "logits/chosen": -1.561565637588501, "logits/rejected": -1.8235809803009033, "logps/chosen": -126.59193420410156, "logps/rejected": -307.80963134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.543694019317627, "rewards/margins": 15.91199016571045, "rewards/rejected": -21.455684661865234, "step": 4521 }, { "epoch": 7.78, "learning_rate": 7.511687207819804e-08, "logits/chosen": -1.857163906097412, "logits/rejected": -1.8537821769714355, "logps/chosen": -198.83392333984375, "logps/rejected": -336.4601135253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.149539947509766, "rewards/margins": 12.673450469970703, "rewards/rejected": -22.82299041748047, "step": 4522 }, { "epoch": 7.78, "learning_rate": 7.501062473438164e-08, "logits/chosen": -1.6488780975341797, "logits/rejected": -1.793043851852417, "logps/chosen": -151.12841796875, "logps/rejected": -282.12359619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.026566028594971, "rewards/margins": 13.489410400390625, "rewards/rejected": -19.515975952148438, "step": 4523 }, { "epoch": 7.79, "learning_rate": 7.490437739056524e-08, "logits/chosen": -1.9157135486602783, "logits/rejected": -1.9085922241210938, "logps/chosen": -148.56678771972656, "logps/rejected": -298.494873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.603085041046143, "rewards/margins": 15.004167556762695, "rewards/rejected": -20.607254028320312, "step": 4524 }, { "epoch": 7.79, "learning_rate": 7.479813004674883e-08, "logits/chosen": -1.9505785703659058, "logits/rejected": -1.7958922386169434, "logps/chosen": -170.21896362304688, "logps/rejected": -322.653564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8780317306518555, "rewards/margins": 16.208547592163086, "rewards/rejected": -24.086580276489258, "step": 4525 }, { "epoch": 7.79, "learning_rate": 7.469188270293242e-08, "logits/chosen": -1.814725637435913, "logits/rejected": -1.845875859260559, "logps/chosen": -162.92640686035156, "logps/rejected": -298.8526611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.671222686767578, "rewards/margins": 14.519193649291992, "rewards/rejected": -22.190414428710938, "step": 4526 }, { "epoch": 7.79, "learning_rate": 7.458563535911602e-08, "logits/chosen": -1.7939057350158691, "logits/rejected": -1.8394854068756104, "logps/chosen": -148.92701721191406, "logps/rejected": -266.3544006347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.152917861938477, "rewards/margins": 12.449531555175781, "rewards/rejected": -19.602449417114258, "step": 4527 }, { "epoch": 7.79, "learning_rate": 7.447938801529962e-08, "logits/chosen": -1.9870470762252808, "logits/rejected": -1.7429735660552979, "logps/chosen": -141.02479553222656, "logps/rejected": -305.5380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.340169429779053, "rewards/margins": 16.779369354248047, "rewards/rejected": -23.119537353515625, "step": 4528 }, { "epoch": 7.8, "learning_rate": 7.43731406714832e-08, "logits/chosen": -1.961232304573059, "logits/rejected": -1.8447515964508057, "logps/chosen": -148.35121154785156, "logps/rejected": -341.9813232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.480826377868652, "rewards/margins": 19.377857208251953, "rewards/rejected": -25.858686447143555, "step": 4529 }, { "epoch": 7.8, "learning_rate": 7.42668933276668e-08, "logits/chosen": -1.3871952295303345, "logits/rejected": -2.0030198097229004, "logps/chosen": -140.2165069580078, "logps/rejected": -297.6354064941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.773369789123535, "rewards/margins": 14.953153610229492, "rewards/rejected": -20.726524353027344, "step": 4530 }, { "epoch": 7.8, "learning_rate": 7.41606459838504e-08, "logits/chosen": -1.5633968114852905, "logits/rejected": -1.9435579776763916, "logps/chosen": -161.0554962158203, "logps/rejected": -330.3165588378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.893478393554688, "rewards/margins": 16.390220642089844, "rewards/rejected": -25.28369903564453, "step": 4531 }, { "epoch": 7.8, "learning_rate": 7.4054398640034e-08, "logits/chosen": -1.7286877632141113, "logits/rejected": -1.8758645057678223, "logps/chosen": -194.47549438476562, "logps/rejected": -355.77593994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.037008285522461, "rewards/margins": 15.539985656738281, "rewards/rejected": -25.576993942260742, "step": 4532 }, { "epoch": 7.8, "learning_rate": 7.394815129621759e-08, "logits/chosen": -2.0731441974639893, "logits/rejected": -1.8730884790420532, "logps/chosen": -211.55738830566406, "logps/rejected": -358.8797607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.237709045410156, "rewards/margins": 15.905914306640625, "rewards/rejected": -25.14362335205078, "step": 4533 }, { "epoch": 7.8, "learning_rate": 7.384190395240118e-08, "logits/chosen": -1.852205753326416, "logits/rejected": -1.4945040941238403, "logps/chosen": -144.17503356933594, "logps/rejected": -318.05450439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.185523986816406, "rewards/margins": 18.583040237426758, "rewards/rejected": -23.768564224243164, "step": 4534 }, { "epoch": 7.81, "learning_rate": 7.373565660858478e-08, "logits/chosen": -1.8866400718688965, "logits/rejected": -1.6876966953277588, "logps/chosen": -186.24850463867188, "logps/rejected": -314.61749267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.321656227111816, "rewards/margins": 13.367904663085938, "rewards/rejected": -22.689559936523438, "step": 4535 }, { "epoch": 7.81, "learning_rate": 7.362940926476838e-08, "logits/chosen": -1.7824664115905762, "logits/rejected": -1.400216817855835, "logps/chosen": -168.69342041015625, "logps/rejected": -272.1787414550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.5195770263671875, "rewards/margins": 12.19235610961914, "rewards/rejected": -19.711933135986328, "step": 4536 }, { "epoch": 7.81, "learning_rate": 7.352316192095197e-08, "logits/chosen": -1.9115180969238281, "logits/rejected": -1.8087291717529297, "logps/chosen": -193.84747314453125, "logps/rejected": -350.3258361816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.927247047424316, "rewards/margins": 15.05860710144043, "rewards/rejected": -25.985855102539062, "step": 4537 }, { "epoch": 7.81, "learning_rate": 7.341691457713557e-08, "logits/chosen": -1.844890832901001, "logits/rejected": -1.7345869541168213, "logps/chosen": -121.77079772949219, "logps/rejected": -283.2101135253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6738243103027344, "rewards/margins": 15.353862762451172, "rewards/rejected": -19.027687072753906, "step": 4538 }, { "epoch": 7.81, "learning_rate": 7.331066723331917e-08, "logits/chosen": -1.9119269847869873, "logits/rejected": -1.8665826320648193, "logps/chosen": -118.96890258789062, "logps/rejected": -262.2872009277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.944406986236572, "rewards/margins": 15.070037841796875, "rewards/rejected": -20.014446258544922, "step": 4539 }, { "epoch": 7.81, "learning_rate": 7.320441988950275e-08, "logits/chosen": -1.9100325107574463, "logits/rejected": -2.0411322116851807, "logps/chosen": -123.8160629272461, "logps/rejected": -291.819091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.004108428955078, "rewards/margins": 15.850448608398438, "rewards/rejected": -19.85455894470215, "step": 4540 }, { "epoch": 7.82, "learning_rate": 7.309817254568635e-08, "logits/chosen": -1.8468619585037231, "logits/rejected": -2.080209970474243, "logps/chosen": -168.2889862060547, "logps/rejected": -332.9811096191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.311925888061523, "rewards/margins": 15.141003608703613, "rewards/rejected": -24.452930450439453, "step": 4541 }, { "epoch": 7.82, "learning_rate": 7.299192520186995e-08, "logits/chosen": -1.8546394109725952, "logits/rejected": -1.5926166772842407, "logps/chosen": -134.41592407226562, "logps/rejected": -254.86614990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4637451171875, "rewards/margins": 12.593038558959961, "rewards/rejected": -18.056785583496094, "step": 4542 }, { "epoch": 7.82, "learning_rate": 7.288567785805355e-08, "logits/chosen": -1.802825689315796, "logits/rejected": -1.7443400621414185, "logps/chosen": -110.8851089477539, "logps/rejected": -270.09576416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.630200386047363, "rewards/margins": 14.544516563415527, "rewards/rejected": -20.17471694946289, "step": 4543 }, { "epoch": 7.82, "learning_rate": 7.277943051423715e-08, "logits/chosen": -1.6303545236587524, "logits/rejected": -1.8228665590286255, "logps/chosen": -154.14324951171875, "logps/rejected": -309.37261962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.03493595123291, "rewards/margins": 13.573789596557617, "rewards/rejected": -20.60872459411621, "step": 4544 }, { "epoch": 7.82, "learning_rate": 7.267318317042073e-08, "logits/chosen": -1.8376376628875732, "logits/rejected": -1.6802093982696533, "logps/chosen": -179.5469207763672, "logps/rejected": -322.94610595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.394608497619629, "rewards/margins": 15.320293426513672, "rewards/rejected": -23.714902877807617, "step": 4545 }, { "epoch": 7.82, "learning_rate": 7.256693582660433e-08, "logits/chosen": -1.7605377435684204, "logits/rejected": -1.6649296283721924, "logps/chosen": -147.6625213623047, "logps/rejected": -267.3865966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.956455230712891, "rewards/margins": 12.32961654663086, "rewards/rejected": -20.28607177734375, "step": 4546 }, { "epoch": 7.83, "learning_rate": 7.246068848278793e-08, "logits/chosen": -1.849616527557373, "logits/rejected": -1.7911574840545654, "logps/chosen": -144.4412841796875, "logps/rejected": -282.2657165527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.128297805786133, "rewards/margins": 15.086365699768066, "rewards/rejected": -21.214664459228516, "step": 4547 }, { "epoch": 7.83, "learning_rate": 7.235444113897152e-08, "logits/chosen": -1.9442574977874756, "logits/rejected": -1.7846208810806274, "logps/chosen": -178.7524871826172, "logps/rejected": -318.0509948730469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.549307823181152, "rewards/margins": 13.553951263427734, "rewards/rejected": -22.103260040283203, "step": 4548 }, { "epoch": 7.83, "learning_rate": 7.224819379515512e-08, "logits/chosen": -1.7372502088546753, "logits/rejected": -1.762592077255249, "logps/chosen": -126.27490234375, "logps/rejected": -282.0570068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.356903076171875, "rewards/margins": 15.00711441040039, "rewards/rejected": -20.364017486572266, "step": 4549 }, { "epoch": 7.83, "learning_rate": 7.214194645133871e-08, "logits/chosen": -1.723858118057251, "logits/rejected": -1.9150214195251465, "logps/chosen": -132.0592041015625, "logps/rejected": -325.0979919433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.231473922729492, "rewards/margins": 18.99513053894043, "rewards/rejected": -24.226604461669922, "step": 4550 }, { "epoch": 7.83, "learning_rate": 7.20356991075223e-08, "logits/chosen": -1.947507381439209, "logits/rejected": -1.9406046867370605, "logps/chosen": -100.07052612304688, "logps/rejected": -244.52821350097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8188021183013916, "rewards/margins": 13.297150611877441, "rewards/rejected": -16.115951538085938, "step": 4551 }, { "epoch": 7.83, "learning_rate": 7.192945176370591e-08, "logits/chosen": -1.8062314987182617, "logits/rejected": -1.920222520828247, "logps/chosen": -162.91958618164062, "logps/rejected": -308.44512939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.185189723968506, "rewards/margins": 14.601278305053711, "rewards/rejected": -21.786468505859375, "step": 4552 }, { "epoch": 7.84, "learning_rate": 7.18232044198895e-08, "logits/chosen": -1.3533751964569092, "logits/rejected": -1.980175256729126, "logps/chosen": -193.17501831054688, "logps/rejected": -369.4349670410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.799973487854004, "rewards/margins": 15.706457138061523, "rewards/rejected": -26.506431579589844, "step": 4553 }, { "epoch": 7.84, "learning_rate": 7.17169570760731e-08, "logits/chosen": -1.9466073513031006, "logits/rejected": -1.8968346118927002, "logps/chosen": -187.54188537597656, "logps/rejected": -335.6800231933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.314676284790039, "rewards/margins": 15.420299530029297, "rewards/rejected": -24.734975814819336, "step": 4554 }, { "epoch": 7.84, "learning_rate": 7.16107097322567e-08, "logits/chosen": -1.7289881706237793, "logits/rejected": -1.8892991542816162, "logps/chosen": -147.73001098632812, "logps/rejected": -303.687255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.27459192276001, "rewards/margins": 14.759843826293945, "rewards/rejected": -21.034435272216797, "step": 4555 }, { "epoch": 7.84, "learning_rate": 7.150446238844028e-08, "logits/chosen": -2.1014013290405273, "logits/rejected": -1.9351608753204346, "logps/chosen": -158.1127166748047, "logps/rejected": -289.7218017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.194413661956787, "rewards/margins": 13.218070983886719, "rewards/rejected": -20.412485122680664, "step": 4556 }, { "epoch": 7.84, "learning_rate": 7.139821504462388e-08, "logits/chosen": -1.9771993160247803, "logits/rejected": -1.7811219692230225, "logps/chosen": -158.474853515625, "logps/rejected": -313.1885681152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.602825164794922, "rewards/margins": 14.843496322631836, "rewards/rejected": -21.446321487426758, "step": 4557 }, { "epoch": 7.85, "learning_rate": 7.129196770080748e-08, "logits/chosen": -1.3693948984146118, "logits/rejected": -1.7748392820358276, "logps/chosen": -146.62416076660156, "logps/rejected": -290.33465576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.674866199493408, "rewards/margins": 13.032859802246094, "rewards/rejected": -19.707725524902344, "step": 4558 }, { "epoch": 7.85, "learning_rate": 7.118572035699106e-08, "logits/chosen": -1.5497119426727295, "logits/rejected": -1.8955986499786377, "logps/chosen": -155.5055694580078, "logps/rejected": -330.3147888183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.956404685974121, "rewards/margins": 16.06862449645996, "rewards/rejected": -22.0250301361084, "step": 4559 }, { "epoch": 7.85, "learning_rate": 7.107947301317468e-08, "logits/chosen": -2.051384687423706, "logits/rejected": -2.047227382659912, "logps/chosen": -167.04432678222656, "logps/rejected": -371.1476745605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.216178894042969, "rewards/margins": 19.418243408203125, "rewards/rejected": -26.63442611694336, "step": 4560 }, { "epoch": 7.85, "learning_rate": 7.097322566935826e-08, "logits/chosen": -1.902766466140747, "logits/rejected": -1.9544960260391235, "logps/chosen": -127.86637878417969, "logps/rejected": -290.1598205566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.771417617797852, "rewards/margins": 14.620208740234375, "rewards/rejected": -20.391626358032227, "step": 4561 }, { "epoch": 7.85, "learning_rate": 7.086697832554186e-08, "logits/chosen": -2.0031607151031494, "logits/rejected": -2.030369520187378, "logps/chosen": -157.5435791015625, "logps/rejected": -326.8822937011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.7437334060668945, "rewards/margins": 16.842742919921875, "rewards/rejected": -23.586477279663086, "step": 4562 }, { "epoch": 7.85, "learning_rate": 7.076073098172546e-08, "logits/chosen": -2.008180856704712, "logits/rejected": -1.9498796463012695, "logps/chosen": -136.60333251953125, "logps/rejected": -297.5786437988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.304400444030762, "rewards/margins": 16.6981201171875, "rewards/rejected": -24.002521514892578, "step": 4563 }, { "epoch": 7.86, "learning_rate": 7.065448363790905e-08, "logits/chosen": -1.9252076148986816, "logits/rejected": -2.096978187561035, "logps/chosen": -133.92459106445312, "logps/rejected": -323.8916320800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4591450691223145, "rewards/margins": 19.321504592895508, "rewards/rejected": -24.780649185180664, "step": 4564 }, { "epoch": 7.86, "learning_rate": 7.054823629409265e-08, "logits/chosen": -1.8396950960159302, "logits/rejected": -1.9227049350738525, "logps/chosen": -135.4011688232422, "logps/rejected": -295.0494384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.78755521774292, "rewards/margins": 14.85008716583252, "rewards/rejected": -20.63764190673828, "step": 4565 }, { "epoch": 7.86, "learning_rate": 7.044198895027624e-08, "logits/chosen": -2.0746312141418457, "logits/rejected": -2.122189521789551, "logps/chosen": -154.3577117919922, "logps/rejected": -313.6009521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.793538570404053, "rewards/margins": 14.753798484802246, "rewards/rejected": -21.54733657836914, "step": 4566 }, { "epoch": 7.86, "learning_rate": 7.033574160645983e-08, "logits/chosen": -1.6203118562698364, "logits/rejected": -1.8828415870666504, "logps/chosen": -114.03202819824219, "logps/rejected": -265.5365905761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.8338398933410645, "rewards/margins": 13.855085372924805, "rewards/rejected": -19.68892478942871, "step": 4567 }, { "epoch": 7.86, "learning_rate": 7.022949426264343e-08, "logits/chosen": -1.6968907117843628, "logits/rejected": -1.9808762073516846, "logps/chosen": -149.62771606445312, "logps/rejected": -323.88189697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.273063659667969, "rewards/margins": 16.88269805908203, "rewards/rejected": -24.15576171875, "step": 4568 }, { "epoch": 7.86, "learning_rate": 7.012324691882703e-08, "logits/chosen": -1.7192219495773315, "logits/rejected": -1.9629337787628174, "logps/chosen": -155.84262084960938, "logps/rejected": -298.1392822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.226931095123291, "rewards/margins": 14.41789436340332, "rewards/rejected": -21.644824981689453, "step": 4569 }, { "epoch": 7.87, "learning_rate": 7.001699957501061e-08, "logits/chosen": -1.8641694784164429, "logits/rejected": -1.6365599632263184, "logps/chosen": -167.40550231933594, "logps/rejected": -315.2515563964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.398242950439453, "rewards/margins": 13.904556274414062, "rewards/rejected": -21.302799224853516, "step": 4570 }, { "epoch": 7.87, "learning_rate": 6.991075223119423e-08, "logits/chosen": -1.7628774642944336, "logits/rejected": -1.8631532192230225, "logps/chosen": -152.38690185546875, "logps/rejected": -289.7386779785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.577613353729248, "rewards/margins": 13.74261474609375, "rewards/rejected": -20.320226669311523, "step": 4571 }, { "epoch": 7.87, "learning_rate": 6.980450488737781e-08, "logits/chosen": -1.8501118421554565, "logits/rejected": -1.8332141637802124, "logps/chosen": -180.50997924804688, "logps/rejected": -342.5477294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.164482116699219, "rewards/margins": 16.884939193725586, "rewards/rejected": -25.049423217773438, "step": 4572 }, { "epoch": 7.87, "learning_rate": 6.969825754356141e-08, "logits/chosen": -1.914674997329712, "logits/rejected": -1.8717819452285767, "logps/chosen": -147.88734436035156, "logps/rejected": -289.987548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.8374342918396, "rewards/margins": 14.391319274902344, "rewards/rejected": -20.2287540435791, "step": 4573 }, { "epoch": 7.87, "learning_rate": 6.959201019974501e-08, "logits/chosen": -1.833908200263977, "logits/rejected": -1.9048112630844116, "logps/chosen": -159.7291259765625, "logps/rejected": -311.6026916503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.043342590332031, "rewards/margins": 16.16843032836914, "rewards/rejected": -24.211772918701172, "step": 4574 }, { "epoch": 7.87, "learning_rate": 6.94857628559286e-08, "logits/chosen": -2.063976526260376, "logits/rejected": -1.5319006443023682, "logps/chosen": -170.82437133789062, "logps/rejected": -275.973388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.779245376586914, "rewards/margins": 12.94434928894043, "rewards/rejected": -19.723594665527344, "step": 4575 }, { "epoch": 7.88, "learning_rate": 6.937951551211219e-08, "logits/chosen": -1.479541540145874, "logits/rejected": -1.9570553302764893, "logps/chosen": -105.79325866699219, "logps/rejected": -329.4941101074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9582793712615967, "rewards/margins": 20.821971893310547, "rewards/rejected": -24.780250549316406, "step": 4576 }, { "epoch": 7.88, "learning_rate": 6.927326816829579e-08, "logits/chosen": -1.81756591796875, "logits/rejected": -1.9344375133514404, "logps/chosen": -162.65957641601562, "logps/rejected": -264.9774169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.343698978424072, "rewards/margins": 11.688385009765625, "rewards/rejected": -18.03208351135254, "step": 4577 }, { "epoch": 7.88, "learning_rate": 6.916702082447938e-08, "logits/chosen": -1.979461431503296, "logits/rejected": -1.2544432878494263, "logps/chosen": -213.72293090820312, "logps/rejected": -340.60791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.47170352935791, "rewards/margins": 13.627589225769043, "rewards/rejected": -25.099292755126953, "step": 4578 }, { "epoch": 7.88, "learning_rate": 6.906077348066299e-08, "logits/chosen": -1.515262484550476, "logits/rejected": -1.8598781824111938, "logps/chosen": -123.28762817382812, "logps/rejected": -301.6438293457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.592429161071777, "rewards/margins": 16.92086410522461, "rewards/rejected": -22.513290405273438, "step": 4579 }, { "epoch": 7.88, "learning_rate": 6.895452613684658e-08, "logits/chosen": -1.6676855087280273, "logits/rejected": -1.8089534044265747, "logps/chosen": -127.94664001464844, "logps/rejected": -318.89105224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.2300567626953125, "rewards/margins": 18.58135986328125, "rewards/rejected": -22.811416625976562, "step": 4580 }, { "epoch": 7.88, "learning_rate": 6.884827879303016e-08, "logits/chosen": -1.459275722503662, "logits/rejected": -2.098393440246582, "logps/chosen": -136.9186553955078, "logps/rejected": -370.9718933105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.618770599365234, "rewards/margins": 19.737564086914062, "rewards/rejected": -26.356334686279297, "step": 4581 }, { "epoch": 7.89, "learning_rate": 6.874203144921377e-08, "logits/chosen": -1.8701571226119995, "logits/rejected": -1.9313867092132568, "logps/chosen": -168.63490295410156, "logps/rejected": -300.2727966308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.446844100952148, "rewards/margins": 13.256317138671875, "rewards/rejected": -20.703161239624023, "step": 4582 }, { "epoch": 7.89, "learning_rate": 6.863578410539736e-08, "logits/chosen": -1.9567608833312988, "logits/rejected": -1.75893235206604, "logps/chosen": -153.60382080078125, "logps/rejected": -297.2273864746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.024181365966797, "rewards/margins": 15.167071342468262, "rewards/rejected": -22.191253662109375, "step": 4583 }, { "epoch": 7.89, "learning_rate": 6.852953676158096e-08, "logits/chosen": -1.8721592426300049, "logits/rejected": -1.940863847732544, "logps/chosen": -130.11744689941406, "logps/rejected": -280.2481384277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.840299606323242, "rewards/margins": 14.08871841430664, "rewards/rejected": -19.929018020629883, "step": 4584 }, { "epoch": 7.89, "learning_rate": 6.842328941776456e-08, "logits/chosen": -1.9597296714782715, "logits/rejected": -1.8135032653808594, "logps/chosen": -154.56695556640625, "logps/rejected": -276.0838928222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5247015953063965, "rewards/margins": 13.386215209960938, "rewards/rejected": -19.910919189453125, "step": 4585 }, { "epoch": 7.89, "learning_rate": 6.831704207394814e-08, "logits/chosen": -1.8913118839263916, "logits/rejected": -1.6745184659957886, "logps/chosen": -196.5373992919922, "logps/rejected": -328.4527893066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.880142211914062, "rewards/margins": 14.691490173339844, "rewards/rejected": -25.571632385253906, "step": 4586 }, { "epoch": 7.9, "learning_rate": 6.821079473013175e-08, "logits/chosen": -1.5409401655197144, "logits/rejected": -2.101522207260132, "logps/chosen": -121.7176513671875, "logps/rejected": -277.5507507324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.213756561279297, "rewards/margins": 14.193498611450195, "rewards/rejected": -19.407255172729492, "step": 4587 }, { "epoch": 7.9, "learning_rate": 6.810454738631534e-08, "logits/chosen": -1.9116060733795166, "logits/rejected": -1.4602792263031006, "logps/chosen": -158.4578857421875, "logps/rejected": -287.3522644042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.19036865234375, "rewards/margins": 13.707015991210938, "rewards/rejected": -21.897384643554688, "step": 4588 }, { "epoch": 7.9, "learning_rate": 6.799830004249893e-08, "logits/chosen": -2.0719516277313232, "logits/rejected": -1.990799903869629, "logps/chosen": -199.60897827148438, "logps/rejected": -354.3326721191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.99223518371582, "rewards/margins": 16.19721031188965, "rewards/rejected": -25.18944549560547, "step": 4589 }, { "epoch": 7.9, "learning_rate": 6.789205269868254e-08, "logits/chosen": -1.9952822923660278, "logits/rejected": -1.9082577228546143, "logps/chosen": -104.17304229736328, "logps/rejected": -302.97479248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9674248695373535, "rewards/margins": 18.25076675415039, "rewards/rejected": -21.21819305419922, "step": 4590 }, { "epoch": 7.9, "learning_rate": 6.778580535486612e-08, "logits/chosen": -1.9546372890472412, "logits/rejected": -2.10429310798645, "logps/chosen": -118.00088500976562, "logps/rejected": -257.3210754394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.469298362731934, "rewards/margins": 13.861976623535156, "rewards/rejected": -19.331275939941406, "step": 4591 }, { "epoch": 7.9, "learning_rate": 6.767955801104971e-08, "logits/chosen": -1.8189407587051392, "logits/rejected": -1.6773327589035034, "logps/chosen": -202.25439453125, "logps/rejected": -352.085693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.401193618774414, "rewards/margins": 14.912578582763672, "rewards/rejected": -24.313770294189453, "step": 4592 }, { "epoch": 7.91, "learning_rate": 6.757331066723332e-08, "logits/chosen": -1.7130777835845947, "logits/rejected": -1.8785536289215088, "logps/chosen": -180.79473876953125, "logps/rejected": -309.9782409667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.950010299682617, "rewards/margins": 14.195837020874023, "rewards/rejected": -21.14584732055664, "step": 4593 }, { "epoch": 7.91, "learning_rate": 6.746706332341691e-08, "logits/chosen": -1.9171974658966064, "logits/rejected": -1.988046646118164, "logps/chosen": -139.59185791015625, "logps/rejected": -308.3724365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7061543464660645, "rewards/margins": 16.839954376220703, "rewards/rejected": -22.546110153198242, "step": 4594 }, { "epoch": 7.91, "learning_rate": 6.736081597960052e-08, "logits/chosen": -1.8452097177505493, "logits/rejected": -1.7155077457427979, "logps/chosen": -157.89144897460938, "logps/rejected": -323.32342529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.721070766448975, "rewards/margins": 13.937204360961914, "rewards/rejected": -21.658275604248047, "step": 4595 }, { "epoch": 7.91, "learning_rate": 6.72545686357841e-08, "logits/chosen": -1.9934958219528198, "logits/rejected": -1.5598857402801514, "logps/chosen": -151.4126739501953, "logps/rejected": -275.233642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.097784042358398, "rewards/margins": 14.140406608581543, "rewards/rejected": -20.238189697265625, "step": 4596 }, { "epoch": 7.91, "learning_rate": 6.714832129196769e-08, "logits/chosen": -2.1592421531677246, "logits/rejected": -1.9361082315444946, "logps/chosen": -179.7799072265625, "logps/rejected": -297.97613525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.263595581054688, "rewards/margins": 13.955239295959473, "rewards/rejected": -22.218835830688477, "step": 4597 }, { "epoch": 7.91, "learning_rate": 6.70420739481513e-08, "logits/chosen": -1.6949732303619385, "logits/rejected": -1.7672927379608154, "logps/chosen": -138.42709350585938, "logps/rejected": -290.52740478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.4675493240356445, "rewards/margins": 14.120962142944336, "rewards/rejected": -21.588512420654297, "step": 4598 }, { "epoch": 7.92, "learning_rate": 6.693582660433489e-08, "logits/chosen": -1.7029216289520264, "logits/rejected": -2.03692364692688, "logps/chosen": -125.92332458496094, "logps/rejected": -314.7115478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.261739730834961, "rewards/margins": 16.918136596679688, "rewards/rejected": -22.17987823486328, "step": 4599 }, { "epoch": 7.92, "learning_rate": 6.682957926051847e-08, "logits/chosen": -1.8034889698028564, "logits/rejected": -1.9970746040344238, "logps/chosen": -166.01174926757812, "logps/rejected": -308.8519287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.822278022766113, "rewards/margins": 14.776937484741211, "rewards/rejected": -23.599212646484375, "step": 4600 }, { "epoch": 7.92, "learning_rate": 6.672333191670209e-08, "logits/chosen": -1.6650258302688599, "logits/rejected": -1.801344871520996, "logps/chosen": -160.11404418945312, "logps/rejected": -320.30810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.188685894012451, "rewards/margins": 14.446148872375488, "rewards/rejected": -21.63483428955078, "step": 4601 }, { "epoch": 7.92, "learning_rate": 6.661708457288567e-08, "logits/chosen": -1.772626280784607, "logits/rejected": -1.8649705648422241, "logps/chosen": -136.59364318847656, "logps/rejected": -338.57464599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.269766330718994, "rewards/margins": 18.861173629760742, "rewards/rejected": -24.130939483642578, "step": 4602 }, { "epoch": 7.92, "learning_rate": 6.651083722906926e-08, "logits/chosen": -1.8608906269073486, "logits/rejected": -1.7250635623931885, "logps/chosen": -148.1765594482422, "logps/rejected": -300.7620544433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.935212135314941, "rewards/margins": 14.338094711303711, "rewards/rejected": -21.27330780029297, "step": 4603 }, { "epoch": 7.92, "learning_rate": 6.640458988525287e-08, "logits/chosen": -1.4458990097045898, "logits/rejected": -1.7370142936706543, "logps/chosen": -120.50914001464844, "logps/rejected": -281.66461181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.9023118019104, "rewards/margins": 15.733064651489258, "rewards/rejected": -21.6353759765625, "step": 4604 }, { "epoch": 7.93, "learning_rate": 6.629834254143646e-08, "logits/chosen": -1.649545431137085, "logits/rejected": -1.5613654851913452, "logps/chosen": -112.55584716796875, "logps/rejected": -235.0282745361328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.317140102386475, "rewards/margins": 12.336233139038086, "rewards/rejected": -16.653371810913086, "step": 4605 }, { "epoch": 7.93, "learning_rate": 6.619209519762007e-08, "logits/chosen": -1.857429027557373, "logits/rejected": -1.7117141485214233, "logps/chosen": -122.25885772705078, "logps/rejected": -270.4084777832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.861888885498047, "rewards/margins": 14.524526596069336, "rewards/rejected": -19.386415481567383, "step": 4606 }, { "epoch": 7.93, "learning_rate": 6.608584785380365e-08, "logits/chosen": -1.4396477937698364, "logits/rejected": -1.977199912071228, "logps/chosen": -129.20095825195312, "logps/rejected": -324.0313720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.162581443786621, "rewards/margins": 17.5731258392334, "rewards/rejected": -23.735706329345703, "step": 4607 }, { "epoch": 7.93, "learning_rate": 6.597960050998724e-08, "logits/chosen": -1.770540714263916, "logits/rejected": -1.8286648988723755, "logps/chosen": -141.17457580566406, "logps/rejected": -302.69354248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.285398483276367, "rewards/margins": 15.7066068649292, "rewards/rejected": -20.992006301879883, "step": 4608 }, { "epoch": 7.93, "learning_rate": 6.587335316617085e-08, "logits/chosen": -1.7925375699996948, "logits/rejected": -2.038849353790283, "logps/chosen": -178.9907989501953, "logps/rejected": -336.1141662597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.923663139343262, "rewards/margins": 13.823654174804688, "rewards/rejected": -23.747316360473633, "step": 4609 }, { "epoch": 7.93, "learning_rate": 6.576710582235444e-08, "logits/chosen": -1.5527704954147339, "logits/rejected": -1.9944077730178833, "logps/chosen": -186.7695770263672, "logps/rejected": -341.79107666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.82754898071289, "rewards/margins": 14.303413391113281, "rewards/rejected": -23.130962371826172, "step": 4610 }, { "epoch": 7.94, "learning_rate": 6.566085847853802e-08, "logits/chosen": -1.8182722330093384, "logits/rejected": -1.765716552734375, "logps/chosen": -157.1299285888672, "logps/rejected": -370.98809814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.525373458862305, "rewards/margins": 17.505199432373047, "rewards/rejected": -24.03057289123535, "step": 4611 }, { "epoch": 7.94, "learning_rate": 6.555461113472163e-08, "logits/chosen": -1.8710079193115234, "logits/rejected": -1.7378987073898315, "logps/chosen": -169.88868713378906, "logps/rejected": -293.79888916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.984488010406494, "rewards/margins": 13.83000659942627, "rewards/rejected": -20.814495086669922, "step": 4612 }, { "epoch": 7.94, "learning_rate": 6.544836379090522e-08, "logits/chosen": -1.974102258682251, "logits/rejected": -2.039088010787964, "logps/chosen": -191.46762084960938, "logps/rejected": -333.266845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.660881996154785, "rewards/margins": 16.01464080810547, "rewards/rejected": -24.67552375793457, "step": 4613 }, { "epoch": 7.94, "learning_rate": 6.534211644708882e-08, "logits/chosen": -1.5574936866760254, "logits/rejected": -1.946470022201538, "logps/chosen": -141.27731323242188, "logps/rejected": -348.9693603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.614289283752441, "rewards/margins": 18.380630493164062, "rewards/rejected": -24.994918823242188, "step": 4614 }, { "epoch": 7.94, "learning_rate": 6.523586910327242e-08, "logits/chosen": -1.6690056324005127, "logits/rejected": -1.9270247220993042, "logps/chosen": -165.07826232910156, "logps/rejected": -323.2269287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.092506408691406, "rewards/margins": 15.921102523803711, "rewards/rejected": -24.013608932495117, "step": 4615 }, { "epoch": 7.94, "learning_rate": 6.5129621759456e-08, "logits/chosen": -1.9055707454681396, "logits/rejected": -2.0092885494232178, "logps/chosen": -177.07591247558594, "logps/rejected": -304.22216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.049657821655273, "rewards/margins": 12.75362777709961, "rewards/rejected": -20.803285598754883, "step": 4616 }, { "epoch": 7.95, "learning_rate": 6.502337441563962e-08, "logits/chosen": -1.8763880729675293, "logits/rejected": -1.5788042545318604, "logps/chosen": -129.97039794921875, "logps/rejected": -298.6515197753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.962190628051758, "rewards/margins": 15.634340286254883, "rewards/rejected": -20.59653091430664, "step": 4617 }, { "epoch": 7.95, "learning_rate": 6.49171270718232e-08, "logits/chosen": -1.5606141090393066, "logits/rejected": -2.053013801574707, "logps/chosen": -151.62330627441406, "logps/rejected": -335.4300231933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.843287944793701, "rewards/margins": 15.155609130859375, "rewards/rejected": -21.998897552490234, "step": 4618 }, { "epoch": 7.95, "learning_rate": 6.481087972800679e-08, "logits/chosen": -1.425855040550232, "logits/rejected": -1.8071801662445068, "logps/chosen": -129.77500915527344, "logps/rejected": -304.0382080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.425269603729248, "rewards/margins": 16.17540740966797, "rewards/rejected": -21.600677490234375, "step": 4619 }, { "epoch": 7.95, "learning_rate": 6.47046323841904e-08, "logits/chosen": -1.6464776992797852, "logits/rejected": -2.0535199642181396, "logps/chosen": -125.31981658935547, "logps/rejected": -328.4720153808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.562023639678955, "rewards/margins": 17.925655364990234, "rewards/rejected": -23.487680435180664, "step": 4620 }, { "epoch": 7.95, "learning_rate": 6.459838504037399e-08, "logits/chosen": -1.6707782745361328, "logits/rejected": -2.0919978618621826, "logps/chosen": -141.34823608398438, "logps/rejected": -289.3580322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.107388019561768, "rewards/margins": 13.42579174041748, "rewards/rejected": -20.533178329467773, "step": 4621 }, { "epoch": 7.96, "learning_rate": 6.449213769655758e-08, "logits/chosen": -1.6949518918991089, "logits/rejected": -1.7289520502090454, "logps/chosen": -153.1040802001953, "logps/rejected": -326.0101013183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.468527317047119, "rewards/margins": 15.99028205871582, "rewards/rejected": -23.45880889892578, "step": 4622 }, { "epoch": 7.96, "learning_rate": 6.438589035274118e-08, "logits/chosen": -1.8190374374389648, "logits/rejected": -1.8738940954208374, "logps/chosen": -189.22804260253906, "logps/rejected": -316.9946594238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.654187202453613, "rewards/margins": 12.528916358947754, "rewards/rejected": -21.183103561401367, "step": 4623 }, { "epoch": 7.96, "learning_rate": 6.427964300892477e-08, "logits/chosen": -1.7867941856384277, "logits/rejected": -1.605470061302185, "logps/chosen": -171.38284301757812, "logps/rejected": -296.7015075683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.597827911376953, "rewards/margins": 12.971290588378906, "rewards/rejected": -21.56911849975586, "step": 4624 }, { "epoch": 7.96, "learning_rate": 6.417339566510838e-08, "logits/chosen": -1.930132508277893, "logits/rejected": -1.6628587245941162, "logps/chosen": -172.85482788085938, "logps/rejected": -301.65960693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.24614143371582, "rewards/margins": 14.988958358764648, "rewards/rejected": -21.23509979248047, "step": 4625 }, { "epoch": 7.96, "learning_rate": 6.406714832129197e-08, "logits/chosen": -1.7689683437347412, "logits/rejected": -1.5393071174621582, "logps/chosen": -199.48455810546875, "logps/rejected": -303.75140380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.383818626403809, "rewards/margins": 11.714163780212402, "rewards/rejected": -21.097980499267578, "step": 4626 }, { "epoch": 7.96, "learning_rate": 6.396090097747555e-08, "logits/chosen": -1.881941795349121, "logits/rejected": -1.872469425201416, "logps/chosen": -178.56729125976562, "logps/rejected": -285.42047119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.801299571990967, "rewards/margins": 12.164419174194336, "rewards/rejected": -19.96571922302246, "step": 4627 }, { "epoch": 7.97, "learning_rate": 6.385465363365916e-08, "logits/chosen": -1.8995308876037598, "logits/rejected": -1.938805103302002, "logps/chosen": -136.42648315429688, "logps/rejected": -273.8357238769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5503082275390625, "rewards/margins": 12.934097290039062, "rewards/rejected": -19.484405517578125, "step": 4628 }, { "epoch": 7.97, "learning_rate": 6.374840628984275e-08, "logits/chosen": -1.5502978563308716, "logits/rejected": -1.898338794708252, "logps/chosen": -162.6027069091797, "logps/rejected": -344.0511779785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.9845123291015625, "rewards/margins": 16.892047882080078, "rewards/rejected": -24.876562118530273, "step": 4629 }, { "epoch": 7.97, "learning_rate": 6.364215894602634e-08, "logits/chosen": -1.86594820022583, "logits/rejected": -1.9987009763717651, "logps/chosen": -185.21807861328125, "logps/rejected": -375.14801025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.075757026672363, "rewards/margins": 17.247596740722656, "rewards/rejected": -25.323354721069336, "step": 4630 }, { "epoch": 7.97, "learning_rate": 6.353591160220995e-08, "logits/chosen": -1.3171354532241821, "logits/rejected": -1.9327592849731445, "logps/chosen": -122.0628433227539, "logps/rejected": -291.4831237792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.063714981079102, "rewards/margins": 13.946710586547852, "rewards/rejected": -20.010425567626953, "step": 4631 }, { "epoch": 7.97, "learning_rate": 6.342966425839353e-08, "logits/chosen": -1.4971598386764526, "logits/rejected": -2.114718198776245, "logps/chosen": -148.11495971679688, "logps/rejected": -333.3026428222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0626220703125, "rewards/margins": 15.793224334716797, "rewards/rejected": -22.855846405029297, "step": 4632 }, { "epoch": 7.97, "learning_rate": 6.332341691457713e-08, "logits/chosen": -1.954484462738037, "logits/rejected": -1.7896969318389893, "logps/chosen": -134.69595336914062, "logps/rejected": -297.4094543457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.059292316436768, "rewards/margins": 16.522890090942383, "rewards/rejected": -22.582183837890625, "step": 4633 }, { "epoch": 7.98, "learning_rate": 6.321716957076073e-08, "logits/chosen": -1.9525264501571655, "logits/rejected": -1.6510229110717773, "logps/chosen": -149.24916076660156, "logps/rejected": -323.5730285644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.27916955947876, "rewards/margins": 16.633039474487305, "rewards/rejected": -22.912208557128906, "step": 4634 }, { "epoch": 7.98, "learning_rate": 6.311092222694432e-08, "logits/chosen": -1.960249662399292, "logits/rejected": -2.04697322845459, "logps/chosen": -98.65177917480469, "logps/rejected": -272.3845520019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.150216817855835, "rewards/margins": 16.2302188873291, "rewards/rejected": -19.380435943603516, "step": 4635 }, { "epoch": 7.98, "learning_rate": 6.300467488312793e-08, "logits/chosen": -1.7858850955963135, "logits/rejected": -1.8468852043151855, "logps/chosen": -167.59793090820312, "logps/rejected": -337.6805725097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.02637767791748, "rewards/margins": 16.143428802490234, "rewards/rejected": -25.1698055267334, "step": 4636 }, { "epoch": 7.98, "learning_rate": 6.289842753931151e-08, "logits/chosen": -1.7335867881774902, "logits/rejected": -1.5865247249603271, "logps/chosen": -183.8465576171875, "logps/rejected": -318.4063415527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.087426662445068, "rewards/margins": 14.23552417755127, "rewards/rejected": -21.32295036315918, "step": 4637 }, { "epoch": 7.98, "learning_rate": 6.27921801954951e-08, "logits/chosen": -1.9245390892028809, "logits/rejected": -1.8028939962387085, "logps/chosen": -165.27093505859375, "logps/rejected": -315.22528076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.720591068267822, "rewards/margins": 15.701661109924316, "rewards/rejected": -23.422250747680664, "step": 4638 }, { "epoch": 7.98, "learning_rate": 6.268593285167871e-08, "logits/chosen": -1.642931342124939, "logits/rejected": -1.6547918319702148, "logps/chosen": -156.93789672851562, "logps/rejected": -274.8967590332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.745288372039795, "rewards/margins": 12.627638816833496, "rewards/rejected": -19.372928619384766, "step": 4639 }, { "epoch": 7.99, "learning_rate": 6.25796855078623e-08, "logits/chosen": -1.761683464050293, "logits/rejected": -1.9750641584396362, "logps/chosen": -149.39785766601562, "logps/rejected": -319.9456787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.559685707092285, "rewards/margins": 15.945013999938965, "rewards/rejected": -23.50469970703125, "step": 4640 }, { "epoch": 7.99, "learning_rate": 6.24734381640459e-08, "logits/chosen": -1.7541723251342773, "logits/rejected": -1.918810248374939, "logps/chosen": -163.04202270507812, "logps/rejected": -296.50811767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.692741394042969, "rewards/margins": 13.430208206176758, "rewards/rejected": -22.122949600219727, "step": 4641 }, { "epoch": 7.99, "learning_rate": 6.236719082022948e-08, "logits/chosen": -1.837612509727478, "logits/rejected": -1.787050485610962, "logps/chosen": -153.4708251953125, "logps/rejected": -264.6013488769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.288278579711914, "rewards/margins": 11.771422386169434, "rewards/rejected": -18.059701919555664, "step": 4642 }, { "epoch": 7.99, "learning_rate": 6.226094347641308e-08, "logits/chosen": -1.4881296157836914, "logits/rejected": -1.8253264427185059, "logps/chosen": -137.6731414794922, "logps/rejected": -285.0447692871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.012856960296631, "rewards/margins": 13.94687271118164, "rewards/rejected": -19.95973014831543, "step": 4643 }, { "epoch": 7.99, "learning_rate": 6.215469613259668e-08, "logits/chosen": -1.7630422115325928, "logits/rejected": -1.732788324356079, "logps/chosen": -161.60092163085938, "logps/rejected": -293.1522216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.804401397705078, "rewards/margins": 14.845117568969727, "rewards/rejected": -21.649520874023438, "step": 4644 }, { "epoch": 7.99, "learning_rate": 6.204844878878028e-08, "logits/chosen": -1.7605293989181519, "logits/rejected": -1.876335620880127, "logps/chosen": -122.69154357910156, "logps/rejected": -276.0313720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.073178291320801, "rewards/margins": 15.229573249816895, "rewards/rejected": -20.302753448486328, "step": 4645 }, { "epoch": 8.0, "learning_rate": 6.194220144496386e-08, "logits/chosen": -1.9689784049987793, "logits/rejected": -1.6836743354797363, "logps/chosen": -152.21180725097656, "logps/rejected": -278.7457275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.867086410522461, "rewards/margins": 13.186846733093262, "rewards/rejected": -20.053932189941406, "step": 4646 }, { "epoch": 8.0, "learning_rate": 6.183595410114746e-08, "logits/chosen": -1.6459743976593018, "logits/rejected": -2.0789809226989746, "logps/chosen": -167.52471923828125, "logps/rejected": -366.64105224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.87132453918457, "rewards/margins": 17.601839065551758, "rewards/rejected": -26.473163604736328, "step": 4647 }, { "epoch": 8.0, "learning_rate": 6.172970675733106e-08, "logits/chosen": -1.54844331741333, "logits/rejected": -1.8614778518676758, "logps/chosen": -137.27938842773438, "logps/rejected": -338.376220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6032867431640625, "rewards/margins": 18.84720230102539, "rewards/rejected": -25.45048713684082, "step": 4648 }, { "epoch": 8.0, "learning_rate": 6.162345941351466e-08, "logits/chosen": -2.041565418243408, "logits/rejected": -1.483259916305542, "logps/chosen": -112.6922607421875, "logps/rejected": -261.29437255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3595805168151855, "rewards/margins": 16.056537628173828, "rewards/rejected": -19.416118621826172, "step": 4649 }, { "epoch": 8.0, "learning_rate": 6.151721206969825e-08, "logits/chosen": -1.4714229106903076, "logits/rejected": -1.843991994857788, "logps/chosen": -157.35977172851562, "logps/rejected": -390.9154052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.847859859466553, "rewards/margins": 20.45281219482422, "rewards/rejected": -28.300670623779297, "step": 4650 }, { "epoch": 8.01, "learning_rate": 6.141096472588185e-08, "logits/chosen": -2.0238137245178223, "logits/rejected": -1.6964516639709473, "logps/chosen": -137.37454223632812, "logps/rejected": -255.62286376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.262970924377441, "rewards/margins": 13.353474617004395, "rewards/rejected": -19.616445541381836, "step": 4651 }, { "epoch": 8.01, "learning_rate": 6.130471738206545e-08, "logits/chosen": -1.9333908557891846, "logits/rejected": -2.073652744293213, "logps/chosen": -207.4952850341797, "logps/rejected": -362.33056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.07053279876709, "rewards/margins": 16.154903411865234, "rewards/rejected": -27.22543716430664, "step": 4652 }, { "epoch": 8.01, "learning_rate": 6.119847003824904e-08, "logits/chosen": -1.8273332118988037, "logits/rejected": -2.005880355834961, "logps/chosen": -108.52671813964844, "logps/rejected": -275.29913330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.479026794433594, "rewards/margins": 16.26972198486328, "rewards/rejected": -20.748750686645508, "step": 4653 }, { "epoch": 8.01, "learning_rate": 6.109222269443263e-08, "logits/chosen": -1.5789453983306885, "logits/rejected": -2.24221134185791, "logps/chosen": -131.39930725097656, "logps/rejected": -297.5166320800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.172280311584473, "rewards/margins": 13.702824592590332, "rewards/rejected": -18.875104904174805, "step": 4654 }, { "epoch": 8.01, "learning_rate": 6.098597535061623e-08, "logits/chosen": -1.9741251468658447, "logits/rejected": -1.7473899126052856, "logps/chosen": -156.3311767578125, "logps/rejected": -300.0843811035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.339026927947998, "rewards/margins": 14.378982543945312, "rewards/rejected": -21.718008041381836, "step": 4655 }, { "epoch": 8.01, "learning_rate": 6.087972800679983e-08, "logits/chosen": -1.7135093212127686, "logits/rejected": -1.9608335494995117, "logps/chosen": -202.9971923828125, "logps/rejected": -334.1420593261719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.0771484375, "rewards/margins": 13.991889953613281, "rewards/rejected": -24.06903839111328, "step": 4656 }, { "epoch": 8.02, "learning_rate": 6.077348066298343e-08, "logits/chosen": -1.8397798538208008, "logits/rejected": -1.7931103706359863, "logps/chosen": -142.8107452392578, "logps/rejected": -303.6715393066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.568958282470703, "rewards/margins": 14.911748886108398, "rewards/rejected": -22.480709075927734, "step": 4657 }, { "epoch": 8.02, "learning_rate": 6.066723331916701e-08, "logits/chosen": -1.8354852199554443, "logits/rejected": -1.5372331142425537, "logps/chosen": -162.9949493408203, "logps/rejected": -284.000244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.5831170082092285, "rewards/margins": 13.247964859008789, "rewards/rejected": -20.83108139038086, "step": 4658 }, { "epoch": 8.02, "learning_rate": 6.056098597535061e-08, "logits/chosen": -1.7521872520446777, "logits/rejected": -2.028165817260742, "logps/chosen": -160.48272705078125, "logps/rejected": -295.9568176269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.450882911682129, "rewards/margins": 12.34868049621582, "rewards/rejected": -19.799562454223633, "step": 4659 }, { "epoch": 8.02, "learning_rate": 6.045473863153421e-08, "logits/chosen": -1.745092749595642, "logits/rejected": -1.80464506149292, "logps/chosen": -141.47897338867188, "logps/rejected": -324.62030029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.348292827606201, "rewards/margins": 18.68171501159668, "rewards/rejected": -24.03000831604004, "step": 4660 }, { "epoch": 8.02, "learning_rate": 6.034849128771781e-08, "logits/chosen": -1.7383546829223633, "logits/rejected": -1.885129690170288, "logps/chosen": -119.52092742919922, "logps/rejected": -329.2842102050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.133480548858643, "rewards/margins": 19.476511001586914, "rewards/rejected": -24.6099910736084, "step": 4661 }, { "epoch": 8.02, "learning_rate": 6.02422439439014e-08, "logits/chosen": -1.419785499572754, "logits/rejected": -1.8106034994125366, "logps/chosen": -158.08914184570312, "logps/rejected": -368.9082946777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.650188446044922, "rewards/margins": 19.393871307373047, "rewards/rejected": -26.044057846069336, "step": 4662 }, { "epoch": 8.03, "learning_rate": 6.013599660008499e-08, "logits/chosen": -1.7648411989212036, "logits/rejected": -1.8901926279067993, "logps/chosen": -154.94129943847656, "logps/rejected": -292.46429443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.903058052062988, "rewards/margins": 14.076542854309082, "rewards/rejected": -21.97960090637207, "step": 4663 }, { "epoch": 8.03, "learning_rate": 6.002974925626859e-08, "logits/chosen": -1.6025047302246094, "logits/rejected": -1.9564945697784424, "logps/chosen": -129.79811096191406, "logps/rejected": -326.81744384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.52410364151001, "rewards/margins": 18.751407623291016, "rewards/rejected": -23.2755126953125, "step": 4664 }, { "epoch": 8.03, "learning_rate": 5.992350191245219e-08, "logits/chosen": -1.875859260559082, "logits/rejected": -1.6213338375091553, "logps/chosen": -153.70767211914062, "logps/rejected": -296.43450927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0973968505859375, "rewards/margins": 14.26814079284668, "rewards/rejected": -21.365537643432617, "step": 4665 }, { "epoch": 8.03, "learning_rate": 5.981725456863578e-08, "logits/chosen": -1.9924155473709106, "logits/rejected": -1.4972739219665527, "logps/chosen": -142.44664001464844, "logps/rejected": -282.8533020019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.879666328430176, "rewards/margins": 13.837620735168457, "rewards/rejected": -20.71728515625, "step": 4666 }, { "epoch": 8.03, "learning_rate": 5.971100722481938e-08, "logits/chosen": -1.6751946210861206, "logits/rejected": -1.9788758754730225, "logps/chosen": -152.3749237060547, "logps/rejected": -305.46014404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.623808860778809, "rewards/margins": 15.346395492553711, "rewards/rejected": -22.970203399658203, "step": 4667 }, { "epoch": 8.03, "learning_rate": 5.960475988100297e-08, "logits/chosen": -1.7804198265075684, "logits/rejected": -1.7470760345458984, "logps/chosen": -177.25686645507812, "logps/rejected": -289.7480163574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.239104270935059, "rewards/margins": 12.797253608703613, "rewards/rejected": -22.036357879638672, "step": 4668 }, { "epoch": 8.04, "learning_rate": 5.949851253718657e-08, "logits/chosen": -1.484196424484253, "logits/rejected": -1.822399616241455, "logps/chosen": -147.1027069091797, "logps/rejected": -290.62384033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.191195487976074, "rewards/margins": 13.151567459106445, "rewards/rejected": -20.342761993408203, "step": 4669 }, { "epoch": 8.04, "learning_rate": 5.9392265193370166e-08, "logits/chosen": -1.7593401670455933, "logits/rejected": -1.6049342155456543, "logps/chosen": -145.8865203857422, "logps/rejected": -294.64886474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.552482604980469, "rewards/margins": 15.063056945800781, "rewards/rejected": -22.61553955078125, "step": 4670 }, { "epoch": 8.04, "learning_rate": 5.928601784955376e-08, "logits/chosen": -1.8416327238082886, "logits/rejected": -1.8901422023773193, "logps/chosen": -131.23388671875, "logps/rejected": -274.7389831542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.579455852508545, "rewards/margins": 13.901782035827637, "rewards/rejected": -18.481237411499023, "step": 4671 }, { "epoch": 8.04, "learning_rate": 5.917977050573735e-08, "logits/chosen": -1.9548790454864502, "logits/rejected": -1.6441099643707275, "logps/chosen": -143.388916015625, "logps/rejected": -283.6007080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.987788200378418, "rewards/margins": 13.722661972045898, "rewards/rejected": -20.710451126098633, "step": 4672 }, { "epoch": 8.04, "learning_rate": 5.907352316192095e-08, "logits/chosen": -1.9381496906280518, "logits/rejected": -1.9483838081359863, "logps/chosen": -172.56005859375, "logps/rejected": -307.77313232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.68364143371582, "rewards/margins": 13.461043357849121, "rewards/rejected": -22.144683837890625, "step": 4673 }, { "epoch": 8.04, "learning_rate": 5.896727581810455e-08, "logits/chosen": -1.2857840061187744, "logits/rejected": -1.974210500717163, "logps/chosen": -151.88914489746094, "logps/rejected": -357.51922607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5243916511535645, "rewards/margins": 18.44554901123047, "rewards/rejected": -24.969940185546875, "step": 4674 }, { "epoch": 8.05, "learning_rate": 5.886102847428814e-08, "logits/chosen": -1.9416862726211548, "logits/rejected": -1.8707393407821655, "logps/chosen": -173.57986450195312, "logps/rejected": -275.04949951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.060837745666504, "rewards/margins": 10.23939037322998, "rewards/rejected": -19.300228118896484, "step": 4675 }, { "epoch": 8.05, "learning_rate": 5.875478113047173e-08, "logits/chosen": -1.4581037759780884, "logits/rejected": -1.4973013401031494, "logps/chosen": -115.98297119140625, "logps/rejected": -316.7027282714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.310940265655518, "rewards/margins": 19.19182586669922, "rewards/rejected": -23.50276756286621, "step": 4676 }, { "epoch": 8.05, "learning_rate": 5.864853378665533e-08, "logits/chosen": -1.5900535583496094, "logits/rejected": -1.730055332183838, "logps/chosen": -176.3232879638672, "logps/rejected": -293.80621337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.412423133850098, "rewards/margins": 11.990764617919922, "rewards/rejected": -21.403188705444336, "step": 4677 }, { "epoch": 8.05, "learning_rate": 5.854228644283893e-08, "logits/chosen": -1.7671372890472412, "logits/rejected": -1.9877557754516602, "logps/chosen": -136.92030334472656, "logps/rejected": -280.447021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.962155342102051, "rewards/margins": 14.384522438049316, "rewards/rejected": -21.346677780151367, "step": 4678 }, { "epoch": 8.05, "learning_rate": 5.8436039099022516e-08, "logits/chosen": -2.137889862060547, "logits/rejected": -1.7935104370117188, "logps/chosen": -132.84976196289062, "logps/rejected": -304.0411682128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.323574066162109, "rewards/margins": 17.566848754882812, "rewards/rejected": -22.890422821044922, "step": 4679 }, { "epoch": 8.06, "learning_rate": 5.8329791755206115e-08, "logits/chosen": -1.7469117641448975, "logits/rejected": -1.713163137435913, "logps/chosen": -154.23678588867188, "logps/rejected": -338.0646057128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.630596160888672, "rewards/margins": 18.116268157958984, "rewards/rejected": -24.746864318847656, "step": 4680 }, { "epoch": 8.06, "learning_rate": 5.8223544411389714e-08, "logits/chosen": -1.7013812065124512, "logits/rejected": -1.6731927394866943, "logps/chosen": -125.31507873535156, "logps/rejected": -285.4322509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.962856769561768, "rewards/margins": 16.36090850830078, "rewards/rejected": -21.32376480102539, "step": 4681 }, { "epoch": 8.06, "learning_rate": 5.811729706757331e-08, "logits/chosen": -1.689950942993164, "logits/rejected": -2.00630521774292, "logps/chosen": -154.42706298828125, "logps/rejected": -335.03900146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.330205917358398, "rewards/margins": 16.67942237854004, "rewards/rejected": -24.009628295898438, "step": 4682 }, { "epoch": 8.06, "learning_rate": 5.80110497237569e-08, "logits/chosen": -1.589933156967163, "logits/rejected": -1.8662104606628418, "logps/chosen": -137.99826049804688, "logps/rejected": -323.6539306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.433128833770752, "rewards/margins": 16.878955841064453, "rewards/rejected": -23.312084197998047, "step": 4683 }, { "epoch": 8.06, "learning_rate": 5.79048023799405e-08, "logits/chosen": -1.7737061977386475, "logits/rejected": -1.9794723987579346, "logps/chosen": -96.17216491699219, "logps/rejected": -252.00657653808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5888640880584717, "rewards/margins": 14.597593307495117, "rewards/rejected": -17.18645668029785, "step": 4684 }, { "epoch": 8.06, "learning_rate": 5.7798555036124096e-08, "logits/chosen": -1.803727388381958, "logits/rejected": -1.6559174060821533, "logps/chosen": -114.78227233886719, "logps/rejected": -260.1607666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.041191816329956, "rewards/margins": 14.986236572265625, "rewards/rejected": -18.027427673339844, "step": 4685 }, { "epoch": 8.07, "learning_rate": 5.7692307692307695e-08, "logits/chosen": -1.693480134010315, "logits/rejected": -1.6560579538345337, "logps/chosen": -162.57142639160156, "logps/rejected": -282.5174255371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.831441879272461, "rewards/margins": 12.821784973144531, "rewards/rejected": -20.65322494506836, "step": 4686 }, { "epoch": 8.07, "learning_rate": 5.758606034849128e-08, "logits/chosen": -2.124415397644043, "logits/rejected": -1.851864218711853, "logps/chosen": -183.02938842773438, "logps/rejected": -318.7294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.099448204040527, "rewards/margins": 13.902925491333008, "rewards/rejected": -23.00237274169922, "step": 4687 }, { "epoch": 8.07, "learning_rate": 5.747981300467488e-08, "logits/chosen": -2.002473831176758, "logits/rejected": -2.108102560043335, "logps/chosen": -208.03060913085938, "logps/rejected": -364.82574462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.903946876525879, "rewards/margins": 14.898138046264648, "rewards/rejected": -25.802085876464844, "step": 4688 }, { "epoch": 8.07, "learning_rate": 5.737356566085848e-08, "logits/chosen": -1.9364572763442993, "logits/rejected": -1.7165592908859253, "logps/chosen": -200.11387634277344, "logps/rejected": -363.77520751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.140242576599121, "rewards/margins": 16.961288452148438, "rewards/rejected": -26.101531982421875, "step": 4689 }, { "epoch": 8.07, "learning_rate": 5.726731831704207e-08, "logits/chosen": -1.659809947013855, "logits/rejected": -1.4581102132797241, "logps/chosen": -181.73870849609375, "logps/rejected": -335.2507629394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.335128784179688, "rewards/margins": 15.732075691223145, "rewards/rejected": -25.067203521728516, "step": 4690 }, { "epoch": 8.07, "learning_rate": 5.7161070973225663e-08, "logits/chosen": -1.8095735311508179, "logits/rejected": -1.598459005355835, "logps/chosen": -190.37171936035156, "logps/rejected": -329.9260559082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.857830047607422, "rewards/margins": 14.122207641601562, "rewards/rejected": -23.980037689208984, "step": 4691 }, { "epoch": 8.08, "learning_rate": 5.705482362940926e-08, "logits/chosen": -1.798910140991211, "logits/rejected": -1.948353886604309, "logps/chosen": -168.2755584716797, "logps/rejected": -281.8523864746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.864173889160156, "rewards/margins": 12.293476104736328, "rewards/rejected": -20.157649993896484, "step": 4692 }, { "epoch": 8.08, "learning_rate": 5.694857628559286e-08, "logits/chosen": -1.9445953369140625, "logits/rejected": -1.7569950819015503, "logps/chosen": -194.76011657714844, "logps/rejected": -346.18304443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.340621948242188, "rewards/margins": 15.535883903503418, "rewards/rejected": -24.87650489807129, "step": 4693 }, { "epoch": 8.08, "learning_rate": 5.6842328941776453e-08, "logits/chosen": -1.7258721590042114, "logits/rejected": -1.9839649200439453, "logps/chosen": -159.14373779296875, "logps/rejected": -333.3996887207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.085892677307129, "rewards/margins": 16.478652954101562, "rewards/rejected": -23.564546585083008, "step": 4694 }, { "epoch": 8.08, "learning_rate": 5.6736081597960046e-08, "logits/chosen": -1.85749351978302, "logits/rejected": -2.0995020866394043, "logps/chosen": -130.50538635253906, "logps/rejected": -347.8285217285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.444036483764648, "rewards/margins": 19.99750518798828, "rewards/rejected": -25.441543579101562, "step": 4695 }, { "epoch": 8.08, "learning_rate": 5.6629834254143645e-08, "logits/chosen": -1.8759698867797852, "logits/rejected": -1.5024306774139404, "logps/chosen": -171.1817626953125, "logps/rejected": -309.059326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.930482864379883, "rewards/margins": 14.897113800048828, "rewards/rejected": -22.827598571777344, "step": 4696 }, { "epoch": 8.08, "learning_rate": 5.6523586910327244e-08, "logits/chosen": -1.8504759073257446, "logits/rejected": -2.020246982574463, "logps/chosen": -165.64813232421875, "logps/rejected": -304.5019836425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8020219802856445, "rewards/margins": 14.789871215820312, "rewards/rejected": -21.591894149780273, "step": 4697 }, { "epoch": 8.09, "learning_rate": 5.6417339566510836e-08, "logits/chosen": -1.4423257112503052, "logits/rejected": -1.837033748626709, "logps/chosen": -120.35086822509766, "logps/rejected": -280.36810302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.191410541534424, "rewards/margins": 15.084047317504883, "rewards/rejected": -20.27545738220215, "step": 4698 }, { "epoch": 8.09, "learning_rate": 5.631109222269443e-08, "logits/chosen": -2.0623695850372314, "logits/rejected": -1.7623685598373413, "logps/chosen": -219.82505798339844, "logps/rejected": -341.5627746582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.422380447387695, "rewards/margins": 13.516573905944824, "rewards/rejected": -23.938953399658203, "step": 4699 }, { "epoch": 8.09, "learning_rate": 5.620484487887803e-08, "logits/chosen": -1.8728855848312378, "logits/rejected": -2.015490770339966, "logps/chosen": -139.27870178222656, "logps/rejected": -279.827880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.531804084777832, "rewards/margins": 12.752281188964844, "rewards/rejected": -19.284086227416992, "step": 4700 }, { "epoch": 8.09, "learning_rate": 5.609859753506162e-08, "logits/chosen": -1.9559026956558228, "logits/rejected": -1.9646942615509033, "logps/chosen": -138.10128784179688, "logps/rejected": -374.0997314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.937862873077393, "rewards/margins": 21.57923126220703, "rewards/rejected": -27.517093658447266, "step": 4701 }, { "epoch": 8.09, "learning_rate": 5.599235019124521e-08, "logits/chosen": -1.8916244506835938, "logits/rejected": -1.8918043375015259, "logps/chosen": -124.35125732421875, "logps/rejected": -295.12640380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.334333896636963, "rewards/margins": 17.716991424560547, "rewards/rejected": -24.05132484436035, "step": 4702 }, { "epoch": 8.09, "learning_rate": 5.588610284742881e-08, "logits/chosen": -1.707804560661316, "logits/rejected": -2.0741851329803467, "logps/chosen": -156.27810668945312, "logps/rejected": -317.7196044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.725264549255371, "rewards/margins": 14.53205394744873, "rewards/rejected": -22.2573184967041, "step": 4703 }, { "epoch": 8.1, "learning_rate": 5.577985550361241e-08, "logits/chosen": -2.0322303771972656, "logits/rejected": -1.439400553703308, "logps/chosen": -176.12156677246094, "logps/rejected": -304.25616455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.911402702331543, "rewards/margins": 15.314092636108398, "rewards/rejected": -22.225496292114258, "step": 4704 }, { "epoch": 8.1, "learning_rate": 5.5673608159796e-08, "logits/chosen": -1.732116937637329, "logits/rejected": -1.6039600372314453, "logps/chosen": -135.00662231445312, "logps/rejected": -278.6370849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6984405517578125, "rewards/margins": 14.384467124938965, "rewards/rejected": -21.082908630371094, "step": 4705 }, { "epoch": 8.1, "learning_rate": 5.5567360815979594e-08, "logits/chosen": -1.70005464553833, "logits/rejected": -1.6755297183990479, "logps/chosen": -152.5371551513672, "logps/rejected": -278.8050842285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.348814010620117, "rewards/margins": 13.673299789428711, "rewards/rejected": -21.02211570739746, "step": 4706 }, { "epoch": 8.1, "learning_rate": 5.546111347216319e-08, "logits/chosen": -1.8638744354248047, "logits/rejected": -1.8826754093170166, "logps/chosen": -146.6900634765625, "logps/rejected": -332.4649353027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3942952156066895, "rewards/margins": 17.63851547241211, "rewards/rejected": -25.03281021118164, "step": 4707 }, { "epoch": 8.1, "learning_rate": 5.535486612834679e-08, "logits/chosen": -1.9106855392456055, "logits/rejected": -2.068964958190918, "logps/chosen": -164.19602966308594, "logps/rejected": -291.8213806152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.60927677154541, "rewards/margins": 12.40248966217041, "rewards/rejected": -21.01176643371582, "step": 4708 }, { "epoch": 8.1, "learning_rate": 5.5248618784530384e-08, "logits/chosen": -1.7467049360275269, "logits/rejected": -2.130131959915161, "logps/chosen": -154.23690795898438, "logps/rejected": -336.7027587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.986855506896973, "rewards/margins": 16.932889938354492, "rewards/rejected": -24.91974639892578, "step": 4709 }, { "epoch": 8.11, "learning_rate": 5.5142371440713976e-08, "logits/chosen": -1.7794183492660522, "logits/rejected": -1.9670673608779907, "logps/chosen": -147.04196166992188, "logps/rejected": -251.23825073242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.374963760375977, "rewards/margins": 10.969986915588379, "rewards/rejected": -16.34494972229004, "step": 4710 }, { "epoch": 8.11, "learning_rate": 5.5036124096897575e-08, "logits/chosen": -1.571324348449707, "logits/rejected": -1.861460566520691, "logps/chosen": -147.2290802001953, "logps/rejected": -300.6445617675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.492010116577148, "rewards/margins": 14.810185432434082, "rewards/rejected": -21.302196502685547, "step": 4711 }, { "epoch": 8.11, "learning_rate": 5.4929876753081174e-08, "logits/chosen": -1.6527103185653687, "logits/rejected": -1.953627109527588, "logps/chosen": -137.337158203125, "logps/rejected": -327.1496276855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.443193435668945, "rewards/margins": 17.27785301208496, "rewards/rejected": -23.721046447753906, "step": 4712 }, { "epoch": 8.11, "learning_rate": 5.4823629409264766e-08, "logits/chosen": -1.7841824293136597, "logits/rejected": -1.8297364711761475, "logps/chosen": -170.5896759033203, "logps/rejected": -367.51025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.258831024169922, "rewards/margins": 16.902040481567383, "rewards/rejected": -23.160871505737305, "step": 4713 }, { "epoch": 8.11, "learning_rate": 5.471738206544836e-08, "logits/chosen": -1.6153678894042969, "logits/rejected": -1.7742819786071777, "logps/chosen": -158.3221435546875, "logps/rejected": -362.51953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.454587459564209, "rewards/margins": 18.288990020751953, "rewards/rejected": -25.74357795715332, "step": 4714 }, { "epoch": 8.12, "learning_rate": 5.461113472163196e-08, "logits/chosen": -1.9191371202468872, "logits/rejected": -1.8422472476959229, "logps/chosen": -125.35160064697266, "logps/rejected": -263.9664306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9134626388549805, "rewards/margins": 13.878536224365234, "rewards/rejected": -18.79199981689453, "step": 4715 }, { "epoch": 8.12, "learning_rate": 5.450488737781555e-08, "logits/chosen": -1.521897554397583, "logits/rejected": -1.9570896625518799, "logps/chosen": -127.95484924316406, "logps/rejected": -274.4607849121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.960139274597168, "rewards/margins": 14.01850414276123, "rewards/rejected": -18.978641510009766, "step": 4716 }, { "epoch": 8.12, "learning_rate": 5.439864003399915e-08, "logits/chosen": -1.5976839065551758, "logits/rejected": -2.029179573059082, "logps/chosen": -115.21165466308594, "logps/rejected": -300.3738708496094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.343434810638428, "rewards/margins": 17.77768325805664, "rewards/rejected": -22.121118545532227, "step": 4717 }, { "epoch": 8.12, "learning_rate": 5.429239269018274e-08, "logits/chosen": -1.647544264793396, "logits/rejected": -1.577130913734436, "logps/chosen": -161.84292602539062, "logps/rejected": -302.7091064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.697717666625977, "rewards/margins": 14.17819595336914, "rewards/rejected": -21.875913619995117, "step": 4718 }, { "epoch": 8.12, "learning_rate": 5.418614534636634e-08, "logits/chosen": -1.8940421342849731, "logits/rejected": -1.6253856420516968, "logps/chosen": -168.24008178710938, "logps/rejected": -259.66778564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.538165092468262, "rewards/margins": 10.443439483642578, "rewards/rejected": -17.981603622436523, "step": 4719 }, { "epoch": 8.12, "learning_rate": 5.407989800254993e-08, "logits/chosen": -1.9183356761932373, "logits/rejected": -1.784119725227356, "logps/chosen": -173.58705139160156, "logps/rejected": -315.3673400878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.258066177368164, "rewards/margins": 13.137271881103516, "rewards/rejected": -22.395339965820312, "step": 4720 }, { "epoch": 8.13, "learning_rate": 5.397365065873353e-08, "logits/chosen": -1.8565518856048584, "logits/rejected": -1.7574206590652466, "logps/chosen": -155.19558715820312, "logps/rejected": -266.5689392089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.179001808166504, "rewards/margins": 12.286067008972168, "rewards/rejected": -20.465068817138672, "step": 4721 }, { "epoch": 8.13, "learning_rate": 5.3867403314917123e-08, "logits/chosen": -1.7872836589813232, "logits/rejected": -1.7966039180755615, "logps/chosen": -130.23890686035156, "logps/rejected": -322.95416259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.350461483001709, "rewards/margins": 17.442970275878906, "rewards/rejected": -23.79343032836914, "step": 4722 }, { "epoch": 8.13, "learning_rate": 5.376115597110072e-08, "logits/chosen": -1.798017978668213, "logits/rejected": -1.6947925090789795, "logps/chosen": -125.35782623291016, "logps/rejected": -250.50148010253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.177661895751953, "rewards/margins": 12.976006507873535, "rewards/rejected": -18.153669357299805, "step": 4723 }, { "epoch": 8.13, "learning_rate": 5.3654908627284315e-08, "logits/chosen": -1.7641288042068481, "logits/rejected": -1.8821477890014648, "logps/chosen": -111.8358154296875, "logps/rejected": -264.925537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9749679565429688, "rewards/margins": 14.46273422241211, "rewards/rejected": -18.437702178955078, "step": 4724 }, { "epoch": 8.13, "learning_rate": 5.3548661283467914e-08, "logits/chosen": -1.8516206741333008, "logits/rejected": -2.0613036155700684, "logps/chosen": -127.03890228271484, "logps/rejected": -337.45562744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.906742572784424, "rewards/margins": 19.218868255615234, "rewards/rejected": -24.125612258911133, "step": 4725 }, { "epoch": 8.13, "learning_rate": 5.3442413939651506e-08, "logits/chosen": -1.5623538494110107, "logits/rejected": -1.9150257110595703, "logps/chosen": -164.6419219970703, "logps/rejected": -334.1216735839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.47237491607666, "rewards/margins": 15.973114013671875, "rewards/rejected": -23.44548797607422, "step": 4726 }, { "epoch": 8.14, "learning_rate": 5.33361665958351e-08, "logits/chosen": -1.9213979244232178, "logits/rejected": -1.8258637189865112, "logps/chosen": -112.37024688720703, "logps/rejected": -290.72015380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4494500160217285, "rewards/margins": 17.234235763549805, "rewards/rejected": -22.683685302734375, "step": 4727 }, { "epoch": 8.14, "learning_rate": 5.32299192520187e-08, "logits/chosen": -1.716263771057129, "logits/rejected": -1.567264199256897, "logps/chosen": -198.2119903564453, "logps/rejected": -325.2862243652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.602043151855469, "rewards/margins": 13.643705368041992, "rewards/rejected": -23.24574851989746, "step": 4728 }, { "epoch": 8.14, "learning_rate": 5.3123671908202296e-08, "logits/chosen": -1.9204967021942139, "logits/rejected": -1.728171944618225, "logps/chosen": -211.98284912109375, "logps/rejected": -328.2876281738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.1625337600708, "rewards/margins": 13.241720199584961, "rewards/rejected": -25.404253005981445, "step": 4729 }, { "epoch": 8.14, "learning_rate": 5.301742456438589e-08, "logits/chosen": -1.6791940927505493, "logits/rejected": -1.9757534265518188, "logps/chosen": -141.70285034179688, "logps/rejected": -266.236083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.303932189941406, "rewards/margins": 12.565492630004883, "rewards/rejected": -18.86942481994629, "step": 4730 }, { "epoch": 8.14, "learning_rate": 5.291117722056948e-08, "logits/chosen": -1.8848721981048584, "logits/rejected": -1.8412524461746216, "logps/chosen": -133.65594482421875, "logps/rejected": -277.4617919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.140803337097168, "rewards/margins": 14.259313583374023, "rewards/rejected": -21.400115966796875, "step": 4731 }, { "epoch": 8.14, "learning_rate": 5.280492987675308e-08, "logits/chosen": -1.8594772815704346, "logits/rejected": -1.8100779056549072, "logps/chosen": -189.26263427734375, "logps/rejected": -321.987060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.442726135253906, "rewards/margins": 13.50975227355957, "rewards/rejected": -22.95248031616211, "step": 4732 }, { "epoch": 8.15, "learning_rate": 5.269868253293668e-08, "logits/chosen": -1.7931981086730957, "logits/rejected": -1.8352043628692627, "logps/chosen": -151.85934448242188, "logps/rejected": -329.37677001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.844082355499268, "rewards/margins": 16.477739334106445, "rewards/rejected": -23.321821212768555, "step": 4733 }, { "epoch": 8.15, "learning_rate": 5.259243518912027e-08, "logits/chosen": -1.8838963508605957, "logits/rejected": -1.7578985691070557, "logps/chosen": -183.58892822265625, "logps/rejected": -336.9873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.233242988586426, "rewards/margins": 16.741342544555664, "rewards/rejected": -25.97458267211914, "step": 4734 }, { "epoch": 8.15, "learning_rate": 5.248618784530386e-08, "logits/chosen": -1.6856176853179932, "logits/rejected": -1.7658838033676147, "logps/chosen": -156.3606414794922, "logps/rejected": -302.8719482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.99730110168457, "rewards/margins": 13.981348037719727, "rewards/rejected": -19.978649139404297, "step": 4735 }, { "epoch": 8.15, "learning_rate": 5.237994050148746e-08, "logits/chosen": -1.860489845275879, "logits/rejected": -1.8210232257843018, "logps/chosen": -110.70074462890625, "logps/rejected": -299.11907958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8510608673095703, "rewards/margins": 18.49954605102539, "rewards/rejected": -22.350608825683594, "step": 4736 }, { "epoch": 8.15, "learning_rate": 5.227369315767106e-08, "logits/chosen": -1.559124231338501, "logits/rejected": -2.0358569622039795, "logps/chosen": -136.26773071289062, "logps/rejected": -292.66656494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.704624176025391, "rewards/margins": 13.946657180786133, "rewards/rejected": -20.651281356811523, "step": 4737 }, { "epoch": 8.15, "learning_rate": 5.216744581385465e-08, "logits/chosen": -1.7891063690185547, "logits/rejected": -1.7572271823883057, "logps/chosen": -136.4318084716797, "logps/rejected": -269.50927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.392110347747803, "rewards/margins": 13.934362411499023, "rewards/rejected": -19.326473236083984, "step": 4738 }, { "epoch": 8.16, "learning_rate": 5.2061198470038245e-08, "logits/chosen": -1.7101936340332031, "logits/rejected": -1.6512811183929443, "logps/chosen": -181.33624267578125, "logps/rejected": -315.7784423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.503607749938965, "rewards/margins": 12.544637680053711, "rewards/rejected": -21.04824447631836, "step": 4739 }, { "epoch": 8.16, "learning_rate": 5.1954951126221844e-08, "logits/chosen": -1.7726986408233643, "logits/rejected": -1.922669529914856, "logps/chosen": -175.31906127929688, "logps/rejected": -346.5265808105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.005199432373047, "rewards/margins": 15.085481643676758, "rewards/rejected": -24.090681076049805, "step": 4740 }, { "epoch": 8.16, "learning_rate": 5.1848703782405436e-08, "logits/chosen": -1.591590166091919, "logits/rejected": -1.8941349983215332, "logps/chosen": -108.33665466308594, "logps/rejected": -299.72796630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.952082633972168, "rewards/margins": 17.61927032470703, "rewards/rejected": -21.571353912353516, "step": 4741 }, { "epoch": 8.16, "learning_rate": 5.174245643858903e-08, "logits/chosen": -1.8307554721832275, "logits/rejected": -1.7982187271118164, "logps/chosen": -147.33746337890625, "logps/rejected": -272.9660339355469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.74257755279541, "rewards/margins": 12.726736068725586, "rewards/rejected": -19.469314575195312, "step": 4742 }, { "epoch": 8.16, "learning_rate": 5.163620909477263e-08, "logits/chosen": -1.642061471939087, "logits/rejected": -1.6434235572814941, "logps/chosen": -161.6728057861328, "logps/rejected": -359.50665283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5677080154418945, "rewards/margins": 16.383453369140625, "rewards/rejected": -22.951160430908203, "step": 4743 }, { "epoch": 8.17, "learning_rate": 5.1529961750956226e-08, "logits/chosen": -1.7099289894104004, "logits/rejected": -1.9148935079574585, "logps/chosen": -188.84671020507812, "logps/rejected": -296.4061279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.8110933303833, "rewards/margins": 12.840251922607422, "rewards/rejected": -21.651344299316406, "step": 4744 }, { "epoch": 8.17, "learning_rate": 5.142371440713982e-08, "logits/chosen": -1.7630451917648315, "logits/rejected": -1.9960377216339111, "logps/chosen": -131.2186279296875, "logps/rejected": -283.1830749511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.860549449920654, "rewards/margins": 14.89097785949707, "rewards/rejected": -20.751527786254883, "step": 4745 }, { "epoch": 8.17, "learning_rate": 5.131746706332341e-08, "logits/chosen": -1.678174376487732, "logits/rejected": -1.9388514757156372, "logps/chosen": -123.64007568359375, "logps/rejected": -316.1133728027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0679545402526855, "rewards/margins": 18.387554168701172, "rewards/rejected": -23.45551109313965, "step": 4746 }, { "epoch": 8.17, "learning_rate": 5.121121971950701e-08, "logits/chosen": -1.9190250635147095, "logits/rejected": -1.8594048023223877, "logps/chosen": -137.3174285888672, "logps/rejected": -277.05413818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.555828094482422, "rewards/margins": 14.267892837524414, "rewards/rejected": -20.823719024658203, "step": 4747 }, { "epoch": 8.17, "learning_rate": 5.110497237569061e-08, "logits/chosen": -1.3846092224121094, "logits/rejected": -1.8210957050323486, "logps/chosen": -210.96267700195312, "logps/rejected": -325.70086669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.900101661682129, "rewards/margins": 11.597713470458984, "rewards/rejected": -23.497814178466797, "step": 4748 }, { "epoch": 8.17, "learning_rate": 5.09987250318742e-08, "logits/chosen": -1.8045847415924072, "logits/rejected": -1.8567522764205933, "logps/chosen": -166.94964599609375, "logps/rejected": -357.4257507324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.069785118103027, "rewards/margins": 18.7979736328125, "rewards/rejected": -26.86775779724121, "step": 4749 }, { "epoch": 8.18, "learning_rate": 5.0892477688057793e-08, "logits/chosen": -1.9114463329315186, "logits/rejected": -1.954498291015625, "logps/chosen": -159.80239868164062, "logps/rejected": -291.3357849121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.602854251861572, "rewards/margins": 13.339529037475586, "rewards/rejected": -20.9423828125, "step": 4750 }, { "epoch": 8.18, "learning_rate": 5.078623034424139e-08, "logits/chosen": -1.802046298980713, "logits/rejected": -1.8648048639297485, "logps/chosen": -147.9699249267578, "logps/rejected": -315.1952819824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.567802906036377, "rewards/margins": 15.347604751586914, "rewards/rejected": -21.915407180786133, "step": 4751 }, { "epoch": 8.18, "learning_rate": 5.067998300042499e-08, "logits/chosen": -2.0656442642211914, "logits/rejected": -1.7440428733825684, "logps/chosen": -126.74542236328125, "logps/rejected": -279.48321533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.2248148918151855, "rewards/margins": 14.46848201751709, "rewards/rejected": -18.693296432495117, "step": 4752 }, { "epoch": 8.18, "learning_rate": 5.057373565660858e-08, "logits/chosen": -1.9127459526062012, "logits/rejected": -1.7446415424346924, "logps/chosen": -140.5504913330078, "logps/rejected": -330.21624755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.903278827667236, "rewards/margins": 18.860118865966797, "rewards/rejected": -24.763397216796875, "step": 4753 }, { "epoch": 8.18, "learning_rate": 5.0467488312792176e-08, "logits/chosen": -1.6885292530059814, "logits/rejected": -1.915221929550171, "logps/chosen": -165.7213134765625, "logps/rejected": -335.652587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.109355449676514, "rewards/margins": 17.673229217529297, "rewards/rejected": -24.78258514404297, "step": 4754 }, { "epoch": 8.18, "learning_rate": 5.0361240968975775e-08, "logits/chosen": -1.9303772449493408, "logits/rejected": -1.7728228569030762, "logps/chosen": -124.9860610961914, "logps/rejected": -311.67864990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.819614410400391, "rewards/margins": 18.945995330810547, "rewards/rejected": -23.765609741210938, "step": 4755 }, { "epoch": 8.19, "learning_rate": 5.0254993625159374e-08, "logits/chosen": -1.8528485298156738, "logits/rejected": -1.8048548698425293, "logps/chosen": -136.54971313476562, "logps/rejected": -306.74554443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.995936870574951, "rewards/margins": 17.598024368286133, "rewards/rejected": -23.593961715698242, "step": 4756 }, { "epoch": 8.19, "learning_rate": 5.014874628134296e-08, "logits/chosen": -1.9346182346343994, "logits/rejected": -1.6132967472076416, "logps/chosen": -137.85897827148438, "logps/rejected": -259.7554931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.573609828948975, "rewards/margins": 14.643453598022461, "rewards/rejected": -21.217063903808594, "step": 4757 }, { "epoch": 8.19, "learning_rate": 5.004249893752656e-08, "logits/chosen": -1.8797287940979004, "logits/rejected": -1.509902000427246, "logps/chosen": -157.0985565185547, "logps/rejected": -260.9521789550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.452204704284668, "rewards/margins": 12.226164817810059, "rewards/rejected": -17.678369522094727, "step": 4758 }, { "epoch": 8.19, "learning_rate": 4.993625159371016e-08, "logits/chosen": -1.9077306985855103, "logits/rejected": -1.9588202238082886, "logps/chosen": -116.35952758789062, "logps/rejected": -270.40081787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.169061183929443, "rewards/margins": 15.203262329101562, "rewards/rejected": -20.372325897216797, "step": 4759 }, { "epoch": 8.19, "learning_rate": 4.9830004249893756e-08, "logits/chosen": -1.6502158641815186, "logits/rejected": -1.699779987335205, "logps/chosen": -186.7251739501953, "logps/rejected": -321.8427429199219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.60388469696045, "rewards/margins": 14.235774993896484, "rewards/rejected": -23.839658737182617, "step": 4760 }, { "epoch": 8.19, "learning_rate": 4.972375690607734e-08, "logits/chosen": -2.0202579498291016, "logits/rejected": -1.415449619293213, "logps/chosen": -187.3112030029297, "logps/rejected": -264.7940368652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.23125171661377, "rewards/margins": 11.604642868041992, "rewards/rejected": -19.835895538330078, "step": 4761 }, { "epoch": 8.2, "learning_rate": 4.961750956226094e-08, "logits/chosen": -1.7921204566955566, "logits/rejected": -1.6952767372131348, "logps/chosen": -170.78594970703125, "logps/rejected": -299.98779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.97530460357666, "rewards/margins": 14.179621696472168, "rewards/rejected": -22.15492820739746, "step": 4762 }, { "epoch": 8.2, "learning_rate": 4.951126221844454e-08, "logits/chosen": -1.9275888204574585, "logits/rejected": -1.83016836643219, "logps/chosen": -158.29574584960938, "logps/rejected": -296.02471923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.506266593933105, "rewards/margins": 14.828278541564941, "rewards/rejected": -23.33454704284668, "step": 4763 }, { "epoch": 8.2, "learning_rate": 4.940501487462813e-08, "logits/chosen": -1.8326244354248047, "logits/rejected": -1.931775450706482, "logps/chosen": -196.16976928710938, "logps/rejected": -349.3789978027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.61319351196289, "rewards/margins": 15.533727645874023, "rewards/rejected": -24.14691925048828, "step": 4764 }, { "epoch": 8.2, "learning_rate": 4.9298767530811724e-08, "logits/chosen": -1.8207454681396484, "logits/rejected": -1.623665690422058, "logps/chosen": -139.93389892578125, "logps/rejected": -250.8233642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.64429235458374, "rewards/margins": 12.440736770629883, "rewards/rejected": -18.08502960205078, "step": 4765 }, { "epoch": 8.2, "learning_rate": 4.919252018699532e-08, "logits/chosen": -1.9078388214111328, "logits/rejected": -1.9241490364074707, "logps/chosen": -171.07078552246094, "logps/rejected": -350.55377197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.203350067138672, "rewards/margins": 16.665224075317383, "rewards/rejected": -24.868574142456055, "step": 4766 }, { "epoch": 8.2, "learning_rate": 4.908627284317892e-08, "logits/chosen": -1.7217482328414917, "logits/rejected": -2.043952465057373, "logps/chosen": -196.61427307128906, "logps/rejected": -351.32342529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.394460678100586, "rewards/margins": 14.325758934020996, "rewards/rejected": -24.7202205657959, "step": 4767 }, { "epoch": 8.21, "learning_rate": 4.8980025499362514e-08, "logits/chosen": -1.89128577709198, "logits/rejected": -1.8040287494659424, "logps/chosen": -152.87989807128906, "logps/rejected": -301.9875183105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.254314422607422, "rewards/margins": 14.522224426269531, "rewards/rejected": -20.776538848876953, "step": 4768 }, { "epoch": 8.21, "learning_rate": 4.8873778155546106e-08, "logits/chosen": -1.8056303262710571, "logits/rejected": -1.7910122871398926, "logps/chosen": -194.64002990722656, "logps/rejected": -353.50946044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.451936721801758, "rewards/margins": 15.606975555419922, "rewards/rejected": -26.05891227722168, "step": 4769 }, { "epoch": 8.21, "learning_rate": 4.8767530811729705e-08, "logits/chosen": -1.7970201969146729, "logits/rejected": -1.8355436325073242, "logps/chosen": -161.5448760986328, "logps/rejected": -327.2073059082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.601116180419922, "rewards/margins": 15.432668685913086, "rewards/rejected": -23.033784866333008, "step": 4770 }, { "epoch": 8.21, "learning_rate": 4.8661283467913304e-08, "logits/chosen": -1.8361395597457886, "logits/rejected": -1.9362916946411133, "logps/chosen": -120.94945526123047, "logps/rejected": -296.84423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.89805793762207, "rewards/margins": 17.486129760742188, "rewards/rejected": -22.384187698364258, "step": 4771 }, { "epoch": 8.21, "learning_rate": 4.855503612409689e-08, "logits/chosen": -1.765061378479004, "logits/rejected": -1.751451015472412, "logps/chosen": -178.93154907226562, "logps/rejected": -309.2832946777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.24304485321045, "rewards/margins": 12.46369743347168, "rewards/rejected": -21.706743240356445, "step": 4772 }, { "epoch": 8.22, "learning_rate": 4.844878878028049e-08, "logits/chosen": -2.0629608631134033, "logits/rejected": -1.6457215547561646, "logps/chosen": -104.4233169555664, "logps/rejected": -266.2847900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4802181720733643, "rewards/margins": 17.17377471923828, "rewards/rejected": -20.65399169921875, "step": 4773 }, { "epoch": 8.22, "learning_rate": 4.834254143646409e-08, "logits/chosen": -2.1220364570617676, "logits/rejected": -1.6736763715744019, "logps/chosen": -189.19949340820312, "logps/rejected": -319.84429931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.818038940429688, "rewards/margins": 14.211685180664062, "rewards/rejected": -24.02972412109375, "step": 4774 }, { "epoch": 8.22, "learning_rate": 4.8236294092647687e-08, "logits/chosen": -1.9564130306243896, "logits/rejected": -1.7651671171188354, "logps/chosen": -187.02105712890625, "logps/rejected": -311.5360107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.932600021362305, "rewards/margins": 13.600829124450684, "rewards/rejected": -22.533428192138672, "step": 4775 }, { "epoch": 8.22, "learning_rate": 4.813004674883127e-08, "logits/chosen": -1.941577672958374, "logits/rejected": -1.5416803359985352, "logps/chosen": -242.44935607910156, "logps/rejected": -356.9060974121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.210370063781738, "rewards/margins": 14.725890159606934, "rewards/rejected": -27.936262130737305, "step": 4776 }, { "epoch": 8.22, "learning_rate": 4.802379940501487e-08, "logits/chosen": -1.9894036054611206, "logits/rejected": -1.7621934413909912, "logps/chosen": -196.21923828125, "logps/rejected": -297.15167236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.858877182006836, "rewards/margins": 11.215827941894531, "rewards/rejected": -21.074705123901367, "step": 4777 }, { "epoch": 8.22, "learning_rate": 4.791755206119847e-08, "logits/chosen": -1.8101096153259277, "logits/rejected": -1.9626991748809814, "logps/chosen": -179.28524780273438, "logps/rejected": -387.8158264160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.546216011047363, "rewards/margins": 19.95364761352539, "rewards/rejected": -28.499862670898438, "step": 4778 }, { "epoch": 8.23, "learning_rate": 4.781130471738206e-08, "logits/chosen": -2.001366138458252, "logits/rejected": -1.8558238744735718, "logps/chosen": -179.69813537597656, "logps/rejected": -369.2073669433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.122743606567383, "rewards/margins": 17.891263961791992, "rewards/rejected": -27.014007568359375, "step": 4779 }, { "epoch": 8.23, "learning_rate": 4.7705057373565655e-08, "logits/chosen": -1.9262081384658813, "logits/rejected": -1.7034919261932373, "logps/chosen": -132.6737518310547, "logps/rejected": -279.5105895996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1988348960876465, "rewards/margins": 15.717145919799805, "rewards/rejected": -21.91598129272461, "step": 4780 }, { "epoch": 8.23, "learning_rate": 4.7598810029749254e-08, "logits/chosen": -1.671249508857727, "logits/rejected": -1.8100268840789795, "logps/chosen": -120.82403564453125, "logps/rejected": -307.5584411621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.222978591918945, "rewards/margins": 17.341154098510742, "rewards/rejected": -21.564132690429688, "step": 4781 }, { "epoch": 8.23, "learning_rate": 4.749256268593285e-08, "logits/chosen": -1.7438628673553467, "logits/rejected": -2.0121383666992188, "logps/chosen": -122.1619644165039, "logps/rejected": -307.7847595214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.410221576690674, "rewards/margins": 17.374645233154297, "rewards/rejected": -21.784866333007812, "step": 4782 }, { "epoch": 8.23, "learning_rate": 4.7386315342116445e-08, "logits/chosen": -1.5770649909973145, "logits/rejected": -1.9266130924224854, "logps/chosen": -125.51081848144531, "logps/rejected": -325.40771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.640748500823975, "rewards/margins": 18.29601287841797, "rewards/rejected": -22.936763763427734, "step": 4783 }, { "epoch": 8.23, "learning_rate": 4.728006799830004e-08, "logits/chosen": -1.496078372001648, "logits/rejected": -1.9415909051895142, "logps/chosen": -136.27310180664062, "logps/rejected": -323.98712158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.450717926025391, "rewards/margins": 18.363067626953125, "rewards/rejected": -23.813785552978516, "step": 4784 }, { "epoch": 8.24, "learning_rate": 4.7173820654483636e-08, "logits/chosen": -1.869328260421753, "logits/rejected": -1.8597471714019775, "logps/chosen": -173.65931701660156, "logps/rejected": -304.48150634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.091405868530273, "rewards/margins": 12.800484657287598, "rewards/rejected": -21.891889572143555, "step": 4785 }, { "epoch": 8.24, "learning_rate": 4.7067573310667235e-08, "logits/chosen": -1.3432090282440186, "logits/rejected": -1.9821946620941162, "logps/chosen": -139.56056213378906, "logps/rejected": -435.53656005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.760436534881592, "rewards/margins": 25.682510375976562, "rewards/rejected": -32.44294738769531, "step": 4786 }, { "epoch": 8.24, "learning_rate": 4.696132596685083e-08, "logits/chosen": -2.108821392059326, "logits/rejected": -1.6390103101730347, "logps/chosen": -156.50912475585938, "logps/rejected": -302.8880615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.656323432922363, "rewards/margins": 16.293502807617188, "rewards/rejected": -22.949825286865234, "step": 4787 }, { "epoch": 8.24, "learning_rate": 4.685507862303442e-08, "logits/chosen": -1.8785443305969238, "logits/rejected": -1.5763416290283203, "logps/chosen": -186.00906372070312, "logps/rejected": -276.6023254394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.634243965148926, "rewards/margins": 12.33375072479248, "rewards/rejected": -20.967994689941406, "step": 4788 }, { "epoch": 8.24, "learning_rate": 4.674883127921802e-08, "logits/chosen": -1.6771471500396729, "logits/rejected": -2.01397705078125, "logps/chosen": -171.1616668701172, "logps/rejected": -322.4658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.202840805053711, "rewards/margins": 13.60688591003418, "rewards/rejected": -21.80972671508789, "step": 4789 }, { "epoch": 8.24, "learning_rate": 4.664258393540161e-08, "logits/chosen": -1.91420316696167, "logits/rejected": -1.9870681762695312, "logps/chosen": -225.0290069580078, "logps/rejected": -335.18304443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.476938247680664, "rewards/margins": 11.739917755126953, "rewards/rejected": -25.216856002807617, "step": 4790 }, { "epoch": 8.25, "learning_rate": 4.653633659158521e-08, "logits/chosen": -1.6142807006835938, "logits/rejected": -2.030200481414795, "logps/chosen": -131.7819061279297, "logps/rejected": -288.82647705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7434163093566895, "rewards/margins": 15.001911163330078, "rewards/rejected": -19.745328903198242, "step": 4791 }, { "epoch": 8.25, "learning_rate": 4.64300892477688e-08, "logits/chosen": -1.4309285879135132, "logits/rejected": -2.081040382385254, "logps/chosen": -120.4337387084961, "logps/rejected": -308.2380676269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.652790069580078, "rewards/margins": 16.65643310546875, "rewards/rejected": -22.30922508239746, "step": 4792 }, { "epoch": 8.25, "learning_rate": 4.63238419039524e-08, "logits/chosen": -2.098606824874878, "logits/rejected": -1.4023922681808472, "logps/chosen": -184.3343505859375, "logps/rejected": -318.726318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.956822395324707, "rewards/margins": 14.291461944580078, "rewards/rejected": -24.2482852935791, "step": 4793 }, { "epoch": 8.25, "learning_rate": 4.621759456013599e-08, "logits/chosen": -1.9414739608764648, "logits/rejected": -1.8715920448303223, "logps/chosen": -166.1519775390625, "logps/rejected": -306.1299133300781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.516090393066406, "rewards/margins": 13.211278915405273, "rewards/rejected": -19.727367401123047, "step": 4794 }, { "epoch": 8.25, "learning_rate": 4.611134721631959e-08, "logits/chosen": -1.7711223363876343, "logits/rejected": -1.7922168970108032, "logps/chosen": -150.5677490234375, "logps/rejected": -325.3515930175781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.674198150634766, "rewards/margins": 17.254518508911133, "rewards/rejected": -23.9287166595459, "step": 4795 }, { "epoch": 8.25, "learning_rate": 4.6005099872503184e-08, "logits/chosen": -1.7983276844024658, "logits/rejected": -1.6840591430664062, "logps/chosen": -190.29635620117188, "logps/rejected": -327.58746337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.015918731689453, "rewards/margins": 13.970245361328125, "rewards/rejected": -22.986164093017578, "step": 4796 }, { "epoch": 8.26, "learning_rate": 4.589885252868678e-08, "logits/chosen": -1.9194936752319336, "logits/rejected": -1.9204953908920288, "logps/chosen": -141.07363891601562, "logps/rejected": -300.5163269042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.59784460067749, "rewards/margins": 17.116207122802734, "rewards/rejected": -22.714052200317383, "step": 4797 }, { "epoch": 8.26, "learning_rate": 4.5792605184870375e-08, "logits/chosen": -1.6943278312683105, "logits/rejected": -1.9668817520141602, "logps/chosen": -150.570068359375, "logps/rejected": -326.00555419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0332794189453125, "rewards/margins": 14.871310234069824, "rewards/rejected": -21.904590606689453, "step": 4798 }, { "epoch": 8.26, "learning_rate": 4.5686357841053974e-08, "logits/chosen": -1.9535455703735352, "logits/rejected": -1.7849152088165283, "logps/chosen": -132.81997680664062, "logps/rejected": -277.7296142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.875604629516602, "rewards/margins": 14.740671157836914, "rewards/rejected": -20.61627769470215, "step": 4799 }, { "epoch": 8.26, "learning_rate": 4.5580110497237566e-08, "logits/chosen": -1.916350245475769, "logits/rejected": -1.6015524864196777, "logps/chosen": -149.005615234375, "logps/rejected": -259.67254638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.025305271148682, "rewards/margins": 12.694704055786133, "rewards/rejected": -18.72001075744629, "step": 4800 }, { "epoch": 8.26, "learning_rate": 4.5473863153421165e-08, "logits/chosen": -2.006455183029175, "logits/rejected": -1.9009255170822144, "logps/chosen": -213.595947265625, "logps/rejected": -322.5189514160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.405583381652832, "rewards/margins": 12.4099760055542, "rewards/rejected": -22.81555938720703, "step": 4801 }, { "epoch": 8.27, "learning_rate": 4.536761580960476e-08, "logits/chosen": -2.01578426361084, "logits/rejected": -1.5886852741241455, "logps/chosen": -201.491943359375, "logps/rejected": -304.6870422363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.811511993408203, "rewards/margins": 13.219588279724121, "rewards/rejected": -22.03110122680664, "step": 4802 }, { "epoch": 8.27, "learning_rate": 4.5261368465788357e-08, "logits/chosen": -1.8635329008102417, "logits/rejected": -1.7622573375701904, "logps/chosen": -175.99050903320312, "logps/rejected": -324.2584533691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.361738204956055, "rewards/margins": 15.834674835205078, "rewards/rejected": -23.196413040161133, "step": 4803 }, { "epoch": 8.27, "learning_rate": 4.515512112197195e-08, "logits/chosen": -2.013644218444824, "logits/rejected": -1.9405428171157837, "logps/chosen": -143.94436645507812, "logps/rejected": -318.64166259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.341655731201172, "rewards/margins": 17.160869598388672, "rewards/rejected": -22.502525329589844, "step": 4804 }, { "epoch": 8.27, "learning_rate": 4.504887377815554e-08, "logits/chosen": -1.7929961681365967, "logits/rejected": -1.88919997215271, "logps/chosen": -123.19530487060547, "logps/rejected": -271.7454833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.504166603088379, "rewards/margins": 15.244546890258789, "rewards/rejected": -20.74871253967285, "step": 4805 }, { "epoch": 8.27, "learning_rate": 4.494262643433914e-08, "logits/chosen": -1.574678897857666, "logits/rejected": -1.7416495084762573, "logps/chosen": -159.3974151611328, "logps/rejected": -301.6946105957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.382993698120117, "rewards/margins": 14.578408241271973, "rewards/rejected": -21.961400985717773, "step": 4806 }, { "epoch": 8.27, "learning_rate": 4.483637909052274e-08, "logits/chosen": -1.893056869506836, "logits/rejected": -1.8550517559051514, "logps/chosen": -129.19850158691406, "logps/rejected": -323.0723876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.799675941467285, "rewards/margins": 18.697124481201172, "rewards/rejected": -23.496801376342773, "step": 4807 }, { "epoch": 8.28, "learning_rate": 4.473013174670633e-08, "logits/chosen": -1.8983806371688843, "logits/rejected": -1.8068873882293701, "logps/chosen": -185.0101318359375, "logps/rejected": -343.3457946777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.6821870803833, "rewards/margins": 14.707388877868652, "rewards/rejected": -23.389575958251953, "step": 4808 }, { "epoch": 8.28, "learning_rate": 4.4623884402889924e-08, "logits/chosen": -2.045342445373535, "logits/rejected": -1.6618810892105103, "logps/chosen": -153.7298126220703, "logps/rejected": -269.7672119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1071624755859375, "rewards/margins": 12.327398300170898, "rewards/rejected": -18.434560775756836, "step": 4809 }, { "epoch": 8.28, "learning_rate": 4.451763705907352e-08, "logits/chosen": -1.7846665382385254, "logits/rejected": -2.0732290744781494, "logps/chosen": -140.3489532470703, "logps/rejected": -300.21832275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.4369025230407715, "rewards/margins": 14.814473152160645, "rewards/rejected": -21.251375198364258, "step": 4810 }, { "epoch": 8.28, "learning_rate": 4.4411389715257115e-08, "logits/chosen": -1.7197563648223877, "logits/rejected": -1.902958869934082, "logps/chosen": -169.9974822998047, "logps/rejected": -336.5558166503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0395026206970215, "rewards/margins": 16.1527099609375, "rewards/rejected": -22.192214965820312, "step": 4811 }, { "epoch": 8.28, "learning_rate": 4.4305142371440714e-08, "logits/chosen": -1.4417251348495483, "logits/rejected": -1.7986198663711548, "logps/chosen": -134.97763061523438, "logps/rejected": -310.95953369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.695240497589111, "rewards/margins": 16.687772750854492, "rewards/rejected": -22.383014678955078, "step": 4812 }, { "epoch": 8.28, "learning_rate": 4.4198895027624306e-08, "logits/chosen": -2.1128973960876465, "logits/rejected": -1.7233141660690308, "logps/chosen": -129.7766876220703, "logps/rejected": -286.345458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.635874271392822, "rewards/margins": 16.43390655517578, "rewards/rejected": -21.069780349731445, "step": 4813 }, { "epoch": 8.29, "learning_rate": 4.4092647683807905e-08, "logits/chosen": -1.7508251667022705, "logits/rejected": -1.8173307180404663, "logps/chosen": -95.19760131835938, "logps/rejected": -230.79237365722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.879261016845703, "rewards/margins": 13.16567325592041, "rewards/rejected": -17.044933319091797, "step": 4814 }, { "epoch": 8.29, "learning_rate": 4.39864003399915e-08, "logits/chosen": -1.7830380201339722, "logits/rejected": -2.1311044692993164, "logps/chosen": -135.1476287841797, "logps/rejected": -343.11358642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.71760368347168, "rewards/margins": 18.90782928466797, "rewards/rejected": -24.62543487548828, "step": 4815 }, { "epoch": 8.29, "learning_rate": 4.388015299617509e-08, "logits/chosen": -1.5497517585754395, "logits/rejected": -1.974406361579895, "logps/chosen": -102.97321319580078, "logps/rejected": -345.67938232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.017583847045898, "rewards/margins": 21.655420303344727, "rewards/rejected": -25.673004150390625, "step": 4816 }, { "epoch": 8.29, "learning_rate": 4.377390565235869e-08, "logits/chosen": -1.9376755952835083, "logits/rejected": -1.715930461883545, "logps/chosen": -156.3521270751953, "logps/rejected": -308.5747985839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.102875709533691, "rewards/margins": 14.277874946594238, "rewards/rejected": -21.38075065612793, "step": 4817 }, { "epoch": 8.29, "learning_rate": 4.366765830854229e-08, "logits/chosen": -1.8649396896362305, "logits/rejected": -1.907848834991455, "logps/chosen": -188.75155639648438, "logps/rejected": -356.02874755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.434463500976562, "rewards/margins": 17.8226318359375, "rewards/rejected": -26.257095336914062, "step": 4818 }, { "epoch": 8.29, "learning_rate": 4.356141096472588e-08, "logits/chosen": -1.5530552864074707, "logits/rejected": -1.9113562107086182, "logps/chosen": -135.51100158691406, "logps/rejected": -304.38360595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1174635887146, "rewards/margins": 15.665132522583008, "rewards/rejected": -21.782596588134766, "step": 4819 }, { "epoch": 8.3, "learning_rate": 4.345516362090947e-08, "logits/chosen": -1.5916221141815186, "logits/rejected": -2.0162124633789062, "logps/chosen": -118.93663787841797, "logps/rejected": -321.02801513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.679821968078613, "rewards/margins": 17.905986785888672, "rewards/rejected": -22.5858097076416, "step": 4820 }, { "epoch": 8.3, "learning_rate": 4.334891627709307e-08, "logits/chosen": -2.0675652027130127, "logits/rejected": -1.9145622253417969, "logps/chosen": -146.98336791992188, "logps/rejected": -299.32366943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.287542343139648, "rewards/margins": 16.530811309814453, "rewards/rejected": -22.818355560302734, "step": 4821 }, { "epoch": 8.3, "learning_rate": 4.324266893327667e-08, "logits/chosen": -2.0740857124328613, "logits/rejected": -1.677966833114624, "logps/chosen": -133.12283325195312, "logps/rejected": -308.8773193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.949088096618652, "rewards/margins": 18.29330825805664, "rewards/rejected": -23.242393493652344, "step": 4822 }, { "epoch": 8.3, "learning_rate": 4.313642158946026e-08, "logits/chosen": -1.6868890523910522, "logits/rejected": -1.857203483581543, "logps/chosen": -159.66519165039062, "logps/rejected": -331.48065185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.234335899353027, "rewards/margins": 15.503570556640625, "rewards/rejected": -23.73790740966797, "step": 4823 }, { "epoch": 8.3, "learning_rate": 4.3030174245643854e-08, "logits/chosen": -1.7678704261779785, "logits/rejected": -1.7551312446594238, "logps/chosen": -180.2554931640625, "logps/rejected": -300.59283447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.56248950958252, "rewards/margins": 12.341657638549805, "rewards/rejected": -20.90414810180664, "step": 4824 }, { "epoch": 8.3, "learning_rate": 4.292392690182745e-08, "logits/chosen": -1.9628024101257324, "logits/rejected": -1.5307319164276123, "logps/chosen": -158.26620483398438, "logps/rejected": -275.5514831542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.677222728729248, "rewards/margins": 13.75545883178711, "rewards/rejected": -20.432682037353516, "step": 4825 }, { "epoch": 8.31, "learning_rate": 4.281767955801105e-08, "logits/chosen": -1.5291246175765991, "logits/rejected": -1.9126077890396118, "logps/chosen": -173.56396484375, "logps/rejected": -321.86029052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.068061828613281, "rewards/margins": 14.6929292678833, "rewards/rejected": -24.7609920501709, "step": 4826 }, { "epoch": 8.31, "learning_rate": 4.271143221419464e-08, "logits/chosen": -1.9765840768814087, "logits/rejected": -1.895986557006836, "logps/chosen": -193.80075073242188, "logps/rejected": -344.63677978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.736424446105957, "rewards/margins": 16.35463523864746, "rewards/rejected": -25.091060638427734, "step": 4827 }, { "epoch": 8.31, "learning_rate": 4.2605184870378236e-08, "logits/chosen": -1.8344849348068237, "logits/rejected": -2.025367259979248, "logps/chosen": -163.30490112304688, "logps/rejected": -316.89544677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.370616912841797, "rewards/margins": 15.235777854919434, "rewards/rejected": -22.606395721435547, "step": 4828 }, { "epoch": 8.31, "learning_rate": 4.2498937526561835e-08, "logits/chosen": -1.871334195137024, "logits/rejected": -1.612219214439392, "logps/chosen": -149.50550842285156, "logps/rejected": -289.229248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.530021667480469, "rewards/margins": 16.02224349975586, "rewards/rejected": -23.552265167236328, "step": 4829 }, { "epoch": 8.31, "learning_rate": 4.2392690182745434e-08, "logits/chosen": -1.9316504001617432, "logits/rejected": -2.0661327838897705, "logps/chosen": -124.79010772705078, "logps/rejected": -306.38818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.067470550537109, "rewards/margins": 17.15703773498535, "rewards/rejected": -22.224510192871094, "step": 4830 }, { "epoch": 8.31, "learning_rate": 4.228644283892902e-08, "logits/chosen": -1.7340061664581299, "logits/rejected": -1.8934617042541504, "logps/chosen": -168.14566040039062, "logps/rejected": -363.0298156738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.135895729064941, "rewards/margins": 17.551036834716797, "rewards/rejected": -27.686931610107422, "step": 4831 }, { "epoch": 8.32, "learning_rate": 4.218019549511262e-08, "logits/chosen": -1.7885832786560059, "logits/rejected": -1.9828094244003296, "logps/chosen": -92.21147918701172, "logps/rejected": -260.8064270019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0426533222198486, "rewards/margins": 16.444293975830078, "rewards/rejected": -18.486949920654297, "step": 4832 }, { "epoch": 8.32, "learning_rate": 4.207394815129622e-08, "logits/chosen": -1.808594822883606, "logits/rejected": -1.906050443649292, "logps/chosen": -191.61570739746094, "logps/rejected": -348.5611572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.94901180267334, "rewards/margins": 16.04139518737793, "rewards/rejected": -24.990407943725586, "step": 4833 }, { "epoch": 8.32, "learning_rate": 4.1967700807479817e-08, "logits/chosen": -1.8036706447601318, "logits/rejected": -1.923731803894043, "logps/chosen": -130.00311279296875, "logps/rejected": -269.382568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.506714344024658, "rewards/margins": 13.66440486907959, "rewards/rejected": -20.171119689941406, "step": 4834 }, { "epoch": 8.32, "learning_rate": 4.18614534636634e-08, "logits/chosen": -1.8900327682495117, "logits/rejected": -1.4744127988815308, "logps/chosen": -173.91818237304688, "logps/rejected": -327.62518310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.7672953605651855, "rewards/margins": 17.761350631713867, "rewards/rejected": -24.52864646911621, "step": 4835 }, { "epoch": 8.32, "learning_rate": 4.1755206119847e-08, "logits/chosen": -1.9098179340362549, "logits/rejected": -1.8693187236785889, "logps/chosen": -162.77670288085938, "logps/rejected": -295.32244873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.419007301330566, "rewards/margins": 13.5962553024292, "rewards/rejected": -21.015262603759766, "step": 4836 }, { "epoch": 8.33, "learning_rate": 4.16489587760306e-08, "logits/chosen": -2.1048779487609863, "logits/rejected": -1.4992543458938599, "logps/chosen": -153.7703094482422, "logps/rejected": -308.80364990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.069215297698975, "rewards/margins": 16.155744552612305, "rewards/rejected": -22.224958419799805, "step": 4837 }, { "epoch": 8.33, "learning_rate": 4.15427114322142e-08, "logits/chosen": -1.9376012086868286, "logits/rejected": -2.0343732833862305, "logps/chosen": -143.6813507080078, "logps/rejected": -313.4956970214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.064949989318848, "rewards/margins": 16.78714942932129, "rewards/rejected": -22.852100372314453, "step": 4838 }, { "epoch": 8.33, "learning_rate": 4.1436464088397785e-08, "logits/chosen": -1.7058182954788208, "logits/rejected": -1.939644455909729, "logps/chosen": -160.03591918945312, "logps/rejected": -344.2312927246094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.15390396118164, "rewards/margins": 17.580352783203125, "rewards/rejected": -25.734256744384766, "step": 4839 }, { "epoch": 8.33, "learning_rate": 4.1330216744581384e-08, "logits/chosen": -2.1648106575012207, "logits/rejected": -1.7943310737609863, "logps/chosen": -189.05960083007812, "logps/rejected": -283.35211181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.642396926879883, "rewards/margins": 11.627159118652344, "rewards/rejected": -20.269556045532227, "step": 4840 }, { "epoch": 8.33, "learning_rate": 4.122396940076498e-08, "logits/chosen": -1.7615702152252197, "logits/rejected": -1.8988574743270874, "logps/chosen": -132.3600311279297, "logps/rejected": -319.451904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.835537910461426, "rewards/margins": 17.5824031829834, "rewards/rejected": -24.41794204711914, "step": 4841 }, { "epoch": 8.33, "learning_rate": 4.111772205694857e-08, "logits/chosen": -2.099377155303955, "logits/rejected": -1.814096212387085, "logps/chosen": -185.95469665527344, "logps/rejected": -378.39373779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.054468154907227, "rewards/margins": 21.404743194580078, "rewards/rejected": -29.459209442138672, "step": 4842 }, { "epoch": 8.34, "learning_rate": 4.101147471313217e-08, "logits/chosen": -1.6241638660430908, "logits/rejected": -1.8529369831085205, "logps/chosen": -128.02783203125, "logps/rejected": -292.79669189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.864172458648682, "rewards/margins": 14.892866134643555, "rewards/rejected": -20.75704002380371, "step": 4843 }, { "epoch": 8.34, "learning_rate": 4.0905227369315766e-08, "logits/chosen": -1.5434998273849487, "logits/rejected": -1.9019861221313477, "logps/chosen": -111.57665252685547, "logps/rejected": -298.2513427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.883707523345947, "rewards/margins": 17.171730041503906, "rewards/rejected": -22.055437088012695, "step": 4844 }, { "epoch": 8.34, "learning_rate": 4.0798980025499365e-08, "logits/chosen": -1.8174192905426025, "logits/rejected": -2.0089287757873535, "logps/chosen": -196.41229248046875, "logps/rejected": -344.4886474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.26072883605957, "rewards/margins": 13.73288345336914, "rewards/rejected": -24.99361228942871, "step": 4845 }, { "epoch": 8.34, "learning_rate": 4.069273268168295e-08, "logits/chosen": -1.6119226217269897, "logits/rejected": -1.841200828552246, "logps/chosen": -153.53094482421875, "logps/rejected": -347.55670166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.696841716766357, "rewards/margins": 18.035905838012695, "rewards/rejected": -24.732746124267578, "step": 4846 }, { "epoch": 8.34, "learning_rate": 4.058648533786655e-08, "logits/chosen": -1.463594913482666, "logits/rejected": -1.9428468942642212, "logps/chosen": -135.6414031982422, "logps/rejected": -331.228271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.920746326446533, "rewards/margins": 17.626028060913086, "rewards/rejected": -23.546775817871094, "step": 4847 }, { "epoch": 8.34, "learning_rate": 4.048023799405015e-08, "logits/chosen": -1.5357037782669067, "logits/rejected": -1.7623854875564575, "logps/chosen": -126.74220275878906, "logps/rejected": -295.0667724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.916921138763428, "rewards/margins": 16.57024383544922, "rewards/rejected": -22.487163543701172, "step": 4848 }, { "epoch": 8.35, "learning_rate": 4.037399065023375e-08, "logits/chosen": -1.2961876392364502, "logits/rejected": -1.9743845462799072, "logps/chosen": -123.0804443359375, "logps/rejected": -330.69970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.729852676391602, "rewards/margins": 18.407331466674805, "rewards/rejected": -23.137184143066406, "step": 4849 }, { "epoch": 8.35, "learning_rate": 4.026774330641733e-08, "logits/chosen": -2.053586006164551, "logits/rejected": -1.6585484743118286, "logps/chosen": -139.33973693847656, "logps/rejected": -259.8658447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.702310085296631, "rewards/margins": 14.20499038696289, "rewards/rejected": -18.90730094909668, "step": 4850 }, { "epoch": 8.35, "learning_rate": 4.016149596260093e-08, "logits/chosen": -1.9040679931640625, "logits/rejected": -2.081843137741089, "logps/chosen": -156.21881103515625, "logps/rejected": -300.8942565917969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2160491943359375, "rewards/margins": 13.862283706665039, "rewards/rejected": -20.078332901000977, "step": 4851 }, { "epoch": 8.35, "learning_rate": 4.005524861878453e-08, "logits/chosen": -1.8328883647918701, "logits/rejected": -2.157289981842041, "logps/chosen": -131.5736083984375, "logps/rejected": -296.7806396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4974164962768555, "rewards/margins": 13.833561897277832, "rewards/rejected": -19.330978393554688, "step": 4852 }, { "epoch": 8.35, "learning_rate": 3.994900127496812e-08, "logits/chosen": -2.0571250915527344, "logits/rejected": -1.4566906690597534, "logps/chosen": -166.51956176757812, "logps/rejected": -290.14251708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.566566467285156, "rewards/margins": 14.676225662231445, "rewards/rejected": -21.2427921295166, "step": 4853 }, { "epoch": 8.35, "learning_rate": 3.9842753931151715e-08, "logits/chosen": -1.677424669265747, "logits/rejected": -1.8664987087249756, "logps/chosen": -143.616455078125, "logps/rejected": -340.842041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2818403244018555, "rewards/margins": 18.905920028686523, "rewards/rejected": -25.187761306762695, "step": 4854 }, { "epoch": 8.36, "learning_rate": 3.9736506587335314e-08, "logits/chosen": -2.138324737548828, "logits/rejected": -2.0389108657836914, "logps/chosen": -219.62014770507812, "logps/rejected": -353.2795104980469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.800301551818848, "rewards/margins": 14.349130630493164, "rewards/rejected": -24.149433135986328, "step": 4855 }, { "epoch": 8.36, "learning_rate": 3.963025924351891e-08, "logits/chosen": -2.0833041667938232, "logits/rejected": -1.9035987854003906, "logps/chosen": -136.1008758544922, "logps/rejected": -260.4047546386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.987722873687744, "rewards/margins": 13.978981018066406, "rewards/rejected": -18.966705322265625, "step": 4856 }, { "epoch": 8.36, "learning_rate": 3.9524011899702505e-08, "logits/chosen": -1.591279149055481, "logits/rejected": -2.0517587661743164, "logps/chosen": -137.62722778320312, "logps/rejected": -318.13079833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.802760124206543, "rewards/margins": 15.44301986694336, "rewards/rejected": -21.24578094482422, "step": 4857 }, { "epoch": 8.36, "learning_rate": 3.94177645558861e-08, "logits/chosen": -1.831571340560913, "logits/rejected": -1.5926728248596191, "logps/chosen": -129.5517120361328, "logps/rejected": -303.0274658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4051666259765625, "rewards/margins": 17.466209411621094, "rewards/rejected": -21.87137794494629, "step": 4858 }, { "epoch": 8.36, "learning_rate": 3.9311517212069697e-08, "logits/chosen": -1.8470861911773682, "logits/rejected": -1.7275035381317139, "logps/chosen": -171.15711975097656, "logps/rejected": -322.490966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.285144329071045, "rewards/margins": 16.869407653808594, "rewards/rejected": -24.154552459716797, "step": 4859 }, { "epoch": 8.36, "learning_rate": 3.9205269868253295e-08, "logits/chosen": -1.302780270576477, "logits/rejected": -1.9730336666107178, "logps/chosen": -136.6561279296875, "logps/rejected": -343.58343505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.913298606872559, "rewards/margins": 17.27830696105957, "rewards/rejected": -24.191604614257812, "step": 4860 }, { "epoch": 8.37, "learning_rate": 3.909902252443689e-08, "logits/chosen": -1.8744630813598633, "logits/rejected": -1.7050566673278809, "logps/chosen": -165.930908203125, "logps/rejected": -290.3241271972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3677215576171875, "rewards/margins": 13.965702056884766, "rewards/rejected": -21.333423614501953, "step": 4861 }, { "epoch": 8.37, "learning_rate": 3.899277518062048e-08, "logits/chosen": -1.862102746963501, "logits/rejected": -1.8036776781082153, "logps/chosen": -183.0077667236328, "logps/rejected": -321.1595764160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.9575605392456055, "rewards/margins": 14.12977409362793, "rewards/rejected": -22.08733558654785, "step": 4862 }, { "epoch": 8.37, "learning_rate": 3.888652783680408e-08, "logits/chosen": -1.5608636140823364, "logits/rejected": -1.8753376007080078, "logps/chosen": -163.46063232421875, "logps/rejected": -361.85357666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.042827606201172, "rewards/margins": 19.63478660583496, "rewards/rejected": -27.677614212036133, "step": 4863 }, { "epoch": 8.37, "learning_rate": 3.878028049298768e-08, "logits/chosen": -1.858177900314331, "logits/rejected": -1.763150691986084, "logps/chosen": -159.0519561767578, "logps/rejected": -290.8316650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.726055145263672, "rewards/margins": 14.734183311462402, "rewards/rejected": -20.46023941040039, "step": 4864 }, { "epoch": 8.37, "learning_rate": 3.867403314917127e-08, "logits/chosen": -1.5757274627685547, "logits/rejected": -2.0826938152313232, "logps/chosen": -154.88734436035156, "logps/rejected": -343.82904052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.054780960083008, "rewards/margins": 15.723285675048828, "rewards/rejected": -22.778066635131836, "step": 4865 }, { "epoch": 8.38, "learning_rate": 3.856778580535486e-08, "logits/chosen": -1.629562258720398, "logits/rejected": -1.9351366758346558, "logps/chosen": -124.89881134033203, "logps/rejected": -285.0441589355469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.353700637817383, "rewards/margins": 15.412161827087402, "rewards/rejected": -19.76586151123047, "step": 4866 }, { "epoch": 8.38, "learning_rate": 3.846153846153846e-08, "logits/chosen": -1.5993291139602661, "logits/rejected": -2.0682501792907715, "logps/chosen": -153.18939208984375, "logps/rejected": -292.27679443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.34764289855957, "rewards/margins": 12.711777687072754, "rewards/rejected": -20.05942153930664, "step": 4867 }, { "epoch": 8.38, "learning_rate": 3.8355291117722054e-08, "logits/chosen": -1.7478444576263428, "logits/rejected": -1.5353134870529175, "logps/chosen": -166.6563262939453, "logps/rejected": -318.99835205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.40806770324707, "rewards/margins": 16.330537796020508, "rewards/rejected": -23.738605499267578, "step": 4868 }, { "epoch": 8.38, "learning_rate": 3.824904377390565e-08, "logits/chosen": -1.7314534187316895, "logits/rejected": -2.0360686779022217, "logps/chosen": -146.66525268554688, "logps/rejected": -356.1452331542969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.433343410491943, "rewards/margins": 19.274595260620117, "rewards/rejected": -25.70793914794922, "step": 4869 }, { "epoch": 8.38, "learning_rate": 3.8142796430089245e-08, "logits/chosen": -1.6588361263275146, "logits/rejected": -1.9507951736450195, "logps/chosen": -190.46144104003906, "logps/rejected": -369.8912353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.780001640319824, "rewards/margins": 16.71697998046875, "rewards/rejected": -28.49698257446289, "step": 4870 }, { "epoch": 8.38, "learning_rate": 3.8036549086272844e-08, "logits/chosen": -1.6599551439285278, "logits/rejected": -1.9651249647140503, "logps/chosen": -118.02894592285156, "logps/rejected": -291.075927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9810516834259033, "rewards/margins": 17.05697250366211, "rewards/rejected": -20.03802490234375, "step": 4871 }, { "epoch": 8.39, "learning_rate": 3.7930301742456436e-08, "logits/chosen": -1.8969093561172485, "logits/rejected": -2.00290584564209, "logps/chosen": -175.83401489257812, "logps/rejected": -336.03985595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.336626052856445, "rewards/margins": 15.236108779907227, "rewards/rejected": -24.572736740112305, "step": 4872 }, { "epoch": 8.39, "learning_rate": 3.7824054398640035e-08, "logits/chosen": -1.8331339359283447, "logits/rejected": -1.8643851280212402, "logps/chosen": -119.32376861572266, "logps/rejected": -293.94561767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.874400615692139, "rewards/margins": 17.412839889526367, "rewards/rejected": -22.28723907470703, "step": 4873 }, { "epoch": 8.39, "learning_rate": 3.771780705482363e-08, "logits/chosen": -1.5829415321350098, "logits/rejected": -1.8148218393325806, "logps/chosen": -142.41864013671875, "logps/rejected": -346.17626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.621354103088379, "rewards/margins": 19.2730770111084, "rewards/rejected": -25.89443016052246, "step": 4874 }, { "epoch": 8.39, "learning_rate": 3.7611559711007226e-08, "logits/chosen": -1.5919334888458252, "logits/rejected": -1.8122687339782715, "logps/chosen": -170.6305389404297, "logps/rejected": -329.40216064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.591804504394531, "rewards/margins": 14.5516357421875, "rewards/rejected": -22.14344024658203, "step": 4875 }, { "epoch": 8.39, "learning_rate": 3.750531236719082e-08, "logits/chosen": -1.7530202865600586, "logits/rejected": -1.8118276596069336, "logps/chosen": -139.78746032714844, "logps/rejected": -355.4735107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.295969009399414, "rewards/margins": 20.641281127929688, "rewards/rejected": -26.9372501373291, "step": 4876 }, { "epoch": 8.39, "learning_rate": 3.739906502337442e-08, "logits/chosen": -1.7625696659088135, "logits/rejected": -1.8514115810394287, "logps/chosen": -151.6070556640625, "logps/rejected": -348.82220458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.151309490203857, "rewards/margins": 17.910762786865234, "rewards/rejected": -25.06207275390625, "step": 4877 }, { "epoch": 8.4, "learning_rate": 3.729281767955801e-08, "logits/chosen": -1.8176288604736328, "logits/rejected": -2.0431272983551025, "logps/chosen": -156.16015625, "logps/rejected": -277.8922424316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2751593589782715, "rewards/margins": 11.082347869873047, "rewards/rejected": -18.357507705688477, "step": 4878 }, { "epoch": 8.4, "learning_rate": 3.71865703357416e-08, "logits/chosen": -1.760197639465332, "logits/rejected": -1.837920904159546, "logps/chosen": -126.84651184082031, "logps/rejected": -291.32391357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.05151891708374, "rewards/margins": 14.61778736114502, "rewards/rejected": -19.6693058013916, "step": 4879 }, { "epoch": 8.4, "learning_rate": 3.70803229919252e-08, "logits/chosen": -1.8262604475021362, "logits/rejected": -1.9373695850372314, "logps/chosen": -187.80032348632812, "logps/rejected": -339.005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.69635009765625, "rewards/margins": 15.284745216369629, "rewards/rejected": -24.981094360351562, "step": 4880 }, { "epoch": 8.4, "learning_rate": 3.697407564810879e-08, "logits/chosen": -1.8515095710754395, "logits/rejected": -1.9196354150772095, "logps/chosen": -165.74432373046875, "logps/rejected": -343.5675048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.75792121887207, "rewards/margins": 16.603031158447266, "rewards/rejected": -25.360952377319336, "step": 4881 }, { "epoch": 8.4, "learning_rate": 3.686782830429239e-08, "logits/chosen": -1.8696658611297607, "logits/rejected": -2.026304244995117, "logps/chosen": -151.39572143554688, "logps/rejected": -349.3587646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.798278331756592, "rewards/margins": 18.239513397216797, "rewards/rejected": -25.03778839111328, "step": 4882 }, { "epoch": 8.4, "learning_rate": 3.6761580960475984e-08, "logits/chosen": -1.7876136302947998, "logits/rejected": -1.9818367958068848, "logps/chosen": -182.04849243164062, "logps/rejected": -373.81011962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.86269760131836, "rewards/margins": 17.601640701293945, "rewards/rejected": -27.464336395263672, "step": 4883 }, { "epoch": 8.41, "learning_rate": 3.665533361665958e-08, "logits/chosen": -1.810572862625122, "logits/rejected": -1.5551270246505737, "logps/chosen": -149.89224243164062, "logps/rejected": -298.23138427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.146862506866455, "rewards/margins": 16.304546356201172, "rewards/rejected": -21.45140838623047, "step": 4884 }, { "epoch": 8.41, "learning_rate": 3.6549086272843175e-08, "logits/chosen": -1.3669729232788086, "logits/rejected": -2.000943660736084, "logps/chosen": -149.74989318847656, "logps/rejected": -357.56988525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.717352867126465, "rewards/margins": 17.689163208007812, "rewards/rejected": -25.40651512145996, "step": 4885 }, { "epoch": 8.41, "learning_rate": 3.6442838929026774e-08, "logits/chosen": -1.683617115020752, "logits/rejected": -1.9557056427001953, "logps/chosen": -143.353271484375, "logps/rejected": -351.8175048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.739733695983887, "rewards/margins": 18.59524154663086, "rewards/rejected": -25.334976196289062, "step": 4886 }, { "epoch": 8.41, "learning_rate": 3.6336591585210367e-08, "logits/chosen": -1.94373619556427, "logits/rejected": -1.9935791492462158, "logps/chosen": -159.5572967529297, "logps/rejected": -287.07391357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0348286628723145, "rewards/margins": 12.696927070617676, "rewards/rejected": -18.73175621032715, "step": 4887 }, { "epoch": 8.41, "learning_rate": 3.6230344241393965e-08, "logits/chosen": -1.24277925491333, "logits/rejected": -2.03301739692688, "logps/chosen": -149.07069396972656, "logps/rejected": -384.31072998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.782977104187012, "rewards/margins": 20.06110382080078, "rewards/rejected": -27.844079971313477, "step": 4888 }, { "epoch": 8.41, "learning_rate": 3.612409689757756e-08, "logits/chosen": -1.522579312324524, "logits/rejected": -1.894476294517517, "logps/chosen": -130.53274536132812, "logps/rejected": -251.24441528320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.126046180725098, "rewards/margins": 11.957315444946289, "rewards/rejected": -18.08336067199707, "step": 4889 }, { "epoch": 8.42, "learning_rate": 3.601784955376115e-08, "logits/chosen": -1.7600674629211426, "logits/rejected": -1.8362752199172974, "logps/chosen": -166.9957733154297, "logps/rejected": -331.43902587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.795251846313477, "rewards/margins": 16.917505264282227, "rewards/rejected": -23.712759017944336, "step": 4890 }, { "epoch": 8.42, "learning_rate": 3.591160220994475e-08, "logits/chosen": -1.6811052560806274, "logits/rejected": -1.5749911069869995, "logps/chosen": -170.8709716796875, "logps/rejected": -301.49993896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.007558822631836, "rewards/margins": 14.573741912841797, "rewards/rejected": -21.581300735473633, "step": 4891 }, { "epoch": 8.42, "learning_rate": 3.580535486612835e-08, "logits/chosen": -1.6197843551635742, "logits/rejected": -1.9285199642181396, "logps/chosen": -167.64414978027344, "logps/rejected": -370.57403564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.294816017150879, "rewards/margins": 20.37848663330078, "rewards/rejected": -27.673301696777344, "step": 4892 }, { "epoch": 8.42, "learning_rate": 3.569910752231194e-08, "logits/chosen": -1.7810440063476562, "logits/rejected": -1.8713126182556152, "logps/chosen": -125.99484252929688, "logps/rejected": -300.789306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.874104022979736, "rewards/margins": 14.881210327148438, "rewards/rejected": -20.75531578063965, "step": 4893 }, { "epoch": 8.42, "learning_rate": 3.559286017849553e-08, "logits/chosen": -1.958925724029541, "logits/rejected": -1.647747278213501, "logps/chosen": -150.46405029296875, "logps/rejected": -267.0911865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0859174728393555, "rewards/margins": 12.76388931274414, "rewards/rejected": -18.84980583190918, "step": 4894 }, { "epoch": 8.43, "learning_rate": 3.548661283467913e-08, "logits/chosen": -1.7954485416412354, "logits/rejected": -1.9567029476165771, "logps/chosen": -139.130126953125, "logps/rejected": -256.3779602050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.512520790100098, "rewards/margins": 11.567784309387207, "rewards/rejected": -18.080305099487305, "step": 4895 }, { "epoch": 8.43, "learning_rate": 3.538036549086273e-08, "logits/chosen": -1.7810704708099365, "logits/rejected": -1.5804331302642822, "logps/chosen": -173.64508056640625, "logps/rejected": -328.734619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.797683715820312, "rewards/margins": 14.13475513458252, "rewards/rejected": -22.932437896728516, "step": 4896 }, { "epoch": 8.43, "learning_rate": 3.527411814704632e-08, "logits/chosen": -1.7498856782913208, "logits/rejected": -1.6231476068496704, "logps/chosen": -156.55654907226562, "logps/rejected": -292.51397705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.152958869934082, "rewards/margins": 13.770177841186523, "rewards/rejected": -21.923137664794922, "step": 4897 }, { "epoch": 8.43, "learning_rate": 3.5167870803229915e-08, "logits/chosen": -2.0144424438476562, "logits/rejected": -1.7215259075164795, "logps/chosen": -110.13787841796875, "logps/rejected": -280.1069030761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.069681167602539, "rewards/margins": 17.77012825012207, "rewards/rejected": -20.83980941772461, "step": 4898 }, { "epoch": 8.43, "learning_rate": 3.5061623459413514e-08, "logits/chosen": -1.855224609375, "logits/rejected": -1.9046964645385742, "logps/chosen": -183.1436309814453, "logps/rejected": -314.30487060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.041312217712402, "rewards/margins": 13.734101295471191, "rewards/rejected": -21.775413513183594, "step": 4899 }, { "epoch": 8.43, "learning_rate": 3.495537611559711e-08, "logits/chosen": -1.9940630197525024, "logits/rejected": -1.8015499114990234, "logps/chosen": -154.18263244628906, "logps/rejected": -303.0071716308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.562472343444824, "rewards/margins": 14.65725040435791, "rewards/rejected": -21.219722747802734, "step": 4900 }, { "epoch": 8.44, "learning_rate": 3.4849128771780705e-08, "logits/chosen": -1.8134762048721313, "logits/rejected": -1.7769654989242554, "logps/chosen": -194.9086456298828, "logps/rejected": -322.66693115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.095625877380371, "rewards/margins": 13.358911514282227, "rewards/rejected": -23.454538345336914, "step": 4901 }, { "epoch": 8.44, "learning_rate": 3.47428814279643e-08, "logits/chosen": -1.596219778060913, "logits/rejected": -1.7568265199661255, "logps/chosen": -132.52784729003906, "logps/rejected": -332.6077575683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.693180561065674, "rewards/margins": 19.21019744873047, "rewards/rejected": -24.903379440307617, "step": 4902 }, { "epoch": 8.44, "learning_rate": 3.4636634084147896e-08, "logits/chosen": -1.513178825378418, "logits/rejected": -1.7646113634109497, "logps/chosen": -165.24781799316406, "logps/rejected": -329.46337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.19482421875, "rewards/margins": 16.342052459716797, "rewards/rejected": -23.536876678466797, "step": 4903 }, { "epoch": 8.44, "learning_rate": 3.4530386740331495e-08, "logits/chosen": -1.9333000183105469, "logits/rejected": -2.094831943511963, "logps/chosen": -137.30555725097656, "logps/rejected": -305.48138427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.841188907623291, "rewards/margins": 17.58052635192871, "rewards/rejected": -22.421714782714844, "step": 4904 }, { "epoch": 8.44, "learning_rate": 3.442413939651508e-08, "logits/chosen": -2.047346830368042, "logits/rejected": -1.7106703519821167, "logps/chosen": -175.13162231445312, "logps/rejected": -317.9364929199219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.354119300842285, "rewards/margins": 13.34261703491211, "rewards/rejected": -22.696735382080078, "step": 4905 }, { "epoch": 8.44, "learning_rate": 3.431789205269868e-08, "logits/chosen": -1.7287746667861938, "logits/rejected": -1.8828773498535156, "logps/chosen": -163.5141143798828, "logps/rejected": -337.6436767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.675970077514648, "rewards/margins": 17.0999755859375, "rewards/rejected": -24.77594566345215, "step": 4906 }, { "epoch": 8.45, "learning_rate": 3.421164470888228e-08, "logits/chosen": -1.7468552589416504, "logits/rejected": -1.8310205936431885, "logps/chosen": -150.88235473632812, "logps/rejected": -304.19091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.515434741973877, "rewards/margins": 16.22378921508789, "rewards/rejected": -21.73922348022461, "step": 4907 }, { "epoch": 8.45, "learning_rate": 3.410539736506588e-08, "logits/chosen": -1.8572132587432861, "logits/rejected": -2.0186994075775146, "logps/chosen": -158.10293579101562, "logps/rejected": -304.6887512207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.431680679321289, "rewards/margins": 15.383182525634766, "rewards/rejected": -20.814865112304688, "step": 4908 }, { "epoch": 8.45, "learning_rate": 3.399915002124946e-08, "logits/chosen": -1.725831151008606, "logits/rejected": -2.0685131549835205, "logps/chosen": -180.9705810546875, "logps/rejected": -345.5269775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.44974136352539, "rewards/margins": 14.955780029296875, "rewards/rejected": -23.405521392822266, "step": 4909 }, { "epoch": 8.45, "learning_rate": 3.389290267743306e-08, "logits/chosen": -1.7393715381622314, "logits/rejected": -1.81548011302948, "logps/chosen": -173.6557159423828, "logps/rejected": -322.9382629394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.828556060791016, "rewards/margins": 14.407461166381836, "rewards/rejected": -22.236019134521484, "step": 4910 }, { "epoch": 8.45, "learning_rate": 3.378665533361666e-08, "logits/chosen": -1.8008356094360352, "logits/rejected": -1.5589505434036255, "logps/chosen": -146.85328674316406, "logps/rejected": -316.43133544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.713009834289551, "rewards/margins": 16.674835205078125, "rewards/rejected": -23.387845993041992, "step": 4911 }, { "epoch": 8.45, "learning_rate": 3.368040798980026e-08, "logits/chosen": -1.8659101724624634, "logits/rejected": -1.8309435844421387, "logps/chosen": -189.3333282470703, "logps/rejected": -336.2147216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.450608253479004, "rewards/margins": 15.17868423461914, "rewards/rejected": -24.629289627075195, "step": 4912 }, { "epoch": 8.46, "learning_rate": 3.3574160645983845e-08, "logits/chosen": -1.5861252546310425, "logits/rejected": -1.9721053838729858, "logps/chosen": -172.97560119628906, "logps/rejected": -367.32196044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.582366943359375, "rewards/margins": 17.265480041503906, "rewards/rejected": -26.847848892211914, "step": 4913 }, { "epoch": 8.46, "learning_rate": 3.3467913302167444e-08, "logits/chosen": -1.9872426986694336, "logits/rejected": -1.6908729076385498, "logps/chosen": -181.04913330078125, "logps/rejected": -290.35565185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.066764831542969, "rewards/margins": 13.165277481079102, "rewards/rejected": -22.23204231262207, "step": 4914 }, { "epoch": 8.46, "learning_rate": 3.336166595835104e-08, "logits/chosen": -1.7418137788772583, "logits/rejected": -1.7784631252288818, "logps/chosen": -148.87652587890625, "logps/rejected": -348.30047607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.997642517089844, "rewards/margins": 19.201812744140625, "rewards/rejected": -26.19945526123047, "step": 4915 }, { "epoch": 8.46, "learning_rate": 3.325541861453463e-08, "logits/chosen": -1.8768255710601807, "logits/rejected": -1.8890302181243896, "logps/chosen": -159.85934448242188, "logps/rejected": -282.5054626464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.765697479248047, "rewards/margins": 12.178751945495605, "rewards/rejected": -20.944448471069336, "step": 4916 }, { "epoch": 8.46, "learning_rate": 3.314917127071823e-08, "logits/chosen": -1.4678270816802979, "logits/rejected": -1.887758493423462, "logps/chosen": -149.79910278320312, "logps/rejected": -324.6185302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.696071624755859, "rewards/margins": 16.53911781311035, "rewards/rejected": -23.235187530517578, "step": 4917 }, { "epoch": 8.46, "learning_rate": 3.3042923926901827e-08, "logits/chosen": -1.8871629238128662, "logits/rejected": -1.9384183883666992, "logps/chosen": -141.44708251953125, "logps/rejected": -268.26702880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.797104358673096, "rewards/margins": 14.1363525390625, "rewards/rejected": -19.933456420898438, "step": 4918 }, { "epoch": 8.47, "learning_rate": 3.2936676583085426e-08, "logits/chosen": -1.9957191944122314, "logits/rejected": -1.3283480405807495, "logps/chosen": -192.6619415283203, "logps/rejected": -272.54876708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.093729019165039, "rewards/margins": 12.583354949951172, "rewards/rejected": -19.677085876464844, "step": 4919 }, { "epoch": 8.47, "learning_rate": 3.283042923926901e-08, "logits/chosen": -1.8751189708709717, "logits/rejected": -1.8410892486572266, "logps/chosen": -193.21243286132812, "logps/rejected": -322.4234619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.175131797790527, "rewards/margins": 13.291055679321289, "rewards/rejected": -22.4661865234375, "step": 4920 }, { "epoch": 8.47, "learning_rate": 3.272418189545261e-08, "logits/chosen": -1.6717753410339355, "logits/rejected": -1.9522169828414917, "logps/chosen": -125.70948028564453, "logps/rejected": -284.1849365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.243487358093262, "rewards/margins": 14.976899147033691, "rewards/rejected": -20.220386505126953, "step": 4921 }, { "epoch": 8.47, "learning_rate": 3.261793455163621e-08, "logits/chosen": -1.6873853206634521, "logits/rejected": -1.9652708768844604, "logps/chosen": -148.88475036621094, "logps/rejected": -345.4021301269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.434068202972412, "rewards/margins": 19.104703903198242, "rewards/rejected": -25.53877067565918, "step": 4922 }, { "epoch": 8.47, "learning_rate": 3.251168720781981e-08, "logits/chosen": -2.0405657291412354, "logits/rejected": -1.8152024745941162, "logps/chosen": -92.67338562011719, "logps/rejected": -240.73709106445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4613699913024902, "rewards/margins": 15.651471138000488, "rewards/rejected": -19.112842559814453, "step": 4923 }, { "epoch": 8.48, "learning_rate": 3.2405439864003394e-08, "logits/chosen": -1.4825022220611572, "logits/rejected": -1.886580467224121, "logps/chosen": -108.2432632446289, "logps/rejected": -297.45599365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5350818634033203, "rewards/margins": 17.073476791381836, "rewards/rejected": -20.608558654785156, "step": 4924 }, { "epoch": 8.48, "learning_rate": 3.229919252018699e-08, "logits/chosen": -1.902666449546814, "logits/rejected": -1.8149149417877197, "logps/chosen": -169.61300659179688, "logps/rejected": -307.428466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.178386688232422, "rewards/margins": 13.160432815551758, "rewards/rejected": -21.33881950378418, "step": 4925 }, { "epoch": 8.48, "learning_rate": 3.219294517637059e-08, "logits/chosen": -1.7074882984161377, "logits/rejected": -1.7894341945648193, "logps/chosen": -160.21194458007812, "logps/rejected": -324.624755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.268293857574463, "rewards/margins": 15.941771507263184, "rewards/rejected": -23.210065841674805, "step": 4926 }, { "epoch": 8.48, "learning_rate": 3.208669783255419e-08, "logits/chosen": -1.8095207214355469, "logits/rejected": -1.8703025579452515, "logps/chosen": -149.25994873046875, "logps/rejected": -331.4831237792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.18440055847168, "rewards/margins": 18.354598999023438, "rewards/rejected": -24.538997650146484, "step": 4927 }, { "epoch": 8.48, "learning_rate": 3.1980450488737776e-08, "logits/chosen": -1.311803936958313, "logits/rejected": -1.859645128250122, "logps/chosen": -171.93374633789062, "logps/rejected": -363.29913330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.786425590515137, "rewards/margins": 15.743706703186035, "rewards/rejected": -24.530132293701172, "step": 4928 }, { "epoch": 8.48, "learning_rate": 3.1874203144921375e-08, "logits/chosen": -1.8047375679016113, "logits/rejected": -1.8011474609375, "logps/chosen": -161.95571899414062, "logps/rejected": -304.1741027832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.838864326477051, "rewards/margins": 14.38206672668457, "rewards/rejected": -20.220932006835938, "step": 4929 }, { "epoch": 8.49, "learning_rate": 3.1767955801104974e-08, "logits/chosen": -1.8685815334320068, "logits/rejected": -1.8865511417388916, "logps/chosen": -148.17306518554688, "logps/rejected": -298.7691345214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.411181926727295, "rewards/margins": 14.803682327270508, "rewards/rejected": -22.21486473083496, "step": 4930 }, { "epoch": 8.49, "learning_rate": 3.1661708457288566e-08, "logits/chosen": -1.8146061897277832, "logits/rejected": -1.9670064449310303, "logps/chosen": -165.49559020996094, "logps/rejected": -317.70263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.008594512939453, "rewards/margins": 15.682995796203613, "rewards/rejected": -23.691591262817383, "step": 4931 }, { "epoch": 8.49, "learning_rate": 3.155546111347216e-08, "logits/chosen": -2.0338079929351807, "logits/rejected": -1.6216468811035156, "logps/chosen": -141.65591430664062, "logps/rejected": -262.73712158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.315389633178711, "rewards/margins": 13.817606925964355, "rewards/rejected": -19.132997512817383, "step": 4932 }, { "epoch": 8.49, "learning_rate": 3.144921376965576e-08, "logits/chosen": -2.142019033432007, "logits/rejected": -1.8277699947357178, "logps/chosen": -157.91387939453125, "logps/rejected": -287.24627685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.37427282333374, "rewards/margins": 13.935446739196777, "rewards/rejected": -19.30971908569336, "step": 4933 }, { "epoch": 8.49, "learning_rate": 3.1342966425839356e-08, "logits/chosen": -1.8169975280761719, "logits/rejected": -2.076878547668457, "logps/chosen": -144.96640014648438, "logps/rejected": -315.28179931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0388875007629395, "rewards/margins": 16.916854858398438, "rewards/rejected": -21.95574188232422, "step": 4934 }, { "epoch": 8.49, "learning_rate": 3.123671908202295e-08, "logits/chosen": -2.0843281745910645, "logits/rejected": -1.8084743022918701, "logps/chosen": -157.3399200439453, "logps/rejected": -305.3338928222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.48427152633667, "rewards/margins": 14.336259841918945, "rewards/rejected": -21.820531845092773, "step": 4935 }, { "epoch": 8.5, "learning_rate": 3.113047173820654e-08, "logits/chosen": -1.7360928058624268, "logits/rejected": -1.9387452602386475, "logps/chosen": -151.09356689453125, "logps/rejected": -301.96759033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.194409370422363, "rewards/margins": 14.325403213500977, "rewards/rejected": -20.519813537597656, "step": 4936 }, { "epoch": 8.5, "learning_rate": 3.102422439439014e-08, "logits/chosen": -1.9278662204742432, "logits/rejected": -1.9299157857894897, "logps/chosen": -167.66822814941406, "logps/rejected": -313.66839599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.583843231201172, "rewards/margins": 15.015975952148438, "rewards/rejected": -23.59981918334961, "step": 4937 }, { "epoch": 8.5, "learning_rate": 3.091797705057373e-08, "logits/chosen": -1.968510627746582, "logits/rejected": -1.7075145244598389, "logps/chosen": -204.75991821289062, "logps/rejected": -349.8062744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.50721263885498, "rewards/margins": 14.794502258300781, "rewards/rejected": -26.301715850830078, "step": 4938 }, { "epoch": 8.5, "learning_rate": 3.081172970675733e-08, "logits/chosen": -2.1097636222839355, "logits/rejected": -1.5714356899261475, "logps/chosen": -182.5837860107422, "logps/rejected": -311.99078369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.905664443969727, "rewards/margins": 15.352081298828125, "rewards/rejected": -23.25774574279785, "step": 4939 }, { "epoch": 8.5, "learning_rate": 3.070548236294092e-08, "logits/chosen": -1.646277904510498, "logits/rejected": -2.007512331008911, "logps/chosen": -138.7878875732422, "logps/rejected": -353.14691162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.361327171325684, "rewards/margins": 18.644556045532227, "rewards/rejected": -26.005882263183594, "step": 4940 }, { "epoch": 8.5, "learning_rate": 3.059923501912452e-08, "logits/chosen": -1.6984779834747314, "logits/rejected": -1.7905876636505127, "logps/chosen": -152.1521759033203, "logps/rejected": -305.6248779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.710826396942139, "rewards/margins": 15.841747283935547, "rewards/rejected": -22.552574157714844, "step": 4941 }, { "epoch": 8.51, "learning_rate": 3.0492987675308114e-08, "logits/chosen": -1.816641926765442, "logits/rejected": -1.6913775205612183, "logps/chosen": -180.688720703125, "logps/rejected": -312.4228820800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.861860275268555, "rewards/margins": 14.206913948059082, "rewards/rejected": -23.068775177001953, "step": 4942 }, { "epoch": 8.51, "learning_rate": 3.038674033149171e-08, "logits/chosen": -1.7165117263793945, "logits/rejected": -1.921350121498108, "logps/chosen": -156.912841796875, "logps/rejected": -306.62176513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.147168159484863, "rewards/margins": 14.118289947509766, "rewards/rejected": -22.265459060668945, "step": 4943 }, { "epoch": 8.51, "learning_rate": 3.0280492987675305e-08, "logits/chosen": -1.631910800933838, "logits/rejected": -1.9309054613113403, "logps/chosen": -121.29910278320312, "logps/rejected": -301.99176025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.316547870635986, "rewards/margins": 17.14219093322754, "rewards/rejected": -22.458740234375, "step": 4944 }, { "epoch": 8.51, "learning_rate": 3.0174245643858904e-08, "logits/chosen": -1.7858428955078125, "logits/rejected": -1.7208199501037598, "logps/chosen": -152.6173553466797, "logps/rejected": -320.7239074707031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.86285400390625, "rewards/margins": 18.098806381225586, "rewards/rejected": -23.96166229248047, "step": 4945 }, { "epoch": 8.51, "learning_rate": 3.0067998300042497e-08, "logits/chosen": -1.998829960823059, "logits/rejected": -1.5636547803878784, "logps/chosen": -167.66197204589844, "logps/rejected": -322.9896240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.451847076416016, "rewards/margins": 17.310731887817383, "rewards/rejected": -24.7625789642334, "step": 4946 }, { "epoch": 8.51, "learning_rate": 2.9961750956226096e-08, "logits/chosen": -1.957338809967041, "logits/rejected": -1.9501631259918213, "logps/chosen": -123.90101623535156, "logps/rejected": -268.43670654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.194990634918213, "rewards/margins": 13.905288696289062, "rewards/rejected": -18.100278854370117, "step": 4947 }, { "epoch": 8.52, "learning_rate": 2.985550361240969e-08, "logits/chosen": -1.797102928161621, "logits/rejected": -1.699202537536621, "logps/chosen": -168.06375122070312, "logps/rejected": -327.9853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.027026176452637, "rewards/margins": 16.197240829467773, "rewards/rejected": -24.224266052246094, "step": 4948 }, { "epoch": 8.52, "learning_rate": 2.9749256268593283e-08, "logits/chosen": -1.5916039943695068, "logits/rejected": -1.879157543182373, "logps/chosen": -150.6061553955078, "logps/rejected": -312.9264221191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.337471008300781, "rewards/margins": 13.544568061828613, "rewards/rejected": -21.882038116455078, "step": 4949 }, { "epoch": 8.52, "learning_rate": 2.964300892477688e-08, "logits/chosen": -1.6958731412887573, "logits/rejected": -1.8758881092071533, "logps/chosen": -180.73130798339844, "logps/rejected": -339.7803649902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.614328384399414, "rewards/margins": 15.731937408447266, "rewards/rejected": -24.34626579284668, "step": 4950 }, { "epoch": 8.52, "learning_rate": 2.9536761580960475e-08, "logits/chosen": -1.8575156927108765, "logits/rejected": -1.9320008754730225, "logps/chosen": -108.22164154052734, "logps/rejected": -245.08941650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.8553147315979, "rewards/margins": 14.539073944091797, "rewards/rejected": -19.394390106201172, "step": 4951 }, { "epoch": 8.52, "learning_rate": 2.943051423714407e-08, "logits/chosen": -1.5398942232131958, "logits/rejected": -1.956641435623169, "logps/chosen": -141.27346801757812, "logps/rejected": -331.88983154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.338590621948242, "rewards/margins": 16.154726028442383, "rewards/rejected": -22.493318557739258, "step": 4952 }, { "epoch": 8.52, "learning_rate": 2.9324266893327666e-08, "logits/chosen": -2.0097954273223877, "logits/rejected": -1.85562264919281, "logps/chosen": -82.29401397705078, "logps/rejected": -255.2078399658203, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6414532661437988, "rewards/margins": 16.734663009643555, "rewards/rejected": -18.376115798950195, "step": 4953 }, { "epoch": 8.53, "learning_rate": 2.9218019549511258e-08, "logits/chosen": -1.8260040283203125, "logits/rejected": -1.8114606142044067, "logps/chosen": -157.69137573242188, "logps/rejected": -322.86029052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.092720031738281, "rewards/margins": 15.923999786376953, "rewards/rejected": -23.016719818115234, "step": 4954 }, { "epoch": 8.53, "learning_rate": 2.9111772205694857e-08, "logits/chosen": -1.7940255403518677, "logits/rejected": -1.9930365085601807, "logps/chosen": -130.0864715576172, "logps/rejected": -372.41314697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.55626916885376, "rewards/margins": 22.949451446533203, "rewards/rejected": -27.505720138549805, "step": 4955 }, { "epoch": 8.53, "learning_rate": 2.900552486187845e-08, "logits/chosen": -1.7618495225906372, "logits/rejected": -1.8673313856124878, "logps/chosen": -177.48670959472656, "logps/rejected": -308.0481262207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.018247604370117, "rewards/margins": 13.179203987121582, "rewards/rejected": -22.19745445251465, "step": 4956 }, { "epoch": 8.53, "learning_rate": 2.8899277518062048e-08, "logits/chosen": -1.9038918018341064, "logits/rejected": -1.7995977401733398, "logps/chosen": -190.4894561767578, "logps/rejected": -310.7604064941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.326956748962402, "rewards/margins": 12.899995803833008, "rewards/rejected": -21.226953506469727, "step": 4957 }, { "epoch": 8.53, "learning_rate": 2.879303017424564e-08, "logits/chosen": -1.8576431274414062, "logits/rejected": -1.7880367040634155, "logps/chosen": -164.10910034179688, "logps/rejected": -298.9157409667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.505010604858398, "rewards/margins": 13.805081367492676, "rewards/rejected": -20.310091018676758, "step": 4958 }, { "epoch": 8.54, "learning_rate": 2.868678283042924e-08, "logits/chosen": -1.816794991493225, "logits/rejected": -1.8740407228469849, "logps/chosen": -124.43217468261719, "logps/rejected": -263.5680236816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.049432277679443, "rewards/margins": 14.192560195922852, "rewards/rejected": -19.241992950439453, "step": 4959 }, { "epoch": 8.54, "learning_rate": 2.8580535486612832e-08, "logits/chosen": -1.9206976890563965, "logits/rejected": -1.8967100381851196, "logps/chosen": -174.13223266601562, "logps/rejected": -326.4123229980469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.896933555603027, "rewards/margins": 14.91458511352539, "rewards/rejected": -23.811519622802734, "step": 4960 }, { "epoch": 8.54, "learning_rate": 2.847428814279643e-08, "logits/chosen": -1.6319401264190674, "logits/rejected": -1.8675498962402344, "logps/chosen": -165.4657745361328, "logps/rejected": -350.1336975097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.959890842437744, "rewards/margins": 15.315516471862793, "rewards/rejected": -23.275407791137695, "step": 4961 }, { "epoch": 8.54, "learning_rate": 2.8368040798980023e-08, "logits/chosen": -1.84401273727417, "logits/rejected": -1.2839642763137817, "logps/chosen": -158.46942138671875, "logps/rejected": -284.1799621582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.74735164642334, "rewards/margins": 15.427578926086426, "rewards/rejected": -21.1749324798584, "step": 4962 }, { "epoch": 8.54, "learning_rate": 2.8261793455163622e-08, "logits/chosen": -1.8166191577911377, "logits/rejected": -1.6567304134368896, "logps/chosen": -122.1781005859375, "logps/rejected": -294.7679443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.13886833190918, "rewards/margins": 17.94577407836914, "rewards/rejected": -23.08464241027832, "step": 4963 }, { "epoch": 8.54, "learning_rate": 2.8155546111347214e-08, "logits/chosen": -1.7128822803497314, "logits/rejected": -1.9749186038970947, "logps/chosen": -135.90533447265625, "logps/rejected": -296.5560302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.657332897186279, "rewards/margins": 14.771669387817383, "rewards/rejected": -21.42900276184082, "step": 4964 }, { "epoch": 8.55, "learning_rate": 2.804929876753081e-08, "logits/chosen": -1.6779582500457764, "logits/rejected": -1.7710719108581543, "logps/chosen": -137.5826416015625, "logps/rejected": -296.93243408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.388033866882324, "rewards/margins": 15.449912071228027, "rewards/rejected": -21.837947845458984, "step": 4965 }, { "epoch": 8.55, "learning_rate": 2.7943051423714405e-08, "logits/chosen": -1.4668676853179932, "logits/rejected": -2.008629322052002, "logps/chosen": -144.8165283203125, "logps/rejected": -309.5740051269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.523847579956055, "rewards/margins": 14.486878395080566, "rewards/rejected": -21.010726928710938, "step": 4966 }, { "epoch": 8.55, "learning_rate": 2.7836804079898e-08, "logits/chosen": -1.9893914461135864, "logits/rejected": -1.532609462738037, "logps/chosen": -206.91433715820312, "logps/rejected": -335.7851867675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.36811351776123, "rewards/margins": 14.517292022705078, "rewards/rejected": -23.885404586791992, "step": 4967 }, { "epoch": 8.55, "learning_rate": 2.7730556736081596e-08, "logits/chosen": -1.6322274208068848, "logits/rejected": -2.0314958095550537, "logps/chosen": -128.66961669921875, "logps/rejected": -297.193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.560161590576172, "rewards/margins": 15.328822135925293, "rewards/rejected": -20.88898468017578, "step": 4968 }, { "epoch": 8.55, "learning_rate": 2.7624309392265192e-08, "logits/chosen": -2.01667857170105, "logits/rejected": -1.6900725364685059, "logps/chosen": -148.1493682861328, "logps/rejected": -324.57635498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7704081535339355, "rewards/margins": 17.742324829101562, "rewards/rejected": -23.512733459472656, "step": 4969 }, { "epoch": 8.55, "learning_rate": 2.7518062048448788e-08, "logits/chosen": -2.0099189281463623, "logits/rejected": -1.7703425884246826, "logps/chosen": -142.97833251953125, "logps/rejected": -304.42254638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.260666847229004, "rewards/margins": 16.67063331604004, "rewards/rejected": -22.931299209594727, "step": 4970 }, { "epoch": 8.56, "learning_rate": 2.7411814704632383e-08, "logits/chosen": -1.9430010318756104, "logits/rejected": -1.638878345489502, "logps/chosen": -166.6787872314453, "logps/rejected": -306.10784912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.298397064208984, "rewards/margins": 15.446145057678223, "rewards/rejected": -23.74454116821289, "step": 4971 }, { "epoch": 8.56, "learning_rate": 2.730556736081598e-08, "logits/chosen": -2.0246076583862305, "logits/rejected": -1.6324464082717896, "logps/chosen": -175.3031768798828, "logps/rejected": -328.68609619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.065842628479004, "rewards/margins": 17.278766632080078, "rewards/rejected": -25.344608306884766, "step": 4972 }, { "epoch": 8.56, "learning_rate": 2.7199320016999574e-08, "logits/chosen": -1.8504966497421265, "logits/rejected": -1.8596463203430176, "logps/chosen": -131.65074157714844, "logps/rejected": -330.7183532714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.792420387268066, "rewards/margins": 19.612836837768555, "rewards/rejected": -24.405256271362305, "step": 4973 }, { "epoch": 8.56, "learning_rate": 2.709307267318317e-08, "logits/chosen": -1.9477777481079102, "logits/rejected": -2.0350143909454346, "logps/chosen": -152.85549926757812, "logps/rejected": -321.316650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.399107933044434, "rewards/margins": 15.893566131591797, "rewards/rejected": -22.292675018310547, "step": 4974 }, { "epoch": 8.56, "learning_rate": 2.6986825329366766e-08, "logits/chosen": -2.1397271156311035, "logits/rejected": -2.0389719009399414, "logps/chosen": -141.0391082763672, "logps/rejected": -294.6979675292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9029541015625, "rewards/margins": 15.27677059173584, "rewards/rejected": -20.179723739624023, "step": 4975 }, { "epoch": 8.56, "learning_rate": 2.688057798555036e-08, "logits/chosen": -1.967071771621704, "logits/rejected": -2.059572219848633, "logps/chosen": -164.51708984375, "logps/rejected": -324.0296630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.6737565994262695, "rewards/margins": 17.380691528320312, "rewards/rejected": -25.0544490814209, "step": 4976 }, { "epoch": 8.57, "learning_rate": 2.6774330641733957e-08, "logits/chosen": -1.888680338859558, "logits/rejected": -1.6691445112228394, "logps/chosen": -188.43885803222656, "logps/rejected": -347.8954162597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.737305641174316, "rewards/margins": 17.221261978149414, "rewards/rejected": -26.958568572998047, "step": 4977 }, { "epoch": 8.57, "learning_rate": 2.666808329791755e-08, "logits/chosen": -1.784393548965454, "logits/rejected": -1.8568395376205444, "logps/chosen": -136.91635131835938, "logps/rejected": -302.94805908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.808404445648193, "rewards/margins": 16.271076202392578, "rewards/rejected": -21.079479217529297, "step": 4978 }, { "epoch": 8.57, "learning_rate": 2.6561835954101148e-08, "logits/chosen": -1.5024493932724, "logits/rejected": -1.8888157606124878, "logps/chosen": -169.01173400878906, "logps/rejected": -361.4049377441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.580787658691406, "rewards/margins": 17.646167755126953, "rewards/rejected": -25.22695541381836, "step": 4979 }, { "epoch": 8.57, "learning_rate": 2.645558861028474e-08, "logits/chosen": -1.7028720378875732, "logits/rejected": -2.0442981719970703, "logps/chosen": -145.90426635742188, "logps/rejected": -346.72149658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.114864349365234, "rewards/margins": 18.59465980529785, "rewards/rejected": -23.709524154663086, "step": 4980 }, { "epoch": 8.57, "learning_rate": 2.634934126646834e-08, "logits/chosen": -1.758156180381775, "logits/rejected": -1.6529268026351929, "logps/chosen": -130.55001831054688, "logps/rejected": -262.5329284667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.134524345397949, "rewards/margins": 13.62478256225586, "rewards/rejected": -18.759307861328125, "step": 4981 }, { "epoch": 8.57, "learning_rate": 2.624309392265193e-08, "logits/chosen": -1.6451539993286133, "logits/rejected": -1.9100286960601807, "logps/chosen": -152.57852172851562, "logps/rejected": -291.18658447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.65310001373291, "rewards/margins": 13.962007522583008, "rewards/rejected": -20.6151065826416, "step": 4982 }, { "epoch": 8.58, "learning_rate": 2.613684657883553e-08, "logits/chosen": -1.859291672706604, "logits/rejected": -1.947244644165039, "logps/chosen": -204.70375061035156, "logps/rejected": -325.677001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.864519119262695, "rewards/margins": 13.213991165161133, "rewards/rejected": -22.078510284423828, "step": 4983 }, { "epoch": 8.58, "learning_rate": 2.6030599235019123e-08, "logits/chosen": -1.9144097566604614, "logits/rejected": -1.9589589834213257, "logps/chosen": -146.80697631835938, "logps/rejected": -293.78387451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.203822135925293, "rewards/margins": 13.844185829162598, "rewards/rejected": -21.048006057739258, "step": 4984 }, { "epoch": 8.58, "learning_rate": 2.5924351891202718e-08, "logits/chosen": -1.9591424465179443, "logits/rejected": -1.8225960731506348, "logps/chosen": -163.1051483154297, "logps/rejected": -326.9231262207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.826207637786865, "rewards/margins": 14.78764533996582, "rewards/rejected": -22.61385154724121, "step": 4985 }, { "epoch": 8.58, "learning_rate": 2.5818104547386314e-08, "logits/chosen": -1.7588106393814087, "logits/rejected": -1.594298243522644, "logps/chosen": -183.75604248046875, "logps/rejected": -344.7937316894531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.092275619506836, "rewards/margins": 16.013423919677734, "rewards/rejected": -24.10569953918457, "step": 4986 }, { "epoch": 8.58, "learning_rate": 2.571185720356991e-08, "logits/chosen": -1.9160317182540894, "logits/rejected": -1.9795081615447998, "logps/chosen": -186.5208740234375, "logps/rejected": -323.54376220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.429932594299316, "rewards/margins": 14.696479797363281, "rewards/rejected": -23.12641143798828, "step": 4987 }, { "epoch": 8.59, "learning_rate": 2.5605609859753505e-08, "logits/chosen": -1.6987967491149902, "logits/rejected": -1.9306507110595703, "logps/chosen": -155.55108642578125, "logps/rejected": -344.08807373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.360915184020996, "rewards/margins": 16.087081909179688, "rewards/rejected": -23.447998046875, "step": 4988 }, { "epoch": 8.59, "learning_rate": 2.54993625159371e-08, "logits/chosen": -1.9740381240844727, "logits/rejected": -1.6738200187683105, "logps/chosen": -152.53050231933594, "logps/rejected": -337.5766296386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.9406609535217285, "rewards/margins": 20.043882369995117, "rewards/rejected": -25.98454475402832, "step": 4989 }, { "epoch": 8.59, "learning_rate": 2.5393115172120696e-08, "logits/chosen": -2.005758762359619, "logits/rejected": -1.4877269268035889, "logps/chosen": -160.81048583984375, "logps/rejected": -310.0534973144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.515484809875488, "rewards/margins": 15.468690872192383, "rewards/rejected": -21.984174728393555, "step": 4990 }, { "epoch": 8.59, "learning_rate": 2.528686782830429e-08, "logits/chosen": -1.91749906539917, "logits/rejected": -1.8050318956375122, "logps/chosen": -183.11582946777344, "logps/rejected": -346.9399719238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.934933662414551, "rewards/margins": 17.33289337158203, "rewards/rejected": -25.2678279876709, "step": 4991 }, { "epoch": 8.59, "learning_rate": 2.5180620484487887e-08, "logits/chosen": -1.8537312746047974, "logits/rejected": -1.6673692464828491, "logps/chosen": -152.88706970214844, "logps/rejected": -358.43206787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.845462799072266, "rewards/margins": 19.624011993408203, "rewards/rejected": -27.4694766998291, "step": 4992 }, { "epoch": 8.59, "learning_rate": 2.507437314067148e-08, "logits/chosen": -1.7483633756637573, "logits/rejected": -1.7751741409301758, "logps/chosen": -155.06483459472656, "logps/rejected": -288.79278564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.4025421142578125, "rewards/margins": 13.248830795288086, "rewards/rejected": -19.6513729095459, "step": 4993 }, { "epoch": 8.6, "learning_rate": 2.496812579685508e-08, "logits/chosen": -1.9140686988830566, "logits/rejected": -1.7761075496673584, "logps/chosen": -157.4550018310547, "logps/rejected": -320.1192932128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.811365127563477, "rewards/margins": 15.616546630859375, "rewards/rejected": -22.427913665771484, "step": 4994 }, { "epoch": 8.6, "learning_rate": 2.486187845303867e-08, "logits/chosen": -1.8581910133361816, "logits/rejected": -1.8605375289916992, "logps/chosen": -131.55218505859375, "logps/rejected": -320.7785339355469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.126871109008789, "rewards/margins": 16.706008911132812, "rewards/rejected": -22.83287811279297, "step": 4995 }, { "epoch": 8.6, "learning_rate": 2.475563110922227e-08, "logits/chosen": -1.8499715328216553, "logits/rejected": -1.8656086921691895, "logps/chosen": -126.71830749511719, "logps/rejected": -279.7686767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.523507118225098, "rewards/margins": 16.159786224365234, "rewards/rejected": -21.683292388916016, "step": 4996 }, { "epoch": 8.6, "learning_rate": 2.4649383765405862e-08, "logits/chosen": -1.92875337600708, "logits/rejected": -1.5037237405776978, "logps/chosen": -158.05889892578125, "logps/rejected": -307.5809326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.078100204467773, "rewards/margins": 18.148893356323242, "rewards/rejected": -23.22699546813965, "step": 4997 }, { "epoch": 8.6, "learning_rate": 2.454313642158946e-08, "logits/chosen": -1.9485042095184326, "logits/rejected": -1.5893840789794922, "logps/chosen": -195.29510498046875, "logps/rejected": -317.7983703613281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.351285934448242, "rewards/margins": 14.778752326965332, "rewards/rejected": -24.13003921508789, "step": 4998 }, { "epoch": 8.6, "learning_rate": 2.4436889077773053e-08, "logits/chosen": -1.9746265411376953, "logits/rejected": -1.9741432666778564, "logps/chosen": -161.31546020507812, "logps/rejected": -325.6263732910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.066521167755127, "rewards/margins": 16.724605560302734, "rewards/rejected": -23.791126251220703, "step": 4999 }, { "epoch": 8.61, "learning_rate": 2.4330641733956652e-08, "logits/chosen": -1.7853857278823853, "logits/rejected": -1.7371338605880737, "logps/chosen": -151.56521606445312, "logps/rejected": -307.29986572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.566035270690918, "rewards/margins": 15.73284912109375, "rewards/rejected": -22.29888343811035, "step": 5000 }, { "epoch": 8.61, "learning_rate": 2.4224394390140244e-08, "logits/chosen": -1.9674447774887085, "logits/rejected": -1.8983433246612549, "logps/chosen": -145.83396911621094, "logps/rejected": -284.20025634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.433936595916748, "rewards/margins": 13.463403701782227, "rewards/rejected": -20.897340774536133, "step": 5001 }, { "epoch": 8.61, "learning_rate": 2.4118147046323843e-08, "logits/chosen": -1.868507742881775, "logits/rejected": -1.823779582977295, "logps/chosen": -201.04380798339844, "logps/rejected": -331.70111083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.796623229980469, "rewards/margins": 13.010858535766602, "rewards/rejected": -22.80748176574707, "step": 5002 }, { "epoch": 8.61, "learning_rate": 2.4011899702507436e-08, "logits/chosen": -1.8158175945281982, "logits/rejected": -2.0117604732513428, "logps/chosen": -171.72695922851562, "logps/rejected": -312.5186767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.007034301757812, "rewards/margins": 13.163714408874512, "rewards/rejected": -22.170747756958008, "step": 5003 }, { "epoch": 8.61, "learning_rate": 2.390565235869103e-08, "logits/chosen": -1.76104736328125, "logits/rejected": -1.8303875923156738, "logps/chosen": -163.82260131835938, "logps/rejected": -295.05108642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.322223663330078, "rewards/margins": 14.469766616821289, "rewards/rejected": -22.7919921875, "step": 5004 }, { "epoch": 8.61, "learning_rate": 2.3799405014874627e-08, "logits/chosen": -1.677217960357666, "logits/rejected": -1.9677810668945312, "logps/chosen": -207.54786682128906, "logps/rejected": -351.45294189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.683673858642578, "rewards/margins": 12.796656608581543, "rewards/rejected": -24.480329513549805, "step": 5005 }, { "epoch": 8.62, "learning_rate": 2.3693157671058222e-08, "logits/chosen": -1.8361207246780396, "logits/rejected": -1.962066650390625, "logps/chosen": -174.27655029296875, "logps/rejected": -346.33380126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.320571899414062, "rewards/margins": 17.398366928100586, "rewards/rejected": -25.71894073486328, "step": 5006 }, { "epoch": 8.62, "learning_rate": 2.3586910327241818e-08, "logits/chosen": -2.0205676555633545, "logits/rejected": -1.875850796699524, "logps/chosen": -167.37057495117188, "logps/rejected": -359.53924560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.616456985473633, "rewards/margins": 18.883705139160156, "rewards/rejected": -26.500164031982422, "step": 5007 }, { "epoch": 8.62, "learning_rate": 2.3480662983425414e-08, "logits/chosen": -1.764708161354065, "logits/rejected": -1.7320234775543213, "logps/chosen": -109.36807250976562, "logps/rejected": -253.51136779785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.532430648803711, "rewards/margins": 14.264076232910156, "rewards/rejected": -19.7965087890625, "step": 5008 }, { "epoch": 8.62, "learning_rate": 2.337441563960901e-08, "logits/chosen": -1.9221971035003662, "logits/rejected": -1.7196146249771118, "logps/chosen": -161.33486938476562, "logps/rejected": -305.10675048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.468292236328125, "rewards/margins": 13.830629348754883, "rewards/rejected": -21.29892349243164, "step": 5009 }, { "epoch": 8.62, "learning_rate": 2.3268168295792605e-08, "logits/chosen": -1.7340751886367798, "logits/rejected": -1.865140676498413, "logps/chosen": -126.15257263183594, "logps/rejected": -308.9057922363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6191816329956055, "rewards/margins": 18.394254684448242, "rewards/rejected": -22.01343536376953, "step": 5010 }, { "epoch": 8.62, "learning_rate": 2.31619209519762e-08, "logits/chosen": -1.9455102682113647, "logits/rejected": -1.6424317359924316, "logps/chosen": -147.2137908935547, "logps/rejected": -305.11627197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1491498947143555, "rewards/margins": 15.665693283081055, "rewards/rejected": -21.814844131469727, "step": 5011 }, { "epoch": 8.63, "learning_rate": 2.3055673608159796e-08, "logits/chosen": -1.8608448505401611, "logits/rejected": -1.5674153566360474, "logps/chosen": -150.6773223876953, "logps/rejected": -303.384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.211331844329834, "rewards/margins": 16.172876358032227, "rewards/rejected": -22.38420867919922, "step": 5012 }, { "epoch": 8.63, "learning_rate": 2.294942626434339e-08, "logits/chosen": -1.3634672164916992, "logits/rejected": -2.047067880630493, "logps/chosen": -161.3880615234375, "logps/rejected": -326.0831604003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.747950553894043, "rewards/margins": 14.270963668823242, "rewards/rejected": -23.01891326904297, "step": 5013 }, { "epoch": 8.63, "learning_rate": 2.2843178920526987e-08, "logits/chosen": -1.9895505905151367, "logits/rejected": -1.8033655881881714, "logps/chosen": -180.99595642089844, "logps/rejected": -317.07208251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.164068222045898, "rewards/margins": 14.69791030883789, "rewards/rejected": -22.86197853088379, "step": 5014 }, { "epoch": 8.63, "learning_rate": 2.2736931576710583e-08, "logits/chosen": -2.066089630126953, "logits/rejected": -2.114306926727295, "logps/chosen": -184.6208038330078, "logps/rejected": -314.29302978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.682619094848633, "rewards/margins": 13.072600364685059, "rewards/rejected": -22.755218505859375, "step": 5015 }, { "epoch": 8.63, "learning_rate": 2.2630684232894178e-08, "logits/chosen": -1.8432519435882568, "logits/rejected": -1.6597148180007935, "logps/chosen": -148.92269897460938, "logps/rejected": -309.56451416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.200460910797119, "rewards/margins": 16.729551315307617, "rewards/rejected": -22.930011749267578, "step": 5016 }, { "epoch": 8.64, "learning_rate": 2.252443688907777e-08, "logits/chosen": -1.5808758735656738, "logits/rejected": -1.8518693447113037, "logps/chosen": -170.618408203125, "logps/rejected": -346.17144775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.085247993469238, "rewards/margins": 16.980985641479492, "rewards/rejected": -25.066234588623047, "step": 5017 }, { "epoch": 8.64, "learning_rate": 2.241818954526137e-08, "logits/chosen": -1.9139437675476074, "logits/rejected": -1.9192371368408203, "logps/chosen": -163.02767944335938, "logps/rejected": -377.0234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.085474014282227, "rewards/margins": 20.977436065673828, "rewards/rejected": -29.062910079956055, "step": 5018 }, { "epoch": 8.64, "learning_rate": 2.2311942201444962e-08, "logits/chosen": -1.501223087310791, "logits/rejected": -1.9942829608917236, "logps/chosen": -140.99142456054688, "logps/rejected": -311.8459167480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.854676246643066, "rewards/margins": 16.628448486328125, "rewards/rejected": -22.483123779296875, "step": 5019 }, { "epoch": 8.64, "learning_rate": 2.2205694857628557e-08, "logits/chosen": -1.6212352514266968, "logits/rejected": -1.9820027351379395, "logps/chosen": -168.0208282470703, "logps/rejected": -330.509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.075590133666992, "rewards/margins": 15.836431503295898, "rewards/rejected": -23.91202163696289, "step": 5020 }, { "epoch": 8.64, "learning_rate": 2.2099447513812153e-08, "logits/chosen": -1.6241894960403442, "logits/rejected": -1.9508605003356934, "logps/chosen": -142.04635620117188, "logps/rejected": -327.2559814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.797886848449707, "rewards/margins": 17.27777862548828, "rewards/rejected": -24.075666427612305, "step": 5021 }, { "epoch": 8.64, "learning_rate": 2.199320016999575e-08, "logits/chosen": -1.85484778881073, "logits/rejected": -1.963611364364624, "logps/chosen": -152.46044921875, "logps/rejected": -278.81439208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.222154140472412, "rewards/margins": 13.238332748413086, "rewards/rejected": -19.460487365722656, "step": 5022 }, { "epoch": 8.65, "learning_rate": 2.1886952826179344e-08, "logits/chosen": -1.7008980512619019, "logits/rejected": -1.7516059875488281, "logps/chosen": -146.8468780517578, "logps/rejected": -305.74786376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.314968585968018, "rewards/margins": 15.266620635986328, "rewards/rejected": -22.581588745117188, "step": 5023 }, { "epoch": 8.65, "learning_rate": 2.178070548236294e-08, "logits/chosen": -1.6437187194824219, "logits/rejected": -2.0604066848754883, "logps/chosen": -143.20623779296875, "logps/rejected": -364.3463134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.172765254974365, "rewards/margins": 18.818801879882812, "rewards/rejected": -24.991567611694336, "step": 5024 }, { "epoch": 8.65, "learning_rate": 2.1674458138546535e-08, "logits/chosen": -1.8346893787384033, "logits/rejected": -2.06080961227417, "logps/chosen": -120.55496215820312, "logps/rejected": -297.1680908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.201889991760254, "rewards/margins": 16.98575782775879, "rewards/rejected": -22.18764877319336, "step": 5025 }, { "epoch": 8.65, "learning_rate": 2.156821079473013e-08, "logits/chosen": -1.7763004302978516, "logits/rejected": -1.995572805404663, "logps/chosen": -195.36459350585938, "logps/rejected": -313.81707763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.386818885803223, "rewards/margins": 10.515235900878906, "rewards/rejected": -22.902053833007812, "step": 5026 }, { "epoch": 8.65, "learning_rate": 2.1461963450913727e-08, "logits/chosen": -1.936758041381836, "logits/rejected": -1.755544900894165, "logps/chosen": -186.65452575683594, "logps/rejected": -343.35760498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.78791332244873, "rewards/margins": 17.05803871154785, "rewards/rejected": -25.8459529876709, "step": 5027 }, { "epoch": 8.65, "learning_rate": 2.135571610709732e-08, "logits/chosen": -1.7388126850128174, "logits/rejected": -1.7540677785873413, "logps/chosen": -148.64633178710938, "logps/rejected": -336.7135925292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.022428035736084, "rewards/margins": 19.7989501953125, "rewards/rejected": -25.821378707885742, "step": 5028 }, { "epoch": 8.66, "learning_rate": 2.1249468763280918e-08, "logits/chosen": -1.8345146179199219, "logits/rejected": -1.9517993927001953, "logps/chosen": -126.45823669433594, "logps/rejected": -323.718017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.350879669189453, "rewards/margins": 17.814224243164062, "rewards/rejected": -23.165103912353516, "step": 5029 }, { "epoch": 8.66, "learning_rate": 2.114322141946451e-08, "logits/chosen": -1.9134800434112549, "logits/rejected": -1.869075894355774, "logps/chosen": -154.6580810546875, "logps/rejected": -293.28411865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.65435791015625, "rewards/margins": 13.852487564086914, "rewards/rejected": -21.50684356689453, "step": 5030 }, { "epoch": 8.66, "learning_rate": 2.103697407564811e-08, "logits/chosen": -1.6725988388061523, "logits/rejected": -1.833372712135315, "logps/chosen": -175.71435546875, "logps/rejected": -350.08282470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.243961334228516, "rewards/margins": 17.720478057861328, "rewards/rejected": -25.96443748474121, "step": 5031 }, { "epoch": 8.66, "learning_rate": 2.09307267318317e-08, "logits/chosen": -1.7486159801483154, "logits/rejected": -1.676186203956604, "logps/chosen": -176.772216796875, "logps/rejected": -341.7423095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.630691528320312, "rewards/margins": 16.259214401245117, "rewards/rejected": -24.889907836914062, "step": 5032 }, { "epoch": 8.66, "learning_rate": 2.08244793880153e-08, "logits/chosen": -1.770794153213501, "logits/rejected": -1.6802377700805664, "logps/chosen": -176.80625915527344, "logps/rejected": -298.5440368652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.914266586303711, "rewards/margins": 10.408232688903809, "rewards/rejected": -20.322498321533203, "step": 5033 }, { "epoch": 8.66, "learning_rate": 2.0718232044198892e-08, "logits/chosen": -1.672223448753357, "logits/rejected": -1.9609646797180176, "logps/chosen": -151.48031616210938, "logps/rejected": -361.20452880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.548059463500977, "rewards/margins": 18.357271194458008, "rewards/rejected": -25.905330657958984, "step": 5034 }, { "epoch": 8.67, "learning_rate": 2.061198470038249e-08, "logits/chosen": -1.69668710231781, "logits/rejected": -1.9622695446014404, "logps/chosen": -139.88966369628906, "logps/rejected": -315.6292724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.648548603057861, "rewards/margins": 16.287368774414062, "rewards/rejected": -22.935916900634766, "step": 5035 }, { "epoch": 8.67, "learning_rate": 2.0505737356566084e-08, "logits/chosen": -1.3582961559295654, "logits/rejected": -1.9721451997756958, "logps/chosen": -153.00164794921875, "logps/rejected": -291.29547119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.510210037231445, "rewards/margins": 12.721626281738281, "rewards/rejected": -20.231836318969727, "step": 5036 }, { "epoch": 8.67, "learning_rate": 2.0399490012749682e-08, "logits/chosen": -1.7562202215194702, "logits/rejected": -1.500139594078064, "logps/chosen": -149.1112518310547, "logps/rejected": -296.8343505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.28751277923584, "rewards/margins": 14.968087196350098, "rewards/rejected": -22.255599975585938, "step": 5037 }, { "epoch": 8.67, "learning_rate": 2.0293242668933275e-08, "logits/chosen": -1.749711036682129, "logits/rejected": -1.8471964597702026, "logps/chosen": -163.22042846679688, "logps/rejected": -340.18634033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.182869911193848, "rewards/margins": 16.652698516845703, "rewards/rejected": -24.835569381713867, "step": 5038 }, { "epoch": 8.67, "learning_rate": 2.0186995325116874e-08, "logits/chosen": -1.8117470741271973, "logits/rejected": -1.8200186491012573, "logps/chosen": -180.60360717773438, "logps/rejected": -317.44586181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.189489364624023, "rewards/margins": 13.410622596740723, "rewards/rejected": -21.600112915039062, "step": 5039 }, { "epoch": 8.67, "learning_rate": 2.0080747981300466e-08, "logits/chosen": -1.86248779296875, "logits/rejected": -1.9396296739578247, "logps/chosen": -115.91930389404297, "logps/rejected": -312.9491271972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6521427631378174, "rewards/margins": 19.042028427124023, "rewards/rejected": -22.694169998168945, "step": 5040 }, { "epoch": 8.68, "learning_rate": 1.997450063748406e-08, "logits/chosen": -1.9012022018432617, "logits/rejected": -1.8260763883590698, "logps/chosen": -139.41461181640625, "logps/rejected": -318.8371887207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.266324996948242, "rewards/margins": 16.95625877380371, "rewards/rejected": -22.222583770751953, "step": 5041 }, { "epoch": 8.68, "learning_rate": 1.9868253293667657e-08, "logits/chosen": -1.6945362091064453, "logits/rejected": -1.8520512580871582, "logps/chosen": -132.7215576171875, "logps/rejected": -297.4102478027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.875818252563477, "rewards/margins": 15.886058807373047, "rewards/rejected": -20.761878967285156, "step": 5042 }, { "epoch": 8.68, "learning_rate": 1.9762005949851253e-08, "logits/chosen": -1.5077933073043823, "logits/rejected": -1.778600811958313, "logps/chosen": -152.3538818359375, "logps/rejected": -311.509033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7190728187561035, "rewards/margins": 14.141335487365723, "rewards/rejected": -21.860408782958984, "step": 5043 }, { "epoch": 8.68, "learning_rate": 1.9655758606034848e-08, "logits/chosen": -2.074036121368408, "logits/rejected": -2.0156731605529785, "logps/chosen": -165.15185546875, "logps/rejected": -322.2601623535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.9443182945251465, "rewards/margins": 16.194469451904297, "rewards/rejected": -23.1387882232666, "step": 5044 }, { "epoch": 8.68, "learning_rate": 1.9549511262218444e-08, "logits/chosen": -1.8233182430267334, "logits/rejected": -1.520867943763733, "logps/chosen": -140.16456604003906, "logps/rejected": -326.47344970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.426721572875977, "rewards/margins": 19.730327606201172, "rewards/rejected": -25.15705108642578, "step": 5045 }, { "epoch": 8.69, "learning_rate": 1.944326391840204e-08, "logits/chosen": -1.933009147644043, "logits/rejected": -1.6966179609298706, "logps/chosen": -191.25100708007812, "logps/rejected": -334.7279052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.882585525512695, "rewards/margins": 15.734890937805176, "rewards/rejected": -24.617477416992188, "step": 5046 }, { "epoch": 8.69, "learning_rate": 1.9337016574585635e-08, "logits/chosen": -1.9082140922546387, "logits/rejected": -2.079296588897705, "logps/chosen": -149.79010009765625, "logps/rejected": -297.35675048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5049519538879395, "rewards/margins": 13.795370101928711, "rewards/rejected": -20.300323486328125, "step": 5047 }, { "epoch": 8.69, "learning_rate": 1.923076923076923e-08, "logits/chosen": -1.8102059364318848, "logits/rejected": -1.8225491046905518, "logps/chosen": -176.4917755126953, "logps/rejected": -330.2697448730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.942958831787109, "rewards/margins": 15.109046936035156, "rewards/rejected": -23.052005767822266, "step": 5048 }, { "epoch": 8.69, "learning_rate": 1.9124521886952826e-08, "logits/chosen": -1.9118165969848633, "logits/rejected": -1.8111417293548584, "logps/chosen": -174.2289581298828, "logps/rejected": -316.71661376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.765558242797852, "rewards/margins": 13.616874694824219, "rewards/rejected": -23.382434844970703, "step": 5049 }, { "epoch": 8.69, "learning_rate": 1.9018274543136422e-08, "logits/chosen": -1.5634675025939941, "logits/rejected": -1.6967406272888184, "logps/chosen": -185.80186462402344, "logps/rejected": -346.6410827636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.990336418151855, "rewards/margins": 15.799480438232422, "rewards/rejected": -25.78981590270996, "step": 5050 }, { "epoch": 8.69, "learning_rate": 1.8912027199320017e-08, "logits/chosen": -1.7069447040557861, "logits/rejected": -1.98643159866333, "logps/chosen": -133.28045654296875, "logps/rejected": -293.5384521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.441807270050049, "rewards/margins": 14.773529052734375, "rewards/rejected": -20.2153377532959, "step": 5051 }, { "epoch": 8.7, "learning_rate": 1.8805779855503613e-08, "logits/chosen": -1.633123517036438, "logits/rejected": -1.9318692684173584, "logps/chosen": -188.75608825683594, "logps/rejected": -365.071044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.355238914489746, "rewards/margins": 17.125080108642578, "rewards/rejected": -27.480318069458008, "step": 5052 }, { "epoch": 8.7, "learning_rate": 1.869953251168721e-08, "logits/chosen": -1.917236089706421, "logits/rejected": -1.553184986114502, "logps/chosen": -195.16546630859375, "logps/rejected": -331.82745361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.374846458435059, "rewards/margins": 15.01300048828125, "rewards/rejected": -24.387847900390625, "step": 5053 }, { "epoch": 8.7, "learning_rate": 1.85932851678708e-08, "logits/chosen": -1.6352059841156006, "logits/rejected": -1.7979376316070557, "logps/chosen": -147.9012451171875, "logps/rejected": -325.7139587402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.670818328857422, "rewards/margins": 17.378536224365234, "rewards/rejected": -25.049354553222656, "step": 5054 }, { "epoch": 8.7, "learning_rate": 1.8487037824054397e-08, "logits/chosen": -1.9312396049499512, "logits/rejected": -1.8219772577285767, "logps/chosen": -114.762451171875, "logps/rejected": -268.33587646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.373656272888184, "rewards/margins": 15.026518821716309, "rewards/rejected": -19.400175094604492, "step": 5055 }, { "epoch": 8.7, "learning_rate": 1.8380790480237992e-08, "logits/chosen": -1.920208215713501, "logits/rejected": -1.8335752487182617, "logps/chosen": -168.00225830078125, "logps/rejected": -333.4328918457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.094480514526367, "rewards/margins": 16.127805709838867, "rewards/rejected": -24.222288131713867, "step": 5056 }, { "epoch": 8.7, "learning_rate": 1.8274543136421588e-08, "logits/chosen": -1.7456867694854736, "logits/rejected": -1.8702950477600098, "logps/chosen": -115.453369140625, "logps/rejected": -350.1239318847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.386886119842529, "rewards/margins": 21.379053115844727, "rewards/rejected": -26.765939712524414, "step": 5057 }, { "epoch": 8.71, "learning_rate": 1.8168295792605183e-08, "logits/chosen": -1.6154417991638184, "logits/rejected": -1.6287914514541626, "logps/chosen": -192.39303588867188, "logps/rejected": -339.97802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.588587760925293, "rewards/margins": 13.693153381347656, "rewards/rejected": -24.281742095947266, "step": 5058 }, { "epoch": 8.71, "learning_rate": 1.806204844878878e-08, "logits/chosen": -1.6769375801086426, "logits/rejected": -1.909898281097412, "logps/chosen": -159.00262451171875, "logps/rejected": -319.63531494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.477113723754883, "rewards/margins": 16.3636417388916, "rewards/rejected": -21.840755462646484, "step": 5059 }, { "epoch": 8.71, "learning_rate": 1.7955801104972374e-08, "logits/chosen": -1.727454662322998, "logits/rejected": -1.7768945693969727, "logps/chosen": -143.70872497558594, "logps/rejected": -312.68414306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.292364120483398, "rewards/margins": 16.750059127807617, "rewards/rejected": -22.042423248291016, "step": 5060 }, { "epoch": 8.71, "learning_rate": 1.784955376115597e-08, "logits/chosen": -1.6221892833709717, "logits/rejected": -1.7816216945648193, "logps/chosen": -127.54827880859375, "logps/rejected": -290.7102966308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.999211311340332, "rewards/margins": 16.392322540283203, "rewards/rejected": -21.39153480529785, "step": 5061 }, { "epoch": 8.71, "learning_rate": 1.7743306417339566e-08, "logits/chosen": -1.9017677307128906, "logits/rejected": -1.794015645980835, "logps/chosen": -138.78121948242188, "logps/rejected": -296.9815673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.267794609069824, "rewards/margins": 14.64403247833252, "rewards/rejected": -19.911827087402344, "step": 5062 }, { "epoch": 8.71, "learning_rate": 1.763705907352316e-08, "logits/chosen": -1.7332215309143066, "logits/rejected": -1.8086425065994263, "logps/chosen": -169.078125, "logps/rejected": -332.0762023925781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.285758972167969, "rewards/margins": 17.600879669189453, "rewards/rejected": -23.886640548706055, "step": 5063 }, { "epoch": 8.72, "learning_rate": 1.7530811729706757e-08, "logits/chosen": -1.5668836832046509, "logits/rejected": -1.615527868270874, "logps/chosen": -134.41932678222656, "logps/rejected": -293.119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.177550792694092, "rewards/margins": 15.34815788269043, "rewards/rejected": -21.52570915222168, "step": 5064 }, { "epoch": 8.72, "learning_rate": 1.7424564385890352e-08, "logits/chosen": -1.7874003648757935, "logits/rejected": -1.5398130416870117, "logps/chosen": -179.28726196289062, "logps/rejected": -308.1258850097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.685517311096191, "rewards/margins": 16.35135269165039, "rewards/rejected": -23.03687286376953, "step": 5065 }, { "epoch": 8.72, "learning_rate": 1.7318317042073948e-08, "logits/chosen": -1.6478084325790405, "logits/rejected": -2.0479323863983154, "logps/chosen": -163.6251220703125, "logps/rejected": -347.53594970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.105752944946289, "rewards/margins": 15.602657318115234, "rewards/rejected": -23.708410263061523, "step": 5066 }, { "epoch": 8.72, "learning_rate": 1.721206969825754e-08, "logits/chosen": -1.9878543615341187, "logits/rejected": -1.7821815013885498, "logps/chosen": -214.4417724609375, "logps/rejected": -331.37664794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.121459007263184, "rewards/margins": 12.779987335205078, "rewards/rejected": -23.901445388793945, "step": 5067 }, { "epoch": 8.72, "learning_rate": 1.710582235444114e-08, "logits/chosen": -1.6963036060333252, "logits/rejected": -1.7451789379119873, "logps/chosen": -135.80828857421875, "logps/rejected": -299.5716247558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.843070983886719, "rewards/margins": 15.790847778320312, "rewards/rejected": -21.63391876220703, "step": 5068 }, { "epoch": 8.72, "learning_rate": 1.699957501062473e-08, "logits/chosen": -1.7867732048034668, "logits/rejected": -1.4169691801071167, "logps/chosen": -189.6984100341797, "logps/rejected": -329.3565673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.176105499267578, "rewards/margins": 14.802714347839355, "rewards/rejected": -24.978818893432617, "step": 5069 }, { "epoch": 8.73, "learning_rate": 1.689332766680833e-08, "logits/chosen": -1.7773292064666748, "logits/rejected": -1.9539512395858765, "logps/chosen": -154.2607879638672, "logps/rejected": -318.44647216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.401901245117188, "rewards/margins": 16.40080451965332, "rewards/rejected": -24.80270767211914, "step": 5070 }, { "epoch": 8.73, "learning_rate": 1.6787080322991923e-08, "logits/chosen": -1.8211541175842285, "logits/rejected": -1.8502246141433716, "logps/chosen": -153.92306518554688, "logps/rejected": -303.7844543457031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.934137344360352, "rewards/margins": 15.501896858215332, "rewards/rejected": -22.43603515625, "step": 5071 }, { "epoch": 8.73, "learning_rate": 1.668083297917552e-08, "logits/chosen": -1.8050652742385864, "logits/rejected": -1.660787582397461, "logps/chosen": -152.36163330078125, "logps/rejected": -274.1200256347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.468404293060303, "rewards/margins": 11.724974632263184, "rewards/rejected": -19.193378448486328, "step": 5072 }, { "epoch": 8.73, "learning_rate": 1.6574585635359114e-08, "logits/chosen": -1.901275396347046, "logits/rejected": -2.022618532180786, "logps/chosen": -177.43939208984375, "logps/rejected": -351.09185791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.716452598571777, "rewards/margins": 16.879175186157227, "rewards/rejected": -24.59562873840332, "step": 5073 }, { "epoch": 8.73, "learning_rate": 1.6468338291542713e-08, "logits/chosen": -1.9359488487243652, "logits/rejected": -1.9718492031097412, "logps/chosen": -143.34323120117188, "logps/rejected": -278.0721435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.58176326751709, "rewards/margins": 13.335918426513672, "rewards/rejected": -19.917680740356445, "step": 5074 }, { "epoch": 8.73, "learning_rate": 1.6362090947726305e-08, "logits/chosen": -1.687309741973877, "logits/rejected": -1.9682947397232056, "logps/chosen": -183.0157012939453, "logps/rejected": -330.1340026855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.07565689086914, "rewards/margins": 13.592323303222656, "rewards/rejected": -23.66798210144043, "step": 5075 }, { "epoch": 8.74, "learning_rate": 1.6255843603909904e-08, "logits/chosen": -1.8493432998657227, "logits/rejected": -1.439064860343933, "logps/chosen": -143.15403747558594, "logps/rejected": -295.25189208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.980513572692871, "rewards/margins": 17.158946990966797, "rewards/rejected": -23.13945960998535, "step": 5076 }, { "epoch": 8.74, "learning_rate": 1.6149596260093496e-08, "logits/chosen": -1.871880292892456, "logits/rejected": -1.8266866207122803, "logps/chosen": -159.20657348632812, "logps/rejected": -270.490966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.155360221862793, "rewards/margins": 11.705652236938477, "rewards/rejected": -18.861011505126953, "step": 5077 }, { "epoch": 8.74, "learning_rate": 1.6043348916277095e-08, "logits/chosen": -2.038705348968506, "logits/rejected": -1.7485716342926025, "logps/chosen": -159.06002807617188, "logps/rejected": -278.752685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.159573554992676, "rewards/margins": 13.80841064453125, "rewards/rejected": -19.967985153198242, "step": 5078 }, { "epoch": 8.74, "learning_rate": 1.5937101572460687e-08, "logits/chosen": -1.89127516746521, "logits/rejected": -1.7825329303741455, "logps/chosen": -135.38352966308594, "logps/rejected": -312.93896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.305086135864258, "rewards/margins": 18.205337524414062, "rewards/rejected": -23.51042366027832, "step": 5079 }, { "epoch": 8.74, "learning_rate": 1.5830854228644283e-08, "logits/chosen": -2.031437873840332, "logits/rejected": -1.5719578266143799, "logps/chosen": -172.43661499023438, "logps/rejected": -274.7508544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.58632230758667, "rewards/margins": 11.936918258666992, "rewards/rejected": -19.52324104309082, "step": 5080 }, { "epoch": 8.75, "learning_rate": 1.572460688482788e-08, "logits/chosen": -1.9127696752548218, "logits/rejected": -1.490683913230896, "logps/chosen": -168.46450805664062, "logps/rejected": -282.8714294433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.196880340576172, "rewards/margins": 12.060005187988281, "rewards/rejected": -20.25688362121582, "step": 5081 }, { "epoch": 8.75, "learning_rate": 1.5618359541011474e-08, "logits/chosen": -1.8354326486587524, "logits/rejected": -1.5264103412628174, "logps/chosen": -137.63568115234375, "logps/rejected": -280.6382751464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.24570369720459, "rewards/margins": 14.350930213928223, "rewards/rejected": -19.596633911132812, "step": 5082 }, { "epoch": 8.75, "learning_rate": 1.551211219719507e-08, "logits/chosen": -2.0196707248687744, "logits/rejected": -1.8489112854003906, "logps/chosen": -99.95189666748047, "logps/rejected": -280.56842041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.12395977973938, "rewards/margins": 19.167722702026367, "rewards/rejected": -21.291685104370117, "step": 5083 }, { "epoch": 8.75, "learning_rate": 1.5405864853378665e-08, "logits/chosen": -1.9991123676300049, "logits/rejected": -1.9993858337402344, "logps/chosen": -170.54978942871094, "logps/rejected": -324.2530822753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.661177635192871, "rewards/margins": 14.728089332580566, "rewards/rejected": -22.38926887512207, "step": 5084 }, { "epoch": 8.75, "learning_rate": 1.529961750956226e-08, "logits/chosen": -1.6305980682373047, "logits/rejected": -2.0266942977905273, "logps/chosen": -171.01454162597656, "logps/rejected": -338.835693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.222980499267578, "rewards/margins": 15.453520774841309, "rewards/rejected": -24.676502227783203, "step": 5085 }, { "epoch": 8.75, "learning_rate": 1.5193370165745857e-08, "logits/chosen": -1.6640346050262451, "logits/rejected": -1.6953401565551758, "logps/chosen": -144.51773071289062, "logps/rejected": -343.6298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.202431678771973, "rewards/margins": 19.42706298828125, "rewards/rejected": -24.62949562072754, "step": 5086 }, { "epoch": 8.76, "learning_rate": 1.5087122821929452e-08, "logits/chosen": -2.0092177391052246, "logits/rejected": -1.8160839080810547, "logps/chosen": -150.28933715820312, "logps/rejected": -292.5481872558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.632650375366211, "rewards/margins": 14.437360763549805, "rewards/rejected": -23.070011138916016, "step": 5087 }, { "epoch": 8.76, "learning_rate": 1.4980875478113048e-08, "logits/chosen": -1.9273314476013184, "logits/rejected": -1.4813551902770996, "logps/chosen": -182.2752685546875, "logps/rejected": -315.497802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.037062644958496, "rewards/margins": 15.550281524658203, "rewards/rejected": -23.587343215942383, "step": 5088 }, { "epoch": 8.76, "learning_rate": 1.4874628134296642e-08, "logits/chosen": -1.8202003240585327, "logits/rejected": -2.0176219940185547, "logps/chosen": -160.77162170410156, "logps/rejected": -298.5633850097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.515628814697266, "rewards/margins": 13.58602237701416, "rewards/rejected": -19.10165023803711, "step": 5089 }, { "epoch": 8.76, "learning_rate": 1.4768380790480237e-08, "logits/chosen": -1.745739221572876, "logits/rejected": -1.6477805376052856, "logps/chosen": -153.51040649414062, "logps/rejected": -286.8930969238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.739595413208008, "rewards/margins": 14.433659553527832, "rewards/rejected": -20.173255920410156, "step": 5090 }, { "epoch": 8.76, "learning_rate": 1.4662133446663833e-08, "logits/chosen": -1.5891681909561157, "logits/rejected": -2.0516357421875, "logps/chosen": -188.73269653320312, "logps/rejected": -330.124755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.009374618530273, "rewards/margins": 15.271097183227539, "rewards/rejected": -23.280471801757812, "step": 5091 }, { "epoch": 8.76, "learning_rate": 1.4555886102847429e-08, "logits/chosen": -1.8729196786880493, "logits/rejected": -1.8265454769134521, "logps/chosen": -129.05709838867188, "logps/rejected": -297.6117248535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.245632171630859, "rewards/margins": 16.467296600341797, "rewards/rejected": -21.712928771972656, "step": 5092 }, { "epoch": 8.77, "learning_rate": 1.4449638759031024e-08, "logits/chosen": -1.947113275527954, "logits/rejected": -1.9812650680541992, "logps/chosen": -167.35165405273438, "logps/rejected": -290.4667663574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0232720375061035, "rewards/margins": 13.798887252807617, "rewards/rejected": -20.822158813476562, "step": 5093 }, { "epoch": 8.77, "learning_rate": 1.434339141521462e-08, "logits/chosen": -1.3923768997192383, "logits/rejected": -1.8427783250808716, "logps/chosen": -174.91622924804688, "logps/rejected": -336.2298278808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.311334609985352, "rewards/margins": 14.69532585144043, "rewards/rejected": -24.00665855407715, "step": 5094 }, { "epoch": 8.77, "learning_rate": 1.4237144071398215e-08, "logits/chosen": -2.0884323120117188, "logits/rejected": -2.0144410133361816, "logps/chosen": -170.4797821044922, "logps/rejected": -318.69775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.293732643127441, "rewards/margins": 12.555481910705566, "rewards/rejected": -20.849214553833008, "step": 5095 }, { "epoch": 8.77, "learning_rate": 1.4130896727581811e-08, "logits/chosen": -1.5452609062194824, "logits/rejected": -1.9161043167114258, "logps/chosen": -179.41500854492188, "logps/rejected": -331.1974182128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.782774925231934, "rewards/margins": 15.171821594238281, "rewards/rejected": -24.95459747314453, "step": 5096 }, { "epoch": 8.77, "learning_rate": 1.4024649383765405e-08, "logits/chosen": -1.8656197786331177, "logits/rejected": -1.65232515335083, "logps/chosen": -188.20968627929688, "logps/rejected": -311.53302001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.882930755615234, "rewards/margins": 14.766966819763184, "rewards/rejected": -23.6498966217041, "step": 5097 }, { "epoch": 8.77, "learning_rate": 1.3918402039949e-08, "logits/chosen": -1.4285602569580078, "logits/rejected": -2.0428273677825928, "logps/chosen": -146.05032348632812, "logps/rejected": -357.8377685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.349498748779297, "rewards/margins": 18.669357299804688, "rewards/rejected": -26.018856048583984, "step": 5098 }, { "epoch": 8.78, "learning_rate": 1.3812154696132596e-08, "logits/chosen": -1.9291651248931885, "logits/rejected": -1.9878110885620117, "logps/chosen": -168.44955444335938, "logps/rejected": -304.900146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.168169021606445, "rewards/margins": 13.535396575927734, "rewards/rejected": -21.70356559753418, "step": 5099 }, { "epoch": 8.78, "learning_rate": 1.3705907352316192e-08, "logits/chosen": -1.9489538669586182, "logits/rejected": -1.902221918106079, "logps/chosen": -161.37655639648438, "logps/rejected": -321.15008544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.023908615112305, "rewards/margins": 15.29739761352539, "rewards/rejected": -23.321306228637695, "step": 5100 }, { "epoch": 8.78, "learning_rate": 1.3599660008499787e-08, "logits/chosen": -1.8983451128005981, "logits/rejected": -1.6548789739608765, "logps/chosen": -170.5453338623047, "logps/rejected": -314.2768859863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.870124816894531, "rewards/margins": 15.754470825195312, "rewards/rejected": -22.624595642089844, "step": 5101 }, { "epoch": 8.78, "learning_rate": 1.3493412664683383e-08, "logits/chosen": -1.5320578813552856, "logits/rejected": -1.9473116397857666, "logps/chosen": -181.98143005371094, "logps/rejected": -352.67205810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.908682823181152, "rewards/margins": 16.513965606689453, "rewards/rejected": -25.42264747619629, "step": 5102 }, { "epoch": 8.78, "learning_rate": 1.3387165320866978e-08, "logits/chosen": -1.9779837131500244, "logits/rejected": -1.9658372402191162, "logps/chosen": -169.11814880371094, "logps/rejected": -307.2462158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.474706649780273, "rewards/margins": 14.020605087280273, "rewards/rejected": -21.495311737060547, "step": 5103 }, { "epoch": 8.78, "learning_rate": 1.3280917977050574e-08, "logits/chosen": -1.9646828174591064, "logits/rejected": -1.8279253244400024, "logps/chosen": -151.5119171142578, "logps/rejected": -293.78515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.238054275512695, "rewards/margins": 14.790699005126953, "rewards/rejected": -20.02875328063965, "step": 5104 }, { "epoch": 8.79, "learning_rate": 1.317467063323417e-08, "logits/chosen": -1.8976788520812988, "logits/rejected": -1.560383677482605, "logps/chosen": -152.6102752685547, "logps/rejected": -295.48370361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.622053146362305, "rewards/margins": 16.20758056640625, "rewards/rejected": -22.829631805419922, "step": 5105 }, { "epoch": 8.79, "learning_rate": 1.3068423289417765e-08, "logits/chosen": -1.6966792345046997, "logits/rejected": -1.8350071907043457, "logps/chosen": -160.272216796875, "logps/rejected": -307.4116516113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.966253757476807, "rewards/margins": 15.413427352905273, "rewards/rejected": -22.379680633544922, "step": 5106 }, { "epoch": 8.79, "learning_rate": 1.2962175945601359e-08, "logits/chosen": -1.4961495399475098, "logits/rejected": -1.8643068075180054, "logps/chosen": -131.39805603027344, "logps/rejected": -294.8462829589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.221270561218262, "rewards/margins": 15.261252403259277, "rewards/rejected": -21.482521057128906, "step": 5107 }, { "epoch": 8.79, "learning_rate": 1.2855928601784955e-08, "logits/chosen": -1.7848118543624878, "logits/rejected": -1.7600144147872925, "logps/chosen": -127.24163818359375, "logps/rejected": -231.41578674316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.355555534362793, "rewards/margins": 10.875812530517578, "rewards/rejected": -17.231367111206055, "step": 5108 }, { "epoch": 8.79, "learning_rate": 1.274968125796855e-08, "logits/chosen": -1.6409835815429688, "logits/rejected": -2.036245346069336, "logps/chosen": -143.6110382080078, "logps/rejected": -307.9945373535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.732753276824951, "rewards/margins": 14.874290466308594, "rewards/rejected": -21.607044219970703, "step": 5109 }, { "epoch": 8.8, "learning_rate": 1.2643433914152144e-08, "logits/chosen": -1.5842993259429932, "logits/rejected": -1.8451324701309204, "logps/chosen": -174.96214294433594, "logps/rejected": -336.6701354980469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.093992233276367, "rewards/margins": 15.360136985778809, "rewards/rejected": -23.45412826538086, "step": 5110 }, { "epoch": 8.8, "learning_rate": 1.253718657033574e-08, "logits/chosen": -1.7202215194702148, "logits/rejected": -1.8742825984954834, "logps/chosen": -149.83514404296875, "logps/rejected": -325.3536071777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.276528358459473, "rewards/margins": 16.660114288330078, "rewards/rejected": -23.9366397857666, "step": 5111 }, { "epoch": 8.8, "learning_rate": 1.2430939226519335e-08, "logits/chosen": -1.7217594385147095, "logits/rejected": -1.8518730401992798, "logps/chosen": -161.36013793945312, "logps/rejected": -345.88653564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.315610885620117, "rewards/margins": 16.48640251159668, "rewards/rejected": -24.802013397216797, "step": 5112 }, { "epoch": 8.8, "learning_rate": 1.2324691882702931e-08, "logits/chosen": -1.7445430755615234, "logits/rejected": -1.9542251825332642, "logps/chosen": -124.822998046875, "logps/rejected": -282.25213623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0113420486450195, "rewards/margins": 14.38268756866455, "rewards/rejected": -20.39402961730957, "step": 5113 }, { "epoch": 8.8, "learning_rate": 1.2218444538886527e-08, "logits/chosen": -1.868342638015747, "logits/rejected": -1.7776716947555542, "logps/chosen": -148.82382202148438, "logps/rejected": -300.9479675292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.769967555999756, "rewards/margins": 15.662500381469727, "rewards/rejected": -21.432466506958008, "step": 5114 }, { "epoch": 8.8, "learning_rate": 1.2112197195070122e-08, "logits/chosen": -1.7138687372207642, "logits/rejected": -1.7360608577728271, "logps/chosen": -170.0009765625, "logps/rejected": -294.2832336425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.62229061126709, "rewards/margins": 13.156213760375977, "rewards/rejected": -21.778505325317383, "step": 5115 }, { "epoch": 8.81, "learning_rate": 1.2005949851253718e-08, "logits/chosen": -1.8442884683609009, "logits/rejected": -1.7231098413467407, "logps/chosen": -144.6116943359375, "logps/rejected": -270.093505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.37393045425415, "rewards/margins": 13.353120803833008, "rewards/rejected": -19.72705078125, "step": 5116 }, { "epoch": 8.81, "learning_rate": 1.1899702507437313e-08, "logits/chosen": -1.725132942199707, "logits/rejected": -1.643538236618042, "logps/chosen": -125.21089172363281, "logps/rejected": -302.5862121582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.397094249725342, "rewards/margins": 17.550058364868164, "rewards/rejected": -22.947154998779297, "step": 5117 }, { "epoch": 8.81, "learning_rate": 1.1793455163620909e-08, "logits/chosen": -1.9971332550048828, "logits/rejected": -1.2443701028823853, "logps/chosen": -168.8614044189453, "logps/rejected": -311.9103088378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.691713333129883, "rewards/margins": 15.94752025604248, "rewards/rejected": -22.63923454284668, "step": 5118 }, { "epoch": 8.81, "learning_rate": 1.1687207819804505e-08, "logits/chosen": -1.975019097328186, "logits/rejected": -1.9957778453826904, "logps/chosen": -167.73818969726562, "logps/rejected": -343.64459228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.441507339477539, "rewards/margins": 17.33110809326172, "rewards/rejected": -23.77261734008789, "step": 5119 }, { "epoch": 8.81, "learning_rate": 1.15809604759881e-08, "logits/chosen": -1.8267290592193604, "logits/rejected": -1.6451284885406494, "logps/chosen": -147.65847778320312, "logps/rejected": -280.536865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.525209426879883, "rewards/margins": 13.62990951538086, "rewards/rejected": -20.155120849609375, "step": 5120 }, { "epoch": 8.81, "learning_rate": 1.1474713132171696e-08, "logits/chosen": -1.929002046585083, "logits/rejected": -1.7708054780960083, "logps/chosen": -147.83349609375, "logps/rejected": -314.09375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.868892192840576, "rewards/margins": 15.854757308959961, "rewards/rejected": -22.723649978637695, "step": 5121 }, { "epoch": 8.82, "learning_rate": 1.1368465788355291e-08, "logits/chosen": -1.9931831359863281, "logits/rejected": -1.909231185913086, "logps/chosen": -158.64208984375, "logps/rejected": -335.22930908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.227845191955566, "rewards/margins": 18.091705322265625, "rewards/rejected": -24.319551467895508, "step": 5122 }, { "epoch": 8.82, "learning_rate": 1.1262218444538885e-08, "logits/chosen": -1.8895580768585205, "logits/rejected": -1.9402971267700195, "logps/chosen": -146.38775634765625, "logps/rejected": -284.1194152832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.4402971267700195, "rewards/margins": 12.884443283081055, "rewards/rejected": -20.324739456176758, "step": 5123 }, { "epoch": 8.82, "learning_rate": 1.1155971100722481e-08, "logits/chosen": -1.6542580127716064, "logits/rejected": -1.6811983585357666, "logps/chosen": -191.57254028320312, "logps/rejected": -323.3590087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.704207420349121, "rewards/margins": 12.765085220336914, "rewards/rejected": -23.46929168701172, "step": 5124 }, { "epoch": 8.82, "learning_rate": 1.1049723756906076e-08, "logits/chosen": -1.4350085258483887, "logits/rejected": -1.8793842792510986, "logps/chosen": -161.326171875, "logps/rejected": -357.54998779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.526434898376465, "rewards/margins": 15.95131778717041, "rewards/rejected": -25.477754592895508, "step": 5125 }, { "epoch": 8.82, "learning_rate": 1.0943476413089672e-08, "logits/chosen": -1.7381865978240967, "logits/rejected": -1.9079906940460205, "logps/chosen": -207.96774291992188, "logps/rejected": -337.07098388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.494731903076172, "rewards/margins": 14.357401847839355, "rewards/rejected": -23.85213279724121, "step": 5126 }, { "epoch": 8.82, "learning_rate": 1.0837229069273268e-08, "logits/chosen": -1.7486836910247803, "logits/rejected": -1.9097294807434082, "logps/chosen": -127.21305847167969, "logps/rejected": -282.3432922363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.55596399307251, "rewards/margins": 14.928473472595215, "rewards/rejected": -20.48443603515625, "step": 5127 }, { "epoch": 8.83, "learning_rate": 1.0730981725456863e-08, "logits/chosen": -1.8126496076583862, "logits/rejected": -1.9500480890274048, "logps/chosen": -181.1012420654297, "logps/rejected": -337.87286376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.875555038452148, "rewards/margins": 16.257247924804688, "rewards/rejected": -25.13280487060547, "step": 5128 }, { "epoch": 8.83, "learning_rate": 1.0624734381640459e-08, "logits/chosen": -1.6969784498214722, "logits/rejected": -2.057455539703369, "logps/chosen": -172.9119110107422, "logps/rejected": -342.1739501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.740180015563965, "rewards/margins": 16.365449905395508, "rewards/rejected": -25.105628967285156, "step": 5129 }, { "epoch": 8.83, "learning_rate": 1.0518487037824054e-08, "logits/chosen": -1.941587209701538, "logits/rejected": -1.8293111324310303, "logps/chosen": -152.04586791992188, "logps/rejected": -318.4990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.817866325378418, "rewards/margins": 17.65479850769043, "rewards/rejected": -24.472665786743164, "step": 5130 }, { "epoch": 8.83, "learning_rate": 1.041223969400765e-08, "logits/chosen": -1.6898341178894043, "logits/rejected": -1.9243266582489014, "logps/chosen": -131.34588623046875, "logps/rejected": -304.9228210449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.351226806640625, "rewards/margins": 15.89791488647461, "rewards/rejected": -22.249141693115234, "step": 5131 }, { "epoch": 8.83, "learning_rate": 1.0305992350191246e-08, "logits/chosen": -1.8772162199020386, "logits/rejected": -1.949256181716919, "logps/chosen": -166.22210693359375, "logps/rejected": -334.9818420410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1363115310668945, "rewards/margins": 17.56330108642578, "rewards/rejected": -24.699613571166992, "step": 5132 }, { "epoch": 8.83, "learning_rate": 1.0199745006374841e-08, "logits/chosen": -1.7879798412322998, "logits/rejected": -1.8289618492126465, "logps/chosen": -167.69522094726562, "logps/rejected": -317.7095947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.139413833618164, "rewards/margins": 13.60319709777832, "rewards/rejected": -21.742610931396484, "step": 5133 }, { "epoch": 8.84, "learning_rate": 1.0093497662558437e-08, "logits/chosen": -1.7536245584487915, "logits/rejected": -2.0520691871643066, "logps/chosen": -122.64395141601562, "logps/rejected": -325.6132507324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.314221382141113, "rewards/margins": 19.221153259277344, "rewards/rejected": -24.53537368774414, "step": 5134 }, { "epoch": 8.84, "learning_rate": 9.98725031874203e-09, "logits/chosen": -1.6367567777633667, "logits/rejected": -1.5516929626464844, "logps/chosen": -134.00218200683594, "logps/rejected": -309.5657958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.608243942260742, "rewards/margins": 17.306367874145508, "rewards/rejected": -22.91461181640625, "step": 5135 }, { "epoch": 8.84, "learning_rate": 9.881002974925626e-09, "logits/chosen": -1.7461521625518799, "logits/rejected": -1.911982774734497, "logps/chosen": -179.81051635742188, "logps/rejected": -353.2294006347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.238292694091797, "rewards/margins": 15.148283958435059, "rewards/rejected": -25.386577606201172, "step": 5136 }, { "epoch": 8.84, "learning_rate": 9.774755631109222e-09, "logits/chosen": -1.7757476568222046, "logits/rejected": -1.9011310338974, "logps/chosen": -138.25225830078125, "logps/rejected": -330.7261047363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.625301361083984, "rewards/margins": 17.104585647583008, "rewards/rejected": -22.72988510131836, "step": 5137 }, { "epoch": 8.84, "learning_rate": 9.668508287292818e-09, "logits/chosen": -1.8025797605514526, "logits/rejected": -1.8306910991668701, "logps/chosen": -165.10928344726562, "logps/rejected": -321.4870910644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.955682754516602, "rewards/margins": 15.545916557312012, "rewards/rejected": -23.501598358154297, "step": 5138 }, { "epoch": 8.85, "learning_rate": 9.562260943476413e-09, "logits/chosen": -1.8571279048919678, "logits/rejected": -1.719645380973816, "logps/chosen": -125.19622802734375, "logps/rejected": -249.69491577148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.380727291107178, "rewards/margins": 12.758057594299316, "rewards/rejected": -18.138784408569336, "step": 5139 }, { "epoch": 8.85, "learning_rate": 9.456013599660009e-09, "logits/chosen": -1.9258440732955933, "logits/rejected": -1.5803561210632324, "logps/chosen": -235.0081787109375, "logps/rejected": -347.070068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.3414888381958, "rewards/margins": 14.412949562072754, "rewards/rejected": -26.754438400268555, "step": 5140 }, { "epoch": 8.85, "learning_rate": 9.349766255843604e-09, "logits/chosen": -1.984351396560669, "logits/rejected": -2.174934148788452, "logps/chosen": -121.81574249267578, "logps/rejected": -295.639404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.170570373535156, "rewards/margins": 17.103315353393555, "rewards/rejected": -21.273887634277344, "step": 5141 }, { "epoch": 8.85, "learning_rate": 9.243518912027198e-09, "logits/chosen": -1.6523072719573975, "logits/rejected": -1.9182429313659668, "logps/chosen": -143.59230041503906, "logps/rejected": -317.1231689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.631495475769043, "rewards/margins": 17.068340301513672, "rewards/rejected": -22.69983673095703, "step": 5142 }, { "epoch": 8.85, "learning_rate": 9.137271568210794e-09, "logits/chosen": -1.4962373971939087, "logits/rejected": -1.7874443531036377, "logps/chosen": -122.2913589477539, "logps/rejected": -310.07012939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.761523723602295, "rewards/margins": 15.984725952148438, "rewards/rejected": -21.74625015258789, "step": 5143 }, { "epoch": 8.85, "learning_rate": 9.03102422439439e-09, "logits/chosen": -1.8509368896484375, "logits/rejected": -1.8996118307113647, "logps/chosen": -161.36553955078125, "logps/rejected": -373.08917236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.6234941482543945, "rewards/margins": 18.46055030822754, "rewards/rejected": -26.08404541015625, "step": 5144 }, { "epoch": 8.86, "learning_rate": 8.924776880577985e-09, "logits/chosen": -1.9614481925964355, "logits/rejected": -1.6405885219573975, "logps/chosen": -174.89065551757812, "logps/rejected": -298.3022155761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.085291862487793, "rewards/margins": 12.585113525390625, "rewards/rejected": -20.6704044342041, "step": 5145 }, { "epoch": 8.86, "learning_rate": 8.81852953676158e-09, "logits/chosen": -1.9143935441970825, "logits/rejected": -1.846761703491211, "logps/chosen": -142.86276245117188, "logps/rejected": -293.7156982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.577046871185303, "rewards/margins": 15.211148262023926, "rewards/rejected": -20.78819465637207, "step": 5146 }, { "epoch": 8.86, "learning_rate": 8.712282192945176e-09, "logits/chosen": -2.0384483337402344, "logits/rejected": -1.8726344108581543, "logps/chosen": -134.97756958007812, "logps/rejected": -279.6932678222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.611375331878662, "rewards/margins": 13.849058151245117, "rewards/rejected": -20.460433959960938, "step": 5147 }, { "epoch": 8.86, "learning_rate": 8.60603484912877e-09, "logits/chosen": -1.8053594827651978, "logits/rejected": -1.6632089614868164, "logps/chosen": -143.76626586914062, "logps/rejected": -313.6593322753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.824347496032715, "rewards/margins": 17.30070686340332, "rewards/rejected": -23.12505340576172, "step": 5148 }, { "epoch": 8.86, "learning_rate": 8.499787505312366e-09, "logits/chosen": -1.879136085510254, "logits/rejected": -1.6189875602722168, "logps/chosen": -188.93301391601562, "logps/rejected": -291.7893371582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.769702911376953, "rewards/margins": 12.606077194213867, "rewards/rejected": -22.37578010559082, "step": 5149 }, { "epoch": 8.86, "learning_rate": 8.393540161495961e-09, "logits/chosen": -1.5578131675720215, "logits/rejected": -1.931682825088501, "logps/chosen": -144.18832397460938, "logps/rejected": -331.31536865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.034817695617676, "rewards/margins": 17.580894470214844, "rewards/rejected": -24.615713119506836, "step": 5150 }, { "epoch": 8.87, "learning_rate": 8.287292817679557e-09, "logits/chosen": -2.129166603088379, "logits/rejected": -1.9999643564224243, "logps/chosen": -130.27020263671875, "logps/rejected": -314.3291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1496124267578125, "rewards/margins": 17.082901000976562, "rewards/rejected": -22.232515335083008, "step": 5151 }, { "epoch": 8.87, "learning_rate": 8.181045473863153e-09, "logits/chosen": -1.6572980880737305, "logits/rejected": -1.6300673484802246, "logps/chosen": -194.4476776123047, "logps/rejected": -355.03033447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.329612731933594, "rewards/margins": 16.005325317382812, "rewards/rejected": -25.334938049316406, "step": 5152 }, { "epoch": 8.87, "learning_rate": 8.074798130046748e-09, "logits/chosen": -1.8181087970733643, "logits/rejected": -1.8860046863555908, "logps/chosen": -130.57655334472656, "logps/rejected": -267.473876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.129127025604248, "rewards/margins": 13.462919235229492, "rewards/rejected": -18.5920467376709, "step": 5153 }, { "epoch": 8.87, "learning_rate": 7.968550786230344e-09, "logits/chosen": -1.7865115404129028, "logits/rejected": -1.7477962970733643, "logps/chosen": -181.32008361816406, "logps/rejected": -343.0885009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.75801944732666, "rewards/margins": 16.236387252807617, "rewards/rejected": -25.994407653808594, "step": 5154 }, { "epoch": 8.87, "learning_rate": 7.86230344241394e-09, "logits/chosen": -1.6268091201782227, "logits/rejected": -1.795646071434021, "logps/chosen": -161.56936645507812, "logps/rejected": -305.17242431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.867753028869629, "rewards/margins": 15.542383193969727, "rewards/rejected": -22.410137176513672, "step": 5155 }, { "epoch": 8.87, "learning_rate": 7.756056098597535e-09, "logits/chosen": -1.976813554763794, "logits/rejected": -1.8450911045074463, "logps/chosen": -151.16555786132812, "logps/rejected": -318.2745056152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.82733154296875, "rewards/margins": 16.78508758544922, "rewards/rejected": -23.61241912841797, "step": 5156 }, { "epoch": 8.88, "learning_rate": 7.64980875478113e-09, "logits/chosen": -1.881722092628479, "logits/rejected": -1.8843107223510742, "logps/chosen": -149.74514770507812, "logps/rejected": -329.0090637207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.06010103225708, "rewards/margins": 16.991073608398438, "rewards/rejected": -24.05117416381836, "step": 5157 }, { "epoch": 8.88, "learning_rate": 7.543561410964726e-09, "logits/chosen": -1.9214348793029785, "logits/rejected": -1.7370777130126953, "logps/chosen": -163.06918334960938, "logps/rejected": -326.895751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.633376121520996, "rewards/margins": 17.16922378540039, "rewards/rejected": -23.802600860595703, "step": 5158 }, { "epoch": 8.88, "learning_rate": 7.437314067148321e-09, "logits/chosen": -1.9477252960205078, "logits/rejected": -1.5372262001037598, "logps/chosen": -209.9247589111328, "logps/rejected": -315.1658630371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.771578788757324, "rewards/margins": 12.70005989074707, "rewards/rejected": -21.47163963317871, "step": 5159 }, { "epoch": 8.88, "learning_rate": 7.3310667233319165e-09, "logits/chosen": -1.7677977085113525, "logits/rejected": -2.0608537197113037, "logps/chosen": -128.59613037109375, "logps/rejected": -331.5960693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.581386089324951, "rewards/margins": 20.121213912963867, "rewards/rejected": -25.702598571777344, "step": 5160 }, { "epoch": 8.88, "learning_rate": 7.224819379515512e-09, "logits/chosen": -1.6498687267303467, "logits/rejected": -1.844613790512085, "logps/chosen": -182.69369506835938, "logps/rejected": -358.005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.535375595092773, "rewards/margins": 13.663612365722656, "rewards/rejected": -23.19898796081543, "step": 5161 }, { "epoch": 8.88, "learning_rate": 7.118572035699108e-09, "logits/chosen": -1.7864166498184204, "logits/rejected": -1.965010404586792, "logps/chosen": -183.27035522460938, "logps/rejected": -362.4002990722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.530440330505371, "rewards/margins": 16.728073120117188, "rewards/rejected": -27.258512496948242, "step": 5162 }, { "epoch": 8.89, "learning_rate": 7.012324691882702e-09, "logits/chosen": -1.5232380628585815, "logits/rejected": -1.650359869003296, "logps/chosen": -121.55514526367188, "logps/rejected": -290.8444519042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.193055152893066, "rewards/margins": 15.219259262084961, "rewards/rejected": -20.412315368652344, "step": 5163 }, { "epoch": 8.89, "learning_rate": 6.906077348066298e-09, "logits/chosen": -1.9692747592926025, "logits/rejected": -1.9955493211746216, "logps/chosen": -119.64088439941406, "logps/rejected": -327.44927978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4646854400634766, "rewards/margins": 20.232830047607422, "rewards/rejected": -23.6975154876709, "step": 5164 }, { "epoch": 8.89, "learning_rate": 6.799830004249894e-09, "logits/chosen": -1.5969552993774414, "logits/rejected": -1.7817270755767822, "logps/chosen": -133.49502563476562, "logps/rejected": -334.44805908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.091845989227295, "rewards/margins": 19.54281997680664, "rewards/rejected": -24.63466453552246, "step": 5165 }, { "epoch": 8.89, "learning_rate": 6.693582660433489e-09, "logits/chosen": -1.8883006572723389, "logits/rejected": -1.895676851272583, "logps/chosen": -116.93562316894531, "logps/rejected": -271.5621337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.863725662231445, "rewards/margins": 15.972990036010742, "rewards/rejected": -20.836715698242188, "step": 5166 }, { "epoch": 8.89, "learning_rate": 6.587335316617085e-09, "logits/chosen": -1.6920552253723145, "logits/rejected": -1.9722986221313477, "logps/chosen": -197.5398406982422, "logps/rejected": -354.94268798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.542213439941406, "rewards/margins": 14.53749942779541, "rewards/rejected": -25.079713821411133, "step": 5167 }, { "epoch": 8.9, "learning_rate": 6.4810879728006795e-09, "logits/chosen": -1.8044025897979736, "logits/rejected": -1.5916128158569336, "logps/chosen": -182.0612030029297, "logps/rejected": -280.3287353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.463351249694824, "rewards/margins": 11.083703994750977, "rewards/rejected": -20.547056198120117, "step": 5168 }, { "epoch": 8.9, "learning_rate": 6.374840628984275e-09, "logits/chosen": -1.7432955503463745, "logits/rejected": -1.7665221691131592, "logps/chosen": -193.490966796875, "logps/rejected": -355.2242736816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.96540641784668, "rewards/margins": 16.58502197265625, "rewards/rejected": -26.550426483154297, "step": 5169 }, { "epoch": 8.9, "learning_rate": 6.26859328516787e-09, "logits/chosen": -1.7875478267669678, "logits/rejected": -1.7976405620574951, "logps/chosen": -201.9600067138672, "logps/rejected": -331.21441650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.155722618103027, "rewards/margins": 13.587778091430664, "rewards/rejected": -24.743501663208008, "step": 5170 }, { "epoch": 8.9, "learning_rate": 6.1623459413514655e-09, "logits/chosen": -1.8123342990875244, "logits/rejected": -1.807707667350769, "logps/chosen": -151.2231903076172, "logps/rejected": -323.975341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.720578193664551, "rewards/margins": 16.42776870727539, "rewards/rejected": -24.148345947265625, "step": 5171 }, { "epoch": 8.9, "learning_rate": 6.056098597535061e-09, "logits/chosen": -1.7799482345581055, "logits/rejected": -1.8513089418411255, "logps/chosen": -181.84043884277344, "logps/rejected": -321.4998779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.425271034240723, "rewards/margins": 14.002406120300293, "rewards/rejected": -22.427677154541016, "step": 5172 }, { "epoch": 8.9, "learning_rate": 5.949851253718657e-09, "logits/chosen": -1.7854615449905396, "logits/rejected": -1.7068862915039062, "logps/chosen": -168.7664031982422, "logps/rejected": -349.1248474121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.54660701751709, "rewards/margins": 17.10204315185547, "rewards/rejected": -23.648651123046875, "step": 5173 }, { "epoch": 8.91, "learning_rate": 5.843603909902252e-09, "logits/chosen": -1.9471030235290527, "logits/rejected": -1.6867631673812866, "logps/chosen": -178.43539428710938, "logps/rejected": -286.91229248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.390456199645996, "rewards/margins": 12.75078296661377, "rewards/rejected": -20.1412410736084, "step": 5174 }, { "epoch": 8.91, "learning_rate": 5.737356566085848e-09, "logits/chosen": -1.9675861597061157, "logits/rejected": -1.8483152389526367, "logps/chosen": -137.49478149414062, "logps/rejected": -292.757080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.723905563354492, "rewards/margins": 16.022340774536133, "rewards/rejected": -20.746246337890625, "step": 5175 }, { "epoch": 8.91, "learning_rate": 5.631109222269443e-09, "logits/chosen": -2.0749948024749756, "logits/rejected": -1.801721215248108, "logps/chosen": -134.01959228515625, "logps/rejected": -296.86767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.119082450866699, "rewards/margins": 16.835094451904297, "rewards/rejected": -21.954177856445312, "step": 5176 }, { "epoch": 8.91, "learning_rate": 5.524861878453038e-09, "logits/chosen": -1.68430495262146, "logits/rejected": -1.8061025142669678, "logps/chosen": -198.20809936523438, "logps/rejected": -329.2204895019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.029149055480957, "rewards/margins": 13.677133560180664, "rewards/rejected": -22.706283569335938, "step": 5177 }, { "epoch": 8.91, "learning_rate": 5.418614534636634e-09, "logits/chosen": -1.6713162660598755, "logits/rejected": -1.9867925643920898, "logps/chosen": -133.47727966308594, "logps/rejected": -288.1445617675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.274353981018066, "rewards/margins": 13.538589477539062, "rewards/rejected": -18.812944412231445, "step": 5178 }, { "epoch": 8.91, "learning_rate": 5.3123671908202294e-09, "logits/chosen": -1.9338569641113281, "logits/rejected": -1.8877874612808228, "logps/chosen": -120.71294403076172, "logps/rejected": -264.7502136230469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.900976181030273, "rewards/margins": 14.289257049560547, "rewards/rejected": -20.190235137939453, "step": 5179 }, { "epoch": 8.92, "learning_rate": 5.206119847003825e-09, "logits/chosen": -1.9213330745697021, "logits/rejected": -1.8765195608139038, "logps/chosen": -174.15309143066406, "logps/rejected": -359.6229248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.634931564331055, "rewards/margins": 18.25945281982422, "rewards/rejected": -26.894386291503906, "step": 5180 }, { "epoch": 8.92, "learning_rate": 5.099872503187421e-09, "logits/chosen": -2.0938520431518555, "logits/rejected": -1.8589959144592285, "logps/chosen": -174.13426208496094, "logps/rejected": -356.1677551269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.265653133392334, "rewards/margins": 17.723005294799805, "rewards/rejected": -24.988658905029297, "step": 5181 }, { "epoch": 8.92, "learning_rate": 4.993625159371015e-09, "logits/chosen": -1.9420135021209717, "logits/rejected": -2.0223169326782227, "logps/chosen": -97.75421142578125, "logps/rejected": -315.0885009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.809013843536377, "rewards/margins": 20.438034057617188, "rewards/rejected": -24.247047424316406, "step": 5182 }, { "epoch": 8.92, "learning_rate": 4.887377815554611e-09, "logits/chosen": -1.629641056060791, "logits/rejected": -1.8193955421447754, "logps/chosen": -163.48550415039062, "logps/rejected": -336.7590026855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.963687419891357, "rewards/margins": 16.232847213745117, "rewards/rejected": -23.196537017822266, "step": 5183 }, { "epoch": 8.92, "learning_rate": 4.7811304717382066e-09, "logits/chosen": -1.978339433670044, "logits/rejected": -1.7257976531982422, "logps/chosen": -132.25125122070312, "logps/rejected": -283.4037170410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.703455448150635, "rewards/margins": 14.888483047485352, "rewards/rejected": -20.591938018798828, "step": 5184 }, { "epoch": 8.92, "learning_rate": 4.674883127921802e-09, "logits/chosen": -1.9001719951629639, "logits/rejected": -1.8452149629592896, "logps/chosen": -151.1146240234375, "logps/rejected": -309.0079345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.757075309753418, "rewards/margins": 15.260706901550293, "rewards/rejected": -22.01778221130371, "step": 5185 }, { "epoch": 8.93, "learning_rate": 4.568635784105397e-09, "logits/chosen": -1.9473025798797607, "logits/rejected": -1.9771289825439453, "logps/chosen": -157.18402099609375, "logps/rejected": -300.6900634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.148944854736328, "rewards/margins": 13.99490737915039, "rewards/rejected": -22.14385223388672, "step": 5186 }, { "epoch": 8.93, "learning_rate": 4.4623884402889925e-09, "logits/chosen": -2.072293281555176, "logits/rejected": -2.0208847522735596, "logps/chosen": -147.6131134033203, "logps/rejected": -283.7937927246094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.832684516906738, "rewards/margins": 14.70268440246582, "rewards/rejected": -20.535369873046875, "step": 5187 }, { "epoch": 8.93, "learning_rate": 4.356141096472588e-09, "logits/chosen": -1.2557512521743774, "logits/rejected": -2.146987199783325, "logps/chosen": -146.00311279296875, "logps/rejected": -298.8902587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.510981559753418, "rewards/margins": 12.752795219421387, "rewards/rejected": -19.263776779174805, "step": 5188 }, { "epoch": 8.93, "learning_rate": 4.249893752656183e-09, "logits/chosen": -1.3612593412399292, "logits/rejected": -1.8634852170944214, "logps/chosen": -181.00225830078125, "logps/rejected": -380.1635437011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.864798545837402, "rewards/margins": 16.846302032470703, "rewards/rejected": -25.711103439331055, "step": 5189 }, { "epoch": 8.93, "learning_rate": 4.1436464088397785e-09, "logits/chosen": -1.4030358791351318, "logits/rejected": -1.9728401899337769, "logps/chosen": -134.29481506347656, "logps/rejected": -358.7220764160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.802290916442871, "rewards/margins": 19.65053939819336, "rewards/rejected": -26.45282745361328, "step": 5190 }, { "epoch": 8.93, "learning_rate": 4.037399065023374e-09, "logits/chosen": -1.7251508235931396, "logits/rejected": -1.887097716331482, "logps/chosen": -147.46957397460938, "logps/rejected": -261.83880615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.582789421081543, "rewards/margins": 11.928398132324219, "rewards/rejected": -18.511186599731445, "step": 5191 }, { "epoch": 8.94, "learning_rate": 3.93115172120697e-09, "logits/chosen": -1.586463212966919, "logits/rejected": -1.8175795078277588, "logps/chosen": -173.17825317382812, "logps/rejected": -358.221435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.23034381866455, "rewards/margins": 17.78943634033203, "rewards/rejected": -26.0197811126709, "step": 5192 }, { "epoch": 8.94, "learning_rate": 3.824904377390565e-09, "logits/chosen": -1.9940162897109985, "logits/rejected": -1.7411179542541504, "logps/chosen": -145.91336059570312, "logps/rejected": -292.6806945800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.507314682006836, "rewards/margins": 15.665156364440918, "rewards/rejected": -22.172470092773438, "step": 5193 }, { "epoch": 8.94, "learning_rate": 3.7186570335741604e-09, "logits/chosen": -1.8920937776565552, "logits/rejected": -1.7644860744476318, "logps/chosen": -174.44776916503906, "logps/rejected": -374.385986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.91294002532959, "rewards/margins": 19.58554458618164, "rewards/rejected": -28.498485565185547, "step": 5194 }, { "epoch": 8.94, "learning_rate": 3.612409689757756e-09, "logits/chosen": -1.8050916194915771, "logits/rejected": -2.0023088455200195, "logps/chosen": -159.30455017089844, "logps/rejected": -333.487548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.213507652282715, "rewards/margins": 16.16574478149414, "rewards/rejected": -23.379253387451172, "step": 5195 }, { "epoch": 8.94, "learning_rate": 3.506162345941351e-09, "logits/chosen": -1.9970028400421143, "logits/rejected": -1.508178472518921, "logps/chosen": -173.04441833496094, "logps/rejected": -342.81719970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.72180700302124, "rewards/margins": 18.43541145324707, "rewards/rejected": -26.15721893310547, "step": 5196 }, { "epoch": 8.94, "learning_rate": 3.399915002124947e-09, "logits/chosen": -1.8881912231445312, "logits/rejected": -1.749389410018921, "logps/chosen": -188.00296020507812, "logps/rejected": -320.9052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.039589881896973, "rewards/margins": 13.478689193725586, "rewards/rejected": -22.518280029296875, "step": 5197 }, { "epoch": 8.95, "learning_rate": 3.2936676583085424e-09, "logits/chosen": -1.810727596282959, "logits/rejected": -1.6820576190948486, "logps/chosen": -140.71286010742188, "logps/rejected": -316.30206298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.615978240966797, "rewards/margins": 18.83883285522461, "rewards/rejected": -24.454811096191406, "step": 5198 }, { "epoch": 8.95, "learning_rate": 3.1874203144921376e-09, "logits/chosen": -1.763041615486145, "logits/rejected": -1.6645448207855225, "logps/chosen": -176.86509704589844, "logps/rejected": -310.12054443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.99772310256958, "rewards/margins": 13.781646728515625, "rewards/rejected": -21.779369354248047, "step": 5199 }, { "epoch": 8.95, "learning_rate": 3.0811729706757328e-09, "logits/chosen": -1.9891107082366943, "logits/rejected": -1.7062911987304688, "logps/chosen": -143.20718383789062, "logps/rejected": -298.7588806152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.672930717468262, "rewards/margins": 16.549766540527344, "rewards/rejected": -22.222698211669922, "step": 5200 }, { "epoch": 8.95, "learning_rate": 2.9749256268593283e-09, "logits/chosen": -1.7980775833129883, "logits/rejected": -1.8455729484558105, "logps/chosen": -174.3728485107422, "logps/rejected": -319.4913330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.180856704711914, "rewards/margins": 14.212987899780273, "rewards/rejected": -22.393842697143555, "step": 5201 }, { "epoch": 8.95, "learning_rate": 2.868678283042924e-09, "logits/chosen": -1.9154422283172607, "logits/rejected": -1.9310152530670166, "logps/chosen": -154.19937133789062, "logps/rejected": -320.74395751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.65013313293457, "rewards/margins": 16.52621078491211, "rewards/rejected": -23.176342010498047, "step": 5202 }, { "epoch": 8.96, "learning_rate": 2.762430939226519e-09, "logits/chosen": -1.7682950496673584, "logits/rejected": -1.6479099988937378, "logps/chosen": -112.34918212890625, "logps/rejected": -280.25091552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1264209747314453, "rewards/margins": 17.194915771484375, "rewards/rejected": -20.32133674621582, "step": 5203 }, { "epoch": 8.96, "learning_rate": 2.6561835954101147e-09, "logits/chosen": -1.8653247356414795, "logits/rejected": -1.943988561630249, "logps/chosen": -118.85395812988281, "logps/rejected": -343.45703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6534223556518555, "rewards/margins": 20.15768051147461, "rewards/rejected": -24.81110191345215, "step": 5204 }, { "epoch": 8.96, "learning_rate": 2.5499362515937103e-09, "logits/chosen": -2.0478336811065674, "logits/rejected": -1.7120625972747803, "logps/chosen": -185.4133758544922, "logps/rejected": -342.4040222167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.882965087890625, "rewards/margins": 17.382110595703125, "rewards/rejected": -25.26507568359375, "step": 5205 }, { "epoch": 8.96, "learning_rate": 2.4436889077773055e-09, "logits/chosen": -1.6927717924118042, "logits/rejected": -1.7900935411453247, "logps/chosen": -119.72592163085938, "logps/rejected": -298.92919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.464803695678711, "rewards/margins": 17.258832931518555, "rewards/rejected": -21.7236385345459, "step": 5206 }, { "epoch": 8.96, "learning_rate": 2.337441563960901e-09, "logits/chosen": -1.4406732320785522, "logits/rejected": -1.8392329216003418, "logps/chosen": -128.70748901367188, "logps/rejected": -297.36260986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.081758499145508, "rewards/margins": 14.165794372558594, "rewards/rejected": -20.2475528717041, "step": 5207 }, { "epoch": 8.96, "learning_rate": 2.2311942201444963e-09, "logits/chosen": -1.7912859916687012, "logits/rejected": -1.9853607416152954, "logps/chosen": -177.6638946533203, "logps/rejected": -344.6147155761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.310562133789062, "rewards/margins": 16.10255241394043, "rewards/rejected": -25.41311264038086, "step": 5208 }, { "epoch": 8.97, "learning_rate": 2.1249468763280914e-09, "logits/chosen": -1.4246118068695068, "logits/rejected": -1.8910400867462158, "logps/chosen": -153.9789276123047, "logps/rejected": -320.47760009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.716778755187988, "rewards/margins": 14.96915054321289, "rewards/rejected": -22.685928344726562, "step": 5209 }, { "epoch": 8.97, "learning_rate": 2.018699532511687e-09, "logits/chosen": -1.7490665912628174, "logits/rejected": -1.5498297214508057, "logps/chosen": -150.1741943359375, "logps/rejected": -271.66912841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.542048931121826, "rewards/margins": 13.78852367401123, "rewards/rejected": -20.3305721282959, "step": 5210 }, { "epoch": 8.97, "learning_rate": 1.9124521886952826e-09, "logits/chosen": -1.8165967464447021, "logits/rejected": -1.7522578239440918, "logps/chosen": -157.9557342529297, "logps/rejected": -328.6954650878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.569261074066162, "rewards/margins": 16.224281311035156, "rewards/rejected": -23.793542861938477, "step": 5211 }, { "epoch": 8.97, "learning_rate": 1.806204844878878e-09, "logits/chosen": -1.9018690586090088, "logits/rejected": -2.01839017868042, "logps/chosen": -186.45925903320312, "logps/rejected": -320.354248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.395389556884766, "rewards/margins": 13.772972106933594, "rewards/rejected": -22.16836166381836, "step": 5212 }, { "epoch": 8.97, "learning_rate": 1.6999575010624734e-09, "logits/chosen": -1.5267274379730225, "logits/rejected": -2.044792652130127, "logps/chosen": -153.4257049560547, "logps/rejected": -321.4821472167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.642879486083984, "rewards/margins": 17.423458099365234, "rewards/rejected": -23.06633758544922, "step": 5213 }, { "epoch": 8.97, "learning_rate": 1.5937101572460688e-09, "logits/chosen": -1.7651054859161377, "logits/rejected": -2.1022086143493652, "logps/chosen": -143.62002563476562, "logps/rejected": -338.73931884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.191456317901611, "rewards/margins": 16.094966888427734, "rewards/rejected": -23.286422729492188, "step": 5214 }, { "epoch": 8.98, "learning_rate": 1.4874628134296642e-09, "logits/chosen": -1.8389127254486084, "logits/rejected": -2.132126569747925, "logps/chosen": -141.16836547851562, "logps/rejected": -304.07159423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.832047939300537, "rewards/margins": 14.825691223144531, "rewards/rejected": -21.657737731933594, "step": 5215 }, { "epoch": 8.98, "learning_rate": 1.3812154696132596e-09, "logits/chosen": -1.9365341663360596, "logits/rejected": -1.8489497900009155, "logps/chosen": -146.87347412109375, "logps/rejected": -291.41680908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.36482572555542, "rewards/margins": 14.343384742736816, "rewards/rejected": -20.708209991455078, "step": 5216 }, { "epoch": 8.98, "learning_rate": 1.2749681257968552e-09, "logits/chosen": -1.7365213632583618, "logits/rejected": -2.040472984313965, "logps/chosen": -156.93104553222656, "logps/rejected": -304.07208251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.074140548706055, "rewards/margins": 14.701383590698242, "rewards/rejected": -20.775524139404297, "step": 5217 }, { "epoch": 8.98, "learning_rate": 1.1687207819804505e-09, "logits/chosen": -1.9571582078933716, "logits/rejected": -1.8847038745880127, "logps/chosen": -204.63247680664062, "logps/rejected": -382.31463623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.878350257873535, "rewards/margins": 18.448139190673828, "rewards/rejected": -28.32649040222168, "step": 5218 }, { "epoch": 8.98, "learning_rate": 1.0624734381640457e-09, "logits/chosen": -1.73148512840271, "logits/rejected": -1.8343514204025269, "logps/chosen": -122.06959533691406, "logps/rejected": -287.1628723144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.938226222991943, "rewards/margins": 15.821786880493164, "rewards/rejected": -20.760013580322266, "step": 5219 }, { "epoch": 8.98, "learning_rate": 9.562260943476413e-10, "logits/chosen": -1.6363369226455688, "logits/rejected": -1.8061363697052002, "logps/chosen": -194.94638061523438, "logps/rejected": -374.33062744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.275813102722168, "rewards/margins": 18.183692932128906, "rewards/rejected": -27.459505081176758, "step": 5220 }, { "epoch": 8.99, "learning_rate": 8.499787505312367e-10, "logits/chosen": -1.8408634662628174, "logits/rejected": -1.8842799663543701, "logps/chosen": -142.5151824951172, "logps/rejected": -299.4882507324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.9753570556640625, "rewards/margins": 14.225765228271484, "rewards/rejected": -21.201122283935547, "step": 5221 }, { "epoch": 8.99, "learning_rate": 7.437314067148321e-10, "logits/chosen": -1.9279165267944336, "logits/rejected": -1.9763925075531006, "logps/chosen": -136.7961883544922, "logps/rejected": -315.38482666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.426172256469727, "rewards/margins": 17.016494750976562, "rewards/rejected": -23.44266700744629, "step": 5222 }, { "epoch": 8.99, "learning_rate": 6.374840628984276e-10, "logits/chosen": -2.0285592079162598, "logits/rejected": -1.848396897315979, "logps/chosen": -173.39089965820312, "logps/rejected": -308.8958740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.250622749328613, "rewards/margins": 13.436668395996094, "rewards/rejected": -22.687292098999023, "step": 5223 }, { "epoch": 8.99, "learning_rate": 5.312367190820229e-10, "logits/chosen": -1.657858967781067, "logits/rejected": -1.7013447284698486, "logps/chosen": -173.38951110839844, "logps/rejected": -314.8583068847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.133658409118652, "rewards/margins": 15.430584907531738, "rewards/rejected": -23.56424331665039, "step": 5224 }, { "epoch": 8.99, "learning_rate": 4.2498937526561835e-10, "logits/chosen": -1.260367751121521, "logits/rejected": -1.9482423067092896, "logps/chosen": -137.26339721679688, "logps/rejected": -355.55914306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.257659435272217, "rewards/margins": 18.463666915893555, "rewards/rejected": -23.72132682800293, "step": 5225 }, { "epoch": 8.99, "learning_rate": 3.187420314492138e-10, "logits/chosen": -1.9184257984161377, "logits/rejected": -1.9224035739898682, "logps/chosen": -94.40485382080078, "logps/rejected": -266.7804870605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.502685308456421, "rewards/margins": 16.890796661376953, "rewards/rejected": -20.393482208251953, "step": 5226 }, { "epoch": 9.0, "learning_rate": 2.1249468763280917e-10, "logits/chosen": -1.8929290771484375, "logits/rejected": -1.9988594055175781, "logps/chosen": -182.12203979492188, "logps/rejected": -327.9019470214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.06466007232666, "rewards/margins": 14.598665237426758, "rewards/rejected": -24.663326263427734, "step": 5227 }, { "epoch": 9.0, "learning_rate": 1.0624734381640459e-10, "logits/chosen": -1.585849642753601, "logits/rejected": -1.6475303173065186, "logps/chosen": -202.25360107421875, "logps/rejected": -371.87432861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.341676712036133, "rewards/margins": 16.4492130279541, "rewards/rejected": -25.7908878326416, "step": 5228 }, { "epoch": 9.0, "learning_rate": 0.0, "logits/chosen": -1.9238612651824951, "logits/rejected": -1.7191734313964844, "logps/chosen": -150.56829833984375, "logps/rejected": -333.6749267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2862324714660645, "rewards/margins": 18.034042358398438, "rewards/rejected": -24.320276260375977, "step": 5229 }, { "epoch": 9.0, "step": 5229, "total_flos": 0.0, "train_loss": 0.11654319613648585, "train_runtime": 25394.1508, "train_samples_per_second": 6.589, "train_steps_per_second": 0.206 } ], "logging_steps": 1.0, "max_steps": 5229, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 10000, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }