{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.930638359749891, "eval_steps": 1000, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.3255813953488372e-09, "logits/chosen": -2.471824884414673, "logits/rejected": -2.079159736633301, "logps/chosen": -174.749267578125, "logps/rejected": -308.16778564453125, "loss": 0.5964, "rewards/accuracies": 0.625, "rewards/chosen": -0.27328822016716003, "rewards/margins": 0.4016978144645691, "rewards/rejected": -0.6749860048294067, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.6511627906976744e-09, "logits/chosen": -2.6063313484191895, "logits/rejected": -2.2856028079986572, "logps/chosen": -318.5167236328125, "logps/rejected": -396.08905029296875, "loss": 0.6508, "rewards/accuracies": 0.5, "rewards/chosen": -0.13937486708164215, "rewards/margins": 0.16503377258777618, "rewards/rejected": -0.30440863966941833, "step": 2 }, { "epoch": 0.0, "learning_rate": 6.976744186046511e-09, "logits/chosen": -2.4662411212921143, "logits/rejected": -2.6295621395111084, "logps/chosen": -146.83151245117188, "logps/rejected": -161.9111328125, "loss": 0.7889, "rewards/accuracies": 0.75, "rewards/chosen": -0.643913209438324, "rewards/margins": -0.0388026162981987, "rewards/rejected": -0.6051105856895447, "step": 3 }, { "epoch": 0.0, "learning_rate": 9.302325581395349e-09, "logits/chosen": -2.2128384113311768, "logits/rejected": -2.203862428665161, "logps/chosen": -225.96243286132812, "logps/rejected": -244.6072540283203, "loss": 0.9154, "rewards/accuracies": 0.375, "rewards/chosen": -0.37408754229545593, "rewards/margins": -0.31671643257141113, "rewards/rejected": -0.057371072471141815, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.1627906976744186e-08, "logits/chosen": -2.065877676010132, "logits/rejected": -2.1803784370422363, "logps/chosen": -213.409423828125, "logps/rejected": -203.0699920654297, "loss": 0.5983, "rewards/accuracies": 0.5, "rewards/chosen": -0.3307132124900818, "rewards/margins": 0.4796845018863678, "rewards/rejected": -0.810397744178772, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.3953488372093022e-08, "logits/chosen": -2.319749355316162, "logits/rejected": -2.6371703147888184, "logps/chosen": -439.01751708984375, "logps/rejected": -241.67933654785156, "loss": 1.5479, "rewards/accuracies": 0.5, "rewards/chosen": -1.5972545146942139, "rewards/margins": -1.078753113746643, "rewards/rejected": -0.5185015201568604, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.627906976744186e-08, "logits/chosen": -2.9757614135742188, "logits/rejected": -2.895737886428833, "logps/chosen": -194.6126708984375, "logps/rejected": -215.57421875, "loss": 0.6194, "rewards/accuracies": 0.75, "rewards/chosen": -0.13256311416625977, "rewards/margins": 0.2143106460571289, "rewards/rejected": -0.34687376022338867, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.8604651162790698e-08, "logits/chosen": -2.0621490478515625, "logits/rejected": -2.3770477771759033, "logps/chosen": -661.87451171875, "logps/rejected": -352.9231872558594, "loss": 0.5665, "rewards/accuracies": 0.75, "rewards/chosen": -0.18366073071956635, "rewards/margins": 0.3257562518119812, "rewards/rejected": -0.5094169974327087, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.0930232558139533e-08, "logits/chosen": -2.7137913703918457, "logits/rejected": -2.715798854827881, "logps/chosen": -211.98388671875, "logps/rejected": -210.24530029296875, "loss": 0.6558, "rewards/accuracies": 0.625, "rewards/chosen": -0.8401208519935608, "rewards/margins": 0.10100153833627701, "rewards/rejected": -0.9411224722862244, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.3255813953488372e-08, "logits/chosen": -2.268031120300293, "logits/rejected": -2.333078384399414, "logps/chosen": -304.6269226074219, "logps/rejected": -218.21595764160156, "loss": 1.0356, "rewards/accuracies": 0.375, "rewards/chosen": -0.9625828266143799, "rewards/margins": -0.43421831727027893, "rewards/rejected": -0.5283644795417786, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.5581395348837208e-08, "logits/chosen": -2.46273136138916, "logits/rejected": -2.5069494247436523, "logps/chosen": -280.9668273925781, "logps/rejected": -242.62289428710938, "loss": 0.8667, "rewards/accuracies": 0.375, "rewards/chosen": -0.5893173813819885, "rewards/margins": -0.16260433197021484, "rewards/rejected": -0.42671307921409607, "step": 11 }, { "epoch": 0.0, "learning_rate": 2.7906976744186043e-08, "logits/chosen": -2.474379301071167, "logits/rejected": -2.4130866527557373, "logps/chosen": -340.4402160644531, "logps/rejected": -490.8125, "loss": 0.7761, "rewards/accuracies": 0.5, "rewards/chosen": -0.4141865670681, "rewards/margins": -0.07961148768663406, "rewards/rejected": -0.3345750868320465, "step": 12 }, { "epoch": 0.0, "learning_rate": 3.023255813953488e-08, "logits/chosen": -2.4873900413513184, "logits/rejected": -2.613281726837158, "logps/chosen": -284.20635986328125, "logps/rejected": -145.10678100585938, "loss": 0.9958, "rewards/accuracies": 0.375, "rewards/chosen": -0.3096553683280945, "rewards/margins": -0.4070754647254944, "rewards/rejected": 0.09742006659507751, "step": 13 }, { "epoch": 0.0, "learning_rate": 3.255813953488372e-08, "logits/chosen": -2.4154539108276367, "logits/rejected": -2.6339876651763916, "logps/chosen": -536.8396606445312, "logps/rejected": -440.3067626953125, "loss": 0.8262, "rewards/accuracies": 0.125, "rewards/chosen": -0.42787837982177734, "rewards/margins": -0.2260442078113556, "rewards/rejected": -0.20183415710926056, "step": 14 }, { "epoch": 0.0, "learning_rate": 3.4883720930232553e-08, "logits/chosen": -2.0477490425109863, "logits/rejected": -2.0686001777648926, "logps/chosen": -247.60975646972656, "logps/rejected": -182.30712890625, "loss": 0.824, "rewards/accuracies": 0.375, "rewards/chosen": -0.4310076832771301, "rewards/margins": -0.18956337869167328, "rewards/rejected": -0.24144430458545685, "step": 15 }, { "epoch": 0.0, "learning_rate": 3.7209302325581396e-08, "logits/chosen": -2.0259194374084473, "logits/rejected": -2.2158186435699463, "logps/chosen": -146.38795471191406, "logps/rejected": -211.36460876464844, "loss": 1.0976, "rewards/accuracies": 0.625, "rewards/chosen": -1.0345203876495361, "rewards/margins": 0.4986010193824768, "rewards/rejected": -1.5331213474273682, "step": 16 }, { "epoch": 0.0, "learning_rate": 3.953488372093023e-08, "logits/chosen": -3.0699312686920166, "logits/rejected": -2.992260456085205, "logps/chosen": -230.7408447265625, "logps/rejected": -158.0206756591797, "loss": 0.8654, "rewards/accuracies": 0.5, "rewards/chosen": -0.41682323813438416, "rewards/margins": -0.2596265971660614, "rewards/rejected": -0.15719667077064514, "step": 17 }, { "epoch": 0.0, "learning_rate": 4.1860465116279067e-08, "logits/chosen": -2.431905508041382, "logits/rejected": -2.119258403778076, "logps/chosen": -179.03073120117188, "logps/rejected": -186.11746215820312, "loss": 0.5048, "rewards/accuracies": 0.75, "rewards/chosen": -0.624506950378418, "rewards/margins": 0.5342190861701965, "rewards/rejected": -1.1587260961532593, "step": 18 }, { "epoch": 0.0, "learning_rate": 4.418604651162791e-08, "logits/chosen": -2.3878049850463867, "logits/rejected": -2.3388524055480957, "logps/chosen": -184.7909698486328, "logps/rejected": -249.22142028808594, "loss": 0.7476, "rewards/accuracies": 0.25, "rewards/chosen": -0.28819751739501953, "rewards/margins": -0.08883972465991974, "rewards/rejected": -0.1993577927350998, "step": 19 }, { "epoch": 0.0, "learning_rate": 4.6511627906976744e-08, "logits/chosen": -2.1187872886657715, "logits/rejected": -2.462371349334717, "logps/chosen": -407.9158935546875, "logps/rejected": -218.08628845214844, "loss": 1.049, "rewards/accuracies": 0.25, "rewards/chosen": -0.8088630437850952, "rewards/margins": -0.5424136519432068, "rewards/rejected": -0.2664493918418884, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.883720930232558e-08, "logits/chosen": -2.1358978748321533, "logits/rejected": -1.8003429174423218, "logps/chosen": -178.56448364257812, "logps/rejected": -273.8068542480469, "loss": 0.6224, "rewards/accuracies": 0.625, "rewards/chosen": -0.10442107915878296, "rewards/margins": 0.20315207540988922, "rewards/rejected": -0.307573139667511, "step": 21 }, { "epoch": 0.0, "learning_rate": 5.1162790697674416e-08, "logits/chosen": -2.0717709064483643, "logits/rejected": -2.526026487350464, "logps/chosen": -480.04022216796875, "logps/rejected": -304.8790283203125, "loss": 0.6126, "rewards/accuracies": 0.75, "rewards/chosen": -0.426477313041687, "rewards/margins": 0.19031721353530884, "rewards/rejected": -0.6167945265769958, "step": 22 }, { "epoch": 0.0, "learning_rate": 5.348837209302326e-08, "logits/chosen": -2.432738780975342, "logits/rejected": -2.3010363578796387, "logps/chosen": -228.48782348632812, "logps/rejected": -256.86456298828125, "loss": 0.5999, "rewards/accuracies": 0.75, "rewards/chosen": -0.19084897637367249, "rewards/margins": 0.27129706740379333, "rewards/rejected": -0.4621460437774658, "step": 23 }, { "epoch": 0.0, "learning_rate": 5.5813953488372087e-08, "logits/chosen": -2.284633159637451, "logits/rejected": -2.101175308227539, "logps/chosen": -177.31121826171875, "logps/rejected": -201.95387268066406, "loss": 1.2117, "rewards/accuracies": 0.5, "rewards/chosen": -1.6518417596817017, "rewards/margins": -0.3435242772102356, "rewards/rejected": -1.3083174228668213, "step": 24 }, { "epoch": 0.0, "learning_rate": 5.813953488372093e-08, "logits/chosen": -2.59863543510437, "logits/rejected": -2.7144598960876465, "logps/chosen": -255.5670928955078, "logps/rejected": -311.2051696777344, "loss": 0.6775, "rewards/accuracies": 0.875, "rewards/chosen": -0.33376044034957886, "rewards/margins": 0.19081330299377441, "rewards/rejected": -0.5245736837387085, "step": 25 }, { "epoch": 0.0, "learning_rate": 6.046511627906976e-08, "logits/chosen": -2.3561620712280273, "logits/rejected": -2.2605764865875244, "logps/chosen": -172.1944580078125, "logps/rejected": -180.14633178710938, "loss": 0.5331, "rewards/accuracies": 0.75, "rewards/chosen": -0.059106722474098206, "rewards/margins": 0.6985692977905273, "rewards/rejected": -0.7576760053634644, "step": 26 }, { "epoch": 0.0, "learning_rate": 6.27906976744186e-08, "logits/chosen": -1.923173189163208, "logits/rejected": -1.8035862445831299, "logps/chosen": -324.13397216796875, "logps/rejected": -296.1949157714844, "loss": 0.9933, "rewards/accuracies": 0.375, "rewards/chosen": -0.4739299714565277, "rewards/margins": -0.30800122022628784, "rewards/rejected": -0.16592879593372345, "step": 27 }, { "epoch": 0.0, "learning_rate": 6.511627906976744e-08, "logits/chosen": -1.7430894374847412, "logits/rejected": -1.587222695350647, "logps/chosen": -125.14323425292969, "logps/rejected": -168.17587280273438, "loss": 0.8748, "rewards/accuracies": 0.5, "rewards/chosen": -0.505617618560791, "rewards/margins": 0.04966023564338684, "rewards/rejected": -0.5552778840065002, "step": 28 }, { "epoch": 0.0, "learning_rate": 6.744186046511628e-08, "logits/chosen": -2.3875133991241455, "logits/rejected": -2.4412190914154053, "logps/chosen": -180.6735076904297, "logps/rejected": -149.4240264892578, "loss": 0.6651, "rewards/accuracies": 0.5, "rewards/chosen": -0.7512054443359375, "rewards/margins": 0.7399396896362305, "rewards/rejected": -1.4911452531814575, "step": 29 }, { "epoch": 0.0, "learning_rate": 6.976744186046511e-08, "logits/chosen": -2.1003315448760986, "logits/rejected": -2.2294981479644775, "logps/chosen": -319.9154968261719, "logps/rejected": -232.8546600341797, "loss": 0.753, "rewards/accuracies": 0.375, "rewards/chosen": -0.13115616142749786, "rewards/margins": -0.06040288507938385, "rewards/rejected": -0.07075329124927521, "step": 30 }, { "epoch": 0.0, "learning_rate": 7.209302325581396e-08, "logits/chosen": -2.8773646354675293, "logits/rejected": -2.7694449424743652, "logps/chosen": -186.46194458007812, "logps/rejected": -255.46096801757812, "loss": 0.8024, "rewards/accuracies": 0.5, "rewards/chosen": -0.3301490843296051, "rewards/margins": -0.10115490108728409, "rewards/rejected": -0.22899417579174042, "step": 31 }, { "epoch": 0.0, "learning_rate": 7.441860465116279e-08, "logits/chosen": -2.1780011653900146, "logits/rejected": -2.2342939376831055, "logps/chosen": -343.21746826171875, "logps/rejected": -264.72637939453125, "loss": 0.6627, "rewards/accuracies": 0.375, "rewards/chosen": -0.5595120191574097, "rewards/margins": 0.10767830908298492, "rewards/rejected": -0.6671902537345886, "step": 32 }, { "epoch": 0.0, "learning_rate": 7.674418604651163e-08, "logits/chosen": -2.9698286056518555, "logits/rejected": -3.0241994857788086, "logps/chosen": -183.80117797851562, "logps/rejected": -177.83920288085938, "loss": 0.7493, "rewards/accuracies": 0.5, "rewards/chosen": -0.14829738438129425, "rewards/margins": 0.0063615962862968445, "rewards/rejected": -0.1546589881181717, "step": 33 }, { "epoch": 0.0, "learning_rate": 7.906976744186046e-08, "logits/chosen": -2.6177778244018555, "logits/rejected": -2.711055040359497, "logps/chosen": -221.38626098632812, "logps/rejected": -210.04791259765625, "loss": 0.8767, "rewards/accuracies": 0.375, "rewards/chosen": -0.34128063917160034, "rewards/margins": -0.2563408613204956, "rewards/rejected": -0.08493972569704056, "step": 34 }, { "epoch": 0.0, "learning_rate": 8.13953488372093e-08, "logits/chosen": -2.139709234237671, "logits/rejected": -2.1862668991088867, "logps/chosen": -404.7296447753906, "logps/rejected": -282.5640563964844, "loss": 0.6201, "rewards/accuracies": 0.625, "rewards/chosen": -0.18404540419578552, "rewards/margins": 0.23818421363830566, "rewards/rejected": -0.4222296476364136, "step": 35 }, { "epoch": 0.0, "learning_rate": 8.372093023255813e-08, "logits/chosen": -1.888787865638733, "logits/rejected": -1.9491325616836548, "logps/chosen": -590.8215942382812, "logps/rejected": -364.3123779296875, "loss": 0.9904, "rewards/accuracies": 0.375, "rewards/chosen": -0.7168409824371338, "rewards/margins": -0.27501195669174194, "rewards/rejected": -0.44182896614074707, "step": 36 }, { "epoch": 0.0, "learning_rate": 8.604651162790698e-08, "logits/chosen": -2.8992369174957275, "logits/rejected": -2.9710233211517334, "logps/chosen": -299.6357421875, "logps/rejected": -316.66363525390625, "loss": 0.774, "rewards/accuracies": 0.5, "rewards/chosen": -0.27237987518310547, "rewards/margins": -0.09465236961841583, "rewards/rejected": -0.17772750556468964, "step": 37 }, { "epoch": 0.0, "learning_rate": 8.837209302325582e-08, "logits/chosen": -2.4523823261260986, "logits/rejected": -2.5172343254089355, "logps/chosen": -289.5021057128906, "logps/rejected": -251.57266235351562, "loss": 0.6092, "rewards/accuracies": 0.75, "rewards/chosen": -0.12071393430233002, "rewards/margins": 0.19816556572914124, "rewards/rejected": -0.31887951493263245, "step": 38 }, { "epoch": 0.0, "learning_rate": 9.069767441860464e-08, "logits/chosen": -2.383002758026123, "logits/rejected": -2.4668784141540527, "logps/chosen": -417.6801452636719, "logps/rejected": -317.9768371582031, "loss": 0.7849, "rewards/accuracies": 0.625, "rewards/chosen": -0.5020383596420288, "rewards/margins": -0.1064617931842804, "rewards/rejected": -0.395576536655426, "step": 39 }, { "epoch": 0.0, "learning_rate": 9.302325581395349e-08, "logits/chosen": -2.4345924854278564, "logits/rejected": -2.3385424613952637, "logps/chosen": -310.6422424316406, "logps/rejected": -241.9683837890625, "loss": 0.8526, "rewards/accuracies": 0.875, "rewards/chosen": -1.8365793228149414, "rewards/margins": 0.3219362497329712, "rewards/rejected": -2.158515691757202, "step": 40 }, { "epoch": 0.0, "learning_rate": 9.534883720930232e-08, "logits/chosen": -2.4973034858703613, "logits/rejected": -2.285468578338623, "logps/chosen": -209.2900390625, "logps/rejected": -206.86367797851562, "loss": 0.8699, "rewards/accuracies": 0.25, "rewards/chosen": -0.4569118618965149, "rewards/margins": -0.2597395181655884, "rewards/rejected": -0.1971723437309265, "step": 41 }, { "epoch": 0.0, "learning_rate": 9.767441860465116e-08, "logits/chosen": -2.52140736579895, "logits/rejected": -2.4069154262542725, "logps/chosen": -255.2979736328125, "logps/rejected": -202.064453125, "loss": 0.5774, "rewards/accuracies": 0.75, "rewards/chosen": -0.062088657170534134, "rewards/margins": 0.2910660207271576, "rewards/rejected": -0.35315465927124023, "step": 42 }, { "epoch": 0.01, "learning_rate": 1e-07, "logits/chosen": -2.484564781188965, "logits/rejected": -2.4409337043762207, "logps/chosen": -179.82351684570312, "logps/rejected": -179.67767333984375, "loss": 1.1582, "rewards/accuracies": 0.5, "rewards/chosen": -0.5952649712562561, "rewards/margins": -0.596484363079071, "rewards/rejected": 0.0012193676084280014, "step": 43 }, { "epoch": 0.01, "learning_rate": 1.0232558139534883e-07, "logits/chosen": -2.62892484664917, "logits/rejected": -2.2565321922302246, "logps/chosen": -177.9101104736328, "logps/rejected": -251.5445098876953, "loss": 0.6459, "rewards/accuracies": 0.375, "rewards/chosen": -0.03073548898100853, "rewards/margins": 0.1703680008649826, "rewards/rejected": -0.20110349357128143, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.0465116279069767e-07, "logits/chosen": -2.327939987182617, "logits/rejected": -2.0135440826416016, "logps/chosen": -246.9445343017578, "logps/rejected": -416.3923645019531, "loss": 0.6944, "rewards/accuracies": 0.5, "rewards/chosen": -0.4190826117992401, "rewards/margins": 0.06963533163070679, "rewards/rejected": -0.4887179136276245, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.0697674418604652e-07, "logits/chosen": -2.1443333625793457, "logits/rejected": -2.0008716583251953, "logps/chosen": -250.57481384277344, "logps/rejected": -226.42410278320312, "loss": 0.7956, "rewards/accuracies": 0.25, "rewards/chosen": -0.34612005949020386, "rewards/margins": -0.13149793446063995, "rewards/rejected": -0.2146221101284027, "step": 46 }, { "epoch": 0.01, "learning_rate": 1.0930232558139534e-07, "logits/chosen": -2.745948076248169, "logits/rejected": -2.4107697010040283, "logps/chosen": -281.4881591796875, "logps/rejected": -269.353515625, "loss": 0.8439, "rewards/accuracies": 0.375, "rewards/chosen": -0.6514642238616943, "rewards/margins": -0.22894853353500366, "rewards/rejected": -0.4225156903266907, "step": 47 }, { "epoch": 0.01, "learning_rate": 1.1162790697674417e-07, "logits/chosen": -2.198420286178589, "logits/rejected": -2.2828078269958496, "logps/chosen": -167.5601806640625, "logps/rejected": -156.71365356445312, "loss": 0.8383, "rewards/accuracies": 0.625, "rewards/chosen": -2.158750057220459, "rewards/margins": 0.2781144976615906, "rewards/rejected": -2.4368646144866943, "step": 48 }, { "epoch": 0.01, "learning_rate": 1.1395348837209302e-07, "logits/chosen": -2.0897417068481445, "logits/rejected": -2.4239277839660645, "logps/chosen": -379.0882873535156, "logps/rejected": -334.62799072265625, "loss": 0.9831, "rewards/accuracies": 0.5, "rewards/chosen": -0.6604304313659668, "rewards/margins": -0.3971719443798065, "rewards/rejected": -0.2632584869861603, "step": 49 }, { "epoch": 0.01, "learning_rate": 1.1627906976744186e-07, "logits/chosen": -2.442202091217041, "logits/rejected": -2.3841967582702637, "logps/chosen": -304.512451171875, "logps/rejected": -279.9529724121094, "loss": 0.7445, "rewards/accuracies": 0.25, "rewards/chosen": -0.5183042287826538, "rewards/margins": -0.04576530307531357, "rewards/rejected": -0.47253894805908203, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.1860465116279068e-07, "logits/chosen": -2.252142906188965, "logits/rejected": -1.9597101211547852, "logps/chosen": -181.8081512451172, "logps/rejected": -330.64056396484375, "loss": 0.5954, "rewards/accuracies": 0.375, "rewards/chosen": -0.09730133414268494, "rewards/margins": 0.514672577381134, "rewards/rejected": -0.6119738817214966, "step": 51 }, { "epoch": 0.01, "learning_rate": 1.2093023255813953e-07, "logits/chosen": -2.501095771789551, "logits/rejected": -2.6473937034606934, "logps/chosen": -346.1275939941406, "logps/rejected": -219.6533203125, "loss": 0.9755, "rewards/accuracies": 0.25, "rewards/chosen": -0.5505790710449219, "rewards/margins": -0.3939691185951233, "rewards/rejected": -0.1566099226474762, "step": 52 }, { "epoch": 0.01, "learning_rate": 1.2325581395348838e-07, "logits/chosen": -2.6609301567077637, "logits/rejected": -2.6081488132476807, "logps/chosen": -306.408203125, "logps/rejected": -260.86627197265625, "loss": 0.6929, "rewards/accuracies": 0.375, "rewards/chosen": -0.18002676963806152, "rewards/margins": 0.034969575703144073, "rewards/rejected": -0.214996337890625, "step": 53 }, { "epoch": 0.01, "learning_rate": 1.255813953488372e-07, "logits/chosen": -2.687702178955078, "logits/rejected": -2.5199923515319824, "logps/chosen": -261.8502197265625, "logps/rejected": -238.46640014648438, "loss": 0.5445, "rewards/accuracies": 0.75, "rewards/chosen": -0.5038968324661255, "rewards/margins": 0.7838702201843262, "rewards/rejected": -1.2877670526504517, "step": 54 }, { "epoch": 0.01, "learning_rate": 1.2790697674418602e-07, "logits/chosen": -2.3474626541137695, "logits/rejected": -2.340203285217285, "logps/chosen": -374.0192565917969, "logps/rejected": -368.4148864746094, "loss": 0.786, "rewards/accuracies": 0.5, "rewards/chosen": -0.4484564960002899, "rewards/margins": -0.07293173670768738, "rewards/rejected": -0.37552469968795776, "step": 55 }, { "epoch": 0.01, "learning_rate": 1.3023255813953487e-07, "logits/chosen": -2.3067140579223633, "logits/rejected": -2.245939016342163, "logps/chosen": -278.44842529296875, "logps/rejected": -341.2032470703125, "loss": 0.7258, "rewards/accuracies": 0.5, "rewards/chosen": -0.42136216163635254, "rewards/margins": -0.04227380454540253, "rewards/rejected": -0.3790883421897888, "step": 56 }, { "epoch": 0.01, "learning_rate": 1.3255813953488372e-07, "logits/chosen": -2.502161741256714, "logits/rejected": -2.46122145652771, "logps/chosen": -327.3040771484375, "logps/rejected": -251.4073028564453, "loss": 0.6466, "rewards/accuracies": 0.875, "rewards/chosen": -0.00623655691742897, "rewards/margins": 0.15304572880268097, "rewards/rejected": -0.15928226709365845, "step": 57 }, { "epoch": 0.01, "learning_rate": 1.3488372093023257e-07, "logits/chosen": -1.8423786163330078, "logits/rejected": -2.146819591522217, "logps/chosen": -539.5610961914062, "logps/rejected": -328.5838317871094, "loss": 0.7059, "rewards/accuracies": 0.5, "rewards/chosen": -0.28162315487861633, "rewards/margins": 0.10821359604597092, "rewards/rejected": -0.38983675837516785, "step": 58 }, { "epoch": 0.01, "learning_rate": 1.372093023255814e-07, "logits/chosen": -2.3620684146881104, "logits/rejected": -2.5322587490081787, "logps/chosen": -280.43353271484375, "logps/rejected": -188.90223693847656, "loss": 0.6032, "rewards/accuracies": 0.75, "rewards/chosen": -0.02366514876484871, "rewards/margins": 0.23062409460544586, "rewards/rejected": -0.2542892396450043, "step": 59 }, { "epoch": 0.01, "learning_rate": 1.3953488372093021e-07, "logits/chosen": -2.473050355911255, "logits/rejected": -2.5788612365722656, "logps/chosen": -179.7606658935547, "logps/rejected": -175.0801239013672, "loss": 0.6764, "rewards/accuracies": 0.75, "rewards/chosen": -0.15987436473369598, "rewards/margins": 0.27256450057029724, "rewards/rejected": -0.4324389100074768, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.4186046511627906e-07, "logits/chosen": -2.0576164722442627, "logits/rejected": -1.8822991847991943, "logps/chosen": -332.81524658203125, "logps/rejected": -298.131103515625, "loss": 0.853, "rewards/accuracies": 0.375, "rewards/chosen": -0.9428043365478516, "rewards/margins": -0.11977139860391617, "rewards/rejected": -0.8230329155921936, "step": 61 }, { "epoch": 0.01, "learning_rate": 1.441860465116279e-07, "logits/chosen": -2.597367286682129, "logits/rejected": -2.418121337890625, "logps/chosen": -481.1905822753906, "logps/rejected": -314.07086181640625, "loss": 0.8734, "rewards/accuracies": 0.5, "rewards/chosen": -0.5394330024719238, "rewards/margins": -0.0797206461429596, "rewards/rejected": -0.45971229672431946, "step": 62 }, { "epoch": 0.01, "learning_rate": 1.4651162790697673e-07, "logits/chosen": -2.741827964782715, "logits/rejected": -2.795870542526245, "logps/chosen": -317.8959045410156, "logps/rejected": -182.69577026367188, "loss": 0.7737, "rewards/accuracies": 0.5, "rewards/chosen": -0.18465913832187653, "rewards/margins": -0.006775900721549988, "rewards/rejected": -0.17788323760032654, "step": 63 }, { "epoch": 0.01, "learning_rate": 1.4883720930232558e-07, "logits/chosen": -1.9943172931671143, "logits/rejected": -2.168597459793091, "logps/chosen": -483.20599365234375, "logps/rejected": -412.0091552734375, "loss": 0.6568, "rewards/accuracies": 0.75, "rewards/chosen": -0.7220395803451538, "rewards/margins": 0.2720763385295868, "rewards/rejected": -0.9941158890724182, "step": 64 }, { "epoch": 0.01, "learning_rate": 1.511627906976744e-07, "logits/chosen": -2.6222081184387207, "logits/rejected": -2.667799949645996, "logps/chosen": -72.76377868652344, "logps/rejected": -141.27818298339844, "loss": 0.5185, "rewards/accuracies": 0.75, "rewards/chosen": -0.15879850089550018, "rewards/margins": 0.5578910708427429, "rewards/rejected": -0.7166895866394043, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.5348837209302325e-07, "logits/chosen": -2.5911662578582764, "logits/rejected": -2.658141613006592, "logps/chosen": -178.01080322265625, "logps/rejected": -188.33279418945312, "loss": 0.5075, "rewards/accuracies": 0.75, "rewards/chosen": -0.16364695131778717, "rewards/margins": 0.6743106842041016, "rewards/rejected": -0.8379575610160828, "step": 66 }, { "epoch": 0.01, "learning_rate": 1.558139534883721e-07, "logits/chosen": -1.8621940612792969, "logits/rejected": -2.5851268768310547, "logps/chosen": -301.75958251953125, "logps/rejected": -233.52359008789062, "loss": 0.7691, "rewards/accuracies": 0.5, "rewards/chosen": -0.8451168537139893, "rewards/margins": 0.09759727120399475, "rewards/rejected": -0.9427141547203064, "step": 67 }, { "epoch": 0.01, "learning_rate": 1.5813953488372092e-07, "logits/chosen": -2.4558424949645996, "logits/rejected": -2.2277941703796387, "logps/chosen": -216.7931365966797, "logps/rejected": -234.21463012695312, "loss": 0.8939, "rewards/accuracies": 0.5, "rewards/chosen": -0.8377339243888855, "rewards/margins": -0.1661604642868042, "rewards/rejected": -0.6715735197067261, "step": 68 }, { "epoch": 0.01, "learning_rate": 1.6046511627906975e-07, "logits/chosen": -2.363039255142212, "logits/rejected": -2.160311460494995, "logps/chosen": -356.49432373046875, "logps/rejected": -399.37176513671875, "loss": 0.6304, "rewards/accuracies": 0.5, "rewards/chosen": -0.2711089849472046, "rewards/margins": 0.2946273684501648, "rewards/rejected": -0.5657364130020142, "step": 69 }, { "epoch": 0.01, "learning_rate": 1.627906976744186e-07, "logits/chosen": -3.031538248062134, "logits/rejected": -3.012721538543701, "logps/chosen": -205.16964721679688, "logps/rejected": -218.19842529296875, "loss": 0.6057, "rewards/accuracies": 0.625, "rewards/chosen": -0.09832841157913208, "rewards/margins": 0.3259468674659729, "rewards/rejected": -0.424275279045105, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.6511627906976742e-07, "logits/chosen": -2.079557180404663, "logits/rejected": -2.255338191986084, "logps/chosen": -310.7487487792969, "logps/rejected": -236.25978088378906, "loss": 0.7614, "rewards/accuracies": 0.625, "rewards/chosen": -0.5114730596542358, "rewards/margins": -0.07345346361398697, "rewards/rejected": -0.43801963329315186, "step": 71 }, { "epoch": 0.01, "learning_rate": 1.6744186046511627e-07, "logits/chosen": -2.3511509895324707, "logits/rejected": -2.0647401809692383, "logps/chosen": -335.1462097167969, "logps/rejected": -420.6827087402344, "loss": 0.6492, "rewards/accuracies": 0.625, "rewards/chosen": -0.3721458613872528, "rewards/margins": 0.1769292652606964, "rewards/rejected": -0.5490751266479492, "step": 72 }, { "epoch": 0.01, "learning_rate": 1.6976744186046512e-07, "logits/chosen": -2.3079817295074463, "logits/rejected": -2.32616925239563, "logps/chosen": -188.2433319091797, "logps/rejected": -237.46751403808594, "loss": 0.6356, "rewards/accuracies": 0.75, "rewards/chosen": -0.11632364243268967, "rewards/margins": 0.1776544749736786, "rewards/rejected": -0.29397812485694885, "step": 73 }, { "epoch": 0.01, "learning_rate": 1.7209302325581396e-07, "logits/chosen": -2.3757071495056152, "logits/rejected": -2.3956480026245117, "logps/chosen": -221.7606658935547, "logps/rejected": -181.85850524902344, "loss": 0.776, "rewards/accuracies": 0.625, "rewards/chosen": -0.1650571972131729, "rewards/margins": -0.011085748672485352, "rewards/rejected": -0.15397146344184875, "step": 74 }, { "epoch": 0.01, "learning_rate": 1.7441860465116279e-07, "logits/chosen": -1.8253350257873535, "logits/rejected": -1.9870967864990234, "logps/chosen": -185.83123779296875, "logps/rejected": -210.3099365234375, "loss": 0.8035, "rewards/accuracies": 0.5, "rewards/chosen": -0.48037785291671753, "rewards/margins": -0.07544104009866714, "rewards/rejected": -0.404936820268631, "step": 75 }, { "epoch": 0.01, "learning_rate": 1.7674418604651164e-07, "logits/chosen": -2.8733153343200684, "logits/rejected": -2.8293955326080322, "logps/chosen": -224.70474243164062, "logps/rejected": -145.5535430908203, "loss": 0.9047, "rewards/accuracies": 0.375, "rewards/chosen": -0.5753843784332275, "rewards/margins": -0.1298551708459854, "rewards/rejected": -0.44552919268608093, "step": 76 }, { "epoch": 0.01, "learning_rate": 1.7906976744186043e-07, "logits/chosen": -2.5145156383514404, "logits/rejected": -2.584648609161377, "logps/chosen": -214.8340301513672, "logps/rejected": -285.63616943359375, "loss": 0.4961, "rewards/accuracies": 0.75, "rewards/chosen": -0.1631683111190796, "rewards/margins": 0.5445789098739624, "rewards/rejected": -0.707747220993042, "step": 77 }, { "epoch": 0.01, "learning_rate": 1.8139534883720928e-07, "logits/chosen": -2.116382598876953, "logits/rejected": -2.2611207962036133, "logps/chosen": -139.19638061523438, "logps/rejected": -164.49725341796875, "loss": 2.3158, "rewards/accuracies": 0.625, "rewards/chosen": -2.4383955001831055, "rewards/margins": -1.4515013694763184, "rewards/rejected": -0.986893892288208, "step": 78 }, { "epoch": 0.01, "learning_rate": 1.8372093023255813e-07, "logits/chosen": -2.3509247303009033, "logits/rejected": -2.111485242843628, "logps/chosen": -170.42820739746094, "logps/rejected": -262.1083679199219, "loss": 0.6916, "rewards/accuracies": 0.5, "rewards/chosen": -0.7016013264656067, "rewards/margins": 0.05304943770170212, "rewards/rejected": -0.7546507716178894, "step": 79 }, { "epoch": 0.01, "learning_rate": 1.8604651162790698e-07, "logits/chosen": -2.5512146949768066, "logits/rejected": -2.405355930328369, "logps/chosen": -302.0247802734375, "logps/rejected": -332.14385986328125, "loss": 0.6452, "rewards/accuracies": 0.625, "rewards/chosen": -0.21803753077983856, "rewards/margins": 0.2606792151927948, "rewards/rejected": -0.47871676087379456, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.883720930232558e-07, "logits/chosen": -2.649334192276001, "logits/rejected": -2.5358147621154785, "logps/chosen": -201.7420196533203, "logps/rejected": -186.45254516601562, "loss": 0.6067, "rewards/accuracies": 0.5, "rewards/chosen": -0.23455172777175903, "rewards/margins": 0.5783436298370361, "rewards/rejected": -0.8128952383995056, "step": 81 }, { "epoch": 0.01, "learning_rate": 1.9069767441860465e-07, "logits/chosen": -2.448350429534912, "logits/rejected": -2.317558765411377, "logps/chosen": -256.4176025390625, "logps/rejected": -312.09637451171875, "loss": 0.5629, "rewards/accuracies": 0.75, "rewards/chosen": -0.2117278128862381, "rewards/margins": 0.3453845977783203, "rewards/rejected": -0.5571123957633972, "step": 82 }, { "epoch": 0.01, "learning_rate": 1.930232558139535e-07, "logits/chosen": -2.4099225997924805, "logits/rejected": -2.5384576320648193, "logps/chosen": -395.0341796875, "logps/rejected": -222.17771911621094, "loss": 0.9035, "rewards/accuracies": 0.375, "rewards/chosen": -0.3871869444847107, "rewards/margins": -0.2399856150150299, "rewards/rejected": -0.14720135927200317, "step": 83 }, { "epoch": 0.01, "learning_rate": 1.9534883720930232e-07, "logits/chosen": -2.3440794944763184, "logits/rejected": -2.2873172760009766, "logps/chosen": -220.02389526367188, "logps/rejected": -284.811767578125, "loss": 0.8451, "rewards/accuracies": 0.375, "rewards/chosen": -0.28907763957977295, "rewards/margins": -0.22321955859661102, "rewards/rejected": -0.0658581331372261, "step": 84 }, { "epoch": 0.01, "learning_rate": 1.9767441860465114e-07, "logits/chosen": -2.413980007171631, "logits/rejected": -2.5064969062805176, "logps/chosen": -337.5306091308594, "logps/rejected": -304.260498046875, "loss": 0.9087, "rewards/accuracies": 0.375, "rewards/chosen": -0.807115912437439, "rewards/margins": -0.2305152714252472, "rewards/rejected": -0.5766006112098694, "step": 85 }, { "epoch": 0.01, "learning_rate": 2e-07, "logits/chosen": -2.4838058948516846, "logits/rejected": -2.0719006061553955, "logps/chosen": -131.3878173828125, "logps/rejected": -252.70587158203125, "loss": 0.623, "rewards/accuracies": 0.5, "rewards/chosen": 0.02195039391517639, "rewards/margins": 0.24605891108512878, "rewards/rejected": -0.22410848736763, "step": 86 }, { "epoch": 0.01, "learning_rate": 2.0232558139534881e-07, "logits/chosen": -2.2932047843933105, "logits/rejected": -2.3045012950897217, "logps/chosen": -145.06350708007812, "logps/rejected": -107.056884765625, "loss": 0.8407, "rewards/accuracies": 0.375, "rewards/chosen": -0.2594671845436096, "rewards/margins": -0.22573591768741608, "rewards/rejected": -0.03373127430677414, "step": 87 }, { "epoch": 0.01, "learning_rate": 2.0465116279069766e-07, "logits/chosen": -2.6768062114715576, "logits/rejected": -2.4649171829223633, "logps/chosen": -354.6230773925781, "logps/rejected": -442.5867004394531, "loss": 0.596, "rewards/accuracies": 0.625, "rewards/chosen": -0.3420978784561157, "rewards/margins": 0.3304292857646942, "rewards/rejected": -0.6725271344184875, "step": 88 }, { "epoch": 0.01, "learning_rate": 2.069767441860465e-07, "logits/chosen": -2.3527350425720215, "logits/rejected": -2.3163013458251953, "logps/chosen": -232.95733642578125, "logps/rejected": -156.99827575683594, "loss": 0.8092, "rewards/accuracies": 0.625, "rewards/chosen": -0.36036771535873413, "rewards/margins": 0.1983512043952942, "rewards/rejected": -0.5587188601493835, "step": 89 }, { "epoch": 0.01, "learning_rate": 2.0930232558139533e-07, "logits/chosen": -2.6749043464660645, "logits/rejected": -2.6445324420928955, "logps/chosen": -335.8284606933594, "logps/rejected": -277.89697265625, "loss": 0.5855, "rewards/accuracies": 0.625, "rewards/chosen": -0.3249202370643616, "rewards/margins": 0.4027801752090454, "rewards/rejected": -0.7277003526687622, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.1162790697674418e-07, "logits/chosen": -2.066859245300293, "logits/rejected": -2.3091135025024414, "logps/chosen": -287.7630615234375, "logps/rejected": -233.48422241210938, "loss": 0.5942, "rewards/accuracies": 0.5, "rewards/chosen": -0.33292651176452637, "rewards/margins": 0.33223992586135864, "rewards/rejected": -0.665166437625885, "step": 91 }, { "epoch": 0.01, "learning_rate": 2.1395348837209303e-07, "logits/chosen": -2.086124897003174, "logits/rejected": -1.982297420501709, "logps/chosen": -209.50343322753906, "logps/rejected": -193.66485595703125, "loss": 1.1486, "rewards/accuracies": 0.25, "rewards/chosen": -0.9113097190856934, "rewards/margins": -0.490678608417511, "rewards/rejected": -0.42063108086586, "step": 92 }, { "epoch": 0.01, "learning_rate": 2.1627906976744183e-07, "logits/chosen": -2.570356845855713, "logits/rejected": -2.636735677719116, "logps/chosen": -183.26925659179688, "logps/rejected": -170.32650756835938, "loss": 0.8528, "rewards/accuracies": 0.625, "rewards/chosen": -0.8393935561180115, "rewards/margins": -0.02400238811969757, "rewards/rejected": -0.8153911828994751, "step": 93 }, { "epoch": 0.01, "learning_rate": 2.1860465116279068e-07, "logits/chosen": -2.406005620956421, "logits/rejected": -2.591987133026123, "logps/chosen": -218.12379455566406, "logps/rejected": -201.21875, "loss": 0.5105, "rewards/accuracies": 0.875, "rewards/chosen": -0.163734570145607, "rewards/margins": 0.5999506711959839, "rewards/rejected": -0.7636852860450745, "step": 94 }, { "epoch": 0.01, "learning_rate": 2.2093023255813952e-07, "logits/chosen": -2.2681570053100586, "logits/rejected": -2.3083791732788086, "logps/chosen": -242.9326629638672, "logps/rejected": -310.1129455566406, "loss": 0.5452, "rewards/accuracies": 0.75, "rewards/chosen": -0.14550848305225372, "rewards/margins": 0.3707062602043152, "rewards/rejected": -0.5162147283554077, "step": 95 }, { "epoch": 0.01, "learning_rate": 2.2325581395348835e-07, "logits/chosen": -2.6006369590759277, "logits/rejected": -2.2266733646392822, "logps/chosen": -117.85919189453125, "logps/rejected": -216.32598876953125, "loss": 0.6745, "rewards/accuracies": 0.5, "rewards/chosen": -0.5499044060707092, "rewards/margins": 0.27583783864974976, "rewards/rejected": -0.825742244720459, "step": 96 }, { "epoch": 0.01, "learning_rate": 2.255813953488372e-07, "logits/chosen": -1.6210027933120728, "logits/rejected": -2.2731971740722656, "logps/chosen": -555.565673828125, "logps/rejected": -290.25115966796875, "loss": 1.204, "rewards/accuracies": 0.5, "rewards/chosen": -0.9664278030395508, "rewards/margins": -0.6703994274139404, "rewards/rejected": -0.29602834582328796, "step": 97 }, { "epoch": 0.01, "learning_rate": 2.2790697674418604e-07, "logits/chosen": -2.392503499984741, "logits/rejected": -2.2693846225738525, "logps/chosen": -141.28466796875, "logps/rejected": -231.76806640625, "loss": 0.5255, "rewards/accuracies": 0.75, "rewards/chosen": -1.0303176641464233, "rewards/margins": 0.6791374087333679, "rewards/rejected": -1.709455132484436, "step": 98 }, { "epoch": 0.01, "learning_rate": 2.302325581395349e-07, "logits/chosen": -2.2897956371307373, "logits/rejected": -2.125539779663086, "logps/chosen": -371.5478515625, "logps/rejected": -412.0826416015625, "loss": 0.7491, "rewards/accuracies": 0.5, "rewards/chosen": -0.6182222962379456, "rewards/margins": -0.024349048733711243, "rewards/rejected": -0.5938732028007507, "step": 99 }, { "epoch": 0.01, "learning_rate": 2.3255813953488372e-07, "logits/chosen": -1.8030691146850586, "logits/rejected": -2.1702818870544434, "logps/chosen": -447.7699890136719, "logps/rejected": -252.9459686279297, "loss": 0.886, "rewards/accuracies": 0.625, "rewards/chosen": -0.49596014618873596, "rewards/margins": -0.2117862105369568, "rewards/rejected": -0.2841739356517792, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.3488372093023254e-07, "logits/chosen": -2.4537315368652344, "logits/rejected": -2.274648666381836, "logps/chosen": -348.35028076171875, "logps/rejected": -277.3187255859375, "loss": 0.6709, "rewards/accuracies": 0.5, "rewards/chosen": -0.22102771699428558, "rewards/margins": 0.10838757455348969, "rewards/rejected": -0.3294152617454529, "step": 101 }, { "epoch": 0.01, "learning_rate": 2.3720930232558136e-07, "logits/chosen": -2.1595985889434814, "logits/rejected": -2.1457972526550293, "logps/chosen": -143.21351623535156, "logps/rejected": -197.78945922851562, "loss": 0.8263, "rewards/accuracies": 0.625, "rewards/chosen": -0.42699629068374634, "rewards/margins": -0.18382105231285095, "rewards/rejected": -0.243175208568573, "step": 102 }, { "epoch": 0.01, "learning_rate": 2.3953488372093024e-07, "logits/chosen": -2.7874536514282227, "logits/rejected": -2.850274085998535, "logps/chosen": -410.5032958984375, "logps/rejected": -342.883544921875, "loss": 0.5105, "rewards/accuracies": 0.875, "rewards/chosen": -0.22132186591625214, "rewards/margins": 0.49288198351860046, "rewards/rejected": -0.7142038345336914, "step": 103 }, { "epoch": 0.01, "learning_rate": 2.4186046511627906e-07, "logits/chosen": -1.8586337566375732, "logits/rejected": -1.871573805809021, "logps/chosen": -201.969482421875, "logps/rejected": -206.4261474609375, "loss": 0.7765, "rewards/accuracies": 0.5, "rewards/chosen": -0.36011138558387756, "rewards/margins": -0.09883344173431396, "rewards/rejected": -0.261277973651886, "step": 104 }, { "epoch": 0.01, "learning_rate": 2.441860465116279e-07, "logits/chosen": -1.9981329441070557, "logits/rejected": -2.097548007965088, "logps/chosen": -170.4879608154297, "logps/rejected": -178.36611938476562, "loss": 0.6712, "rewards/accuracies": 0.625, "rewards/chosen": -1.0482807159423828, "rewards/margins": 0.1192755326628685, "rewards/rejected": -1.1675562858581543, "step": 105 }, { "epoch": 0.01, "learning_rate": 2.4651162790697676e-07, "logits/chosen": -2.8918509483337402, "logits/rejected": -2.8618173599243164, "logps/chosen": -163.16221618652344, "logps/rejected": -220.1534423828125, "loss": 0.6941, "rewards/accuracies": 0.5, "rewards/chosen": -0.49197301268577576, "rewards/margins": 0.0901225209236145, "rewards/rejected": -0.5820955038070679, "step": 106 }, { "epoch": 0.01, "learning_rate": 2.488372093023256e-07, "logits/chosen": -2.243861198425293, "logits/rejected": -2.241596221923828, "logps/chosen": -272.9605712890625, "logps/rejected": -212.6696319580078, "loss": 0.8962, "rewards/accuracies": 0.75, "rewards/chosen": -0.8061225414276123, "rewards/margins": -0.11616721749305725, "rewards/rejected": -0.6899554133415222, "step": 107 }, { "epoch": 0.01, "learning_rate": 2.511627906976744e-07, "logits/chosen": -2.4468531608581543, "logits/rejected": -2.6342649459838867, "logps/chosen": -302.49273681640625, "logps/rejected": -250.5012969970703, "loss": 0.9525, "rewards/accuracies": 0.625, "rewards/chosen": -0.6266262531280518, "rewards/margins": -0.2894216477870941, "rewards/rejected": -0.33720460534095764, "step": 108 }, { "epoch": 0.01, "learning_rate": 2.534883720930232e-07, "logits/chosen": -1.8889508247375488, "logits/rejected": -1.847444772720337, "logps/chosen": -347.6550598144531, "logps/rejected": -330.3041076660156, "loss": 1.243, "rewards/accuracies": 0.375, "rewards/chosen": -1.2972767353057861, "rewards/margins": -0.44702115654945374, "rewards/rejected": -0.8502554893493652, "step": 109 }, { "epoch": 0.01, "learning_rate": 2.5581395348837204e-07, "logits/chosen": -2.876573324203491, "logits/rejected": -2.8644440174102783, "logps/chosen": -210.26419067382812, "logps/rejected": -189.56478881835938, "loss": 1.0044, "rewards/accuracies": 0.5, "rewards/chosen": -0.49664145708084106, "rewards/margins": -0.3557063937187195, "rewards/rejected": -0.1409350335597992, "step": 110 }, { "epoch": 0.01, "learning_rate": 2.581395348837209e-07, "logits/chosen": -2.6873810291290283, "logits/rejected": -2.6178128719329834, "logps/chosen": -156.33543395996094, "logps/rejected": -181.47691345214844, "loss": 0.6194, "rewards/accuracies": 0.625, "rewards/chosen": -0.253256231546402, "rewards/margins": 0.21962662041187286, "rewards/rejected": -0.47288286685943604, "step": 111 }, { "epoch": 0.01, "learning_rate": 2.6046511627906974e-07, "logits/chosen": -2.4136996269226074, "logits/rejected": -2.627744436264038, "logps/chosen": -77.70036315917969, "logps/rejected": -160.221923828125, "loss": 0.5063, "rewards/accuracies": 0.875, "rewards/chosen": -0.4397839605808258, "rewards/margins": 0.7447307109832764, "rewards/rejected": -1.1845147609710693, "step": 112 }, { "epoch": 0.01, "learning_rate": 2.627906976744186e-07, "logits/chosen": -2.1982645988464355, "logits/rejected": -2.1680996417999268, "logps/chosen": -284.7992858886719, "logps/rejected": -381.1324157714844, "loss": 0.5805, "rewards/accuracies": 0.75, "rewards/chosen": -0.15814796090126038, "rewards/margins": 0.31709104776382446, "rewards/rejected": -0.4752390384674072, "step": 113 }, { "epoch": 0.01, "learning_rate": 2.6511627906976744e-07, "logits/chosen": -1.969825029373169, "logits/rejected": -1.9821308851242065, "logps/chosen": -254.40682983398438, "logps/rejected": -240.384033203125, "loss": 0.8084, "rewards/accuracies": 0.5, "rewards/chosen": -0.2654780447483063, "rewards/margins": -0.10995368659496307, "rewards/rejected": -0.155524343252182, "step": 114 }, { "epoch": 0.01, "learning_rate": 2.6744186046511626e-07, "logits/chosen": -1.8670458793640137, "logits/rejected": -1.8168087005615234, "logps/chosen": -424.1560363769531, "logps/rejected": -379.2054138183594, "loss": 0.5846, "rewards/accuracies": 0.625, "rewards/chosen": -0.963326632976532, "rewards/margins": 0.3254411518573761, "rewards/rejected": -1.2887678146362305, "step": 115 }, { "epoch": 0.01, "learning_rate": 2.6976744186046514e-07, "logits/chosen": -2.0922317504882812, "logits/rejected": -1.9747600555419922, "logps/chosen": -196.18214416503906, "logps/rejected": -254.14593505859375, "loss": 0.7107, "rewards/accuracies": 0.625, "rewards/chosen": -0.38838067650794983, "rewards/margins": 0.14774498343467712, "rewards/rejected": -0.536125659942627, "step": 116 }, { "epoch": 0.01, "learning_rate": 2.720930232558139e-07, "logits/chosen": -2.570683002471924, "logits/rejected": -2.6108639240264893, "logps/chosen": -197.09706115722656, "logps/rejected": -157.64486694335938, "loss": 1.0065, "rewards/accuracies": 0.5, "rewards/chosen": -0.7618712186813354, "rewards/margins": -0.4027426540851593, "rewards/rejected": -0.3591286242008209, "step": 117 }, { "epoch": 0.01, "learning_rate": 2.744186046511628e-07, "logits/chosen": -2.60833477973938, "logits/rejected": -2.522894859313965, "logps/chosen": -243.58566284179688, "logps/rejected": -214.76171875, "loss": 0.4848, "rewards/accuracies": 1.0, "rewards/chosen": 0.10729708522558212, "rewards/margins": 0.991612434387207, "rewards/rejected": -0.8843153119087219, "step": 118 }, { "epoch": 0.01, "learning_rate": 2.767441860465116e-07, "logits/chosen": -2.2670044898986816, "logits/rejected": -2.2630767822265625, "logps/chosen": -168.00486755371094, "logps/rejected": -187.48045349121094, "loss": 0.7306, "rewards/accuracies": 0.625, "rewards/chosen": -0.1719190776348114, "rewards/margins": 0.003288939595222473, "rewards/rejected": -0.17520800232887268, "step": 119 }, { "epoch": 0.01, "learning_rate": 2.7906976744186043e-07, "logits/chosen": -2.029635190963745, "logits/rejected": -2.5271973609924316, "logps/chosen": -410.35430908203125, "logps/rejected": -224.42367553710938, "loss": 0.7204, "rewards/accuracies": 0.5, "rewards/chosen": -0.474770724773407, "rewards/margins": 0.07804751396179199, "rewards/rejected": -0.5528181791305542, "step": 120 }, { "epoch": 0.01, "learning_rate": 2.813953488372093e-07, "logits/chosen": -2.1471428871154785, "logits/rejected": -2.099607467651367, "logps/chosen": -376.0931396484375, "logps/rejected": -256.83087158203125, "loss": 0.4298, "rewards/accuracies": 0.875, "rewards/chosen": 0.009108111262321472, "rewards/margins": 0.683029055595398, "rewards/rejected": -0.6739209294319153, "step": 121 }, { "epoch": 0.01, "learning_rate": 2.837209302325581e-07, "logits/chosen": -1.8627537488937378, "logits/rejected": -2.174872636795044, "logps/chosen": -577.5985107421875, "logps/rejected": -456.60791015625, "loss": 0.4403, "rewards/accuracies": 1.0, "rewards/chosen": 0.033387936651706696, "rewards/margins": 0.6115955114364624, "rewards/rejected": -0.5782076120376587, "step": 122 }, { "epoch": 0.01, "learning_rate": 2.8604651162790695e-07, "logits/chosen": -2.4507200717926025, "logits/rejected": -2.474109411239624, "logps/chosen": -164.4906005859375, "logps/rejected": -224.8709716796875, "loss": 0.507, "rewards/accuracies": 0.75, "rewards/chosen": -0.604463517665863, "rewards/margins": 1.556099772453308, "rewards/rejected": -2.1605632305145264, "step": 123 }, { "epoch": 0.01, "learning_rate": 2.883720930232558e-07, "logits/chosen": -2.047478675842285, "logits/rejected": -2.321160078048706, "logps/chosen": -287.2242431640625, "logps/rejected": -174.9025115966797, "loss": 0.8151, "rewards/accuracies": 0.25, "rewards/chosen": -0.6188247799873352, "rewards/margins": -0.038447946310043335, "rewards/rejected": -0.5803768634796143, "step": 124 }, { "epoch": 0.01, "learning_rate": 2.9069767441860464e-07, "logits/chosen": -3.026174545288086, "logits/rejected": -3.0613205432891846, "logps/chosen": -56.339683532714844, "logps/rejected": -141.6298065185547, "loss": 0.6197, "rewards/accuracies": 0.75, "rewards/chosen": -0.13143277168273926, "rewards/margins": 0.16544213891029358, "rewards/rejected": -0.29687491059303284, "step": 125 }, { "epoch": 0.01, "learning_rate": 2.9302325581395347e-07, "logits/chosen": -2.4416348934173584, "logits/rejected": -2.265141725540161, "logps/chosen": -243.73532104492188, "logps/rejected": -367.46075439453125, "loss": 0.6641, "rewards/accuracies": 0.5, "rewards/chosen": -0.312120646238327, "rewards/margins": 0.17439718544483185, "rewards/rejected": -0.4865178167819977, "step": 126 }, { "epoch": 0.01, "learning_rate": 2.953488372093023e-07, "logits/chosen": -2.1385931968688965, "logits/rejected": -2.1570651531219482, "logps/chosen": -376.7693176269531, "logps/rejected": -324.1436767578125, "loss": 0.6087, "rewards/accuracies": 0.625, "rewards/chosen": -0.21629011631011963, "rewards/margins": 0.3393087387084961, "rewards/rejected": -0.5555988550186157, "step": 127 }, { "epoch": 0.01, "learning_rate": 2.9767441860465116e-07, "logits/chosen": -2.594001054763794, "logits/rejected": -2.5332846641540527, "logps/chosen": -265.2559814453125, "logps/rejected": -225.3300323486328, "loss": 0.4987, "rewards/accuracies": 0.75, "rewards/chosen": -0.11694058775901794, "rewards/margins": 0.7060776948928833, "rewards/rejected": -0.8230182528495789, "step": 128 }, { "epoch": 0.02, "learning_rate": 3e-07, "logits/chosen": -2.0931873321533203, "logits/rejected": -2.004478693008423, "logps/chosen": -155.2783966064453, "logps/rejected": -240.91732788085938, "loss": 0.7649, "rewards/accuracies": 0.625, "rewards/chosen": -0.4195050895214081, "rewards/margins": 0.28783467411994934, "rewards/rejected": -0.7073397636413574, "step": 129 }, { "epoch": 0.02, "learning_rate": 2.9996456832408174e-07, "logits/chosen": -2.7366340160369873, "logits/rejected": -2.691190242767334, "logps/chosen": -295.0311279296875, "logps/rejected": -242.97836303710938, "loss": 0.4413, "rewards/accuracies": 1.0, "rewards/chosen": 0.11736232787370682, "rewards/margins": 0.6299964785575867, "rewards/rejected": -0.5126341581344604, "step": 130 }, { "epoch": 0.02, "learning_rate": 2.9992913664816343e-07, "logits/chosen": -2.417391061782837, "logits/rejected": -2.5532987117767334, "logps/chosen": -463.8819885253906, "logps/rejected": -318.5346374511719, "loss": 0.7805, "rewards/accuracies": 0.5, "rewards/chosen": -0.17544816434383392, "rewards/margins": -0.02362711727619171, "rewards/rejected": -0.1518210470676422, "step": 131 }, { "epoch": 0.02, "learning_rate": 2.998937049722452e-07, "logits/chosen": -2.3182525634765625, "logits/rejected": -2.6131339073181152, "logps/chosen": -341.30474853515625, "logps/rejected": -267.3067626953125, "loss": 0.8798, "rewards/accuracies": 0.5, "rewards/chosen": -0.9353861212730408, "rewards/margins": -0.11911562830209732, "rewards/rejected": -0.8162704706192017, "step": 132 }, { "epoch": 0.02, "learning_rate": 2.998582732963269e-07, "logits/chosen": -2.659172296524048, "logits/rejected": -2.9109010696411133, "logps/chosen": -389.9925231933594, "logps/rejected": -177.78512573242188, "loss": 0.979, "rewards/accuracies": 0.25, "rewards/chosen": -0.6689620018005371, "rewards/margins": -0.3065325915813446, "rewards/rejected": -0.3624293804168701, "step": 133 }, { "epoch": 0.02, "learning_rate": 2.998228416204086e-07, "logits/chosen": -2.85929799079895, "logits/rejected": -2.7307815551757812, "logps/chosen": -206.71617126464844, "logps/rejected": -264.13751220703125, "loss": 0.8968, "rewards/accuracies": 0.625, "rewards/chosen": -1.0276267528533936, "rewards/margins": -0.05892267823219299, "rewards/rejected": -0.968704104423523, "step": 134 }, { "epoch": 0.02, "learning_rate": 2.9978740994449037e-07, "logits/chosen": -1.618591070175171, "logits/rejected": -1.894781470298767, "logps/chosen": -425.32305908203125, "logps/rejected": -257.943603515625, "loss": 0.5959, "rewards/accuracies": 0.75, "rewards/chosen": -0.24507570266723633, "rewards/margins": 0.26544684171676636, "rewards/rejected": -0.5105225443840027, "step": 135 }, { "epoch": 0.02, "learning_rate": 2.997519782685721e-07, "logits/chosen": -2.729038953781128, "logits/rejected": -2.7132813930511475, "logps/chosen": -161.44418334960938, "logps/rejected": -258.60394287109375, "loss": 0.521, "rewards/accuracies": 0.75, "rewards/chosen": -0.044339098036289215, "rewards/margins": 0.5262002348899841, "rewards/rejected": -0.5705393552780151, "step": 136 }, { "epoch": 0.02, "learning_rate": 2.997165465926538e-07, "logits/chosen": -2.5893824100494385, "logits/rejected": -2.2048580646514893, "logps/chosen": -209.68711853027344, "logps/rejected": -288.78070068359375, "loss": 0.6152, "rewards/accuracies": 0.625, "rewards/chosen": -0.3048330545425415, "rewards/margins": 0.313556432723999, "rewards/rejected": -0.6183894872665405, "step": 137 }, { "epoch": 0.02, "learning_rate": 2.9968111491673556e-07, "logits/chosen": -2.4292397499084473, "logits/rejected": -2.253242254257202, "logps/chosen": -294.5284729003906, "logps/rejected": -344.6085510253906, "loss": 0.5115, "rewards/accuracies": 0.75, "rewards/chosen": -0.5531035661697388, "rewards/margins": 1.3905061483383179, "rewards/rejected": -1.943609595298767, "step": 138 }, { "epoch": 0.02, "learning_rate": 2.9964568324081726e-07, "logits/chosen": -2.594527006149292, "logits/rejected": -2.4379348754882812, "logps/chosen": -142.90951538085938, "logps/rejected": -235.33216857910156, "loss": 0.6526, "rewards/accuracies": 0.75, "rewards/chosen": -0.42056623101234436, "rewards/margins": 0.17449112236499786, "rewards/rejected": -0.5950573682785034, "step": 139 }, { "epoch": 0.02, "learning_rate": 2.99610251564899e-07, "logits/chosen": -2.115158796310425, "logits/rejected": -2.400864601135254, "logps/chosen": -349.9581298828125, "logps/rejected": -284.10321044921875, "loss": 0.6305, "rewards/accuracies": 0.625, "rewards/chosen": -1.2188100814819336, "rewards/margins": 0.8487216234207153, "rewards/rejected": -2.0675318241119385, "step": 140 }, { "epoch": 0.02, "learning_rate": 2.9957481988898076e-07, "logits/chosen": -2.687493324279785, "logits/rejected": -2.610898971557617, "logps/chosen": -169.18585205078125, "logps/rejected": -354.4629211425781, "loss": 0.5257, "rewards/accuracies": 0.875, "rewards/chosen": -0.3375472128391266, "rewards/margins": 0.40620124340057373, "rewards/rejected": -0.7437484860420227, "step": 141 }, { "epoch": 0.02, "learning_rate": 2.9953938821306245e-07, "logits/chosen": -2.8204967975616455, "logits/rejected": -2.859799861907959, "logps/chosen": -91.15990447998047, "logps/rejected": -102.08074188232422, "loss": 0.7275, "rewards/accuracies": 0.625, "rewards/chosen": -0.34524649381637573, "rewards/margins": 0.06920135021209717, "rewards/rejected": -0.4144478440284729, "step": 142 }, { "epoch": 0.02, "learning_rate": 2.995039565371442e-07, "logits/chosen": -2.2518529891967773, "logits/rejected": -2.300924301147461, "logps/chosen": -256.9783020019531, "logps/rejected": -238.1842041015625, "loss": 0.589, "rewards/accuracies": 0.875, "rewards/chosen": -0.3805345296859741, "rewards/margins": 0.47803041338920593, "rewards/rejected": -0.8585649728775024, "step": 143 }, { "epoch": 0.02, "learning_rate": 2.994685248612259e-07, "logits/chosen": -2.2423295974731445, "logits/rejected": -2.2950899600982666, "logps/chosen": -212.7620849609375, "logps/rejected": -216.1529083251953, "loss": 0.5701, "rewards/accuracies": 0.625, "rewards/chosen": -0.23712702095508575, "rewards/margins": 0.41263240575790405, "rewards/rejected": -0.6497594714164734, "step": 144 }, { "epoch": 0.02, "learning_rate": 2.9943309318530765e-07, "logits/chosen": -2.735442638397217, "logits/rejected": -2.798872709274292, "logps/chosen": -167.79116821289062, "logps/rejected": -233.87918090820312, "loss": 0.5797, "rewards/accuracies": 0.625, "rewards/chosen": -0.15597400069236755, "rewards/margins": 0.42198696732521057, "rewards/rejected": -0.5779609680175781, "step": 145 }, { "epoch": 0.02, "learning_rate": 2.9939766150938934e-07, "logits/chosen": -1.4767216444015503, "logits/rejected": -2.1149492263793945, "logps/chosen": -628.093994140625, "logps/rejected": -442.81524658203125, "loss": 0.7641, "rewards/accuracies": 0.375, "rewards/chosen": -0.8022739887237549, "rewards/margins": 0.36251893639564514, "rewards/rejected": -1.1647928953170776, "step": 146 }, { "epoch": 0.02, "learning_rate": 2.9936222983347114e-07, "logits/chosen": -2.5397250652313232, "logits/rejected": -2.699514389038086, "logps/chosen": -329.48687744140625, "logps/rejected": -358.07769775390625, "loss": 0.6317, "rewards/accuracies": 0.625, "rewards/chosen": -0.4497101902961731, "rewards/margins": 0.17475473880767822, "rewards/rejected": -0.6244649291038513, "step": 147 }, { "epoch": 0.02, "learning_rate": 2.9932679815755284e-07, "logits/chosen": -2.2238693237304688, "logits/rejected": -2.359199285507202, "logps/chosen": -375.20623779296875, "logps/rejected": -263.2727966308594, "loss": 0.7545, "rewards/accuracies": 0.5, "rewards/chosen": -0.4355597496032715, "rewards/margins": -0.070066437125206, "rewards/rejected": -0.3654933571815491, "step": 148 }, { "epoch": 0.02, "learning_rate": 2.992913664816346e-07, "logits/chosen": -2.275559425354004, "logits/rejected": -2.1012988090515137, "logps/chosen": -267.72418212890625, "logps/rejected": -294.7813720703125, "loss": 0.7864, "rewards/accuracies": 0.625, "rewards/chosen": -0.7298777103424072, "rewards/margins": 0.41024768352508545, "rewards/rejected": -1.1401255130767822, "step": 149 }, { "epoch": 0.02, "learning_rate": 2.992559348057163e-07, "logits/chosen": -2.740659475326538, "logits/rejected": -2.6224112510681152, "logps/chosen": -269.9718017578125, "logps/rejected": -216.44573974609375, "loss": 0.6544, "rewards/accuracies": 0.75, "rewards/chosen": -0.6055588126182556, "rewards/margins": 0.40005630254745483, "rewards/rejected": -1.0056151151657104, "step": 150 }, { "epoch": 0.02, "learning_rate": 2.9922050312979803e-07, "logits/chosen": -1.780271053314209, "logits/rejected": -2.3586924076080322, "logps/chosen": -353.4786682128906, "logps/rejected": -280.44384765625, "loss": 0.6061, "rewards/accuracies": 0.625, "rewards/chosen": -0.6104757785797119, "rewards/margins": 0.7683894634246826, "rewards/rejected": -1.378865361213684, "step": 151 }, { "epoch": 0.02, "learning_rate": 2.991850714538798e-07, "logits/chosen": -1.7893050909042358, "logits/rejected": -1.875062346458435, "logps/chosen": -321.23736572265625, "logps/rejected": -259.16925048828125, "loss": 1.0307, "rewards/accuracies": 0.625, "rewards/chosen": -0.624213457107544, "rewards/margins": -0.25228065252304077, "rewards/rejected": -0.37193283438682556, "step": 152 }, { "epoch": 0.02, "learning_rate": 2.991496397779615e-07, "logits/chosen": -2.173344612121582, "logits/rejected": -2.4468865394592285, "logps/chosen": -339.6275634765625, "logps/rejected": -247.14126586914062, "loss": 0.6515, "rewards/accuracies": 0.625, "rewards/chosen": -1.259911060333252, "rewards/margins": 0.30777859687805176, "rewards/rejected": -1.5676896572113037, "step": 153 }, { "epoch": 0.02, "learning_rate": 2.991142081020432e-07, "logits/chosen": -2.2529501914978027, "logits/rejected": -2.5658624172210693, "logps/chosen": -340.80841064453125, "logps/rejected": -239.71432495117188, "loss": 0.8936, "rewards/accuracies": 0.5, "rewards/chosen": -0.8029078841209412, "rewards/margins": -0.23260381817817688, "rewards/rejected": -0.5703040361404419, "step": 154 }, { "epoch": 0.02, "learning_rate": 2.990787764261249e-07, "logits/chosen": -2.510453701019287, "logits/rejected": -2.5344133377075195, "logps/chosen": -109.82048034667969, "logps/rejected": -171.2945556640625, "loss": 0.514, "rewards/accuracies": 0.875, "rewards/chosen": -0.055533893406391144, "rewards/margins": 0.4897593855857849, "rewards/rejected": -0.5452932715415955, "step": 155 }, { "epoch": 0.02, "learning_rate": 2.9904334475020667e-07, "logits/chosen": -1.9162174463272095, "logits/rejected": -1.6850186586380005, "logps/chosen": -185.43527221679688, "logps/rejected": -444.98468017578125, "loss": 0.4895, "rewards/accuracies": 0.625, "rewards/chosen": -0.33354297280311584, "rewards/margins": 0.9943645000457764, "rewards/rejected": -1.3279074430465698, "step": 156 }, { "epoch": 0.02, "learning_rate": 2.9900791307428836e-07, "logits/chosen": -2.2484042644500732, "logits/rejected": -2.4392080307006836, "logps/chosen": -261.85552978515625, "logps/rejected": -204.5252685546875, "loss": 0.9553, "rewards/accuracies": 0.5, "rewards/chosen": -1.1330440044403076, "rewards/margins": -0.12059669196605682, "rewards/rejected": -1.0124472379684448, "step": 157 }, { "epoch": 0.02, "learning_rate": 2.989724813983701e-07, "logits/chosen": -2.699685573577881, "logits/rejected": -2.6827781200408936, "logps/chosen": -248.12286376953125, "logps/rejected": -275.5225830078125, "loss": 0.707, "rewards/accuracies": 0.625, "rewards/chosen": -0.20926009118556976, "rewards/margins": 0.05238410830497742, "rewards/rejected": -0.261644184589386, "step": 158 }, { "epoch": 0.02, "learning_rate": 2.9893704972245186e-07, "logits/chosen": -2.1270711421966553, "logits/rejected": -2.05656099319458, "logps/chosen": -279.4642639160156, "logps/rejected": -299.3373107910156, "loss": 0.6945, "rewards/accuracies": 0.5, "rewards/chosen": -0.3710419833660126, "rewards/margins": 0.33852648735046387, "rewards/rejected": -0.7095685005187988, "step": 159 }, { "epoch": 0.02, "learning_rate": 2.989016180465336e-07, "logits/chosen": -2.311832904815674, "logits/rejected": -2.2269809246063232, "logps/chosen": -258.2981872558594, "logps/rejected": -222.5327606201172, "loss": 0.7792, "rewards/accuracies": 0.625, "rewards/chosen": -1.6360909938812256, "rewards/margins": 0.20646511018276215, "rewards/rejected": -1.8425562381744385, "step": 160 }, { "epoch": 0.02, "learning_rate": 2.988661863706153e-07, "logits/chosen": -2.6292130947113037, "logits/rejected": -2.7311182022094727, "logps/chosen": -186.0697021484375, "logps/rejected": -234.8448486328125, "loss": 0.5622, "rewards/accuracies": 0.5, "rewards/chosen": -0.36750853061676025, "rewards/margins": 0.7177603244781494, "rewards/rejected": -1.0852688550949097, "step": 161 }, { "epoch": 0.02, "learning_rate": 2.9883075469469705e-07, "logits/chosen": -2.1932260990142822, "logits/rejected": -1.905879020690918, "logps/chosen": -141.46380615234375, "logps/rejected": -347.05877685546875, "loss": 0.6139, "rewards/accuracies": 0.625, "rewards/chosen": -0.9611469507217407, "rewards/margins": 0.2585405111312866, "rewards/rejected": -1.2196874618530273, "step": 162 }, { "epoch": 0.02, "learning_rate": 2.987953230187788e-07, "logits/chosen": -2.353823184967041, "logits/rejected": -2.472700595855713, "logps/chosen": -454.1716613769531, "logps/rejected": -289.56573486328125, "loss": 0.7534, "rewards/accuracies": 0.625, "rewards/chosen": -0.7410773634910583, "rewards/margins": 0.4614759683609009, "rewards/rejected": -1.2025532722473145, "step": 163 }, { "epoch": 0.02, "learning_rate": 2.987598913428605e-07, "logits/chosen": -2.3809735774993896, "logits/rejected": -2.438262462615967, "logps/chosen": -361.6285400390625, "logps/rejected": -271.9600830078125, "loss": 0.5782, "rewards/accuracies": 0.625, "rewards/chosen": -0.32255005836486816, "rewards/margins": 0.5883536338806152, "rewards/rejected": -0.9109036922454834, "step": 164 }, { "epoch": 0.02, "learning_rate": 2.9872445966694225e-07, "logits/chosen": -2.086742877960205, "logits/rejected": -2.1662802696228027, "logps/chosen": -135.5213165283203, "logps/rejected": -184.32989501953125, "loss": 0.5989, "rewards/accuracies": 0.625, "rewards/chosen": -0.44525524973869324, "rewards/margins": 0.3135383725166321, "rewards/rejected": -0.7587935924530029, "step": 165 }, { "epoch": 0.02, "learning_rate": 2.9868902799102394e-07, "logits/chosen": -2.264157295227051, "logits/rejected": -2.49031138420105, "logps/chosen": -341.9414978027344, "logps/rejected": -229.1075897216797, "loss": 0.5416, "rewards/accuracies": 0.625, "rewards/chosen": -0.1409548819065094, "rewards/margins": 0.591231644153595, "rewards/rejected": -0.732186496257782, "step": 166 }, { "epoch": 0.02, "learning_rate": 2.986535963151057e-07, "logits/chosen": -2.183047294616699, "logits/rejected": -2.2306580543518066, "logps/chosen": -447.38775634765625, "logps/rejected": -341.69586181640625, "loss": 0.4958, "rewards/accuracies": 0.875, "rewards/chosen": -0.4464993476867676, "rewards/margins": 0.678467869758606, "rewards/rejected": -1.1249672174453735, "step": 167 }, { "epoch": 0.02, "learning_rate": 2.986181646391874e-07, "logits/chosen": -2.5195505619049072, "logits/rejected": -2.6643764972686768, "logps/chosen": -185.33193969726562, "logps/rejected": -169.29483032226562, "loss": 0.7391, "rewards/accuracies": 0.625, "rewards/chosen": -0.3364061415195465, "rewards/margins": 0.1545141041278839, "rewards/rejected": -0.4909202456474304, "step": 168 }, { "epoch": 0.02, "learning_rate": 2.9858273296326914e-07, "logits/chosen": -2.520084857940674, "logits/rejected": -2.400113821029663, "logps/chosen": -158.7333526611328, "logps/rejected": -169.21876525878906, "loss": 0.7242, "rewards/accuracies": 0.5, "rewards/chosen": -0.21627792716026306, "rewards/margins": 0.10012604296207428, "rewards/rejected": -0.31640395522117615, "step": 169 }, { "epoch": 0.02, "learning_rate": 2.985473012873509e-07, "logits/chosen": -2.579526424407959, "logits/rejected": -2.632833480834961, "logps/chosen": -139.39089965820312, "logps/rejected": -160.66226196289062, "loss": 0.5402, "rewards/accuracies": 0.875, "rewards/chosen": -0.26232483983039856, "rewards/margins": 0.41159719228744507, "rewards/rejected": -0.673922061920166, "step": 170 }, { "epoch": 0.02, "learning_rate": 2.9851186961143263e-07, "logits/chosen": -2.3546195030212402, "logits/rejected": -2.4710028171539307, "logps/chosen": -250.13548278808594, "logps/rejected": -204.47930908203125, "loss": 0.4305, "rewards/accuracies": 1.0, "rewards/chosen": -0.0816798061132431, "rewards/margins": 0.7516033053398132, "rewards/rejected": -0.8332831263542175, "step": 171 }, { "epoch": 0.02, "learning_rate": 2.9847643793551433e-07, "logits/chosen": -2.461793899536133, "logits/rejected": -2.8258161544799805, "logps/chosen": -273.1517639160156, "logps/rejected": -222.17884826660156, "loss": 1.4372, "rewards/accuracies": 0.625, "rewards/chosen": -3.4574060440063477, "rewards/margins": -0.4638090133666992, "rewards/rejected": -2.9935970306396484, "step": 172 }, { "epoch": 0.02, "learning_rate": 2.984410062595961e-07, "logits/chosen": -1.6198781728744507, "logits/rejected": -1.8017055988311768, "logps/chosen": -447.2364196777344, "logps/rejected": -390.44378662109375, "loss": 0.8512, "rewards/accuracies": 0.375, "rewards/chosen": -0.6869930028915405, "rewards/margins": -0.1895975023508072, "rewards/rejected": -0.49739551544189453, "step": 173 }, { "epoch": 0.02, "learning_rate": 2.9840557458367777e-07, "logits/chosen": -2.711627960205078, "logits/rejected": -2.640700340270996, "logps/chosen": -295.3407287597656, "logps/rejected": -238.1574249267578, "loss": 0.4983, "rewards/accuracies": 0.75, "rewards/chosen": -0.4178104102611542, "rewards/margins": 0.8496869206428528, "rewards/rejected": -1.2674973011016846, "step": 174 }, { "epoch": 0.02, "learning_rate": 2.983701429077595e-07, "logits/chosen": -1.7103465795516968, "logits/rejected": -2.231901168823242, "logps/chosen": -369.5630187988281, "logps/rejected": -316.6242370605469, "loss": 0.5946, "rewards/accuracies": 0.75, "rewards/chosen": -0.21179567277431488, "rewards/margins": 0.6494349837303162, "rewards/rejected": -0.8612306118011475, "step": 175 }, { "epoch": 0.02, "learning_rate": 2.9833471123184127e-07, "logits/chosen": -2.610466957092285, "logits/rejected": -2.489729166030884, "logps/chosen": -225.26031494140625, "logps/rejected": -223.93878173828125, "loss": 0.3781, "rewards/accuracies": 0.75, "rewards/chosen": -0.1574171930551529, "rewards/margins": 1.0541033744812012, "rewards/rejected": -1.2115206718444824, "step": 176 }, { "epoch": 0.02, "learning_rate": 2.9829927955592297e-07, "logits/chosen": -2.463379383087158, "logits/rejected": -2.1589033603668213, "logps/chosen": -132.13174438476562, "logps/rejected": -240.66943359375, "loss": 0.4906, "rewards/accuracies": 0.75, "rewards/chosen": -0.4057082533836365, "rewards/margins": 0.5709302425384521, "rewards/rejected": -0.9766385555267334, "step": 177 }, { "epoch": 0.02, "learning_rate": 2.982638478800047e-07, "logits/chosen": -1.5857264995574951, "logits/rejected": -1.8790276050567627, "logps/chosen": -399.6611022949219, "logps/rejected": -325.9180908203125, "loss": 0.5951, "rewards/accuracies": 0.75, "rewards/chosen": -0.30008211731910706, "rewards/margins": 0.2790365219116211, "rewards/rejected": -0.5791186690330505, "step": 178 }, { "epoch": 0.02, "learning_rate": 2.982284162040864e-07, "logits/chosen": -2.4249980449676514, "logits/rejected": -2.5328400135040283, "logps/chosen": -145.7904052734375, "logps/rejected": -178.11697387695312, "loss": 0.543, "rewards/accuracies": 0.625, "rewards/chosen": -0.09167562425136566, "rewards/margins": 0.4494943618774414, "rewards/rejected": -0.5411700010299683, "step": 179 }, { "epoch": 0.02, "learning_rate": 2.9819298452816816e-07, "logits/chosen": -2.637824773788452, "logits/rejected": -2.14784574508667, "logps/chosen": -278.68115234375, "logps/rejected": -223.5745849609375, "loss": 0.8634, "rewards/accuracies": 0.625, "rewards/chosen": -1.012578010559082, "rewards/margins": 0.07178112119436264, "rewards/rejected": -1.0843591690063477, "step": 180 }, { "epoch": 0.02, "learning_rate": 2.981575528522499e-07, "logits/chosen": -1.8628668785095215, "logits/rejected": -2.1004085540771484, "logps/chosen": -375.42156982421875, "logps/rejected": -239.94210815429688, "loss": 0.6203, "rewards/accuracies": 0.875, "rewards/chosen": -0.1471804976463318, "rewards/margins": 0.21720591187477112, "rewards/rejected": -0.3643864095211029, "step": 181 }, { "epoch": 0.02, "learning_rate": 2.9812212117633166e-07, "logits/chosen": -1.7659504413604736, "logits/rejected": -2.2073898315429688, "logps/chosen": -487.9418640136719, "logps/rejected": -343.2994384765625, "loss": 1.3051, "rewards/accuracies": 0.375, "rewards/chosen": -1.8398267030715942, "rewards/margins": -0.7485359311103821, "rewards/rejected": -1.0912909507751465, "step": 182 }, { "epoch": 0.02, "learning_rate": 2.9808668950041335e-07, "logits/chosen": -2.2780654430389404, "logits/rejected": -2.2091846466064453, "logps/chosen": -311.5879821777344, "logps/rejected": -317.8841857910156, "loss": 0.6217, "rewards/accuracies": 0.5, "rewards/chosen": -0.4547070264816284, "rewards/margins": 0.3624263107776642, "rewards/rejected": -0.8171333074569702, "step": 183 }, { "epoch": 0.02, "learning_rate": 2.980512578244951e-07, "logits/chosen": -2.9426255226135254, "logits/rejected": -2.876356363296509, "logps/chosen": -295.82733154296875, "logps/rejected": -232.63882446289062, "loss": 0.6686, "rewards/accuracies": 0.5, "rewards/chosen": -0.41066229343414307, "rewards/margins": 0.3706735074520111, "rewards/rejected": -0.7813358306884766, "step": 184 }, { "epoch": 0.02, "learning_rate": 2.980158261485768e-07, "logits/chosen": -2.661207675933838, "logits/rejected": -2.7515451908111572, "logps/chosen": -266.5347900390625, "logps/rejected": -266.1720886230469, "loss": 0.4387, "rewards/accuracies": 0.75, "rewards/chosen": -0.5421686172485352, "rewards/margins": 0.7271385788917542, "rewards/rejected": -1.269307255744934, "step": 185 }, { "epoch": 0.02, "learning_rate": 2.9798039447265854e-07, "logits/chosen": -2.2362399101257324, "logits/rejected": -2.4736719131469727, "logps/chosen": -438.289794921875, "logps/rejected": -400.8497314453125, "loss": 0.7248, "rewards/accuracies": 0.625, "rewards/chosen": -0.14261962473392487, "rewards/margins": 0.5831397175788879, "rewards/rejected": -0.7257592678070068, "step": 186 }, { "epoch": 0.02, "learning_rate": 2.979449627967403e-07, "logits/chosen": -2.177757740020752, "logits/rejected": -2.4214768409729004, "logps/chosen": -424.8014831542969, "logps/rejected": -294.1171875, "loss": 0.7395, "rewards/accuracies": 0.625, "rewards/chosen": -0.48851269483566284, "rewards/margins": 0.37288495898246765, "rewards/rejected": -0.8613976240158081, "step": 187 }, { "epoch": 0.02, "learning_rate": 2.97909531120822e-07, "logits/chosen": -2.5479719638824463, "logits/rejected": -2.540255546569824, "logps/chosen": -173.61489868164062, "logps/rejected": -151.67105102539062, "loss": 0.481, "rewards/accuracies": 0.875, "rewards/chosen": 0.06813786923885345, "rewards/margins": 0.605137825012207, "rewards/rejected": -0.5369999408721924, "step": 188 }, { "epoch": 0.02, "learning_rate": 2.9787409944490374e-07, "logits/chosen": -1.9244165420532227, "logits/rejected": -2.060289144515991, "logps/chosen": -545.0999755859375, "logps/rejected": -347.0440673828125, "loss": 0.6863, "rewards/accuracies": 0.625, "rewards/chosen": -1.0551561117172241, "rewards/margins": 0.14402733743190765, "rewards/rejected": -1.199183464050293, "step": 189 }, { "epoch": 0.02, "learning_rate": 2.9783866776898543e-07, "logits/chosen": -2.305917501449585, "logits/rejected": -2.548753023147583, "logps/chosen": -480.833251953125, "logps/rejected": -307.805908203125, "loss": 0.5484, "rewards/accuracies": 0.875, "rewards/chosen": 0.11998571455478668, "rewards/margins": 0.49165478348731995, "rewards/rejected": -0.3716690242290497, "step": 190 }, { "epoch": 0.02, "learning_rate": 2.978032360930672e-07, "logits/chosen": -2.8891310691833496, "logits/rejected": -2.576831102371216, "logps/chosen": -170.96961975097656, "logps/rejected": -227.4477996826172, "loss": 0.5154, "rewards/accuracies": 0.625, "rewards/chosen": -0.4644376337528229, "rewards/margins": 1.6487505435943604, "rewards/rejected": -2.1131880283355713, "step": 191 }, { "epoch": 0.02, "learning_rate": 2.9776780441714893e-07, "logits/chosen": -2.3669207096099854, "logits/rejected": -2.4071383476257324, "logps/chosen": -235.2662811279297, "logps/rejected": -279.0393981933594, "loss": 0.5169, "rewards/accuracies": 0.75, "rewards/chosen": -0.8461290001869202, "rewards/margins": 0.7511619925498962, "rewards/rejected": -1.5972909927368164, "step": 192 }, { "epoch": 0.02, "learning_rate": 2.977323727412306e-07, "logits/chosen": -2.2885642051696777, "logits/rejected": -2.519991874694824, "logps/chosen": -253.64480590820312, "logps/rejected": -203.57786560058594, "loss": 0.5247, "rewards/accuracies": 0.625, "rewards/chosen": -0.42813435196876526, "rewards/margins": 0.6662355661392212, "rewards/rejected": -1.094369888305664, "step": 193 }, { "epoch": 0.02, "learning_rate": 2.976969410653124e-07, "logits/chosen": -2.3995888233184814, "logits/rejected": -2.4556751251220703, "logps/chosen": -171.52984619140625, "logps/rejected": -210.47445678710938, "loss": 0.4858, "rewards/accuracies": 0.75, "rewards/chosen": -0.046367041766643524, "rewards/margins": 0.7165279984474182, "rewards/rejected": -0.7628950476646423, "step": 194 }, { "epoch": 0.02, "learning_rate": 2.976615093893941e-07, "logits/chosen": -2.5814974308013916, "logits/rejected": -2.38312029838562, "logps/chosen": -272.8090515136719, "logps/rejected": -285.1854553222656, "loss": 0.4581, "rewards/accuracies": 0.625, "rewards/chosen": -0.1665532886981964, "rewards/margins": 0.7537395358085632, "rewards/rejected": -0.920292854309082, "step": 195 }, { "epoch": 0.02, "learning_rate": 2.976260777134758e-07, "logits/chosen": -2.504533529281616, "logits/rejected": -2.6208178997039795, "logps/chosen": -204.13047790527344, "logps/rejected": -169.91793823242188, "loss": 0.8929, "rewards/accuracies": 0.75, "rewards/chosen": -1.2568453550338745, "rewards/margins": 0.419555127620697, "rewards/rejected": -1.6764004230499268, "step": 196 }, { "epoch": 0.02, "learning_rate": 2.9759064603755757e-07, "logits/chosen": -3.0464584827423096, "logits/rejected": -3.003575325012207, "logps/chosen": -249.3004150390625, "logps/rejected": -148.258544921875, "loss": 0.4221, "rewards/accuracies": 0.75, "rewards/chosen": 0.18436755239963531, "rewards/margins": 0.8750616312026978, "rewards/rejected": -0.6906940937042236, "step": 197 }, { "epoch": 0.02, "learning_rate": 2.975552143616393e-07, "logits/chosen": -2.0811831951141357, "logits/rejected": -2.285975933074951, "logps/chosen": -316.1667785644531, "logps/rejected": -294.22918701171875, "loss": 1.1003, "rewards/accuracies": 0.5, "rewards/chosen": -0.7553806304931641, "rewards/margins": -0.23474159836769104, "rewards/rejected": -0.5206390023231506, "step": 198 }, { "epoch": 0.02, "learning_rate": 2.97519782685721e-07, "logits/chosen": -2.5461838245391846, "logits/rejected": -2.3391404151916504, "logps/chosen": -337.11712646484375, "logps/rejected": -305.74029541015625, "loss": 0.3651, "rewards/accuracies": 0.875, "rewards/chosen": -0.1557615101337433, "rewards/margins": 1.2772061824798584, "rewards/rejected": -1.4329677820205688, "step": 199 }, { "epoch": 0.02, "learning_rate": 2.9748435100980276e-07, "logits/chosen": -2.509634017944336, "logits/rejected": -2.122727155685425, "logps/chosen": -234.50094604492188, "logps/rejected": -224.40541076660156, "loss": 0.3924, "rewards/accuracies": 0.875, "rewards/chosen": -0.17068585753440857, "rewards/margins": 1.0835914611816406, "rewards/rejected": -1.2542774677276611, "step": 200 }, { "epoch": 0.02, "learning_rate": 2.9744891933388446e-07, "logits/chosen": -2.180366039276123, "logits/rejected": -2.0672030448913574, "logps/chosen": -341.3550720214844, "logps/rejected": -324.4943542480469, "loss": 0.4942, "rewards/accuracies": 0.75, "rewards/chosen": -0.29692578315734863, "rewards/margins": 0.7773337960243225, "rewards/rejected": -1.0742595195770264, "step": 201 }, { "epoch": 0.02, "learning_rate": 2.974134876579662e-07, "logits/chosen": -2.6591265201568604, "logits/rejected": -2.7623841762542725, "logps/chosen": -339.0309753417969, "logps/rejected": -250.5465087890625, "loss": 0.6343, "rewards/accuracies": 0.5, "rewards/chosen": -0.2517094910144806, "rewards/margins": 0.2982446253299713, "rewards/rejected": -0.5499541759490967, "step": 202 }, { "epoch": 0.02, "learning_rate": 2.973780559820479e-07, "logits/chosen": -2.6656172275543213, "logits/rejected": -2.3511343002319336, "logps/chosen": -208.1717529296875, "logps/rejected": -239.59095764160156, "loss": 0.575, "rewards/accuracies": 0.5, "rewards/chosen": -0.13784259557724, "rewards/margins": 0.5672997236251831, "rewards/rejected": -0.7051423192024231, "step": 203 }, { "epoch": 0.02, "learning_rate": 2.9734262430612965e-07, "logits/chosen": -1.9007630348205566, "logits/rejected": -2.0440890789031982, "logps/chosen": -247.72889709472656, "logps/rejected": -279.59552001953125, "loss": 0.8708, "rewards/accuracies": 0.625, "rewards/chosen": -1.0063238143920898, "rewards/margins": 1.0005090236663818, "rewards/rejected": -2.0068325996398926, "step": 204 }, { "epoch": 0.02, "learning_rate": 2.973071926302114e-07, "logits/chosen": -2.014699697494507, "logits/rejected": -2.4477901458740234, "logps/chosen": -469.59979248046875, "logps/rejected": -278.1065979003906, "loss": 0.777, "rewards/accuracies": 0.5, "rewards/chosen": -0.7977591753005981, "rewards/margins": 0.17854562401771545, "rewards/rejected": -0.976304829120636, "step": 205 }, { "epoch": 0.02, "learning_rate": 2.9727176095429315e-07, "logits/chosen": -2.726283073425293, "logits/rejected": -2.7086198329925537, "logps/chosen": -72.46257019042969, "logps/rejected": -136.21966552734375, "loss": 0.3958, "rewards/accuracies": 0.75, "rewards/chosen": -0.06324831396341324, "rewards/margins": 1.182030439376831, "rewards/rejected": -1.2452788352966309, "step": 206 }, { "epoch": 0.02, "learning_rate": 2.9723632927837484e-07, "logits/chosen": -2.4139962196350098, "logits/rejected": -2.327277898788452, "logps/chosen": -389.550048828125, "logps/rejected": -327.2835388183594, "loss": 0.6662, "rewards/accuracies": 0.625, "rewards/chosen": -0.48070067167282104, "rewards/margins": 0.6324909925460815, "rewards/rejected": -1.1131917238235474, "step": 207 }, { "epoch": 0.02, "learning_rate": 2.972008976024566e-07, "logits/chosen": -1.8973721265792847, "logits/rejected": -1.7923822402954102, "logps/chosen": -169.95416259765625, "logps/rejected": -277.5341796875, "loss": 0.8652, "rewards/accuracies": 0.625, "rewards/chosen": -0.7989225387573242, "rewards/margins": 0.6200987696647644, "rewards/rejected": -1.4190213680267334, "step": 208 }, { "epoch": 0.02, "learning_rate": 2.9716546592653834e-07, "logits/chosen": -2.751174211502075, "logits/rejected": -2.6011664867401123, "logps/chosen": -87.83241271972656, "logps/rejected": -170.61825561523438, "loss": 0.5183, "rewards/accuracies": 0.875, "rewards/chosen": -0.43172308802604675, "rewards/margins": 0.555282711982727, "rewards/rejected": -0.9870057702064514, "step": 209 }, { "epoch": 0.02, "learning_rate": 2.9713003425062003e-07, "logits/chosen": -2.216834545135498, "logits/rejected": -2.2389798164367676, "logps/chosen": -191.7452850341797, "logps/rejected": -334.9488525390625, "loss": 0.664, "rewards/accuracies": 0.5, "rewards/chosen": -0.29541483521461487, "rewards/margins": 0.3470202684402466, "rewards/rejected": -0.6424350738525391, "step": 210 }, { "epoch": 0.02, "learning_rate": 2.970946025747018e-07, "logits/chosen": -1.7326728105545044, "logits/rejected": -1.884152889251709, "logps/chosen": -258.3929443359375, "logps/rejected": -161.04075622558594, "loss": 0.6175, "rewards/accuracies": 0.625, "rewards/chosen": -1.3610254526138306, "rewards/margins": 0.2630250155925751, "rewards/rejected": -1.6240503787994385, "step": 211 }, { "epoch": 0.02, "learning_rate": 2.970591708987835e-07, "logits/chosen": -2.704021692276001, "logits/rejected": -2.5570011138916016, "logps/chosen": -315.704833984375, "logps/rejected": -191.2744140625, "loss": 2.3518, "rewards/accuracies": 0.5, "rewards/chosen": -3.783074140548706, "rewards/margins": -1.667793869972229, "rewards/rejected": -2.1152801513671875, "step": 212 }, { "epoch": 0.02, "learning_rate": 2.9702373922286523e-07, "logits/chosen": -2.2044677734375, "logits/rejected": -2.1459813117980957, "logps/chosen": -297.95281982421875, "logps/rejected": -326.42425537109375, "loss": 0.611, "rewards/accuracies": 0.75, "rewards/chosen": -1.3468668460845947, "rewards/margins": 0.9670214653015137, "rewards/rejected": -2.3138883113861084, "step": 213 }, { "epoch": 0.02, "learning_rate": 2.969883075469469e-07, "logits/chosen": -2.5468626022338867, "logits/rejected": -2.7631421089172363, "logps/chosen": -161.05430603027344, "logps/rejected": -238.472900390625, "loss": 0.7499, "rewards/accuracies": 0.75, "rewards/chosen": -0.4447501599788666, "rewards/margins": 0.4884619414806366, "rewards/rejected": -0.933212161064148, "step": 214 }, { "epoch": 0.03, "learning_rate": 2.9695287587102867e-07, "logits/chosen": -2.435810089111328, "logits/rejected": -2.504624366760254, "logps/chosen": -186.88778686523438, "logps/rejected": -237.7312774658203, "loss": 0.4955, "rewards/accuracies": 0.75, "rewards/chosen": -0.5053725838661194, "rewards/margins": 0.5947993993759155, "rewards/rejected": -1.1001720428466797, "step": 215 }, { "epoch": 0.03, "learning_rate": 2.969174441951104e-07, "logits/chosen": -2.254075050354004, "logits/rejected": -2.1868703365325928, "logps/chosen": -122.3339614868164, "logps/rejected": -216.272705078125, "loss": 0.7093, "rewards/accuracies": 0.75, "rewards/chosen": -0.47668716311454773, "rewards/margins": 0.585472822189331, "rewards/rejected": -1.0621600151062012, "step": 216 }, { "epoch": 0.03, "learning_rate": 2.9688201251919217e-07, "logits/chosen": -2.545182466506958, "logits/rejected": -2.6172900199890137, "logps/chosen": -330.7689208984375, "logps/rejected": -261.0626220703125, "loss": 0.6623, "rewards/accuracies": 0.5, "rewards/chosen": -0.9052447080612183, "rewards/margins": 0.5616000294685364, "rewards/rejected": -1.4668447971343994, "step": 217 }, { "epoch": 0.03, "learning_rate": 2.9684658084327386e-07, "logits/chosen": -2.6438913345336914, "logits/rejected": -2.718538522720337, "logps/chosen": -475.1298828125, "logps/rejected": -213.1118927001953, "loss": 0.9303, "rewards/accuracies": 0.625, "rewards/chosen": -0.9925950765609741, "rewards/margins": -0.061597973108291626, "rewards/rejected": -0.9309971928596497, "step": 218 }, { "epoch": 0.03, "learning_rate": 2.968111491673556e-07, "logits/chosen": -2.286801338195801, "logits/rejected": -2.335422992706299, "logps/chosen": -299.2216796875, "logps/rejected": -310.5240478515625, "loss": 0.5823, "rewards/accuracies": 0.75, "rewards/chosen": -0.4594840705394745, "rewards/margins": 0.631011962890625, "rewards/rejected": -1.0904960632324219, "step": 219 }, { "epoch": 0.03, "learning_rate": 2.9677571749143736e-07, "logits/chosen": -2.2917537689208984, "logits/rejected": -2.4121413230895996, "logps/chosen": -353.86700439453125, "logps/rejected": -210.02362060546875, "loss": 0.4395, "rewards/accuracies": 0.875, "rewards/chosen": -0.004545241594314575, "rewards/margins": 0.6690179705619812, "rewards/rejected": -0.6735632419586182, "step": 220 }, { "epoch": 0.03, "learning_rate": 2.9674028581551906e-07, "logits/chosen": -2.103459119796753, "logits/rejected": -2.2720165252685547, "logps/chosen": -176.75814819335938, "logps/rejected": -186.2899169921875, "loss": 0.4432, "rewards/accuracies": 0.75, "rewards/chosen": -1.5019952058792114, "rewards/margins": 0.8146330118179321, "rewards/rejected": -2.3166284561157227, "step": 221 }, { "epoch": 0.03, "learning_rate": 2.967048541396008e-07, "logits/chosen": -1.855933666229248, "logits/rejected": -2.3924572467803955, "logps/chosen": -390.41326904296875, "logps/rejected": -258.12005615234375, "loss": 0.649, "rewards/accuracies": 0.625, "rewards/chosen": -0.6132771968841553, "rewards/margins": 0.4132848381996155, "rewards/rejected": -1.026561975479126, "step": 222 }, { "epoch": 0.03, "learning_rate": 2.966694224636825e-07, "logits/chosen": -2.9040279388427734, "logits/rejected": -2.758218765258789, "logps/chosen": -136.47802734375, "logps/rejected": -199.02577209472656, "loss": 0.3799, "rewards/accuracies": 0.875, "rewards/chosen": -0.23595374822616577, "rewards/margins": 1.14297354221344, "rewards/rejected": -1.3789273500442505, "step": 223 }, { "epoch": 0.03, "learning_rate": 2.9663399078776425e-07, "logits/chosen": -1.877577304840088, "logits/rejected": -2.1572866439819336, "logps/chosen": -380.12945556640625, "logps/rejected": -156.77786254882812, "loss": 0.6586, "rewards/accuracies": 0.625, "rewards/chosen": -0.041225314140319824, "rewards/margins": 0.10930295288562775, "rewards/rejected": -0.15052828192710876, "step": 224 }, { "epoch": 0.03, "learning_rate": 2.9659855911184595e-07, "logits/chosen": -3.0273501873016357, "logits/rejected": -2.907764196395874, "logps/chosen": -311.0216064453125, "logps/rejected": -261.26324462890625, "loss": 0.5198, "rewards/accuracies": 0.625, "rewards/chosen": -0.3632568120956421, "rewards/margins": 0.4935813546180725, "rewards/rejected": -0.8568381667137146, "step": 225 }, { "epoch": 0.03, "learning_rate": 2.965631274359277e-07, "logits/chosen": -2.519087314605713, "logits/rejected": -2.3993542194366455, "logps/chosen": -224.118408203125, "logps/rejected": -263.0647888183594, "loss": 0.5003, "rewards/accuracies": 0.625, "rewards/chosen": -0.13065998256206512, "rewards/margins": 1.0385065078735352, "rewards/rejected": -1.1691664457321167, "step": 226 }, { "epoch": 0.03, "learning_rate": 2.9652769576000944e-07, "logits/chosen": -2.1350302696228027, "logits/rejected": -2.2670650482177734, "logps/chosen": -371.3938293457031, "logps/rejected": -282.5650939941406, "loss": 0.6179, "rewards/accuracies": 0.75, "rewards/chosen": -0.49811485409736633, "rewards/margins": 0.4753153920173645, "rewards/rejected": -0.9734302759170532, "step": 227 }, { "epoch": 0.03, "learning_rate": 2.9649226408409114e-07, "logits/chosen": -1.6433414220809937, "logits/rejected": -2.169814109802246, "logps/chosen": -391.1260986328125, "logps/rejected": -236.60311889648438, "loss": 0.5577, "rewards/accuracies": 0.75, "rewards/chosen": -0.6052765846252441, "rewards/margins": 0.4140346050262451, "rewards/rejected": -1.0193111896514893, "step": 228 }, { "epoch": 0.03, "learning_rate": 2.964568324081729e-07, "logits/chosen": -2.126410484313965, "logits/rejected": -2.1705334186553955, "logps/chosen": -243.95066833496094, "logps/rejected": -327.18896484375, "loss": 0.381, "rewards/accuracies": 0.875, "rewards/chosen": -0.21082711219787598, "rewards/margins": 1.200587272644043, "rewards/rejected": -1.411414384841919, "step": 229 }, { "epoch": 0.03, "learning_rate": 2.9642140073225464e-07, "logits/chosen": -2.5843443870544434, "logits/rejected": -2.34238600730896, "logps/chosen": -118.04267883300781, "logps/rejected": -295.7855224609375, "loss": 0.6246, "rewards/accuracies": 0.625, "rewards/chosen": -0.5008370876312256, "rewards/margins": 0.7272105813026428, "rewards/rejected": -1.2280477285385132, "step": 230 }, { "epoch": 0.03, "learning_rate": 2.963859690563364e-07, "logits/chosen": -2.4147214889526367, "logits/rejected": -2.3730053901672363, "logps/chosen": -246.2408447265625, "logps/rejected": -265.24371337890625, "loss": 0.4393, "rewards/accuracies": 0.75, "rewards/chosen": -0.3537565767765045, "rewards/margins": 0.9284340739250183, "rewards/rejected": -1.2821907997131348, "step": 231 }, { "epoch": 0.03, "learning_rate": 2.963505373804181e-07, "logits/chosen": -2.780045509338379, "logits/rejected": -2.573618173599243, "logps/chosen": -228.7745819091797, "logps/rejected": -192.944580078125, "loss": 0.3737, "rewards/accuracies": 0.875, "rewards/chosen": 0.0125674307346344, "rewards/margins": 1.2672791481018066, "rewards/rejected": -1.2547117471694946, "step": 232 }, { "epoch": 0.03, "learning_rate": 2.9631510570449983e-07, "logits/chosen": -2.135087251663208, "logits/rejected": -2.096663236618042, "logps/chosen": -174.82577514648438, "logps/rejected": -260.0534362792969, "loss": 0.5357, "rewards/accuracies": 0.875, "rewards/chosen": -0.6059907674789429, "rewards/margins": 0.9217367768287659, "rewards/rejected": -1.527727723121643, "step": 233 }, { "epoch": 0.03, "learning_rate": 2.962796740285815e-07, "logits/chosen": -2.2490909099578857, "logits/rejected": -2.2157199382781982, "logps/chosen": -359.9885559082031, "logps/rejected": -285.5429382324219, "loss": 0.7898, "rewards/accuracies": 0.5, "rewards/chosen": -0.3191032409667969, "rewards/margins": 0.15777510404586792, "rewards/rejected": -0.4768783450126648, "step": 234 }, { "epoch": 0.03, "learning_rate": 2.9624424235266327e-07, "logits/chosen": -2.2581114768981934, "logits/rejected": -1.9997048377990723, "logps/chosen": -228.3654327392578, "logps/rejected": -271.7457275390625, "loss": 0.8028, "rewards/accuracies": 0.75, "rewards/chosen": -0.9475138187408447, "rewards/margins": 0.20894011855125427, "rewards/rejected": -1.1564539670944214, "step": 235 }, { "epoch": 0.03, "learning_rate": 2.9620881067674497e-07, "logits/chosen": -2.1485652923583984, "logits/rejected": -2.2349109649658203, "logps/chosen": -326.4866638183594, "logps/rejected": -243.7642364501953, "loss": 0.7143, "rewards/accuracies": 0.5, "rewards/chosen": -0.4513411223888397, "rewards/margins": 0.2783699631690979, "rewards/rejected": -0.7297110557556152, "step": 236 }, { "epoch": 0.03, "learning_rate": 2.961733790008267e-07, "logits/chosen": -2.2010984420776367, "logits/rejected": -2.410281181335449, "logps/chosen": -322.23992919921875, "logps/rejected": -294.8111572265625, "loss": 0.4889, "rewards/accuracies": 0.75, "rewards/chosen": -0.1166505217552185, "rewards/margins": 0.6585292816162109, "rewards/rejected": -0.7751798033714294, "step": 237 }, { "epoch": 0.03, "learning_rate": 2.9613794732490847e-07, "logits/chosen": -2.2597479820251465, "logits/rejected": -2.2367334365844727, "logps/chosen": -167.72113037109375, "logps/rejected": -244.42709350585938, "loss": 0.965, "rewards/accuracies": 0.5, "rewards/chosen": -1.9110950231552124, "rewards/margins": 0.900370717048645, "rewards/rejected": -2.8114657402038574, "step": 238 }, { "epoch": 0.03, "learning_rate": 2.9610251564899016e-07, "logits/chosen": -2.5217127799987793, "logits/rejected": -2.4344356060028076, "logps/chosen": -260.0079040527344, "logps/rejected": -227.93646240234375, "loss": 0.5447, "rewards/accuracies": 0.75, "rewards/chosen": -0.36287394165992737, "rewards/margins": 0.5700594186782837, "rewards/rejected": -0.9329333901405334, "step": 239 }, { "epoch": 0.03, "learning_rate": 2.960670839730719e-07, "logits/chosen": -2.1234779357910156, "logits/rejected": -2.1665754318237305, "logps/chosen": -323.8233337402344, "logps/rejected": -286.0159606933594, "loss": 0.3277, "rewards/accuracies": 1.0, "rewards/chosen": -0.07391555607318878, "rewards/margins": 1.038224458694458, "rewards/rejected": -1.1121399402618408, "step": 240 }, { "epoch": 0.03, "learning_rate": 2.9603165229715366e-07, "logits/chosen": -2.753492832183838, "logits/rejected": -2.721874237060547, "logps/chosen": -168.72793579101562, "logps/rejected": -189.45932006835938, "loss": 0.3772, "rewards/accuracies": 0.75, "rewards/chosen": 0.28331437706947327, "rewards/margins": 1.6643120050430298, "rewards/rejected": -1.380997657775879, "step": 241 }, { "epoch": 0.03, "learning_rate": 2.959962206212354e-07, "logits/chosen": -2.226271152496338, "logits/rejected": -2.082524061203003, "logps/chosen": -403.5612487792969, "logps/rejected": -410.4591369628906, "loss": 0.7343, "rewards/accuracies": 0.75, "rewards/chosen": -0.5986248850822449, "rewards/margins": 0.41287389397621155, "rewards/rejected": -1.0114986896514893, "step": 242 }, { "epoch": 0.03, "learning_rate": 2.959607889453171e-07, "logits/chosen": -2.630030632019043, "logits/rejected": -2.8333232402801514, "logps/chosen": -456.001220703125, "logps/rejected": -368.57684326171875, "loss": 0.307, "rewards/accuracies": 1.0, "rewards/chosen": -0.10505972057580948, "rewards/margins": 1.203420639038086, "rewards/rejected": -1.3084805011749268, "step": 243 }, { "epoch": 0.03, "learning_rate": 2.9592535726939885e-07, "logits/chosen": -2.175078868865967, "logits/rejected": -2.2508466243743896, "logps/chosen": -154.53163146972656, "logps/rejected": -215.64236450195312, "loss": 0.9409, "rewards/accuracies": 0.625, "rewards/chosen": -1.2359592914581299, "rewards/margins": -0.060165584087371826, "rewards/rejected": -1.1757936477661133, "step": 244 }, { "epoch": 0.03, "learning_rate": 2.9588992559348055e-07, "logits/chosen": -2.532890558242798, "logits/rejected": -2.612595319747925, "logps/chosen": -222.73016357421875, "logps/rejected": -146.5400390625, "loss": 0.7311, "rewards/accuracies": 0.875, "rewards/chosen": -0.3925657868385315, "rewards/margins": 0.47019314765930176, "rewards/rejected": -0.8627589344978333, "step": 245 }, { "epoch": 0.03, "learning_rate": 2.958544939175623e-07, "logits/chosen": -2.3777523040771484, "logits/rejected": -2.2148380279541016, "logps/chosen": -138.219970703125, "logps/rejected": -248.41384887695312, "loss": 0.3746, "rewards/accuracies": 0.875, "rewards/chosen": -0.20181617140769958, "rewards/margins": 0.9688894748687744, "rewards/rejected": -1.1707056760787964, "step": 246 }, { "epoch": 0.03, "learning_rate": 2.95819062241644e-07, "logits/chosen": -2.3724496364593506, "logits/rejected": -2.488852024078369, "logps/chosen": -199.93492126464844, "logps/rejected": -174.43544006347656, "loss": 1.6197, "rewards/accuracies": 0.5, "rewards/chosen": -2.0855000019073486, "rewards/margins": -0.8561151623725891, "rewards/rejected": -1.2293848991394043, "step": 247 }, { "epoch": 0.03, "learning_rate": 2.9578363056572574e-07, "logits/chosen": -2.487362861633301, "logits/rejected": -2.739895820617676, "logps/chosen": -252.55865478515625, "logps/rejected": -235.70318603515625, "loss": 0.7096, "rewards/accuracies": 0.5, "rewards/chosen": -0.5399394035339355, "rewards/margins": 0.2241605669260025, "rewards/rejected": -0.7640999555587769, "step": 248 }, { "epoch": 0.03, "learning_rate": 2.957481988898075e-07, "logits/chosen": -2.588501214981079, "logits/rejected": -2.593179225921631, "logps/chosen": -266.2436828613281, "logps/rejected": -168.1346893310547, "loss": 0.6591, "rewards/accuracies": 0.625, "rewards/chosen": -0.4024375081062317, "rewards/margins": 0.3704236149787903, "rewards/rejected": -0.772861123085022, "step": 249 }, { "epoch": 0.03, "learning_rate": 2.957127672138892e-07, "logits/chosen": -2.942012310028076, "logits/rejected": -2.997328758239746, "logps/chosen": -126.1902084350586, "logps/rejected": -158.05067443847656, "loss": 0.5559, "rewards/accuracies": 0.625, "rewards/chosen": -0.13358812034130096, "rewards/margins": 0.6038130521774292, "rewards/rejected": -0.7374010682106018, "step": 250 }, { "epoch": 0.03, "learning_rate": 2.9567733553797093e-07, "logits/chosen": -1.9721193313598633, "logits/rejected": -1.7903449535369873, "logps/chosen": -154.9866943359375, "logps/rejected": -223.43446350097656, "loss": 0.4142, "rewards/accuracies": 0.75, "rewards/chosen": -0.8144962787628174, "rewards/margins": 1.9981340169906616, "rewards/rejected": -2.8126301765441895, "step": 251 }, { "epoch": 0.03, "learning_rate": 2.956419038620527e-07, "logits/chosen": -2.7903285026550293, "logits/rejected": -2.695744276046753, "logps/chosen": -319.083740234375, "logps/rejected": -239.67547607421875, "loss": 0.6354, "rewards/accuracies": 0.875, "rewards/chosen": -0.34986791014671326, "rewards/margins": 0.4811970293521881, "rewards/rejected": -0.8310648798942566, "step": 252 }, { "epoch": 0.03, "learning_rate": 2.9560647218613443e-07, "logits/chosen": -2.5284905433654785, "logits/rejected": -2.3725132942199707, "logps/chosen": -280.74383544921875, "logps/rejected": -321.76336669921875, "loss": 0.3781, "rewards/accuracies": 0.875, "rewards/chosen": -0.4790899157524109, "rewards/margins": 1.1579196453094482, "rewards/rejected": -1.6370095014572144, "step": 253 }, { "epoch": 0.03, "learning_rate": 2.955710405102161e-07, "logits/chosen": -2.1102724075317383, "logits/rejected": -2.428840398788452, "logps/chosen": -144.34535217285156, "logps/rejected": -170.6175537109375, "loss": 0.6721, "rewards/accuracies": 0.5, "rewards/chosen": -0.8028558492660522, "rewards/margins": 0.5086990594863892, "rewards/rejected": -1.311555027961731, "step": 254 }, { "epoch": 0.03, "learning_rate": 2.955356088342979e-07, "logits/chosen": -1.822654128074646, "logits/rejected": -2.174063205718994, "logps/chosen": -578.197998046875, "logps/rejected": -385.5157775878906, "loss": 1.1791, "rewards/accuracies": 0.25, "rewards/chosen": -1.130832314491272, "rewards/margins": -0.6686003804206848, "rewards/rejected": -0.46223190426826477, "step": 255 }, { "epoch": 0.03, "learning_rate": 2.9550017715837957e-07, "logits/chosen": -2.641218900680542, "logits/rejected": -2.665098190307617, "logps/chosen": -189.53826904296875, "logps/rejected": -170.3012237548828, "loss": 1.1807, "rewards/accuracies": 0.5, "rewards/chosen": -1.2722926139831543, "rewards/margins": 0.05193984508514404, "rewards/rejected": -1.3242323398590088, "step": 256 }, { "epoch": 0.03, "learning_rate": 2.954647454824613e-07, "logits/chosen": -2.4489333629608154, "logits/rejected": -2.611192464828491, "logps/chosen": -313.12982177734375, "logps/rejected": -188.38650512695312, "loss": 0.793, "rewards/accuracies": 0.625, "rewards/chosen": -0.25091975927352905, "rewards/margins": 0.15536347031593323, "rewards/rejected": -0.4062832295894623, "step": 257 }, { "epoch": 0.03, "learning_rate": 2.95429313806543e-07, "logits/chosen": -1.9388154745101929, "logits/rejected": -1.8804645538330078, "logps/chosen": -510.9178161621094, "logps/rejected": -424.8813171386719, "loss": 0.3365, "rewards/accuracies": 1.0, "rewards/chosen": -0.30925655364990234, "rewards/margins": 1.298856496810913, "rewards/rejected": -1.608113169670105, "step": 258 }, { "epoch": 0.03, "learning_rate": 2.9539388213062476e-07, "logits/chosen": -2.0558621883392334, "logits/rejected": -2.029038906097412, "logps/chosen": -407.1056213378906, "logps/rejected": -294.9714660644531, "loss": 0.5928, "rewards/accuracies": 0.5, "rewards/chosen": -0.4749473035335541, "rewards/margins": 0.32635682821273804, "rewards/rejected": -0.8013041019439697, "step": 259 }, { "epoch": 0.03, "learning_rate": 2.953584504547065e-07, "logits/chosen": -2.4706008434295654, "logits/rejected": -2.4558961391448975, "logps/chosen": -288.6748962402344, "logps/rejected": -236.25311279296875, "loss": 0.62, "rewards/accuracies": 0.75, "rewards/chosen": -0.7156071066856384, "rewards/margins": 0.5134493112564087, "rewards/rejected": -1.229056477546692, "step": 260 }, { "epoch": 0.03, "learning_rate": 2.953230187787882e-07, "logits/chosen": -1.9513782262802124, "logits/rejected": -2.3186841011047363, "logps/chosen": -297.04290771484375, "logps/rejected": -207.31565856933594, "loss": 0.7052, "rewards/accuracies": 0.625, "rewards/chosen": -0.7733050584793091, "rewards/margins": 0.2747534215450287, "rewards/rejected": -1.0480585098266602, "step": 261 }, { "epoch": 0.03, "learning_rate": 2.9528758710286996e-07, "logits/chosen": -2.430399179458618, "logits/rejected": -2.4979355335235596, "logps/chosen": -461.70343017578125, "logps/rejected": -287.472412109375, "loss": 0.4636, "rewards/accuracies": 0.75, "rewards/chosen": -0.4616781175136566, "rewards/margins": 1.174939513206482, "rewards/rejected": -1.636617660522461, "step": 262 }, { "epoch": 0.03, "learning_rate": 2.9525215542695165e-07, "logits/chosen": -2.741328001022339, "logits/rejected": -2.6582798957824707, "logps/chosen": -276.3038330078125, "logps/rejected": -283.74920654296875, "loss": 0.6814, "rewards/accuracies": 0.75, "rewards/chosen": -0.4555474519729614, "rewards/margins": 0.4569088816642761, "rewards/rejected": -0.9124563932418823, "step": 263 }, { "epoch": 0.03, "learning_rate": 2.952167237510334e-07, "logits/chosen": -1.8767638206481934, "logits/rejected": -2.134045362472534, "logps/chosen": -344.1347961425781, "logps/rejected": -342.5531921386719, "loss": 0.4246, "rewards/accuracies": 0.875, "rewards/chosen": -0.3596734404563904, "rewards/margins": 0.7209112048149109, "rewards/rejected": -1.0805846452713013, "step": 264 }, { "epoch": 0.03, "learning_rate": 2.9518129207511515e-07, "logits/chosen": -2.8755650520324707, "logits/rejected": -2.8715429306030273, "logps/chosen": -392.4359436035156, "logps/rejected": -252.48297119140625, "loss": 0.3009, "rewards/accuracies": 1.0, "rewards/chosen": 0.25258395075798035, "rewards/margins": 1.3310014009475708, "rewards/rejected": -1.0784173011779785, "step": 265 }, { "epoch": 0.03, "learning_rate": 2.951458603991969e-07, "logits/chosen": -2.176382303237915, "logits/rejected": -2.4258620738983154, "logps/chosen": -661.7243041992188, "logps/rejected": -342.0917053222656, "loss": 0.5881, "rewards/accuracies": 0.75, "rewards/chosen": -0.14278724789619446, "rewards/margins": 0.39230793714523315, "rewards/rejected": -0.5350951552391052, "step": 266 }, { "epoch": 0.03, "learning_rate": 2.951104287232786e-07, "logits/chosen": -1.8555489778518677, "logits/rejected": -2.122600793838501, "logps/chosen": -377.9168395996094, "logps/rejected": -287.09393310546875, "loss": 0.9388, "rewards/accuracies": 0.5, "rewards/chosen": -0.8848345279693604, "rewards/margins": -0.004211366176605225, "rewards/rejected": -0.8806231021881104, "step": 267 }, { "epoch": 0.03, "learning_rate": 2.9507499704736034e-07, "logits/chosen": -2.4599874019622803, "logits/rejected": -2.4040067195892334, "logps/chosen": -301.8019714355469, "logps/rejected": -254.99032592773438, "loss": 0.5875, "rewards/accuracies": 0.625, "rewards/chosen": -0.33585959672927856, "rewards/margins": 0.23818278312683105, "rewards/rejected": -0.5740423798561096, "step": 268 }, { "epoch": 0.03, "learning_rate": 2.9503956537144204e-07, "logits/chosen": -2.2341794967651367, "logits/rejected": -2.505638599395752, "logps/chosen": -501.1637878417969, "logps/rejected": -296.4586181640625, "loss": 0.8335, "rewards/accuracies": 0.5, "rewards/chosen": -0.2650803327560425, "rewards/margins": 0.013272076845169067, "rewards/rejected": -0.27835240960121155, "step": 269 }, { "epoch": 0.03, "learning_rate": 2.950041336955238e-07, "logits/chosen": -1.9049019813537598, "logits/rejected": -2.0151309967041016, "logps/chosen": -316.24652099609375, "logps/rejected": -277.3941955566406, "loss": 0.5259, "rewards/accuracies": 0.875, "rewards/chosen": -0.3008253276348114, "rewards/margins": 0.4698154926300049, "rewards/rejected": -0.7706408500671387, "step": 270 }, { "epoch": 0.03, "learning_rate": 2.9496870201960553e-07, "logits/chosen": -2.5724985599517822, "logits/rejected": -2.657470703125, "logps/chosen": -311.4435119628906, "logps/rejected": -196.11766052246094, "loss": 0.2728, "rewards/accuracies": 1.0, "rewards/chosen": -0.06979581713676453, "rewards/margins": 1.4767982959747314, "rewards/rejected": -1.5465940237045288, "step": 271 }, { "epoch": 0.03, "learning_rate": 2.9493327034368723e-07, "logits/chosen": -1.5899715423583984, "logits/rejected": -2.04009747505188, "logps/chosen": -332.3902282714844, "logps/rejected": -223.86692810058594, "loss": 0.951, "rewards/accuracies": 0.625, "rewards/chosen": -1.1163685321807861, "rewards/margins": 0.1418951153755188, "rewards/rejected": -1.2582635879516602, "step": 272 }, { "epoch": 0.03, "learning_rate": 2.94897838667769e-07, "logits/chosen": -2.602829933166504, "logits/rejected": -2.8829126358032227, "logps/chosen": -618.8819580078125, "logps/rejected": -253.289306640625, "loss": 0.7115, "rewards/accuracies": 0.875, "rewards/chosen": -0.20049412548542023, "rewards/margins": 0.8102298378944397, "rewards/rejected": -1.0107239484786987, "step": 273 }, { "epoch": 0.03, "learning_rate": 2.948624069918507e-07, "logits/chosen": -1.9868472814559937, "logits/rejected": -2.449455499649048, "logps/chosen": -310.1795654296875, "logps/rejected": -195.93051147460938, "loss": 0.5633, "rewards/accuracies": 0.625, "rewards/chosen": -0.23465867340564728, "rewards/margins": 0.4427909851074219, "rewards/rejected": -0.677449643611908, "step": 274 }, { "epoch": 0.03, "learning_rate": 2.948269753159324e-07, "logits/chosen": -2.969364881515503, "logits/rejected": -2.962390184402466, "logps/chosen": -157.34304809570312, "logps/rejected": -130.302490234375, "loss": 0.3617, "rewards/accuracies": 1.0, "rewards/chosen": -0.08375561237335205, "rewards/margins": 1.063370704650879, "rewards/rejected": -1.1471264362335205, "step": 275 }, { "epoch": 0.03, "learning_rate": 2.9479154364001417e-07, "logits/chosen": -2.027743339538574, "logits/rejected": -1.8373136520385742, "logps/chosen": -256.11737060546875, "logps/rejected": -318.2117004394531, "loss": 0.6053, "rewards/accuracies": 0.625, "rewards/chosen": -1.2073525190353394, "rewards/margins": 0.5226768255233765, "rewards/rejected": -1.7300293445587158, "step": 276 }, { "epoch": 0.03, "learning_rate": 2.947561119640959e-07, "logits/chosen": -2.2515320777893066, "logits/rejected": -2.0383710861206055, "logps/chosen": -416.75750732421875, "logps/rejected": -364.9051818847656, "loss": 0.3936, "rewards/accuracies": 0.875, "rewards/chosen": -0.5839005708694458, "rewards/margins": 0.9212774038314819, "rewards/rejected": -1.5051779747009277, "step": 277 }, { "epoch": 0.03, "learning_rate": 2.947206802881776e-07, "logits/chosen": -1.8994593620300293, "logits/rejected": -2.0609066486358643, "logps/chosen": -338.57421875, "logps/rejected": -273.29254150390625, "loss": 0.6417, "rewards/accuracies": 0.625, "rewards/chosen": -0.029149174690246582, "rewards/margins": 0.2943916320800781, "rewards/rejected": -0.3235408067703247, "step": 278 }, { "epoch": 0.03, "learning_rate": 2.9468524861225936e-07, "logits/chosen": -2.3778624534606934, "logits/rejected": -2.2536587715148926, "logps/chosen": -270.11712646484375, "logps/rejected": -240.03390502929688, "loss": 0.4873, "rewards/accuracies": 0.75, "rewards/chosen": -0.286642849445343, "rewards/margins": 0.7670831680297852, "rewards/rejected": -1.0537259578704834, "step": 279 }, { "epoch": 0.03, "learning_rate": 2.9464981693634106e-07, "logits/chosen": -1.9662208557128906, "logits/rejected": -2.114424705505371, "logps/chosen": -399.0927734375, "logps/rejected": -238.5034637451172, "loss": 0.4433, "rewards/accuracies": 0.75, "rewards/chosen": -0.36703047156333923, "rewards/margins": 1.092298984527588, "rewards/rejected": -1.4593294858932495, "step": 280 }, { "epoch": 0.03, "learning_rate": 2.946143852604228e-07, "logits/chosen": -2.291977882385254, "logits/rejected": -1.7917745113372803, "logps/chosen": -269.79132080078125, "logps/rejected": -449.75653076171875, "loss": 0.318, "rewards/accuracies": 0.875, "rewards/chosen": -0.37038087844848633, "rewards/margins": 1.6547183990478516, "rewards/rejected": -2.025099277496338, "step": 281 }, { "epoch": 0.03, "learning_rate": 2.9457895358450456e-07, "logits/chosen": -2.1801748275756836, "logits/rejected": -2.332432270050049, "logps/chosen": -325.0802917480469, "logps/rejected": -388.4512634277344, "loss": 0.5333, "rewards/accuracies": 0.625, "rewards/chosen": -0.367206871509552, "rewards/margins": 0.6799616813659668, "rewards/rejected": -1.047168493270874, "step": 282 }, { "epoch": 0.03, "learning_rate": 2.9454352190858625e-07, "logits/chosen": -1.3814514875411987, "logits/rejected": -1.726662039756775, "logps/chosen": -396.02337646484375, "logps/rejected": -281.3721923828125, "loss": 0.6014, "rewards/accuracies": 0.625, "rewards/chosen": -0.4060601592063904, "rewards/margins": 0.6215767860412598, "rewards/rejected": -1.027637004852295, "step": 283 }, { "epoch": 0.03, "learning_rate": 2.94508090232668e-07, "logits/chosen": -2.6151933670043945, "logits/rejected": -2.6169497966766357, "logps/chosen": -198.55926513671875, "logps/rejected": -137.8518829345703, "loss": 0.4985, "rewards/accuracies": 0.75, "rewards/chosen": -0.1619531661272049, "rewards/margins": 0.9281085133552551, "rewards/rejected": -1.0900616645812988, "step": 284 }, { "epoch": 0.03, "learning_rate": 2.944726585567497e-07, "logits/chosen": -2.9354021549224854, "logits/rejected": -2.7473397254943848, "logps/chosen": -205.51333618164062, "logps/rejected": -268.3748779296875, "loss": 0.4083, "rewards/accuracies": 0.875, "rewards/chosen": -0.3936096429824829, "rewards/margins": 0.9245909452438354, "rewards/rejected": -1.3182005882263184, "step": 285 }, { "epoch": 0.03, "learning_rate": 2.9443722688083145e-07, "logits/chosen": -1.4875712394714355, "logits/rejected": -2.0510120391845703, "logps/chosen": -409.445556640625, "logps/rejected": -196.56214904785156, "loss": 1.0093, "rewards/accuracies": 0.625, "rewards/chosen": -0.6522005796432495, "rewards/margins": -0.13873091340065002, "rewards/rejected": -0.5134696364402771, "step": 286 }, { "epoch": 0.03, "learning_rate": 2.944017952049132e-07, "logits/chosen": -2.3332650661468506, "logits/rejected": -2.0238451957702637, "logps/chosen": -238.11366271972656, "logps/rejected": -328.9278259277344, "loss": 0.5911, "rewards/accuracies": 0.625, "rewards/chosen": -0.3827953338623047, "rewards/margins": 1.046369194984436, "rewards/rejected": -1.4291645288467407, "step": 287 }, { "epoch": 0.03, "learning_rate": 2.9436636352899494e-07, "logits/chosen": -2.5052852630615234, "logits/rejected": -2.4758758544921875, "logps/chosen": -281.4767761230469, "logps/rejected": -350.5293884277344, "loss": 0.3689, "rewards/accuracies": 0.875, "rewards/chosen": -0.2601891756057739, "rewards/margins": 1.2887394428253174, "rewards/rejected": -1.5489286184310913, "step": 288 }, { "epoch": 0.03, "learning_rate": 2.9433093185307664e-07, "logits/chosen": -2.366420030593872, "logits/rejected": -2.3785367012023926, "logps/chosen": -301.2152404785156, "logps/rejected": -276.4963684082031, "loss": 0.3418, "rewards/accuracies": 0.875, "rewards/chosen": 0.02587474137544632, "rewards/margins": 1.1035606861114502, "rewards/rejected": -1.0776861906051636, "step": 289 }, { "epoch": 0.03, "learning_rate": 2.942955001771584e-07, "logits/chosen": -2.093283176422119, "logits/rejected": -2.4747838973999023, "logps/chosen": -420.95843505859375, "logps/rejected": -170.1190185546875, "loss": 0.7256, "rewards/accuracies": 0.5, "rewards/chosen": -0.5149084329605103, "rewards/margins": 0.5440642237663269, "rewards/rejected": -1.0589725971221924, "step": 290 }, { "epoch": 0.03, "learning_rate": 2.942600685012401e-07, "logits/chosen": -2.427334785461426, "logits/rejected": -2.646097183227539, "logps/chosen": -376.02117919921875, "logps/rejected": -304.498779296875, "loss": 0.4787, "rewards/accuracies": 0.625, "rewards/chosen": -0.0406392477452755, "rewards/margins": 1.0841079950332642, "rewards/rejected": -1.1247472763061523, "step": 291 }, { "epoch": 0.03, "learning_rate": 2.9422463682532183e-07, "logits/chosen": -2.721784830093384, "logits/rejected": -2.788503408432007, "logps/chosen": -330.68353271484375, "logps/rejected": -319.1875, "loss": 0.3812, "rewards/accuracies": 0.75, "rewards/chosen": -0.06904447078704834, "rewards/margins": 1.3375811576843262, "rewards/rejected": -1.406625747680664, "step": 292 }, { "epoch": 0.03, "learning_rate": 2.9418920514940353e-07, "logits/chosen": -2.5546584129333496, "logits/rejected": -2.2899272441864014, "logps/chosen": -247.6539306640625, "logps/rejected": -292.12799072265625, "loss": 0.8762, "rewards/accuracies": 0.75, "rewards/chosen": -0.759085476398468, "rewards/margins": 0.21519267559051514, "rewards/rejected": -0.9742782115936279, "step": 293 }, { "epoch": 0.03, "learning_rate": 2.941537734734853e-07, "logits/chosen": -2.8096203804016113, "logits/rejected": -2.6154065132141113, "logps/chosen": -178.92384338378906, "logps/rejected": -332.939453125, "loss": 0.2987, "rewards/accuracies": 0.875, "rewards/chosen": 0.16457784175872803, "rewards/margins": 1.7711654901504517, "rewards/rejected": -1.6065876483917236, "step": 294 }, { "epoch": 0.03, "learning_rate": 2.94118341797567e-07, "logits/chosen": -2.797778844833374, "logits/rejected": -2.829437494277954, "logps/chosen": -345.2289123535156, "logps/rejected": -283.1036376953125, "loss": 0.3835, "rewards/accuracies": 0.75, "rewards/chosen": -0.3279910087585449, "rewards/margins": 1.4313820600509644, "rewards/rejected": -1.7593731880187988, "step": 295 }, { "epoch": 0.03, "learning_rate": 2.940829101216487e-07, "logits/chosen": -1.6894688606262207, "logits/rejected": -1.6803333759307861, "logps/chosen": -598.8785400390625, "logps/rejected": -595.1441650390625, "loss": 0.4342, "rewards/accuracies": 0.75, "rewards/chosen": 0.010276809334754944, "rewards/margins": 1.0291900634765625, "rewards/rejected": -1.0189132690429688, "step": 296 }, { "epoch": 0.03, "learning_rate": 2.9404747844573047e-07, "logits/chosen": -2.36710786819458, "logits/rejected": -2.6229710578918457, "logps/chosen": -326.7007141113281, "logps/rejected": -257.9637451171875, "loss": 0.5844, "rewards/accuracies": 0.75, "rewards/chosen": -0.14547762274742126, "rewards/margins": 0.46267378330230713, "rewards/rejected": -0.608151376247406, "step": 297 }, { "epoch": 0.03, "learning_rate": 2.9401204676981216e-07, "logits/chosen": -2.285874366760254, "logits/rejected": -2.0000181198120117, "logps/chosen": -259.4213562011719, "logps/rejected": -284.89862060546875, "loss": 0.7303, "rewards/accuracies": 0.75, "rewards/chosen": -0.8663985729217529, "rewards/margins": 0.20266467332839966, "rewards/rejected": -1.0690631866455078, "step": 298 }, { "epoch": 0.03, "learning_rate": 2.9397661509389397e-07, "logits/chosen": -2.6445860862731934, "logits/rejected": -2.664821147918701, "logps/chosen": -245.68978881835938, "logps/rejected": -252.12106323242188, "loss": 0.9665, "rewards/accuracies": 0.625, "rewards/chosen": -1.0938446521759033, "rewards/margins": 0.18389992415905, "rewards/rejected": -1.2777445316314697, "step": 299 }, { "epoch": 0.03, "learning_rate": 2.9394118341797566e-07, "logits/chosen": -2.6599860191345215, "logits/rejected": -2.7291247844696045, "logps/chosen": -170.41127014160156, "logps/rejected": -158.5400390625, "loss": 0.5093, "rewards/accuracies": 0.875, "rewards/chosen": -0.9475737810134888, "rewards/margins": 0.47724682092666626, "rewards/rejected": -1.4248205423355103, "step": 300 }, { "epoch": 0.04, "learning_rate": 2.939057517420574e-07, "logits/chosen": -2.29803204536438, "logits/rejected": -2.607515811920166, "logps/chosen": -364.19329833984375, "logps/rejected": -204.98898315429688, "loss": 0.5787, "rewards/accuracies": 0.75, "rewards/chosen": -0.4560944139957428, "rewards/margins": 0.6856675148010254, "rewards/rejected": -1.1417618989944458, "step": 301 }, { "epoch": 0.04, "learning_rate": 2.938703200661391e-07, "logits/chosen": -1.950028896331787, "logits/rejected": -2.022510290145874, "logps/chosen": -394.421875, "logps/rejected": -287.7738952636719, "loss": 0.4218, "rewards/accuracies": 0.75, "rewards/chosen": 0.257552832365036, "rewards/margins": 1.3202406167984009, "rewards/rejected": -1.062687873840332, "step": 302 }, { "epoch": 0.04, "learning_rate": 2.9383488839022085e-07, "logits/chosen": -2.1507041454315186, "logits/rejected": -2.356308698654175, "logps/chosen": -305.9482116699219, "logps/rejected": -186.61788940429688, "loss": 0.4265, "rewards/accuracies": 0.875, "rewards/chosen": -0.15852631628513336, "rewards/margins": 0.9647853374481201, "rewards/rejected": -1.1233115196228027, "step": 303 }, { "epoch": 0.04, "learning_rate": 2.9379945671430255e-07, "logits/chosen": -2.5175838470458984, "logits/rejected": -2.486604690551758, "logps/chosen": -262.03350830078125, "logps/rejected": -255.69540405273438, "loss": 0.7578, "rewards/accuracies": 0.625, "rewards/chosen": -0.4626442790031433, "rewards/margins": -0.029388144612312317, "rewards/rejected": -0.4332561492919922, "step": 304 }, { "epoch": 0.04, "learning_rate": 2.937640250383843e-07, "logits/chosen": -2.5114057064056396, "logits/rejected": -2.107201337814331, "logps/chosen": -241.24156188964844, "logps/rejected": -297.77947998046875, "loss": 0.3567, "rewards/accuracies": 0.75, "rewards/chosen": -0.25777173042297363, "rewards/margins": 1.7837862968444824, "rewards/rejected": -2.041558027267456, "step": 305 }, { "epoch": 0.04, "learning_rate": 2.9372859336246605e-07, "logits/chosen": -2.3229546546936035, "logits/rejected": -2.4203202724456787, "logps/chosen": -307.3225402832031, "logps/rejected": -375.33990478515625, "loss": 0.2627, "rewards/accuracies": 1.0, "rewards/chosen": 0.3219256103038788, "rewards/margins": 1.551868200302124, "rewards/rejected": -1.2299425601959229, "step": 306 }, { "epoch": 0.04, "learning_rate": 2.9369316168654774e-07, "logits/chosen": -2.8530185222625732, "logits/rejected": -2.6325011253356934, "logps/chosen": -290.1589660644531, "logps/rejected": -219.94435119628906, "loss": 0.3821, "rewards/accuracies": 0.75, "rewards/chosen": 0.04211246967315674, "rewards/margins": 1.2057253122329712, "rewards/rejected": -1.1636128425598145, "step": 307 }, { "epoch": 0.04, "learning_rate": 2.936577300106295e-07, "logits/chosen": -2.1724071502685547, "logits/rejected": -1.9063711166381836, "logps/chosen": -239.16073608398438, "logps/rejected": -228.0086212158203, "loss": 0.5574, "rewards/accuracies": 0.5, "rewards/chosen": -0.40734410285949707, "rewards/margins": 0.5282961130142212, "rewards/rejected": -0.9356402158737183, "step": 308 }, { "epoch": 0.04, "learning_rate": 2.936222983347112e-07, "logits/chosen": -2.1313507556915283, "logits/rejected": -2.1726279258728027, "logps/chosen": -350.1370849609375, "logps/rejected": -376.45904541015625, "loss": 0.2704, "rewards/accuracies": 0.875, "rewards/chosen": -0.08623509109020233, "rewards/margins": 1.5222742557525635, "rewards/rejected": -1.6085093021392822, "step": 309 }, { "epoch": 0.04, "learning_rate": 2.9358686665879293e-07, "logits/chosen": -2.493105173110962, "logits/rejected": -2.646644115447998, "logps/chosen": -185.68478393554688, "logps/rejected": -159.75222778320312, "loss": 0.781, "rewards/accuracies": 0.625, "rewards/chosen": -0.7015661597251892, "rewards/margins": 0.41815176606178284, "rewards/rejected": -1.1197179555892944, "step": 310 }, { "epoch": 0.04, "learning_rate": 2.935514349828747e-07, "logits/chosen": -1.7797365188598633, "logits/rejected": -1.9488158226013184, "logps/chosen": -298.86151123046875, "logps/rejected": -264.2230224609375, "loss": 0.5771, "rewards/accuracies": 0.75, "rewards/chosen": -1.1043767929077148, "rewards/margins": 0.5647428631782532, "rewards/rejected": -1.6691197156906128, "step": 311 }, { "epoch": 0.04, "learning_rate": 2.9351600330695643e-07, "logits/chosen": -2.6082606315612793, "logits/rejected": -2.602665901184082, "logps/chosen": -174.48670959472656, "logps/rejected": -133.47076416015625, "loss": 1.0146, "rewards/accuracies": 0.375, "rewards/chosen": -0.512934684753418, "rewards/margins": -0.0038271695375442505, "rewards/rejected": -0.5091075301170349, "step": 312 }, { "epoch": 0.04, "learning_rate": 2.9348057163103813e-07, "logits/chosen": -2.2726187705993652, "logits/rejected": -2.2766458988189697, "logps/chosen": -151.23597717285156, "logps/rejected": -218.1590576171875, "loss": 0.5699, "rewards/accuracies": 0.75, "rewards/chosen": -0.930323600769043, "rewards/margins": 0.7915714979171753, "rewards/rejected": -1.7218952178955078, "step": 313 }, { "epoch": 0.04, "learning_rate": 2.934451399551199e-07, "logits/chosen": -2.8945980072021484, "logits/rejected": -2.8225159645080566, "logps/chosen": -230.0916290283203, "logps/rejected": -222.00045776367188, "loss": 0.3616, "rewards/accuracies": 0.875, "rewards/chosen": -0.5174702405929565, "rewards/margins": 1.3400602340698242, "rewards/rejected": -1.8575303554534912, "step": 314 }, { "epoch": 0.04, "learning_rate": 2.9340970827920157e-07, "logits/chosen": -2.62554931640625, "logits/rejected": -2.6453537940979004, "logps/chosen": -308.14825439453125, "logps/rejected": -176.90383911132812, "loss": 0.8522, "rewards/accuracies": 0.75, "rewards/chosen": -0.7629151344299316, "rewards/margins": 0.31568899750709534, "rewards/rejected": -1.0786041021347046, "step": 315 }, { "epoch": 0.04, "learning_rate": 2.933742766032833e-07, "logits/chosen": -2.029466152191162, "logits/rejected": -2.171477794647217, "logps/chosen": -314.6988525390625, "logps/rejected": -375.4151611328125, "loss": 0.685, "rewards/accuracies": 0.75, "rewards/chosen": -0.5350890755653381, "rewards/margins": 0.5873953104019165, "rewards/rejected": -1.1224844455718994, "step": 316 }, { "epoch": 0.04, "learning_rate": 2.9333884492736507e-07, "logits/chosen": -2.9043831825256348, "logits/rejected": -2.9594264030456543, "logps/chosen": -206.5111541748047, "logps/rejected": -225.08241271972656, "loss": 0.5474, "rewards/accuracies": 0.75, "rewards/chosen": -0.4688420295715332, "rewards/margins": 0.8206722736358643, "rewards/rejected": -1.2895143032073975, "step": 317 }, { "epoch": 0.04, "learning_rate": 2.9330341325144676e-07, "logits/chosen": -2.895951271057129, "logits/rejected": -2.8387908935546875, "logps/chosen": -337.5228271484375, "logps/rejected": -334.8077697753906, "loss": 0.7819, "rewards/accuracies": 0.75, "rewards/chosen": -0.659294843673706, "rewards/margins": 0.35979580879211426, "rewards/rejected": -1.0190907716751099, "step": 318 }, { "epoch": 0.04, "learning_rate": 2.932679815755285e-07, "logits/chosen": -2.223888874053955, "logits/rejected": -2.391298532485962, "logps/chosen": -213.8368682861328, "logps/rejected": -175.1593017578125, "loss": 0.4633, "rewards/accuracies": 0.75, "rewards/chosen": -0.17392238974571228, "rewards/margins": 0.6756976842880249, "rewards/rejected": -0.8496200442314148, "step": 319 }, { "epoch": 0.04, "learning_rate": 2.932325498996102e-07, "logits/chosen": -2.329521656036377, "logits/rejected": -2.2428598403930664, "logps/chosen": -331.2684631347656, "logps/rejected": -293.89569091796875, "loss": 0.853, "rewards/accuracies": 0.625, "rewards/chosen": -0.8404381275177002, "rewards/margins": 0.40912631154060364, "rewards/rejected": -1.2495644092559814, "step": 320 }, { "epoch": 0.04, "learning_rate": 2.9319711822369196e-07, "logits/chosen": -2.395176887512207, "logits/rejected": -2.6548402309417725, "logps/chosen": -200.82545471191406, "logps/rejected": -100.20393371582031, "loss": 1.996, "rewards/accuracies": 0.625, "rewards/chosen": -1.5338711738586426, "rewards/margins": -1.283815622329712, "rewards/rejected": -0.2500556707382202, "step": 321 }, { "epoch": 0.04, "learning_rate": 2.931616865477737e-07, "logits/chosen": -2.295654296875, "logits/rejected": -2.3269121646881104, "logps/chosen": -168.94126892089844, "logps/rejected": -181.1977996826172, "loss": 0.6319, "rewards/accuracies": 0.625, "rewards/chosen": -0.38651272654533386, "rewards/margins": 0.4317438006401062, "rewards/rejected": -0.8182565569877625, "step": 322 }, { "epoch": 0.04, "learning_rate": 2.9312625487185545e-07, "logits/chosen": -2.679442882537842, "logits/rejected": -2.5386698246002197, "logps/chosen": -357.37164306640625, "logps/rejected": -204.69769287109375, "loss": 0.4761, "rewards/accuracies": 0.625, "rewards/chosen": -0.49394285678863525, "rewards/margins": 0.948100209236145, "rewards/rejected": -1.4420431852340698, "step": 323 }, { "epoch": 0.04, "learning_rate": 2.9309082319593715e-07, "logits/chosen": -2.3916513919830322, "logits/rejected": -2.5804829597473145, "logps/chosen": -392.87078857421875, "logps/rejected": -171.0058135986328, "loss": 0.6124, "rewards/accuracies": 0.75, "rewards/chosen": -0.09215235710144043, "rewards/margins": 0.6518601179122925, "rewards/rejected": -0.7440124750137329, "step": 324 }, { "epoch": 0.04, "learning_rate": 2.930553915200189e-07, "logits/chosen": -2.369706153869629, "logits/rejected": -2.4630026817321777, "logps/chosen": -222.11709594726562, "logps/rejected": -211.93954467773438, "loss": 0.8231, "rewards/accuracies": 0.75, "rewards/chosen": -0.40757668018341064, "rewards/margins": 0.23469087481498718, "rewards/rejected": -0.6422675251960754, "step": 325 }, { "epoch": 0.04, "learning_rate": 2.930199598441006e-07, "logits/chosen": -2.2539374828338623, "logits/rejected": -2.287092447280884, "logps/chosen": -121.72743225097656, "logps/rejected": -111.5833740234375, "loss": 0.5449, "rewards/accuracies": 0.625, "rewards/chosen": -0.4091636538505554, "rewards/margins": 0.46893033385276794, "rewards/rejected": -0.878093957901001, "step": 326 }, { "epoch": 0.04, "learning_rate": 2.9298452816818234e-07, "logits/chosen": -2.0524537563323975, "logits/rejected": -2.1779425144195557, "logps/chosen": -275.94903564453125, "logps/rejected": -273.35821533203125, "loss": 0.4695, "rewards/accuracies": 0.875, "rewards/chosen": 0.055811215192079544, "rewards/margins": 0.7680109143257141, "rewards/rejected": -0.7121996879577637, "step": 327 }, { "epoch": 0.04, "learning_rate": 2.929490964922641e-07, "logits/chosen": -2.068142890930176, "logits/rejected": -2.379666805267334, "logps/chosen": -265.1993103027344, "logps/rejected": -225.3121337890625, "loss": 0.5916, "rewards/accuracies": 0.625, "rewards/chosen": -0.8519630432128906, "rewards/margins": 0.6603106260299683, "rewards/rejected": -1.5122735500335693, "step": 328 }, { "epoch": 0.04, "learning_rate": 2.929136648163458e-07, "logits/chosen": -2.3993899822235107, "logits/rejected": -2.6701223850250244, "logps/chosen": -240.1083221435547, "logps/rejected": -323.28564453125, "loss": 0.5382, "rewards/accuracies": 0.625, "rewards/chosen": -0.31654006242752075, "rewards/margins": 0.6563017964363098, "rewards/rejected": -0.9728418588638306, "step": 329 }, { "epoch": 0.04, "learning_rate": 2.9287823314042754e-07, "logits/chosen": -2.6669204235076904, "logits/rejected": -2.564671277999878, "logps/chosen": -123.85690307617188, "logps/rejected": -205.9306640625, "loss": 1.1335, "rewards/accuracies": 0.5, "rewards/chosen": -0.9808606505393982, "rewards/margins": 0.11081727594137192, "rewards/rejected": -1.0916779041290283, "step": 330 }, { "epoch": 0.04, "learning_rate": 2.9284280146450923e-07, "logits/chosen": -2.3550915718078613, "logits/rejected": -2.516937732696533, "logps/chosen": -391.8165283203125, "logps/rejected": -235.18252563476562, "loss": 0.5098, "rewards/accuracies": 0.625, "rewards/chosen": -0.623511016368866, "rewards/margins": 0.6541772484779358, "rewards/rejected": -1.2776882648468018, "step": 331 }, { "epoch": 0.04, "learning_rate": 2.92807369788591e-07, "logits/chosen": -2.9920287132263184, "logits/rejected": -2.9969563484191895, "logps/chosen": -180.14744567871094, "logps/rejected": -219.72019958496094, "loss": 0.4066, "rewards/accuracies": 0.75, "rewards/chosen": -0.24392685294151306, "rewards/margins": 0.9460574984550476, "rewards/rejected": -1.1899843215942383, "step": 332 }, { "epoch": 0.04, "learning_rate": 2.927719381126727e-07, "logits/chosen": -2.2411112785339355, "logits/rejected": -2.3199000358581543, "logps/chosen": -296.79571533203125, "logps/rejected": -371.7720947265625, "loss": 0.8679, "rewards/accuracies": 0.5, "rewards/chosen": -0.7076969742774963, "rewards/margins": 0.24727892875671387, "rewards/rejected": -0.9549759030342102, "step": 333 }, { "epoch": 0.04, "learning_rate": 2.927365064367545e-07, "logits/chosen": -2.8356363773345947, "logits/rejected": -2.749779224395752, "logps/chosen": -297.3299865722656, "logps/rejected": -207.79348754882812, "loss": 0.4644, "rewards/accuracies": 0.75, "rewards/chosen": -0.15958870947360992, "rewards/margins": 0.8317511081695557, "rewards/rejected": -0.9913398027420044, "step": 334 }, { "epoch": 0.04, "learning_rate": 2.9270107476083617e-07, "logits/chosen": -2.4010345935821533, "logits/rejected": -2.444368600845337, "logps/chosen": -196.5550079345703, "logps/rejected": -183.0149383544922, "loss": 0.4148, "rewards/accuracies": 0.75, "rewards/chosen": -0.1564604490995407, "rewards/margins": 1.3196423053741455, "rewards/rejected": -1.4761028289794922, "step": 335 }, { "epoch": 0.04, "learning_rate": 2.926656430849179e-07, "logits/chosen": -2.9160611629486084, "logits/rejected": -2.7852678298950195, "logps/chosen": -314.6018371582031, "logps/rejected": -218.02850341796875, "loss": 0.5763, "rewards/accuracies": 0.75, "rewards/chosen": -0.45732107758522034, "rewards/margins": 1.3264633417129517, "rewards/rejected": -1.7837843894958496, "step": 336 }, { "epoch": 0.04, "learning_rate": 2.926302114089996e-07, "logits/chosen": -1.8458666801452637, "logits/rejected": -1.565752387046814, "logps/chosen": -164.42196655273438, "logps/rejected": -238.62448120117188, "loss": 0.5317, "rewards/accuracies": 0.875, "rewards/chosen": -0.610163688659668, "rewards/margins": 0.7577385306358337, "rewards/rejected": -1.367902159690857, "step": 337 }, { "epoch": 0.04, "learning_rate": 2.9259477973308137e-07, "logits/chosen": -2.0817527770996094, "logits/rejected": -2.321756362915039, "logps/chosen": -344.75726318359375, "logps/rejected": -276.75738525390625, "loss": 0.696, "rewards/accuracies": 0.875, "rewards/chosen": -0.3976415693759918, "rewards/margins": 0.6477232575416565, "rewards/rejected": -1.0453648567199707, "step": 338 }, { "epoch": 0.04, "learning_rate": 2.925593480571631e-07, "logits/chosen": -2.6471240520477295, "logits/rejected": -2.897420883178711, "logps/chosen": -193.80697631835938, "logps/rejected": -297.48284912109375, "loss": 0.4418, "rewards/accuracies": 0.75, "rewards/chosen": -0.14745938777923584, "rewards/margins": 0.9114383459091187, "rewards/rejected": -1.0588977336883545, "step": 339 }, { "epoch": 0.04, "learning_rate": 2.925239163812448e-07, "logits/chosen": -2.2821202278137207, "logits/rejected": -2.2430543899536133, "logps/chosen": -230.7744140625, "logps/rejected": -260.9974365234375, "loss": 0.3477, "rewards/accuracies": 0.875, "rewards/chosen": -0.3567458987236023, "rewards/margins": 1.6605525016784668, "rewards/rejected": -2.017298460006714, "step": 340 }, { "epoch": 0.04, "learning_rate": 2.9248848470532656e-07, "logits/chosen": -2.4030134677886963, "logits/rejected": -2.3298275470733643, "logps/chosen": -262.053466796875, "logps/rejected": -201.80091857910156, "loss": 0.6638, "rewards/accuracies": 0.625, "rewards/chosen": -0.10247425734996796, "rewards/margins": 0.30203771591186523, "rewards/rejected": -0.4045120179653168, "step": 341 }, { "epoch": 0.04, "learning_rate": 2.9245305302940825e-07, "logits/chosen": -2.625683069229126, "logits/rejected": -2.8678855895996094, "logps/chosen": -406.8061828613281, "logps/rejected": -269.43145751953125, "loss": 0.3382, "rewards/accuracies": 0.875, "rewards/chosen": -0.1474510133266449, "rewards/margins": 1.4309965372085571, "rewards/rejected": -1.5784475803375244, "step": 342 }, { "epoch": 0.04, "learning_rate": 2.9241762135349e-07, "logits/chosen": -2.5967495441436768, "logits/rejected": -2.370731830596924, "logps/chosen": -167.21290588378906, "logps/rejected": -199.54653930664062, "loss": 0.4807, "rewards/accuracies": 0.875, "rewards/chosen": -0.5110343098640442, "rewards/margins": 0.6498540639877319, "rewards/rejected": -1.160888433456421, "step": 343 }, { "epoch": 0.04, "learning_rate": 2.923821896775717e-07, "logits/chosen": -1.9601327180862427, "logits/rejected": -2.131619930267334, "logps/chosen": -370.6251220703125, "logps/rejected": -403.8722839355469, "loss": 0.3625, "rewards/accuracies": 1.0, "rewards/chosen": -0.89009690284729, "rewards/margins": 1.1248087882995605, "rewards/rejected": -2.0149056911468506, "step": 344 }, { "epoch": 0.04, "learning_rate": 2.9234675800165345e-07, "logits/chosen": -2.1298558712005615, "logits/rejected": -2.444132089614868, "logps/chosen": -231.8939666748047, "logps/rejected": -254.7119903564453, "loss": 0.4597, "rewards/accuracies": 0.75, "rewards/chosen": -0.04477114602923393, "rewards/margins": 0.8427332639694214, "rewards/rejected": -0.8875043988227844, "step": 345 }, { "epoch": 0.04, "learning_rate": 2.923113263257352e-07, "logits/chosen": -2.1115477085113525, "logits/rejected": -2.266761064529419, "logps/chosen": -402.6540832519531, "logps/rejected": -273.7836608886719, "loss": 0.5504, "rewards/accuracies": 0.625, "rewards/chosen": -0.09515545517206192, "rewards/margins": 0.42880088090896606, "rewards/rejected": -0.523956298828125, "step": 346 }, { "epoch": 0.04, "learning_rate": 2.9227589464981694e-07, "logits/chosen": -2.0364744663238525, "logits/rejected": -2.3418893814086914, "logps/chosen": -266.392822265625, "logps/rejected": -260.46429443359375, "loss": 1.3493, "rewards/accuracies": 0.375, "rewards/chosen": -0.9322304725646973, "rewards/margins": -0.14251938462257385, "rewards/rejected": -0.7897111773490906, "step": 347 }, { "epoch": 0.04, "learning_rate": 2.9224046297389864e-07, "logits/chosen": -2.1186115741729736, "logits/rejected": -2.2281064987182617, "logps/chosen": -566.9127197265625, "logps/rejected": -355.6307373046875, "loss": 0.6121, "rewards/accuracies": 0.625, "rewards/chosen": -0.518950343132019, "rewards/margins": 0.7422138452529907, "rewards/rejected": -1.2611641883850098, "step": 348 }, { "epoch": 0.04, "learning_rate": 2.922050312979804e-07, "logits/chosen": -1.6921344995498657, "logits/rejected": -2.184948444366455, "logps/chosen": -526.6041870117188, "logps/rejected": -314.04559326171875, "loss": 0.7727, "rewards/accuracies": 0.75, "rewards/chosen": -0.4664980173110962, "rewards/margins": 0.33103129267692566, "rewards/rejected": -0.7975293397903442, "step": 349 }, { "epoch": 0.04, "learning_rate": 2.9216959962206214e-07, "logits/chosen": -2.6512913703918457, "logits/rejected": -2.5971741676330566, "logps/chosen": -103.80355834960938, "logps/rejected": -171.83544921875, "loss": 0.5209, "rewards/accuracies": 0.625, "rewards/chosen": -0.15030202269554138, "rewards/margins": 0.7159022092819214, "rewards/rejected": -0.8662042617797852, "step": 350 }, { "epoch": 0.04, "learning_rate": 2.9213416794614383e-07, "logits/chosen": -2.267549753189087, "logits/rejected": -2.382564067840576, "logps/chosen": -337.7655029296875, "logps/rejected": -219.90203857421875, "loss": 0.3232, "rewards/accuracies": 1.0, "rewards/chosen": 0.18075509369373322, "rewards/margins": 1.231830358505249, "rewards/rejected": -1.0510752201080322, "step": 351 }, { "epoch": 0.04, "learning_rate": 2.920987362702256e-07, "logits/chosen": -2.5430941581726074, "logits/rejected": -2.6897616386413574, "logps/chosen": -284.12066650390625, "logps/rejected": -167.8983612060547, "loss": 0.5895, "rewards/accuracies": 0.875, "rewards/chosen": -0.5054863691329956, "rewards/margins": 0.7512806057929993, "rewards/rejected": -1.25676691532135, "step": 352 }, { "epoch": 0.04, "learning_rate": 2.920633045943073e-07, "logits/chosen": -1.7325901985168457, "logits/rejected": -1.751321792602539, "logps/chosen": -339.93890380859375, "logps/rejected": -286.529052734375, "loss": 0.7085, "rewards/accuracies": 0.625, "rewards/chosen": -1.009088158607483, "rewards/margins": 0.9606056809425354, "rewards/rejected": -1.969693899154663, "step": 353 }, { "epoch": 0.04, "learning_rate": 2.92027872918389e-07, "logits/chosen": -1.8583483695983887, "logits/rejected": -2.534839630126953, "logps/chosen": -377.93359375, "logps/rejected": -160.0740966796875, "loss": 0.9, "rewards/accuracies": 0.625, "rewards/chosen": -1.100935459136963, "rewards/margins": 0.3936083912849426, "rewards/rejected": -1.4945437908172607, "step": 354 }, { "epoch": 0.04, "learning_rate": 2.919924412424707e-07, "logits/chosen": -2.6600818634033203, "logits/rejected": -2.6654810905456543, "logps/chosen": -333.54119873046875, "logps/rejected": -219.50796508789062, "loss": 1.0827, "rewards/accuracies": 0.75, "rewards/chosen": -0.6857550740242004, "rewards/margins": -0.1820569932460785, "rewards/rejected": -0.5036981105804443, "step": 355 }, { "epoch": 0.04, "learning_rate": 2.9195700956655247e-07, "logits/chosen": -2.3361635208129883, "logits/rejected": -2.1461362838745117, "logps/chosen": -220.15480041503906, "logps/rejected": -352.7494812011719, "loss": 0.379, "rewards/accuracies": 0.75, "rewards/chosen": -0.01200837828218937, "rewards/margins": 1.2757107019424438, "rewards/rejected": -1.2877191305160522, "step": 356 }, { "epoch": 0.04, "learning_rate": 2.919215778906342e-07, "logits/chosen": -2.3405277729034424, "logits/rejected": -2.2837538719177246, "logps/chosen": -310.7549743652344, "logps/rejected": -337.0315856933594, "loss": 1.6176, "rewards/accuracies": 0.375, "rewards/chosen": -2.3899741172790527, "rewards/margins": -1.1155414581298828, "rewards/rejected": -1.2744327783584595, "step": 357 }, { "epoch": 0.04, "learning_rate": 2.9188614621471597e-07, "logits/chosen": -2.6386992931365967, "logits/rejected": -2.5254485607147217, "logps/chosen": -245.9833526611328, "logps/rejected": -160.0703125, "loss": 0.4477, "rewards/accuracies": 0.75, "rewards/chosen": 0.10537806153297424, "rewards/margins": 0.9918594360351562, "rewards/rejected": -0.8864814043045044, "step": 358 }, { "epoch": 0.04, "learning_rate": 2.9185071453879766e-07, "logits/chosen": -2.2128055095672607, "logits/rejected": -2.035968542098999, "logps/chosen": -339.15765380859375, "logps/rejected": -299.94012451171875, "loss": 1.2344, "rewards/accuracies": 0.75, "rewards/chosen": -1.0853787660598755, "rewards/margins": 0.7561144828796387, "rewards/rejected": -1.8414932489395142, "step": 359 }, { "epoch": 0.04, "learning_rate": 2.918152828628794e-07, "logits/chosen": -2.756640672683716, "logits/rejected": -2.8360657691955566, "logps/chosen": -189.69451904296875, "logps/rejected": -297.2071533203125, "loss": 0.3574, "rewards/accuracies": 1.0, "rewards/chosen": -0.16082653403282166, "rewards/margins": 0.9469164609909058, "rewards/rejected": -1.1077430248260498, "step": 360 }, { "epoch": 0.04, "learning_rate": 2.9177985118696116e-07, "logits/chosen": -2.657911777496338, "logits/rejected": -2.5685245990753174, "logps/chosen": -187.61907958984375, "logps/rejected": -157.60479736328125, "loss": 1.09, "rewards/accuracies": 0.625, "rewards/chosen": -1.0232057571411133, "rewards/margins": -0.1979352831840515, "rewards/rejected": -0.8252705335617065, "step": 361 }, { "epoch": 0.04, "learning_rate": 2.9174441951104286e-07, "logits/chosen": -2.6753158569335938, "logits/rejected": -2.5553839206695557, "logps/chosen": -204.78146362304688, "logps/rejected": -216.9765167236328, "loss": 0.7002, "rewards/accuracies": 0.5, "rewards/chosen": -0.6889148950576782, "rewards/margins": 0.1984691619873047, "rewards/rejected": -0.8873841166496277, "step": 362 }, { "epoch": 0.04, "learning_rate": 2.917089878351246e-07, "logits/chosen": -2.1885862350463867, "logits/rejected": -2.39033842086792, "logps/chosen": -235.45162963867188, "logps/rejected": -144.41941833496094, "loss": 0.5936, "rewards/accuracies": 0.625, "rewards/chosen": -0.24951782822608948, "rewards/margins": 0.4134438633918762, "rewards/rejected": -0.6629617214202881, "step": 363 }, { "epoch": 0.04, "learning_rate": 2.916735561592063e-07, "logits/chosen": -2.741023063659668, "logits/rejected": -2.564554452896118, "logps/chosen": -321.51788330078125, "logps/rejected": -343.75238037109375, "loss": 0.2675, "rewards/accuracies": 1.0, "rewards/chosen": 0.3317694365978241, "rewards/margins": 1.8310751914978027, "rewards/rejected": -1.4993058443069458, "step": 364 }, { "epoch": 0.04, "learning_rate": 2.9163812448328805e-07, "logits/chosen": -2.019880771636963, "logits/rejected": -1.74005925655365, "logps/chosen": -342.08795166015625, "logps/rejected": -345.8409423828125, "loss": 0.6034, "rewards/accuracies": 0.625, "rewards/chosen": -0.6675082445144653, "rewards/margins": 0.8716038465499878, "rewards/rejected": -1.5391119718551636, "step": 365 }, { "epoch": 0.04, "learning_rate": 2.9160269280736974e-07, "logits/chosen": -2.4452133178710938, "logits/rejected": -2.361947536468506, "logps/chosen": -182.25216674804688, "logps/rejected": -186.62860107421875, "loss": 0.4143, "rewards/accuracies": 0.625, "rewards/chosen": -0.9851146936416626, "rewards/margins": 0.953251302242279, "rewards/rejected": -1.9383659362792969, "step": 366 }, { "epoch": 0.04, "learning_rate": 2.915672611314515e-07, "logits/chosen": -2.1833503246307373, "logits/rejected": -2.6416120529174805, "logps/chosen": -524.2366943359375, "logps/rejected": -317.4039306640625, "loss": 0.4942, "rewards/accuracies": 0.75, "rewards/chosen": -0.2800936996936798, "rewards/margins": 0.6419193744659424, "rewards/rejected": -0.9220131039619446, "step": 367 }, { "epoch": 0.04, "learning_rate": 2.9153182945553324e-07, "logits/chosen": -1.9374914169311523, "logits/rejected": -1.8206708431243896, "logps/chosen": -351.09735107421875, "logps/rejected": -376.822265625, "loss": 0.4398, "rewards/accuracies": 0.875, "rewards/chosen": -0.03537649288773537, "rewards/margins": 0.6947737336158752, "rewards/rejected": -0.7301502227783203, "step": 368 }, { "epoch": 0.04, "learning_rate": 2.91496397779615e-07, "logits/chosen": -1.9805552959442139, "logits/rejected": -2.0341053009033203, "logps/chosen": -237.97662353515625, "logps/rejected": -323.3060302734375, "loss": 0.6642, "rewards/accuracies": 0.5, "rewards/chosen": -0.055992819368839264, "rewards/margins": 0.17544439435005188, "rewards/rejected": -0.23143723607063293, "step": 369 }, { "epoch": 0.04, "learning_rate": 2.914609661036967e-07, "logits/chosen": -2.4620485305786133, "logits/rejected": -2.445059299468994, "logps/chosen": -381.3786315917969, "logps/rejected": -217.01284790039062, "loss": 0.5032, "rewards/accuracies": 0.75, "rewards/chosen": -0.027956679463386536, "rewards/margins": 1.0060575008392334, "rewards/rejected": -1.034014105796814, "step": 370 }, { "epoch": 0.04, "learning_rate": 2.9142553442777843e-07, "logits/chosen": -2.438389301300049, "logits/rejected": -2.386345863342285, "logps/chosen": -209.4347381591797, "logps/rejected": -285.638671875, "loss": 0.5042, "rewards/accuracies": 0.75, "rewards/chosen": -0.2624393403530121, "rewards/margins": 0.7292307615280151, "rewards/rejected": -0.9916701316833496, "step": 371 }, { "epoch": 0.04, "learning_rate": 2.9139010275186013e-07, "logits/chosen": -2.462907075881958, "logits/rejected": -2.776033401489258, "logps/chosen": -241.20034790039062, "logps/rejected": -192.75881958007812, "loss": 0.4781, "rewards/accuracies": 0.5, "rewards/chosen": -0.03623209521174431, "rewards/margins": 1.3184354305267334, "rewards/rejected": -1.3546675443649292, "step": 372 }, { "epoch": 0.04, "learning_rate": 2.913546710759419e-07, "logits/chosen": -2.61348557472229, "logits/rejected": -2.6519858837127686, "logps/chosen": -483.35443115234375, "logps/rejected": -399.0893249511719, "loss": 0.3253, "rewards/accuracies": 0.75, "rewards/chosen": -0.6226974725723267, "rewards/margins": 1.7413036823272705, "rewards/rejected": -2.3640010356903076, "step": 373 }, { "epoch": 0.04, "learning_rate": 2.9131923940002363e-07, "logits/chosen": -2.3559398651123047, "logits/rejected": -2.443772315979004, "logps/chosen": -276.01092529296875, "logps/rejected": -324.1201477050781, "loss": 0.5041, "rewards/accuracies": 0.75, "rewards/chosen": -0.19327013194561005, "rewards/margins": 1.7155585289001465, "rewards/rejected": -1.908828616142273, "step": 374 }, { "epoch": 0.04, "learning_rate": 2.912838077241053e-07, "logits/chosen": -2.5892324447631836, "logits/rejected": -2.6127986907958984, "logps/chosen": -288.15679931640625, "logps/rejected": -218.14593505859375, "loss": 0.505, "rewards/accuracies": 0.75, "rewards/chosen": -0.6188991069793701, "rewards/margins": 0.879996657371521, "rewards/rejected": -1.4988958835601807, "step": 375 }, { "epoch": 0.04, "learning_rate": 2.9124837604818707e-07, "logits/chosen": -2.741547107696533, "logits/rejected": -2.7076892852783203, "logps/chosen": -208.8468017578125, "logps/rejected": -168.34310913085938, "loss": 0.8012, "rewards/accuracies": 0.375, "rewards/chosen": -0.7391061782836914, "rewards/margins": 0.2517876625061035, "rewards/rejected": -0.9908938407897949, "step": 376 }, { "epoch": 0.04, "learning_rate": 2.9121294437226877e-07, "logits/chosen": -2.7398834228515625, "logits/rejected": -2.765960454940796, "logps/chosen": -210.3505859375, "logps/rejected": -157.2550048828125, "loss": 0.6086, "rewards/accuracies": 0.625, "rewards/chosen": -0.4951014518737793, "rewards/margins": 0.42920827865600586, "rewards/rejected": -0.9243097305297852, "step": 377 }, { "epoch": 0.04, "learning_rate": 2.911775126963505e-07, "logits/chosen": -2.597426176071167, "logits/rejected": -2.450941562652588, "logps/chosen": -452.9858703613281, "logps/rejected": -402.2458190917969, "loss": 0.5617, "rewards/accuracies": 0.75, "rewards/chosen": -0.4296402335166931, "rewards/margins": 0.5917314887046814, "rewards/rejected": -1.021371603012085, "step": 378 }, { "epoch": 0.04, "learning_rate": 2.9114208102043226e-07, "logits/chosen": -3.081923484802246, "logits/rejected": -2.9856417179107666, "logps/chosen": -210.20872497558594, "logps/rejected": -217.0289764404297, "loss": 0.6458, "rewards/accuracies": 0.875, "rewards/chosen": -0.4244641065597534, "rewards/margins": 0.5986112356185913, "rewards/rejected": -1.0230753421783447, "step": 379 }, { "epoch": 0.04, "learning_rate": 2.9110664934451396e-07, "logits/chosen": -2.674346923828125, "logits/rejected": -2.4168198108673096, "logps/chosen": -230.85279846191406, "logps/rejected": -154.787841796875, "loss": 0.6082, "rewards/accuracies": 0.75, "rewards/chosen": -0.4232533574104309, "rewards/margins": 0.39322400093078613, "rewards/rejected": -0.8164772987365723, "step": 380 }, { "epoch": 0.04, "learning_rate": 2.910712176685957e-07, "logits/chosen": -2.6380908489227295, "logits/rejected": -2.4891340732574463, "logps/chosen": -178.94752502441406, "logps/rejected": -223.0135498046875, "loss": 0.7279, "rewards/accuracies": 0.625, "rewards/chosen": -0.38809722661972046, "rewards/margins": 0.2853945791721344, "rewards/rejected": -0.673491895198822, "step": 381 }, { "epoch": 0.04, "learning_rate": 2.9103578599267746e-07, "logits/chosen": -1.5114073753356934, "logits/rejected": -2.4028563499450684, "logps/chosen": -353.2110595703125, "logps/rejected": -205.11846923828125, "loss": 1.1754, "rewards/accuracies": 0.375, "rewards/chosen": -1.1019339561462402, "rewards/margins": 0.12049147486686707, "rewards/rejected": -1.2224254608154297, "step": 382 }, { "epoch": 0.04, "learning_rate": 2.9100035431675915e-07, "logits/chosen": -2.36358642578125, "logits/rejected": -2.28446626663208, "logps/chosen": -219.1780242919922, "logps/rejected": -249.15045166015625, "loss": 0.6282, "rewards/accuracies": 0.625, "rewards/chosen": 0.03214915841817856, "rewards/margins": 0.36723947525024414, "rewards/rejected": -0.335090309381485, "step": 383 }, { "epoch": 0.04, "learning_rate": 2.909649226408409e-07, "logits/chosen": -2.4393086433410645, "logits/rejected": -2.344698667526245, "logps/chosen": -231.1734161376953, "logps/rejected": -276.0218505859375, "loss": 0.4909, "rewards/accuracies": 0.625, "rewards/chosen": -1.3179285526275635, "rewards/margins": 0.7397074103355408, "rewards/rejected": -2.057636022567749, "step": 384 }, { "epoch": 0.04, "learning_rate": 2.9092949096492265e-07, "logits/chosen": -1.9227142333984375, "logits/rejected": -2.378279209136963, "logps/chosen": -460.6540832519531, "logps/rejected": -297.25567626953125, "loss": 0.7016, "rewards/accuracies": 0.625, "rewards/chosen": -0.6133627891540527, "rewards/margins": 0.5649220943450928, "rewards/rejected": -1.1782848834991455, "step": 385 }, { "epoch": 0.04, "learning_rate": 2.9089405928900435e-07, "logits/chosen": -2.261834144592285, "logits/rejected": -2.0776326656341553, "logps/chosen": -210.3868408203125, "logps/rejected": -243.05325317382812, "loss": 0.4741, "rewards/accuracies": 0.75, "rewards/chosen": -0.21825449168682098, "rewards/margins": 0.7398832440376282, "rewards/rejected": -0.9581376314163208, "step": 386 }, { "epoch": 0.05, "learning_rate": 2.908586276130861e-07, "logits/chosen": -2.5592167377471924, "logits/rejected": -2.2397923469543457, "logps/chosen": -270.41009521484375, "logps/rejected": -252.57057189941406, "loss": 1.0451, "rewards/accuracies": 0.75, "rewards/chosen": -2.095860481262207, "rewards/margins": 0.40147170424461365, "rewards/rejected": -2.4973320960998535, "step": 387 }, { "epoch": 0.05, "learning_rate": 2.908231959371678e-07, "logits/chosen": -2.467172384262085, "logits/rejected": -2.4671850204467773, "logps/chosen": -176.31703186035156, "logps/rejected": -131.861083984375, "loss": 0.6416, "rewards/accuracies": 0.5, "rewards/chosen": -0.47119781374931335, "rewards/margins": 0.4313661456108093, "rewards/rejected": -0.9025639295578003, "step": 388 }, { "epoch": 0.05, "learning_rate": 2.9078776426124954e-07, "logits/chosen": -2.3285129070281982, "logits/rejected": -2.333667516708374, "logps/chosen": -286.033935546875, "logps/rejected": -182.059326171875, "loss": 0.8016, "rewards/accuracies": 0.375, "rewards/chosen": -0.700645387172699, "rewards/margins": 0.21044054627418518, "rewards/rejected": -0.9110859632492065, "step": 389 }, { "epoch": 0.05, "learning_rate": 2.907523325853313e-07, "logits/chosen": -2.387022018432617, "logits/rejected": -2.7735767364501953, "logps/chosen": -310.4424133300781, "logps/rejected": -248.2230987548828, "loss": 0.2051, "rewards/accuracies": 1.0, "rewards/chosen": -0.0037674754858016968, "rewards/margins": 1.8231241703033447, "rewards/rejected": -1.8268916606903076, "step": 390 }, { "epoch": 0.05, "learning_rate": 2.90716900909413e-07, "logits/chosen": -2.4947261810302734, "logits/rejected": -2.6018552780151367, "logps/chosen": -409.39495849609375, "logps/rejected": -274.84393310546875, "loss": 0.9161, "rewards/accuracies": 0.625, "rewards/chosen": -0.607440710067749, "rewards/margins": 0.10968615114688873, "rewards/rejected": -0.7171268463134766, "step": 391 }, { "epoch": 0.05, "learning_rate": 2.9068146923349473e-07, "logits/chosen": -2.6144323348999023, "logits/rejected": -2.44895076751709, "logps/chosen": -284.3625183105469, "logps/rejected": -261.03082275390625, "loss": 0.678, "rewards/accuracies": 0.875, "rewards/chosen": -0.6003652811050415, "rewards/margins": 0.3987134099006653, "rewards/rejected": -0.999078631401062, "step": 392 }, { "epoch": 0.05, "learning_rate": 2.906460375575765e-07, "logits/chosen": -2.2292399406433105, "logits/rejected": -2.152902364730835, "logps/chosen": -283.7800598144531, "logps/rejected": -256.03045654296875, "loss": 0.3496, "rewards/accuracies": 0.875, "rewards/chosen": -0.15193067491054535, "rewards/margins": 1.3511581420898438, "rewards/rejected": -1.5030887126922607, "step": 393 }, { "epoch": 0.05, "learning_rate": 2.906106058816582e-07, "logits/chosen": -2.0130889415740967, "logits/rejected": -2.03128719329834, "logps/chosen": -312.2867126464844, "logps/rejected": -338.41522216796875, "loss": 0.6895, "rewards/accuracies": 0.625, "rewards/chosen": -0.47012022137641907, "rewards/margins": 0.5652213096618652, "rewards/rejected": -1.0353416204452515, "step": 394 }, { "epoch": 0.05, "learning_rate": 2.905751742057399e-07, "logits/chosen": -2.3529956340789795, "logits/rejected": -2.301142692565918, "logps/chosen": -195.08575439453125, "logps/rejected": -193.3866729736328, "loss": 0.4212, "rewards/accuracies": 0.875, "rewards/chosen": -0.6629807949066162, "rewards/margins": 1.451525092124939, "rewards/rejected": -2.1145060062408447, "step": 395 }, { "epoch": 0.05, "learning_rate": 2.9053974252982167e-07, "logits/chosen": -2.63325834274292, "logits/rejected": -2.498455286026001, "logps/chosen": -216.3518524169922, "logps/rejected": -324.20367431640625, "loss": 0.5293, "rewards/accuracies": 0.625, "rewards/chosen": -0.2945455312728882, "rewards/margins": 0.92228102684021, "rewards/rejected": -1.2168265581130981, "step": 396 }, { "epoch": 0.05, "learning_rate": 2.9050431085390337e-07, "logits/chosen": -2.695741891860962, "logits/rejected": -2.668283700942993, "logps/chosen": -191.29249572753906, "logps/rejected": -209.6196746826172, "loss": 0.4286, "rewards/accuracies": 0.875, "rewards/chosen": -0.29533764719963074, "rewards/margins": 1.2358360290527344, "rewards/rejected": -1.5311737060546875, "step": 397 }, { "epoch": 0.05, "learning_rate": 2.904688791779851e-07, "logits/chosen": -2.4054059982299805, "logits/rejected": -2.5963492393493652, "logps/chosen": -307.03265380859375, "logps/rejected": -362.26361083984375, "loss": 1.063, "rewards/accuracies": 0.5, "rewards/chosen": -0.862945556640625, "rewards/margins": -0.06770412623882294, "rewards/rejected": -0.7952414155006409, "step": 398 }, { "epoch": 0.05, "learning_rate": 2.904334475020668e-07, "logits/chosen": -2.5434956550598145, "logits/rejected": -2.273796796798706, "logps/chosen": -252.62017822265625, "logps/rejected": -389.3582458496094, "loss": 0.6428, "rewards/accuracies": 0.75, "rewards/chosen": -0.814016580581665, "rewards/margins": 0.6457063555717468, "rewards/rejected": -1.4597229957580566, "step": 399 }, { "epoch": 0.05, "learning_rate": 2.9039801582614856e-07, "logits/chosen": -2.5275187492370605, "logits/rejected": -2.4476003646850586, "logps/chosen": -125.37290954589844, "logps/rejected": -200.5078125, "loss": 0.3068, "rewards/accuracies": 1.0, "rewards/chosen": -0.16572076082229614, "rewards/margins": 1.1224074363708496, "rewards/rejected": -1.288128137588501, "step": 400 }, { "epoch": 0.05, "learning_rate": 2.9036258415023026e-07, "logits/chosen": -2.55018949508667, "logits/rejected": -2.530451774597168, "logps/chosen": -215.76553344726562, "logps/rejected": -275.7241516113281, "loss": 0.3076, "rewards/accuracies": 0.875, "rewards/chosen": -0.3347064256668091, "rewards/margins": 1.3861677646636963, "rewards/rejected": -1.720874309539795, "step": 401 }, { "epoch": 0.05, "learning_rate": 2.90327152474312e-07, "logits/chosen": -1.6912660598754883, "logits/rejected": -1.7484867572784424, "logps/chosen": -438.5322265625, "logps/rejected": -355.2369689941406, "loss": 0.4955, "rewards/accuracies": 0.75, "rewards/chosen": -0.5170990228652954, "rewards/margins": 0.8915930390357971, "rewards/rejected": -1.4086921215057373, "step": 402 }, { "epoch": 0.05, "learning_rate": 2.9029172079839375e-07, "logits/chosen": -2.574814796447754, "logits/rejected": -2.5845723152160645, "logps/chosen": -202.02037048339844, "logps/rejected": -150.90652465820312, "loss": 0.4926, "rewards/accuracies": 0.875, "rewards/chosen": -0.12014584988355637, "rewards/margins": 0.662086009979248, "rewards/rejected": -0.7822319269180298, "step": 403 }, { "epoch": 0.05, "learning_rate": 2.902562891224755e-07, "logits/chosen": -2.0928592681884766, "logits/rejected": -2.258073091506958, "logps/chosen": -338.05963134765625, "logps/rejected": -288.1326904296875, "loss": 0.4268, "rewards/accuracies": 0.75, "rewards/chosen": -0.44505953788757324, "rewards/margins": 1.2010225057601929, "rewards/rejected": -1.6460820436477661, "step": 404 }, { "epoch": 0.05, "learning_rate": 2.902208574465572e-07, "logits/chosen": -2.6006851196289062, "logits/rejected": -2.6106038093566895, "logps/chosen": -217.82427978515625, "logps/rejected": -194.0026397705078, "loss": 0.6276, "rewards/accuracies": 0.75, "rewards/chosen": -0.6466972827911377, "rewards/margins": 0.9627809524536133, "rewards/rejected": -1.6094781160354614, "step": 405 }, { "epoch": 0.05, "learning_rate": 2.9018542577063895e-07, "logits/chosen": -1.9177751541137695, "logits/rejected": -1.4863187074661255, "logps/chosen": -353.9942626953125, "logps/rejected": -466.2381286621094, "loss": 0.6619, "rewards/accuracies": 0.625, "rewards/chosen": -0.6182900071144104, "rewards/margins": 0.5348527431488037, "rewards/rejected": -1.1531426906585693, "step": 406 }, { "epoch": 0.05, "learning_rate": 2.901499940947207e-07, "logits/chosen": -2.347299575805664, "logits/rejected": -2.581388235092163, "logps/chosen": -322.8605651855469, "logps/rejected": -222.009765625, "loss": 0.5515, "rewards/accuracies": 0.625, "rewards/chosen": -0.1542651355266571, "rewards/margins": 0.9100422263145447, "rewards/rejected": -1.064307451248169, "step": 407 }, { "epoch": 0.05, "learning_rate": 2.901145624188024e-07, "logits/chosen": -2.617509365081787, "logits/rejected": -2.600517749786377, "logps/chosen": -110.65100860595703, "logps/rejected": -189.55299377441406, "loss": 0.3341, "rewards/accuracies": 0.75, "rewards/chosen": -0.09722583740949631, "rewards/margins": 1.648722529411316, "rewards/rejected": -1.745948314666748, "step": 408 }, { "epoch": 0.05, "learning_rate": 2.9007913074288414e-07, "logits/chosen": -2.777291774749756, "logits/rejected": -2.7684519290924072, "logps/chosen": -280.8226318359375, "logps/rejected": -223.43063354492188, "loss": 0.6264, "rewards/accuracies": 0.5, "rewards/chosen": -0.18180479109287262, "rewards/margins": 0.9385455846786499, "rewards/rejected": -1.1203503608703613, "step": 409 }, { "epoch": 0.05, "learning_rate": 2.9004369906696584e-07, "logits/chosen": -2.1051454544067383, "logits/rejected": -2.399475574493408, "logps/chosen": -383.5094299316406, "logps/rejected": -267.96728515625, "loss": 1.1977, "rewards/accuracies": 0.75, "rewards/chosen": -1.4341328144073486, "rewards/margins": -0.2899511754512787, "rewards/rejected": -1.144181728363037, "step": 410 }, { "epoch": 0.05, "learning_rate": 2.900082673910476e-07, "logits/chosen": -2.1536006927490234, "logits/rejected": -1.9441380500793457, "logps/chosen": -97.18720245361328, "logps/rejected": -231.72508239746094, "loss": 0.3853, "rewards/accuracies": 0.75, "rewards/chosen": -0.29566866159439087, "rewards/margins": 1.2901939153671265, "rewards/rejected": -1.5858625173568726, "step": 411 }, { "epoch": 0.05, "learning_rate": 2.899728357151293e-07, "logits/chosen": -1.9044407606124878, "logits/rejected": -1.8881824016571045, "logps/chosen": -352.42572021484375, "logps/rejected": -344.89385986328125, "loss": 0.224, "rewards/accuracies": 1.0, "rewards/chosen": -0.49962738156318665, "rewards/margins": 2.233905553817749, "rewards/rejected": -2.7335331439971924, "step": 412 }, { "epoch": 0.05, "learning_rate": 2.8993740403921103e-07, "logits/chosen": -2.88010835647583, "logits/rejected": -2.8571999073028564, "logps/chosen": -182.77574157714844, "logps/rejected": -255.7477264404297, "loss": 0.7982, "rewards/accuracies": 0.625, "rewards/chosen": -0.76109379529953, "rewards/margins": 0.878085732460022, "rewards/rejected": -1.6391795873641968, "step": 413 }, { "epoch": 0.05, "learning_rate": 2.899019723632928e-07, "logits/chosen": -2.0980958938598633, "logits/rejected": -2.152815580368042, "logps/chosen": -298.0321960449219, "logps/rejected": -252.02593994140625, "loss": 0.5475, "rewards/accuracies": 0.75, "rewards/chosen": 0.10067229717969894, "rewards/margins": 0.668516993522644, "rewards/rejected": -0.5678446888923645, "step": 414 }, { "epoch": 0.05, "learning_rate": 2.8986654068737447e-07, "logits/chosen": -2.628714084625244, "logits/rejected": -2.790391445159912, "logps/chosen": -195.35165405273438, "logps/rejected": -230.36630249023438, "loss": 0.4103, "rewards/accuracies": 0.875, "rewards/chosen": -0.652872622013092, "rewards/margins": 1.6709685325622559, "rewards/rejected": -2.323841094970703, "step": 415 }, { "epoch": 0.05, "learning_rate": 2.898311090114562e-07, "logits/chosen": -2.105151653289795, "logits/rejected": -2.255107879638672, "logps/chosen": -397.61407470703125, "logps/rejected": -282.79150390625, "loss": 0.5097, "rewards/accuracies": 0.625, "rewards/chosen": -0.45666179060935974, "rewards/margins": 1.1008095741271973, "rewards/rejected": -1.5574712753295898, "step": 416 }, { "epoch": 0.05, "learning_rate": 2.8979567733553797e-07, "logits/chosen": -2.489713668823242, "logits/rejected": -2.4211790561676025, "logps/chosen": -157.96438598632812, "logps/rejected": -286.14056396484375, "loss": 0.3381, "rewards/accuracies": 0.875, "rewards/chosen": -0.15878267586231232, "rewards/margins": 1.4543695449829102, "rewards/rejected": -1.613152265548706, "step": 417 }, { "epoch": 0.05, "learning_rate": 2.897602456596197e-07, "logits/chosen": -2.194603204727173, "logits/rejected": -2.3707656860351562, "logps/chosen": -442.0442810058594, "logps/rejected": -257.8227233886719, "loss": 0.3791, "rewards/accuracies": 0.875, "rewards/chosen": -0.2044883817434311, "rewards/margins": 1.0412293672561646, "rewards/rejected": -1.2457177639007568, "step": 418 }, { "epoch": 0.05, "learning_rate": 2.897248139837014e-07, "logits/chosen": -2.3681437969207764, "logits/rejected": -2.239917278289795, "logps/chosen": -259.718505859375, "logps/rejected": -330.5833435058594, "loss": 0.8768, "rewards/accuracies": 0.5, "rewards/chosen": -0.7860607504844666, "rewards/margins": 0.4908476769924164, "rewards/rejected": -1.27690851688385, "step": 419 }, { "epoch": 0.05, "learning_rate": 2.8968938230778316e-07, "logits/chosen": -1.9029079675674438, "logits/rejected": -1.8731968402862549, "logps/chosen": -268.7479248046875, "logps/rejected": -279.08074951171875, "loss": 0.6306, "rewards/accuracies": 0.5, "rewards/chosen": -0.5833770036697388, "rewards/margins": 0.4556977152824402, "rewards/rejected": -1.0390746593475342, "step": 420 }, { "epoch": 0.05, "learning_rate": 2.8965395063186486e-07, "logits/chosen": -2.1960530281066895, "logits/rejected": -2.480104684829712, "logps/chosen": -349.0220031738281, "logps/rejected": -296.8118591308594, "loss": 0.7114, "rewards/accuracies": 0.625, "rewards/chosen": -0.47710901498794556, "rewards/margins": 0.4133308529853821, "rewards/rejected": -0.8904398679733276, "step": 421 }, { "epoch": 0.05, "learning_rate": 2.896185189559466e-07, "logits/chosen": -2.4694385528564453, "logits/rejected": -2.651801109313965, "logps/chosen": -270.26385498046875, "logps/rejected": -175.33889770507812, "loss": 0.6255, "rewards/accuracies": 0.75, "rewards/chosen": -0.773228645324707, "rewards/margins": 0.27232787013053894, "rewards/rejected": -1.0455564260482788, "step": 422 }, { "epoch": 0.05, "learning_rate": 2.895830872800283e-07, "logits/chosen": -1.3705748319625854, "logits/rejected": -1.8169828653335571, "logps/chosen": -330.394287109375, "logps/rejected": -286.46734619140625, "loss": 0.4229, "rewards/accuracies": 0.75, "rewards/chosen": -0.49803948402404785, "rewards/margins": 1.1607952117919922, "rewards/rejected": -1.658834457397461, "step": 423 }, { "epoch": 0.05, "learning_rate": 2.8954765560411005e-07, "logits/chosen": -2.41660475730896, "logits/rejected": -2.460914134979248, "logps/chosen": -442.93524169921875, "logps/rejected": -310.57049560546875, "loss": 0.3742, "rewards/accuracies": 0.875, "rewards/chosen": -0.24222737550735474, "rewards/margins": 1.2475676536560059, "rewards/rejected": -1.4897949695587158, "step": 424 }, { "epoch": 0.05, "learning_rate": 2.895122239281918e-07, "logits/chosen": -2.571394443511963, "logits/rejected": -2.3807296752929688, "logps/chosen": -160.80511474609375, "logps/rejected": -152.62330627441406, "loss": 0.3994, "rewards/accuracies": 0.875, "rewards/chosen": -0.11728984117507935, "rewards/margins": 1.0486934185028076, "rewards/rejected": -1.1659833192825317, "step": 425 }, { "epoch": 0.05, "learning_rate": 2.894767922522735e-07, "logits/chosen": -2.335704803466797, "logits/rejected": -2.5373404026031494, "logps/chosen": -400.5793762207031, "logps/rejected": -374.7125244140625, "loss": 0.3953, "rewards/accuracies": 0.875, "rewards/chosen": -0.15045621991157532, "rewards/margins": 1.0319464206695557, "rewards/rejected": -1.1824026107788086, "step": 426 }, { "epoch": 0.05, "learning_rate": 2.8944136057635524e-07, "logits/chosen": -1.9246761798858643, "logits/rejected": -2.2647767066955566, "logps/chosen": -484.20635986328125, "logps/rejected": -314.3674011230469, "loss": 0.5874, "rewards/accuracies": 0.75, "rewards/chosen": -0.38436758518218994, "rewards/margins": 0.6285203099250793, "rewards/rejected": -1.012887954711914, "step": 427 }, { "epoch": 0.05, "learning_rate": 2.89405928900437e-07, "logits/chosen": -2.5076851844787598, "logits/rejected": -2.515650987625122, "logps/chosen": -221.2677459716797, "logps/rejected": -333.34918212890625, "loss": 0.2052, "rewards/accuracies": 1.0, "rewards/chosen": -0.026145324110984802, "rewards/margins": 2.101147413253784, "rewards/rejected": -2.1272926330566406, "step": 428 }, { "epoch": 0.05, "learning_rate": 2.8937049722451874e-07, "logits/chosen": -2.255150318145752, "logits/rejected": -2.3165829181671143, "logps/chosen": -231.63540649414062, "logps/rejected": -191.27479553222656, "loss": 1.1615, "rewards/accuracies": 0.375, "rewards/chosen": -0.797600507736206, "rewards/margins": -0.5019054412841797, "rewards/rejected": -0.29569515585899353, "step": 429 }, { "epoch": 0.05, "learning_rate": 2.8933506554860044e-07, "logits/chosen": -1.9827651977539062, "logits/rejected": -2.0335562229156494, "logps/chosen": -233.53016662597656, "logps/rejected": -123.15323638916016, "loss": 0.6817, "rewards/accuracies": 0.375, "rewards/chosen": -0.5800879001617432, "rewards/margins": 0.15834546089172363, "rewards/rejected": -0.7384333610534668, "step": 430 }, { "epoch": 0.05, "learning_rate": 2.892996338726822e-07, "logits/chosen": -2.0162081718444824, "logits/rejected": -2.465643882751465, "logps/chosen": -289.4651794433594, "logps/rejected": -166.14990234375, "loss": 0.5709, "rewards/accuracies": 0.75, "rewards/chosen": -0.12164521217346191, "rewards/margins": 0.6192151308059692, "rewards/rejected": -0.7408603429794312, "step": 431 }, { "epoch": 0.05, "learning_rate": 2.892642021967639e-07, "logits/chosen": -1.8190035820007324, "logits/rejected": -2.064676523208618, "logps/chosen": -370.3983154296875, "logps/rejected": -320.833251953125, "loss": 0.6127, "rewards/accuracies": 0.625, "rewards/chosen": -0.5361925959587097, "rewards/margins": 0.8419095873832703, "rewards/rejected": -1.37810218334198, "step": 432 }, { "epoch": 0.05, "learning_rate": 2.8922877052084563e-07, "logits/chosen": -2.0875978469848633, "logits/rejected": -1.979960560798645, "logps/chosen": -186.75677490234375, "logps/rejected": -194.5227508544922, "loss": 0.3268, "rewards/accuracies": 1.0, "rewards/chosen": -0.03865785151720047, "rewards/margins": 1.0173653364181519, "rewards/rejected": -1.0560232400894165, "step": 433 }, { "epoch": 0.05, "learning_rate": 2.891933388449273e-07, "logits/chosen": -1.6656266450881958, "logits/rejected": -1.692918300628662, "logps/chosen": -581.7532348632812, "logps/rejected": -417.2818298339844, "loss": 0.2337, "rewards/accuracies": 1.0, "rewards/chosen": -0.2516899108886719, "rewards/margins": 1.714011788368225, "rewards/rejected": -1.9657018184661865, "step": 434 }, { "epoch": 0.05, "learning_rate": 2.891579071690091e-07, "logits/chosen": -2.3993992805480957, "logits/rejected": -2.3897898197174072, "logps/chosen": -219.9913787841797, "logps/rejected": -168.07618713378906, "loss": 0.6331, "rewards/accuracies": 0.75, "rewards/chosen": -0.4981522262096405, "rewards/margins": 0.35705751180648804, "rewards/rejected": -0.8552097082138062, "step": 435 }, { "epoch": 0.05, "learning_rate": 2.891224754930908e-07, "logits/chosen": -2.3494534492492676, "logits/rejected": -2.560002326965332, "logps/chosen": -247.70327758789062, "logps/rejected": -217.59912109375, "loss": 1.0221, "rewards/accuracies": 0.875, "rewards/chosen": -1.0105006694793701, "rewards/margins": -0.08980664610862732, "rewards/rejected": -0.9206939935684204, "step": 436 }, { "epoch": 0.05, "learning_rate": 2.890870438171725e-07, "logits/chosen": -2.293614387512207, "logits/rejected": -2.306755542755127, "logps/chosen": -284.87396240234375, "logps/rejected": -281.9623107910156, "loss": 0.6157, "rewards/accuracies": 0.625, "rewards/chosen": -1.229252815246582, "rewards/margins": 0.6979652643203735, "rewards/rejected": -1.9272180795669556, "step": 437 }, { "epoch": 0.05, "learning_rate": 2.8905161214125427e-07, "logits/chosen": -2.129169464111328, "logits/rejected": -2.250277042388916, "logps/chosen": -368.252197265625, "logps/rejected": -260.0812683105469, "loss": 0.4146, "rewards/accuracies": 0.75, "rewards/chosen": -0.47230294346809387, "rewards/margins": 1.3293743133544922, "rewards/rejected": -1.8016772270202637, "step": 438 }, { "epoch": 0.05, "learning_rate": 2.89016180465336e-07, "logits/chosen": -1.8125989437103271, "logits/rejected": -1.962503433227539, "logps/chosen": -225.67848205566406, "logps/rejected": -259.9812316894531, "loss": 0.7039, "rewards/accuracies": 0.75, "rewards/chosen": -0.47003689408302307, "rewards/margins": 0.5679247379302979, "rewards/rejected": -1.0379616022109985, "step": 439 }, { "epoch": 0.05, "learning_rate": 2.8898074878941776e-07, "logits/chosen": -2.305500030517578, "logits/rejected": -2.199249029159546, "logps/chosen": -214.42691040039062, "logps/rejected": -455.64117431640625, "loss": 0.4123, "rewards/accuracies": 0.75, "rewards/chosen": -0.24727585911750793, "rewards/margins": 1.1467941999435425, "rewards/rejected": -1.394070029258728, "step": 440 }, { "epoch": 0.05, "learning_rate": 2.8894531711349946e-07, "logits/chosen": -2.5705955028533936, "logits/rejected": -2.6412603855133057, "logps/chosen": -284.3816223144531, "logps/rejected": -274.05169677734375, "loss": 0.6873, "rewards/accuracies": 0.5, "rewards/chosen": -0.48974108695983887, "rewards/margins": 0.5115760564804077, "rewards/rejected": -1.0013171434402466, "step": 441 }, { "epoch": 0.05, "learning_rate": 2.889098854375812e-07, "logits/chosen": -2.197633981704712, "logits/rejected": -2.0193705558776855, "logps/chosen": -467.29144287109375, "logps/rejected": -411.1417236328125, "loss": 0.8187, "rewards/accuracies": 0.5, "rewards/chosen": -1.3258193731307983, "rewards/margins": 0.15741285681724548, "rewards/rejected": -1.4832321405410767, "step": 442 }, { "epoch": 0.05, "learning_rate": 2.888744537616629e-07, "logits/chosen": -1.7728595733642578, "logits/rejected": -2.0740301609039307, "logps/chosen": -283.5924987792969, "logps/rejected": -196.9116973876953, "loss": 0.6075, "rewards/accuracies": 0.875, "rewards/chosen": -0.674859881401062, "rewards/margins": 0.539892315864563, "rewards/rejected": -1.214752197265625, "step": 443 }, { "epoch": 0.05, "learning_rate": 2.8883902208574465e-07, "logits/chosen": -2.081737756729126, "logits/rejected": -2.0653939247131348, "logps/chosen": -289.5146179199219, "logps/rejected": -281.1646728515625, "loss": 0.501, "rewards/accuracies": 0.875, "rewards/chosen": -0.28984910249710083, "rewards/margins": 0.9263857007026672, "rewards/rejected": -1.2162346839904785, "step": 444 }, { "epoch": 0.05, "learning_rate": 2.8880359040982635e-07, "logits/chosen": -2.829387664794922, "logits/rejected": -2.8123908042907715, "logps/chosen": -186.57882690429688, "logps/rejected": -155.37716674804688, "loss": 0.5977, "rewards/accuracies": 0.625, "rewards/chosen": -0.12644381821155548, "rewards/margins": 0.2871760129928589, "rewards/rejected": -0.41361984610557556, "step": 445 }, { "epoch": 0.05, "learning_rate": 2.887681587339081e-07, "logits/chosen": -2.623011827468872, "logits/rejected": -2.6550562381744385, "logps/chosen": -212.9499053955078, "logps/rejected": -233.2044677734375, "loss": 0.6581, "rewards/accuracies": 0.625, "rewards/chosen": -0.5647568702697754, "rewards/margins": 0.45782092213630676, "rewards/rejected": -1.0225777626037598, "step": 446 }, { "epoch": 0.05, "learning_rate": 2.8873272705798985e-07, "logits/chosen": -2.1888375282287598, "logits/rejected": -2.374483585357666, "logps/chosen": -454.57757568359375, "logps/rejected": -346.9500732421875, "loss": 0.6295, "rewards/accuracies": 0.75, "rewards/chosen": -0.27782851457595825, "rewards/margins": 0.49870729446411133, "rewards/rejected": -0.7765358090400696, "step": 447 }, { "epoch": 0.05, "learning_rate": 2.8869729538207154e-07, "logits/chosen": -2.517354965209961, "logits/rejected": -2.5084331035614014, "logps/chosen": -110.6280517578125, "logps/rejected": -195.51849365234375, "loss": 0.4698, "rewards/accuracies": 0.75, "rewards/chosen": -0.8173120021820068, "rewards/margins": 0.8536496162414551, "rewards/rejected": -1.670961618423462, "step": 448 }, { "epoch": 0.05, "learning_rate": 2.886618637061533e-07, "logits/chosen": -2.7251298427581787, "logits/rejected": -2.80497145652771, "logps/chosen": -294.46685791015625, "logps/rejected": -295.30413818359375, "loss": 0.5665, "rewards/accuracies": 0.875, "rewards/chosen": -1.5064153671264648, "rewards/margins": 0.7449488043785095, "rewards/rejected": -2.25136399269104, "step": 449 }, { "epoch": 0.05, "learning_rate": 2.88626432030235e-07, "logits/chosen": -2.8603343963623047, "logits/rejected": -2.612391948699951, "logps/chosen": -278.3944091796875, "logps/rejected": -298.2635498046875, "loss": 0.4652, "rewards/accuracies": 0.875, "rewards/chosen": -0.4374709129333496, "rewards/margins": 0.7024620771408081, "rewards/rejected": -1.1399329900741577, "step": 450 }, { "epoch": 0.05, "learning_rate": 2.885910003543168e-07, "logits/chosen": -2.2044122219085693, "logits/rejected": -2.2808475494384766, "logps/chosen": -263.2228698730469, "logps/rejected": -171.86575317382812, "loss": 0.8299, "rewards/accuracies": 0.625, "rewards/chosen": -0.520301342010498, "rewards/margins": 0.10889454185962677, "rewards/rejected": -0.6291958689689636, "step": 451 }, { "epoch": 0.05, "learning_rate": 2.885555686783985e-07, "logits/chosen": -1.9963300228118896, "logits/rejected": -2.2340283393859863, "logps/chosen": -339.03076171875, "logps/rejected": -223.689453125, "loss": 0.5081, "rewards/accuracies": 0.875, "rewards/chosen": -0.27676182985305786, "rewards/margins": 0.4591420590877533, "rewards/rejected": -0.7359039187431335, "step": 452 }, { "epoch": 0.05, "learning_rate": 2.8852013700248023e-07, "logits/chosen": -2.3173396587371826, "logits/rejected": -2.1254189014434814, "logps/chosen": -291.9952697753906, "logps/rejected": -434.4285583496094, "loss": 0.2433, "rewards/accuracies": 0.875, "rewards/chosen": -0.5326418280601501, "rewards/margins": 1.6209514141082764, "rewards/rejected": -2.1535933017730713, "step": 453 }, { "epoch": 0.05, "learning_rate": 2.8848470532656193e-07, "logits/chosen": -2.454075336456299, "logits/rejected": -2.410428047180176, "logps/chosen": -484.0467224121094, "logps/rejected": -336.8230895996094, "loss": 0.517, "rewards/accuracies": 0.875, "rewards/chosen": -0.7301141619682312, "rewards/margins": 0.5541477203369141, "rewards/rejected": -1.2842618227005005, "step": 454 }, { "epoch": 0.05, "learning_rate": 2.884492736506437e-07, "logits/chosen": -2.7751338481903076, "logits/rejected": -2.804736614227295, "logps/chosen": -359.765625, "logps/rejected": -262.50189208984375, "loss": 0.4686, "rewards/accuracies": 0.75, "rewards/chosen": -0.24237971007823944, "rewards/margins": 0.769894003868103, "rewards/rejected": -1.0122736692428589, "step": 455 }, { "epoch": 0.05, "learning_rate": 2.8841384197472537e-07, "logits/chosen": -2.1996359825134277, "logits/rejected": -2.0379090309143066, "logps/chosen": -225.12881469726562, "logps/rejected": -243.345947265625, "loss": 0.647, "rewards/accuracies": 0.75, "rewards/chosen": -0.7290774583816528, "rewards/margins": 0.5943965911865234, "rewards/rejected": -1.3234740495681763, "step": 456 }, { "epoch": 0.05, "learning_rate": 2.883784102988071e-07, "logits/chosen": -2.2726848125457764, "logits/rejected": -2.2914962768554688, "logps/chosen": -409.12310791015625, "logps/rejected": -337.31634521484375, "loss": 0.5884, "rewards/accuracies": 0.625, "rewards/chosen": -0.3329915404319763, "rewards/margins": 0.4369245767593384, "rewards/rejected": -0.7699161767959595, "step": 457 }, { "epoch": 0.05, "learning_rate": 2.8834297862288887e-07, "logits/chosen": -2.439037799835205, "logits/rejected": -2.3562841415405273, "logps/chosen": -217.17013549804688, "logps/rejected": -320.61383056640625, "loss": 0.772, "rewards/accuracies": 0.625, "rewards/chosen": -0.78801029920578, "rewards/margins": 0.6818526983261108, "rewards/rejected": -1.469862937927246, "step": 458 }, { "epoch": 0.05, "learning_rate": 2.8830754694697056e-07, "logits/chosen": -2.3345448970794678, "logits/rejected": -1.9554619789123535, "logps/chosen": -276.74346923828125, "logps/rejected": -349.22515869140625, "loss": 0.5316, "rewards/accuracies": 0.75, "rewards/chosen": -0.37507063150405884, "rewards/margins": 0.9351624846458435, "rewards/rejected": -1.310233235359192, "step": 459 }, { "epoch": 0.05, "learning_rate": 2.882721152710523e-07, "logits/chosen": -2.6931076049804688, "logits/rejected": -2.6111483573913574, "logps/chosen": -229.34307861328125, "logps/rejected": -195.31698608398438, "loss": 0.5077, "rewards/accuracies": 0.625, "rewards/chosen": -0.039030686020851135, "rewards/margins": 0.6117849349975586, "rewards/rejected": -0.6508156657218933, "step": 460 }, { "epoch": 0.05, "learning_rate": 2.88236683595134e-07, "logits/chosen": -2.171952247619629, "logits/rejected": -2.2652018070220947, "logps/chosen": -410.07574462890625, "logps/rejected": -344.35809326171875, "loss": 0.5448, "rewards/accuracies": 0.75, "rewards/chosen": -0.7113549709320068, "rewards/margins": 0.6973752975463867, "rewards/rejected": -1.4087302684783936, "step": 461 }, { "epoch": 0.05, "learning_rate": 2.8820125191921576e-07, "logits/chosen": -2.3885738849639893, "logits/rejected": -2.5526578426361084, "logps/chosen": -252.19741821289062, "logps/rejected": -276.8016357421875, "loss": 0.5275, "rewards/accuracies": 0.875, "rewards/chosen": -0.0005277693271636963, "rewards/margins": 0.7073348760604858, "rewards/rejected": -0.7078626751899719, "step": 462 }, { "epoch": 0.05, "learning_rate": 2.881658202432975e-07, "logits/chosen": -2.6351418495178223, "logits/rejected": -2.8905675411224365, "logps/chosen": -473.2274169921875, "logps/rejected": -194.9286651611328, "loss": 0.6359, "rewards/accuracies": 0.625, "rewards/chosen": -0.7356879711151123, "rewards/margins": 0.7049455642700195, "rewards/rejected": -1.4406334161758423, "step": 463 }, { "epoch": 0.05, "learning_rate": 2.8813038856737925e-07, "logits/chosen": -2.4014363288879395, "logits/rejected": -2.565269947052002, "logps/chosen": -203.2305908203125, "logps/rejected": -263.9621887207031, "loss": 0.4605, "rewards/accuracies": 0.75, "rewards/chosen": -0.9398926496505737, "rewards/margins": 1.4968774318695068, "rewards/rejected": -2.436769962310791, "step": 464 }, { "epoch": 0.05, "learning_rate": 2.8809495689146095e-07, "logits/chosen": -1.9531371593475342, "logits/rejected": -1.6714953184127808, "logps/chosen": -419.4506530761719, "logps/rejected": -492.268310546875, "loss": 0.7231, "rewards/accuracies": 0.875, "rewards/chosen": -1.4054261445999146, "rewards/margins": 0.42764317989349365, "rewards/rejected": -1.8330694437026978, "step": 465 }, { "epoch": 0.05, "learning_rate": 2.880595252155427e-07, "logits/chosen": -2.2883946895599365, "logits/rejected": -2.557877779006958, "logps/chosen": -129.26202392578125, "logps/rejected": -135.85995483398438, "loss": 0.849, "rewards/accuracies": 0.625, "rewards/chosen": -0.506321907043457, "rewards/margins": 0.26170840859413147, "rewards/rejected": -0.7680302858352661, "step": 466 }, { "epoch": 0.05, "learning_rate": 2.880240935396244e-07, "logits/chosen": -2.6069416999816895, "logits/rejected": -2.7098441123962402, "logps/chosen": -252.12705993652344, "logps/rejected": -165.3023223876953, "loss": 0.4092, "rewards/accuracies": 0.75, "rewards/chosen": -0.248795747756958, "rewards/margins": 0.8864930868148804, "rewards/rejected": -1.1352887153625488, "step": 467 }, { "epoch": 0.05, "learning_rate": 2.8798866186370614e-07, "logits/chosen": -2.7018556594848633, "logits/rejected": -2.7645883560180664, "logps/chosen": -166.42974853515625, "logps/rejected": -220.08763122558594, "loss": 1.4401, "rewards/accuracies": 0.875, "rewards/chosen": -1.2216691970825195, "rewards/margins": -0.0849132239818573, "rewards/rejected": -1.1367559432983398, "step": 468 }, { "epoch": 0.05, "learning_rate": 2.879532301877879e-07, "logits/chosen": -2.6121859550476074, "logits/rejected": -2.5769598484039307, "logps/chosen": -269.4376220703125, "logps/rejected": -251.34103393554688, "loss": 0.6438, "rewards/accuracies": 0.75, "rewards/chosen": -0.15875861048698425, "rewards/margins": 0.8456829786300659, "rewards/rejected": -1.004441499710083, "step": 469 }, { "epoch": 0.05, "learning_rate": 2.879177985118696e-07, "logits/chosen": -2.2257862091064453, "logits/rejected": -2.1988463401794434, "logps/chosen": -183.557373046875, "logps/rejected": -219.3253631591797, "loss": 0.4975, "rewards/accuracies": 0.625, "rewards/chosen": -0.4910590648651123, "rewards/margins": 0.707735002040863, "rewards/rejected": -1.1987941265106201, "step": 470 }, { "epoch": 0.05, "learning_rate": 2.8788236683595134e-07, "logits/chosen": -2.3143200874328613, "logits/rejected": -2.397932291030884, "logps/chosen": -454.4237060546875, "logps/rejected": -352.91497802734375, "loss": 0.4827, "rewards/accuracies": 0.625, "rewards/chosen": -0.27401429414749146, "rewards/margins": 0.8136337995529175, "rewards/rejected": -1.0876481533050537, "step": 471 }, { "epoch": 0.05, "learning_rate": 2.8784693516003303e-07, "logits/chosen": -2.225881576538086, "logits/rejected": -2.4619812965393066, "logps/chosen": -236.94288635253906, "logps/rejected": -186.68948364257812, "loss": 0.6113, "rewards/accuracies": 0.75, "rewards/chosen": -0.721285343170166, "rewards/margins": 0.8076116442680359, "rewards/rejected": -1.5288970470428467, "step": 472 }, { "epoch": 0.06, "learning_rate": 2.878115034841148e-07, "logits/chosen": -2.2152652740478516, "logits/rejected": -2.28939151763916, "logps/chosen": -184.28762817382812, "logps/rejected": -242.72091674804688, "loss": 0.2799, "rewards/accuracies": 0.875, "rewards/chosen": 0.1465131938457489, "rewards/margins": 1.4585176706314087, "rewards/rejected": -1.3120044469833374, "step": 473 }, { "epoch": 0.06, "learning_rate": 2.8777607180819653e-07, "logits/chosen": -2.443805694580078, "logits/rejected": -2.402092456817627, "logps/chosen": -370.5953369140625, "logps/rejected": -383.960205078125, "loss": 0.3794, "rewards/accuracies": 0.875, "rewards/chosen": -0.48763585090637207, "rewards/margins": 1.3761087656021118, "rewards/rejected": -1.8637446165084839, "step": 474 }, { "epoch": 0.06, "learning_rate": 2.877406401322783e-07, "logits/chosen": -1.9888908863067627, "logits/rejected": -2.5703654289245605, "logps/chosen": -298.15582275390625, "logps/rejected": -149.9966583251953, "loss": 0.741, "rewards/accuracies": 0.5, "rewards/chosen": -1.1638567447662354, "rewards/margins": 0.12602999806404114, "rewards/rejected": -1.289886713027954, "step": 475 }, { "epoch": 0.06, "learning_rate": 2.8770520845635997e-07, "logits/chosen": -2.442338705062866, "logits/rejected": -2.4861013889312744, "logps/chosen": -293.5130920410156, "logps/rejected": -335.413818359375, "loss": 0.3171, "rewards/accuracies": 1.0, "rewards/chosen": -0.38817447423934937, "rewards/margins": 1.1757229566574097, "rewards/rejected": -1.5638973712921143, "step": 476 }, { "epoch": 0.06, "learning_rate": 2.876697767804417e-07, "logits/chosen": -2.6148037910461426, "logits/rejected": -2.7018911838531494, "logps/chosen": -435.9836120605469, "logps/rejected": -243.5842742919922, "loss": 0.4451, "rewards/accuracies": 0.75, "rewards/chosen": -0.2591713070869446, "rewards/margins": 0.6935673952102661, "rewards/rejected": -0.9527387022972107, "step": 477 }, { "epoch": 0.06, "learning_rate": 2.876343451045234e-07, "logits/chosen": -2.1867642402648926, "logits/rejected": -2.363856554031372, "logps/chosen": -595.6495361328125, "logps/rejected": -229.33285522460938, "loss": 0.7856, "rewards/accuracies": 0.625, "rewards/chosen": -0.6898307800292969, "rewards/margins": 0.469849556684494, "rewards/rejected": -1.1596803665161133, "step": 478 }, { "epoch": 0.06, "learning_rate": 2.8759891342860517e-07, "logits/chosen": -2.5236077308654785, "logits/rejected": -2.6806118488311768, "logps/chosen": -228.54183959960938, "logps/rejected": -136.3505859375, "loss": 0.8091, "rewards/accuracies": 0.25, "rewards/chosen": -0.5312069654464722, "rewards/margins": -0.10284203290939331, "rewards/rejected": -0.42836493253707886, "step": 479 }, { "epoch": 0.06, "learning_rate": 2.875634817526869e-07, "logits/chosen": -1.848218560218811, "logits/rejected": -2.02508544921875, "logps/chosen": -385.38470458984375, "logps/rejected": -336.7276306152344, "loss": 0.5346, "rewards/accuracies": 0.625, "rewards/chosen": -0.8404441475868225, "rewards/margins": 0.7621071338653564, "rewards/rejected": -1.6025512218475342, "step": 480 }, { "epoch": 0.06, "learning_rate": 2.875280500767686e-07, "logits/chosen": -2.7000069618225098, "logits/rejected": -2.7673792839050293, "logps/chosen": -301.5589599609375, "logps/rejected": -436.10357666015625, "loss": 0.3349, "rewards/accuracies": 0.875, "rewards/chosen": 0.03319627791643143, "rewards/margins": 1.7067854404449463, "rewards/rejected": -1.6735892295837402, "step": 481 }, { "epoch": 0.06, "learning_rate": 2.8749261840085036e-07, "logits/chosen": -2.736711025238037, "logits/rejected": -2.7535383701324463, "logps/chosen": -114.55261993408203, "logps/rejected": -236.68429565429688, "loss": 0.5251, "rewards/accuracies": 0.5, "rewards/chosen": -0.3576797842979431, "rewards/margins": 1.492825984954834, "rewards/rejected": -1.8505055904388428, "step": 482 }, { "epoch": 0.06, "learning_rate": 2.8745718672493205e-07, "logits/chosen": -2.719964027404785, "logits/rejected": -2.647434949874878, "logps/chosen": -300.51800537109375, "logps/rejected": -205.2421875, "loss": 0.5212, "rewards/accuracies": 0.625, "rewards/chosen": -0.7528806924819946, "rewards/margins": 0.9341813921928406, "rewards/rejected": -1.68706214427948, "step": 483 }, { "epoch": 0.06, "learning_rate": 2.874217550490138e-07, "logits/chosen": -2.7279610633850098, "logits/rejected": -2.6858766078948975, "logps/chosen": -277.12933349609375, "logps/rejected": -255.76255798339844, "loss": 0.4553, "rewards/accuracies": 0.875, "rewards/chosen": -0.7410680055618286, "rewards/margins": 1.2947826385498047, "rewards/rejected": -2.0358505249023438, "step": 484 }, { "epoch": 0.06, "learning_rate": 2.873863233730955e-07, "logits/chosen": -2.8642711639404297, "logits/rejected": -2.873987913131714, "logps/chosen": -251.55746459960938, "logps/rejected": -184.16329956054688, "loss": 0.4018, "rewards/accuracies": 0.875, "rewards/chosen": -0.32012537121772766, "rewards/margins": 1.0811015367507935, "rewards/rejected": -1.4012269973754883, "step": 485 }, { "epoch": 0.06, "learning_rate": 2.873508916971773e-07, "logits/chosen": -2.201634407043457, "logits/rejected": -2.2737879753112793, "logps/chosen": -188.0582733154297, "logps/rejected": -170.73974609375, "loss": 0.4998, "rewards/accuracies": 0.75, "rewards/chosen": -0.3140193223953247, "rewards/margins": 0.8652557134628296, "rewards/rejected": -1.1792750358581543, "step": 486 }, { "epoch": 0.06, "learning_rate": 2.87315460021259e-07, "logits/chosen": -1.9628938436508179, "logits/rejected": -2.2958121299743652, "logps/chosen": -390.8328552246094, "logps/rejected": -257.3310546875, "loss": 0.622, "rewards/accuracies": 0.75, "rewards/chosen": -0.239447683095932, "rewards/margins": 0.3572545051574707, "rewards/rejected": -0.5967022180557251, "step": 487 }, { "epoch": 0.06, "learning_rate": 2.8728002834534074e-07, "logits/chosen": -2.032559633255005, "logits/rejected": -2.0186400413513184, "logps/chosen": -205.0035400390625, "logps/rejected": -285.2829284667969, "loss": 0.2318, "rewards/accuracies": 0.875, "rewards/chosen": -0.48124760389328003, "rewards/margins": 2.1955108642578125, "rewards/rejected": -2.6767587661743164, "step": 488 }, { "epoch": 0.06, "learning_rate": 2.8724459666942244e-07, "logits/chosen": -2.1903562545776367, "logits/rejected": -2.337620258331299, "logps/chosen": -309.85595703125, "logps/rejected": -178.13546752929688, "loss": 0.6911, "rewards/accuracies": 0.5, "rewards/chosen": -0.8676897287368774, "rewards/margins": 0.29718202352523804, "rewards/rejected": -1.1648716926574707, "step": 489 }, { "epoch": 0.06, "learning_rate": 2.872091649935042e-07, "logits/chosen": -2.307166337966919, "logits/rejected": -2.303006649017334, "logps/chosen": -172.33628845214844, "logps/rejected": -411.9163513183594, "loss": 0.3838, "rewards/accuracies": 0.875, "rewards/chosen": -0.46171411871910095, "rewards/margins": 1.38311767578125, "rewards/rejected": -1.844831943511963, "step": 490 }, { "epoch": 0.06, "learning_rate": 2.871737333175859e-07, "logits/chosen": -2.1132290363311768, "logits/rejected": -1.9482606649398804, "logps/chosen": -172.27197265625, "logps/rejected": -285.3385314941406, "loss": 0.9715, "rewards/accuracies": 0.375, "rewards/chosen": -0.9262038469314575, "rewards/margins": 0.0935896784067154, "rewards/rejected": -1.0197935104370117, "step": 491 }, { "epoch": 0.06, "learning_rate": 2.8713830164166763e-07, "logits/chosen": -1.9774844646453857, "logits/rejected": -1.946367859840393, "logps/chosen": -325.24462890625, "logps/rejected": -341.6451721191406, "loss": 0.2237, "rewards/accuracies": 0.875, "rewards/chosen": -0.043151985853910446, "rewards/margins": 1.921749472618103, "rewards/rejected": -1.9649015665054321, "step": 492 }, { "epoch": 0.06, "learning_rate": 2.871028699657494e-07, "logits/chosen": -2.476759195327759, "logits/rejected": -2.372145414352417, "logps/chosen": -226.0302276611328, "logps/rejected": -206.26907348632812, "loss": 0.3359, "rewards/accuracies": 0.875, "rewards/chosen": -0.4979541003704071, "rewards/margins": 1.2120133638381958, "rewards/rejected": -1.7099673748016357, "step": 493 }, { "epoch": 0.06, "learning_rate": 2.870674382898311e-07, "logits/chosen": -1.9751077890396118, "logits/rejected": -1.6521974802017212, "logps/chosen": -302.59716796875, "logps/rejected": -288.4311218261719, "loss": 0.2314, "rewards/accuracies": 1.0, "rewards/chosen": -0.2044219970703125, "rewards/margins": 1.7481882572174072, "rewards/rejected": -1.9526102542877197, "step": 494 }, { "epoch": 0.06, "learning_rate": 2.870320066139128e-07, "logits/chosen": -2.711160659790039, "logits/rejected": -2.455995559692383, "logps/chosen": -104.32241821289062, "logps/rejected": -253.74916076660156, "loss": 0.4919, "rewards/accuracies": 0.75, "rewards/chosen": -0.34323960542678833, "rewards/margins": 1.5447239875793457, "rewards/rejected": -1.8879635334014893, "step": 495 }, { "epoch": 0.06, "learning_rate": 2.869965749379945e-07, "logits/chosen": -2.9557652473449707, "logits/rejected": -2.897108554840088, "logps/chosen": -173.22103881835938, "logps/rejected": -147.4190216064453, "loss": 0.4603, "rewards/accuracies": 0.75, "rewards/chosen": -0.7069957852363586, "rewards/margins": 0.7420517206192017, "rewards/rejected": -1.449047565460205, "step": 496 }, { "epoch": 0.06, "learning_rate": 2.869611432620763e-07, "logits/chosen": -2.351804256439209, "logits/rejected": -2.493929386138916, "logps/chosen": -226.66146850585938, "logps/rejected": -198.23118591308594, "loss": 0.6335, "rewards/accuracies": 0.625, "rewards/chosen": -0.6662598252296448, "rewards/margins": 0.6029986143112183, "rewards/rejected": -1.2692583799362183, "step": 497 }, { "epoch": 0.06, "learning_rate": 2.86925711586158e-07, "logits/chosen": -2.3872475624084473, "logits/rejected": -2.4446985721588135, "logps/chosen": -212.6195068359375, "logps/rejected": -321.24969482421875, "loss": 0.2941, "rewards/accuracies": 0.875, "rewards/chosen": -0.14084210991859436, "rewards/margins": 2.517458200454712, "rewards/rejected": -2.6583003997802734, "step": 498 }, { "epoch": 0.06, "learning_rate": 2.8689027991023977e-07, "logits/chosen": -2.4775023460388184, "logits/rejected": -2.4002418518066406, "logps/chosen": -370.8385925292969, "logps/rejected": -404.053466796875, "loss": 1.3206, "rewards/accuracies": 0.375, "rewards/chosen": -1.1094766855239868, "rewards/margins": -0.2916056513786316, "rewards/rejected": -0.81787109375, "step": 499 }, { "epoch": 0.06, "learning_rate": 2.8685484823432146e-07, "logits/chosen": -2.2234346866607666, "logits/rejected": -2.5067219734191895, "logps/chosen": -338.90447998046875, "logps/rejected": -209.46636962890625, "loss": 0.7608, "rewards/accuracies": 0.375, "rewards/chosen": -0.6381328701972961, "rewards/margins": 0.14408577978610992, "rewards/rejected": -0.7822186946868896, "step": 500 }, { "epoch": 0.06, "learning_rate": 2.868194165584032e-07, "logits/chosen": -2.7447075843811035, "logits/rejected": -2.7495927810668945, "logps/chosen": -204.04478454589844, "logps/rejected": -212.54946899414062, "loss": 0.5815, "rewards/accuracies": 0.625, "rewards/chosen": -0.3632766604423523, "rewards/margins": 0.6391451358795166, "rewards/rejected": -1.0024218559265137, "step": 501 }, { "epoch": 0.06, "learning_rate": 2.867839848824849e-07, "logits/chosen": -2.311384677886963, "logits/rejected": -2.520874500274658, "logps/chosen": -294.0456237792969, "logps/rejected": -198.28070068359375, "loss": 0.4262, "rewards/accuracies": 0.75, "rewards/chosen": -0.3848944902420044, "rewards/margins": 0.9312523603439331, "rewards/rejected": -1.3161468505859375, "step": 502 }, { "epoch": 0.06, "learning_rate": 2.8674855320656665e-07, "logits/chosen": -2.4274258613586426, "logits/rejected": -2.3611042499542236, "logps/chosen": -190.7697296142578, "logps/rejected": -181.71424865722656, "loss": 0.4085, "rewards/accuracies": 0.875, "rewards/chosen": -0.024285294115543365, "rewards/margins": 1.4035873413085938, "rewards/rejected": -1.427872657775879, "step": 503 }, { "epoch": 0.06, "learning_rate": 2.867131215306484e-07, "logits/chosen": -1.810881495475769, "logits/rejected": -1.9104323387145996, "logps/chosen": -225.77105712890625, "logps/rejected": -205.95071411132812, "loss": 0.775, "rewards/accuracies": 0.625, "rewards/chosen": -0.5099223852157593, "rewards/margins": 0.5111232399940491, "rewards/rejected": -1.0210456848144531, "step": 504 }, { "epoch": 0.06, "learning_rate": 2.866776898547301e-07, "logits/chosen": -2.82720685005188, "logits/rejected": -2.7215538024902344, "logps/chosen": -206.60256958007812, "logps/rejected": -277.6413879394531, "loss": 0.3142, "rewards/accuracies": 0.875, "rewards/chosen": -0.3899177610874176, "rewards/margins": 1.6895167827606201, "rewards/rejected": -2.079434633255005, "step": 505 }, { "epoch": 0.06, "learning_rate": 2.8664225817881185e-07, "logits/chosen": -2.1150169372558594, "logits/rejected": -1.6521074771881104, "logps/chosen": -115.73867797851562, "logps/rejected": -199.58462524414062, "loss": 0.3515, "rewards/accuracies": 0.875, "rewards/chosen": -0.6508910655975342, "rewards/margins": 1.1830346584320068, "rewards/rejected": -1.833925724029541, "step": 506 }, { "epoch": 0.06, "learning_rate": 2.8660682650289354e-07, "logits/chosen": -2.2337965965270996, "logits/rejected": -2.0642075538635254, "logps/chosen": -217.8906707763672, "logps/rejected": -297.22027587890625, "loss": 0.4655, "rewards/accuracies": 0.75, "rewards/chosen": -0.7175643444061279, "rewards/margins": 1.0016785860061646, "rewards/rejected": -1.7192429304122925, "step": 507 }, { "epoch": 0.06, "learning_rate": 2.865713948269753e-07, "logits/chosen": -2.6315131187438965, "logits/rejected": -2.808044910430908, "logps/chosen": -212.70486450195312, "logps/rejected": -201.50839233398438, "loss": 0.5888, "rewards/accuracies": 0.625, "rewards/chosen": -0.8553215861320496, "rewards/margins": 0.5989441275596619, "rewards/rejected": -1.4542657136917114, "step": 508 }, { "epoch": 0.06, "learning_rate": 2.8653596315105704e-07, "logits/chosen": -1.8543720245361328, "logits/rejected": -2.0296168327331543, "logps/chosen": -459.28076171875, "logps/rejected": -386.000244140625, "loss": 0.3854, "rewards/accuracies": 0.875, "rewards/chosen": -0.41743218898773193, "rewards/margins": 1.1974530220031738, "rewards/rejected": -1.6148850917816162, "step": 509 }, { "epoch": 0.06, "learning_rate": 2.865005314751388e-07, "logits/chosen": -2.2033531665802, "logits/rejected": -2.0924296379089355, "logps/chosen": -297.481689453125, "logps/rejected": -288.87139892578125, "loss": 0.3865, "rewards/accuracies": 0.75, "rewards/chosen": -0.3853161334991455, "rewards/margins": 1.689305305480957, "rewards/rejected": -2.0746214389801025, "step": 510 }, { "epoch": 0.06, "learning_rate": 2.864650997992205e-07, "logits/chosen": -2.591458320617676, "logits/rejected": -2.3992488384246826, "logps/chosen": -301.3564147949219, "logps/rejected": -333.9700927734375, "loss": 0.3947, "rewards/accuracies": 0.875, "rewards/chosen": -0.04263585805892944, "rewards/margins": 1.2566362619400024, "rewards/rejected": -1.299272060394287, "step": 511 }, { "epoch": 0.06, "learning_rate": 2.8642966812330223e-07, "logits/chosen": -2.07089900970459, "logits/rejected": -2.598860740661621, "logps/chosen": -510.76593017578125, "logps/rejected": -330.44775390625, "loss": 1.0799, "rewards/accuracies": 0.625, "rewards/chosen": -1.311828374862671, "rewards/margins": 0.15822213888168335, "rewards/rejected": -1.4700504541397095, "step": 512 }, { "epoch": 0.06, "learning_rate": 2.8639423644738393e-07, "logits/chosen": -2.004502773284912, "logits/rejected": -2.0472323894500732, "logps/chosen": -379.2911071777344, "logps/rejected": -414.2760009765625, "loss": 0.2539, "rewards/accuracies": 1.0, "rewards/chosen": -0.336702823638916, "rewards/margins": 2.075023651123047, "rewards/rejected": -2.411726474761963, "step": 513 }, { "epoch": 0.06, "learning_rate": 2.863588047714657e-07, "logits/chosen": -2.4964797496795654, "logits/rejected": -2.0879085063934326, "logps/chosen": -195.63156127929688, "logps/rejected": -373.75909423828125, "loss": 0.3865, "rewards/accuracies": 0.75, "rewards/chosen": -0.3250020742416382, "rewards/margins": 1.4956847429275513, "rewards/rejected": -1.8206868171691895, "step": 514 }, { "epoch": 0.06, "learning_rate": 2.863233730955474e-07, "logits/chosen": -1.67506742477417, "logits/rejected": -1.9044263362884521, "logps/chosen": -221.3343505859375, "logps/rejected": -159.6397247314453, "loss": 0.717, "rewards/accuracies": 0.625, "rewards/chosen": -0.5118857026100159, "rewards/margins": 0.7656558752059937, "rewards/rejected": -1.2775416374206543, "step": 515 }, { "epoch": 0.06, "learning_rate": 2.862879414196291e-07, "logits/chosen": -2.238819122314453, "logits/rejected": -2.3344180583953857, "logps/chosen": -292.8455810546875, "logps/rejected": -201.6183319091797, "loss": 1.1519, "rewards/accuracies": 0.5, "rewards/chosen": -1.1026118993759155, "rewards/margins": -0.1640138477087021, "rewards/rejected": -0.938598096370697, "step": 516 }, { "epoch": 0.06, "learning_rate": 2.8625250974371087e-07, "logits/chosen": -2.199331760406494, "logits/rejected": -2.4519035816192627, "logps/chosen": -499.5540771484375, "logps/rejected": -354.43280029296875, "loss": 0.5898, "rewards/accuracies": 0.625, "rewards/chosen": -0.6915086507797241, "rewards/margins": 0.5066684484481812, "rewards/rejected": -1.1981772184371948, "step": 517 }, { "epoch": 0.06, "learning_rate": 2.8621707806779257e-07, "logits/chosen": -1.9242891073226929, "logits/rejected": -1.9771735668182373, "logps/chosen": -182.25662231445312, "logps/rejected": -198.211181640625, "loss": 0.5615, "rewards/accuracies": 0.625, "rewards/chosen": -0.4929031431674957, "rewards/margins": 0.47970664501190186, "rewards/rejected": -0.97260981798172, "step": 518 }, { "epoch": 0.06, "learning_rate": 2.861816463918743e-07, "logits/chosen": -1.5745623111724854, "logits/rejected": -1.6949636936187744, "logps/chosen": -452.0148620605469, "logps/rejected": -415.39544677734375, "loss": 0.528, "rewards/accuracies": 0.625, "rewards/chosen": -0.8255192637443542, "rewards/margins": 0.45034247636795044, "rewards/rejected": -1.2758617401123047, "step": 519 }, { "epoch": 0.06, "learning_rate": 2.86146214715956e-07, "logits/chosen": -2.8264522552490234, "logits/rejected": -2.7948410511016846, "logps/chosen": -190.25497436523438, "logps/rejected": -235.7244873046875, "loss": 0.4328, "rewards/accuracies": 0.875, "rewards/chosen": -1.2126975059509277, "rewards/margins": 1.3871009349822998, "rewards/rejected": -2.5997984409332275, "step": 520 }, { "epoch": 0.06, "learning_rate": 2.861107830400378e-07, "logits/chosen": -2.314972162246704, "logits/rejected": -2.3311662673950195, "logps/chosen": -323.1270751953125, "logps/rejected": -323.8503723144531, "loss": 1.5594, "rewards/accuracies": 0.75, "rewards/chosen": -2.346125602722168, "rewards/margins": 0.4829988479614258, "rewards/rejected": -2.8291244506835938, "step": 521 }, { "epoch": 0.06, "learning_rate": 2.860753513641195e-07, "logits/chosen": -2.328765392303467, "logits/rejected": -2.3360581398010254, "logps/chosen": -109.8043212890625, "logps/rejected": -138.8583526611328, "loss": 0.4922, "rewards/accuracies": 0.75, "rewards/chosen": 0.33297431468963623, "rewards/margins": 0.5839194655418396, "rewards/rejected": -0.25094518065452576, "step": 522 }, { "epoch": 0.06, "learning_rate": 2.8603991968820126e-07, "logits/chosen": -2.251887083053589, "logits/rejected": -2.291194200515747, "logps/chosen": -241.99241638183594, "logps/rejected": -271.60943603515625, "loss": 0.2842, "rewards/accuracies": 0.875, "rewards/chosen": -0.16754008829593658, "rewards/margins": 1.7293349504470825, "rewards/rejected": -1.8968749046325684, "step": 523 }, { "epoch": 0.06, "learning_rate": 2.8600448801228295e-07, "logits/chosen": -2.694969654083252, "logits/rejected": -2.5892202854156494, "logps/chosen": -335.2853698730469, "logps/rejected": -323.2527770996094, "loss": 0.5201, "rewards/accuracies": 0.75, "rewards/chosen": -0.4566298723220825, "rewards/margins": 0.8106477856636047, "rewards/rejected": -1.2672775983810425, "step": 524 }, { "epoch": 0.06, "learning_rate": 2.859690563363647e-07, "logits/chosen": -2.410719394683838, "logits/rejected": -2.3729844093322754, "logps/chosen": -213.3101043701172, "logps/rejected": -202.72064208984375, "loss": 0.7818, "rewards/accuracies": 0.75, "rewards/chosen": -0.5485799312591553, "rewards/margins": 0.45479100942611694, "rewards/rejected": -1.003371000289917, "step": 525 }, { "epoch": 0.06, "learning_rate": 2.8593362466044645e-07, "logits/chosen": -2.5247349739074707, "logits/rejected": -2.3739495277404785, "logps/chosen": -98.12782287597656, "logps/rejected": -162.59182739257812, "loss": 0.5076, "rewards/accuracies": 0.5, "rewards/chosen": -0.5434640645980835, "rewards/margins": 0.8047605156898499, "rewards/rejected": -1.3482245206832886, "step": 526 }, { "epoch": 0.06, "learning_rate": 2.8589819298452814e-07, "logits/chosen": -2.1265206336975098, "logits/rejected": -2.174743175506592, "logps/chosen": -372.384521484375, "logps/rejected": -355.48297119140625, "loss": 0.4598, "rewards/accuracies": 0.75, "rewards/chosen": -0.9017437100410461, "rewards/margins": 1.0171101093292236, "rewards/rejected": -1.9188538789749146, "step": 527 }, { "epoch": 0.06, "learning_rate": 2.858627613086099e-07, "logits/chosen": -2.403273105621338, "logits/rejected": -2.4131946563720703, "logps/chosen": -313.4476318359375, "logps/rejected": -283.8671875, "loss": 0.4587, "rewards/accuracies": 0.75, "rewards/chosen": -0.7013782858848572, "rewards/margins": 0.9358751177787781, "rewards/rejected": -1.6372534036636353, "step": 528 }, { "epoch": 0.06, "learning_rate": 2.858273296326916e-07, "logits/chosen": -2.7461700439453125, "logits/rejected": -2.4763565063476562, "logps/chosen": -153.01715087890625, "logps/rejected": -210.9293212890625, "loss": 0.4145, "rewards/accuracies": 0.875, "rewards/chosen": -0.8665658235549927, "rewards/margins": 1.2266720533370972, "rewards/rejected": -2.09323787689209, "step": 529 }, { "epoch": 0.06, "learning_rate": 2.8579189795677334e-07, "logits/chosen": -1.754564642906189, "logits/rejected": -2.068523406982422, "logps/chosen": -357.93121337890625, "logps/rejected": -292.3747253417969, "loss": 0.5568, "rewards/accuracies": 0.625, "rewards/chosen": -0.08435069769620895, "rewards/margins": 0.612540602684021, "rewards/rejected": -0.6968913674354553, "step": 530 }, { "epoch": 0.06, "learning_rate": 2.8575646628085503e-07, "logits/chosen": -1.759860634803772, "logits/rejected": -2.098111152648926, "logps/chosen": -276.138916015625, "logps/rejected": -220.85955810546875, "loss": 0.6222, "rewards/accuracies": 0.625, "rewards/chosen": -0.5003688335418701, "rewards/margins": 0.6859586238861084, "rewards/rejected": -1.186327576637268, "step": 531 }, { "epoch": 0.06, "learning_rate": 2.8572103460493683e-07, "logits/chosen": -2.1432905197143555, "logits/rejected": -2.074483871459961, "logps/chosen": -351.5444030761719, "logps/rejected": -338.73565673828125, "loss": 0.4228, "rewards/accuracies": 0.75, "rewards/chosen": -0.31365320086479187, "rewards/margins": 1.5232373476028442, "rewards/rejected": -1.836890459060669, "step": 532 }, { "epoch": 0.06, "learning_rate": 2.8568560292901853e-07, "logits/chosen": -2.273639678955078, "logits/rejected": -1.9727565050125122, "logps/chosen": -250.47303771972656, "logps/rejected": -284.10040283203125, "loss": 0.6594, "rewards/accuracies": 0.625, "rewards/chosen": -1.0097332000732422, "rewards/margins": 0.3556445837020874, "rewards/rejected": -1.3653777837753296, "step": 533 }, { "epoch": 0.06, "learning_rate": 2.856501712531003e-07, "logits/chosen": -2.804464340209961, "logits/rejected": -2.7689015865325928, "logps/chosen": -161.2711944580078, "logps/rejected": -200.8800048828125, "loss": 2.0158, "rewards/accuracies": 0.75, "rewards/chosen": -2.093459367752075, "rewards/margins": -0.6570478081703186, "rewards/rejected": -1.4364116191864014, "step": 534 }, { "epoch": 0.06, "learning_rate": 2.85614739577182e-07, "logits/chosen": -3.0661938190460205, "logits/rejected": -3.0833563804626465, "logps/chosen": -303.5691833496094, "logps/rejected": -255.90713500976562, "loss": 0.6971, "rewards/accuracies": 0.625, "rewards/chosen": -0.3442786633968353, "rewards/margins": 0.4057345986366272, "rewards/rejected": -0.7500132322311401, "step": 535 }, { "epoch": 0.06, "learning_rate": 2.855793079012637e-07, "logits/chosen": -2.6102354526519775, "logits/rejected": -2.5817737579345703, "logps/chosen": -173.6082000732422, "logps/rejected": -171.6916046142578, "loss": 1.0745, "rewards/accuracies": 0.75, "rewards/chosen": -1.174984097480774, "rewards/margins": 0.05065372586250305, "rewards/rejected": -1.2256379127502441, "step": 536 }, { "epoch": 0.06, "learning_rate": 2.8554387622534547e-07, "logits/chosen": -2.1097970008850098, "logits/rejected": -2.2835311889648438, "logps/chosen": -323.673828125, "logps/rejected": -237.627685546875, "loss": 0.6171, "rewards/accuracies": 0.5, "rewards/chosen": -0.7752617001533508, "rewards/margins": 0.4622430205345154, "rewards/rejected": -1.2375047206878662, "step": 537 }, { "epoch": 0.06, "learning_rate": 2.8550844454942717e-07, "logits/chosen": -2.563974142074585, "logits/rejected": -2.4530539512634277, "logps/chosen": -265.3961486816406, "logps/rejected": -285.02984619140625, "loss": 0.9371, "rewards/accuracies": 0.625, "rewards/chosen": -0.8578805923461914, "rewards/margins": 0.1478942632675171, "rewards/rejected": -1.005774974822998, "step": 538 }, { "epoch": 0.06, "learning_rate": 2.854730128735089e-07, "logits/chosen": -2.417131185531616, "logits/rejected": -2.495516061782837, "logps/chosen": -420.91461181640625, "logps/rejected": -291.0054931640625, "loss": 0.8533, "rewards/accuracies": 0.5, "rewards/chosen": -1.0879417657852173, "rewards/margins": 0.6263333559036255, "rewards/rejected": -1.7142751216888428, "step": 539 }, { "epoch": 0.06, "learning_rate": 2.854375811975906e-07, "logits/chosen": -2.0525481700897217, "logits/rejected": -2.0311365127563477, "logps/chosen": -344.7208251953125, "logps/rejected": -397.4723815917969, "loss": 0.5427, "rewards/accuracies": 0.5, "rewards/chosen": -0.5082827806472778, "rewards/margins": 0.8180028796195984, "rewards/rejected": -1.3262856006622314, "step": 540 }, { "epoch": 0.06, "learning_rate": 2.8540214952167236e-07, "logits/chosen": -2.5007472038269043, "logits/rejected": -2.6125829219818115, "logps/chosen": -217.9143524169922, "logps/rejected": -293.10125732421875, "loss": 0.377, "rewards/accuracies": 0.75, "rewards/chosen": -0.5075303316116333, "rewards/margins": 1.2771257162094116, "rewards/rejected": -1.784656047821045, "step": 541 }, { "epoch": 0.06, "learning_rate": 2.8536671784575406e-07, "logits/chosen": -1.8281805515289307, "logits/rejected": -2.1173787117004395, "logps/chosen": -285.6739501953125, "logps/rejected": -302.1130065917969, "loss": 0.6101, "rewards/accuracies": 0.625, "rewards/chosen": -0.4645143747329712, "rewards/margins": 0.7863868474960327, "rewards/rejected": -1.2509013414382935, "step": 542 }, { "epoch": 0.06, "learning_rate": 2.853312861698358e-07, "logits/chosen": -2.258251190185547, "logits/rejected": -2.320873975753784, "logps/chosen": -105.01898956298828, "logps/rejected": -177.27391052246094, "loss": 0.258, "rewards/accuracies": 1.0, "rewards/chosen": -0.2593585252761841, "rewards/margins": 1.6169419288635254, "rewards/rejected": -1.87630033493042, "step": 543 }, { "epoch": 0.06, "learning_rate": 2.8529585449391755e-07, "logits/chosen": -2.678955078125, "logits/rejected": -2.6826601028442383, "logps/chosen": -291.085693359375, "logps/rejected": -344.6938781738281, "loss": 0.5035, "rewards/accuracies": 0.625, "rewards/chosen": -0.6986862421035767, "rewards/margins": 1.35354483127594, "rewards/rejected": -2.0522310733795166, "step": 544 }, { "epoch": 0.06, "learning_rate": 2.852604228179993e-07, "logits/chosen": -2.3905558586120605, "logits/rejected": -2.27713942527771, "logps/chosen": -109.41691589355469, "logps/rejected": -161.7470703125, "loss": 0.4125, "rewards/accuracies": 0.75, "rewards/chosen": -0.6202859878540039, "rewards/margins": 1.4117522239685059, "rewards/rejected": -2.0320382118225098, "step": 545 }, { "epoch": 0.06, "learning_rate": 2.85224991142081e-07, "logits/chosen": -2.698762893676758, "logits/rejected": -2.413696527481079, "logps/chosen": -219.04464721679688, "logps/rejected": -335.5111999511719, "loss": 0.4113, "rewards/accuracies": 0.625, "rewards/chosen": -0.6372820734977722, "rewards/margins": 1.124608039855957, "rewards/rejected": -1.761889934539795, "step": 546 }, { "epoch": 0.06, "learning_rate": 2.8518955946616275e-07, "logits/chosen": -2.8585376739501953, "logits/rejected": -2.8951199054718018, "logps/chosen": -112.4608154296875, "logps/rejected": -181.8551025390625, "loss": 0.2764, "rewards/accuracies": 0.875, "rewards/chosen": -0.07646259665489197, "rewards/margins": 1.5269970893859863, "rewards/rejected": -1.6034598350524902, "step": 547 }, { "epoch": 0.06, "learning_rate": 2.851541277902445e-07, "logits/chosen": -2.631103038787842, "logits/rejected": -2.609153985977173, "logps/chosen": -201.09202575683594, "logps/rejected": -181.19757080078125, "loss": 0.4601, "rewards/accuracies": 0.625, "rewards/chosen": -0.23847419023513794, "rewards/margins": 1.3434467315673828, "rewards/rejected": -1.5819209814071655, "step": 548 }, { "epoch": 0.06, "learning_rate": 2.851186961143262e-07, "logits/chosen": -2.270334482192993, "logits/rejected": -2.2233192920684814, "logps/chosen": -158.02249145507812, "logps/rejected": -148.6134796142578, "loss": 0.7252, "rewards/accuracies": 0.75, "rewards/chosen": -0.4448128938674927, "rewards/margins": 0.5420027375221252, "rewards/rejected": -0.9868156313896179, "step": 549 }, { "epoch": 0.06, "learning_rate": 2.8508326443840794e-07, "logits/chosen": -2.369446039199829, "logits/rejected": -2.135037660598755, "logps/chosen": -254.35166931152344, "logps/rejected": -361.304931640625, "loss": 0.3178, "rewards/accuracies": 0.875, "rewards/chosen": -0.1920468509197235, "rewards/margins": 1.2199519872665405, "rewards/rejected": -1.411998987197876, "step": 550 }, { "epoch": 0.06, "learning_rate": 2.8504783276248963e-07, "logits/chosen": -2.638059616088867, "logits/rejected": -2.576690196990967, "logps/chosen": -214.31524658203125, "logps/rejected": -192.39529418945312, "loss": 0.624, "rewards/accuracies": 0.75, "rewards/chosen": -0.44820234179496765, "rewards/margins": 0.769474983215332, "rewards/rejected": -1.2176772356033325, "step": 551 }, { "epoch": 0.06, "learning_rate": 2.850124010865714e-07, "logits/chosen": -2.2462854385375977, "logits/rejected": -2.163957118988037, "logps/chosen": -360.5517272949219, "logps/rejected": -251.49844360351562, "loss": 0.5072, "rewards/accuracies": 0.75, "rewards/chosen": -1.4803833961486816, "rewards/margins": 0.9514158964157104, "rewards/rejected": -2.4317994117736816, "step": 552 }, { "epoch": 0.06, "learning_rate": 2.849769694106531e-07, "logits/chosen": -2.6802244186401367, "logits/rejected": -2.910111904144287, "logps/chosen": -377.9355773925781, "logps/rejected": -275.34112548828125, "loss": 0.3389, "rewards/accuracies": 1.0, "rewards/chosen": -0.41612130403518677, "rewards/margins": 1.3597418069839478, "rewards/rejected": -1.7758629322052002, "step": 553 }, { "epoch": 0.06, "learning_rate": 2.8494153773473483e-07, "logits/chosen": -2.739457130432129, "logits/rejected": -2.568617820739746, "logps/chosen": -133.44430541992188, "logps/rejected": -148.55262756347656, "loss": 1.2516, "rewards/accuracies": 0.875, "rewards/chosen": -1.3007218837738037, "rewards/margins": -0.14218169450759888, "rewards/rejected": -1.1585402488708496, "step": 554 }, { "epoch": 0.06, "learning_rate": 2.849061060588166e-07, "logits/chosen": -1.8668720722198486, "logits/rejected": -2.1898341178894043, "logps/chosen": -535.6143188476562, "logps/rejected": -281.7651672363281, "loss": 0.4174, "rewards/accuracies": 0.875, "rewards/chosen": -0.2744632661342621, "rewards/margins": 1.174797534942627, "rewards/rejected": -1.4492608308792114, "step": 555 }, { "epoch": 0.06, "learning_rate": 2.848706743828983e-07, "logits/chosen": -1.7967478036880493, "logits/rejected": -2.06424880027771, "logps/chosen": -442.9310302734375, "logps/rejected": -445.7369689941406, "loss": 0.2728, "rewards/accuracies": 0.875, "rewards/chosen": -0.5566263198852539, "rewards/margins": 1.9546340703964233, "rewards/rejected": -2.511260509490967, "step": 556 }, { "epoch": 0.06, "learning_rate": 2.8483524270698e-07, "logits/chosen": -2.343796968460083, "logits/rejected": -2.4122018814086914, "logps/chosen": -262.1907958984375, "logps/rejected": -186.04566955566406, "loss": 0.4684, "rewards/accuracies": 0.875, "rewards/chosen": -0.7042860984802246, "rewards/margins": 0.9044474363327026, "rewards/rejected": -1.6087336540222168, "step": 557 }, { "epoch": 0.06, "learning_rate": 2.8479981103106177e-07, "logits/chosen": -2.3609390258789062, "logits/rejected": -2.401257038116455, "logps/chosen": -276.2867126464844, "logps/rejected": -203.52835083007812, "loss": 0.6571, "rewards/accuracies": 0.625, "rewards/chosen": -0.5534374713897705, "rewards/margins": 0.649053156375885, "rewards/rejected": -1.2024905681610107, "step": 558 }, { "epoch": 0.07, "learning_rate": 2.847643793551435e-07, "logits/chosen": -2.0271008014678955, "logits/rejected": -1.9784281253814697, "logps/chosen": -385.4505615234375, "logps/rejected": -353.7092590332031, "loss": 0.2353, "rewards/accuracies": 0.875, "rewards/chosen": -0.21390002965927124, "rewards/margins": 1.7905845642089844, "rewards/rejected": -2.0044846534729004, "step": 559 }, { "epoch": 0.07, "learning_rate": 2.847289476792252e-07, "logits/chosen": -1.9069173336029053, "logits/rejected": -2.188859224319458, "logps/chosen": -185.70729064941406, "logps/rejected": -245.00003051757812, "loss": 1.0886, "rewards/accuracies": 0.625, "rewards/chosen": -1.3435674905776978, "rewards/margins": 0.256305456161499, "rewards/rejected": -1.5998730659484863, "step": 560 }, { "epoch": 0.07, "learning_rate": 2.8469351600330696e-07, "logits/chosen": -2.100656032562256, "logits/rejected": -2.2159180641174316, "logps/chosen": -341.1153259277344, "logps/rejected": -372.8584899902344, "loss": 0.3967, "rewards/accuracies": 0.75, "rewards/chosen": -0.7885330319404602, "rewards/margins": 0.9494342803955078, "rewards/rejected": -1.7379672527313232, "step": 561 }, { "epoch": 0.07, "learning_rate": 2.8465808432738866e-07, "logits/chosen": -2.6383142471313477, "logits/rejected": -2.6143205165863037, "logps/chosen": -180.5325927734375, "logps/rejected": -190.25733947753906, "loss": 0.3534, "rewards/accuracies": 1.0, "rewards/chosen": -0.03114059567451477, "rewards/margins": 1.09554123878479, "rewards/rejected": -1.1266816854476929, "step": 562 }, { "epoch": 0.07, "learning_rate": 2.846226526514704e-07, "logits/chosen": -2.4956278800964355, "logits/rejected": -2.396529197692871, "logps/chosen": -408.535400390625, "logps/rejected": -384.07464599609375, "loss": 0.4308, "rewards/accuracies": 0.875, "rewards/chosen": -0.7730699181556702, "rewards/margins": 0.9954893589019775, "rewards/rejected": -1.768559217453003, "step": 563 }, { "epoch": 0.07, "learning_rate": 2.845872209755521e-07, "logits/chosen": -2.032639503479004, "logits/rejected": -1.9179755449295044, "logps/chosen": -430.50579833984375, "logps/rejected": -490.73333740234375, "loss": 0.3127, "rewards/accuracies": 1.0, "rewards/chosen": -0.25208768248558044, "rewards/margins": 1.3802275657653809, "rewards/rejected": -1.6323151588439941, "step": 564 }, { "epoch": 0.07, "learning_rate": 2.8455178929963385e-07, "logits/chosen": -2.101374626159668, "logits/rejected": -2.172497034072876, "logps/chosen": -249.16525268554688, "logps/rejected": -223.83316040039062, "loss": 0.6541, "rewards/accuracies": 0.5, "rewards/chosen": -0.6702892780303955, "rewards/margins": 1.3814959526062012, "rewards/rejected": -2.051785469055176, "step": 565 }, { "epoch": 0.07, "learning_rate": 2.845163576237156e-07, "logits/chosen": -2.8870742321014404, "logits/rejected": -2.889741897583008, "logps/chosen": -93.1162109375, "logps/rejected": -180.5037384033203, "loss": 0.438, "rewards/accuracies": 0.75, "rewards/chosen": -0.13500943779945374, "rewards/margins": 0.8672112226486206, "rewards/rejected": -1.002220630645752, "step": 566 }, { "epoch": 0.07, "learning_rate": 2.8448092594779735e-07, "logits/chosen": -1.924746036529541, "logits/rejected": -2.1833443641662598, "logps/chosen": -393.8853454589844, "logps/rejected": -214.32781982421875, "loss": 0.7415, "rewards/accuracies": 0.5, "rewards/chosen": -1.1905595064163208, "rewards/margins": 1.1827194690704346, "rewards/rejected": -2.373279094696045, "step": 567 }, { "epoch": 0.07, "learning_rate": 2.8444549427187904e-07, "logits/chosen": -2.338766098022461, "logits/rejected": -2.310725212097168, "logps/chosen": -190.17820739746094, "logps/rejected": -192.0225067138672, "loss": 0.5433, "rewards/accuracies": 0.75, "rewards/chosen": -0.45062631368637085, "rewards/margins": 0.6148356199264526, "rewards/rejected": -1.0654618740081787, "step": 568 }, { "epoch": 0.07, "learning_rate": 2.844100625959608e-07, "logits/chosen": -2.637502670288086, "logits/rejected": -2.4023377895355225, "logps/chosen": -160.08731079101562, "logps/rejected": -260.2137451171875, "loss": 0.6336, "rewards/accuracies": 0.75, "rewards/chosen": -0.5600972175598145, "rewards/margins": 0.5039157271385193, "rewards/rejected": -1.0640130043029785, "step": 569 }, { "epoch": 0.07, "learning_rate": 2.843746309200425e-07, "logits/chosen": -2.236431121826172, "logits/rejected": -1.5225576162338257, "logps/chosen": -166.45741271972656, "logps/rejected": -439.48797607421875, "loss": 0.3235, "rewards/accuracies": 0.875, "rewards/chosen": -0.8114561438560486, "rewards/margins": 1.4516148567199707, "rewards/rejected": -2.263071060180664, "step": 570 }, { "epoch": 0.07, "learning_rate": 2.8433919924412424e-07, "logits/chosen": -2.011160373687744, "logits/rejected": -2.2315304279327393, "logps/chosen": -241.73687744140625, "logps/rejected": -184.26658630371094, "loss": 0.6331, "rewards/accuracies": 0.75, "rewards/chosen": -0.36120763421058655, "rewards/margins": 0.5312861800193787, "rewards/rejected": -0.8924937844276428, "step": 571 }, { "epoch": 0.07, "learning_rate": 2.84303767568206e-07, "logits/chosen": -1.904073715209961, "logits/rejected": -1.9552388191223145, "logps/chosen": -591.3861694335938, "logps/rejected": -640.4342041015625, "loss": 0.2905, "rewards/accuracies": 0.875, "rewards/chosen": -0.15657061338424683, "rewards/margins": 2.418710231781006, "rewards/rejected": -2.5752811431884766, "step": 572 }, { "epoch": 0.07, "learning_rate": 2.842683358922877e-07, "logits/chosen": -2.004777669906616, "logits/rejected": -1.9348944425582886, "logps/chosen": -485.394287109375, "logps/rejected": -456.34613037109375, "loss": 0.3506, "rewards/accuracies": 0.75, "rewards/chosen": -0.3032083809375763, "rewards/margins": 1.9522475004196167, "rewards/rejected": -2.255455732345581, "step": 573 }, { "epoch": 0.07, "learning_rate": 2.8423290421636943e-07, "logits/chosen": -2.0323987007141113, "logits/rejected": -2.2565672397613525, "logps/chosen": -376.1615295410156, "logps/rejected": -293.8371887207031, "loss": 0.3753, "rewards/accuracies": 0.75, "rewards/chosen": -0.6021791696548462, "rewards/margins": 1.4226195812225342, "rewards/rejected": -2.02479887008667, "step": 574 }, { "epoch": 0.07, "learning_rate": 2.841974725404511e-07, "logits/chosen": -2.7000463008880615, "logits/rejected": -2.7524003982543945, "logps/chosen": -200.11676025390625, "logps/rejected": -181.8428497314453, "loss": 0.6657, "rewards/accuracies": 0.625, "rewards/chosen": -0.6449150443077087, "rewards/margins": 1.3228996992111206, "rewards/rejected": -1.9678149223327637, "step": 575 }, { "epoch": 0.07, "learning_rate": 2.8416204086453287e-07, "logits/chosen": -2.293550968170166, "logits/rejected": -2.221982955932617, "logps/chosen": -284.7962341308594, "logps/rejected": -240.72853088378906, "loss": 0.4742, "rewards/accuracies": 0.75, "rewards/chosen": -0.6617366671562195, "rewards/margins": 0.8491596579551697, "rewards/rejected": -1.5108962059020996, "step": 576 }, { "epoch": 0.07, "learning_rate": 2.841266091886146e-07, "logits/chosen": -2.594348430633545, "logits/rejected": -2.5795907974243164, "logps/chosen": -403.5820617675781, "logps/rejected": -441.0587463378906, "loss": 0.678, "rewards/accuracies": 0.75, "rewards/chosen": -0.1816064566373825, "rewards/margins": 1.0647549629211426, "rewards/rejected": -1.2463613748550415, "step": 577 }, { "epoch": 0.07, "learning_rate": 2.840911775126963e-07, "logits/chosen": -1.8871886730194092, "logits/rejected": -2.048887252807617, "logps/chosen": -441.9034423828125, "logps/rejected": -282.894287109375, "loss": 0.6531, "rewards/accuracies": 0.625, "rewards/chosen": -1.0569076538085938, "rewards/margins": 0.5572657585144043, "rewards/rejected": -1.614173412322998, "step": 578 }, { "epoch": 0.07, "learning_rate": 2.8405574583677807e-07, "logits/chosen": -2.4486382007598877, "logits/rejected": -2.6536693572998047, "logps/chosen": -307.143798828125, "logps/rejected": -252.06643676757812, "loss": 0.585, "rewards/accuracies": 0.75, "rewards/chosen": -0.7573120594024658, "rewards/margins": 1.1727367639541626, "rewards/rejected": -1.9300488233566284, "step": 579 }, { "epoch": 0.07, "learning_rate": 2.840203141608598e-07, "logits/chosen": -2.450269937515259, "logits/rejected": -2.4061622619628906, "logps/chosen": -399.0839538574219, "logps/rejected": -429.9809875488281, "loss": 0.3457, "rewards/accuracies": 0.875, "rewards/chosen": 0.044712163507938385, "rewards/margins": 1.7830820083618164, "rewards/rejected": -1.7383698225021362, "step": 580 }, { "epoch": 0.07, "learning_rate": 2.839848824849415e-07, "logits/chosen": -2.6853950023651123, "logits/rejected": -2.7101523876190186, "logps/chosen": -225.93035888671875, "logps/rejected": -291.9564208984375, "loss": 0.1699, "rewards/accuracies": 1.0, "rewards/chosen": -0.48067495226860046, "rewards/margins": 2.566830635070801, "rewards/rejected": -3.0475056171417236, "step": 581 }, { "epoch": 0.07, "learning_rate": 2.8394945080902326e-07, "logits/chosen": -2.6570026874542236, "logits/rejected": -2.828355073928833, "logps/chosen": -472.3564758300781, "logps/rejected": -299.128173828125, "loss": 0.3127, "rewards/accuracies": 0.875, "rewards/chosen": -0.663401186466217, "rewards/margins": 1.259992241859436, "rewards/rejected": -1.9233934879302979, "step": 582 }, { "epoch": 0.07, "learning_rate": 2.83914019133105e-07, "logits/chosen": -2.654460906982422, "logits/rejected": -2.8254292011260986, "logps/chosen": -183.21360778808594, "logps/rejected": -211.34127807617188, "loss": 0.2901, "rewards/accuracies": 1.0, "rewards/chosen": -0.3141680061817169, "rewards/margins": 1.4108171463012695, "rewards/rejected": -1.724985122680664, "step": 583 }, { "epoch": 0.07, "learning_rate": 2.838785874571867e-07, "logits/chosen": -2.8397536277770996, "logits/rejected": -2.9731905460357666, "logps/chosen": -286.7995910644531, "logps/rejected": -322.51629638671875, "loss": 0.245, "rewards/accuracies": 1.0, "rewards/chosen": -0.5188305974006653, "rewards/margins": 1.7199426889419556, "rewards/rejected": -2.2387733459472656, "step": 584 }, { "epoch": 0.07, "learning_rate": 2.8384315578126845e-07, "logits/chosen": -2.499354124069214, "logits/rejected": -2.6108410358428955, "logps/chosen": -253.4418182373047, "logps/rejected": -343.5899658203125, "loss": 1.0835, "rewards/accuracies": 0.625, "rewards/chosen": -1.0491673946380615, "rewards/margins": -0.12267941236495972, "rewards/rejected": -0.9264879822731018, "step": 585 }, { "epoch": 0.07, "learning_rate": 2.8380772410535015e-07, "logits/chosen": -2.4853053092956543, "logits/rejected": -2.6456799507141113, "logps/chosen": -313.1239013671875, "logps/rejected": -213.18861389160156, "loss": 0.4806, "rewards/accuracies": 0.75, "rewards/chosen": -0.7546948790550232, "rewards/margins": 0.8017174005508423, "rewards/rejected": -1.5564121007919312, "step": 586 }, { "epoch": 0.07, "learning_rate": 2.837722924294319e-07, "logits/chosen": -2.3221936225891113, "logits/rejected": -2.5736799240112305, "logps/chosen": -431.1414794921875, "logps/rejected": -371.3270263671875, "loss": 0.3454, "rewards/accuracies": 0.875, "rewards/chosen": -0.6498576402664185, "rewards/margins": 1.3634686470031738, "rewards/rejected": -2.0133261680603027, "step": 587 }, { "epoch": 0.07, "learning_rate": 2.8373686075351364e-07, "logits/chosen": -2.2068943977355957, "logits/rejected": -2.1449995040893555, "logps/chosen": -245.41595458984375, "logps/rejected": -201.13003540039062, "loss": 0.4847, "rewards/accuracies": 0.75, "rewards/chosen": -1.7180060148239136, "rewards/margins": 1.0727077722549438, "rewards/rejected": -2.7907137870788574, "step": 588 }, { "epoch": 0.07, "learning_rate": 2.8370142907759534e-07, "logits/chosen": -2.7657008171081543, "logits/rejected": -2.870767116546631, "logps/chosen": -221.01698303222656, "logps/rejected": -213.3057861328125, "loss": 0.3241, "rewards/accuracies": 0.875, "rewards/chosen": -0.534647524356842, "rewards/margins": 1.4820157289505005, "rewards/rejected": -2.016663074493408, "step": 589 }, { "epoch": 0.07, "learning_rate": 2.836659974016771e-07, "logits/chosen": -2.561528444290161, "logits/rejected": -2.377598762512207, "logps/chosen": -158.01663208007812, "logps/rejected": -260.22589111328125, "loss": 0.4218, "rewards/accuracies": 0.875, "rewards/chosen": -0.4788733720779419, "rewards/margins": 1.0858718156814575, "rewards/rejected": -1.5647451877593994, "step": 590 }, { "epoch": 0.07, "learning_rate": 2.8363056572575884e-07, "logits/chosen": -2.373501777648926, "logits/rejected": -2.4982798099517822, "logps/chosen": -215.85755920410156, "logps/rejected": -145.58670043945312, "loss": 0.5412, "rewards/accuracies": 0.875, "rewards/chosen": -0.6830252408981323, "rewards/margins": 0.4874327480792999, "rewards/rejected": -1.1704579591751099, "step": 591 }, { "epoch": 0.07, "learning_rate": 2.8359513404984053e-07, "logits/chosen": -2.644239664077759, "logits/rejected": -2.832881450653076, "logps/chosen": -174.84352111816406, "logps/rejected": -259.2848205566406, "loss": 0.238, "rewards/accuracies": 1.0, "rewards/chosen": -0.6102717518806458, "rewards/margins": 2.061450481414795, "rewards/rejected": -2.671722412109375, "step": 592 }, { "epoch": 0.07, "learning_rate": 2.835597023739223e-07, "logits/chosen": -2.2125868797302246, "logits/rejected": -2.1189050674438477, "logps/chosen": -118.67438507080078, "logps/rejected": -160.91433715820312, "loss": 0.4665, "rewards/accuracies": 0.75, "rewards/chosen": -0.5902976989746094, "rewards/margins": 1.2112171649932861, "rewards/rejected": -1.8015148639678955, "step": 593 }, { "epoch": 0.07, "learning_rate": 2.8352427069800403e-07, "logits/chosen": -2.72749924659729, "logits/rejected": -2.665203332901001, "logps/chosen": -105.28466796875, "logps/rejected": -106.30815124511719, "loss": 0.6002, "rewards/accuracies": 0.625, "rewards/chosen": -0.7433668375015259, "rewards/margins": 0.4511909782886505, "rewards/rejected": -1.1945579051971436, "step": 594 }, { "epoch": 0.07, "learning_rate": 2.834888390220857e-07, "logits/chosen": -2.207937002182007, "logits/rejected": -2.4642491340637207, "logps/chosen": -415.00994873046875, "logps/rejected": -220.6689453125, "loss": 0.7089, "rewards/accuracies": 0.625, "rewards/chosen": -0.9031976461410522, "rewards/margins": 0.46310779452323914, "rewards/rejected": -1.3663054704666138, "step": 595 }, { "epoch": 0.07, "learning_rate": 2.834534073461675e-07, "logits/chosen": -2.3175909519195557, "logits/rejected": -2.2985143661499023, "logps/chosen": -232.93284606933594, "logps/rejected": -250.12237548828125, "loss": 0.7043, "rewards/accuracies": 0.75, "rewards/chosen": -0.7460999488830566, "rewards/margins": 0.812207818031311, "rewards/rejected": -1.5583077669143677, "step": 596 }, { "epoch": 0.07, "learning_rate": 2.8341797567024917e-07, "logits/chosen": -2.2121782302856445, "logits/rejected": -1.9086616039276123, "logps/chosen": -297.9239501953125, "logps/rejected": -399.9457092285156, "loss": 0.5678, "rewards/accuracies": 0.625, "rewards/chosen": -0.20001479983329773, "rewards/margins": 0.5181649923324585, "rewards/rejected": -0.7181798219680786, "step": 597 }, { "epoch": 0.07, "learning_rate": 2.833825439943309e-07, "logits/chosen": -2.7228288650512695, "logits/rejected": -2.6960911750793457, "logps/chosen": -252.58743286132812, "logps/rejected": -181.63479614257812, "loss": 0.8179, "rewards/accuracies": 0.5, "rewards/chosen": -1.059566855430603, "rewards/margins": 0.8404138684272766, "rewards/rejected": -1.8999807834625244, "step": 598 }, { "epoch": 0.07, "learning_rate": 2.833471123184126e-07, "logits/chosen": -2.2037315368652344, "logits/rejected": -2.185729503631592, "logps/chosen": -310.79974365234375, "logps/rejected": -283.07550048828125, "loss": 0.4263, "rewards/accuracies": 0.75, "rewards/chosen": -0.36597710847854614, "rewards/margins": 0.9915189743041992, "rewards/rejected": -1.3574961423873901, "step": 599 }, { "epoch": 0.07, "learning_rate": 2.8331168064249436e-07, "logits/chosen": -2.351884365081787, "logits/rejected": -2.5851497650146484, "logps/chosen": -272.38763427734375, "logps/rejected": -228.53138732910156, "loss": 0.431, "rewards/accuracies": 0.75, "rewards/chosen": -0.6801834106445312, "rewards/margins": 1.0062822103500366, "rewards/rejected": -1.6864656209945679, "step": 600 }, { "epoch": 0.07, "learning_rate": 2.832762489665761e-07, "logits/chosen": -2.4204070568084717, "logits/rejected": -2.7393598556518555, "logps/chosen": -299.3605651855469, "logps/rejected": -145.13677978515625, "loss": 0.7684, "rewards/accuracies": 0.625, "rewards/chosen": -0.5935815572738647, "rewards/margins": 0.34504058957099915, "rewards/rejected": -0.938622236251831, "step": 601 }, { "epoch": 0.07, "learning_rate": 2.8324081729065786e-07, "logits/chosen": -2.4079909324645996, "logits/rejected": -2.4792494773864746, "logps/chosen": -161.25982666015625, "logps/rejected": -155.2931671142578, "loss": 0.5439, "rewards/accuracies": 0.875, "rewards/chosen": -0.6917847394943237, "rewards/margins": 0.9195724725723267, "rewards/rejected": -1.6113572120666504, "step": 602 }, { "epoch": 0.07, "learning_rate": 2.8320538561473956e-07, "logits/chosen": -2.05000376701355, "logits/rejected": -2.067012071609497, "logps/chosen": -241.41250610351562, "logps/rejected": -244.8616180419922, "loss": 0.9123, "rewards/accuracies": 0.75, "rewards/chosen": -1.3803644180297852, "rewards/margins": 1.09917414188385, "rewards/rejected": -2.4795384407043457, "step": 603 }, { "epoch": 0.07, "learning_rate": 2.831699539388213e-07, "logits/chosen": -2.2408676147460938, "logits/rejected": -2.6611380577087402, "logps/chosen": -573.1158447265625, "logps/rejected": -283.30657958984375, "loss": 0.5248, "rewards/accuracies": 0.75, "rewards/chosen": -0.2845990061759949, "rewards/margins": 0.7190200090408325, "rewards/rejected": -1.0036190748214722, "step": 604 }, { "epoch": 0.07, "learning_rate": 2.8313452226290305e-07, "logits/chosen": -2.2413711547851562, "logits/rejected": -1.9701826572418213, "logps/chosen": -384.5602111816406, "logps/rejected": -411.017333984375, "loss": 0.5941, "rewards/accuracies": 0.625, "rewards/chosen": -1.3150925636291504, "rewards/margins": 1.1022653579711914, "rewards/rejected": -2.417357921600342, "step": 605 }, { "epoch": 0.07, "learning_rate": 2.8309909058698475e-07, "logits/chosen": -1.7727774381637573, "logits/rejected": -1.9457204341888428, "logps/chosen": -336.6259460449219, "logps/rejected": -318.899658203125, "loss": 0.8065, "rewards/accuracies": 0.375, "rewards/chosen": -0.6014400124549866, "rewards/margins": 0.32296398282051086, "rewards/rejected": -0.924403965473175, "step": 606 }, { "epoch": 0.07, "learning_rate": 2.830636589110665e-07, "logits/chosen": -1.8818624019622803, "logits/rejected": -2.0092155933380127, "logps/chosen": -362.704345703125, "logps/rejected": -311.9741516113281, "loss": 0.4337, "rewards/accuracies": 0.75, "rewards/chosen": -0.2584696412086487, "rewards/margins": 1.507845401763916, "rewards/rejected": -1.7663151025772095, "step": 607 }, { "epoch": 0.07, "learning_rate": 2.830282272351482e-07, "logits/chosen": -2.657205104827881, "logits/rejected": -2.791828155517578, "logps/chosen": -237.78347778320312, "logps/rejected": -172.70120239257812, "loss": 0.729, "rewards/accuracies": 0.375, "rewards/chosen": -1.1202346086502075, "rewards/margins": 0.17119766771793365, "rewards/rejected": -1.29143226146698, "step": 608 }, { "epoch": 0.07, "learning_rate": 2.8299279555922994e-07, "logits/chosen": -2.8653438091278076, "logits/rejected": -2.6305055618286133, "logps/chosen": -181.93601989746094, "logps/rejected": -266.245849609375, "loss": 0.2102, "rewards/accuracies": 1.0, "rewards/chosen": -0.4023686945438385, "rewards/margins": 2.941915988922119, "rewards/rejected": -3.3442845344543457, "step": 609 }, { "epoch": 0.07, "learning_rate": 2.8295736388331164e-07, "logits/chosen": -2.596820592880249, "logits/rejected": -2.533358335494995, "logps/chosen": -261.4521789550781, "logps/rejected": -145.0147247314453, "loss": 0.8639, "rewards/accuracies": 0.5, "rewards/chosen": -1.4233194589614868, "rewards/margins": 0.36191022396087646, "rewards/rejected": -1.7852296829223633, "step": 610 }, { "epoch": 0.07, "learning_rate": 2.829219322073934e-07, "logits/chosen": -1.9775631427764893, "logits/rejected": -2.1341824531555176, "logps/chosen": -576.28466796875, "logps/rejected": -334.25592041015625, "loss": 0.6744, "rewards/accuracies": 0.625, "rewards/chosen": -1.0930986404418945, "rewards/margins": 0.3086680769920349, "rewards/rejected": -1.4017667770385742, "step": 611 }, { "epoch": 0.07, "learning_rate": 2.8288650053147513e-07, "logits/chosen": -2.603475332260132, "logits/rejected": -2.3198750019073486, "logps/chosen": -132.3052978515625, "logps/rejected": -290.5291442871094, "loss": 0.3141, "rewards/accuracies": 0.75, "rewards/chosen": -0.5465155243873596, "rewards/margins": 1.6672029495239258, "rewards/rejected": -2.2137184143066406, "step": 612 }, { "epoch": 0.07, "learning_rate": 2.8285106885555683e-07, "logits/chosen": -2.4631457328796387, "logits/rejected": -2.1344504356384277, "logps/chosen": -259.9687194824219, "logps/rejected": -278.46881103515625, "loss": 0.3318, "rewards/accuracies": 0.75, "rewards/chosen": -0.4875625967979431, "rewards/margins": 1.749682068824768, "rewards/rejected": -2.2372446060180664, "step": 613 }, { "epoch": 0.07, "learning_rate": 2.828156371796386e-07, "logits/chosen": -2.502852439880371, "logits/rejected": -2.4360191822052, "logps/chosen": -219.77206420898438, "logps/rejected": -236.79733276367188, "loss": 0.5117, "rewards/accuracies": 0.625, "rewards/chosen": -0.7535436153411865, "rewards/margins": 0.6197067499160767, "rewards/rejected": -1.3732503652572632, "step": 614 }, { "epoch": 0.07, "learning_rate": 2.8278020550372033e-07, "logits/chosen": -2.331800937652588, "logits/rejected": -2.140773296356201, "logps/chosen": -189.0993194580078, "logps/rejected": -201.6796875, "loss": 0.4374, "rewards/accuracies": 0.875, "rewards/chosen": -0.3698059916496277, "rewards/margins": 0.84446120262146, "rewards/rejected": -1.2142672538757324, "step": 615 }, { "epoch": 0.07, "learning_rate": 2.827447738278021e-07, "logits/chosen": -2.684727191925049, "logits/rejected": -2.515939950942993, "logps/chosen": -165.89263916015625, "logps/rejected": -188.135986328125, "loss": 0.5219, "rewards/accuracies": 0.5, "rewards/chosen": -0.8118862509727478, "rewards/margins": 1.0984495878219604, "rewards/rejected": -1.9103360176086426, "step": 616 }, { "epoch": 0.07, "learning_rate": 2.8270934215188377e-07, "logits/chosen": -2.184600830078125, "logits/rejected": -2.1141839027404785, "logps/chosen": -237.5020751953125, "logps/rejected": -301.9725036621094, "loss": 0.4071, "rewards/accuracies": 0.75, "rewards/chosen": -0.4573739767074585, "rewards/margins": 1.5413289070129395, "rewards/rejected": -1.9987026453018188, "step": 617 }, { "epoch": 0.07, "learning_rate": 2.826739104759655e-07, "logits/chosen": -2.179051160812378, "logits/rejected": -2.357790231704712, "logps/chosen": -267.117431640625, "logps/rejected": -273.4722900390625, "loss": 0.3951, "rewards/accuracies": 0.75, "rewards/chosen": -0.5316153168678284, "rewards/margins": 1.1263729333877563, "rewards/rejected": -1.65798819065094, "step": 618 }, { "epoch": 0.07, "learning_rate": 2.826384788000472e-07, "logits/chosen": -2.4466304779052734, "logits/rejected": -2.367253065109253, "logps/chosen": -180.52737426757812, "logps/rejected": -219.88311767578125, "loss": 0.2799, "rewards/accuracies": 1.0, "rewards/chosen": 0.05940570682287216, "rewards/margins": 1.480074405670166, "rewards/rejected": -1.4206687211990356, "step": 619 }, { "epoch": 0.07, "learning_rate": 2.8260304712412896e-07, "logits/chosen": -2.663456916809082, "logits/rejected": -2.586817502975464, "logps/chosen": -188.86251831054688, "logps/rejected": -197.51052856445312, "loss": 0.4668, "rewards/accuracies": 0.625, "rewards/chosen": -0.21705666184425354, "rewards/margins": 1.0164827108383179, "rewards/rejected": -1.233539342880249, "step": 620 }, { "epoch": 0.07, "learning_rate": 2.8256761544821066e-07, "logits/chosen": -2.2668964862823486, "logits/rejected": -2.3446195125579834, "logps/chosen": -243.80783081054688, "logps/rejected": -212.63470458984375, "loss": 0.4425, "rewards/accuracies": 0.75, "rewards/chosen": -0.31664007902145386, "rewards/margins": 1.5920805931091309, "rewards/rejected": -1.9087207317352295, "step": 621 }, { "epoch": 0.07, "learning_rate": 2.825321837722924e-07, "logits/chosen": -2.6614184379577637, "logits/rejected": -2.8160336017608643, "logps/chosen": -541.4915771484375, "logps/rejected": -400.97100830078125, "loss": 0.1325, "rewards/accuracies": 1.0, "rewards/chosen": -0.06345769762992859, "rewards/margins": 2.4909658432006836, "rewards/rejected": -2.5544238090515137, "step": 622 }, { "epoch": 0.07, "learning_rate": 2.8249675209637416e-07, "logits/chosen": -2.5059781074523926, "logits/rejected": -2.671403408050537, "logps/chosen": -303.0697326660156, "logps/rejected": -257.12982177734375, "loss": 0.4902, "rewards/accuracies": 0.75, "rewards/chosen": -0.38753604888916016, "rewards/margins": 1.1425929069519043, "rewards/rejected": -1.530129075050354, "step": 623 }, { "epoch": 0.07, "learning_rate": 2.8246132042045585e-07, "logits/chosen": -2.230849266052246, "logits/rejected": -2.5255587100982666, "logps/chosen": -402.5303955078125, "logps/rejected": -307.230712890625, "loss": 0.3641, "rewards/accuracies": 0.875, "rewards/chosen": -0.2931148409843445, "rewards/margins": 1.1207062005996704, "rewards/rejected": -1.4138211011886597, "step": 624 }, { "epoch": 0.07, "learning_rate": 2.824258887445376e-07, "logits/chosen": -2.224437713623047, "logits/rejected": -2.2340521812438965, "logps/chosen": -176.03976440429688, "logps/rejected": -234.7279052734375, "loss": 0.6201, "rewards/accuracies": 0.5, "rewards/chosen": -0.7022520303726196, "rewards/margins": 1.3898818492889404, "rewards/rejected": -2.0921339988708496, "step": 625 }, { "epoch": 0.07, "learning_rate": 2.8239045706861935e-07, "logits/chosen": -2.258146047592163, "logits/rejected": -2.388251304626465, "logps/chosen": -379.135009765625, "logps/rejected": -307.49359130859375, "loss": 0.7614, "rewards/accuracies": 0.625, "rewards/chosen": -1.1313681602478027, "rewards/margins": 0.4820912182331085, "rewards/rejected": -1.6134594678878784, "step": 626 }, { "epoch": 0.07, "learning_rate": 2.823550253927011e-07, "logits/chosen": -2.3732810020446777, "logits/rejected": -2.358731508255005, "logps/chosen": -190.1050262451172, "logps/rejected": -221.21060180664062, "loss": 0.3903, "rewards/accuracies": 0.875, "rewards/chosen": -1.8787813186645508, "rewards/margins": 1.8298004865646362, "rewards/rejected": -3.7085819244384766, "step": 627 }, { "epoch": 0.07, "learning_rate": 2.823195937167828e-07, "logits/chosen": -2.172062397003174, "logits/rejected": -2.0315685272216797, "logps/chosen": -286.6888427734375, "logps/rejected": -233.4279327392578, "loss": 0.6726, "rewards/accuracies": 0.625, "rewards/chosen": -0.8270248174667358, "rewards/margins": 0.9048182368278503, "rewards/rejected": -1.7318429946899414, "step": 628 }, { "epoch": 0.07, "learning_rate": 2.8228416204086454e-07, "logits/chosen": -2.1710007190704346, "logits/rejected": -2.284658432006836, "logps/chosen": -223.65042114257812, "logps/rejected": -269.5903625488281, "loss": 0.3905, "rewards/accuracies": 0.75, "rewards/chosen": -0.11111211776733398, "rewards/margins": 1.2375226020812988, "rewards/rejected": -1.3486347198486328, "step": 629 }, { "epoch": 0.07, "learning_rate": 2.8224873036494624e-07, "logits/chosen": -2.1916775703430176, "logits/rejected": -1.9913333654403687, "logps/chosen": -284.353759765625, "logps/rejected": -263.2596740722656, "loss": 0.2975, "rewards/accuracies": 0.875, "rewards/chosen": -0.23000317811965942, "rewards/margins": 1.3193955421447754, "rewards/rejected": -1.54939866065979, "step": 630 }, { "epoch": 0.07, "learning_rate": 2.82213298689028e-07, "logits/chosen": -1.9303104877471924, "logits/rejected": -2.1487889289855957, "logps/chosen": -332.79400634765625, "logps/rejected": -226.94430541992188, "loss": 0.5281, "rewards/accuracies": 0.625, "rewards/chosen": -0.41981393098831177, "rewards/margins": 0.5807924866676331, "rewards/rejected": -1.0006064176559448, "step": 631 }, { "epoch": 0.07, "learning_rate": 2.821778670131097e-07, "logits/chosen": -2.355780839920044, "logits/rejected": -2.298170566558838, "logps/chosen": -153.72251892089844, "logps/rejected": -267.8560791015625, "loss": 0.3884, "rewards/accuracies": 0.875, "rewards/chosen": -0.3935573101043701, "rewards/margins": 0.978131890296936, "rewards/rejected": -1.3716892004013062, "step": 632 }, { "epoch": 0.07, "learning_rate": 2.8214243533719143e-07, "logits/chosen": -2.59535813331604, "logits/rejected": -2.3859457969665527, "logps/chosen": -188.00755310058594, "logps/rejected": -224.98385620117188, "loss": 0.5924, "rewards/accuracies": 0.75, "rewards/chosen": -0.3312590420246124, "rewards/margins": 0.8680870532989502, "rewards/rejected": -1.1993461847305298, "step": 633 }, { "epoch": 0.07, "learning_rate": 2.821070036612732e-07, "logits/chosen": -2.8450257778167725, "logits/rejected": -2.8692235946655273, "logps/chosen": -237.82232666015625, "logps/rejected": -215.40042114257812, "loss": 0.391, "rewards/accuracies": 0.875, "rewards/chosen": -0.3587251305580139, "rewards/margins": 1.4409229755401611, "rewards/rejected": -1.7996481657028198, "step": 634 }, { "epoch": 0.07, "learning_rate": 2.820715719853549e-07, "logits/chosen": -2.593151330947876, "logits/rejected": -2.5801234245300293, "logps/chosen": -93.4423828125, "logps/rejected": -153.20626831054688, "loss": 0.5335, "rewards/accuracies": 0.75, "rewards/chosen": -0.2512911856174469, "rewards/margins": 0.5896918773651123, "rewards/rejected": -0.8409830927848816, "step": 635 }, { "epoch": 0.07, "learning_rate": 2.820361403094366e-07, "logits/chosen": -1.9407367706298828, "logits/rejected": -2.032761573791504, "logps/chosen": -309.00830078125, "logps/rejected": -270.1577453613281, "loss": 0.354, "rewards/accuracies": 0.875, "rewards/chosen": 0.2378094494342804, "rewards/margins": 1.2084672451019287, "rewards/rejected": -0.9706577658653259, "step": 636 }, { "epoch": 0.07, "learning_rate": 2.820007086335183e-07, "logits/chosen": -1.994848608970642, "logits/rejected": -1.6797795295715332, "logps/chosen": -309.9917297363281, "logps/rejected": -294.864013671875, "loss": 0.501, "rewards/accuracies": 0.625, "rewards/chosen": -0.5989134907722473, "rewards/margins": 0.9195924997329712, "rewards/rejected": -1.5185060501098633, "step": 637 }, { "epoch": 0.07, "learning_rate": 2.819652769576001e-07, "logits/chosen": -2.3502488136291504, "logits/rejected": -2.3329994678497314, "logps/chosen": -277.3572692871094, "logps/rejected": -219.76028442382812, "loss": 0.5654, "rewards/accuracies": 0.5, "rewards/chosen": -1.1290271282196045, "rewards/margins": 0.581454873085022, "rewards/rejected": -1.7104820013046265, "step": 638 }, { "epoch": 0.07, "learning_rate": 2.819298452816818e-07, "logits/chosen": -2.8418495655059814, "logits/rejected": -2.8609778881073, "logps/chosen": -221.52622985839844, "logps/rejected": -243.91497802734375, "loss": 0.3318, "rewards/accuracies": 0.875, "rewards/chosen": -0.9092757701873779, "rewards/margins": 2.5488829612731934, "rewards/rejected": -3.4581589698791504, "step": 639 }, { "epoch": 0.07, "learning_rate": 2.8189441360576357e-07, "logits/chosen": -2.5195651054382324, "logits/rejected": -2.632418155670166, "logps/chosen": -273.74127197265625, "logps/rejected": -258.5153503417969, "loss": 0.4445, "rewards/accuracies": 0.875, "rewards/chosen": -1.0813536643981934, "rewards/margins": 1.3315284252166748, "rewards/rejected": -2.412882089614868, "step": 640 }, { "epoch": 0.07, "learning_rate": 2.8185898192984526e-07, "logits/chosen": -2.381563186645508, "logits/rejected": -2.6133275032043457, "logps/chosen": -375.8746643066406, "logps/rejected": -323.3339538574219, "loss": 0.3711, "rewards/accuracies": 0.875, "rewards/chosen": -0.7131337523460388, "rewards/margins": 1.5325254201889038, "rewards/rejected": -2.245659351348877, "step": 641 }, { "epoch": 0.07, "learning_rate": 2.81823550253927e-07, "logits/chosen": -2.434840202331543, "logits/rejected": -2.401885509490967, "logps/chosen": -414.1163024902344, "logps/rejected": -309.9866943359375, "loss": 0.2143, "rewards/accuracies": 1.0, "rewards/chosen": -0.158794105052948, "rewards/margins": 1.675749659538269, "rewards/rejected": -1.8345437049865723, "step": 642 }, { "epoch": 0.07, "learning_rate": 2.817881185780087e-07, "logits/chosen": -3.029679775238037, "logits/rejected": -2.9532246589660645, "logps/chosen": -182.62249755859375, "logps/rejected": -177.52967834472656, "loss": 0.5461, "rewards/accuracies": 0.75, "rewards/chosen": -1.1524386405944824, "rewards/margins": 0.4387982189655304, "rewards/rejected": -1.5912368297576904, "step": 643 }, { "epoch": 0.07, "learning_rate": 2.8175268690209045e-07, "logits/chosen": -2.324056386947632, "logits/rejected": -2.1222987174987793, "logps/chosen": -181.572021484375, "logps/rejected": -289.6360168457031, "loss": 0.2322, "rewards/accuracies": 1.0, "rewards/chosen": -0.28997987508773804, "rewards/margins": 2.0215511322021484, "rewards/rejected": -2.3115310668945312, "step": 644 }, { "epoch": 0.08, "learning_rate": 2.817172552261722e-07, "logits/chosen": -2.059887170791626, "logits/rejected": -2.1407175064086914, "logps/chosen": -290.55230712890625, "logps/rejected": -280.3440856933594, "loss": 0.4756, "rewards/accuracies": 0.75, "rewards/chosen": -0.19593696296215057, "rewards/margins": 1.9595015048980713, "rewards/rejected": -2.1554384231567383, "step": 645 }, { "epoch": 0.08, "learning_rate": 2.816818235502539e-07, "logits/chosen": -2.6764943599700928, "logits/rejected": -2.7170908451080322, "logps/chosen": -152.4773712158203, "logps/rejected": -200.98910522460938, "loss": 0.7438, "rewards/accuracies": 0.625, "rewards/chosen": -0.6851381063461304, "rewards/margins": 0.8004188537597656, "rewards/rejected": -1.4855568408966064, "step": 646 }, { "epoch": 0.08, "learning_rate": 2.8164639187433565e-07, "logits/chosen": -2.1113569736480713, "logits/rejected": -2.3172569274902344, "logps/chosen": -191.6326904296875, "logps/rejected": -130.34942626953125, "loss": 0.8519, "rewards/accuracies": 0.5, "rewards/chosen": -0.6206393241882324, "rewards/margins": 0.1768942028284073, "rewards/rejected": -0.7975335717201233, "step": 647 }, { "epoch": 0.08, "learning_rate": 2.8161096019841734e-07, "logits/chosen": -2.4157989025115967, "logits/rejected": -2.4171743392944336, "logps/chosen": -139.25582885742188, "logps/rejected": -156.53382873535156, "loss": 0.7906, "rewards/accuracies": 0.625, "rewards/chosen": -0.7041699290275574, "rewards/margins": 0.8959304094314575, "rewards/rejected": -1.6001003980636597, "step": 648 }, { "epoch": 0.08, "learning_rate": 2.815755285224991e-07, "logits/chosen": -2.6967780590057373, "logits/rejected": -2.621246814727783, "logps/chosen": -178.10220336914062, "logps/rejected": -317.8310241699219, "loss": 0.4521, "rewards/accuracies": 0.75, "rewards/chosen": 0.0364319309592247, "rewards/margins": 1.4366344213485718, "rewards/rejected": -1.4002025127410889, "step": 649 }, { "epoch": 0.08, "learning_rate": 2.8154009684658084e-07, "logits/chosen": -2.7764599323272705, "logits/rejected": -2.8255114555358887, "logps/chosen": -614.005126953125, "logps/rejected": -288.136962890625, "loss": 0.4876, "rewards/accuracies": 0.75, "rewards/chosen": -1.1919710636138916, "rewards/margins": 0.9879306554794312, "rewards/rejected": -2.179901599884033, "step": 650 }, { "epoch": 0.08, "learning_rate": 2.815046651706626e-07, "logits/chosen": -2.3394343852996826, "logits/rejected": -2.317230463027954, "logps/chosen": -408.80438232421875, "logps/rejected": -493.7617492675781, "loss": 0.7363, "rewards/accuracies": 0.75, "rewards/chosen": -0.7283014059066772, "rewards/margins": 1.6206783056259155, "rewards/rejected": -2.3489794731140137, "step": 651 }, { "epoch": 0.08, "learning_rate": 2.814692334947443e-07, "logits/chosen": -1.9347187280654907, "logits/rejected": -2.369868516921997, "logps/chosen": -707.7894287109375, "logps/rejected": -472.04498291015625, "loss": 0.5317, "rewards/accuracies": 0.75, "rewards/chosen": -0.4498329758644104, "rewards/margins": 1.9948474168777466, "rewards/rejected": -2.4446804523468018, "step": 652 }, { "epoch": 0.08, "learning_rate": 2.8143380181882603e-07, "logits/chosen": -2.857086420059204, "logits/rejected": -2.9411962032318115, "logps/chosen": -227.7477264404297, "logps/rejected": -208.58953857421875, "loss": 0.2632, "rewards/accuracies": 0.875, "rewards/chosen": 0.10916734486818314, "rewards/margins": 2.8450326919555664, "rewards/rejected": -2.735865354537964, "step": 653 }, { "epoch": 0.08, "learning_rate": 2.8139837014290773e-07, "logits/chosen": -2.3495724201202393, "logits/rejected": -2.4254541397094727, "logps/chosen": -238.48065185546875, "logps/rejected": -158.19439697265625, "loss": 0.4422, "rewards/accuracies": 0.75, "rewards/chosen": -0.19937562942504883, "rewards/margins": 1.0859156847000122, "rewards/rejected": -1.2852911949157715, "step": 654 }, { "epoch": 0.08, "learning_rate": 2.813629384669895e-07, "logits/chosen": -2.220245599746704, "logits/rejected": -2.145049810409546, "logps/chosen": -287.5046691894531, "logps/rejected": -268.61468505859375, "loss": 0.5303, "rewards/accuracies": 0.75, "rewards/chosen": -1.036236047744751, "rewards/margins": 0.9143364429473877, "rewards/rejected": -1.9505724906921387, "step": 655 }, { "epoch": 0.08, "learning_rate": 2.813275067910712e-07, "logits/chosen": -2.379141330718994, "logits/rejected": -2.4098448753356934, "logps/chosen": -159.46533203125, "logps/rejected": -106.53160858154297, "loss": 0.751, "rewards/accuracies": 0.375, "rewards/chosen": -0.5797417163848877, "rewards/margins": -0.02314772456884384, "rewards/rejected": -0.5565939545631409, "step": 656 }, { "epoch": 0.08, "learning_rate": 2.812920751151529e-07, "logits/chosen": -2.646141529083252, "logits/rejected": -2.3955111503601074, "logps/chosen": -134.55548095703125, "logps/rejected": -196.1585235595703, "loss": 0.5017, "rewards/accuracies": 0.875, "rewards/chosen": -0.8190967440605164, "rewards/margins": 0.8932404518127441, "rewards/rejected": -1.7123371362686157, "step": 657 }, { "epoch": 0.08, "learning_rate": 2.8125664343923467e-07, "logits/chosen": -2.254502773284912, "logits/rejected": -2.3867976665496826, "logps/chosen": -225.52923583984375, "logps/rejected": -225.027587890625, "loss": 0.3895, "rewards/accuracies": 0.75, "rewards/chosen": -0.2669307589530945, "rewards/margins": 2.5856473445892334, "rewards/rejected": -2.8525781631469727, "step": 658 }, { "epoch": 0.08, "learning_rate": 2.8122121176331636e-07, "logits/chosen": -2.238013505935669, "logits/rejected": -2.355064630508423, "logps/chosen": -184.70205688476562, "logps/rejected": -166.54415893554688, "loss": 0.9496, "rewards/accuracies": 0.625, "rewards/chosen": -0.7971018552780151, "rewards/margins": -0.13034646213054657, "rewards/rejected": -0.6667553782463074, "step": 659 }, { "epoch": 0.08, "learning_rate": 2.811857800873981e-07, "logits/chosen": -2.711026906967163, "logits/rejected": -2.477163553237915, "logps/chosen": -322.17315673828125, "logps/rejected": -200.6646270751953, "loss": 0.3882, "rewards/accuracies": 0.625, "rewards/chosen": -0.3079068660736084, "rewards/margins": 1.2629361152648926, "rewards/rejected": -1.570842981338501, "step": 660 }, { "epoch": 0.08, "learning_rate": 2.8115034841147986e-07, "logits/chosen": -2.244220733642578, "logits/rejected": -2.0889992713928223, "logps/chosen": -345.2870178222656, "logps/rejected": -403.8587646484375, "loss": 0.9451, "rewards/accuracies": 0.375, "rewards/chosen": -0.9911388754844666, "rewards/margins": 0.5271368026733398, "rewards/rejected": -1.518275499343872, "step": 661 }, { "epoch": 0.08, "learning_rate": 2.811149167355616e-07, "logits/chosen": -2.150815010070801, "logits/rejected": -2.3247618675231934, "logps/chosen": -227.82418823242188, "logps/rejected": -253.78684997558594, "loss": 0.3304, "rewards/accuracies": 0.75, "rewards/chosen": -0.09931284189224243, "rewards/margins": 1.9400739669799805, "rewards/rejected": -2.039386749267578, "step": 662 }, { "epoch": 0.08, "learning_rate": 2.810794850596433e-07, "logits/chosen": -2.4930598735809326, "logits/rejected": -2.3492417335510254, "logps/chosen": -297.0482177734375, "logps/rejected": -339.7794189453125, "loss": 0.7551, "rewards/accuracies": 0.5, "rewards/chosen": -0.7139439582824707, "rewards/margins": 0.9855427742004395, "rewards/rejected": -1.6994867324829102, "step": 663 }, { "epoch": 0.08, "learning_rate": 2.8104405338372506e-07, "logits/chosen": -2.600454807281494, "logits/rejected": -2.3083202838897705, "logps/chosen": -161.7836456298828, "logps/rejected": -250.01954650878906, "loss": 0.9111, "rewards/accuracies": 0.375, "rewards/chosen": -0.9141299724578857, "rewards/margins": 0.05952081084251404, "rewards/rejected": -0.9736508131027222, "step": 664 }, { "epoch": 0.08, "learning_rate": 2.8100862170780675e-07, "logits/chosen": -2.201402425765991, "logits/rejected": -2.5956668853759766, "logps/chosen": -447.8415832519531, "logps/rejected": -284.311767578125, "loss": 0.3513, "rewards/accuracies": 0.75, "rewards/chosen": -0.7406899333000183, "rewards/margins": 1.4940874576568604, "rewards/rejected": -2.2347772121429443, "step": 665 }, { "epoch": 0.08, "learning_rate": 2.809731900318885e-07, "logits/chosen": -1.9570379257202148, "logits/rejected": -2.1581883430480957, "logps/chosen": -203.88983154296875, "logps/rejected": -204.23867797851562, "loss": 0.8223, "rewards/accuracies": 0.75, "rewards/chosen": -1.0374674797058105, "rewards/margins": 1.0673367977142334, "rewards/rejected": -2.104804277420044, "step": 666 }, { "epoch": 0.08, "learning_rate": 2.8093775835597025e-07, "logits/chosen": -2.271881103515625, "logits/rejected": -2.7551355361938477, "logps/chosen": -425.5056457519531, "logps/rejected": -253.63589477539062, "loss": 0.4484, "rewards/accuracies": 0.875, "rewards/chosen": -0.18098919093608856, "rewards/margins": 1.0286552906036377, "rewards/rejected": -1.2096445560455322, "step": 667 }, { "epoch": 0.08, "learning_rate": 2.8090232668005194e-07, "logits/chosen": -1.9986205101013184, "logits/rejected": -1.900841236114502, "logps/chosen": -413.8116760253906, "logps/rejected": -403.51251220703125, "loss": 0.537, "rewards/accuracies": 0.875, "rewards/chosen": -2.2627365589141846, "rewards/margins": 1.8818767070770264, "rewards/rejected": -4.144613265991211, "step": 668 }, { "epoch": 0.08, "learning_rate": 2.808668950041337e-07, "logits/chosen": -2.1321682929992676, "logits/rejected": -2.290189266204834, "logps/chosen": -221.2316131591797, "logps/rejected": -209.66131591796875, "loss": 0.3719, "rewards/accuracies": 0.75, "rewards/chosen": -0.26084786653518677, "rewards/margins": 1.5150394439697266, "rewards/rejected": -1.7758872509002686, "step": 669 }, { "epoch": 0.08, "learning_rate": 2.808314633282154e-07, "logits/chosen": -2.051877975463867, "logits/rejected": -2.444598913192749, "logps/chosen": -339.264404296875, "logps/rejected": -192.53775024414062, "loss": 0.2962, "rewards/accuracies": 0.875, "rewards/chosen": -0.011827416718006134, "rewards/margins": 1.426331639289856, "rewards/rejected": -1.4381592273712158, "step": 670 }, { "epoch": 0.08, "learning_rate": 2.8079603165229714e-07, "logits/chosen": -2.2946462631225586, "logits/rejected": -2.5074288845062256, "logps/chosen": -170.9038543701172, "logps/rejected": -247.75242614746094, "loss": 0.5485, "rewards/accuracies": 0.5, "rewards/chosen": -0.5765238404273987, "rewards/margins": 1.430662989616394, "rewards/rejected": -2.0071868896484375, "step": 671 }, { "epoch": 0.08, "learning_rate": 2.8076059997637883e-07, "logits/chosen": -1.8959805965423584, "logits/rejected": -2.4402034282684326, "logps/chosen": -300.51104736328125, "logps/rejected": -140.6285400390625, "loss": 0.5086, "rewards/accuracies": 0.75, "rewards/chosen": -0.3240899443626404, "rewards/margins": 0.9766297340393066, "rewards/rejected": -1.3007197380065918, "step": 672 }, { "epoch": 0.08, "learning_rate": 2.8072516830046063e-07, "logits/chosen": -2.3013556003570557, "logits/rejected": -2.4747202396392822, "logps/chosen": -241.64178466796875, "logps/rejected": -203.61398315429688, "loss": 0.7173, "rewards/accuracies": 0.5, "rewards/chosen": -0.6803563237190247, "rewards/margins": 0.10033737123012543, "rewards/rejected": -0.7806937098503113, "step": 673 }, { "epoch": 0.08, "learning_rate": 2.8068973662454233e-07, "logits/chosen": -2.445082187652588, "logits/rejected": -1.9891459941864014, "logps/chosen": -246.13426208496094, "logps/rejected": -246.265625, "loss": 0.4529, "rewards/accuracies": 0.75, "rewards/chosen": -0.3844401240348816, "rewards/margins": 1.3719998598098755, "rewards/rejected": -1.7564398050308228, "step": 674 }, { "epoch": 0.08, "learning_rate": 2.806543049486241e-07, "logits/chosen": -2.2285172939300537, "logits/rejected": -2.497166633605957, "logps/chosen": -260.9329528808594, "logps/rejected": -170.54322814941406, "loss": 1.1059, "rewards/accuracies": 0.5, "rewards/chosen": -1.7965224981307983, "rewards/margins": 0.37358638644218445, "rewards/rejected": -2.1701087951660156, "step": 675 }, { "epoch": 0.08, "learning_rate": 2.8061887327270577e-07, "logits/chosen": -2.3908731937408447, "logits/rejected": -2.4542956352233887, "logps/chosen": -181.05657958984375, "logps/rejected": -160.290771484375, "loss": 0.6856, "rewards/accuracies": 0.875, "rewards/chosen": -0.9020822048187256, "rewards/margins": 0.33048588037490845, "rewards/rejected": -1.2325680255889893, "step": 676 }, { "epoch": 0.08, "learning_rate": 2.805834415967875e-07, "logits/chosen": -1.972471833229065, "logits/rejected": -2.1061344146728516, "logps/chosen": -469.3251953125, "logps/rejected": -386.42938232421875, "loss": 0.3062, "rewards/accuracies": 0.875, "rewards/chosen": -0.9037914872169495, "rewards/margins": 1.7410629987716675, "rewards/rejected": -2.6448545455932617, "step": 677 }, { "epoch": 0.08, "learning_rate": 2.805480099208692e-07, "logits/chosen": -2.8037707805633545, "logits/rejected": -2.8312735557556152, "logps/chosen": -224.88235473632812, "logps/rejected": -197.22244262695312, "loss": 0.511, "rewards/accuracies": 0.75, "rewards/chosen": -0.5849311947822571, "rewards/margins": 2.288635492324829, "rewards/rejected": -2.8735663890838623, "step": 678 }, { "epoch": 0.08, "learning_rate": 2.8051257824495097e-07, "logits/chosen": -2.513211965560913, "logits/rejected": -2.4965219497680664, "logps/chosen": -215.69085693359375, "logps/rejected": -226.50442504882812, "loss": 0.4229, "rewards/accuracies": 0.75, "rewards/chosen": -0.7692616581916809, "rewards/margins": 1.492958426475525, "rewards/rejected": -2.2622201442718506, "step": 679 }, { "epoch": 0.08, "learning_rate": 2.804771465690327e-07, "logits/chosen": -1.805816650390625, "logits/rejected": -1.7954068183898926, "logps/chosen": -294.740966796875, "logps/rejected": -285.6097412109375, "loss": 0.603, "rewards/accuracies": 0.5, "rewards/chosen": -0.715692400932312, "rewards/margins": 0.38370200991630554, "rewards/rejected": -1.09939444065094, "step": 680 }, { "epoch": 0.08, "learning_rate": 2.804417148931144e-07, "logits/chosen": -2.2543814182281494, "logits/rejected": -2.4916348457336426, "logps/chosen": -188.1788330078125, "logps/rejected": -194.21267700195312, "loss": 0.4896, "rewards/accuracies": 0.75, "rewards/chosen": -0.4180101156234741, "rewards/margins": 1.1600381135940552, "rewards/rejected": -1.5780482292175293, "step": 681 }, { "epoch": 0.08, "learning_rate": 2.8040628321719616e-07, "logits/chosen": -2.9856488704681396, "logits/rejected": -2.9655675888061523, "logps/chosen": -347.2264099121094, "logps/rejected": -204.5126953125, "loss": 0.4166, "rewards/accuracies": 0.875, "rewards/chosen": -0.28545743227005005, "rewards/margins": 1.1703705787658691, "rewards/rejected": -1.455828070640564, "step": 682 }, { "epoch": 0.08, "learning_rate": 2.8037085154127785e-07, "logits/chosen": -2.879774332046509, "logits/rejected": -2.765890121459961, "logps/chosen": -273.8333740234375, "logps/rejected": -270.3662109375, "loss": 0.5096, "rewards/accuracies": 0.875, "rewards/chosen": -0.5706639885902405, "rewards/margins": 1.928100824356079, "rewards/rejected": -2.498764753341675, "step": 683 }, { "epoch": 0.08, "learning_rate": 2.8033541986535966e-07, "logits/chosen": -2.3434393405914307, "logits/rejected": -2.2945632934570312, "logps/chosen": -323.439453125, "logps/rejected": -308.67352294921875, "loss": 0.4146, "rewards/accuracies": 0.75, "rewards/chosen": -0.8997571468353271, "rewards/margins": 1.7598652839660645, "rewards/rejected": -2.6596224308013916, "step": 684 }, { "epoch": 0.08, "learning_rate": 2.8029998818944135e-07, "logits/chosen": -2.49277925491333, "logits/rejected": -2.3509836196899414, "logps/chosen": -311.6294860839844, "logps/rejected": -301.6763000488281, "loss": 0.6562, "rewards/accuracies": 0.625, "rewards/chosen": -0.5908518433570862, "rewards/margins": 0.5877425074577332, "rewards/rejected": -1.1785943508148193, "step": 685 }, { "epoch": 0.08, "learning_rate": 2.802645565135231e-07, "logits/chosen": -3.058283567428589, "logits/rejected": -2.9940643310546875, "logps/chosen": -185.73513793945312, "logps/rejected": -170.11874389648438, "loss": 0.2695, "rewards/accuracies": 1.0, "rewards/chosen": -0.06979950517416, "rewards/margins": 1.678873062133789, "rewards/rejected": -1.748672604560852, "step": 686 }, { "epoch": 0.08, "learning_rate": 2.802291248376048e-07, "logits/chosen": -1.7685227394104004, "logits/rejected": -2.010348320007324, "logps/chosen": -365.30889892578125, "logps/rejected": -236.9215850830078, "loss": 0.666, "rewards/accuracies": 0.625, "rewards/chosen": -0.6521315574645996, "rewards/margins": 0.23895293474197388, "rewards/rejected": -0.8910845518112183, "step": 687 }, { "epoch": 0.08, "learning_rate": 2.8019369316168654e-07, "logits/chosen": -2.598428249359131, "logits/rejected": -2.434915065765381, "logps/chosen": -132.9219970703125, "logps/rejected": -233.4220733642578, "loss": 0.6621, "rewards/accuracies": 0.625, "rewards/chosen": -0.7431386113166809, "rewards/margins": 0.6722696423530579, "rewards/rejected": -1.4154083728790283, "step": 688 }, { "epoch": 0.08, "learning_rate": 2.8015826148576824e-07, "logits/chosen": -2.456496238708496, "logits/rejected": -2.3771629333496094, "logps/chosen": -262.38531494140625, "logps/rejected": -288.2380065917969, "loss": 0.3292, "rewards/accuracies": 0.75, "rewards/chosen": -0.42245495319366455, "rewards/margins": 1.4673452377319336, "rewards/rejected": -1.8898003101348877, "step": 689 }, { "epoch": 0.08, "learning_rate": 2.8012282980985e-07, "logits/chosen": -1.4079651832580566, "logits/rejected": -2.055075168609619, "logps/chosen": -450.5494079589844, "logps/rejected": -198.27972412109375, "loss": 0.5562, "rewards/accuracies": 0.625, "rewards/chosen": -0.47688814997673035, "rewards/margins": 0.5374201536178589, "rewards/rejected": -1.014308214187622, "step": 690 }, { "epoch": 0.08, "learning_rate": 2.8008739813393174e-07, "logits/chosen": -2.591388702392578, "logits/rejected": -2.5524582862854004, "logps/chosen": -133.5125732421875, "logps/rejected": -156.23046875, "loss": 0.6672, "rewards/accuracies": 0.5, "rewards/chosen": -0.6534445285797119, "rewards/margins": 0.6154847741127014, "rewards/rejected": -1.268929362297058, "step": 691 }, { "epoch": 0.08, "learning_rate": 2.8005196645801343e-07, "logits/chosen": -2.6435165405273438, "logits/rejected": -2.7400946617126465, "logps/chosen": -207.95704650878906, "logps/rejected": -237.688720703125, "loss": 0.3476, "rewards/accuracies": 0.875, "rewards/chosen": -0.6209278702735901, "rewards/margins": 1.43307363986969, "rewards/rejected": -2.054001569747925, "step": 692 }, { "epoch": 0.08, "learning_rate": 2.800165347820952e-07, "logits/chosen": -2.5939781665802, "logits/rejected": -2.648477077484131, "logps/chosen": -134.758056640625, "logps/rejected": -232.98541259765625, "loss": 0.5362, "rewards/accuracies": 0.625, "rewards/chosen": -0.2669229507446289, "rewards/margins": 1.2116219997406006, "rewards/rejected": -1.4785449504852295, "step": 693 }, { "epoch": 0.08, "learning_rate": 2.799811031061769e-07, "logits/chosen": -2.6734209060668945, "logits/rejected": -2.3314242362976074, "logps/chosen": -145.61383056640625, "logps/rejected": -229.9839324951172, "loss": 0.4777, "rewards/accuracies": 0.875, "rewards/chosen": -0.5519739985466003, "rewards/margins": 0.7958526611328125, "rewards/rejected": -1.347826600074768, "step": 694 }, { "epoch": 0.08, "learning_rate": 2.799456714302586e-07, "logits/chosen": -2.6144814491271973, "logits/rejected": -2.888803005218506, "logps/chosen": -354.63031005859375, "logps/rejected": -235.78070068359375, "loss": 0.297, "rewards/accuracies": 1.0, "rewards/chosen": -0.06772447377443314, "rewards/margins": 1.6161918640136719, "rewards/rejected": -1.6839163303375244, "step": 695 }, { "epoch": 0.08, "learning_rate": 2.799102397543404e-07, "logits/chosen": -2.860764741897583, "logits/rejected": -2.8972458839416504, "logps/chosen": -231.79254150390625, "logps/rejected": -261.70855712890625, "loss": 0.306, "rewards/accuracies": 0.875, "rewards/chosen": -0.5158629417419434, "rewards/margins": 1.3141611814498901, "rewards/rejected": -1.8300241231918335, "step": 696 }, { "epoch": 0.08, "learning_rate": 2.798748080784221e-07, "logits/chosen": -2.0907487869262695, "logits/rejected": -1.709822416305542, "logps/chosen": -275.63763427734375, "logps/rejected": -333.2041320800781, "loss": 0.4658, "rewards/accuracies": 0.625, "rewards/chosen": -0.18836429715156555, "rewards/margins": 1.2075763940811157, "rewards/rejected": -1.3959407806396484, "step": 697 }, { "epoch": 0.08, "learning_rate": 2.798393764025038e-07, "logits/chosen": -1.857750415802002, "logits/rejected": -1.8417351245880127, "logps/chosen": -418.12078857421875, "logps/rejected": -387.6116943359375, "loss": 0.1157, "rewards/accuracies": 1.0, "rewards/chosen": 0.04824839159846306, "rewards/margins": 2.574727773666382, "rewards/rejected": -2.5264792442321777, "step": 698 }, { "epoch": 0.08, "learning_rate": 2.7980394472658557e-07, "logits/chosen": -2.4613399505615234, "logits/rejected": -2.3161675930023193, "logps/chosen": -174.2075653076172, "logps/rejected": -184.5574951171875, "loss": 0.3076, "rewards/accuracies": 0.875, "rewards/chosen": -0.7121073603630066, "rewards/margins": 2.3927316665649414, "rewards/rejected": -3.1048388481140137, "step": 699 }, { "epoch": 0.08, "learning_rate": 2.7976851305066726e-07, "logits/chosen": -2.0053398609161377, "logits/rejected": -2.2668185234069824, "logps/chosen": -323.29925537109375, "logps/rejected": -193.83499145507812, "loss": 0.7332, "rewards/accuracies": 0.5, "rewards/chosen": -0.6456852555274963, "rewards/margins": 0.4359254837036133, "rewards/rejected": -1.0816106796264648, "step": 700 }, { "epoch": 0.08, "learning_rate": 2.79733081374749e-07, "logits/chosen": -2.719074010848999, "logits/rejected": -2.7703237533569336, "logps/chosen": -283.40362548828125, "logps/rejected": -333.7422790527344, "loss": 0.2756, "rewards/accuracies": 0.875, "rewards/chosen": -0.8003613352775574, "rewards/margins": 1.5607712268829346, "rewards/rejected": -2.361132860183716, "step": 701 }, { "epoch": 0.08, "learning_rate": 2.7969764969883076e-07, "logits/chosen": -2.345130681991577, "logits/rejected": -2.1284444332122803, "logps/chosen": -412.54864501953125, "logps/rejected": -377.3162841796875, "loss": 0.3025, "rewards/accuracies": 0.75, "rewards/chosen": -0.6909152269363403, "rewards/margins": 1.6313015222549438, "rewards/rejected": -2.322216749191284, "step": 702 }, { "epoch": 0.08, "learning_rate": 2.7966221802291246e-07, "logits/chosen": -2.021716594696045, "logits/rejected": -2.0690855979919434, "logps/chosen": -338.63262939453125, "logps/rejected": -316.3762512207031, "loss": 0.7908, "rewards/accuracies": 0.5, "rewards/chosen": -0.7158768773078918, "rewards/margins": 0.5332122445106506, "rewards/rejected": -1.2490891218185425, "step": 703 }, { "epoch": 0.08, "learning_rate": 2.796267863469942e-07, "logits/chosen": -1.840503454208374, "logits/rejected": -2.06848406791687, "logps/chosen": -565.7543334960938, "logps/rejected": -361.3687744140625, "loss": 0.2301, "rewards/accuracies": 1.0, "rewards/chosen": -0.1513749063014984, "rewards/margins": 1.6841871738433838, "rewards/rejected": -1.835561990737915, "step": 704 }, { "epoch": 0.08, "learning_rate": 2.795913546710759e-07, "logits/chosen": -2.1994693279266357, "logits/rejected": -2.34114933013916, "logps/chosen": -464.0210876464844, "logps/rejected": -300.4583740234375, "loss": 1.1177, "rewards/accuracies": 0.5, "rewards/chosen": -1.5121523141860962, "rewards/margins": 0.05686815083026886, "rewards/rejected": -1.5690205097198486, "step": 705 }, { "epoch": 0.08, "learning_rate": 2.7955592299515765e-07, "logits/chosen": -2.586184024810791, "logits/rejected": -2.7434518337249756, "logps/chosen": -593.52783203125, "logps/rejected": -334.9260559082031, "loss": 0.4639, "rewards/accuracies": 0.625, "rewards/chosen": -0.7823278903961182, "rewards/margins": 1.1696197986602783, "rewards/rejected": -1.9519476890563965, "step": 706 }, { "epoch": 0.08, "learning_rate": 2.7952049131923934e-07, "logits/chosen": -2.434286594390869, "logits/rejected": -2.3348546028137207, "logps/chosen": -190.62185668945312, "logps/rejected": -220.13125610351562, "loss": 0.4043, "rewards/accuracies": 0.875, "rewards/chosen": -0.5339385271072388, "rewards/margins": 1.9141103029251099, "rewards/rejected": -2.4480488300323486, "step": 707 }, { "epoch": 0.08, "learning_rate": 2.7948505964332115e-07, "logits/chosen": -2.7167341709136963, "logits/rejected": -2.6435136795043945, "logps/chosen": -376.6390075683594, "logps/rejected": -335.6934814453125, "loss": 0.3032, "rewards/accuracies": 0.875, "rewards/chosen": -0.47025322914123535, "rewards/margins": 1.9096953868865967, "rewards/rejected": -2.379948854446411, "step": 708 }, { "epoch": 0.08, "learning_rate": 2.7944962796740284e-07, "logits/chosen": -2.669339656829834, "logits/rejected": -2.7214407920837402, "logps/chosen": -287.1677551269531, "logps/rejected": -232.77674865722656, "loss": 0.4921, "rewards/accuracies": 0.875, "rewards/chosen": -0.4828144907951355, "rewards/margins": 1.136594533920288, "rewards/rejected": -1.6194088459014893, "step": 709 }, { "epoch": 0.08, "learning_rate": 2.794141962914846e-07, "logits/chosen": -2.4019737243652344, "logits/rejected": -2.5598459243774414, "logps/chosen": -267.4624328613281, "logps/rejected": -260.18896484375, "loss": 0.4349, "rewards/accuracies": 0.75, "rewards/chosen": -0.46108418703079224, "rewards/margins": 1.21381413936615, "rewards/rejected": -1.6748985052108765, "step": 710 }, { "epoch": 0.08, "learning_rate": 2.793787646155663e-07, "logits/chosen": -2.3857693672180176, "logits/rejected": -2.0118579864501953, "logps/chosen": -236.5018310546875, "logps/rejected": -277.8370056152344, "loss": 0.4318, "rewards/accuracies": 0.875, "rewards/chosen": -0.21453361213207245, "rewards/margins": 1.2105472087860107, "rewards/rejected": -1.4250807762145996, "step": 711 }, { "epoch": 0.08, "learning_rate": 2.7934333293964803e-07, "logits/chosen": -2.282078742980957, "logits/rejected": -2.257857322692871, "logps/chosen": -327.7319641113281, "logps/rejected": -289.92364501953125, "loss": 0.39, "rewards/accuracies": 0.75, "rewards/chosen": -0.485090970993042, "rewards/margins": 1.2860441207885742, "rewards/rejected": -1.7711350917816162, "step": 712 }, { "epoch": 0.08, "learning_rate": 2.793079012637298e-07, "logits/chosen": -1.8446171283721924, "logits/rejected": -1.83958101272583, "logps/chosen": -301.27081298828125, "logps/rejected": -311.137451171875, "loss": 0.2616, "rewards/accuracies": 0.875, "rewards/chosen": -0.5433977842330933, "rewards/margins": 1.9595355987548828, "rewards/rejected": -2.5029335021972656, "step": 713 }, { "epoch": 0.08, "learning_rate": 2.792724695878115e-07, "logits/chosen": -2.216991424560547, "logits/rejected": -2.2228689193725586, "logps/chosen": -183.7284393310547, "logps/rejected": -220.22628784179688, "loss": 1.035, "rewards/accuracies": 0.625, "rewards/chosen": -1.8529211282730103, "rewards/margins": 0.05384710431098938, "rewards/rejected": -1.9067683219909668, "step": 714 }, { "epoch": 0.08, "learning_rate": 2.7923703791189323e-07, "logits/chosen": -2.1335768699645996, "logits/rejected": -2.0502963066101074, "logps/chosen": -429.21673583984375, "logps/rejected": -377.0115966796875, "loss": 0.6808, "rewards/accuracies": 0.5, "rewards/chosen": -0.3235827684402466, "rewards/margins": 1.298314094543457, "rewards/rejected": -1.621896743774414, "step": 715 }, { "epoch": 0.08, "learning_rate": 2.792016062359749e-07, "logits/chosen": -2.2320785522460938, "logits/rejected": -2.269399642944336, "logps/chosen": -235.19699096679688, "logps/rejected": -287.6104736328125, "loss": 0.6823, "rewards/accuracies": 0.625, "rewards/chosen": -0.8427947759628296, "rewards/margins": 1.0084350109100342, "rewards/rejected": -1.8512297868728638, "step": 716 }, { "epoch": 0.08, "learning_rate": 2.7916617456005667e-07, "logits/chosen": -2.188983917236328, "logits/rejected": -2.080789089202881, "logps/chosen": -269.00555419921875, "logps/rejected": -257.24346923828125, "loss": 0.4682, "rewards/accuracies": 0.75, "rewards/chosen": -0.31422367691993713, "rewards/margins": 1.3701045513153076, "rewards/rejected": -1.6843281984329224, "step": 717 }, { "epoch": 0.08, "learning_rate": 2.7913074288413837e-07, "logits/chosen": -2.17915678024292, "logits/rejected": -2.2116074562072754, "logps/chosen": -291.34039306640625, "logps/rejected": -225.12042236328125, "loss": 0.5031, "rewards/accuracies": 0.625, "rewards/chosen": -0.23856277763843536, "rewards/margins": 0.7490602731704712, "rewards/rejected": -0.9876230955123901, "step": 718 }, { "epoch": 0.08, "learning_rate": 2.7909531120822017e-07, "logits/chosen": -2.093573570251465, "logits/rejected": -2.3347115516662598, "logps/chosen": -328.695068359375, "logps/rejected": -300.95806884765625, "loss": 0.5753, "rewards/accuracies": 0.75, "rewards/chosen": -0.9026257991790771, "rewards/margins": 1.1759573221206665, "rewards/rejected": -2.078583240509033, "step": 719 }, { "epoch": 0.08, "learning_rate": 2.7905987953230186e-07, "logits/chosen": -2.8407208919525146, "logits/rejected": -2.7154998779296875, "logps/chosen": -169.1520233154297, "logps/rejected": -226.8980712890625, "loss": 0.1991, "rewards/accuracies": 0.875, "rewards/chosen": -0.39350438117980957, "rewards/margins": 2.5129528045654297, "rewards/rejected": -2.9064574241638184, "step": 720 }, { "epoch": 0.08, "learning_rate": 2.790244478563836e-07, "logits/chosen": -2.3318159580230713, "logits/rejected": -2.4468765258789062, "logps/chosen": -397.9857177734375, "logps/rejected": -306.89410400390625, "loss": 0.3031, "rewards/accuracies": 0.75, "rewards/chosen": -0.3837026357650757, "rewards/margins": 1.9383556842803955, "rewards/rejected": -2.3220582008361816, "step": 721 }, { "epoch": 0.08, "learning_rate": 2.789890161804653e-07, "logits/chosen": -1.924243688583374, "logits/rejected": -1.798012375831604, "logps/chosen": -187.9967498779297, "logps/rejected": -194.39389038085938, "loss": 0.6395, "rewards/accuracies": 0.5, "rewards/chosen": -0.5219980478286743, "rewards/margins": 0.6820647716522217, "rewards/rejected": -1.2040629386901855, "step": 722 }, { "epoch": 0.08, "learning_rate": 2.7895358450454706e-07, "logits/chosen": -2.528423547744751, "logits/rejected": -2.359203338623047, "logps/chosen": -275.095458984375, "logps/rejected": -277.7443542480469, "loss": 0.4522, "rewards/accuracies": 0.75, "rewards/chosen": -0.4424862265586853, "rewards/margins": 1.1355905532836914, "rewards/rejected": -1.5780766010284424, "step": 723 }, { "epoch": 0.08, "learning_rate": 2.789181528286288e-07, "logits/chosen": -2.9008686542510986, "logits/rejected": -2.8426506519317627, "logps/chosen": -134.49192810058594, "logps/rejected": -131.39321899414062, "loss": 0.3508, "rewards/accuracies": 0.875, "rewards/chosen": -0.07774001359939575, "rewards/margins": 1.6229171752929688, "rewards/rejected": -1.7006571292877197, "step": 724 }, { "epoch": 0.08, "learning_rate": 2.788827211527105e-07, "logits/chosen": -2.314517021179199, "logits/rejected": -2.372591733932495, "logps/chosen": -402.5282287597656, "logps/rejected": -233.35751342773438, "loss": 0.3686, "rewards/accuracies": 0.75, "rewards/chosen": 0.03741845488548279, "rewards/margins": 1.3896923065185547, "rewards/rejected": -1.352273941040039, "step": 725 }, { "epoch": 0.08, "learning_rate": 2.7884728947679225e-07, "logits/chosen": -2.0849499702453613, "logits/rejected": -2.025916814804077, "logps/chosen": -344.33416748046875, "logps/rejected": -257.15655517578125, "loss": 0.2946, "rewards/accuracies": 0.875, "rewards/chosen": -0.6020482778549194, "rewards/margins": 1.996051549911499, "rewards/rejected": -2.598099708557129, "step": 726 }, { "epoch": 0.08, "learning_rate": 2.7881185780087395e-07, "logits/chosen": -2.5358870029449463, "logits/rejected": -2.5003550052642822, "logps/chosen": -203.62911987304688, "logps/rejected": -220.8814697265625, "loss": 0.6079, "rewards/accuracies": 0.625, "rewards/chosen": 0.02344139665365219, "rewards/margins": 0.6157550811767578, "rewards/rejected": -0.5923135876655579, "step": 727 }, { "epoch": 0.08, "learning_rate": 2.787764261249557e-07, "logits/chosen": -2.0504770278930664, "logits/rejected": -1.8686686754226685, "logps/chosen": -363.89306640625, "logps/rejected": -502.861328125, "loss": 0.6617, "rewards/accuracies": 0.375, "rewards/chosen": -0.6800999045372009, "rewards/margins": 0.7529337406158447, "rewards/rejected": -1.4330337047576904, "step": 728 }, { "epoch": 0.08, "learning_rate": 2.787409944490374e-07, "logits/chosen": -2.374544620513916, "logits/rejected": -2.033883571624756, "logps/chosen": -146.85714721679688, "logps/rejected": -300.08538818359375, "loss": 0.3987, "rewards/accuracies": 0.625, "rewards/chosen": -0.5079129934310913, "rewards/margins": 2.0036351680755615, "rewards/rejected": -2.5115480422973633, "step": 729 }, { "epoch": 0.08, "learning_rate": 2.7870556277311914e-07, "logits/chosen": -2.5551064014434814, "logits/rejected": -2.3524866104125977, "logps/chosen": -176.80889892578125, "logps/rejected": -254.78125, "loss": 0.2503, "rewards/accuracies": 1.0, "rewards/chosen": 0.06395383179187775, "rewards/margins": 1.668338418006897, "rewards/rejected": -1.6043845415115356, "step": 730 }, { "epoch": 0.09, "learning_rate": 2.786701310972009e-07, "logits/chosen": -2.0015459060668945, "logits/rejected": -2.36329984664917, "logps/chosen": -489.8224182128906, "logps/rejected": -320.53790283203125, "loss": 0.4448, "rewards/accuracies": 0.875, "rewards/chosen": -0.5152555108070374, "rewards/margins": 1.4673027992248535, "rewards/rejected": -1.982558250427246, "step": 731 }, { "epoch": 0.09, "learning_rate": 2.7863469942128264e-07, "logits/chosen": -2.347792625427246, "logits/rejected": -2.2414045333862305, "logps/chosen": -403.95318603515625, "logps/rejected": -478.7794494628906, "loss": 0.7786, "rewards/accuracies": 0.5, "rewards/chosen": -0.49405935406684875, "rewards/margins": 0.552676796913147, "rewards/rejected": -1.046736240386963, "step": 732 }, { "epoch": 0.09, "learning_rate": 2.7859926774536433e-07, "logits/chosen": -2.3601300716400146, "logits/rejected": -2.4505324363708496, "logps/chosen": -492.1108093261719, "logps/rejected": -448.75201416015625, "loss": 0.5489, "rewards/accuracies": 0.625, "rewards/chosen": -0.5677691698074341, "rewards/margins": 0.538233757019043, "rewards/rejected": -1.1060028076171875, "step": 733 }, { "epoch": 0.09, "learning_rate": 2.785638360694461e-07, "logits/chosen": -2.652724266052246, "logits/rejected": -2.379751443862915, "logps/chosen": -149.8656005859375, "logps/rejected": -295.765869140625, "loss": 0.161, "rewards/accuracies": 1.0, "rewards/chosen": -0.4850791096687317, "rewards/margins": 2.219998836517334, "rewards/rejected": -2.705078125, "step": 734 }, { "epoch": 0.09, "learning_rate": 2.7852840439352783e-07, "logits/chosen": -2.695128917694092, "logits/rejected": -2.4105587005615234, "logps/chosen": -238.45635986328125, "logps/rejected": -201.91998291015625, "loss": 0.4138, "rewards/accuracies": 0.75, "rewards/chosen": -0.3905141353607178, "rewards/margins": 0.9921962022781372, "rewards/rejected": -1.3827104568481445, "step": 735 }, { "epoch": 0.09, "learning_rate": 2.784929727176095e-07, "logits/chosen": -2.409235954284668, "logits/rejected": -2.511838912963867, "logps/chosen": -246.9296417236328, "logps/rejected": -258.2899169921875, "loss": 0.3567, "rewards/accuracies": 0.875, "rewards/chosen": -0.1772826761007309, "rewards/margins": 1.1907989978790283, "rewards/rejected": -1.3680816888809204, "step": 736 }, { "epoch": 0.09, "learning_rate": 2.7845754104169127e-07, "logits/chosen": -1.7174152135849, "logits/rejected": -1.937434434890747, "logps/chosen": -467.7893371582031, "logps/rejected": -287.3309020996094, "loss": 0.3511, "rewards/accuracies": 0.875, "rewards/chosen": -0.1859460175037384, "rewards/margins": 1.3349452018737793, "rewards/rejected": -1.5208911895751953, "step": 737 }, { "epoch": 0.09, "learning_rate": 2.7842210936577297e-07, "logits/chosen": -2.61128306388855, "logits/rejected": -2.646527051925659, "logps/chosen": -242.2703094482422, "logps/rejected": -256.3012390136719, "loss": 0.4762, "rewards/accuracies": 0.75, "rewards/chosen": -0.5834391117095947, "rewards/margins": 0.6834787726402283, "rewards/rejected": -1.2669178247451782, "step": 738 }, { "epoch": 0.09, "learning_rate": 2.783866776898547e-07, "logits/chosen": -1.547605276107788, "logits/rejected": -1.8730759620666504, "logps/chosen": -483.8611145019531, "logps/rejected": -262.24493408203125, "loss": 0.4126, "rewards/accuracies": 0.75, "rewards/chosen": -0.30781662464141846, "rewards/margins": 1.2256001234054565, "rewards/rejected": -1.533416748046875, "step": 739 }, { "epoch": 0.09, "learning_rate": 2.783512460139364e-07, "logits/chosen": -2.767202377319336, "logits/rejected": -2.5742387771606445, "logps/chosen": -243.34011840820312, "logps/rejected": -310.51641845703125, "loss": 0.2148, "rewards/accuracies": 1.0, "rewards/chosen": -0.1371321678161621, "rewards/margins": 1.861680030822754, "rewards/rejected": -1.998812198638916, "step": 740 }, { "epoch": 0.09, "learning_rate": 2.7831581433801816e-07, "logits/chosen": -1.9457716941833496, "logits/rejected": -2.2742390632629395, "logps/chosen": -501.174072265625, "logps/rejected": -301.8258056640625, "loss": 0.2804, "rewards/accuracies": 0.875, "rewards/chosen": -0.9595270156860352, "rewards/margins": 1.911973237991333, "rewards/rejected": -2.871500253677368, "step": 741 }, { "epoch": 0.09, "learning_rate": 2.782803826620999e-07, "logits/chosen": -1.4854416847229004, "logits/rejected": -2.0714879035949707, "logps/chosen": -451.8368225097656, "logps/rejected": -287.3528137207031, "loss": 0.624, "rewards/accuracies": 0.5, "rewards/chosen": -1.311759352684021, "rewards/margins": 0.4674542546272278, "rewards/rejected": -1.779213547706604, "step": 742 }, { "epoch": 0.09, "learning_rate": 2.7824495098618166e-07, "logits/chosen": -2.3233892917633057, "logits/rejected": -2.397526741027832, "logps/chosen": -415.1954040527344, "logps/rejected": -445.427734375, "loss": 0.1793, "rewards/accuracies": 1.0, "rewards/chosen": 0.1484445333480835, "rewards/margins": 2.864758014678955, "rewards/rejected": -2.716313362121582, "step": 743 }, { "epoch": 0.09, "learning_rate": 2.7820951931026335e-07, "logits/chosen": -2.107908010482788, "logits/rejected": -2.0752065181732178, "logps/chosen": -237.20498657226562, "logps/rejected": -181.3135223388672, "loss": 1.4602, "rewards/accuracies": 0.875, "rewards/chosen": -1.2329823970794678, "rewards/margins": -0.00503915548324585, "rewards/rejected": -1.2279433012008667, "step": 744 }, { "epoch": 0.09, "learning_rate": 2.781740876343451e-07, "logits/chosen": -2.3671107292175293, "logits/rejected": -2.007723093032837, "logps/chosen": -198.54006958007812, "logps/rejected": -294.02581787109375, "loss": 0.4608, "rewards/accuracies": 0.875, "rewards/chosen": -0.5045974850654602, "rewards/margins": 0.9476430416107178, "rewards/rejected": -1.4522404670715332, "step": 745 }, { "epoch": 0.09, "learning_rate": 2.7813865595842685e-07, "logits/chosen": -1.6849923133850098, "logits/rejected": -1.9355881214141846, "logps/chosen": -273.6889953613281, "logps/rejected": -263.3406982421875, "loss": 0.6669, "rewards/accuracies": 0.625, "rewards/chosen": -1.3472254276275635, "rewards/margins": 1.2660205364227295, "rewards/rejected": -2.613245964050293, "step": 746 }, { "epoch": 0.09, "learning_rate": 2.7810322428250855e-07, "logits/chosen": -2.4438881874084473, "logits/rejected": -2.0594675540924072, "logps/chosen": -191.34117126464844, "logps/rejected": -248.95152282714844, "loss": 1.7044, "rewards/accuracies": 0.75, "rewards/chosen": -2.3999998569488525, "rewards/margins": -0.08021494746208191, "rewards/rejected": -2.3197848796844482, "step": 747 }, { "epoch": 0.09, "learning_rate": 2.780677926065903e-07, "logits/chosen": -1.3684498071670532, "logits/rejected": -1.8824512958526611, "logps/chosen": -457.8465881347656, "logps/rejected": -249.6936492919922, "loss": 0.7575, "rewards/accuracies": 0.375, "rewards/chosen": -0.932547390460968, "rewards/margins": 0.055296219885349274, "rewards/rejected": -0.9878436326980591, "step": 748 }, { "epoch": 0.09, "learning_rate": 2.78032360930672e-07, "logits/chosen": -2.6013617515563965, "logits/rejected": -2.5035312175750732, "logps/chosen": -218.39523315429688, "logps/rejected": -292.25823974609375, "loss": 0.2676, "rewards/accuracies": 0.875, "rewards/chosen": 0.12753260135650635, "rewards/margins": 3.019993543624878, "rewards/rejected": -2.892460823059082, "step": 749 }, { "epoch": 0.09, "learning_rate": 2.7799692925475374e-07, "logits/chosen": -1.5520503520965576, "logits/rejected": -2.206054210662842, "logps/chosen": -348.1911926269531, "logps/rejected": -161.59873962402344, "loss": 0.6764, "rewards/accuracies": 0.625, "rewards/chosen": -0.5584234595298767, "rewards/margins": 0.31963321566581726, "rewards/rejected": -0.8780567049980164, "step": 750 }, { "epoch": 0.09, "learning_rate": 2.7796149757883544e-07, "logits/chosen": -2.3355801105499268, "logits/rejected": -2.4781787395477295, "logps/chosen": -347.6722106933594, "logps/rejected": -282.9828186035156, "loss": 0.5541, "rewards/accuracies": 0.5, "rewards/chosen": -0.31582963466644287, "rewards/margins": 0.6623234748840332, "rewards/rejected": -0.9781531095504761, "step": 751 }, { "epoch": 0.09, "learning_rate": 2.779260659029172e-07, "logits/chosen": -2.9521708488464355, "logits/rejected": -2.682919502258301, "logps/chosen": -173.8658447265625, "logps/rejected": -238.75140380859375, "loss": 0.237, "rewards/accuracies": 1.0, "rewards/chosen": -0.27960172295570374, "rewards/margins": 2.2849879264831543, "rewards/rejected": -2.564589500427246, "step": 752 }, { "epoch": 0.09, "learning_rate": 2.7789063422699893e-07, "logits/chosen": -2.027177333831787, "logits/rejected": -2.2728569507598877, "logps/chosen": -333.5521545410156, "logps/rejected": -225.32772827148438, "loss": 0.757, "rewards/accuracies": 0.625, "rewards/chosen": -0.975767970085144, "rewards/margins": 0.1552283614873886, "rewards/rejected": -1.1309963464736938, "step": 753 }, { "epoch": 0.09, "learning_rate": 2.778552025510807e-07, "logits/chosen": -2.277604579925537, "logits/rejected": -2.258746862411499, "logps/chosen": -161.8819580078125, "logps/rejected": -165.3174285888672, "loss": 0.718, "rewards/accuracies": 0.75, "rewards/chosen": -0.7111003398895264, "rewards/margins": 0.17237190902233124, "rewards/rejected": -0.883472204208374, "step": 754 }, { "epoch": 0.09, "learning_rate": 2.778197708751624e-07, "logits/chosen": -1.8956842422485352, "logits/rejected": -1.7877538204193115, "logps/chosen": -289.3042907714844, "logps/rejected": -367.7824401855469, "loss": 0.2828, "rewards/accuracies": 0.875, "rewards/chosen": -0.821601152420044, "rewards/margins": 1.4629815816879272, "rewards/rejected": -2.2845826148986816, "step": 755 }, { "epoch": 0.09, "learning_rate": 2.777843391992441e-07, "logits/chosen": -2.3617255687713623, "logits/rejected": -2.436927556991577, "logps/chosen": -268.787353515625, "logps/rejected": -225.15150451660156, "loss": 0.6219, "rewards/accuracies": 0.75, "rewards/chosen": -0.7100657224655151, "rewards/margins": 0.6736667156219482, "rewards/rejected": -1.3837324380874634, "step": 756 }, { "epoch": 0.09, "learning_rate": 2.777489075233259e-07, "logits/chosen": -2.4909229278564453, "logits/rejected": -2.2607004642486572, "logps/chosen": -84.35908508300781, "logps/rejected": -181.00213623046875, "loss": 0.3527, "rewards/accuracies": 0.875, "rewards/chosen": -0.6459870934486389, "rewards/margins": 1.2605112791061401, "rewards/rejected": -1.9064984321594238, "step": 757 }, { "epoch": 0.09, "learning_rate": 2.7771347584740757e-07, "logits/chosen": -2.337822198867798, "logits/rejected": -2.4811174869537354, "logps/chosen": -336.9784240722656, "logps/rejected": -266.4892883300781, "loss": 0.5226, "rewards/accuracies": 0.625, "rewards/chosen": -0.7801718711853027, "rewards/margins": 0.5696690678596497, "rewards/rejected": -1.3498408794403076, "step": 758 }, { "epoch": 0.09, "learning_rate": 2.776780441714893e-07, "logits/chosen": -2.4789628982543945, "logits/rejected": -2.607809543609619, "logps/chosen": -115.53268432617188, "logps/rejected": -136.46316528320312, "loss": 0.3211, "rewards/accuracies": 0.875, "rewards/chosen": 0.11838234961032867, "rewards/margins": 1.2281792163848877, "rewards/rejected": -1.1097968816757202, "step": 759 }, { "epoch": 0.09, "learning_rate": 2.77642612495571e-07, "logits/chosen": -1.9995639324188232, "logits/rejected": -2.273143768310547, "logps/chosen": -286.893798828125, "logps/rejected": -187.37335205078125, "loss": 0.6047, "rewards/accuracies": 0.75, "rewards/chosen": -0.40587806701660156, "rewards/margins": 1.0974023342132568, "rewards/rejected": -1.5032804012298584, "step": 760 }, { "epoch": 0.09, "learning_rate": 2.7760718081965276e-07, "logits/chosen": -1.9202815294265747, "logits/rejected": -1.9412760734558105, "logps/chosen": -381.6779479980469, "logps/rejected": -389.0871276855469, "loss": 0.267, "rewards/accuracies": 0.875, "rewards/chosen": -0.576455295085907, "rewards/margins": 1.6500946283340454, "rewards/rejected": -2.2265498638153076, "step": 761 }, { "epoch": 0.09, "learning_rate": 2.7757174914373446e-07, "logits/chosen": -2.048334836959839, "logits/rejected": -2.059481382369995, "logps/chosen": -163.4219512939453, "logps/rejected": -160.32611083984375, "loss": 0.7496, "rewards/accuracies": 0.625, "rewards/chosen": -0.7007333636283875, "rewards/margins": 0.2021094560623169, "rewards/rejected": -0.9028427600860596, "step": 762 }, { "epoch": 0.09, "learning_rate": 2.775363174678162e-07, "logits/chosen": -2.343045473098755, "logits/rejected": -1.9673285484313965, "logps/chosen": -300.00970458984375, "logps/rejected": -433.0460205078125, "loss": 0.299, "rewards/accuracies": 0.875, "rewards/chosen": -0.3984956443309784, "rewards/margins": 2.5965304374694824, "rewards/rejected": -2.995026111602783, "step": 763 }, { "epoch": 0.09, "learning_rate": 2.7750088579189796e-07, "logits/chosen": -2.638866424560547, "logits/rejected": -2.860140085220337, "logps/chosen": -325.6002197265625, "logps/rejected": -262.08892822265625, "loss": 0.3829, "rewards/accuracies": 1.0, "rewards/chosen": -1.1484016180038452, "rewards/margins": 1.3936734199523926, "rewards/rejected": -2.5420749187469482, "step": 764 }, { "epoch": 0.09, "learning_rate": 2.7746545411597965e-07, "logits/chosen": -2.7840490341186523, "logits/rejected": -2.6404807567596436, "logps/chosen": -100.87081909179688, "logps/rejected": -264.884765625, "loss": 0.4451, "rewards/accuracies": 0.625, "rewards/chosen": -0.7375617027282715, "rewards/margins": 1.909604549407959, "rewards/rejected": -2.6471662521362305, "step": 765 }, { "epoch": 0.09, "learning_rate": 2.774300224400614e-07, "logits/chosen": -2.1366701126098633, "logits/rejected": -2.3100757598876953, "logps/chosen": -411.67822265625, "logps/rejected": -304.1395263671875, "loss": 0.2685, "rewards/accuracies": 0.875, "rewards/chosen": -0.563908576965332, "rewards/margins": 1.5846513509750366, "rewards/rejected": -2.148560047149658, "step": 766 }, { "epoch": 0.09, "learning_rate": 2.7739459076414315e-07, "logits/chosen": -2.471198558807373, "logits/rejected": -2.5612523555755615, "logps/chosen": -307.8586730957031, "logps/rejected": -304.1485595703125, "loss": 0.3437, "rewards/accuracies": 0.75, "rewards/chosen": -0.26797568798065186, "rewards/margins": 5.004037857055664, "rewards/rejected": -5.272013187408447, "step": 767 }, { "epoch": 0.09, "learning_rate": 2.7735915908822484e-07, "logits/chosen": -2.29203462600708, "logits/rejected": -2.4890825748443604, "logps/chosen": -374.6341857910156, "logps/rejected": -294.659423828125, "loss": 0.3593, "rewards/accuracies": 0.875, "rewards/chosen": -0.005583435297012329, "rewards/margins": 1.8434807062149048, "rewards/rejected": -1.8490641117095947, "step": 768 }, { "epoch": 0.09, "learning_rate": 2.773237274123066e-07, "logits/chosen": -1.830995798110962, "logits/rejected": -1.9686992168426514, "logps/chosen": -439.869140625, "logps/rejected": -289.37225341796875, "loss": 0.5706, "rewards/accuracies": 0.75, "rewards/chosen": -0.5556020736694336, "rewards/margins": 1.1723510026931763, "rewards/rejected": -1.7279530763626099, "step": 769 }, { "epoch": 0.09, "learning_rate": 2.7728829573638834e-07, "logits/chosen": -2.4538049697875977, "logits/rejected": -2.726793050765991, "logps/chosen": -261.624755859375, "logps/rejected": -196.87759399414062, "loss": 0.2587, "rewards/accuracies": 0.875, "rewards/chosen": 0.03366359323263168, "rewards/margins": 1.9900729656219482, "rewards/rejected": -1.956409215927124, "step": 770 }, { "epoch": 0.09, "learning_rate": 2.7725286406047004e-07, "logits/chosen": -2.227140426635742, "logits/rejected": -2.5036261081695557, "logps/chosen": -346.79925537109375, "logps/rejected": -195.57916259765625, "loss": 0.4527, "rewards/accuracies": 0.75, "rewards/chosen": -0.42519277334213257, "rewards/margins": 0.9860503673553467, "rewards/rejected": -1.411243200302124, "step": 771 }, { "epoch": 0.09, "learning_rate": 2.772174323845518e-07, "logits/chosen": -2.4367258548736572, "logits/rejected": -2.472235918045044, "logps/chosen": -252.1932373046875, "logps/rejected": -319.0783386230469, "loss": 0.7871, "rewards/accuracies": 0.625, "rewards/chosen": -1.5913159847259521, "rewards/margins": 0.7784263491630554, "rewards/rejected": -2.3697423934936523, "step": 772 }, { "epoch": 0.09, "learning_rate": 2.771820007086335e-07, "logits/chosen": -2.182321310043335, "logits/rejected": -2.109429121017456, "logps/chosen": -230.63787841796875, "logps/rejected": -213.37655639648438, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": -0.2099878340959549, "rewards/margins": 3.0668489933013916, "rewards/rejected": -3.27683687210083, "step": 773 }, { "epoch": 0.09, "learning_rate": 2.7714656903271523e-07, "logits/chosen": -2.0221927165985107, "logits/rejected": -2.1753525733947754, "logps/chosen": -232.27548217773438, "logps/rejected": -201.60400390625, "loss": 0.484, "rewards/accuracies": 0.875, "rewards/chosen": -0.48170843720436096, "rewards/margins": 1.1055957078933716, "rewards/rejected": -1.5873041152954102, "step": 774 }, { "epoch": 0.09, "learning_rate": 2.77111137356797e-07, "logits/chosen": -2.184272527694702, "logits/rejected": -2.142824172973633, "logps/chosen": -158.35577392578125, "logps/rejected": -155.5572509765625, "loss": 0.7086, "rewards/accuracies": 0.625, "rewards/chosen": -0.9336909651756287, "rewards/margins": 0.2074240744113922, "rewards/rejected": -1.1411150693893433, "step": 775 }, { "epoch": 0.09, "learning_rate": 2.770757056808787e-07, "logits/chosen": -2.5685808658599854, "logits/rejected": -2.6487743854522705, "logps/chosen": -544.484619140625, "logps/rejected": -298.53662109375, "loss": 0.341, "rewards/accuracies": 0.875, "rewards/chosen": -0.4809766709804535, "rewards/margins": 1.0506956577301025, "rewards/rejected": -1.5316723585128784, "step": 776 }, { "epoch": 0.09, "learning_rate": 2.770402740049604e-07, "logits/chosen": -2.2997710704803467, "logits/rejected": -2.443389654159546, "logps/chosen": -234.91281127929688, "logps/rejected": -327.29913330078125, "loss": 0.4065, "rewards/accuracies": 0.875, "rewards/chosen": -0.6117860078811646, "rewards/margins": 1.0197553634643555, "rewards/rejected": -1.6315414905548096, "step": 777 }, { "epoch": 0.09, "learning_rate": 2.7700484232904217e-07, "logits/chosen": -2.995149850845337, "logits/rejected": -3.0570287704467773, "logps/chosen": -168.66322326660156, "logps/rejected": -243.2884979248047, "loss": 0.6478, "rewards/accuracies": 0.625, "rewards/chosen": -0.9907075762748718, "rewards/margins": 1.7721009254455566, "rewards/rejected": -2.7628087997436523, "step": 778 }, { "epoch": 0.09, "learning_rate": 2.7696941065312387e-07, "logits/chosen": -2.3316986560821533, "logits/rejected": -2.5488295555114746, "logps/chosen": -252.421142578125, "logps/rejected": -265.6931457519531, "loss": 0.4779, "rewards/accuracies": 0.625, "rewards/chosen": -0.2507890462875366, "rewards/margins": 1.328528642654419, "rewards/rejected": -1.579317569732666, "step": 779 }, { "epoch": 0.09, "learning_rate": 2.769339789772056e-07, "logits/chosen": -2.075249195098877, "logits/rejected": -2.1930127143859863, "logps/chosen": -263.2025146484375, "logps/rejected": -127.05046844482422, "loss": 0.6815, "rewards/accuracies": 0.5, "rewards/chosen": -0.3834928870201111, "rewards/margins": 0.12096354365348816, "rewards/rejected": -0.5044564008712769, "step": 780 }, { "epoch": 0.09, "learning_rate": 2.7689854730128736e-07, "logits/chosen": -1.9515782594680786, "logits/rejected": -2.0021324157714844, "logps/chosen": -267.23907470703125, "logps/rejected": -254.88040161132812, "loss": 1.2877, "rewards/accuracies": 0.75, "rewards/chosen": -1.5350732803344727, "rewards/margins": 0.027923956513404846, "rewards/rejected": -1.5629972219467163, "step": 781 }, { "epoch": 0.09, "learning_rate": 2.7686311562536906e-07, "logits/chosen": -2.95241641998291, "logits/rejected": -2.967081308364868, "logps/chosen": -248.35562133789062, "logps/rejected": -201.17227172851562, "loss": 0.6809, "rewards/accuracies": 0.625, "rewards/chosen": -0.7876554131507874, "rewards/margins": 0.456155002117157, "rewards/rejected": -1.2438104152679443, "step": 782 }, { "epoch": 0.09, "learning_rate": 2.768276839494508e-07, "logits/chosen": -2.4732584953308105, "logits/rejected": -2.427335262298584, "logps/chosen": -364.05792236328125, "logps/rejected": -264.27178955078125, "loss": 0.3338, "rewards/accuracies": 0.875, "rewards/chosen": -0.3528885245323181, "rewards/margins": 1.496238112449646, "rewards/rejected": -1.8491265773773193, "step": 783 }, { "epoch": 0.09, "learning_rate": 2.767922522735325e-07, "logits/chosen": -2.363307476043701, "logits/rejected": -2.545637845993042, "logps/chosen": -347.4714660644531, "logps/rejected": -372.4090576171875, "loss": 1.3909, "rewards/accuracies": 0.5, "rewards/chosen": -1.3513449430465698, "rewards/margins": -0.5820550918579102, "rewards/rejected": -0.7692897319793701, "step": 784 }, { "epoch": 0.09, "learning_rate": 2.7675682059761425e-07, "logits/chosen": -2.2637083530426025, "logits/rejected": -2.5251107215881348, "logps/chosen": -331.9575500488281, "logps/rejected": -218.31024169921875, "loss": 0.7148, "rewards/accuracies": 0.75, "rewards/chosen": -0.7790206670761108, "rewards/margins": 0.4762803316116333, "rewards/rejected": -1.2553009986877441, "step": 785 }, { "epoch": 0.09, "learning_rate": 2.76721388921696e-07, "logits/chosen": -2.2073974609375, "logits/rejected": -2.0144035816192627, "logps/chosen": -146.69845581054688, "logps/rejected": -170.0240478515625, "loss": 0.8367, "rewards/accuracies": 0.625, "rewards/chosen": -0.7718556523323059, "rewards/margins": 0.18709015846252441, "rewards/rejected": -0.9589458107948303, "step": 786 }, { "epoch": 0.09, "learning_rate": 2.766859572457777e-07, "logits/chosen": -2.3679733276367188, "logits/rejected": -2.3844170570373535, "logps/chosen": -225.81332397460938, "logps/rejected": -206.98191833496094, "loss": 0.5774, "rewards/accuracies": 0.5, "rewards/chosen": -0.6836738586425781, "rewards/margins": 0.5918539762496948, "rewards/rejected": -1.2755277156829834, "step": 787 }, { "epoch": 0.09, "learning_rate": 2.7665052556985945e-07, "logits/chosen": -2.55051326751709, "logits/rejected": -2.6735756397247314, "logps/chosen": -209.61671447753906, "logps/rejected": -279.142578125, "loss": 0.2528, "rewards/accuracies": 0.875, "rewards/chosen": -0.45962581038475037, "rewards/margins": 2.245039463043213, "rewards/rejected": -2.704665184020996, "step": 788 }, { "epoch": 0.09, "learning_rate": 2.766150938939412e-07, "logits/chosen": -2.55784273147583, "logits/rejected": -2.3539445400238037, "logps/chosen": -117.7677230834961, "logps/rejected": -215.36190795898438, "loss": 0.2899, "rewards/accuracies": 0.875, "rewards/chosen": 0.1559140384197235, "rewards/margins": 1.5776423215866089, "rewards/rejected": -1.4217283725738525, "step": 789 }, { "epoch": 0.09, "learning_rate": 2.765796622180229e-07, "logits/chosen": -2.49919056892395, "logits/rejected": -2.1292238235473633, "logps/chosen": -186.37905883789062, "logps/rejected": -212.44821166992188, "loss": 0.4155, "rewards/accuracies": 0.75, "rewards/chosen": -0.6736366748809814, "rewards/margins": 1.3675307035446167, "rewards/rejected": -2.0411672592163086, "step": 790 }, { "epoch": 0.09, "learning_rate": 2.7654423054210464e-07, "logits/chosen": -2.3219194412231445, "logits/rejected": -2.026634693145752, "logps/chosen": -202.41001892089844, "logps/rejected": -380.6002502441406, "loss": 0.4469, "rewards/accuracies": 0.75, "rewards/chosen": -0.47527679800987244, "rewards/margins": 0.9600129127502441, "rewards/rejected": -1.435289740562439, "step": 791 }, { "epoch": 0.09, "learning_rate": 2.765087988661864e-07, "logits/chosen": -2.4608097076416016, "logits/rejected": -2.4931468963623047, "logps/chosen": -262.5205993652344, "logps/rejected": -265.9735412597656, "loss": 0.3976, "rewards/accuracies": 0.875, "rewards/chosen": -0.3391968011856079, "rewards/margins": 1.0428065061569214, "rewards/rejected": -1.3820033073425293, "step": 792 }, { "epoch": 0.09, "learning_rate": 2.764733671902681e-07, "logits/chosen": -1.8869818449020386, "logits/rejected": -2.116821527481079, "logps/chosen": -379.5481262207031, "logps/rejected": -283.2991027832031, "loss": 0.3781, "rewards/accuracies": 0.875, "rewards/chosen": -0.28465360403060913, "rewards/margins": 1.3219412565231323, "rewards/rejected": -1.6065948009490967, "step": 793 }, { "epoch": 0.09, "learning_rate": 2.7643793551434983e-07, "logits/chosen": -2.203321933746338, "logits/rejected": -2.240670680999756, "logps/chosen": -323.3741149902344, "logps/rejected": -466.890625, "loss": 0.2431, "rewards/accuracies": 0.875, "rewards/chosen": -1.1400532722473145, "rewards/margins": 1.7018001079559326, "rewards/rejected": -2.841853141784668, "step": 794 }, { "epoch": 0.09, "learning_rate": 2.7640250383843153e-07, "logits/chosen": -1.8497653007507324, "logits/rejected": -1.9135180711746216, "logps/chosen": -158.06689453125, "logps/rejected": -240.0703125, "loss": 0.573, "rewards/accuracies": 0.625, "rewards/chosen": -0.4657929539680481, "rewards/margins": 0.5899807810783386, "rewards/rejected": -1.0557737350463867, "step": 795 }, { "epoch": 0.09, "learning_rate": 2.763670721625133e-07, "logits/chosen": -2.804980754852295, "logits/rejected": -2.5880014896392822, "logps/chosen": -217.15866088867188, "logps/rejected": -249.5355987548828, "loss": 0.5109, "rewards/accuracies": 0.75, "rewards/chosen": -0.5490109324455261, "rewards/margins": 0.7773458957672119, "rewards/rejected": -1.3263567686080933, "step": 796 }, { "epoch": 0.09, "learning_rate": 2.7633164048659497e-07, "logits/chosen": -2.3626606464385986, "logits/rejected": -2.200685977935791, "logps/chosen": -120.37093353271484, "logps/rejected": -232.69943237304688, "loss": 0.5993, "rewards/accuracies": 0.625, "rewards/chosen": -1.3119090795516968, "rewards/margins": 1.0987520217895508, "rewards/rejected": -2.410661220550537, "step": 797 }, { "epoch": 0.09, "learning_rate": 2.762962088106767e-07, "logits/chosen": -2.2215070724487305, "logits/rejected": -2.170691728591919, "logps/chosen": -225.33628845214844, "logps/rejected": -256.47784423828125, "loss": 0.8262, "rewards/accuracies": 0.5, "rewards/chosen": -1.21883225440979, "rewards/margins": 0.43528297543525696, "rewards/rejected": -1.6541152000427246, "step": 798 }, { "epoch": 0.09, "learning_rate": 2.7626077713475847e-07, "logits/chosen": -2.135340690612793, "logits/rejected": -2.3261606693267822, "logps/chosen": -344.9084167480469, "logps/rejected": -310.552001953125, "loss": 0.3069, "rewards/accuracies": 1.0, "rewards/chosen": 0.006478369235992432, "rewards/margins": 1.3188191652297974, "rewards/rejected": -1.3123407363891602, "step": 799 }, { "epoch": 0.09, "learning_rate": 2.7622534545884016e-07, "logits/chosen": -1.9407858848571777, "logits/rejected": -1.762431025505066, "logps/chosen": -232.25344848632812, "logps/rejected": -340.0291748046875, "loss": 0.4721, "rewards/accuracies": 0.75, "rewards/chosen": -0.2694483995437622, "rewards/margins": 0.9252721071243286, "rewards/rejected": -1.1947205066680908, "step": 800 }, { "epoch": 0.09, "learning_rate": 2.761899137829219e-07, "logits/chosen": -2.562016248703003, "logits/rejected": -2.43929123878479, "logps/chosen": -203.78733825683594, "logps/rejected": -274.46759033203125, "loss": 0.3273, "rewards/accuracies": 1.0, "rewards/chosen": -0.5996531844139099, "rewards/margins": 1.1518138647079468, "rewards/rejected": -1.7514671087265015, "step": 801 }, { "epoch": 0.09, "learning_rate": 2.7615448210700366e-07, "logits/chosen": -2.294565200805664, "logits/rejected": -2.561570167541504, "logps/chosen": -348.9151611328125, "logps/rejected": -220.5670166015625, "loss": 1.037, "rewards/accuracies": 0.5, "rewards/chosen": -1.6222509145736694, "rewards/margins": 0.014660343527793884, "rewards/rejected": -1.636911392211914, "step": 802 }, { "epoch": 0.09, "learning_rate": 2.761190504310854e-07, "logits/chosen": -1.995142936706543, "logits/rejected": -2.0590567588806152, "logps/chosen": -217.62197875976562, "logps/rejected": -251.7452392578125, "loss": 0.3422, "rewards/accuracies": 0.75, "rewards/chosen": -0.1148129478096962, "rewards/margins": 1.4329867362976074, "rewards/rejected": -1.5477995872497559, "step": 803 }, { "epoch": 0.09, "learning_rate": 2.760836187551671e-07, "logits/chosen": -2.3252084255218506, "logits/rejected": -2.1150801181793213, "logps/chosen": -445.9859313964844, "logps/rejected": -413.94976806640625, "loss": 0.3354, "rewards/accuracies": 0.875, "rewards/chosen": -0.7510362267494202, "rewards/margins": 1.5921887159347534, "rewards/rejected": -2.3432250022888184, "step": 804 }, { "epoch": 0.09, "learning_rate": 2.7604818707924885e-07, "logits/chosen": -2.165978193283081, "logits/rejected": -1.8848586082458496, "logps/chosen": -402.4139709472656, "logps/rejected": -357.16595458984375, "loss": 0.1877, "rewards/accuracies": 1.0, "rewards/chosen": -0.23427307605743408, "rewards/margins": 2.4704904556274414, "rewards/rejected": -2.704763412475586, "step": 805 }, { "epoch": 0.09, "learning_rate": 2.7601275540333055e-07, "logits/chosen": -2.431500196456909, "logits/rejected": -2.123365879058838, "logps/chosen": -253.3337860107422, "logps/rejected": -286.802001953125, "loss": 0.7088, "rewards/accuracies": 0.5, "rewards/chosen": -0.5514963865280151, "rewards/margins": 0.2381865382194519, "rewards/rejected": -0.789682924747467, "step": 806 }, { "epoch": 0.09, "learning_rate": 2.759773237274123e-07, "logits/chosen": -1.5364643335342407, "logits/rejected": -1.8320200443267822, "logps/chosen": -453.3089904785156, "logps/rejected": -368.9715576171875, "loss": 0.3616, "rewards/accuracies": 0.875, "rewards/chosen": -0.31580618023872375, "rewards/margins": 1.396337866783142, "rewards/rejected": -1.7121440172195435, "step": 807 }, { "epoch": 0.09, "learning_rate": 2.75941892051494e-07, "logits/chosen": -2.7579281330108643, "logits/rejected": -2.680347442626953, "logps/chosen": -333.203125, "logps/rejected": -458.37664794921875, "loss": 0.4691, "rewards/accuracies": 0.625, "rewards/chosen": -0.6152685284614563, "rewards/margins": 1.2705790996551514, "rewards/rejected": -1.8858474493026733, "step": 808 }, { "epoch": 0.09, "learning_rate": 2.7590646037557574e-07, "logits/chosen": -2.128244400024414, "logits/rejected": -1.9988393783569336, "logps/chosen": -540.3739013671875, "logps/rejected": -458.36773681640625, "loss": 0.438, "rewards/accuracies": 0.875, "rewards/chosen": -0.6936752200126648, "rewards/margins": 0.8882251977920532, "rewards/rejected": -1.5819003582000732, "step": 809 }, { "epoch": 0.09, "learning_rate": 2.758710286996575e-07, "logits/chosen": -2.770843505859375, "logits/rejected": -2.86104154586792, "logps/chosen": -116.48832702636719, "logps/rejected": -141.7459716796875, "loss": 0.5111, "rewards/accuracies": 0.625, "rewards/chosen": -0.625515878200531, "rewards/margins": 1.3086469173431396, "rewards/rejected": -1.9341628551483154, "step": 810 }, { "epoch": 0.09, "learning_rate": 2.758355970237392e-07, "logits/chosen": -2.7642717361450195, "logits/rejected": -2.7046964168548584, "logps/chosen": -295.29949951171875, "logps/rejected": -351.4145812988281, "loss": 0.3549, "rewards/accuracies": 0.75, "rewards/chosen": -0.6863878965377808, "rewards/margins": 1.9236444234848022, "rewards/rejected": -2.610032558441162, "step": 811 }, { "epoch": 0.09, "learning_rate": 2.7580016534782094e-07, "logits/chosen": -2.656972646713257, "logits/rejected": -2.689328670501709, "logps/chosen": -144.65496826171875, "logps/rejected": -242.3056640625, "loss": 0.3494, "rewards/accuracies": 0.875, "rewards/chosen": -0.5340954661369324, "rewards/margins": 1.5823067426681519, "rewards/rejected": -2.1164021492004395, "step": 812 }, { "epoch": 0.09, "learning_rate": 2.757647336719027e-07, "logits/chosen": -2.014843225479126, "logits/rejected": -2.2872374057769775, "logps/chosen": -323.4866027832031, "logps/rejected": -271.4276428222656, "loss": 1.1907, "rewards/accuracies": 0.375, "rewards/chosen": -1.0805068016052246, "rewards/margins": -0.24969954788684845, "rewards/rejected": -0.8308073282241821, "step": 813 }, { "epoch": 0.09, "learning_rate": 2.7572930199598443e-07, "logits/chosen": -2.2908191680908203, "logits/rejected": -2.3647541999816895, "logps/chosen": -348.09942626953125, "logps/rejected": -304.20098876953125, "loss": 0.4112, "rewards/accuracies": 0.875, "rewards/chosen": -0.6252138614654541, "rewards/margins": 2.6163878440856934, "rewards/rejected": -3.2416019439697266, "step": 814 }, { "epoch": 0.09, "learning_rate": 2.7569387032006613e-07, "logits/chosen": -2.087074041366577, "logits/rejected": -2.454625129699707, "logps/chosen": -548.4599609375, "logps/rejected": -283.82672119140625, "loss": 0.379, "rewards/accuracies": 0.875, "rewards/chosen": -0.17108744382858276, "rewards/margins": 1.8601222038269043, "rewards/rejected": -2.031209707260132, "step": 815 }, { "epoch": 0.09, "learning_rate": 2.756584386441479e-07, "logits/chosen": -2.2962212562561035, "logits/rejected": -2.016892910003662, "logps/chosen": -529.9929809570312, "logps/rejected": -318.89251708984375, "loss": 0.9321, "rewards/accuracies": 0.75, "rewards/chosen": -1.376366138458252, "rewards/margins": 0.29294195771217346, "rewards/rejected": -1.6693079471588135, "step": 816 }, { "epoch": 0.1, "learning_rate": 2.7562300696822957e-07, "logits/chosen": -2.312739133834839, "logits/rejected": -2.5307068824768066, "logps/chosen": -254.0530242919922, "logps/rejected": -207.68264770507812, "loss": 0.4818, "rewards/accuracies": 0.625, "rewards/chosen": -0.7174158096313477, "rewards/margins": 0.9763070940971375, "rewards/rejected": -1.6937229633331299, "step": 817 }, { "epoch": 0.1, "learning_rate": 2.755875752923113e-07, "logits/chosen": -2.681137800216675, "logits/rejected": -2.8213984966278076, "logps/chosen": -418.1108703613281, "logps/rejected": -314.2308654785156, "loss": 0.5405, "rewards/accuracies": 0.75, "rewards/chosen": -0.6917369365692139, "rewards/margins": 0.6524041295051575, "rewards/rejected": -1.3441410064697266, "step": 818 }, { "epoch": 0.1, "learning_rate": 2.75552143616393e-07, "logits/chosen": -2.7520992755889893, "logits/rejected": -2.8722448348999023, "logps/chosen": -472.7545166015625, "logps/rejected": -353.07562255859375, "loss": 0.268, "rewards/accuracies": 0.75, "rewards/chosen": -0.4939306974411011, "rewards/margins": 2.4535770416259766, "rewards/rejected": -2.947507858276367, "step": 819 }, { "epoch": 0.1, "learning_rate": 2.7551671194047477e-07, "logits/chosen": -1.8440983295440674, "logits/rejected": -2.1140377521514893, "logps/chosen": -400.38946533203125, "logps/rejected": -260.4787292480469, "loss": 0.4221, "rewards/accuracies": 0.875, "rewards/chosen": -0.81428462266922, "rewards/margins": 1.009574055671692, "rewards/rejected": -1.8238587379455566, "step": 820 }, { "epoch": 0.1, "learning_rate": 2.754812802645565e-07, "logits/chosen": -2.354236364364624, "logits/rejected": -2.3987433910369873, "logps/chosen": -113.60345458984375, "logps/rejected": -190.42843627929688, "loss": 0.588, "rewards/accuracies": 0.625, "rewards/chosen": -1.2221328020095825, "rewards/margins": 1.5217992067337036, "rewards/rejected": -2.7439322471618652, "step": 821 }, { "epoch": 0.1, "learning_rate": 2.754458485886382e-07, "logits/chosen": -2.6065571308135986, "logits/rejected": -2.5549871921539307, "logps/chosen": -380.747802734375, "logps/rejected": -299.6596984863281, "loss": 0.436, "rewards/accuracies": 0.875, "rewards/chosen": -0.6584911346435547, "rewards/margins": 0.7793545722961426, "rewards/rejected": -1.4378457069396973, "step": 822 }, { "epoch": 0.1, "learning_rate": 2.7541041691271996e-07, "logits/chosen": -2.1508445739746094, "logits/rejected": -2.6069607734680176, "logps/chosen": -387.8492431640625, "logps/rejected": -151.61810302734375, "loss": 0.5937, "rewards/accuracies": 0.75, "rewards/chosen": -0.28590136766433716, "rewards/margins": 1.1521022319793701, "rewards/rejected": -1.4380037784576416, "step": 823 }, { "epoch": 0.1, "learning_rate": 2.753749852368017e-07, "logits/chosen": -2.051076889038086, "logits/rejected": -2.016786575317383, "logps/chosen": -266.8189392089844, "logps/rejected": -381.87945556640625, "loss": 0.4265, "rewards/accuracies": 0.875, "rewards/chosen": -0.27443787455558777, "rewards/margins": 0.8248253464698792, "rewards/rejected": -1.0992631912231445, "step": 824 }, { "epoch": 0.1, "learning_rate": 2.7533955356088346e-07, "logits/chosen": -2.5787770748138428, "logits/rejected": -2.460585594177246, "logps/chosen": -335.9285583496094, "logps/rejected": -282.3567810058594, "loss": 0.1457, "rewards/accuracies": 1.0, "rewards/chosen": -0.3766709268093109, "rewards/margins": 2.8563716411590576, "rewards/rejected": -3.2330424785614014, "step": 825 }, { "epoch": 0.1, "learning_rate": 2.7530412188496515e-07, "logits/chosen": -2.5509676933288574, "logits/rejected": -2.343088150024414, "logps/chosen": -263.2168884277344, "logps/rejected": -388.8572998046875, "loss": 0.2012, "rewards/accuracies": 0.875, "rewards/chosen": -0.836493194103241, "rewards/margins": 2.912576675415039, "rewards/rejected": -3.7490696907043457, "step": 826 }, { "epoch": 0.1, "learning_rate": 2.752686902090469e-07, "logits/chosen": -2.0626823902130127, "logits/rejected": -2.183810234069824, "logps/chosen": -329.9076232910156, "logps/rejected": -288.15325927734375, "loss": 0.4588, "rewards/accuracies": 0.625, "rewards/chosen": -0.7152891159057617, "rewards/margins": 1.0756343603134155, "rewards/rejected": -1.7909233570098877, "step": 827 }, { "epoch": 0.1, "learning_rate": 2.752332585331286e-07, "logits/chosen": -2.2275002002716064, "logits/rejected": -2.2359678745269775, "logps/chosen": -280.36541748046875, "logps/rejected": -317.4617004394531, "loss": 2.1092, "rewards/accuracies": 0.5, "rewards/chosen": -2.762106418609619, "rewards/margins": -1.0440540313720703, "rewards/rejected": -1.718052625656128, "step": 828 }, { "epoch": 0.1, "learning_rate": 2.7519782685721034e-07, "logits/chosen": -2.3201491832733154, "logits/rejected": -2.424807548522949, "logps/chosen": -338.68682861328125, "logps/rejected": -359.15985107421875, "loss": 0.1832, "rewards/accuracies": 1.0, "rewards/chosen": -0.25985994935035706, "rewards/margins": 2.596632719039917, "rewards/rejected": -2.8564929962158203, "step": 829 }, { "epoch": 0.1, "learning_rate": 2.7516239518129204e-07, "logits/chosen": -2.8091440200805664, "logits/rejected": -2.9383184909820557, "logps/chosen": -238.34129333496094, "logps/rejected": -284.0589599609375, "loss": 0.3886, "rewards/accuracies": 0.75, "rewards/chosen": -0.5881557464599609, "rewards/margins": 1.5575106143951416, "rewards/rejected": -2.1456663608551025, "step": 830 }, { "epoch": 0.1, "learning_rate": 2.751269635053738e-07, "logits/chosen": -2.321010112762451, "logits/rejected": -2.2876906394958496, "logps/chosen": -296.724365234375, "logps/rejected": -382.295166015625, "loss": 0.3211, "rewards/accuracies": 1.0, "rewards/chosen": -0.7009415030479431, "rewards/margins": 1.2440650463104248, "rewards/rejected": -1.9450066089630127, "step": 831 }, { "epoch": 0.1, "learning_rate": 2.7509153182945554e-07, "logits/chosen": -2.815554618835449, "logits/rejected": -2.699995756149292, "logps/chosen": -278.7213134765625, "logps/rejected": -290.8868408203125, "loss": 0.4161, "rewards/accuracies": 0.875, "rewards/chosen": -0.29484686255455017, "rewards/margins": 1.5645017623901367, "rewards/rejected": -1.8593485355377197, "step": 832 }, { "epoch": 0.1, "learning_rate": 2.7505610015353723e-07, "logits/chosen": -2.4364542961120605, "logits/rejected": -2.422778367996216, "logps/chosen": -194.6625213623047, "logps/rejected": -200.44354248046875, "loss": 0.3371, "rewards/accuracies": 0.875, "rewards/chosen": -0.5941398739814758, "rewards/margins": 1.494678258895874, "rewards/rejected": -2.088818311691284, "step": 833 }, { "epoch": 0.1, "learning_rate": 2.75020668477619e-07, "logits/chosen": -2.4262027740478516, "logits/rejected": -2.6542892456054688, "logps/chosen": -171.06784057617188, "logps/rejected": -175.1933135986328, "loss": 0.6241, "rewards/accuracies": 0.75, "rewards/chosen": -1.725385069847107, "rewards/margins": 0.5255303382873535, "rewards/rejected": -2.250915288925171, "step": 834 }, { "epoch": 0.1, "learning_rate": 2.749852368017007e-07, "logits/chosen": -2.219679355621338, "logits/rejected": -2.5764517784118652, "logps/chosen": -329.9806213378906, "logps/rejected": -246.863525390625, "loss": 0.3792, "rewards/accuracies": 0.75, "rewards/chosen": -0.7566499710083008, "rewards/margins": 1.4804521799087524, "rewards/rejected": -2.2371022701263428, "step": 835 }, { "epoch": 0.1, "learning_rate": 2.749498051257825e-07, "logits/chosen": -2.0636887550354004, "logits/rejected": -2.3678150177001953, "logps/chosen": -270.44635009765625, "logps/rejected": -139.17648315429688, "loss": 0.4403, "rewards/accuracies": 0.875, "rewards/chosen": -0.1750926971435547, "rewards/margins": 1.2872021198272705, "rewards/rejected": -1.4622948169708252, "step": 836 }, { "epoch": 0.1, "learning_rate": 2.749143734498642e-07, "logits/chosen": -1.9599565267562866, "logits/rejected": -2.2607314586639404, "logps/chosen": -384.07073974609375, "logps/rejected": -308.3196105957031, "loss": 0.7774, "rewards/accuracies": 0.625, "rewards/chosen": -1.087849736213684, "rewards/margins": 0.8594973087310791, "rewards/rejected": -1.9473469257354736, "step": 837 }, { "epoch": 0.1, "learning_rate": 2.748789417739459e-07, "logits/chosen": -2.095362663269043, "logits/rejected": -2.6405675411224365, "logps/chosen": -378.84808349609375, "logps/rejected": -232.74066162109375, "loss": 0.4293, "rewards/accuracies": 0.875, "rewards/chosen": -0.6868550777435303, "rewards/margins": 1.2646958827972412, "rewards/rejected": -1.951551079750061, "step": 838 }, { "epoch": 0.1, "learning_rate": 2.748435100980276e-07, "logits/chosen": -2.1725120544433594, "logits/rejected": -2.5240583419799805, "logps/chosen": -375.93560791015625, "logps/rejected": -292.49542236328125, "loss": 0.424, "rewards/accuracies": 0.875, "rewards/chosen": -0.5310734510421753, "rewards/margins": 1.3675363063812256, "rewards/rejected": -1.8986096382141113, "step": 839 }, { "epoch": 0.1, "learning_rate": 2.7480807842210937e-07, "logits/chosen": -2.0917673110961914, "logits/rejected": -2.2914419174194336, "logps/chosen": -272.8973388671875, "logps/rejected": -261.6075744628906, "loss": 0.5795, "rewards/accuracies": 0.625, "rewards/chosen": -0.8168570399284363, "rewards/margins": 0.7078297734260559, "rewards/rejected": -1.5246866941452026, "step": 840 }, { "epoch": 0.1, "learning_rate": 2.7477264674619106e-07, "logits/chosen": -2.3124282360076904, "logits/rejected": -2.3117291927337646, "logps/chosen": -269.9890441894531, "logps/rejected": -234.67852783203125, "loss": 0.7279, "rewards/accuracies": 0.625, "rewards/chosen": -0.8774211406707764, "rewards/margins": 0.31102246046066284, "rewards/rejected": -1.188443660736084, "step": 841 }, { "epoch": 0.1, "learning_rate": 2.747372150702728e-07, "logits/chosen": -2.4581098556518555, "logits/rejected": -2.333850145339966, "logps/chosen": -276.5857849121094, "logps/rejected": -269.3536682128906, "loss": 0.3724, "rewards/accuracies": 0.75, "rewards/chosen": -0.18652315437793732, "rewards/margins": 1.307881474494934, "rewards/rejected": -1.494404673576355, "step": 842 }, { "epoch": 0.1, "learning_rate": 2.7470178339435456e-07, "logits/chosen": -2.123569965362549, "logits/rejected": -2.2368123531341553, "logps/chosen": -245.21453857421875, "logps/rejected": -237.89602661132812, "loss": 0.6574, "rewards/accuracies": 0.75, "rewards/chosen": -1.0193591117858887, "rewards/margins": 0.42842838168144226, "rewards/rejected": -1.4477875232696533, "step": 843 }, { "epoch": 0.1, "learning_rate": 2.7466635171843625e-07, "logits/chosen": -2.3953006267547607, "logits/rejected": -2.4411137104034424, "logps/chosen": -314.9009094238281, "logps/rejected": -248.25347900390625, "loss": 0.694, "rewards/accuracies": 0.625, "rewards/chosen": -0.9203782081604004, "rewards/margins": 0.7418824434280396, "rewards/rejected": -1.66226065158844, "step": 844 }, { "epoch": 0.1, "learning_rate": 2.74630920042518e-07, "logits/chosen": -2.8249924182891846, "logits/rejected": -2.7767493724823, "logps/chosen": -227.067138671875, "logps/rejected": -235.408935546875, "loss": 0.3778, "rewards/accuracies": 0.875, "rewards/chosen": -0.6507272124290466, "rewards/margins": 2.040569543838501, "rewards/rejected": -2.6912968158721924, "step": 845 }, { "epoch": 0.1, "learning_rate": 2.745954883665997e-07, "logits/chosen": -2.493678569793701, "logits/rejected": -2.1503067016601562, "logps/chosen": -352.4228515625, "logps/rejected": -269.718994140625, "loss": 0.6205, "rewards/accuracies": 0.75, "rewards/chosen": -0.5583662986755371, "rewards/margins": 0.9630569815635681, "rewards/rejected": -1.521423101425171, "step": 846 }, { "epoch": 0.1, "learning_rate": 2.7456005669068145e-07, "logits/chosen": -1.944936990737915, "logits/rejected": -2.2964425086975098, "logps/chosen": -341.35345458984375, "logps/rejected": -255.51739501953125, "loss": 0.4922, "rewards/accuracies": 0.75, "rewards/chosen": -0.24993163347244263, "rewards/margins": 0.9776173830032349, "rewards/rejected": -1.2275490760803223, "step": 847 }, { "epoch": 0.1, "learning_rate": 2.745246250147632e-07, "logits/chosen": -2.4579014778137207, "logits/rejected": -2.5890052318573, "logps/chosen": -228.53457641601562, "logps/rejected": -188.947021484375, "loss": 0.3411, "rewards/accuracies": 0.875, "rewards/chosen": 0.11777147650718689, "rewards/margins": 1.3392691612243652, "rewards/rejected": -1.2214977741241455, "step": 848 }, { "epoch": 0.1, "learning_rate": 2.7448919333884494e-07, "logits/chosen": -2.4853193759918213, "logits/rejected": -2.653001308441162, "logps/chosen": -432.8856201171875, "logps/rejected": -284.4534912109375, "loss": 0.5759, "rewards/accuracies": 0.75, "rewards/chosen": -1.009524941444397, "rewards/margins": 0.7271549105644226, "rewards/rejected": -1.7366799116134644, "step": 849 }, { "epoch": 0.1, "learning_rate": 2.7445376166292664e-07, "logits/chosen": -2.7064807415008545, "logits/rejected": -2.914205551147461, "logps/chosen": -274.72332763671875, "logps/rejected": -232.59280395507812, "loss": 0.7594, "rewards/accuracies": 0.625, "rewards/chosen": -0.39183905720710754, "rewards/margins": 0.6476144790649414, "rewards/rejected": -1.0394536256790161, "step": 850 }, { "epoch": 0.1, "learning_rate": 2.744183299870084e-07, "logits/chosen": -2.561817169189453, "logits/rejected": -2.5045077800750732, "logps/chosen": -256.9534606933594, "logps/rejected": -239.01370239257812, "loss": 0.3352, "rewards/accuracies": 0.75, "rewards/chosen": -1.0992029905319214, "rewards/margins": 1.8558465242385864, "rewards/rejected": -2.9550492763519287, "step": 851 }, { "epoch": 0.1, "learning_rate": 2.743828983110901e-07, "logits/chosen": -1.9374275207519531, "logits/rejected": -2.3309454917907715, "logps/chosen": -372.3678894042969, "logps/rejected": -276.0098876953125, "loss": 0.2581, "rewards/accuracies": 0.875, "rewards/chosen": -0.5369650721549988, "rewards/margins": 1.8163683414459229, "rewards/rejected": -2.353332996368408, "step": 852 }, { "epoch": 0.1, "learning_rate": 2.7434746663517183e-07, "logits/chosen": -2.095811367034912, "logits/rejected": -2.2285728454589844, "logps/chosen": -198.23341369628906, "logps/rejected": -226.40940856933594, "loss": 0.4082, "rewards/accuracies": 0.875, "rewards/chosen": -0.6585460305213928, "rewards/margins": 1.7346041202545166, "rewards/rejected": -2.3931500911712646, "step": 853 }, { "epoch": 0.1, "learning_rate": 2.743120349592536e-07, "logits/chosen": -2.4165713787078857, "logits/rejected": -2.41168212890625, "logps/chosen": -357.34576416015625, "logps/rejected": -437.22637939453125, "loss": 0.3846, "rewards/accuracies": 0.75, "rewards/chosen": 0.044335730373859406, "rewards/margins": 1.661699891090393, "rewards/rejected": -1.6173641681671143, "step": 854 }, { "epoch": 0.1, "learning_rate": 2.742766032833353e-07, "logits/chosen": -2.3180534839630127, "logits/rejected": -2.0205044746398926, "logps/chosen": -235.99032592773438, "logps/rejected": -224.90594482421875, "loss": 0.3298, "rewards/accuracies": 0.875, "rewards/chosen": -0.49009013175964355, "rewards/margins": 1.4078333377838135, "rewards/rejected": -1.897923469543457, "step": 855 }, { "epoch": 0.1, "learning_rate": 2.74241171607417e-07, "logits/chosen": -2.4835808277130127, "logits/rejected": -2.466080665588379, "logps/chosen": -206.12327575683594, "logps/rejected": -224.22584533691406, "loss": 0.6719, "rewards/accuracies": 0.625, "rewards/chosen": -0.7109304070472717, "rewards/margins": 0.41522058844566345, "rewards/rejected": -1.1261508464813232, "step": 856 }, { "epoch": 0.1, "learning_rate": 2.742057399314987e-07, "logits/chosen": -2.671407461166382, "logits/rejected": -2.879507064819336, "logps/chosen": -447.0558776855469, "logps/rejected": -243.20494079589844, "loss": 0.5974, "rewards/accuracies": 0.625, "rewards/chosen": -1.0094728469848633, "rewards/margins": 0.8336586356163025, "rewards/rejected": -1.8431315422058105, "step": 857 }, { "epoch": 0.1, "learning_rate": 2.7417030825558047e-07, "logits/chosen": -2.5173215866088867, "logits/rejected": -2.3970425128936768, "logps/chosen": -183.6345977783203, "logps/rejected": -295.270263671875, "loss": 0.297, "rewards/accuracies": 1.0, "rewards/chosen": -0.7753420472145081, "rewards/margins": 1.450514793395996, "rewards/rejected": -2.2258567810058594, "step": 858 }, { "epoch": 0.1, "learning_rate": 2.741348765796622e-07, "logits/chosen": -2.1595349311828613, "logits/rejected": -1.8072996139526367, "logps/chosen": -382.2784729003906, "logps/rejected": -504.5029602050781, "loss": 0.6275, "rewards/accuracies": 0.5, "rewards/chosen": -1.0388683080673218, "rewards/margins": 0.9335626363754272, "rewards/rejected": -1.9724310636520386, "step": 859 }, { "epoch": 0.1, "learning_rate": 2.7409944490374397e-07, "logits/chosen": -1.8402773141860962, "logits/rejected": -1.7585285902023315, "logps/chosen": -352.027099609375, "logps/rejected": -330.027099609375, "loss": 1.1536, "rewards/accuracies": 0.375, "rewards/chosen": -1.1935362815856934, "rewards/margins": -0.0043143630027771, "rewards/rejected": -1.1892218589782715, "step": 860 }, { "epoch": 0.1, "learning_rate": 2.7406401322782566e-07, "logits/chosen": -2.498201847076416, "logits/rejected": -2.469534397125244, "logps/chosen": -201.34637451171875, "logps/rejected": -206.03701782226562, "loss": 0.3057, "rewards/accuracies": 0.75, "rewards/chosen": -0.38887739181518555, "rewards/margins": 1.4462579488754272, "rewards/rejected": -1.8351353406906128, "step": 861 }, { "epoch": 0.1, "learning_rate": 2.740285815519074e-07, "logits/chosen": -2.607278823852539, "logits/rejected": -2.6182618141174316, "logps/chosen": -306.421875, "logps/rejected": -161.61061096191406, "loss": 0.6008, "rewards/accuracies": 0.875, "rewards/chosen": -0.7242245078086853, "rewards/margins": 0.9896548986434937, "rewards/rejected": -1.7138793468475342, "step": 862 }, { "epoch": 0.1, "learning_rate": 2.739931498759891e-07, "logits/chosen": -2.9798707962036133, "logits/rejected": -2.9436299800872803, "logps/chosen": -153.89254760742188, "logps/rejected": -195.43585205078125, "loss": 0.5373, "rewards/accuracies": 0.625, "rewards/chosen": -0.4530704915523529, "rewards/margins": 2.262968063354492, "rewards/rejected": -2.716038465499878, "step": 863 }, { "epoch": 0.1, "learning_rate": 2.7395771820007086e-07, "logits/chosen": -2.4028046131134033, "logits/rejected": -2.621156692504883, "logps/chosen": -217.90699768066406, "logps/rejected": -198.68719482421875, "loss": 0.3619, "rewards/accuracies": 1.0, "rewards/chosen": -0.18249444663524628, "rewards/margins": 1.246960997581482, "rewards/rejected": -1.4294555187225342, "step": 864 }, { "epoch": 0.1, "learning_rate": 2.739222865241526e-07, "logits/chosen": -2.384629249572754, "logits/rejected": -2.1730425357818604, "logps/chosen": -172.04638671875, "logps/rejected": -214.42144775390625, "loss": 0.4042, "rewards/accuracies": 0.875, "rewards/chosen": -0.7669409513473511, "rewards/margins": 1.453324317932129, "rewards/rejected": -2.2202653884887695, "step": 865 }, { "epoch": 0.1, "learning_rate": 2.738868548482343e-07, "logits/chosen": -2.648963212966919, "logits/rejected": -2.7522172927856445, "logps/chosen": -158.70213317871094, "logps/rejected": -228.22970581054688, "loss": 0.2112, "rewards/accuracies": 1.0, "rewards/chosen": -0.24649617075920105, "rewards/margins": 2.320326566696167, "rewards/rejected": -2.5668227672576904, "step": 866 }, { "epoch": 0.1, "learning_rate": 2.7385142317231605e-07, "logits/chosen": -2.6052682399749756, "logits/rejected": -2.755232810974121, "logps/chosen": -244.13059997558594, "logps/rejected": -248.38491821289062, "loss": 1.0656, "rewards/accuracies": 0.625, "rewards/chosen": -1.9004136323928833, "rewards/margins": 0.17074266076087952, "rewards/rejected": -2.0711562633514404, "step": 867 }, { "epoch": 0.1, "learning_rate": 2.7381599149639774e-07, "logits/chosen": -2.1230006217956543, "logits/rejected": -2.153796672821045, "logps/chosen": -206.5810546875, "logps/rejected": -218.51907348632812, "loss": 0.7607, "rewards/accuracies": 0.75, "rewards/chosen": -1.0023295879364014, "rewards/margins": 0.769444465637207, "rewards/rejected": -1.7717740535736084, "step": 868 }, { "epoch": 0.1, "learning_rate": 2.737805598204795e-07, "logits/chosen": -2.394606590270996, "logits/rejected": -2.255336284637451, "logps/chosen": -252.6491241455078, "logps/rejected": -407.2364196777344, "loss": 0.8131, "rewards/accuracies": 0.5, "rewards/chosen": -2.094109296798706, "rewards/margins": 1.452845811843872, "rewards/rejected": -3.546955108642578, "step": 869 }, { "epoch": 0.1, "learning_rate": 2.737451281445612e-07, "logits/chosen": -2.2578048706054688, "logits/rejected": -2.39266037940979, "logps/chosen": -307.3096008300781, "logps/rejected": -381.4688720703125, "loss": 0.5199, "rewards/accuracies": 0.75, "rewards/chosen": -0.40601804852485657, "rewards/margins": 1.038485050201416, "rewards/rejected": -1.4445029497146606, "step": 870 }, { "epoch": 0.1, "learning_rate": 2.73709696468643e-07, "logits/chosen": -2.3872618675231934, "logits/rejected": -2.7459304332733154, "logps/chosen": -266.2658996582031, "logps/rejected": -232.3358154296875, "loss": 0.4569, "rewards/accuracies": 0.875, "rewards/chosen": -0.4222407341003418, "rewards/margins": 1.456223964691162, "rewards/rejected": -1.878464698791504, "step": 871 }, { "epoch": 0.1, "learning_rate": 2.736742647927247e-07, "logits/chosen": -2.322699546813965, "logits/rejected": -2.590506076812744, "logps/chosen": -254.16798400878906, "logps/rejected": -327.3200988769531, "loss": 0.1898, "rewards/accuracies": 1.0, "rewards/chosen": -1.679387092590332, "rewards/margins": 2.2622036933898926, "rewards/rejected": -3.9415905475616455, "step": 872 }, { "epoch": 0.1, "learning_rate": 2.7363883311680643e-07, "logits/chosen": -2.5678744316101074, "logits/rejected": -2.3240013122558594, "logps/chosen": -161.8018341064453, "logps/rejected": -184.2908477783203, "loss": 0.2974, "rewards/accuracies": 0.875, "rewards/chosen": -0.5455507040023804, "rewards/margins": 1.9467980861663818, "rewards/rejected": -2.4923489093780518, "step": 873 }, { "epoch": 0.1, "learning_rate": 2.7360340144088813e-07, "logits/chosen": -2.3069074153900146, "logits/rejected": -2.7034010887145996, "logps/chosen": -366.5351257324219, "logps/rejected": -145.95130920410156, "loss": 0.7808, "rewards/accuracies": 0.625, "rewards/chosen": -1.1422231197357178, "rewards/margins": 0.17090915143489838, "rewards/rejected": -1.3131322860717773, "step": 874 }, { "epoch": 0.1, "learning_rate": 2.735679697649699e-07, "logits/chosen": -2.0309371948242188, "logits/rejected": -2.013925075531006, "logps/chosen": -203.388427734375, "logps/rejected": -168.79827880859375, "loss": 1.3891, "rewards/accuracies": 0.5, "rewards/chosen": -2.0217442512512207, "rewards/margins": -0.3231983780860901, "rewards/rejected": -1.6985459327697754, "step": 875 }, { "epoch": 0.1, "learning_rate": 2.735325380890516e-07, "logits/chosen": -2.37561297416687, "logits/rejected": -2.5144829750061035, "logps/chosen": -267.6269226074219, "logps/rejected": -221.1397705078125, "loss": 0.4809, "rewards/accuracies": 0.625, "rewards/chosen": -0.08708879351615906, "rewards/margins": 0.9561891555786133, "rewards/rejected": -1.0432779788970947, "step": 876 }, { "epoch": 0.1, "learning_rate": 2.734971064131333e-07, "logits/chosen": -2.50106143951416, "logits/rejected": -2.5077974796295166, "logps/chosen": -400.8428955078125, "logps/rejected": -249.34075927734375, "loss": 0.6773, "rewards/accuracies": 0.5, "rewards/chosen": -1.5207757949829102, "rewards/margins": 0.4289727509021759, "rewards/rejected": -1.9497485160827637, "step": 877 }, { "epoch": 0.1, "learning_rate": 2.7346167473721507e-07, "logits/chosen": -2.448944091796875, "logits/rejected": -2.3112964630126953, "logps/chosen": -248.82896423339844, "logps/rejected": -189.35926818847656, "loss": 0.5305, "rewards/accuracies": 0.625, "rewards/chosen": -0.5540949106216431, "rewards/margins": 0.8031787872314453, "rewards/rejected": -1.3572736978530884, "step": 878 }, { "epoch": 0.1, "learning_rate": 2.7342624306129677e-07, "logits/chosen": -2.4445419311523438, "logits/rejected": -2.591122627258301, "logps/chosen": -211.64498901367188, "logps/rejected": -202.17271423339844, "loss": 0.7751, "rewards/accuracies": 0.625, "rewards/chosen": -0.6425549983978271, "rewards/margins": 0.9915646314620972, "rewards/rejected": -1.6341195106506348, "step": 879 }, { "epoch": 0.1, "learning_rate": 2.733908113853785e-07, "logits/chosen": -2.38649320602417, "logits/rejected": -2.575735330581665, "logps/chosen": -409.8760681152344, "logps/rejected": -291.2311706542969, "loss": 0.1687, "rewards/accuracies": 1.0, "rewards/chosen": -0.1492311954498291, "rewards/margins": 1.9618580341339111, "rewards/rejected": -2.1110892295837402, "step": 880 }, { "epoch": 0.1, "learning_rate": 2.733553797094602e-07, "logits/chosen": -2.7239973545074463, "logits/rejected": -2.6517820358276367, "logps/chosen": -386.863037109375, "logps/rejected": -382.99322509765625, "loss": 0.4186, "rewards/accuracies": 0.75, "rewards/chosen": -0.746008038520813, "rewards/margins": 1.208742380142212, "rewards/rejected": -1.954750418663025, "step": 881 }, { "epoch": 0.1, "learning_rate": 2.7331994803354196e-07, "logits/chosen": -2.5964696407318115, "logits/rejected": -2.650857448577881, "logps/chosen": -297.2395935058594, "logps/rejected": -243.79217529296875, "loss": 0.3043, "rewards/accuracies": 1.0, "rewards/chosen": -0.707123875617981, "rewards/margins": 1.2238788604736328, "rewards/rejected": -1.9310027360916138, "step": 882 }, { "epoch": 0.1, "learning_rate": 2.732845163576237e-07, "logits/chosen": -2.3817601203918457, "logits/rejected": -2.4090349674224854, "logps/chosen": -182.64474487304688, "logps/rejected": -280.52496337890625, "loss": 0.4095, "rewards/accuracies": 0.75, "rewards/chosen": -1.1690198183059692, "rewards/margins": 1.5565727949142456, "rewards/rejected": -2.725592613220215, "step": 883 }, { "epoch": 0.1, "learning_rate": 2.7324908468170546e-07, "logits/chosen": -2.886898994445801, "logits/rejected": -3.025939464569092, "logps/chosen": -207.57025146484375, "logps/rejected": -332.8024597167969, "loss": 0.2393, "rewards/accuracies": 0.875, "rewards/chosen": -0.3404068946838379, "rewards/margins": 2.942467451095581, "rewards/rejected": -3.282874345779419, "step": 884 }, { "epoch": 0.1, "learning_rate": 2.7321365300578715e-07, "logits/chosen": -2.549455165863037, "logits/rejected": -2.3082275390625, "logps/chosen": -196.8700408935547, "logps/rejected": -308.0885009765625, "loss": 0.2574, "rewards/accuracies": 0.875, "rewards/chosen": -0.8000203967094421, "rewards/margins": 2.0495731830596924, "rewards/rejected": -2.8495936393737793, "step": 885 }, { "epoch": 0.1, "learning_rate": 2.731782213298689e-07, "logits/chosen": -1.8205680847167969, "logits/rejected": -1.972083330154419, "logps/chosen": -260.3123779296875, "logps/rejected": -295.5809020996094, "loss": 0.4628, "rewards/accuracies": 0.875, "rewards/chosen": -0.6785041093826294, "rewards/margins": 0.8022993803024292, "rewards/rejected": -1.4808034896850586, "step": 886 }, { "epoch": 0.1, "learning_rate": 2.731427896539506e-07, "logits/chosen": -2.3177781105041504, "logits/rejected": -2.51983642578125, "logps/chosen": -404.57147216796875, "logps/rejected": -334.4754638671875, "loss": 0.3444, "rewards/accuracies": 0.875, "rewards/chosen": -0.4713652729988098, "rewards/margins": 1.2826460599899292, "rewards/rejected": -1.7540113925933838, "step": 887 }, { "epoch": 0.1, "learning_rate": 2.7310735797803235e-07, "logits/chosen": -2.1799988746643066, "logits/rejected": -2.3793368339538574, "logps/chosen": -255.95895385742188, "logps/rejected": -378.0698547363281, "loss": 0.6832, "rewards/accuracies": 0.875, "rewards/chosen": -0.7940413951873779, "rewards/margins": 1.2387806177139282, "rewards/rejected": -2.0328221321105957, "step": 888 }, { "epoch": 0.1, "learning_rate": 2.730719263021141e-07, "logits/chosen": -2.2570106983184814, "logits/rejected": -2.0357398986816406, "logps/chosen": -266.73779296875, "logps/rejected": -374.1632080078125, "loss": 0.2, "rewards/accuracies": 1.0, "rewards/chosen": -0.3968670964241028, "rewards/margins": 2.2420458793640137, "rewards/rejected": -2.6389129161834717, "step": 889 }, { "epoch": 0.1, "learning_rate": 2.730364946261958e-07, "logits/chosen": -2.657562255859375, "logits/rejected": -2.475883960723877, "logps/chosen": -131.65756225585938, "logps/rejected": -232.9833526611328, "loss": 0.4919, "rewards/accuracies": 0.875, "rewards/chosen": -0.2719879746437073, "rewards/margins": 1.4077529907226562, "rewards/rejected": -1.6797407865524292, "step": 890 }, { "epoch": 0.1, "learning_rate": 2.7300106295027754e-07, "logits/chosen": -2.6084165573120117, "logits/rejected": -2.811049222946167, "logps/chosen": -279.0136413574219, "logps/rejected": -116.66950225830078, "loss": 0.8331, "rewards/accuracies": 0.625, "rewards/chosen": -1.0136877298355103, "rewards/margins": 0.30267006158828735, "rewards/rejected": -1.3163578510284424, "step": 891 }, { "epoch": 0.1, "learning_rate": 2.7296563127435923e-07, "logits/chosen": -2.4550797939300537, "logits/rejected": -2.4692957401275635, "logps/chosen": -303.04541015625, "logps/rejected": -240.83773803710938, "loss": 0.4646, "rewards/accuracies": 0.875, "rewards/chosen": -0.7586424946784973, "rewards/margins": 1.103433609008789, "rewards/rejected": -1.8620760440826416, "step": 892 }, { "epoch": 0.1, "learning_rate": 2.72930199598441e-07, "logits/chosen": -2.421027660369873, "logits/rejected": -2.2862043380737305, "logps/chosen": -139.3295135498047, "logps/rejected": -113.74028778076172, "loss": 0.6243, "rewards/accuracies": 0.625, "rewards/chosen": -0.29174280166625977, "rewards/margins": 0.7601765394210815, "rewards/rejected": -1.0519193410873413, "step": 893 }, { "epoch": 0.1, "learning_rate": 2.7289476792252273e-07, "logits/chosen": -1.9997868537902832, "logits/rejected": -2.1521716117858887, "logps/chosen": -353.5522766113281, "logps/rejected": -255.4780731201172, "loss": 0.3388, "rewards/accuracies": 0.75, "rewards/chosen": -0.6226689219474792, "rewards/margins": 1.3320801258087158, "rewards/rejected": -1.9547489881515503, "step": 894 }, { "epoch": 0.1, "learning_rate": 2.728593362466045e-07, "logits/chosen": -2.382061243057251, "logits/rejected": -2.3215227127075195, "logps/chosen": -338.754150390625, "logps/rejected": -352.0447998046875, "loss": 0.1408, "rewards/accuracies": 1.0, "rewards/chosen": -0.5773947834968567, "rewards/margins": 2.4783215522766113, "rewards/rejected": -3.0557162761688232, "step": 895 }, { "epoch": 0.1, "learning_rate": 2.728239045706862e-07, "logits/chosen": -1.8443820476531982, "logits/rejected": -1.7422621250152588, "logps/chosen": -278.4216003417969, "logps/rejected": -270.6524658203125, "loss": 0.2563, "rewards/accuracies": 0.875, "rewards/chosen": -0.19742241501808167, "rewards/margins": 2.5799741744995117, "rewards/rejected": -2.7773964405059814, "step": 896 }, { "epoch": 0.1, "learning_rate": 2.727884728947679e-07, "logits/chosen": -2.600281238555908, "logits/rejected": -2.753805637359619, "logps/chosen": -288.9471435546875, "logps/rejected": -200.90052795410156, "loss": 0.6112, "rewards/accuracies": 0.75, "rewards/chosen": -0.955651044845581, "rewards/margins": 1.0883605480194092, "rewards/rejected": -2.0440115928649902, "step": 897 }, { "epoch": 0.1, "learning_rate": 2.727530412188496e-07, "logits/chosen": -2.183803081512451, "logits/rejected": -2.5019640922546387, "logps/chosen": -326.1049499511719, "logps/rejected": -244.45465087890625, "loss": 0.8303, "rewards/accuracies": 0.5, "rewards/chosen": -0.7634547352790833, "rewards/margins": 0.5251582860946655, "rewards/rejected": -1.288612961769104, "step": 898 }, { "epoch": 0.1, "learning_rate": 2.7271760954293137e-07, "logits/chosen": -2.256326198577881, "logits/rejected": -2.7890090942382812, "logps/chosen": -265.24517822265625, "logps/rejected": -215.26483154296875, "loss": 0.8532, "rewards/accuracies": 0.625, "rewards/chosen": -0.8901289701461792, "rewards/margins": 0.5248301029205322, "rewards/rejected": -1.4149590730667114, "step": 899 }, { "epoch": 0.1, "learning_rate": 2.726821778670131e-07, "logits/chosen": -2.1389808654785156, "logits/rejected": -2.256173849105835, "logps/chosen": -293.9353942871094, "logps/rejected": -305.8691711425781, "loss": 0.4095, "rewards/accuracies": 0.625, "rewards/chosen": -0.19598381221294403, "rewards/margins": 1.8666284084320068, "rewards/rejected": -2.062612295150757, "step": 900 }, { "epoch": 0.1, "learning_rate": 2.726467461910948e-07, "logits/chosen": -2.2993905544281006, "logits/rejected": -2.367767572402954, "logps/chosen": -192.41049194335938, "logps/rejected": -130.84104919433594, "loss": 0.8098, "rewards/accuracies": 0.5, "rewards/chosen": -1.0114803314208984, "rewards/margins": 0.6559673547744751, "rewards/rejected": -1.667447805404663, "step": 901 }, { "epoch": 0.1, "learning_rate": 2.7261131451517656e-07, "logits/chosen": -2.2684309482574463, "logits/rejected": -2.3545074462890625, "logps/chosen": -211.46060180664062, "logps/rejected": -216.25440979003906, "loss": 0.3949, "rewards/accuracies": 0.875, "rewards/chosen": -0.2944456934928894, "rewards/margins": 0.9296656847000122, "rewards/rejected": -1.2241114377975464, "step": 902 }, { "epoch": 0.11, "learning_rate": 2.7257588283925826e-07, "logits/chosen": -1.9035773277282715, "logits/rejected": -2.1768484115600586, "logps/chosen": -426.9383544921875, "logps/rejected": -333.42236328125, "loss": 0.4376, "rewards/accuracies": 0.75, "rewards/chosen": -0.6385980844497681, "rewards/margins": 1.7906768321990967, "rewards/rejected": -2.4292750358581543, "step": 903 }, { "epoch": 0.11, "learning_rate": 2.7254045116334e-07, "logits/chosen": -2.4897422790527344, "logits/rejected": -2.7430386543273926, "logps/chosen": -281.6868591308594, "logps/rejected": -220.74661254882812, "loss": 0.5002, "rewards/accuracies": 0.625, "rewards/chosen": -0.6289548277854919, "rewards/margins": 0.8401647210121155, "rewards/rejected": -1.4691195487976074, "step": 904 }, { "epoch": 0.11, "learning_rate": 2.725050194874217e-07, "logits/chosen": -2.539670467376709, "logits/rejected": -2.6542413234710693, "logps/chosen": -280.54193115234375, "logps/rejected": -235.5479736328125, "loss": 0.6285, "rewards/accuracies": 0.625, "rewards/chosen": -0.2756689786911011, "rewards/margins": 0.8308883905410767, "rewards/rejected": -1.1065573692321777, "step": 905 }, { "epoch": 0.11, "learning_rate": 2.724695878115035e-07, "logits/chosen": -2.2356343269348145, "logits/rejected": -2.4383907318115234, "logps/chosen": -259.6686096191406, "logps/rejected": -326.6102294921875, "loss": 0.3928, "rewards/accuracies": 0.875, "rewards/chosen": -0.917183518409729, "rewards/margins": 1.1936628818511963, "rewards/rejected": -2.110846519470215, "step": 906 }, { "epoch": 0.11, "learning_rate": 2.724341561355852e-07, "logits/chosen": -1.8490135669708252, "logits/rejected": -1.9987668991088867, "logps/chosen": -403.26812744140625, "logps/rejected": -316.74371337890625, "loss": 0.715, "rewards/accuracies": 0.5, "rewards/chosen": -1.0296697616577148, "rewards/margins": 0.22748082876205444, "rewards/rejected": -1.257150650024414, "step": 907 }, { "epoch": 0.11, "learning_rate": 2.7239872445966695e-07, "logits/chosen": -2.0095643997192383, "logits/rejected": -1.7829467058181763, "logps/chosen": -204.13865661621094, "logps/rejected": -521.1851806640625, "loss": 0.3158, "rewards/accuracies": 0.75, "rewards/chosen": -0.31645703315734863, "rewards/margins": 2.4370474815368652, "rewards/rejected": -2.753504514694214, "step": 908 }, { "epoch": 0.11, "learning_rate": 2.7236329278374864e-07, "logits/chosen": -2.5511474609375, "logits/rejected": -2.6521260738372803, "logps/chosen": -329.65252685546875, "logps/rejected": -244.117919921875, "loss": 0.9573, "rewards/accuracies": 0.375, "rewards/chosen": -1.6139683723449707, "rewards/margins": 0.01573871076107025, "rewards/rejected": -1.6297069787979126, "step": 909 }, { "epoch": 0.11, "learning_rate": 2.723278611078304e-07, "logits/chosen": -2.7357118129730225, "logits/rejected": -2.6527299880981445, "logps/chosen": -329.1422424316406, "logps/rejected": -334.13330078125, "loss": 0.418, "rewards/accuracies": 0.875, "rewards/chosen": -1.2626457214355469, "rewards/margins": 0.8937562108039856, "rewards/rejected": -2.1564018726348877, "step": 910 }, { "epoch": 0.11, "learning_rate": 2.7229242943191214e-07, "logits/chosen": -2.4607765674591064, "logits/rejected": -2.361212730407715, "logps/chosen": -276.8341064453125, "logps/rejected": -279.7308349609375, "loss": 0.2715, "rewards/accuracies": 1.0, "rewards/chosen": -0.844237208366394, "rewards/margins": 2.1750621795654297, "rewards/rejected": -3.019299268722534, "step": 911 }, { "epoch": 0.11, "learning_rate": 2.7225699775599384e-07, "logits/chosen": -2.513491630554199, "logits/rejected": -2.7593019008636475, "logps/chosen": -206.03125, "logps/rejected": -206.5583953857422, "loss": 0.5679, "rewards/accuracies": 0.625, "rewards/chosen": -0.8312869071960449, "rewards/margins": 1.5916966199874878, "rewards/rejected": -2.422983407974243, "step": 912 }, { "epoch": 0.11, "learning_rate": 2.722215660800756e-07, "logits/chosen": -2.7785422801971436, "logits/rejected": -2.6310136318206787, "logps/chosen": -87.30070495605469, "logps/rejected": -151.00238037109375, "loss": 0.4385, "rewards/accuracies": 0.75, "rewards/chosen": -0.6325681805610657, "rewards/margins": 1.2288638353347778, "rewards/rejected": -1.8614320755004883, "step": 913 }, { "epoch": 0.11, "learning_rate": 2.721861344041573e-07, "logits/chosen": -2.4290454387664795, "logits/rejected": -2.3605217933654785, "logps/chosen": -288.1297607421875, "logps/rejected": -211.72531127929688, "loss": 0.4898, "rewards/accuracies": 0.625, "rewards/chosen": -0.6617876291275024, "rewards/margins": 1.375885009765625, "rewards/rejected": -2.037672519683838, "step": 914 }, { "epoch": 0.11, "learning_rate": 2.7215070272823903e-07, "logits/chosen": -2.1356759071350098, "logits/rejected": -2.027170419692993, "logps/chosen": -127.61013793945312, "logps/rejected": -265.3847351074219, "loss": 0.323, "rewards/accuracies": 0.875, "rewards/chosen": -0.19862046837806702, "rewards/margins": 2.511618137359619, "rewards/rejected": -2.7102389335632324, "step": 915 }, { "epoch": 0.11, "learning_rate": 2.721152710523207e-07, "logits/chosen": -2.080789804458618, "logits/rejected": -2.166923761367798, "logps/chosen": -276.890625, "logps/rejected": -347.9039611816406, "loss": 0.5852, "rewards/accuracies": 0.75, "rewards/chosen": -0.17628367245197296, "rewards/margins": 0.9203159213066101, "rewards/rejected": -1.0965995788574219, "step": 916 }, { "epoch": 0.11, "learning_rate": 2.7207983937640247e-07, "logits/chosen": -2.4532315731048584, "logits/rejected": -2.565042734146118, "logps/chosen": -324.721923828125, "logps/rejected": -255.77439880371094, "loss": 0.4855, "rewards/accuracies": 0.75, "rewards/chosen": -1.2365282773971558, "rewards/margins": 0.9416915774345398, "rewards/rejected": -2.178219795227051, "step": 917 }, { "epoch": 0.11, "learning_rate": 2.720444077004842e-07, "logits/chosen": -2.923097610473633, "logits/rejected": -2.6730334758758545, "logps/chosen": -222.44705200195312, "logps/rejected": -276.507080078125, "loss": 0.3765, "rewards/accuracies": 0.75, "rewards/chosen": -0.4248666763305664, "rewards/margins": 1.8859057426452637, "rewards/rejected": -2.31077241897583, "step": 918 }, { "epoch": 0.11, "learning_rate": 2.7200897602456597e-07, "logits/chosen": -2.006549835205078, "logits/rejected": -2.166224718093872, "logps/chosen": -361.4042663574219, "logps/rejected": -331.1242980957031, "loss": 0.5611, "rewards/accuracies": 0.75, "rewards/chosen": -1.0590423345565796, "rewards/margins": 1.1534174680709839, "rewards/rejected": -2.2124595642089844, "step": 919 }, { "epoch": 0.11, "learning_rate": 2.7197354434864767e-07, "logits/chosen": -2.330815315246582, "logits/rejected": -2.3359081745147705, "logps/chosen": -220.25315856933594, "logps/rejected": -266.927001953125, "loss": 0.3108, "rewards/accuracies": 1.0, "rewards/chosen": -0.495333731174469, "rewards/margins": 1.6684662103652954, "rewards/rejected": -2.16379976272583, "step": 920 }, { "epoch": 0.11, "learning_rate": 2.719381126727294e-07, "logits/chosen": -2.5380189418792725, "logits/rejected": -2.4181535243988037, "logps/chosen": -374.0831298828125, "logps/rejected": -326.0532531738281, "loss": 0.7044, "rewards/accuracies": 0.625, "rewards/chosen": -1.318453311920166, "rewards/margins": 0.9853094816207886, "rewards/rejected": -2.303762674331665, "step": 921 }, { "epoch": 0.11, "learning_rate": 2.7190268099681116e-07, "logits/chosen": -2.4655890464782715, "logits/rejected": -2.752384901046753, "logps/chosen": -269.3939208984375, "logps/rejected": -189.465576171875, "loss": 1.3645, "rewards/accuracies": 0.75, "rewards/chosen": -2.01847505569458, "rewards/margins": 0.623408854007721, "rewards/rejected": -2.6418838500976562, "step": 922 }, { "epoch": 0.11, "learning_rate": 2.7186724932089286e-07, "logits/chosen": -2.142364263534546, "logits/rejected": -2.142500400543213, "logps/chosen": -298.6378479003906, "logps/rejected": -289.3003845214844, "loss": 0.6167, "rewards/accuracies": 0.75, "rewards/chosen": -1.1336969137191772, "rewards/margins": 1.4006930589675903, "rewards/rejected": -2.5343899726867676, "step": 923 }, { "epoch": 0.11, "learning_rate": 2.718318176449746e-07, "logits/chosen": -2.345696449279785, "logits/rejected": -2.5615217685699463, "logps/chosen": -390.5589599609375, "logps/rejected": -261.41229248046875, "loss": 0.4671, "rewards/accuracies": 0.875, "rewards/chosen": -0.32437652349472046, "rewards/margins": 0.9223254919052124, "rewards/rejected": -1.2467020750045776, "step": 924 }, { "epoch": 0.11, "learning_rate": 2.717963859690563e-07, "logits/chosen": -1.8304967880249023, "logits/rejected": -2.089359998703003, "logps/chosen": -293.8280944824219, "logps/rejected": -199.13397216796875, "loss": 0.8512, "rewards/accuracies": 0.625, "rewards/chosen": -0.7305306196212769, "rewards/margins": 0.2128722369670868, "rewards/rejected": -0.943402886390686, "step": 925 }, { "epoch": 0.11, "learning_rate": 2.7176095429313805e-07, "logits/chosen": -1.956066608428955, "logits/rejected": -1.9802474975585938, "logps/chosen": -384.04412841796875, "logps/rejected": -392.45703125, "loss": 0.2595, "rewards/accuracies": 1.0, "rewards/chosen": -0.4696843922138214, "rewards/margins": 1.5475095510482788, "rewards/rejected": -2.0171940326690674, "step": 926 }, { "epoch": 0.11, "learning_rate": 2.7172552261721975e-07, "logits/chosen": -2.0599639415740967, "logits/rejected": -2.404447078704834, "logps/chosen": -416.64996337890625, "logps/rejected": -207.01539611816406, "loss": 0.4634, "rewards/accuracies": 0.75, "rewards/chosen": -0.3243686556816101, "rewards/margins": 0.890299916267395, "rewards/rejected": -1.21466863155365, "step": 927 }, { "epoch": 0.11, "learning_rate": 2.716900909413015e-07, "logits/chosen": -2.380183696746826, "logits/rejected": -2.4667577743530273, "logps/chosen": -189.44952392578125, "logps/rejected": -177.78472900390625, "loss": 0.5511, "rewards/accuracies": 0.625, "rewards/chosen": -0.4223308265209198, "rewards/margins": 1.0884935855865479, "rewards/rejected": -1.5108243227005005, "step": 928 }, { "epoch": 0.11, "learning_rate": 2.7165465926538324e-07, "logits/chosen": -2.347574234008789, "logits/rejected": -2.580345392227173, "logps/chosen": -634.8778686523438, "logps/rejected": -458.26422119140625, "loss": 0.2894, "rewards/accuracies": 0.875, "rewards/chosen": 0.12248080968856812, "rewards/margins": 1.4319700002670288, "rewards/rejected": -1.3094892501831055, "step": 929 }, { "epoch": 0.11, "learning_rate": 2.71619227589465e-07, "logits/chosen": -2.146653175354004, "logits/rejected": -2.1664810180664062, "logps/chosen": -228.718505859375, "logps/rejected": -201.86090087890625, "loss": 0.4006, "rewards/accuracies": 0.875, "rewards/chosen": -0.27546143531799316, "rewards/margins": 0.8638690114021301, "rewards/rejected": -1.1393303871154785, "step": 930 }, { "epoch": 0.11, "learning_rate": 2.715837959135467e-07, "logits/chosen": -2.17645001411438, "logits/rejected": -2.2205986976623535, "logps/chosen": -381.80706787109375, "logps/rejected": -393.1808776855469, "loss": 0.5834, "rewards/accuracies": 0.5, "rewards/chosen": -0.38829800486564636, "rewards/margins": 0.7978224754333496, "rewards/rejected": -1.1861203908920288, "step": 931 }, { "epoch": 0.11, "learning_rate": 2.7154836423762844e-07, "logits/chosen": -1.8894619941711426, "logits/rejected": -2.2668843269348145, "logps/chosen": -337.0008239746094, "logps/rejected": -232.4938201904297, "loss": 0.4556, "rewards/accuracies": 0.875, "rewards/chosen": -0.6895352005958557, "rewards/margins": 1.438370704650879, "rewards/rejected": -2.127906084060669, "step": 932 }, { "epoch": 0.11, "learning_rate": 2.715129325617102e-07, "logits/chosen": -1.7128411531448364, "logits/rejected": -1.767407774925232, "logps/chosen": -292.68756103515625, "logps/rejected": -299.2854919433594, "loss": 0.2923, "rewards/accuracies": 0.875, "rewards/chosen": -0.484789103269577, "rewards/margins": 1.4878734350204468, "rewards/rejected": -1.9726624488830566, "step": 933 }, { "epoch": 0.11, "learning_rate": 2.714775008857919e-07, "logits/chosen": -2.296245813369751, "logits/rejected": -2.5920512676239014, "logps/chosen": -267.03961181640625, "logps/rejected": -178.40972900390625, "loss": 0.6542, "rewards/accuracies": 0.625, "rewards/chosen": -1.3996336460113525, "rewards/margins": 0.6982619762420654, "rewards/rejected": -2.097895622253418, "step": 934 }, { "epoch": 0.11, "learning_rate": 2.7144206920987363e-07, "logits/chosen": -2.285618305206299, "logits/rejected": -2.1237363815307617, "logps/chosen": -216.46176147460938, "logps/rejected": -332.8358459472656, "loss": 0.3169, "rewards/accuracies": 0.875, "rewards/chosen": -0.8936880826950073, "rewards/margins": 1.8163669109344482, "rewards/rejected": -2.710054874420166, "step": 935 }, { "epoch": 0.11, "learning_rate": 2.714066375339553e-07, "logits/chosen": -2.8034615516662598, "logits/rejected": -2.819854974746704, "logps/chosen": -253.01705932617188, "logps/rejected": -274.55865478515625, "loss": 0.3811, "rewards/accuracies": 0.875, "rewards/chosen": -1.0268718004226685, "rewards/margins": 1.480276107788086, "rewards/rejected": -2.507148027420044, "step": 936 }, { "epoch": 0.11, "learning_rate": 2.713712058580371e-07, "logits/chosen": -2.437837600708008, "logits/rejected": -2.415449380874634, "logps/chosen": -219.99838256835938, "logps/rejected": -153.76742553710938, "loss": 1.5873, "rewards/accuracies": 0.5, "rewards/chosen": -1.936269998550415, "rewards/margins": 0.11902236938476562, "rewards/rejected": -2.0552923679351807, "step": 937 }, { "epoch": 0.11, "learning_rate": 2.7133577418211877e-07, "logits/chosen": -2.644559621810913, "logits/rejected": -2.476400375366211, "logps/chosen": -291.0596923828125, "logps/rejected": -208.57508850097656, "loss": 0.316, "rewards/accuracies": 0.875, "rewards/chosen": -0.03176143765449524, "rewards/margins": 1.3453729152679443, "rewards/rejected": -1.3771344423294067, "step": 938 }, { "epoch": 0.11, "learning_rate": 2.713003425062005e-07, "logits/chosen": -2.1710925102233887, "logits/rejected": -2.2746121883392334, "logps/chosen": -306.44354248046875, "logps/rejected": -310.90045166015625, "loss": 0.4897, "rewards/accuracies": 0.625, "rewards/chosen": -0.9752366542816162, "rewards/margins": 1.3312605619430542, "rewards/rejected": -2.306497097015381, "step": 939 }, { "epoch": 0.11, "learning_rate": 2.7126491083028227e-07, "logits/chosen": -1.9625437259674072, "logits/rejected": -1.939466118812561, "logps/chosen": -254.60308837890625, "logps/rejected": -293.1266174316406, "loss": 0.7012, "rewards/accuracies": 0.875, "rewards/chosen": -0.6357609033584595, "rewards/margins": 0.7906701564788818, "rewards/rejected": -1.4264310598373413, "step": 940 }, { "epoch": 0.11, "learning_rate": 2.71229479154364e-07, "logits/chosen": -2.2355852127075195, "logits/rejected": -2.006657600402832, "logps/chosen": -484.8934631347656, "logps/rejected": -449.8263854980469, "loss": 0.4179, "rewards/accuracies": 0.875, "rewards/chosen": -0.08952587842941284, "rewards/margins": 1.2084903717041016, "rewards/rejected": -1.2980163097381592, "step": 941 }, { "epoch": 0.11, "learning_rate": 2.711940474784457e-07, "logits/chosen": -2.3699445724487305, "logits/rejected": -2.009874105453491, "logps/chosen": -201.49783325195312, "logps/rejected": -425.10455322265625, "loss": 0.4446, "rewards/accuracies": 0.625, "rewards/chosen": -0.6293970346450806, "rewards/margins": 2.5021114349365234, "rewards/rejected": -3.1315083503723145, "step": 942 }, { "epoch": 0.11, "learning_rate": 2.7115861580252746e-07, "logits/chosen": -1.9901020526885986, "logits/rejected": -2.2917094230651855, "logps/chosen": -305.6833801269531, "logps/rejected": -242.10855102539062, "loss": 0.2968, "rewards/accuracies": 0.75, "rewards/chosen": -0.5023388266563416, "rewards/margins": 2.0069451332092285, "rewards/rejected": -2.5092835426330566, "step": 943 }, { "epoch": 0.11, "learning_rate": 2.711231841266092e-07, "logits/chosen": -2.687199115753174, "logits/rejected": -2.5252292156219482, "logps/chosen": -141.91226196289062, "logps/rejected": -162.43707275390625, "loss": 0.2374, "rewards/accuracies": 1.0, "rewards/chosen": -0.5231041312217712, "rewards/margins": 1.6486130952835083, "rewards/rejected": -2.1717171669006348, "step": 944 }, { "epoch": 0.11, "learning_rate": 2.710877524506909e-07, "logits/chosen": -2.9363417625427246, "logits/rejected": -2.9411022663116455, "logps/chosen": -120.79934692382812, "logps/rejected": -147.89247131347656, "loss": 0.5199, "rewards/accuracies": 0.625, "rewards/chosen": -0.899508535861969, "rewards/margins": 1.201124906539917, "rewards/rejected": -2.1006336212158203, "step": 945 }, { "epoch": 0.11, "learning_rate": 2.7105232077477265e-07, "logits/chosen": -2.233623504638672, "logits/rejected": -2.2427022457122803, "logps/chosen": -499.53350830078125, "logps/rejected": -490.29248046875, "loss": 0.6722, "rewards/accuracies": 0.75, "rewards/chosen": -0.325993150472641, "rewards/margins": 1.074951410293579, "rewards/rejected": -1.400944709777832, "step": 946 }, { "epoch": 0.11, "learning_rate": 2.7101688909885435e-07, "logits/chosen": -2.242744207382202, "logits/rejected": -2.5705549716949463, "logps/chosen": -234.45559692382812, "logps/rejected": -181.55722045898438, "loss": 0.7848, "rewards/accuracies": 0.5, "rewards/chosen": -1.1378593444824219, "rewards/margins": 0.6810678839683533, "rewards/rejected": -1.81892728805542, "step": 947 }, { "epoch": 0.11, "learning_rate": 2.709814574229361e-07, "logits/chosen": -2.67470121383667, "logits/rejected": -2.6241462230682373, "logps/chosen": -463.63232421875, "logps/rejected": -439.31268310546875, "loss": 1.0878, "rewards/accuracies": 0.625, "rewards/chosen": -1.0071697235107422, "rewards/margins": 0.46164369583129883, "rewards/rejected": -1.4688133001327515, "step": 948 }, { "epoch": 0.11, "learning_rate": 2.709460257470178e-07, "logits/chosen": -2.4138174057006836, "logits/rejected": -2.3026328086853027, "logps/chosen": -229.15750122070312, "logps/rejected": -237.95738220214844, "loss": 0.3924, "rewards/accuracies": 0.875, "rewards/chosen": -0.7025728225708008, "rewards/margins": 1.0323164463043213, "rewards/rejected": -1.734889268875122, "step": 949 }, { "epoch": 0.11, "learning_rate": 2.7091059407109954e-07, "logits/chosen": -2.178194284439087, "logits/rejected": -2.4063143730163574, "logps/chosen": -236.43006896972656, "logps/rejected": -213.5054473876953, "loss": 0.7791, "rewards/accuracies": 0.625, "rewards/chosen": -0.8494874238967896, "rewards/margins": 1.5621049404144287, "rewards/rejected": -2.411592483520508, "step": 950 }, { "epoch": 0.11, "learning_rate": 2.708751623951813e-07, "logits/chosen": -2.0526390075683594, "logits/rejected": -1.946610927581787, "logps/chosen": -358.041015625, "logps/rejected": -354.4357604980469, "loss": 0.3057, "rewards/accuracies": 0.75, "rewards/chosen": -0.1416274905204773, "rewards/margins": 1.560659408569336, "rewards/rejected": -1.702286958694458, "step": 951 }, { "epoch": 0.11, "learning_rate": 2.70839730719263e-07, "logits/chosen": -2.665771722793579, "logits/rejected": -2.5515379905700684, "logps/chosen": -162.84962463378906, "logps/rejected": -346.3074645996094, "loss": 0.2483, "rewards/accuracies": 1.0, "rewards/chosen": -0.45774391293525696, "rewards/margins": 2.006432056427002, "rewards/rejected": -2.4641759395599365, "step": 952 }, { "epoch": 0.11, "learning_rate": 2.7080429904334473e-07, "logits/chosen": -2.61605167388916, "logits/rejected": -2.34519100189209, "logps/chosen": -188.31442260742188, "logps/rejected": -218.47964477539062, "loss": 0.2548, "rewards/accuracies": 1.0, "rewards/chosen": -0.6505228281021118, "rewards/margins": 1.6718947887420654, "rewards/rejected": -2.322417736053467, "step": 953 }, { "epoch": 0.11, "learning_rate": 2.707688673674265e-07, "logits/chosen": -2.5794734954833984, "logits/rejected": -2.619561195373535, "logps/chosen": -204.60955810546875, "logps/rejected": -218.64749145507812, "loss": 0.388, "rewards/accuracies": 0.875, "rewards/chosen": -1.6478289365768433, "rewards/margins": 2.849297523498535, "rewards/rejected": -4.497126579284668, "step": 954 }, { "epoch": 0.11, "learning_rate": 2.7073343569150823e-07, "logits/chosen": -2.30175518989563, "logits/rejected": -2.4560694694519043, "logps/chosen": -232.8940887451172, "logps/rejected": -266.4120788574219, "loss": 0.7766, "rewards/accuracies": 0.875, "rewards/chosen": -1.102486252784729, "rewards/margins": 0.4204743802547455, "rewards/rejected": -1.5229606628417969, "step": 955 }, { "epoch": 0.11, "learning_rate": 2.7069800401558993e-07, "logits/chosen": -2.271836280822754, "logits/rejected": -2.19091534614563, "logps/chosen": -247.7337646484375, "logps/rejected": -225.79930114746094, "loss": 0.5345, "rewards/accuracies": 0.5, "rewards/chosen": -0.7941255569458008, "rewards/margins": 1.5109639167785645, "rewards/rejected": -2.3050894737243652, "step": 956 }, { "epoch": 0.11, "learning_rate": 2.706625723396717e-07, "logits/chosen": -2.5408670902252197, "logits/rejected": -2.4554221630096436, "logps/chosen": -219.6632080078125, "logps/rejected": -307.2583923339844, "loss": 0.5009, "rewards/accuracies": 0.625, "rewards/chosen": -1.2068538665771484, "rewards/margins": 0.8313860893249512, "rewards/rejected": -2.0382399559020996, "step": 957 }, { "epoch": 0.11, "learning_rate": 2.7062714066375337e-07, "logits/chosen": -2.969264507293701, "logits/rejected": -2.9796438217163086, "logps/chosen": -209.68667602539062, "logps/rejected": -164.04110717773438, "loss": 0.3116, "rewards/accuracies": 1.0, "rewards/chosen": -0.14447641372680664, "rewards/margins": 1.472083330154419, "rewards/rejected": -1.6165597438812256, "step": 958 }, { "epoch": 0.11, "learning_rate": 2.705917089878351e-07, "logits/chosen": -2.173837423324585, "logits/rejected": -2.0854241847991943, "logps/chosen": -242.50711059570312, "logps/rejected": -284.5107421875, "loss": 0.5917, "rewards/accuracies": 0.625, "rewards/chosen": -0.029210835695266724, "rewards/margins": 0.5560863614082336, "rewards/rejected": -0.5852972269058228, "step": 959 }, { "epoch": 0.11, "learning_rate": 2.705562773119168e-07, "logits/chosen": -1.9699152708053589, "logits/rejected": -2.205106258392334, "logps/chosen": -260.61328125, "logps/rejected": -171.63856506347656, "loss": 0.3393, "rewards/accuracies": 1.0, "rewards/chosen": -0.49491944909095764, "rewards/margins": 1.6287928819656372, "rewards/rejected": -2.1237123012542725, "step": 960 }, { "epoch": 0.11, "learning_rate": 2.7052084563599856e-07, "logits/chosen": -2.141350030899048, "logits/rejected": -2.1041855812072754, "logps/chosen": -121.7099609375, "logps/rejected": -119.96987915039062, "loss": 0.6142, "rewards/accuracies": 0.625, "rewards/chosen": -0.31868740916252136, "rewards/margins": 0.376986563205719, "rewards/rejected": -0.6956740617752075, "step": 961 }, { "epoch": 0.11, "learning_rate": 2.704854139600803e-07, "logits/chosen": -2.2591631412506104, "logits/rejected": -2.1635005474090576, "logps/chosen": -190.437744140625, "logps/rejected": -407.61016845703125, "loss": 0.1809, "rewards/accuracies": 1.0, "rewards/chosen": -0.3189304769039154, "rewards/margins": 2.40791392326355, "rewards/rejected": -2.726844549179077, "step": 962 }, { "epoch": 0.11, "learning_rate": 2.70449982284162e-07, "logits/chosen": -2.6043293476104736, "logits/rejected": -2.440664052963257, "logps/chosen": -274.945556640625, "logps/rejected": -217.32041931152344, "loss": 0.57, "rewards/accuracies": 0.75, "rewards/chosen": -0.012274503707885742, "rewards/margins": 1.777383804321289, "rewards/rejected": -1.7896581888198853, "step": 963 }, { "epoch": 0.11, "learning_rate": 2.7041455060824376e-07, "logits/chosen": -2.8405799865722656, "logits/rejected": -2.9440767765045166, "logps/chosen": -299.4005432128906, "logps/rejected": -284.3367919921875, "loss": 0.6998, "rewards/accuracies": 0.625, "rewards/chosen": -1.0614229440689087, "rewards/margins": 1.359285831451416, "rewards/rejected": -2.4207088947296143, "step": 964 }, { "epoch": 0.11, "learning_rate": 2.703791189323255e-07, "logits/chosen": -1.6438281536102295, "logits/rejected": -2.4547836780548096, "logps/chosen": -456.79559326171875, "logps/rejected": -259.07952880859375, "loss": 1.0071, "rewards/accuracies": 0.375, "rewards/chosen": -2.0017545223236084, "rewards/margins": -0.24269232153892517, "rewards/rejected": -1.7590621709823608, "step": 965 }, { "epoch": 0.11, "learning_rate": 2.703436872564072e-07, "logits/chosen": -2.45007586479187, "logits/rejected": -2.6021673679351807, "logps/chosen": -432.3465881347656, "logps/rejected": -266.842529296875, "loss": 0.3269, "rewards/accuracies": 0.875, "rewards/chosen": -0.18648281693458557, "rewards/margins": 1.6305298805236816, "rewards/rejected": -1.8170127868652344, "step": 966 }, { "epoch": 0.11, "learning_rate": 2.7030825558048895e-07, "logits/chosen": -1.9774234294891357, "logits/rejected": -1.9750330448150635, "logps/chosen": -271.7263488769531, "logps/rejected": -239.47695922851562, "loss": 0.6459, "rewards/accuracies": 0.75, "rewards/chosen": -0.275549978017807, "rewards/margins": 0.46789300441741943, "rewards/rejected": -0.7434430122375488, "step": 967 }, { "epoch": 0.11, "learning_rate": 2.702728239045707e-07, "logits/chosen": -2.7427351474761963, "logits/rejected": -2.6258456707000732, "logps/chosen": -211.6383056640625, "logps/rejected": -285.5758056640625, "loss": 0.534, "rewards/accuracies": 0.75, "rewards/chosen": -1.0184810161590576, "rewards/margins": 1.6231517791748047, "rewards/rejected": -2.641632556915283, "step": 968 }, { "epoch": 0.11, "learning_rate": 2.702373922286524e-07, "logits/chosen": -2.216651678085327, "logits/rejected": -1.9870820045471191, "logps/chosen": -199.74261474609375, "logps/rejected": -292.4718017578125, "loss": 0.3559, "rewards/accuracies": 0.875, "rewards/chosen": -0.20429731905460358, "rewards/margins": 1.5516011714935303, "rewards/rejected": -1.7558984756469727, "step": 969 }, { "epoch": 0.11, "learning_rate": 2.7020196055273414e-07, "logits/chosen": -2.504601001739502, "logits/rejected": -2.307323932647705, "logps/chosen": -358.5874938964844, "logps/rejected": -243.50167846679688, "loss": 0.654, "rewards/accuracies": 0.75, "rewards/chosen": -1.0002554655075073, "rewards/margins": 0.807239294052124, "rewards/rejected": -1.8074947595596313, "step": 970 }, { "epoch": 0.11, "learning_rate": 2.7016652887681584e-07, "logits/chosen": -1.9807053804397583, "logits/rejected": -2.2712152004241943, "logps/chosen": -267.39324951171875, "logps/rejected": -222.39932250976562, "loss": 0.3321, "rewards/accuracies": 0.75, "rewards/chosen": -0.7652286291122437, "rewards/margins": 1.6488046646118164, "rewards/rejected": -2.4140334129333496, "step": 971 }, { "epoch": 0.11, "learning_rate": 2.701310972008976e-07, "logits/chosen": -2.443753242492676, "logits/rejected": -2.135472059249878, "logps/chosen": -163.60165405273438, "logps/rejected": -246.2024688720703, "loss": 0.558, "rewards/accuracies": 0.625, "rewards/chosen": -1.2948487997055054, "rewards/margins": 1.2444394826889038, "rewards/rejected": -2.53928804397583, "step": 972 }, { "epoch": 0.11, "learning_rate": 2.7009566552497934e-07, "logits/chosen": -1.8400704860687256, "logits/rejected": -2.1991758346557617, "logps/chosen": -358.5091552734375, "logps/rejected": -187.8582763671875, "loss": 0.4122, "rewards/accuracies": 0.875, "rewards/chosen": -1.047920823097229, "rewards/margins": 1.0896859169006348, "rewards/rejected": -2.137606620788574, "step": 973 }, { "epoch": 0.11, "learning_rate": 2.7006023384906103e-07, "logits/chosen": -2.9017271995544434, "logits/rejected": -2.67814564704895, "logps/chosen": -204.1541748046875, "logps/rejected": -173.2073516845703, "loss": 0.307, "rewards/accuracies": 1.0, "rewards/chosen": -0.09796231985092163, "rewards/margins": 1.3070521354675293, "rewards/rejected": -1.4050145149230957, "step": 974 }, { "epoch": 0.11, "learning_rate": 2.700248021731428e-07, "logits/chosen": -2.377932548522949, "logits/rejected": -2.4133005142211914, "logps/chosen": -431.3186340332031, "logps/rejected": -349.1793518066406, "loss": 0.3346, "rewards/accuracies": 0.875, "rewards/chosen": -0.6560257077217102, "rewards/margins": 1.3788739442825317, "rewards/rejected": -2.0348997116088867, "step": 975 }, { "epoch": 0.11, "learning_rate": 2.6998937049722453e-07, "logits/chosen": -1.9924702644348145, "logits/rejected": -2.17643666267395, "logps/chosen": -231.34219360351562, "logps/rejected": -207.58334350585938, "loss": 0.788, "rewards/accuracies": 0.5, "rewards/chosen": -0.9338446855545044, "rewards/margins": 0.06547223776578903, "rewards/rejected": -0.999316930770874, "step": 976 }, { "epoch": 0.11, "learning_rate": 2.699539388213062e-07, "logits/chosen": -2.583069086074829, "logits/rejected": -2.2863717079162598, "logps/chosen": -109.478515625, "logps/rejected": -222.06886291503906, "loss": 0.4769, "rewards/accuracies": 0.75, "rewards/chosen": -0.13123570382595062, "rewards/margins": 1.020374059677124, "rewards/rejected": -1.1516097784042358, "step": 977 }, { "epoch": 0.11, "learning_rate": 2.6991850714538797e-07, "logits/chosen": -2.516909599304199, "logits/rejected": -2.1172170639038086, "logps/chosen": -280.1630859375, "logps/rejected": -259.759033203125, "loss": 0.2998, "rewards/accuracies": 0.875, "rewards/chosen": -0.5964169502258301, "rewards/margins": 2.2446861267089844, "rewards/rejected": -2.8411030769348145, "step": 978 }, { "epoch": 0.11, "learning_rate": 2.698830754694697e-07, "logits/chosen": -2.264089584350586, "logits/rejected": -2.491731643676758, "logps/chosen": -151.08004760742188, "logps/rejected": -157.851806640625, "loss": 0.8215, "rewards/accuracies": 0.75, "rewards/chosen": -1.1391971111297607, "rewards/margins": 0.8416120409965515, "rewards/rejected": -1.980809211730957, "step": 979 }, { "epoch": 0.11, "learning_rate": 2.698476437935514e-07, "logits/chosen": -2.520704984664917, "logits/rejected": -2.6834716796875, "logps/chosen": -140.41580200195312, "logps/rejected": -296.4446105957031, "loss": 0.4611, "rewards/accuracies": 0.875, "rewards/chosen": -0.32723724842071533, "rewards/margins": 1.9960947036743164, "rewards/rejected": -2.3233320713043213, "step": 980 }, { "epoch": 0.11, "learning_rate": 2.6981221211763317e-07, "logits/chosen": -2.4870009422302246, "logits/rejected": -2.4610238075256348, "logps/chosen": -150.78016662597656, "logps/rejected": -143.23858642578125, "loss": 0.5831, "rewards/accuracies": 0.625, "rewards/chosen": -0.6107122898101807, "rewards/margins": 0.4325818121433258, "rewards/rejected": -1.043294072151184, "step": 981 }, { "epoch": 0.11, "learning_rate": 2.6977678044171486e-07, "logits/chosen": -2.203237295150757, "logits/rejected": -2.2119123935699463, "logps/chosen": -209.34475708007812, "logps/rejected": -263.0777282714844, "loss": 0.6946, "rewards/accuracies": 0.75, "rewards/chosen": -2.53007173538208, "rewards/margins": 0.9476156234741211, "rewards/rejected": -3.477687358856201, "step": 982 }, { "epoch": 0.11, "learning_rate": 2.697413487657966e-07, "logits/chosen": -2.299835681915283, "logits/rejected": -2.275193929672241, "logps/chosen": -279.0255126953125, "logps/rejected": -283.16473388671875, "loss": 0.2407, "rewards/accuracies": 0.875, "rewards/chosen": -0.01750333607196808, "rewards/margins": 1.699542760848999, "rewards/rejected": -1.7170461416244507, "step": 983 }, { "epoch": 0.11, "learning_rate": 2.6970591708987836e-07, "logits/chosen": -2.0187652111053467, "logits/rejected": -1.912190318107605, "logps/chosen": -181.69741821289062, "logps/rejected": -294.7813720703125, "loss": 0.6848, "rewards/accuracies": 0.875, "rewards/chosen": -0.6780277490615845, "rewards/margins": 1.2629801034927368, "rewards/rejected": -1.9410078525543213, "step": 984 }, { "epoch": 0.11, "learning_rate": 2.6967048541396005e-07, "logits/chosen": -2.339277982711792, "logits/rejected": -2.1288444995880127, "logps/chosen": -295.7228088378906, "logps/rejected": -318.90911865234375, "loss": 0.4986, "rewards/accuracies": 0.75, "rewards/chosen": -0.5251777172088623, "rewards/margins": 0.8384890556335449, "rewards/rejected": -1.3636667728424072, "step": 985 }, { "epoch": 0.11, "learning_rate": 2.696350537380418e-07, "logits/chosen": -2.2033209800720215, "logits/rejected": -2.2501397132873535, "logps/chosen": -239.3297119140625, "logps/rejected": -297.31036376953125, "loss": 0.2424, "rewards/accuracies": 0.875, "rewards/chosen": -0.27753037214279175, "rewards/margins": 2.3806142807006836, "rewards/rejected": -2.65814471244812, "step": 986 }, { "epoch": 0.11, "learning_rate": 2.695996220621235e-07, "logits/chosen": -2.5718889236450195, "logits/rejected": -2.5732922554016113, "logps/chosen": -230.68995666503906, "logps/rejected": -224.53616333007812, "loss": 0.4789, "rewards/accuracies": 0.625, "rewards/chosen": -0.7810390591621399, "rewards/margins": 1.1940538883209229, "rewards/rejected": -1.975092887878418, "step": 987 }, { "epoch": 0.11, "learning_rate": 2.6956419038620525e-07, "logits/chosen": -2.2265970706939697, "logits/rejected": -2.504373550415039, "logps/chosen": -196.70303344726562, "logps/rejected": -146.44589233398438, "loss": 2.0652, "rewards/accuracies": 0.625, "rewards/chosen": -2.9207096099853516, "rewards/margins": -1.3861323595046997, "rewards/rejected": -1.5345770120620728, "step": 988 }, { "epoch": 0.12, "learning_rate": 2.69528758710287e-07, "logits/chosen": -1.5918678045272827, "logits/rejected": -2.1438658237457275, "logps/chosen": -523.1455078125, "logps/rejected": -281.2669372558594, "loss": 0.3936, "rewards/accuracies": 0.75, "rewards/chosen": -0.9514124393463135, "rewards/margins": 1.2430256605148315, "rewards/rejected": -2.1944379806518555, "step": 989 }, { "epoch": 0.12, "learning_rate": 2.6949332703436874e-07, "logits/chosen": -1.6903914213180542, "logits/rejected": -1.9921557903289795, "logps/chosen": -478.1346740722656, "logps/rejected": -284.3717956542969, "loss": 0.5703, "rewards/accuracies": 0.75, "rewards/chosen": -0.05689529329538345, "rewards/margins": 1.129807472229004, "rewards/rejected": -1.1867027282714844, "step": 990 }, { "epoch": 0.12, "learning_rate": 2.6945789535845044e-07, "logits/chosen": -2.220155954360962, "logits/rejected": -2.5502142906188965, "logps/chosen": -272.29620361328125, "logps/rejected": -194.90628051757812, "loss": 0.8725, "rewards/accuracies": 0.75, "rewards/chosen": -1.4292720556259155, "rewards/margins": 0.4773487448692322, "rewards/rejected": -1.906620740890503, "step": 991 }, { "epoch": 0.12, "learning_rate": 2.694224636825322e-07, "logits/chosen": -2.482943058013916, "logits/rejected": -2.5690479278564453, "logps/chosen": -251.05987548828125, "logps/rejected": -261.6587829589844, "loss": 0.3977, "rewards/accuracies": 0.875, "rewards/chosen": -0.8658809661865234, "rewards/margins": 2.8298652172088623, "rewards/rejected": -3.695746421813965, "step": 992 }, { "epoch": 0.12, "learning_rate": 2.693870320066139e-07, "logits/chosen": -1.9928791522979736, "logits/rejected": -2.0464701652526855, "logps/chosen": -347.8475341796875, "logps/rejected": -310.4609680175781, "loss": 0.7428, "rewards/accuracies": 0.625, "rewards/chosen": -0.81305992603302, "rewards/margins": 1.2728923559188843, "rewards/rejected": -2.0859522819519043, "step": 993 }, { "epoch": 0.12, "learning_rate": 2.6935160033069563e-07, "logits/chosen": -2.233989953994751, "logits/rejected": -2.1681997776031494, "logps/chosen": -211.832275390625, "logps/rejected": -230.78082275390625, "loss": 0.6408, "rewards/accuracies": 0.75, "rewards/chosen": -0.4697113633155823, "rewards/margins": 0.8247967958450317, "rewards/rejected": -1.2945082187652588, "step": 994 }, { "epoch": 0.12, "learning_rate": 2.6931616865477733e-07, "logits/chosen": -2.1802260875701904, "logits/rejected": -2.216493606567383, "logps/chosen": -331.4581298828125, "logps/rejected": -282.35430908203125, "loss": 0.7249, "rewards/accuracies": 0.625, "rewards/chosen": -0.9356716275215149, "rewards/margins": 1.0254273414611816, "rewards/rejected": -1.9610990285873413, "step": 995 }, { "epoch": 0.12, "learning_rate": 2.692807369788591e-07, "logits/chosen": -2.2380433082580566, "logits/rejected": -2.4743010997772217, "logps/chosen": -450.3731384277344, "logps/rejected": -299.612060546875, "loss": 0.5604, "rewards/accuracies": 0.875, "rewards/chosen": -0.2313624918460846, "rewards/margins": 1.202096939086914, "rewards/rejected": -1.4334595203399658, "step": 996 }, { "epoch": 0.12, "learning_rate": 2.692453053029408e-07, "logits/chosen": -2.113428831100464, "logits/rejected": -2.360644578933716, "logps/chosen": -424.91766357421875, "logps/rejected": -318.7708740234375, "loss": 0.2216, "rewards/accuracies": 0.875, "rewards/chosen": -0.5906178951263428, "rewards/margins": 2.0988833904266357, "rewards/rejected": -2.6895012855529785, "step": 997 }, { "epoch": 0.12, "learning_rate": 2.692098736270225e-07, "logits/chosen": -2.3517794609069824, "logits/rejected": -2.319622278213501, "logps/chosen": -251.69647216796875, "logps/rejected": -279.42230224609375, "loss": 0.5412, "rewards/accuracies": 0.625, "rewards/chosen": -0.7958537340164185, "rewards/margins": 1.0292994976043701, "rewards/rejected": -1.8251532316207886, "step": 998 }, { "epoch": 0.12, "learning_rate": 2.6917444195110427e-07, "logits/chosen": -2.1526498794555664, "logits/rejected": -2.3987255096435547, "logps/chosen": -369.7178649902344, "logps/rejected": -444.3445129394531, "loss": 0.3053, "rewards/accuracies": 0.875, "rewards/chosen": -0.5917199850082397, "rewards/margins": 2.2089836597442627, "rewards/rejected": -2.800703525543213, "step": 999 }, { "epoch": 0.12, "learning_rate": 2.69139010275186e-07, "logits/chosen": -2.566873550415039, "logits/rejected": -2.574442148208618, "logps/chosen": -281.74371337890625, "logps/rejected": -290.25799560546875, "loss": 1.1318, "rewards/accuracies": 0.75, "rewards/chosen": -1.5471998453140259, "rewards/margins": 1.3452056646347046, "rewards/rejected": -2.8924055099487305, "step": 1000 }, { "epoch": 0.12, "eval_logits/chosen": -1.7510888576507568, "eval_logits/rejected": -1.749880075454712, "eval_logps/chosen": -275.8467712402344, "eval_logps/rejected": -268.0012512207031, "eval_loss": 0.4248879849910736, "eval_rewards/accuracies": 0.806034505367279, "eval_rewards/chosen": -0.36813417077064514, "eval_rewards/margins": 1.331338882446289, "eval_rewards/rejected": -1.6994729042053223, "eval_runtime": 237.7213, "eval_samples_per_second": 2.924, "eval_steps_per_second": 1.464, "step": 1000 }, { "epoch": 0.12, "learning_rate": 2.6910357859926777e-07, "logits/chosen": -2.8470873832702637, "logits/rejected": -2.9010086059570312, "logps/chosen": -251.39373779296875, "logps/rejected": -336.2052917480469, "loss": 0.2039, "rewards/accuracies": 0.875, "rewards/chosen": -0.050964951515197754, "rewards/margins": 2.9765784740448, "rewards/rejected": -3.027543306350708, "step": 1001 }, { "epoch": 0.12, "learning_rate": 2.6906814692334946e-07, "logits/chosen": -2.1684656143188477, "logits/rejected": -2.3903608322143555, "logps/chosen": -372.3252258300781, "logps/rejected": -221.97869873046875, "loss": 0.4121, "rewards/accuracies": 0.875, "rewards/chosen": -0.589804470539093, "rewards/margins": 0.8823971748352051, "rewards/rejected": -1.4722017049789429, "step": 1002 }, { "epoch": 0.12, "learning_rate": 2.690327152474312e-07, "logits/chosen": -1.3848621845245361, "logits/rejected": -1.6030499935150146, "logps/chosen": -482.38446044921875, "logps/rejected": -601.138916015625, "loss": 0.1853, "rewards/accuracies": 0.875, "rewards/chosen": -0.5383899807929993, "rewards/margins": 3.0925827026367188, "rewards/rejected": -3.6309728622436523, "step": 1003 }, { "epoch": 0.12, "learning_rate": 2.689972835715129e-07, "logits/chosen": -2.1628260612487793, "logits/rejected": -2.359450101852417, "logps/chosen": -231.13623046875, "logps/rejected": -262.7522277832031, "loss": 0.4516, "rewards/accuracies": 0.625, "rewards/chosen": -0.704407274723053, "rewards/margins": 2.1793715953826904, "rewards/rejected": -2.8837790489196777, "step": 1004 }, { "epoch": 0.12, "learning_rate": 2.6896185189559466e-07, "logits/chosen": -2.8480520248413086, "logits/rejected": -2.732140064239502, "logps/chosen": -137.153564453125, "logps/rejected": -176.18075561523438, "loss": 0.54, "rewards/accuracies": 0.75, "rewards/chosen": -1.045426845550537, "rewards/margins": 1.170914888381958, "rewards/rejected": -2.216341972351074, "step": 1005 }, { "epoch": 0.12, "learning_rate": 2.6892642021967635e-07, "logits/chosen": -2.7391953468322754, "logits/rejected": -2.872476577758789, "logps/chosen": -199.84474182128906, "logps/rejected": -214.40982055664062, "loss": 0.7541, "rewards/accuracies": 0.5, "rewards/chosen": -0.40230244398117065, "rewards/margins": 0.21437223255634308, "rewards/rejected": -0.6166746616363525, "step": 1006 }, { "epoch": 0.12, "learning_rate": 2.688909885437581e-07, "logits/chosen": -2.429762840270996, "logits/rejected": -2.5749754905700684, "logps/chosen": -204.0440673828125, "logps/rejected": -200.61854553222656, "loss": 0.7508, "rewards/accuracies": 0.625, "rewards/chosen": -0.7839182615280151, "rewards/margins": 0.5106223821640015, "rewards/rejected": -1.2945406436920166, "step": 1007 }, { "epoch": 0.12, "learning_rate": 2.6885555686783985e-07, "logits/chosen": -2.6658272743225098, "logits/rejected": -2.7179317474365234, "logps/chosen": -259.44232177734375, "logps/rejected": -241.65744018554688, "loss": 0.1792, "rewards/accuracies": 1.0, "rewards/chosen": 0.025270909070968628, "rewards/margins": 1.8805454969406128, "rewards/rejected": -1.8552747964859009, "step": 1008 }, { "epoch": 0.12, "learning_rate": 2.6882012519192154e-07, "logits/chosen": -2.2947378158569336, "logits/rejected": -2.145418643951416, "logps/chosen": -282.08453369140625, "logps/rejected": -368.7288818359375, "loss": 0.3944, "rewards/accuracies": 0.75, "rewards/chosen": -0.4319809079170227, "rewards/margins": 1.8353404998779297, "rewards/rejected": -2.2673215866088867, "step": 1009 }, { "epoch": 0.12, "learning_rate": 2.687846935160033e-07, "logits/chosen": -2.488420248031616, "logits/rejected": -2.3148677349090576, "logps/chosen": -183.70462036132812, "logps/rejected": -190.0177001953125, "loss": 0.949, "rewards/accuracies": 0.75, "rewards/chosen": -1.1047427654266357, "rewards/margins": 0.6015448570251465, "rewards/rejected": -1.7062877416610718, "step": 1010 }, { "epoch": 0.12, "learning_rate": 2.6874926184008504e-07, "logits/chosen": -2.5630640983581543, "logits/rejected": -2.443657159805298, "logps/chosen": -260.3871154785156, "logps/rejected": -161.87973022460938, "loss": 0.4285, "rewards/accuracies": 0.75, "rewards/chosen": -0.8149291276931763, "rewards/margins": 1.055453896522522, "rewards/rejected": -1.8703830242156982, "step": 1011 }, { "epoch": 0.12, "learning_rate": 2.687138301641668e-07, "logits/chosen": -2.4237070083618164, "logits/rejected": -2.3634684085845947, "logps/chosen": -303.64410400390625, "logps/rejected": -199.79078674316406, "loss": 1.2535, "rewards/accuracies": 0.625, "rewards/chosen": -1.3026437759399414, "rewards/margins": 0.2994152009487152, "rewards/rejected": -1.6020588874816895, "step": 1012 }, { "epoch": 0.12, "learning_rate": 2.686783984882485e-07, "logits/chosen": -2.263988494873047, "logits/rejected": -2.643456220626831, "logps/chosen": -582.6007080078125, "logps/rejected": -341.6959228515625, "loss": 0.6402, "rewards/accuracies": 0.625, "rewards/chosen": -1.199205994606018, "rewards/margins": 1.4099606275558472, "rewards/rejected": -2.609166383743286, "step": 1013 }, { "epoch": 0.12, "learning_rate": 2.6864296681233023e-07, "logits/chosen": -2.2881956100463867, "logits/rejected": -2.4538326263427734, "logps/chosen": -316.73895263671875, "logps/rejected": -320.487060546875, "loss": 0.2472, "rewards/accuracies": 0.875, "rewards/chosen": -0.7972468137741089, "rewards/margins": 2.2829596996307373, "rewards/rejected": -3.0802063941955566, "step": 1014 }, { "epoch": 0.12, "learning_rate": 2.6860753513641193e-07, "logits/chosen": -2.637885332107544, "logits/rejected": -2.8041915893554688, "logps/chosen": -248.788330078125, "logps/rejected": -320.8568420410156, "loss": 0.3368, "rewards/accuracies": 0.875, "rewards/chosen": -0.7933220267295837, "rewards/margins": 2.0839321613311768, "rewards/rejected": -2.8772542476654053, "step": 1015 }, { "epoch": 0.12, "learning_rate": 2.685721034604937e-07, "logits/chosen": -1.9706429243087769, "logits/rejected": -2.210228681564331, "logps/chosen": -321.562255859375, "logps/rejected": -189.73077392578125, "loss": 0.6467, "rewards/accuracies": 0.5, "rewards/chosen": -0.5760781764984131, "rewards/margins": 0.7637758851051331, "rewards/rejected": -1.3398540019989014, "step": 1016 }, { "epoch": 0.12, "learning_rate": 2.685366717845754e-07, "logits/chosen": -2.612414836883545, "logits/rejected": -2.4439995288848877, "logps/chosen": -333.61468505859375, "logps/rejected": -375.1265869140625, "loss": 0.9089, "rewards/accuracies": 0.625, "rewards/chosen": -1.840308427810669, "rewards/margins": 0.40208178758621216, "rewards/rejected": -2.2423901557922363, "step": 1017 }, { "epoch": 0.12, "learning_rate": 2.685012401086571e-07, "logits/chosen": -2.449991226196289, "logits/rejected": -2.4547030925750732, "logps/chosen": -268.7228088378906, "logps/rejected": -258.485107421875, "loss": 0.3502, "rewards/accuracies": 0.875, "rewards/chosen": -0.3177988529205322, "rewards/margins": 1.7583271265029907, "rewards/rejected": -2.0761260986328125, "step": 1018 }, { "epoch": 0.12, "learning_rate": 2.6846580843273887e-07, "logits/chosen": -2.499650478363037, "logits/rejected": -2.3996729850769043, "logps/chosen": -439.25872802734375, "logps/rejected": -342.2508544921875, "loss": 0.6307, "rewards/accuracies": 0.625, "rewards/chosen": -1.0192344188690186, "rewards/margins": 0.9353653788566589, "rewards/rejected": -1.9545998573303223, "step": 1019 }, { "epoch": 0.12, "learning_rate": 2.6843037675682057e-07, "logits/chosen": -1.8308236598968506, "logits/rejected": -2.215582847595215, "logps/chosen": -378.8659973144531, "logps/rejected": -320.2466735839844, "loss": 0.1961, "rewards/accuracies": 1.0, "rewards/chosen": -0.7679028511047363, "rewards/margins": 1.972186803817749, "rewards/rejected": -2.7400894165039062, "step": 1020 }, { "epoch": 0.12, "learning_rate": 2.683949450809023e-07, "logits/chosen": -2.5429532527923584, "logits/rejected": -2.707886219024658, "logps/chosen": -203.77883911132812, "logps/rejected": -276.91156005859375, "loss": 0.4668, "rewards/accuracies": 0.75, "rewards/chosen": -1.0387959480285645, "rewards/margins": 2.4250433444976807, "rewards/rejected": -3.463839054107666, "step": 1021 }, { "epoch": 0.12, "learning_rate": 2.68359513404984e-07, "logits/chosen": -2.2999420166015625, "logits/rejected": -1.9869012832641602, "logps/chosen": -219.1651611328125, "logps/rejected": -359.604736328125, "loss": 0.4938, "rewards/accuracies": 0.75, "rewards/chosen": -1.085327386856079, "rewards/margins": 1.3165650367736816, "rewards/rejected": -2.4018921852111816, "step": 1022 }, { "epoch": 0.12, "learning_rate": 2.683240817290658e-07, "logits/chosen": -2.741697311401367, "logits/rejected": -2.594670057296753, "logps/chosen": -383.4318542480469, "logps/rejected": -237.98895263671875, "loss": 0.2533, "rewards/accuracies": 1.0, "rewards/chosen": -0.8507862687110901, "rewards/margins": 1.8031563758850098, "rewards/rejected": -2.653942584991455, "step": 1023 }, { "epoch": 0.12, "learning_rate": 2.682886500531475e-07, "logits/chosen": -1.5400416851043701, "logits/rejected": -1.4896056652069092, "logps/chosen": -357.1156311035156, "logps/rejected": -436.74041748046875, "loss": 0.7384, "rewards/accuracies": 0.5, "rewards/chosen": -0.8909907341003418, "rewards/margins": 0.24667218327522278, "rewards/rejected": -1.1376628875732422, "step": 1024 }, { "epoch": 0.12, "learning_rate": 2.6825321837722926e-07, "logits/chosen": -2.8661704063415527, "logits/rejected": -2.8446669578552246, "logps/chosen": -318.3084411621094, "logps/rejected": -144.72373962402344, "loss": 1.0597, "rewards/accuracies": 0.875, "rewards/chosen": -0.8996989130973816, "rewards/margins": 0.8577672243118286, "rewards/rejected": -1.7574660778045654, "step": 1025 }, { "epoch": 0.12, "learning_rate": 2.6821778670131095e-07, "logits/chosen": -2.320706367492676, "logits/rejected": -2.2479805946350098, "logps/chosen": -362.08465576171875, "logps/rejected": -291.94635009765625, "loss": 0.6712, "rewards/accuracies": 0.625, "rewards/chosen": -1.065910816192627, "rewards/margins": 0.6854327917098999, "rewards/rejected": -1.7513437271118164, "step": 1026 }, { "epoch": 0.12, "learning_rate": 2.681823550253927e-07, "logits/chosen": -2.347588300704956, "logits/rejected": -2.508213996887207, "logps/chosen": -438.076904296875, "logps/rejected": -318.760498046875, "loss": 0.1472, "rewards/accuracies": 1.0, "rewards/chosen": 0.0752687007188797, "rewards/margins": 2.4860312938690186, "rewards/rejected": -2.4107627868652344, "step": 1027 }, { "epoch": 0.12, "learning_rate": 2.681469233494744e-07, "logits/chosen": -2.4436614513397217, "logits/rejected": -2.191115617752075, "logps/chosen": -169.04754638671875, "logps/rejected": -317.03387451171875, "loss": 0.3172, "rewards/accuracies": 0.75, "rewards/chosen": -0.3345242738723755, "rewards/margins": 1.572483777999878, "rewards/rejected": -1.9070080518722534, "step": 1028 }, { "epoch": 0.12, "learning_rate": 2.6811149167355614e-07, "logits/chosen": -1.8599722385406494, "logits/rejected": -2.0045199394226074, "logps/chosen": -295.9490661621094, "logps/rejected": -218.71075439453125, "loss": 0.6777, "rewards/accuracies": 0.75, "rewards/chosen": -0.37321341037750244, "rewards/margins": 1.003804326057434, "rewards/rejected": -1.3770177364349365, "step": 1029 }, { "epoch": 0.12, "learning_rate": 2.680760599976379e-07, "logits/chosen": -2.6005265712738037, "logits/rejected": -2.4518320560455322, "logps/chosen": -118.36698150634766, "logps/rejected": -168.40753173828125, "loss": 0.4732, "rewards/accuracies": 0.75, "rewards/chosen": -0.31334924697875977, "rewards/margins": 0.9789475798606873, "rewards/rejected": -1.2922968864440918, "step": 1030 }, { "epoch": 0.12, "learning_rate": 2.680406283217196e-07, "logits/chosen": -2.7847235202789307, "logits/rejected": -2.4587996006011963, "logps/chosen": -162.13720703125, "logps/rejected": -193.2193603515625, "loss": 0.6723, "rewards/accuracies": 0.625, "rewards/chosen": -0.5871151685714722, "rewards/margins": 0.37776851654052734, "rewards/rejected": -0.9648836851119995, "step": 1031 }, { "epoch": 0.12, "learning_rate": 2.6800519664580134e-07, "logits/chosen": -2.4462485313415527, "logits/rejected": -2.3342058658599854, "logps/chosen": -320.865966796875, "logps/rejected": -336.8431396484375, "loss": 0.4604, "rewards/accuracies": 0.75, "rewards/chosen": 0.057936348021030426, "rewards/margins": 1.15945565700531, "rewards/rejected": -1.1015193462371826, "step": 1032 }, { "epoch": 0.12, "learning_rate": 2.6796976496988303e-07, "logits/chosen": -2.7538814544677734, "logits/rejected": -2.6490368843078613, "logps/chosen": -221.382568359375, "logps/rejected": -213.8437042236328, "loss": 0.6717, "rewards/accuracies": 0.5, "rewards/chosen": -0.2699287533760071, "rewards/margins": 0.51250821352005, "rewards/rejected": -0.7824369668960571, "step": 1033 }, { "epoch": 0.12, "learning_rate": 2.679343332939648e-07, "logits/chosen": -1.8708608150482178, "logits/rejected": -2.0062992572784424, "logps/chosen": -420.2269592285156, "logps/rejected": -411.08538818359375, "loss": 0.6734, "rewards/accuracies": 0.75, "rewards/chosen": -0.8716568350791931, "rewards/margins": 0.5073645114898682, "rewards/rejected": -1.379021406173706, "step": 1034 }, { "epoch": 0.12, "learning_rate": 2.6789890161804653e-07, "logits/chosen": -2.680546522140503, "logits/rejected": -2.8294050693511963, "logps/chosen": -385.24090576171875, "logps/rejected": -415.5498046875, "loss": 0.2318, "rewards/accuracies": 0.875, "rewards/chosen": -0.05524454265832901, "rewards/margins": 1.9907004833221436, "rewards/rejected": -2.045945167541504, "step": 1035 }, { "epoch": 0.12, "learning_rate": 2.678634699421283e-07, "logits/chosen": -1.9042677879333496, "logits/rejected": -1.7738113403320312, "logps/chosen": -311.94989013671875, "logps/rejected": -328.2655944824219, "loss": 0.6011, "rewards/accuracies": 0.625, "rewards/chosen": -1.0442520380020142, "rewards/margins": 0.5859363079071045, "rewards/rejected": -1.6301883459091187, "step": 1036 }, { "epoch": 0.12, "learning_rate": 2.6782803826621e-07, "logits/chosen": -2.7975544929504395, "logits/rejected": -2.9573593139648438, "logps/chosen": -231.49923706054688, "logps/rejected": -259.990478515625, "loss": 0.249, "rewards/accuracies": 0.875, "rewards/chosen": -0.9792052507400513, "rewards/margins": 2.034585475921631, "rewards/rejected": -3.0137906074523926, "step": 1037 }, { "epoch": 0.12, "learning_rate": 2.677926065902917e-07, "logits/chosen": -2.600123405456543, "logits/rejected": -2.729048490524292, "logps/chosen": -285.822021484375, "logps/rejected": -156.70590209960938, "loss": 0.4391, "rewards/accuracies": 0.75, "rewards/chosen": -1.4918278455734253, "rewards/margins": 1.4295551776885986, "rewards/rejected": -2.9213831424713135, "step": 1038 }, { "epoch": 0.12, "learning_rate": 2.677571749143734e-07, "logits/chosen": -2.539416790008545, "logits/rejected": -2.422215700149536, "logps/chosen": -299.0378723144531, "logps/rejected": -319.9189453125, "loss": 0.3588, "rewards/accuracies": 0.75, "rewards/chosen": 0.06421977281570435, "rewards/margins": 2.081080436706543, "rewards/rejected": -2.0168607234954834, "step": 1039 }, { "epoch": 0.12, "learning_rate": 2.6772174323845517e-07, "logits/chosen": -2.2689976692199707, "logits/rejected": -2.600353240966797, "logps/chosen": -349.33258056640625, "logps/rejected": -260.7159729003906, "loss": 0.6346, "rewards/accuracies": 0.5, "rewards/chosen": -0.7608904242515564, "rewards/margins": 0.7425460815429688, "rewards/rejected": -1.50343656539917, "step": 1040 }, { "epoch": 0.12, "learning_rate": 2.676863115625369e-07, "logits/chosen": -2.373967170715332, "logits/rejected": -2.2361016273498535, "logps/chosen": -336.94317626953125, "logps/rejected": -358.672119140625, "loss": 0.1783, "rewards/accuracies": 1.0, "rewards/chosen": -0.5066670179367065, "rewards/margins": 1.9474403858184814, "rewards/rejected": -2.4541072845458984, "step": 1041 }, { "epoch": 0.12, "learning_rate": 2.676508798866186e-07, "logits/chosen": -2.100290298461914, "logits/rejected": -1.957705020904541, "logps/chosen": -341.8646240234375, "logps/rejected": -343.6378173828125, "loss": 0.1893, "rewards/accuracies": 0.875, "rewards/chosen": -0.5427795648574829, "rewards/margins": 2.7412524223327637, "rewards/rejected": -3.284031867980957, "step": 1042 }, { "epoch": 0.12, "learning_rate": 2.6761544821070036e-07, "logits/chosen": -2.1402664184570312, "logits/rejected": -2.432070255279541, "logps/chosen": -398.04815673828125, "logps/rejected": -218.48489379882812, "loss": 0.5562, "rewards/accuracies": 0.625, "rewards/chosen": -0.5323309898376465, "rewards/margins": 1.032625675201416, "rewards/rejected": -1.564956784248352, "step": 1043 }, { "epoch": 0.12, "learning_rate": 2.6758001653478206e-07, "logits/chosen": -1.7007567882537842, "logits/rejected": -1.9585708379745483, "logps/chosen": -431.3917236328125, "logps/rejected": -395.8056640625, "loss": 0.3277, "rewards/accuracies": 0.875, "rewards/chosen": -0.7914546728134155, "rewards/margins": 1.623566746711731, "rewards/rejected": -2.4150211811065674, "step": 1044 }, { "epoch": 0.12, "learning_rate": 2.675445848588638e-07, "logits/chosen": -2.5013341903686523, "logits/rejected": -2.2819950580596924, "logps/chosen": -155.30113220214844, "logps/rejected": -162.28001403808594, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": -0.9755802154541016, "rewards/margins": 0.9947835206985474, "rewards/rejected": -1.9703636169433594, "step": 1045 }, { "epoch": 0.12, "learning_rate": 2.6750915318294555e-07, "logits/chosen": -2.3592047691345215, "logits/rejected": -2.3650314807891846, "logps/chosen": -312.146728515625, "logps/rejected": -354.27667236328125, "loss": 0.4743, "rewards/accuracies": 0.625, "rewards/chosen": -0.13659609854221344, "rewards/margins": 1.3142424821853638, "rewards/rejected": -1.450838565826416, "step": 1046 }, { "epoch": 0.12, "learning_rate": 2.674737215070273e-07, "logits/chosen": -2.448552131652832, "logits/rejected": -2.7879810333251953, "logps/chosen": -431.3397216796875, "logps/rejected": -270.61578369140625, "loss": 0.4333, "rewards/accuracies": 0.875, "rewards/chosen": -0.619140625, "rewards/margins": 2.046046733856201, "rewards/rejected": -2.665187358856201, "step": 1047 }, { "epoch": 0.12, "learning_rate": 2.67438289831109e-07, "logits/chosen": -2.2619872093200684, "logits/rejected": -2.1790249347686768, "logps/chosen": -285.6820983886719, "logps/rejected": -324.9290771484375, "loss": 0.4521, "rewards/accuracies": 0.75, "rewards/chosen": -0.13921624422073364, "rewards/margins": 1.1460033655166626, "rewards/rejected": -1.2852195501327515, "step": 1048 }, { "epoch": 0.12, "learning_rate": 2.6740285815519075e-07, "logits/chosen": -2.2631313800811768, "logits/rejected": -2.3668324947357178, "logps/chosen": -288.9767761230469, "logps/rejected": -269.7618713378906, "loss": 0.2444, "rewards/accuracies": 0.875, "rewards/chosen": 0.2361612766981125, "rewards/margins": 2.0142362117767334, "rewards/rejected": -1.778074860572815, "step": 1049 }, { "epoch": 0.12, "learning_rate": 2.6736742647927244e-07, "logits/chosen": -2.451120615005493, "logits/rejected": -2.4749855995178223, "logps/chosen": -418.6518859863281, "logps/rejected": -321.02978515625, "loss": 0.6123, "rewards/accuracies": 0.625, "rewards/chosen": -1.2040013074874878, "rewards/margins": 1.1053097248077393, "rewards/rejected": -2.3093109130859375, "step": 1050 }, { "epoch": 0.12, "learning_rate": 2.673319948033542e-07, "logits/chosen": -2.3888111114501953, "logits/rejected": -2.379237174987793, "logps/chosen": -271.69091796875, "logps/rejected": -232.19171142578125, "loss": 0.7941, "rewards/accuracies": 0.625, "rewards/chosen": -0.7459869384765625, "rewards/margins": 0.46350181102752686, "rewards/rejected": -1.2094886302947998, "step": 1051 }, { "epoch": 0.12, "learning_rate": 2.6729656312743594e-07, "logits/chosen": -2.1089117527008057, "logits/rejected": -2.0546164512634277, "logps/chosen": -275.7714538574219, "logps/rejected": -291.5171203613281, "loss": 0.2901, "rewards/accuracies": 1.0, "rewards/chosen": -0.5756916403770447, "rewards/margins": 1.736299753189087, "rewards/rejected": -2.3119914531707764, "step": 1052 }, { "epoch": 0.12, "learning_rate": 2.6726113145151763e-07, "logits/chosen": -2.112663745880127, "logits/rejected": -2.066485643386841, "logps/chosen": -129.0723876953125, "logps/rejected": -210.68544006347656, "loss": 0.5336, "rewards/accuracies": 0.75, "rewards/chosen": -0.9590966105461121, "rewards/margins": 1.7200921773910522, "rewards/rejected": -2.6791887283325195, "step": 1053 }, { "epoch": 0.12, "learning_rate": 2.672256997755994e-07, "logits/chosen": -2.0749671459198, "logits/rejected": -2.2923312187194824, "logps/chosen": -438.4505615234375, "logps/rejected": -365.7938537597656, "loss": 0.3381, "rewards/accuracies": 1.0, "rewards/chosen": -0.5590173602104187, "rewards/margins": 1.7726699113845825, "rewards/rejected": -2.3316872119903564, "step": 1054 }, { "epoch": 0.12, "learning_rate": 2.671902680996811e-07, "logits/chosen": -2.6246416568756104, "logits/rejected": -2.4869894981384277, "logps/chosen": -194.2587127685547, "logps/rejected": -286.2919921875, "loss": 0.4861, "rewards/accuracies": 0.625, "rewards/chosen": -0.7361899614334106, "rewards/margins": 1.0961368083953857, "rewards/rejected": -1.832326889038086, "step": 1055 }, { "epoch": 0.12, "learning_rate": 2.6715483642376283e-07, "logits/chosen": -2.2494685649871826, "logits/rejected": -2.169074058532715, "logps/chosen": -239.6207275390625, "logps/rejected": -222.606689453125, "loss": 0.4798, "rewards/accuracies": 0.75, "rewards/chosen": -0.3303607702255249, "rewards/margins": 0.931678056716919, "rewards/rejected": -1.2620388269424438, "step": 1056 }, { "epoch": 0.12, "learning_rate": 2.671194047478445e-07, "logits/chosen": -2.022153615951538, "logits/rejected": -2.2176949977874756, "logps/chosen": -398.5050354003906, "logps/rejected": -317.08270263671875, "loss": 0.3072, "rewards/accuracies": 1.0, "rewards/chosen": -0.46833130717277527, "rewards/margins": 1.2909793853759766, "rewards/rejected": -1.7593106031417847, "step": 1057 }, { "epoch": 0.12, "learning_rate": 2.670839730719263e-07, "logits/chosen": -2.6683053970336914, "logits/rejected": -2.702651023864746, "logps/chosen": -155.39759826660156, "logps/rejected": -196.35496520996094, "loss": 0.6205, "rewards/accuracies": 0.625, "rewards/chosen": -0.26053130626678467, "rewards/margins": 0.3053209185600281, "rewards/rejected": -0.5658522248268127, "step": 1058 }, { "epoch": 0.12, "learning_rate": 2.67048541396008e-07, "logits/chosen": -2.400468111038208, "logits/rejected": -2.214951992034912, "logps/chosen": -269.39361572265625, "logps/rejected": -289.14898681640625, "loss": 0.4452, "rewards/accuracies": 0.875, "rewards/chosen": -0.4557674527168274, "rewards/margins": 1.1904085874557495, "rewards/rejected": -1.6461760997772217, "step": 1059 }, { "epoch": 0.12, "learning_rate": 2.6701310972008977e-07, "logits/chosen": -2.0544698238372803, "logits/rejected": -2.095590829849243, "logps/chosen": -377.2997131347656, "logps/rejected": -337.1888732910156, "loss": 0.3652, "rewards/accuracies": 0.875, "rewards/chosen": -1.8498482704162598, "rewards/margins": 1.130028247833252, "rewards/rejected": -2.9798765182495117, "step": 1060 }, { "epoch": 0.12, "learning_rate": 2.6697767804417146e-07, "logits/chosen": -2.3078534603118896, "logits/rejected": -2.4021599292755127, "logps/chosen": -424.0606689453125, "logps/rejected": -335.88702392578125, "loss": 0.2092, "rewards/accuracies": 1.0, "rewards/chosen": -0.027568520978093147, "rewards/margins": 2.34454607963562, "rewards/rejected": -2.372114658355713, "step": 1061 }, { "epoch": 0.12, "learning_rate": 2.669422463682532e-07, "logits/chosen": -2.1604154109954834, "logits/rejected": -1.9535936117172241, "logps/chosen": -247.72769165039062, "logps/rejected": -342.96441650390625, "loss": 0.4757, "rewards/accuracies": 0.875, "rewards/chosen": -0.45916077494621277, "rewards/margins": 0.8364137411117554, "rewards/rejected": -1.295574426651001, "step": 1062 }, { "epoch": 0.12, "learning_rate": 2.6690681469233496e-07, "logits/chosen": -2.328622817993164, "logits/rejected": -2.3256759643554688, "logps/chosen": -311.0596923828125, "logps/rejected": -326.9505615234375, "loss": 0.2971, "rewards/accuracies": 0.875, "rewards/chosen": -0.5495116710662842, "rewards/margins": 2.840487480163574, "rewards/rejected": -3.3899993896484375, "step": 1063 }, { "epoch": 0.12, "learning_rate": 2.6687138301641666e-07, "logits/chosen": -2.2215142250061035, "logits/rejected": -2.021106719970703, "logps/chosen": -349.39068603515625, "logps/rejected": -427.28857421875, "loss": 0.1654, "rewards/accuracies": 1.0, "rewards/chosen": -0.13576972484588623, "rewards/margins": 2.2788681983947754, "rewards/rejected": -2.414638042449951, "step": 1064 }, { "epoch": 0.12, "learning_rate": 2.668359513404984e-07, "logits/chosen": -2.242788791656494, "logits/rejected": -2.4979422092437744, "logps/chosen": -285.5115051269531, "logps/rejected": -151.16806030273438, "loss": 0.4754, "rewards/accuracies": 0.75, "rewards/chosen": -0.7337660789489746, "rewards/margins": 1.0450332164764404, "rewards/rejected": -1.778799295425415, "step": 1065 }, { "epoch": 0.12, "learning_rate": 2.668005196645801e-07, "logits/chosen": -2.6846566200256348, "logits/rejected": -2.3883676528930664, "logps/chosen": -217.34634399414062, "logps/rejected": -293.59197998046875, "loss": 0.6862, "rewards/accuracies": 0.875, "rewards/chosen": -0.6707612872123718, "rewards/margins": 1.7070541381835938, "rewards/rejected": -2.3778157234191895, "step": 1066 }, { "epoch": 0.12, "learning_rate": 2.6676508798866185e-07, "logits/chosen": -1.967170000076294, "logits/rejected": -2.39233660697937, "logps/chosen": -344.6943359375, "logps/rejected": -191.7589111328125, "loss": 0.3268, "rewards/accuracies": 0.875, "rewards/chosen": -0.33706381916999817, "rewards/margins": 1.3534246683120728, "rewards/rejected": -1.690488576889038, "step": 1067 }, { "epoch": 0.12, "learning_rate": 2.6672965631274355e-07, "logits/chosen": -2.408377170562744, "logits/rejected": -2.4218435287475586, "logps/chosen": -258.0489501953125, "logps/rejected": -309.2147216796875, "loss": 0.1901, "rewards/accuracies": 1.0, "rewards/chosen": -0.4693720042705536, "rewards/margins": 2.582160472869873, "rewards/rejected": -3.051532506942749, "step": 1068 }, { "epoch": 0.12, "learning_rate": 2.666942246368253e-07, "logits/chosen": -2.5162513256073, "logits/rejected": -2.636547803878784, "logps/chosen": -314.39862060546875, "logps/rejected": -177.69070434570312, "loss": 0.5505, "rewards/accuracies": 0.625, "rewards/chosen": -0.6609842777252197, "rewards/margins": 0.594925045967102, "rewards/rejected": -1.2559092044830322, "step": 1069 }, { "epoch": 0.12, "learning_rate": 2.6665879296090704e-07, "logits/chosen": -2.0990500450134277, "logits/rejected": -2.2031667232513428, "logps/chosen": -382.60821533203125, "logps/rejected": -249.86444091796875, "loss": 0.674, "rewards/accuracies": 0.625, "rewards/chosen": -0.5362615585327148, "rewards/margins": 0.42597728967666626, "rewards/rejected": -0.9622387886047363, "step": 1070 }, { "epoch": 0.12, "learning_rate": 2.666233612849888e-07, "logits/chosen": -3.0147786140441895, "logits/rejected": -3.030062675476074, "logps/chosen": -302.000732421875, "logps/rejected": -319.54034423828125, "loss": 0.5529, "rewards/accuracies": 0.625, "rewards/chosen": -1.0089507102966309, "rewards/margins": 0.9571303129196167, "rewards/rejected": -1.966080904006958, "step": 1071 }, { "epoch": 0.12, "learning_rate": 2.665879296090705e-07, "logits/chosen": -2.7148094177246094, "logits/rejected": -2.6293106079101562, "logps/chosen": -210.87208557128906, "logps/rejected": -290.826904296875, "loss": 0.629, "rewards/accuracies": 0.625, "rewards/chosen": -0.7553330063819885, "rewards/margins": 1.174296498298645, "rewards/rejected": -1.9296294450759888, "step": 1072 }, { "epoch": 0.12, "learning_rate": 2.6655249793315224e-07, "logits/chosen": -2.3252639770507812, "logits/rejected": -2.387633800506592, "logps/chosen": -204.40345764160156, "logps/rejected": -276.6667785644531, "loss": 0.4621, "rewards/accuracies": 0.75, "rewards/chosen": 0.045179013162851334, "rewards/margins": 1.420233964920044, "rewards/rejected": -1.3750548362731934, "step": 1073 }, { "epoch": 0.12, "learning_rate": 2.6651706625723393e-07, "logits/chosen": -2.3053317070007324, "logits/rejected": -2.6894307136535645, "logps/chosen": -247.8425750732422, "logps/rejected": -222.68019104003906, "loss": 0.3709, "rewards/accuracies": 0.75, "rewards/chosen": -0.8157211542129517, "rewards/margins": 1.6436864137649536, "rewards/rejected": -2.4594078063964844, "step": 1074 }, { "epoch": 0.13, "learning_rate": 2.664816345813157e-07, "logits/chosen": -2.504063367843628, "logits/rejected": -2.294929265975952, "logps/chosen": -201.06796264648438, "logps/rejected": -220.40338134765625, "loss": 0.8078, "rewards/accuracies": 0.5, "rewards/chosen": -1.9903233051300049, "rewards/margins": 0.35713207721710205, "rewards/rejected": -2.3474552631378174, "step": 1075 }, { "epoch": 0.13, "learning_rate": 2.6644620290539743e-07, "logits/chosen": -2.418185234069824, "logits/rejected": -2.3862133026123047, "logps/chosen": -311.5484619140625, "logps/rejected": -432.5367431640625, "loss": 0.2949, "rewards/accuracies": 0.875, "rewards/chosen": -0.5764299035072327, "rewards/margins": 1.8395686149597168, "rewards/rejected": -2.4159984588623047, "step": 1076 }, { "epoch": 0.13, "learning_rate": 2.664107712294791e-07, "logits/chosen": -2.168757438659668, "logits/rejected": -2.131538152694702, "logps/chosen": -261.13519287109375, "logps/rejected": -289.5082092285156, "loss": 0.6945, "rewards/accuracies": 0.625, "rewards/chosen": -1.723724126815796, "rewards/margins": 0.9001120328903198, "rewards/rejected": -2.623836040496826, "step": 1077 }, { "epoch": 0.13, "learning_rate": 2.6637533955356087e-07, "logits/chosen": -2.464836359024048, "logits/rejected": -2.612760305404663, "logps/chosen": -278.18328857421875, "logps/rejected": -285.47003173828125, "loss": 0.2856, "rewards/accuracies": 0.75, "rewards/chosen": -0.5230402946472168, "rewards/margins": 2.4169726371765137, "rewards/rejected": -2.9400129318237305, "step": 1078 }, { "epoch": 0.13, "learning_rate": 2.6633990787764257e-07, "logits/chosen": -2.3580260276794434, "logits/rejected": -2.4667322635650635, "logps/chosen": -205.69796752929688, "logps/rejected": -205.69241333007812, "loss": 0.3664, "rewards/accuracies": 0.75, "rewards/chosen": -0.3595908284187317, "rewards/margins": 1.5037263631820679, "rewards/rejected": -1.8633171319961548, "step": 1079 }, { "epoch": 0.13, "learning_rate": 2.663044762017243e-07, "logits/chosen": -2.302065849304199, "logits/rejected": -2.739877223968506, "logps/chosen": -484.3369140625, "logps/rejected": -358.4576110839844, "loss": 0.3192, "rewards/accuracies": 0.875, "rewards/chosen": -0.11011756956577301, "rewards/margins": 1.4185330867767334, "rewards/rejected": -1.5286506414413452, "step": 1080 }, { "epoch": 0.13, "learning_rate": 2.6626904452580607e-07, "logits/chosen": -2.418699026107788, "logits/rejected": -2.093188762664795, "logps/chosen": -136.64671325683594, "logps/rejected": -221.22161865234375, "loss": 0.3133, "rewards/accuracies": 0.875, "rewards/chosen": -0.5187832117080688, "rewards/margins": 1.5653152465820312, "rewards/rejected": -2.0840985774993896, "step": 1081 }, { "epoch": 0.13, "learning_rate": 2.662336128498878e-07, "logits/chosen": -1.652097463607788, "logits/rejected": -2.124221086502075, "logps/chosen": -378.49139404296875, "logps/rejected": -193.49049377441406, "loss": 1.7457, "rewards/accuracies": 0.625, "rewards/chosen": -2.7611725330352783, "rewards/margins": -0.3386959135532379, "rewards/rejected": -2.4224765300750732, "step": 1082 }, { "epoch": 0.13, "learning_rate": 2.661981811739695e-07, "logits/chosen": -2.2756805419921875, "logits/rejected": -2.5502874851226807, "logps/chosen": -191.4690704345703, "logps/rejected": -157.41888427734375, "loss": 0.3008, "rewards/accuracies": 0.75, "rewards/chosen": -0.5995219945907593, "rewards/margins": 1.74029541015625, "rewards/rejected": -2.3398172855377197, "step": 1083 }, { "epoch": 0.13, "learning_rate": 2.6616274949805126e-07, "logits/chosen": -2.98826265335083, "logits/rejected": -2.957146167755127, "logps/chosen": -302.45941162109375, "logps/rejected": -239.65228271484375, "loss": 0.4662, "rewards/accuracies": 0.875, "rewards/chosen": -0.5962383151054382, "rewards/margins": 1.1420583724975586, "rewards/rejected": -1.7382967472076416, "step": 1084 }, { "epoch": 0.13, "learning_rate": 2.6612731782213295e-07, "logits/chosen": -2.2169206142425537, "logits/rejected": -2.3974270820617676, "logps/chosen": -212.6136474609375, "logps/rejected": -240.02340698242188, "loss": 0.8872, "rewards/accuracies": 0.625, "rewards/chosen": -1.573089361190796, "rewards/margins": 1.3270184993743896, "rewards/rejected": -2.9001078605651855, "step": 1085 }, { "epoch": 0.13, "learning_rate": 2.660918861462147e-07, "logits/chosen": -2.68259334564209, "logits/rejected": -2.5670857429504395, "logps/chosen": -400.1985168457031, "logps/rejected": -377.7809143066406, "loss": 0.3017, "rewards/accuracies": 0.875, "rewards/chosen": -0.7853826284408569, "rewards/margins": 2.0247440338134766, "rewards/rejected": -2.810126781463623, "step": 1086 }, { "epoch": 0.13, "learning_rate": 2.6605645447029645e-07, "logits/chosen": -2.4805777072906494, "logits/rejected": -2.3111910820007324, "logps/chosen": -177.0059051513672, "logps/rejected": -282.23992919921875, "loss": 0.1999, "rewards/accuracies": 0.875, "rewards/chosen": -0.6601979732513428, "rewards/margins": 3.146947145462036, "rewards/rejected": -3.807145118713379, "step": 1087 }, { "epoch": 0.13, "learning_rate": 2.6602102279437815e-07, "logits/chosen": -1.7882392406463623, "logits/rejected": -2.0531795024871826, "logps/chosen": -545.419921875, "logps/rejected": -408.034912109375, "loss": 0.6133, "rewards/accuracies": 0.75, "rewards/chosen": -0.942764163017273, "rewards/margins": 0.8641189932823181, "rewards/rejected": -1.8068830966949463, "step": 1088 }, { "epoch": 0.13, "learning_rate": 2.659855911184599e-07, "logits/chosen": -2.2806711196899414, "logits/rejected": -1.8763352632522583, "logps/chosen": -163.35858154296875, "logps/rejected": -327.4326171875, "loss": 0.2895, "rewards/accuracies": 0.875, "rewards/chosen": -0.32234013080596924, "rewards/margins": 2.028449535369873, "rewards/rejected": -2.3507895469665527, "step": 1089 }, { "epoch": 0.13, "learning_rate": 2.659501594425416e-07, "logits/chosen": -2.550938129425049, "logits/rejected": -2.4827687740325928, "logps/chosen": -163.6614990234375, "logps/rejected": -177.9490203857422, "loss": 0.4412, "rewards/accuracies": 0.75, "rewards/chosen": -1.6325076818466187, "rewards/margins": 1.0199638605117798, "rewards/rejected": -2.6524715423583984, "step": 1090 }, { "epoch": 0.13, "learning_rate": 2.6591472776662334e-07, "logits/chosen": -2.247694969177246, "logits/rejected": -2.2467408180236816, "logps/chosen": -180.94143676757812, "logps/rejected": -234.705810546875, "loss": 0.2663, "rewards/accuracies": 1.0, "rewards/chosen": -0.5827271342277527, "rewards/margins": 2.0683765411376953, "rewards/rejected": -2.6511034965515137, "step": 1091 }, { "epoch": 0.13, "learning_rate": 2.658792960907051e-07, "logits/chosen": -2.0928311347961426, "logits/rejected": -2.2146756649017334, "logps/chosen": -260.311279296875, "logps/rejected": -222.48345947265625, "loss": 0.817, "rewards/accuracies": 0.375, "rewards/chosen": -1.2820231914520264, "rewards/margins": 0.635793149471283, "rewards/rejected": -1.917816400527954, "step": 1092 }, { "epoch": 0.13, "learning_rate": 2.6584386441478684e-07, "logits/chosen": -2.783607006072998, "logits/rejected": -2.773996591567993, "logps/chosen": -235.49896240234375, "logps/rejected": -243.24166870117188, "loss": 0.8725, "rewards/accuracies": 0.5, "rewards/chosen": -1.2005155086517334, "rewards/margins": 0.8133785724639893, "rewards/rejected": -2.0138938426971436, "step": 1093 }, { "epoch": 0.13, "learning_rate": 2.6580843273886853e-07, "logits/chosen": -2.3221139907836914, "logits/rejected": -2.1175389289855957, "logps/chosen": -290.72161865234375, "logps/rejected": -289.4841613769531, "loss": 0.3888, "rewards/accuracies": 0.75, "rewards/chosen": -0.15285608172416687, "rewards/margins": 1.6266357898712158, "rewards/rejected": -1.779491901397705, "step": 1094 }, { "epoch": 0.13, "learning_rate": 2.657730010629503e-07, "logits/chosen": -2.4182841777801514, "logits/rejected": -2.357902765274048, "logps/chosen": -222.34373474121094, "logps/rejected": -359.35150146484375, "loss": 0.2674, "rewards/accuracies": 0.875, "rewards/chosen": -0.6775766611099243, "rewards/margins": 1.882901668548584, "rewards/rejected": -2.5604782104492188, "step": 1095 }, { "epoch": 0.13, "learning_rate": 2.65737569387032e-07, "logits/chosen": -2.413295269012451, "logits/rejected": -2.527707099914551, "logps/chosen": -321.5057373046875, "logps/rejected": -233.25619506835938, "loss": 0.5285, "rewards/accuracies": 0.875, "rewards/chosen": -1.1913217306137085, "rewards/margins": 0.5805375576019287, "rewards/rejected": -1.7718591690063477, "step": 1096 }, { "epoch": 0.13, "learning_rate": 2.657021377111137e-07, "logits/chosen": -1.4416167736053467, "logits/rejected": -2.289273500442505, "logps/chosen": -598.5987548828125, "logps/rejected": -286.2430725097656, "loss": 0.7164, "rewards/accuracies": 0.625, "rewards/chosen": -1.2400336265563965, "rewards/margins": 0.589780867099762, "rewards/rejected": -1.8298144340515137, "step": 1097 }, { "epoch": 0.13, "learning_rate": 2.656667060351955e-07, "logits/chosen": -2.509615898132324, "logits/rejected": -2.2834386825561523, "logps/chosen": -157.9860382080078, "logps/rejected": -253.32806396484375, "loss": 0.3401, "rewards/accuracies": 0.75, "rewards/chosen": 0.14680764079093933, "rewards/margins": 1.544450283050537, "rewards/rejected": -1.3976426124572754, "step": 1098 }, { "epoch": 0.13, "learning_rate": 2.6563127435927717e-07, "logits/chosen": -2.926828145980835, "logits/rejected": -2.830982208251953, "logps/chosen": -217.80709838867188, "logps/rejected": -326.3794250488281, "loss": 0.7466, "rewards/accuracies": 0.5, "rewards/chosen": -0.6181491017341614, "rewards/margins": 0.8377421498298645, "rewards/rejected": -1.4558912515640259, "step": 1099 }, { "epoch": 0.13, "learning_rate": 2.655958426833589e-07, "logits/chosen": -2.3615589141845703, "logits/rejected": -2.2544095516204834, "logps/chosen": -415.98504638671875, "logps/rejected": -414.3159484863281, "loss": 0.869, "rewards/accuracies": 0.5, "rewards/chosen": -0.9406054019927979, "rewards/margins": 0.16447995603084564, "rewards/rejected": -1.1050854921340942, "step": 1100 }, { "epoch": 0.13, "learning_rate": 2.655604110074406e-07, "logits/chosen": -2.0426700115203857, "logits/rejected": -1.7218594551086426, "logps/chosen": -389.1139831542969, "logps/rejected": -369.3939208984375, "loss": 0.9614, "rewards/accuracies": 0.375, "rewards/chosen": -0.770494282245636, "rewards/margins": 0.3803935945034027, "rewards/rejected": -1.1508879661560059, "step": 1101 }, { "epoch": 0.13, "learning_rate": 2.6552497933152236e-07, "logits/chosen": -2.2631208896636963, "logits/rejected": -2.1580097675323486, "logps/chosen": -358.5208740234375, "logps/rejected": -318.9033203125, "loss": 0.2931, "rewards/accuracies": 0.875, "rewards/chosen": -0.8357605338096619, "rewards/margins": 1.4440300464630127, "rewards/rejected": -2.2797906398773193, "step": 1102 }, { "epoch": 0.13, "learning_rate": 2.6548954765560406e-07, "logits/chosen": -2.308929204940796, "logits/rejected": -2.5453543663024902, "logps/chosen": -591.123046875, "logps/rejected": -351.06622314453125, "loss": 0.3023, "rewards/accuracies": 0.875, "rewards/chosen": -1.282114028930664, "rewards/margins": 1.4868993759155273, "rewards/rejected": -2.7690136432647705, "step": 1103 }, { "epoch": 0.13, "learning_rate": 2.654541159796858e-07, "logits/chosen": -2.7831220626831055, "logits/rejected": -2.698190927505493, "logps/chosen": -190.55572509765625, "logps/rejected": -407.96051025390625, "loss": 0.2952, "rewards/accuracies": 0.875, "rewards/chosen": -0.1295367181301117, "rewards/margins": 2.2018814086914062, "rewards/rejected": -2.33141827583313, "step": 1104 }, { "epoch": 0.13, "learning_rate": 2.6541868430376756e-07, "logits/chosen": -2.6207308769226074, "logits/rejected": -2.2476484775543213, "logps/chosen": -129.5454864501953, "logps/rejected": -194.4342498779297, "loss": 0.4873, "rewards/accuracies": 0.875, "rewards/chosen": -0.6489426493644714, "rewards/margins": 2.1855711936950684, "rewards/rejected": -2.8345139026641846, "step": 1105 }, { "epoch": 0.13, "learning_rate": 2.653832526278493e-07, "logits/chosen": -1.9586948156356812, "logits/rejected": -2.546908378601074, "logps/chosen": -476.88177490234375, "logps/rejected": -208.41064453125, "loss": 0.5031, "rewards/accuracies": 0.875, "rewards/chosen": -1.0982667207717896, "rewards/margins": 0.824949324131012, "rewards/rejected": -1.9232158660888672, "step": 1106 }, { "epoch": 0.13, "learning_rate": 2.65347820951931e-07, "logits/chosen": -1.5147030353546143, "logits/rejected": -1.755483865737915, "logps/chosen": -356.2819519042969, "logps/rejected": -265.4173278808594, "loss": 0.7831, "rewards/accuracies": 0.5, "rewards/chosen": -1.0069767236709595, "rewards/margins": 0.11011778563261032, "rewards/rejected": -1.1170945167541504, "step": 1107 }, { "epoch": 0.13, "learning_rate": 2.6531238927601275e-07, "logits/chosen": -2.457608699798584, "logits/rejected": -2.6591076850891113, "logps/chosen": -340.73577880859375, "logps/rejected": -279.0849914550781, "loss": 2.7111, "rewards/accuracies": 0.5, "rewards/chosen": -3.239335298538208, "rewards/margins": -1.6881706714630127, "rewards/rejected": -1.5511645078659058, "step": 1108 }, { "epoch": 0.13, "learning_rate": 2.652769576000945e-07, "logits/chosen": -2.078481912612915, "logits/rejected": -2.4076108932495117, "logps/chosen": -330.88232421875, "logps/rejected": -244.72325134277344, "loss": 0.5204, "rewards/accuracies": 0.75, "rewards/chosen": -0.7113993167877197, "rewards/margins": 1.6688363552093506, "rewards/rejected": -2.380235433578491, "step": 1109 }, { "epoch": 0.13, "learning_rate": 2.652415259241762e-07, "logits/chosen": -2.51499342918396, "logits/rejected": -2.336141586303711, "logps/chosen": -103.07061004638672, "logps/rejected": -165.7377166748047, "loss": 0.8417, "rewards/accuracies": 0.5, "rewards/chosen": -0.8785473704338074, "rewards/margins": -0.0045035481452941895, "rewards/rejected": -0.8740438222885132, "step": 1110 }, { "epoch": 0.13, "learning_rate": 2.6520609424825794e-07, "logits/chosen": -2.9689080715179443, "logits/rejected": -2.8139913082122803, "logps/chosen": -296.6859436035156, "logps/rejected": -149.15565490722656, "loss": 0.7821, "rewards/accuracies": 0.5, "rewards/chosen": -0.8711401224136353, "rewards/margins": 0.423603892326355, "rewards/rejected": -1.2947440147399902, "step": 1111 }, { "epoch": 0.13, "learning_rate": 2.6517066257233964e-07, "logits/chosen": -1.3513450622558594, "logits/rejected": -1.8739275932312012, "logps/chosen": -547.1605224609375, "logps/rejected": -282.3814697265625, "loss": 1.0077, "rewards/accuracies": 0.375, "rewards/chosen": -1.064497709274292, "rewards/margins": 0.17994588613510132, "rewards/rejected": -1.244443416595459, "step": 1112 }, { "epoch": 0.13, "learning_rate": 2.651352308964214e-07, "logits/chosen": -2.0606064796447754, "logits/rejected": -2.0770702362060547, "logps/chosen": -437.1154479980469, "logps/rejected": -370.8592529296875, "loss": 0.3114, "rewards/accuracies": 0.875, "rewards/chosen": -0.8396235108375549, "rewards/margins": 1.8326356410980225, "rewards/rejected": -2.6722593307495117, "step": 1113 }, { "epoch": 0.13, "learning_rate": 2.650997992205031e-07, "logits/chosen": -2.340580940246582, "logits/rejected": -2.4258506298065186, "logps/chosen": -297.9563903808594, "logps/rejected": -197.63385009765625, "loss": 0.5661, "rewards/accuracies": 0.75, "rewards/chosen": -1.2167377471923828, "rewards/margins": 0.4782801866531372, "rewards/rejected": -1.6950178146362305, "step": 1114 }, { "epoch": 0.13, "learning_rate": 2.6506436754458483e-07, "logits/chosen": -2.303903341293335, "logits/rejected": -2.414968967437744, "logps/chosen": -165.5876922607422, "logps/rejected": -220.72561645507812, "loss": 0.5413, "rewards/accuracies": 0.75, "rewards/chosen": -1.0871793031692505, "rewards/margins": 0.6749810576438904, "rewards/rejected": -1.762160301208496, "step": 1115 }, { "epoch": 0.13, "learning_rate": 2.650289358686666e-07, "logits/chosen": -2.4756009578704834, "logits/rejected": -2.348827838897705, "logps/chosen": -193.2213897705078, "logps/rejected": -254.95155334472656, "loss": 0.3764, "rewards/accuracies": 0.625, "rewards/chosen": -0.8099127411842346, "rewards/margins": 1.7865036725997925, "rewards/rejected": -2.596416473388672, "step": 1116 }, { "epoch": 0.13, "learning_rate": 2.6499350419274833e-07, "logits/chosen": -2.6768245697021484, "logits/rejected": -2.703096866607666, "logps/chosen": -227.60227966308594, "logps/rejected": -261.08349609375, "loss": 0.3892, "rewards/accuracies": 0.75, "rewards/chosen": -0.9357527494430542, "rewards/margins": 1.187957525253296, "rewards/rejected": -2.1237101554870605, "step": 1117 }, { "epoch": 0.13, "learning_rate": 2.6495807251683e-07, "logits/chosen": -2.486966133117676, "logits/rejected": -2.2645680904388428, "logps/chosen": -156.65118408203125, "logps/rejected": -322.9341125488281, "loss": 0.4817, "rewards/accuracies": 0.625, "rewards/chosen": -0.01912519335746765, "rewards/margins": 1.8248274326324463, "rewards/rejected": -1.8439527750015259, "step": 1118 }, { "epoch": 0.13, "learning_rate": 2.6492264084091177e-07, "logits/chosen": -2.4739532470703125, "logits/rejected": -2.3989529609680176, "logps/chosen": -256.1156005859375, "logps/rejected": -376.54736328125, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": -0.00928959995508194, "rewards/margins": 3.7912755012512207, "rewards/rejected": -3.800564765930176, "step": 1119 }, { "epoch": 0.13, "learning_rate": 2.648872091649935e-07, "logits/chosen": -2.5460989475250244, "logits/rejected": -2.5991601943969727, "logps/chosen": -312.82537841796875, "logps/rejected": -141.22872924804688, "loss": 0.4532, "rewards/accuracies": 0.875, "rewards/chosen": -0.5341533422470093, "rewards/margins": 0.8455988764762878, "rewards/rejected": -1.3797521591186523, "step": 1120 }, { "epoch": 0.13, "learning_rate": 2.648517774890752e-07, "logits/chosen": -1.8204810619354248, "logits/rejected": -1.9572088718414307, "logps/chosen": -262.59674072265625, "logps/rejected": -257.204833984375, "loss": 0.6376, "rewards/accuracies": 0.625, "rewards/chosen": -1.321199655532837, "rewards/margins": 0.4694935083389282, "rewards/rejected": -1.7906930446624756, "step": 1121 }, { "epoch": 0.13, "learning_rate": 2.6481634581315696e-07, "logits/chosen": -2.5142767429351807, "logits/rejected": -2.7048089504241943, "logps/chosen": -331.5746154785156, "logps/rejected": -279.2624206542969, "loss": 0.4942, "rewards/accuracies": 0.75, "rewards/chosen": -1.3908864259719849, "rewards/margins": 0.8765456676483154, "rewards/rejected": -2.26743221282959, "step": 1122 }, { "epoch": 0.13, "learning_rate": 2.6478091413723866e-07, "logits/chosen": -2.2930855751037598, "logits/rejected": -1.8471429347991943, "logps/chosen": -452.04217529296875, "logps/rejected": -402.63690185546875, "loss": 0.7316, "rewards/accuracies": 0.5, "rewards/chosen": -1.3275957107543945, "rewards/margins": 0.5119313597679138, "rewards/rejected": -1.8395270109176636, "step": 1123 }, { "epoch": 0.13, "learning_rate": 2.647454824613204e-07, "logits/chosen": -2.7456159591674805, "logits/rejected": -2.6556918621063232, "logps/chosen": -362.9642639160156, "logps/rejected": -250.8321533203125, "loss": 0.3985, "rewards/accuracies": 0.75, "rewards/chosen": -0.6638115048408508, "rewards/margins": 1.9315266609191895, "rewards/rejected": -2.5953383445739746, "step": 1124 }, { "epoch": 0.13, "learning_rate": 2.647100507854021e-07, "logits/chosen": -1.888339877128601, "logits/rejected": -1.832153558731079, "logps/chosen": -227.54742431640625, "logps/rejected": -290.60870361328125, "loss": 0.4358, "rewards/accuracies": 0.875, "rewards/chosen": -0.40289634466171265, "rewards/margins": 0.8044824600219727, "rewards/rejected": -1.20737886428833, "step": 1125 }, { "epoch": 0.13, "learning_rate": 2.6467461910948385e-07, "logits/chosen": -2.500401735305786, "logits/rejected": -2.471601963043213, "logps/chosen": -172.76699829101562, "logps/rejected": -253.16342163085938, "loss": 0.5051, "rewards/accuracies": 0.75, "rewards/chosen": -0.4813537001609802, "rewards/margins": 1.4733651876449585, "rewards/rejected": -1.954718828201294, "step": 1126 }, { "epoch": 0.13, "learning_rate": 2.646391874335656e-07, "logits/chosen": -2.450838327407837, "logits/rejected": -2.178544044494629, "logps/chosen": -430.1990966796875, "logps/rejected": -673.9619750976562, "loss": 0.6437, "rewards/accuracies": 0.75, "rewards/chosen": -0.4758790135383606, "rewards/margins": 1.9519634246826172, "rewards/rejected": -2.427842617034912, "step": 1127 }, { "epoch": 0.13, "learning_rate": 2.6460375575764735e-07, "logits/chosen": -2.049823522567749, "logits/rejected": -2.4346795082092285, "logps/chosen": -316.6060791015625, "logps/rejected": -138.97998046875, "loss": 0.6029, "rewards/accuracies": 0.5, "rewards/chosen": -0.782996416091919, "rewards/margins": 0.7283990979194641, "rewards/rejected": -1.5113954544067383, "step": 1128 }, { "epoch": 0.13, "learning_rate": 2.6456832408172905e-07, "logits/chosen": -2.3951737880706787, "logits/rejected": -2.4126908779144287, "logps/chosen": -318.27471923828125, "logps/rejected": -388.6593933105469, "loss": 0.328, "rewards/accuracies": 1.0, "rewards/chosen": -0.8942172527313232, "rewards/margins": 1.593430757522583, "rewards/rejected": -2.4876480102539062, "step": 1129 }, { "epoch": 0.13, "learning_rate": 2.645328924058108e-07, "logits/chosen": -2.577788829803467, "logits/rejected": -2.449037551879883, "logps/chosen": -159.98333740234375, "logps/rejected": -198.4295654296875, "loss": 0.3265, "rewards/accuracies": 0.75, "rewards/chosen": -0.3254249095916748, "rewards/margins": 2.2527410984039307, "rewards/rejected": -2.5781660079956055, "step": 1130 }, { "epoch": 0.13, "learning_rate": 2.6449746072989254e-07, "logits/chosen": -1.8045743703842163, "logits/rejected": -1.8222558498382568, "logps/chosen": -157.07113647460938, "logps/rejected": -314.88970947265625, "loss": 0.4468, "rewards/accuracies": 0.75, "rewards/chosen": -0.2372838854789734, "rewards/margins": 0.6673785448074341, "rewards/rejected": -0.904662549495697, "step": 1131 }, { "epoch": 0.13, "learning_rate": 2.6446202905397424e-07, "logits/chosen": -1.8360787630081177, "logits/rejected": -2.179034471511841, "logps/chosen": -498.54241943359375, "logps/rejected": -295.2343444824219, "loss": 0.4774, "rewards/accuracies": 0.75, "rewards/chosen": -0.5115102529525757, "rewards/margins": 1.0113028287887573, "rewards/rejected": -1.522813081741333, "step": 1132 }, { "epoch": 0.13, "learning_rate": 2.64426597378056e-07, "logits/chosen": -2.666804313659668, "logits/rejected": -2.6620712280273438, "logps/chosen": -210.9462432861328, "logps/rejected": -299.9907531738281, "loss": 0.2188, "rewards/accuracies": 1.0, "rewards/chosen": -0.19027571380138397, "rewards/margins": 1.890649437904358, "rewards/rejected": -2.080925226211548, "step": 1133 }, { "epoch": 0.13, "learning_rate": 2.643911657021377e-07, "logits/chosen": -2.0493626594543457, "logits/rejected": -2.429739475250244, "logps/chosen": -323.2890319824219, "logps/rejected": -242.64871215820312, "loss": 0.711, "rewards/accuracies": 0.625, "rewards/chosen": -0.6791374683380127, "rewards/margins": 0.3479766249656677, "rewards/rejected": -1.0271141529083252, "step": 1134 }, { "epoch": 0.13, "learning_rate": 2.6435573402621943e-07, "logits/chosen": -2.422412395477295, "logits/rejected": -2.2253293991088867, "logps/chosen": -218.44216918945312, "logps/rejected": -229.6834716796875, "loss": 0.3579, "rewards/accuracies": 0.75, "rewards/chosen": -0.29916635155677795, "rewards/margins": 1.7493116855621338, "rewards/rejected": -2.048478126525879, "step": 1135 }, { "epoch": 0.13, "learning_rate": 2.6432030235030113e-07, "logits/chosen": -1.953818440437317, "logits/rejected": -1.7557413578033447, "logps/chosen": -263.484375, "logps/rejected": -354.6983947753906, "loss": 0.4716, "rewards/accuracies": 0.625, "rewards/chosen": -1.6189831495285034, "rewards/margins": 1.167496919631958, "rewards/rejected": -2.786479949951172, "step": 1136 }, { "epoch": 0.13, "learning_rate": 2.642848706743829e-07, "logits/chosen": -2.321093797683716, "logits/rejected": -2.606283664703369, "logps/chosen": -575.6839599609375, "logps/rejected": -295.5623474121094, "loss": 0.5115, "rewards/accuracies": 0.75, "rewards/chosen": -0.6694839000701904, "rewards/margins": 0.7393667101860046, "rewards/rejected": -1.4088505506515503, "step": 1137 }, { "epoch": 0.13, "learning_rate": 2.642494389984646e-07, "logits/chosen": -2.1147918701171875, "logits/rejected": -2.159086227416992, "logps/chosen": -305.5387878417969, "logps/rejected": -240.6249237060547, "loss": 0.828, "rewards/accuracies": 0.625, "rewards/chosen": -0.8223506212234497, "rewards/margins": 0.2699509859085083, "rewards/rejected": -1.092301607131958, "step": 1138 }, { "epoch": 0.13, "learning_rate": 2.642140073225463e-07, "logits/chosen": -2.278658866882324, "logits/rejected": -2.263537645339966, "logps/chosen": -261.3817443847656, "logps/rejected": -258.72625732421875, "loss": 0.772, "rewards/accuracies": 0.5, "rewards/chosen": -1.7387564182281494, "rewards/margins": 0.6883573532104492, "rewards/rejected": -2.4271137714385986, "step": 1139 }, { "epoch": 0.13, "learning_rate": 2.6417857564662807e-07, "logits/chosen": -2.410578966140747, "logits/rejected": -2.2240686416625977, "logps/chosen": -219.3800506591797, "logps/rejected": -255.1181640625, "loss": 0.4691, "rewards/accuracies": 0.875, "rewards/chosen": -0.43520426750183105, "rewards/margins": 0.8628276586532593, "rewards/rejected": -1.2980319261550903, "step": 1140 }, { "epoch": 0.13, "learning_rate": 2.641431439707098e-07, "logits/chosen": -2.3582286834716797, "logits/rejected": -2.6278676986694336, "logps/chosen": -243.3148651123047, "logps/rejected": -131.18405151367188, "loss": 0.483, "rewards/accuracies": 0.875, "rewards/chosen": -0.3725321590900421, "rewards/margins": 1.076711654663086, "rewards/rejected": -1.4492437839508057, "step": 1141 }, { "epoch": 0.13, "learning_rate": 2.6410771229479157e-07, "logits/chosen": -2.3114359378814697, "logits/rejected": -2.2087669372558594, "logps/chosen": -226.905029296875, "logps/rejected": -233.50100708007812, "loss": 1.1821, "rewards/accuracies": 0.625, "rewards/chosen": -1.2513518333435059, "rewards/margins": -0.03444682061672211, "rewards/rejected": -1.216904878616333, "step": 1142 }, { "epoch": 0.13, "learning_rate": 2.6407228061887326e-07, "logits/chosen": -2.3138952255249023, "logits/rejected": -1.9072593450546265, "logps/chosen": -716.3370971679688, "logps/rejected": -401.35479736328125, "loss": 0.3242, "rewards/accuracies": 0.875, "rewards/chosen": -0.8961679935455322, "rewards/margins": 1.8221282958984375, "rewards/rejected": -2.718296527862549, "step": 1143 }, { "epoch": 0.13, "learning_rate": 2.64036848942955e-07, "logits/chosen": -2.4234542846679688, "logits/rejected": -2.2874369621276855, "logps/chosen": -137.1421356201172, "logps/rejected": -193.217529296875, "loss": 0.3462, "rewards/accuracies": 1.0, "rewards/chosen": -0.09147003293037415, "rewards/margins": 1.089120626449585, "rewards/rejected": -1.1805906295776367, "step": 1144 }, { "epoch": 0.13, "learning_rate": 2.640014172670367e-07, "logits/chosen": -2.5103533267974854, "logits/rejected": -2.282189130783081, "logps/chosen": -195.56314086914062, "logps/rejected": -291.3131103515625, "loss": 0.2306, "rewards/accuracies": 0.875, "rewards/chosen": -0.4826018214225769, "rewards/margins": 2.0470077991485596, "rewards/rejected": -2.5296096801757812, "step": 1145 }, { "epoch": 0.13, "learning_rate": 2.6396598559111845e-07, "logits/chosen": -2.4132399559020996, "logits/rejected": -2.4032554626464844, "logps/chosen": -259.71466064453125, "logps/rejected": -336.08624267578125, "loss": 0.2779, "rewards/accuracies": 0.75, "rewards/chosen": -0.26261571049690247, "rewards/margins": 1.910552978515625, "rewards/rejected": -2.173168897628784, "step": 1146 }, { "epoch": 0.13, "learning_rate": 2.6393055391520015e-07, "logits/chosen": -2.321951150894165, "logits/rejected": -2.4085428714752197, "logps/chosen": -141.59036254882812, "logps/rejected": -222.86492919921875, "loss": 0.5874, "rewards/accuracies": 0.5, "rewards/chosen": -0.24035170674324036, "rewards/margins": 0.6349031329154968, "rewards/rejected": -0.8752548694610596, "step": 1147 }, { "epoch": 0.13, "learning_rate": 2.638951222392819e-07, "logits/chosen": -2.500476837158203, "logits/rejected": -2.2185559272766113, "logps/chosen": -282.4196472167969, "logps/rejected": -346.120361328125, "loss": 0.2628, "rewards/accuracies": 1.0, "rewards/chosen": -0.48218774795532227, "rewards/margins": 3.124493360519409, "rewards/rejected": -3.6066813468933105, "step": 1148 }, { "epoch": 0.13, "learning_rate": 2.6385969056336365e-07, "logits/chosen": -2.470456123352051, "logits/rejected": -2.6245312690734863, "logps/chosen": -379.9046325683594, "logps/rejected": -315.7315673828125, "loss": 0.4635, "rewards/accuracies": 0.875, "rewards/chosen": -0.8521103262901306, "rewards/margins": 2.3367209434509277, "rewards/rejected": -3.188831329345703, "step": 1149 }, { "epoch": 0.13, "learning_rate": 2.6382425888744534e-07, "logits/chosen": -2.362119197845459, "logits/rejected": -2.438901901245117, "logps/chosen": -240.6522216796875, "logps/rejected": -267.0792236328125, "loss": 0.2825, "rewards/accuracies": 0.875, "rewards/chosen": -0.37331151962280273, "rewards/margins": 2.65958571434021, "rewards/rejected": -3.0328972339630127, "step": 1150 }, { "epoch": 0.13, "learning_rate": 2.637888272115271e-07, "logits/chosen": -1.946824312210083, "logits/rejected": -2.186746835708618, "logps/chosen": -447.8275451660156, "logps/rejected": -336.83599853515625, "loss": 0.6399, "rewards/accuracies": 0.5, "rewards/chosen": -0.8495250940322876, "rewards/margins": 0.7241283655166626, "rewards/rejected": -1.5736534595489502, "step": 1151 }, { "epoch": 0.13, "learning_rate": 2.6375339553560884e-07, "logits/chosen": -2.306161880493164, "logits/rejected": -2.333437442779541, "logps/chosen": -251.08688354492188, "logps/rejected": -255.38507080078125, "loss": 0.6567, "rewards/accuracies": 0.75, "rewards/chosen": -1.0192550420761108, "rewards/margins": 1.5989925861358643, "rewards/rejected": -2.6182477474212646, "step": 1152 }, { "epoch": 0.13, "learning_rate": 2.637179638596906e-07, "logits/chosen": -1.8576173782348633, "logits/rejected": -1.7964882850646973, "logps/chosen": -343.2393798828125, "logps/rejected": -339.1413269042969, "loss": 0.6276, "rewards/accuracies": 0.625, "rewards/chosen": -0.9936886429786682, "rewards/margins": 0.9223480820655823, "rewards/rejected": -1.916036605834961, "step": 1153 }, { "epoch": 0.13, "learning_rate": 2.636825321837723e-07, "logits/chosen": -2.435818672180176, "logits/rejected": -2.4917922019958496, "logps/chosen": -289.1380920410156, "logps/rejected": -265.42437744140625, "loss": 0.5782, "rewards/accuracies": 0.875, "rewards/chosen": -1.2091882228851318, "rewards/margins": 1.4961382150650024, "rewards/rejected": -2.7053263187408447, "step": 1154 }, { "epoch": 0.13, "learning_rate": 2.6364710050785403e-07, "logits/chosen": -2.1539082527160645, "logits/rejected": -2.3165366649627686, "logps/chosen": -214.49935913085938, "logps/rejected": -208.94674682617188, "loss": 0.2486, "rewards/accuracies": 0.875, "rewards/chosen": -0.13195329904556274, "rewards/margins": 1.7579033374786377, "rewards/rejected": -1.8898565769195557, "step": 1155 }, { "epoch": 0.13, "learning_rate": 2.6361166883193573e-07, "logits/chosen": -2.00500226020813, "logits/rejected": -2.322610855102539, "logps/chosen": -470.88458251953125, "logps/rejected": -292.46820068359375, "loss": 0.4336, "rewards/accuracies": 0.875, "rewards/chosen": -0.770811915397644, "rewards/margins": 1.118998408317566, "rewards/rejected": -1.88981032371521, "step": 1156 }, { "epoch": 0.13, "learning_rate": 2.635762371560175e-07, "logits/chosen": -2.3267693519592285, "logits/rejected": -2.507692575454712, "logps/chosen": -278.3951416015625, "logps/rejected": -159.1973114013672, "loss": 0.4496, "rewards/accuracies": 0.875, "rewards/chosen": -0.13042141497135162, "rewards/margins": 1.0755350589752197, "rewards/rejected": -1.2059565782546997, "step": 1157 }, { "epoch": 0.13, "learning_rate": 2.6354080548009917e-07, "logits/chosen": -2.3902435302734375, "logits/rejected": -2.7036831378936768, "logps/chosen": -286.00567626953125, "logps/rejected": -270.46026611328125, "loss": 0.3364, "rewards/accuracies": 0.875, "rewards/chosen": -0.9938353300094604, "rewards/margins": 1.4550251960754395, "rewards/rejected": -2.4488604068756104, "step": 1158 }, { "epoch": 0.13, "learning_rate": 2.635053738041809e-07, "logits/chosen": -2.45825457572937, "logits/rejected": -2.304996967315674, "logps/chosen": -66.20259094238281, "logps/rejected": -214.89642333984375, "loss": 0.5714, "rewards/accuracies": 0.625, "rewards/chosen": -0.5067706108093262, "rewards/margins": 1.611901044845581, "rewards/rejected": -2.1186716556549072, "step": 1159 }, { "epoch": 0.13, "learning_rate": 2.6346994212826267e-07, "logits/chosen": -2.286484956741333, "logits/rejected": -2.21768856048584, "logps/chosen": -173.68682861328125, "logps/rejected": -174.7557830810547, "loss": 1.4401, "rewards/accuracies": 0.5, "rewards/chosen": -1.524367332458496, "rewards/margins": 0.017395317554473877, "rewards/rejected": -1.5417625904083252, "step": 1160 }, { "epoch": 0.14, "learning_rate": 2.6343451045234437e-07, "logits/chosen": -2.4704878330230713, "logits/rejected": -2.1469638347625732, "logps/chosen": -250.3804931640625, "logps/rejected": -317.4695129394531, "loss": 0.4295, "rewards/accuracies": 0.875, "rewards/chosen": -0.8876146674156189, "rewards/margins": 1.1253702640533447, "rewards/rejected": -2.0129847526550293, "step": 1161 }, { "epoch": 0.14, "learning_rate": 2.633990787764261e-07, "logits/chosen": -3.063331127166748, "logits/rejected": -3.0439505577087402, "logps/chosen": -282.2261657714844, "logps/rejected": -238.1290740966797, "loss": 0.3045, "rewards/accuracies": 0.75, "rewards/chosen": -0.6587254405021667, "rewards/margins": 2.3743972778320312, "rewards/rejected": -3.0331225395202637, "step": 1162 }, { "epoch": 0.14, "learning_rate": 2.6336364710050786e-07, "logits/chosen": -2.143299102783203, "logits/rejected": -2.0063767433166504, "logps/chosen": -202.81326293945312, "logps/rejected": -202.39772033691406, "loss": 0.4905, "rewards/accuracies": 0.75, "rewards/chosen": -0.8678746223449707, "rewards/margins": 0.9972575902938843, "rewards/rejected": -1.8651323318481445, "step": 1163 }, { "epoch": 0.14, "learning_rate": 2.6332821542458956e-07, "logits/chosen": -2.108022689819336, "logits/rejected": -2.1015584468841553, "logps/chosen": -340.32159423828125, "logps/rejected": -305.5045166015625, "loss": 0.2573, "rewards/accuracies": 0.875, "rewards/chosen": -0.3668273985385895, "rewards/margins": 2.319349765777588, "rewards/rejected": -2.6861772537231445, "step": 1164 }, { "epoch": 0.14, "learning_rate": 2.632927837486713e-07, "logits/chosen": -2.429131269454956, "logits/rejected": -2.4190282821655273, "logps/chosen": -208.83245849609375, "logps/rejected": -396.3777160644531, "loss": 0.2876, "rewards/accuracies": 0.875, "rewards/chosen": -0.239047572016716, "rewards/margins": 2.181807041168213, "rewards/rejected": -2.4208548069000244, "step": 1165 }, { "epoch": 0.14, "learning_rate": 2.6325735207275306e-07, "logits/chosen": -1.760108232498169, "logits/rejected": -1.770267367362976, "logps/chosen": -235.20016479492188, "logps/rejected": -316.287353515625, "loss": 0.5786, "rewards/accuracies": 0.75, "rewards/chosen": -0.2826373875141144, "rewards/margins": 1.1674845218658447, "rewards/rejected": -1.4501218795776367, "step": 1166 }, { "epoch": 0.14, "learning_rate": 2.6322192039683475e-07, "logits/chosen": -2.501248836517334, "logits/rejected": -2.513336420059204, "logps/chosen": -318.531982421875, "logps/rejected": -179.18014526367188, "loss": 0.4329, "rewards/accuracies": 0.75, "rewards/chosen": -0.3961593508720398, "rewards/margins": 1.1899309158325195, "rewards/rejected": -1.586090326309204, "step": 1167 }, { "epoch": 0.14, "learning_rate": 2.631864887209165e-07, "logits/chosen": -2.183765172958374, "logits/rejected": -2.099086046218872, "logps/chosen": -409.15118408203125, "logps/rejected": -308.73162841796875, "loss": 0.3923, "rewards/accuracies": 0.875, "rewards/chosen": 0.021406937390565872, "rewards/margins": 1.1926398277282715, "rewards/rejected": -1.1712329387664795, "step": 1168 }, { "epoch": 0.14, "learning_rate": 2.631510570449982e-07, "logits/chosen": -1.9123578071594238, "logits/rejected": -2.0035784244537354, "logps/chosen": -298.87176513671875, "logps/rejected": -341.4882507324219, "loss": 0.5553, "rewards/accuracies": 0.625, "rewards/chosen": -0.6749172806739807, "rewards/margins": 0.7260206341743469, "rewards/rejected": -1.400937795639038, "step": 1169 }, { "epoch": 0.14, "learning_rate": 2.6311562536907994e-07, "logits/chosen": -2.946542739868164, "logits/rejected": -3.0337436199188232, "logps/chosen": -292.3608703613281, "logps/rejected": -364.19183349609375, "loss": 0.3069, "rewards/accuracies": 0.875, "rewards/chosen": -0.04323352873325348, "rewards/margins": 2.130321979522705, "rewards/rejected": -2.173555612564087, "step": 1170 }, { "epoch": 0.14, "learning_rate": 2.630801936931617e-07, "logits/chosen": -2.239610195159912, "logits/rejected": -2.7027440071105957, "logps/chosen": -261.6641540527344, "logps/rejected": -176.2165985107422, "loss": 0.172, "rewards/accuracies": 1.0, "rewards/chosen": -0.48191219568252563, "rewards/margins": 2.5597918033599854, "rewards/rejected": -3.0417041778564453, "step": 1171 }, { "epoch": 0.14, "learning_rate": 2.630447620172434e-07, "logits/chosen": -2.0850253105163574, "logits/rejected": -2.200005531311035, "logps/chosen": -384.02728271484375, "logps/rejected": -309.0516052246094, "loss": 0.4421, "rewards/accuracies": 0.75, "rewards/chosen": -0.8015917539596558, "rewards/margins": 1.5241868495941162, "rewards/rejected": -2.3257784843444824, "step": 1172 }, { "epoch": 0.14, "learning_rate": 2.6300933034132514e-07, "logits/chosen": -1.9583872556686401, "logits/rejected": -2.246471881866455, "logps/chosen": -282.86395263671875, "logps/rejected": -247.89865112304688, "loss": 0.7712, "rewards/accuracies": 0.625, "rewards/chosen": -1.6944634914398193, "rewards/margins": 0.166303351521492, "rewards/rejected": -1.8607667684555054, "step": 1173 }, { "epoch": 0.14, "learning_rate": 2.6297389866540683e-07, "logits/chosen": -2.5510568618774414, "logits/rejected": -2.576976776123047, "logps/chosen": -145.6563720703125, "logps/rejected": -133.72323608398438, "loss": 0.4499, "rewards/accuracies": 0.75, "rewards/chosen": -0.5828529596328735, "rewards/margins": 1.038272500038147, "rewards/rejected": -1.6211254596710205, "step": 1174 }, { "epoch": 0.14, "learning_rate": 2.629384669894886e-07, "logits/chosen": -2.595310926437378, "logits/rejected": -2.4826741218566895, "logps/chosen": -353.17718505859375, "logps/rejected": -329.4533386230469, "loss": 0.2795, "rewards/accuracies": 0.875, "rewards/chosen": -0.6807783842086792, "rewards/margins": 2.031381130218506, "rewards/rejected": -2.7121593952178955, "step": 1175 }, { "epoch": 0.14, "learning_rate": 2.6290303531357033e-07, "logits/chosen": -2.4998817443847656, "logits/rejected": -2.2793662548065186, "logps/chosen": -265.1583557128906, "logps/rejected": -347.1614685058594, "loss": 0.299, "rewards/accuracies": 1.0, "rewards/chosen": -0.4692988693714142, "rewards/margins": 1.5370488166809082, "rewards/rejected": -2.00634765625, "step": 1176 }, { "epoch": 0.14, "learning_rate": 2.628676036376521e-07, "logits/chosen": -2.4551334381103516, "logits/rejected": -2.7493672370910645, "logps/chosen": -419.4507751464844, "logps/rejected": -396.3478088378906, "loss": 0.3919, "rewards/accuracies": 0.875, "rewards/chosen": -0.3036234974861145, "rewards/margins": 1.1721701622009277, "rewards/rejected": -1.4757936000823975, "step": 1177 }, { "epoch": 0.14, "learning_rate": 2.628321719617338e-07, "logits/chosen": -2.5716795921325684, "logits/rejected": -2.5402305126190186, "logps/chosen": -254.3050994873047, "logps/rejected": -291.22003173828125, "loss": 0.4109, "rewards/accuracies": 0.625, "rewards/chosen": -1.3384926319122314, "rewards/margins": 1.2213716506958008, "rewards/rejected": -2.5598642826080322, "step": 1178 }, { "epoch": 0.14, "learning_rate": 2.627967402858155e-07, "logits/chosen": -1.796653151512146, "logits/rejected": -1.9004765748977661, "logps/chosen": -420.7557067871094, "logps/rejected": -398.71917724609375, "loss": 0.4922, "rewards/accuracies": 0.625, "rewards/chosen": -0.7709764242172241, "rewards/margins": 1.1081091165542603, "rewards/rejected": -1.879085659980774, "step": 1179 }, { "epoch": 0.14, "learning_rate": 2.627613086098972e-07, "logits/chosen": -2.485393524169922, "logits/rejected": -2.565720558166504, "logps/chosen": -392.0726623535156, "logps/rejected": -187.7865753173828, "loss": 0.5966, "rewards/accuracies": 0.625, "rewards/chosen": -0.5854492783546448, "rewards/margins": 0.5064091682434082, "rewards/rejected": -1.0918583869934082, "step": 1180 }, { "epoch": 0.14, "learning_rate": 2.6272587693397897e-07, "logits/chosen": -2.1885361671447754, "logits/rejected": -2.459245204925537, "logps/chosen": -360.46527099609375, "logps/rejected": -204.71995544433594, "loss": 0.3446, "rewards/accuracies": 0.75, "rewards/chosen": -0.2684474587440491, "rewards/margins": 1.7257689237594604, "rewards/rejected": -1.9942164421081543, "step": 1181 }, { "epoch": 0.14, "learning_rate": 2.626904452580607e-07, "logits/chosen": -1.7976491451263428, "logits/rejected": -1.7719624042510986, "logps/chosen": -334.6031494140625, "logps/rejected": -409.6170654296875, "loss": 0.4979, "rewards/accuracies": 0.75, "rewards/chosen": -0.05143106356263161, "rewards/margins": 1.2966761589050293, "rewards/rejected": -1.3481073379516602, "step": 1182 }, { "epoch": 0.14, "learning_rate": 2.626550135821424e-07, "logits/chosen": -2.431842803955078, "logits/rejected": -2.246018409729004, "logps/chosen": -230.97503662109375, "logps/rejected": -289.959228515625, "loss": 0.5661, "rewards/accuracies": 0.625, "rewards/chosen": -1.1663565635681152, "rewards/margins": 0.8083146810531616, "rewards/rejected": -1.9746712446212769, "step": 1183 }, { "epoch": 0.14, "learning_rate": 2.6261958190622416e-07, "logits/chosen": -1.9843817949295044, "logits/rejected": -2.199049472808838, "logps/chosen": -368.1208801269531, "logps/rejected": -300.5089416503906, "loss": 0.4808, "rewards/accuracies": 0.875, "rewards/chosen": -0.420304536819458, "rewards/margins": 0.9655351042747498, "rewards/rejected": -1.3858397006988525, "step": 1184 }, { "epoch": 0.14, "learning_rate": 2.6258415023030586e-07, "logits/chosen": -2.9077420234680176, "logits/rejected": -2.9521031379699707, "logps/chosen": -252.1707763671875, "logps/rejected": -295.3094482421875, "loss": 0.1838, "rewards/accuracies": 1.0, "rewards/chosen": -0.7913833260536194, "rewards/margins": 2.14546799659729, "rewards/rejected": -2.9368512630462646, "step": 1185 }, { "epoch": 0.14, "learning_rate": 2.625487185543876e-07, "logits/chosen": -2.6385560035705566, "logits/rejected": -2.499655246734619, "logps/chosen": -170.9019317626953, "logps/rejected": -203.0529022216797, "loss": 0.986, "rewards/accuracies": 0.75, "rewards/chosen": -0.8377872705459595, "rewards/margins": -0.10899387300014496, "rewards/rejected": -0.7287933826446533, "step": 1186 }, { "epoch": 0.14, "learning_rate": 2.6251328687846935e-07, "logits/chosen": -2.0109195709228516, "logits/rejected": -2.114600658416748, "logps/chosen": -203.72732543945312, "logps/rejected": -210.8975830078125, "loss": 0.3907, "rewards/accuracies": 0.75, "rewards/chosen": 0.01627233624458313, "rewards/margins": 1.3074917793273926, "rewards/rejected": -1.2912194728851318, "step": 1187 }, { "epoch": 0.14, "learning_rate": 2.624778552025511e-07, "logits/chosen": -2.3000502586364746, "logits/rejected": -2.3431100845336914, "logps/chosen": -304.0159912109375, "logps/rejected": -357.82757568359375, "loss": 0.2197, "rewards/accuracies": 1.0, "rewards/chosen": 0.04214784502983093, "rewards/margins": 1.8568241596221924, "rewards/rejected": -1.814676284790039, "step": 1188 }, { "epoch": 0.14, "learning_rate": 2.624424235266328e-07, "logits/chosen": -2.133063793182373, "logits/rejected": -1.6254559755325317, "logps/chosen": -483.1354064941406, "logps/rejected": -451.7200622558594, "loss": 0.3453, "rewards/accuracies": 0.875, "rewards/chosen": -1.1215705871582031, "rewards/margins": 1.9926273822784424, "rewards/rejected": -3.1141977310180664, "step": 1189 }, { "epoch": 0.14, "learning_rate": 2.6240699185071455e-07, "logits/chosen": -2.5405287742614746, "logits/rejected": -2.7738232612609863, "logps/chosen": -314.0227355957031, "logps/rejected": -289.7928466796875, "loss": 0.2003, "rewards/accuracies": 0.875, "rewards/chosen": -0.22033053636550903, "rewards/margins": 2.578366756439209, "rewards/rejected": -2.798696994781494, "step": 1190 }, { "epoch": 0.14, "learning_rate": 2.6237156017479624e-07, "logits/chosen": -2.0926194190979004, "logits/rejected": -2.184406042098999, "logps/chosen": -343.1962890625, "logps/rejected": -179.75657653808594, "loss": 0.8271, "rewards/accuracies": 0.625, "rewards/chosen": -0.8657636046409607, "rewards/margins": 0.31198519468307495, "rewards/rejected": -1.1777487993240356, "step": 1191 }, { "epoch": 0.14, "learning_rate": 2.62336128498878e-07, "logits/chosen": -2.6737442016601562, "logits/rejected": -2.7173068523406982, "logps/chosen": -236.8052978515625, "logps/rejected": -184.764404296875, "loss": 0.2962, "rewards/accuracies": 0.75, "rewards/chosen": -0.02981293946504593, "rewards/margins": 2.2216544151306152, "rewards/rejected": -2.251467227935791, "step": 1192 }, { "epoch": 0.14, "learning_rate": 2.623006968229597e-07, "logits/chosen": -2.276961326599121, "logits/rejected": -2.25769305229187, "logps/chosen": -292.73095703125, "logps/rejected": -371.23980712890625, "loss": 0.4385, "rewards/accuracies": 0.875, "rewards/chosen": -1.0152618885040283, "rewards/margins": 1.9563926458358765, "rewards/rejected": -2.9716544151306152, "step": 1193 }, { "epoch": 0.14, "learning_rate": 2.6226526514704143e-07, "logits/chosen": -2.0135021209716797, "logits/rejected": -2.1968777179718018, "logps/chosen": -354.910400390625, "logps/rejected": -254.7936248779297, "loss": 0.5928, "rewards/accuracies": 0.625, "rewards/chosen": -1.2552728652954102, "rewards/margins": 0.5037976503372192, "rewards/rejected": -1.7590703964233398, "step": 1194 }, { "epoch": 0.14, "learning_rate": 2.622298334711232e-07, "logits/chosen": -2.6499695777893066, "logits/rejected": -2.869297981262207, "logps/chosen": -367.01806640625, "logps/rejected": -354.322021484375, "loss": 0.4736, "rewards/accuracies": 0.875, "rewards/chosen": -0.38934648036956787, "rewards/margins": 1.0461153984069824, "rewards/rejected": -1.4354617595672607, "step": 1195 }, { "epoch": 0.14, "learning_rate": 2.621944017952049e-07, "logits/chosen": -1.9062325954437256, "logits/rejected": -2.2725536823272705, "logps/chosen": -402.7008361816406, "logps/rejected": -261.5166015625, "loss": 0.5861, "rewards/accuracies": 0.5, "rewards/chosen": -1.0627286434173584, "rewards/margins": 0.7243897318840027, "rewards/rejected": -1.7871183156967163, "step": 1196 }, { "epoch": 0.14, "learning_rate": 2.621589701192866e-07, "logits/chosen": -1.7307496070861816, "logits/rejected": -2.127225399017334, "logps/chosen": -543.2939453125, "logps/rejected": -495.5115966796875, "loss": 0.9426, "rewards/accuracies": 0.375, "rewards/chosen": -1.229633092880249, "rewards/margins": 0.22584637999534607, "rewards/rejected": -1.455479383468628, "step": 1197 }, { "epoch": 0.14, "learning_rate": 2.621235384433684e-07, "logits/chosen": -2.2439160346984863, "logits/rejected": -2.332536220550537, "logps/chosen": -243.64869689941406, "logps/rejected": -142.17454528808594, "loss": 0.4622, "rewards/accuracies": 0.75, "rewards/chosen": -0.4734541177749634, "rewards/margins": 0.986446738243103, "rewards/rejected": -1.459900975227356, "step": 1198 }, { "epoch": 0.14, "learning_rate": 2.620881067674501e-07, "logits/chosen": -1.8006635904312134, "logits/rejected": -1.9461729526519775, "logps/chosen": -200.7579803466797, "logps/rejected": -188.3125457763672, "loss": 0.8033, "rewards/accuracies": 0.875, "rewards/chosen": -0.9019895195960999, "rewards/margins": 0.6709003448486328, "rewards/rejected": -1.572889804840088, "step": 1199 }, { "epoch": 0.14, "learning_rate": 2.620526750915318e-07, "logits/chosen": -2.531041145324707, "logits/rejected": -2.4935429096221924, "logps/chosen": -180.9168243408203, "logps/rejected": -223.1610107421875, "loss": 0.1341, "rewards/accuracies": 1.0, "rewards/chosen": -0.2965756952762604, "rewards/margins": 2.6207337379455566, "rewards/rejected": -2.9173097610473633, "step": 1200 }, { "epoch": 0.14, "learning_rate": 2.6201724341561357e-07, "logits/chosen": -2.7763967514038086, "logits/rejected": -2.9098730087280273, "logps/chosen": -270.83380126953125, "logps/rejected": -226.41517639160156, "loss": 0.1937, "rewards/accuracies": 0.875, "rewards/chosen": -0.5548675060272217, "rewards/margins": 2.5873751640319824, "rewards/rejected": -3.142242908477783, "step": 1201 }, { "epoch": 0.14, "learning_rate": 2.6198181173969526e-07, "logits/chosen": -2.3674473762512207, "logits/rejected": -2.3587379455566406, "logps/chosen": -459.4141845703125, "logps/rejected": -279.41668701171875, "loss": 0.297, "rewards/accuracies": 0.75, "rewards/chosen": -0.6752339005470276, "rewards/margins": 2.298826217651367, "rewards/rejected": -2.974060297012329, "step": 1202 }, { "epoch": 0.14, "learning_rate": 2.61946380063777e-07, "logits/chosen": -1.9262473583221436, "logits/rejected": -2.0069022178649902, "logps/chosen": -385.65643310546875, "logps/rejected": -297.6188049316406, "loss": 0.8983, "rewards/accuracies": 0.5, "rewards/chosen": -0.7674019932746887, "rewards/margins": 0.3854081630706787, "rewards/rejected": -1.1528100967407227, "step": 1203 }, { "epoch": 0.14, "learning_rate": 2.619109483878587e-07, "logits/chosen": -2.362590789794922, "logits/rejected": -2.553309202194214, "logps/chosen": -393.9818420410156, "logps/rejected": -386.4381103515625, "loss": 1.2557, "rewards/accuracies": 0.875, "rewards/chosen": -1.4655123949050903, "rewards/margins": 1.1759333610534668, "rewards/rejected": -2.6414456367492676, "step": 1204 }, { "epoch": 0.14, "learning_rate": 2.6187551671194046e-07, "logits/chosen": -2.242875337600708, "logits/rejected": -1.9882506132125854, "logps/chosen": -323.76617431640625, "logps/rejected": -330.654052734375, "loss": 0.302, "rewards/accuracies": 1.0, "rewards/chosen": -0.39922797679901123, "rewards/margins": 1.7042144536972046, "rewards/rejected": -2.103442430496216, "step": 1205 }, { "epoch": 0.14, "learning_rate": 2.618400850360222e-07, "logits/chosen": -2.1490612030029297, "logits/rejected": -2.0353264808654785, "logps/chosen": -229.81558227539062, "logps/rejected": -325.930419921875, "loss": 0.5084, "rewards/accuracies": 0.875, "rewards/chosen": -0.6471105813980103, "rewards/margins": 1.5092400312423706, "rewards/rejected": -2.156350612640381, "step": 1206 }, { "epoch": 0.14, "learning_rate": 2.618046533601039e-07, "logits/chosen": -2.101057529449463, "logits/rejected": -2.0508899688720703, "logps/chosen": -290.75604248046875, "logps/rejected": -298.0745544433594, "loss": 0.5806, "rewards/accuracies": 0.625, "rewards/chosen": -0.5231325626373291, "rewards/margins": 1.4701275825500488, "rewards/rejected": -1.993260145187378, "step": 1207 }, { "epoch": 0.14, "learning_rate": 2.6176922168418565e-07, "logits/chosen": -2.351505756378174, "logits/rejected": -2.3583590984344482, "logps/chosen": -232.89732360839844, "logps/rejected": -262.2727966308594, "loss": 0.6907, "rewards/accuracies": 0.75, "rewards/chosen": -0.7882159352302551, "rewards/margins": 1.2391718626022339, "rewards/rejected": -2.0273876190185547, "step": 1208 }, { "epoch": 0.14, "learning_rate": 2.6173379000826734e-07, "logits/chosen": -2.822025775909424, "logits/rejected": -2.6406497955322266, "logps/chosen": -168.3657684326172, "logps/rejected": -187.40231323242188, "loss": 0.4297, "rewards/accuracies": 0.625, "rewards/chosen": -0.7623965740203857, "rewards/margins": 1.1170769929885864, "rewards/rejected": -1.8794736862182617, "step": 1209 }, { "epoch": 0.14, "learning_rate": 2.6169835833234915e-07, "logits/chosen": -2.6718530654907227, "logits/rejected": -2.711927890777588, "logps/chosen": -431.6191711425781, "logps/rejected": -244.91815185546875, "loss": 0.6715, "rewards/accuracies": 0.625, "rewards/chosen": -0.8967389464378357, "rewards/margins": 0.5498989224433899, "rewards/rejected": -1.4466378688812256, "step": 1210 }, { "epoch": 0.14, "learning_rate": 2.6166292665643084e-07, "logits/chosen": -1.991060733795166, "logits/rejected": -2.0923988819122314, "logps/chosen": -362.5167236328125, "logps/rejected": -289.84393310546875, "loss": 0.2355, "rewards/accuracies": 0.875, "rewards/chosen": -0.6435012221336365, "rewards/margins": 2.5302891731262207, "rewards/rejected": -3.173790454864502, "step": 1211 }, { "epoch": 0.14, "learning_rate": 2.616274949805126e-07, "logits/chosen": -2.430065393447876, "logits/rejected": -2.1822991371154785, "logps/chosen": -250.44174194335938, "logps/rejected": -302.0910339355469, "loss": 0.3632, "rewards/accuracies": 1.0, "rewards/chosen": -0.1955638825893402, "rewards/margins": 0.9921808242797852, "rewards/rejected": -1.1877448558807373, "step": 1212 }, { "epoch": 0.14, "learning_rate": 2.615920633045943e-07, "logits/chosen": -2.2868592739105225, "logits/rejected": -2.3052589893341064, "logps/chosen": -316.4286193847656, "logps/rejected": -315.93817138671875, "loss": 0.3579, "rewards/accuracies": 0.875, "rewards/chosen": -0.6329947710037231, "rewards/margins": 1.6671862602233887, "rewards/rejected": -2.3001809120178223, "step": 1213 }, { "epoch": 0.14, "learning_rate": 2.6155663162867603e-07, "logits/chosen": -2.4589529037475586, "logits/rejected": -2.1748476028442383, "logps/chosen": -361.7392578125, "logps/rejected": -335.0190124511719, "loss": 0.8517, "rewards/accuracies": 0.75, "rewards/chosen": -0.9949109554290771, "rewards/margins": 0.9016218185424805, "rewards/rejected": -1.8965328931808472, "step": 1214 }, { "epoch": 0.14, "learning_rate": 2.6152119995275773e-07, "logits/chosen": -2.241535186767578, "logits/rejected": -2.297055244445801, "logps/chosen": -243.60638427734375, "logps/rejected": -133.24386596679688, "loss": 0.6223, "rewards/accuracies": 0.5, "rewards/chosen": -1.0300573110580444, "rewards/margins": 0.9724359512329102, "rewards/rejected": -2.002493143081665, "step": 1215 }, { "epoch": 0.14, "learning_rate": 2.614857682768395e-07, "logits/chosen": -2.250706434249878, "logits/rejected": -2.513474941253662, "logps/chosen": -362.91754150390625, "logps/rejected": -271.84552001953125, "loss": 0.5401, "rewards/accuracies": 0.75, "rewards/chosen": -1.1390385627746582, "rewards/margins": 1.4305357933044434, "rewards/rejected": -2.5695741176605225, "step": 1216 }, { "epoch": 0.14, "learning_rate": 2.6145033660092123e-07, "logits/chosen": -2.137702226638794, "logits/rejected": -2.1692233085632324, "logps/chosen": -211.78146362304688, "logps/rejected": -131.8563690185547, "loss": 0.994, "rewards/accuracies": 0.625, "rewards/chosen": -1.1482348442077637, "rewards/margins": 0.5150662660598755, "rewards/rejected": -1.6633012294769287, "step": 1217 }, { "epoch": 0.14, "learning_rate": 2.614149049250029e-07, "logits/chosen": -2.6752381324768066, "logits/rejected": -2.4664547443389893, "logps/chosen": -141.66978454589844, "logps/rejected": -225.0955352783203, "loss": 0.7433, "rewards/accuracies": 0.625, "rewards/chosen": -0.20321819186210632, "rewards/margins": 0.17518356442451477, "rewards/rejected": -0.3784017860889435, "step": 1218 }, { "epoch": 0.14, "learning_rate": 2.6137947324908467e-07, "logits/chosen": -2.2906172275543213, "logits/rejected": -2.5458648204803467, "logps/chosen": -281.67572021484375, "logps/rejected": -286.2783203125, "loss": 0.3528, "rewards/accuracies": 0.75, "rewards/chosen": -1.0083242654800415, "rewards/margins": 1.3587497472763062, "rewards/rejected": -2.3670740127563477, "step": 1219 }, { "epoch": 0.14, "learning_rate": 2.6134404157316637e-07, "logits/chosen": -2.37449312210083, "logits/rejected": -2.358083963394165, "logps/chosen": -157.3820343017578, "logps/rejected": -220.6537628173828, "loss": 0.4983, "rewards/accuracies": 0.75, "rewards/chosen": -0.5086790323257446, "rewards/margins": 1.149797797203064, "rewards/rejected": -1.6584768295288086, "step": 1220 }, { "epoch": 0.14, "learning_rate": 2.6130860989724817e-07, "logits/chosen": -2.7063205242156982, "logits/rejected": -2.554245948791504, "logps/chosen": -198.2356414794922, "logps/rejected": -259.17889404296875, "loss": 0.3145, "rewards/accuracies": 0.875, "rewards/chosen": -0.3179255723953247, "rewards/margins": 1.4808061122894287, "rewards/rejected": -1.7987315654754639, "step": 1221 }, { "epoch": 0.14, "learning_rate": 2.6127317822132986e-07, "logits/chosen": -2.154592514038086, "logits/rejected": -2.034823179244995, "logps/chosen": -325.4721374511719, "logps/rejected": -308.1068115234375, "loss": 0.5773, "rewards/accuracies": 0.875, "rewards/chosen": -0.7155643105506897, "rewards/margins": 1.0639994144439697, "rewards/rejected": -1.7795637845993042, "step": 1222 }, { "epoch": 0.14, "learning_rate": 2.612377465454116e-07, "logits/chosen": -1.8830881118774414, "logits/rejected": -1.857619047164917, "logps/chosen": -451.446533203125, "logps/rejected": -442.03125, "loss": 0.3536, "rewards/accuracies": 0.875, "rewards/chosen": -0.20267298817634583, "rewards/margins": 1.9951961040496826, "rewards/rejected": -2.197868824005127, "step": 1223 }, { "epoch": 0.14, "learning_rate": 2.612023148694933e-07, "logits/chosen": -2.2011451721191406, "logits/rejected": -1.870465874671936, "logps/chosen": -332.98443603515625, "logps/rejected": -241.1824493408203, "loss": 0.44, "rewards/accuracies": 0.75, "rewards/chosen": -0.8765718936920166, "rewards/margins": 0.9335952997207642, "rewards/rejected": -1.8101671934127808, "step": 1224 }, { "epoch": 0.14, "learning_rate": 2.6116688319357506e-07, "logits/chosen": -2.053405523300171, "logits/rejected": -1.9158811569213867, "logps/chosen": -299.52105712890625, "logps/rejected": -421.9799499511719, "loss": 0.272, "rewards/accuracies": 0.875, "rewards/chosen": -0.5662707090377808, "rewards/margins": 1.7271157503128052, "rewards/rejected": -2.293386459350586, "step": 1225 }, { "epoch": 0.14, "learning_rate": 2.6113145151765675e-07, "logits/chosen": -2.336432456970215, "logits/rejected": -2.6305625438690186, "logps/chosen": -249.67605590820312, "logps/rejected": -223.44863891601562, "loss": 0.3659, "rewards/accuracies": 0.875, "rewards/chosen": -0.3940158486366272, "rewards/margins": 1.4299249649047852, "rewards/rejected": -1.8239408731460571, "step": 1226 }, { "epoch": 0.14, "learning_rate": 2.610960198417385e-07, "logits/chosen": -2.632175922393799, "logits/rejected": -2.365797758102417, "logps/chosen": -137.59799194335938, "logps/rejected": -276.6572570800781, "loss": 0.6939, "rewards/accuracies": 0.625, "rewards/chosen": -1.031806468963623, "rewards/margins": 1.4824001789093018, "rewards/rejected": -2.514206886291504, "step": 1227 }, { "epoch": 0.14, "learning_rate": 2.6106058816582025e-07, "logits/chosen": -1.9882938861846924, "logits/rejected": -2.1020126342773438, "logps/chosen": -418.59149169921875, "logps/rejected": -370.8414306640625, "loss": 0.5675, "rewards/accuracies": 0.75, "rewards/chosen": -0.11219042539596558, "rewards/margins": 1.2009856700897217, "rewards/rejected": -1.313176155090332, "step": 1228 }, { "epoch": 0.14, "learning_rate": 2.6102515648990195e-07, "logits/chosen": -1.9346270561218262, "logits/rejected": -1.883954405784607, "logps/chosen": -296.3021240234375, "logps/rejected": -224.70205688476562, "loss": 0.3362, "rewards/accuracies": 0.75, "rewards/chosen": -0.07661155611276627, "rewards/margins": 1.8448667526245117, "rewards/rejected": -1.921478271484375, "step": 1229 }, { "epoch": 0.14, "learning_rate": 2.609897248139837e-07, "logits/chosen": -2.0371227264404297, "logits/rejected": -2.2506399154663086, "logps/chosen": -355.3033752441406, "logps/rejected": -297.62603759765625, "loss": 0.2657, "rewards/accuracies": 1.0, "rewards/chosen": -0.46851930022239685, "rewards/margins": 1.7664119005203247, "rewards/rejected": -2.234931230545044, "step": 1230 }, { "epoch": 0.14, "learning_rate": 2.609542931380654e-07, "logits/chosen": -2.5912065505981445, "logits/rejected": -2.7296056747436523, "logps/chosen": -178.59237670898438, "logps/rejected": -212.0275421142578, "loss": 0.4854, "rewards/accuracies": 0.75, "rewards/chosen": -0.9331275820732117, "rewards/margins": 2.161269187927246, "rewards/rejected": -3.0943965911865234, "step": 1231 }, { "epoch": 0.14, "learning_rate": 2.6091886146214714e-07, "logits/chosen": -2.214390516281128, "logits/rejected": -2.0811753273010254, "logps/chosen": -259.7727966308594, "logps/rejected": -337.0928039550781, "loss": 0.6228, "rewards/accuracies": 0.75, "rewards/chosen": -1.073153018951416, "rewards/margins": 1.5224473476409912, "rewards/rejected": -2.5956006050109863, "step": 1232 }, { "epoch": 0.14, "learning_rate": 2.608834297862289e-07, "logits/chosen": -2.130297899246216, "logits/rejected": -2.202854871749878, "logps/chosen": -282.682861328125, "logps/rejected": -306.7928161621094, "loss": 0.1232, "rewards/accuracies": 1.0, "rewards/chosen": -0.5135722756385803, "rewards/margins": 2.5404725074768066, "rewards/rejected": -3.054044723510742, "step": 1233 }, { "epoch": 0.14, "learning_rate": 2.6084799811031064e-07, "logits/chosen": -2.2196273803710938, "logits/rejected": -2.0463874340057373, "logps/chosen": -244.0476531982422, "logps/rejected": -290.8463134765625, "loss": 0.3742, "rewards/accuracies": 0.875, "rewards/chosen": -0.22885075211524963, "rewards/margins": 1.4578667879104614, "rewards/rejected": -1.6867173910140991, "step": 1234 }, { "epoch": 0.14, "learning_rate": 2.6081256643439233e-07, "logits/chosen": -1.8478654623031616, "logits/rejected": -2.281740188598633, "logps/chosen": -440.760498046875, "logps/rejected": -316.838623046875, "loss": 0.484, "rewards/accuracies": 0.875, "rewards/chosen": -0.553683340549469, "rewards/margins": 1.7635353803634644, "rewards/rejected": -2.317218780517578, "step": 1235 }, { "epoch": 0.14, "learning_rate": 2.607771347584741e-07, "logits/chosen": -1.8633878231048584, "logits/rejected": -2.240372657775879, "logps/chosen": -276.9737548828125, "logps/rejected": -246.07305908203125, "loss": 0.4326, "rewards/accuracies": 0.75, "rewards/chosen": -0.7352249026298523, "rewards/margins": 1.6205451488494873, "rewards/rejected": -2.3557701110839844, "step": 1236 }, { "epoch": 0.14, "learning_rate": 2.607417030825558e-07, "logits/chosen": -2.397573947906494, "logits/rejected": -1.9563982486724854, "logps/chosen": -297.1358642578125, "logps/rejected": -246.6844482421875, "loss": 0.2937, "rewards/accuracies": 0.75, "rewards/chosen": -0.7042604088783264, "rewards/margins": 1.9653129577636719, "rewards/rejected": -2.6695733070373535, "step": 1237 }, { "epoch": 0.14, "learning_rate": 2.607062714066375e-07, "logits/chosen": -2.0208499431610107, "logits/rejected": -2.1037914752960205, "logps/chosen": -504.9766845703125, "logps/rejected": -365.8271789550781, "loss": 0.3582, "rewards/accuracies": 0.875, "rewards/chosen": -0.7214065790176392, "rewards/margins": 1.671844482421875, "rewards/rejected": -2.3932509422302246, "step": 1238 }, { "epoch": 0.14, "learning_rate": 2.6067083973071927e-07, "logits/chosen": -1.9319088459014893, "logits/rejected": -2.1614065170288086, "logps/chosen": -423.293701171875, "logps/rejected": -276.984130859375, "loss": 0.6646, "rewards/accuracies": 0.5, "rewards/chosen": -1.01180100440979, "rewards/margins": 0.7641793489456177, "rewards/rejected": -1.7759804725646973, "step": 1239 }, { "epoch": 0.14, "learning_rate": 2.6063540805480097e-07, "logits/chosen": -2.349661111831665, "logits/rejected": -2.53029727935791, "logps/chosen": -521.2225341796875, "logps/rejected": -370.9586181640625, "loss": 0.458, "rewards/accuracies": 0.75, "rewards/chosen": -0.7600740194320679, "rewards/margins": 1.2158392667770386, "rewards/rejected": -1.9759132862091064, "step": 1240 }, { "epoch": 0.14, "learning_rate": 2.605999763788827e-07, "logits/chosen": -2.6423542499542236, "logits/rejected": -2.8583455085754395, "logps/chosen": -138.44232177734375, "logps/rejected": -290.8701171875, "loss": 0.4664, "rewards/accuracies": 0.75, "rewards/chosen": -0.5801395177841187, "rewards/margins": 1.0942535400390625, "rewards/rejected": -1.6743930578231812, "step": 1241 }, { "epoch": 0.14, "learning_rate": 2.605645447029644e-07, "logits/chosen": -1.8430917263031006, "logits/rejected": -2.3714470863342285, "logps/chosen": -305.1477966308594, "logps/rejected": -223.27792358398438, "loss": 0.3282, "rewards/accuracies": 0.875, "rewards/chosen": -0.22728799283504486, "rewards/margins": 1.3993114233016968, "rewards/rejected": -1.6265994310379028, "step": 1242 }, { "epoch": 0.14, "learning_rate": 2.6052911302704616e-07, "logits/chosen": -2.206584930419922, "logits/rejected": -2.243499279022217, "logps/chosen": -458.9821472167969, "logps/rejected": -413.85784912109375, "loss": 0.5828, "rewards/accuracies": 0.75, "rewards/chosen": -1.3924663066864014, "rewards/margins": 1.4343593120574951, "rewards/rejected": -2.8268258571624756, "step": 1243 }, { "epoch": 0.14, "learning_rate": 2.6049368135112786e-07, "logits/chosen": -2.993178129196167, "logits/rejected": -2.8497660160064697, "logps/chosen": -215.64817810058594, "logps/rejected": -273.05218505859375, "loss": 0.3837, "rewards/accuracies": 0.875, "rewards/chosen": -0.8832869529724121, "rewards/margins": 1.5810787677764893, "rewards/rejected": -2.4643657207489014, "step": 1244 }, { "epoch": 0.14, "learning_rate": 2.6045824967520966e-07, "logits/chosen": -2.456869125366211, "logits/rejected": -2.540454864501953, "logps/chosen": -149.39254760742188, "logps/rejected": -237.100341796875, "loss": 0.3548, "rewards/accuracies": 0.75, "rewards/chosen": 0.22591577470302582, "rewards/margins": 1.687772512435913, "rewards/rejected": -1.4618569612503052, "step": 1245 }, { "epoch": 0.14, "learning_rate": 2.6042281799929135e-07, "logits/chosen": -2.252779245376587, "logits/rejected": -2.7211036682128906, "logps/chosen": -302.27105712890625, "logps/rejected": -147.2112579345703, "loss": 0.6604, "rewards/accuracies": 0.625, "rewards/chosen": -1.0700712203979492, "rewards/margins": 0.6435593366622925, "rewards/rejected": -1.7136304378509521, "step": 1246 }, { "epoch": 0.15, "learning_rate": 2.603873863233731e-07, "logits/chosen": -3.0307254791259766, "logits/rejected": -2.97756290435791, "logps/chosen": -169.63092041015625, "logps/rejected": -113.05150604248047, "loss": 0.7544, "rewards/accuracies": 0.375, "rewards/chosen": -0.7444263100624084, "rewards/margins": 0.39856547117233276, "rewards/rejected": -1.1429919004440308, "step": 1247 }, { "epoch": 0.15, "learning_rate": 2.603519546474548e-07, "logits/chosen": -2.8420052528381348, "logits/rejected": -2.6869869232177734, "logps/chosen": -222.10028076171875, "logps/rejected": -218.86453247070312, "loss": 0.2293, "rewards/accuracies": 1.0, "rewards/chosen": -0.07362756133079529, "rewards/margins": 2.1067304611206055, "rewards/rejected": -2.1803579330444336, "step": 1248 }, { "epoch": 0.15, "learning_rate": 2.6031652297153655e-07, "logits/chosen": -1.9170403480529785, "logits/rejected": -2.0209596157073975, "logps/chosen": -400.3038635253906, "logps/rejected": -319.9889221191406, "loss": 0.455, "rewards/accuracies": 0.75, "rewards/chosen": -0.3565017879009247, "rewards/margins": 1.1411786079406738, "rewards/rejected": -1.4976803064346313, "step": 1249 }, { "epoch": 0.15, "learning_rate": 2.602810912956183e-07, "logits/chosen": -2.292919158935547, "logits/rejected": -2.4757347106933594, "logps/chosen": -317.056640625, "logps/rejected": -305.7164001464844, "loss": 0.1871, "rewards/accuracies": 1.0, "rewards/chosen": -0.7052890062332153, "rewards/margins": 2.174309015274048, "rewards/rejected": -2.8795979022979736, "step": 1250 }, { "epoch": 0.15, "learning_rate": 2.602456596197e-07, "logits/chosen": -2.516244888305664, "logits/rejected": -2.4459445476531982, "logps/chosen": -235.16522216796875, "logps/rejected": -211.745361328125, "loss": 0.39, "rewards/accuracies": 0.75, "rewards/chosen": -0.38409173488616943, "rewards/margins": 2.1268701553344727, "rewards/rejected": -2.5109620094299316, "step": 1251 }, { "epoch": 0.15, "learning_rate": 2.6021022794378174e-07, "logits/chosen": -2.236812114715576, "logits/rejected": -2.0823521614074707, "logps/chosen": -212.92654418945312, "logps/rejected": -213.06077575683594, "loss": 0.2804, "rewards/accuracies": 1.0, "rewards/chosen": -0.6693189144134521, "rewards/margins": 2.098865032196045, "rewards/rejected": -2.768183708190918, "step": 1252 }, { "epoch": 0.15, "learning_rate": 2.6017479626786344e-07, "logits/chosen": -2.3237688541412354, "logits/rejected": -2.413252353668213, "logps/chosen": -237.13018798828125, "logps/rejected": -133.48849487304688, "loss": 0.9022, "rewards/accuracies": 0.5, "rewards/chosen": -1.1386258602142334, "rewards/margins": 0.2657497525215149, "rewards/rejected": -1.4043755531311035, "step": 1253 }, { "epoch": 0.15, "learning_rate": 2.601393645919452e-07, "logits/chosen": -2.266758680343628, "logits/rejected": -2.3881494998931885, "logps/chosen": -260.87646484375, "logps/rejected": -471.16302490234375, "loss": 0.8151, "rewards/accuracies": 0.75, "rewards/chosen": -1.1986429691314697, "rewards/margins": 0.25457435846328735, "rewards/rejected": -1.4532173871994019, "step": 1254 }, { "epoch": 0.15, "learning_rate": 2.601039329160269e-07, "logits/chosen": -2.8981451988220215, "logits/rejected": -2.8271474838256836, "logps/chosen": -141.50965881347656, "logps/rejected": -157.44015502929688, "loss": 0.2766, "rewards/accuracies": 0.875, "rewards/chosen": -0.11638477444648743, "rewards/margins": 1.9493556022644043, "rewards/rejected": -2.0657403469085693, "step": 1255 }, { "epoch": 0.15, "learning_rate": 2.600685012401087e-07, "logits/chosen": -2.693871259689331, "logits/rejected": -2.4761247634887695, "logps/chosen": -125.81049346923828, "logps/rejected": -251.10403442382812, "loss": 0.4626, "rewards/accuracies": 0.75, "rewards/chosen": -0.5399969220161438, "rewards/margins": 1.9713658094406128, "rewards/rejected": -2.5113627910614014, "step": 1256 }, { "epoch": 0.15, "learning_rate": 2.600330695641904e-07, "logits/chosen": -2.442492723464966, "logits/rejected": -2.28092885017395, "logps/chosen": -165.7157440185547, "logps/rejected": -261.3372497558594, "loss": 0.5349, "rewards/accuracies": 0.75, "rewards/chosen": -1.3454078435897827, "rewards/margins": 0.5472221374511719, "rewards/rejected": -1.892629861831665, "step": 1257 }, { "epoch": 0.15, "learning_rate": 2.599976378882721e-07, "logits/chosen": -2.115691661834717, "logits/rejected": -1.9491918087005615, "logps/chosen": -317.12054443359375, "logps/rejected": -340.6336364746094, "loss": 0.4501, "rewards/accuracies": 0.75, "rewards/chosen": -0.18840591609477997, "rewards/margins": 1.293190360069275, "rewards/rejected": -1.4815962314605713, "step": 1258 }, { "epoch": 0.15, "learning_rate": 2.599622062123538e-07, "logits/chosen": -2.2306504249572754, "logits/rejected": -2.14399790763855, "logps/chosen": -184.25833129882812, "logps/rejected": -207.76370239257812, "loss": 0.5517, "rewards/accuracies": 0.625, "rewards/chosen": -0.2603466510772705, "rewards/margins": 0.9439060688018799, "rewards/rejected": -1.2042527198791504, "step": 1259 }, { "epoch": 0.15, "learning_rate": 2.5992677453643557e-07, "logits/chosen": -2.549180507659912, "logits/rejected": -2.6407978534698486, "logps/chosen": -213.83932495117188, "logps/rejected": -207.66494750976562, "loss": 0.4595, "rewards/accuracies": 0.625, "rewards/chosen": -0.711537778377533, "rewards/margins": 1.0398741960525513, "rewards/rejected": -1.75141179561615, "step": 1260 }, { "epoch": 0.15, "learning_rate": 2.598913428605173e-07, "logits/chosen": -2.313573122024536, "logits/rejected": -2.205308675765991, "logps/chosen": -180.78720092773438, "logps/rejected": -199.6486358642578, "loss": 0.2204, "rewards/accuracies": 0.875, "rewards/chosen": -0.1279129981994629, "rewards/margins": 1.8264344930648804, "rewards/rejected": -1.9543473720550537, "step": 1261 }, { "epoch": 0.15, "learning_rate": 2.59855911184599e-07, "logits/chosen": -2.2265775203704834, "logits/rejected": -2.345930337905884, "logps/chosen": -316.6727294921875, "logps/rejected": -272.6983642578125, "loss": 0.5024, "rewards/accuracies": 0.75, "rewards/chosen": -0.7236360311508179, "rewards/margins": 1.0970818996429443, "rewards/rejected": -1.8207180500030518, "step": 1262 }, { "epoch": 0.15, "learning_rate": 2.5982047950868076e-07, "logits/chosen": -2.5143589973449707, "logits/rejected": -2.669133186340332, "logps/chosen": -222.92259216308594, "logps/rejected": -343.65167236328125, "loss": 0.2482, "rewards/accuracies": 1.0, "rewards/chosen": -0.8658496141433716, "rewards/margins": 1.8789976835250854, "rewards/rejected": -2.744847297668457, "step": 1263 }, { "epoch": 0.15, "learning_rate": 2.5978504783276246e-07, "logits/chosen": -1.77280855178833, "logits/rejected": -1.9820737838745117, "logps/chosen": -437.0660095214844, "logps/rejected": -421.7515869140625, "loss": 0.2053, "rewards/accuracies": 1.0, "rewards/chosen": -0.5205976963043213, "rewards/margins": 2.0472793579101562, "rewards/rejected": -2.5678770542144775, "step": 1264 }, { "epoch": 0.15, "learning_rate": 2.597496161568442e-07, "logits/chosen": -2.181020736694336, "logits/rejected": -2.156886577606201, "logps/chosen": -361.36712646484375, "logps/rejected": -278.8703308105469, "loss": 0.4018, "rewards/accuracies": 0.625, "rewards/chosen": -0.7749343514442444, "rewards/margins": 1.345133900642395, "rewards/rejected": -2.120068073272705, "step": 1265 }, { "epoch": 0.15, "learning_rate": 2.597141844809259e-07, "logits/chosen": -2.119680643081665, "logits/rejected": -2.056746006011963, "logps/chosen": -277.7623291015625, "logps/rejected": -273.0042724609375, "loss": 0.2764, "rewards/accuracies": 1.0, "rewards/chosen": -0.7121495604515076, "rewards/margins": 1.2374653816223145, "rewards/rejected": -1.9496151208877563, "step": 1266 }, { "epoch": 0.15, "learning_rate": 2.5967875280500765e-07, "logits/chosen": -2.1201016902923584, "logits/rejected": -2.235252618789673, "logps/chosen": -326.6087951660156, "logps/rejected": -325.4222412109375, "loss": 0.2567, "rewards/accuracies": 1.0, "rewards/chosen": -0.22725319862365723, "rewards/margins": 1.6974499225616455, "rewards/rejected": -1.9247030019760132, "step": 1267 }, { "epoch": 0.15, "learning_rate": 2.596433211290894e-07, "logits/chosen": -2.818470001220703, "logits/rejected": -2.705915927886963, "logps/chosen": -280.83734130859375, "logps/rejected": -216.49269104003906, "loss": 0.3247, "rewards/accuracies": 0.875, "rewards/chosen": -0.7131451964378357, "rewards/margins": 1.7519458532333374, "rewards/rejected": -2.4650912284851074, "step": 1268 }, { "epoch": 0.15, "learning_rate": 2.5960788945317115e-07, "logits/chosen": -2.4428930282592773, "logits/rejected": -2.331605911254883, "logps/chosen": -295.9519348144531, "logps/rejected": -289.4832458496094, "loss": 0.3211, "rewards/accuracies": 0.875, "rewards/chosen": -1.192646861076355, "rewards/margins": 1.6220908164978027, "rewards/rejected": -2.814737558364868, "step": 1269 }, { "epoch": 0.15, "learning_rate": 2.5957245777725284e-07, "logits/chosen": -2.6269357204437256, "logits/rejected": -2.687150716781616, "logps/chosen": -169.66976928710938, "logps/rejected": -177.86288452148438, "loss": 0.6397, "rewards/accuracies": 0.75, "rewards/chosen": -0.7958784103393555, "rewards/margins": 0.6250782012939453, "rewards/rejected": -1.4209566116333008, "step": 1270 }, { "epoch": 0.15, "learning_rate": 2.595370261013346e-07, "logits/chosen": -2.6670022010803223, "logits/rejected": -2.565812587738037, "logps/chosen": -311.00115966796875, "logps/rejected": -217.16049194335938, "loss": 0.4264, "rewards/accuracies": 0.875, "rewards/chosen": -0.8078153729438782, "rewards/margins": 1.1430182456970215, "rewards/rejected": -1.950833797454834, "step": 1271 }, { "epoch": 0.15, "learning_rate": 2.595015944254163e-07, "logits/chosen": -1.3872480392456055, "logits/rejected": -1.5931191444396973, "logps/chosen": -735.7073974609375, "logps/rejected": -551.5750732421875, "loss": 0.695, "rewards/accuracies": 0.625, "rewards/chosen": -1.3547881841659546, "rewards/margins": 1.506098747253418, "rewards/rejected": -2.860887050628662, "step": 1272 }, { "epoch": 0.15, "learning_rate": 2.5946616274949804e-07, "logits/chosen": -2.882721185684204, "logits/rejected": -2.767637014389038, "logps/chosen": -125.76010131835938, "logps/rejected": -193.9713592529297, "loss": 0.3611, "rewards/accuracies": 0.875, "rewards/chosen": -0.3950304687023163, "rewards/margins": 1.374147891998291, "rewards/rejected": -1.7691782712936401, "step": 1273 }, { "epoch": 0.15, "learning_rate": 2.594307310735798e-07, "logits/chosen": -1.8718408346176147, "logits/rejected": -2.2689945697784424, "logps/chosen": -510.5018005371094, "logps/rejected": -342.74365234375, "loss": 0.2101, "rewards/accuracies": 1.0, "rewards/chosen": -0.6649178862571716, "rewards/margins": 2.3887991905212402, "rewards/rejected": -3.0537166595458984, "step": 1274 }, { "epoch": 0.15, "learning_rate": 2.593952993976615e-07, "logits/chosen": -2.3788914680480957, "logits/rejected": -2.28865385055542, "logps/chosen": -241.61886596679688, "logps/rejected": -230.29275512695312, "loss": 0.2552, "rewards/accuracies": 0.875, "rewards/chosen": -0.6105338335037231, "rewards/margins": 2.803457021713257, "rewards/rejected": -3.4139909744262695, "step": 1275 }, { "epoch": 0.15, "learning_rate": 2.5935986772174323e-07, "logits/chosen": -2.2635085582733154, "logits/rejected": -2.330038070678711, "logps/chosen": -523.0123291015625, "logps/rejected": -468.148193359375, "loss": 0.5342, "rewards/accuracies": 0.75, "rewards/chosen": -0.740538477897644, "rewards/margins": 1.3421372175216675, "rewards/rejected": -2.0826756954193115, "step": 1276 }, { "epoch": 0.15, "learning_rate": 2.593244360458249e-07, "logits/chosen": -2.5202741622924805, "logits/rejected": -2.2912259101867676, "logps/chosen": -297.2366943359375, "logps/rejected": -251.6663818359375, "loss": 0.4441, "rewards/accuracies": 0.875, "rewards/chosen": -0.31972527503967285, "rewards/margins": 1.4612170457839966, "rewards/rejected": -1.7809423208236694, "step": 1277 }, { "epoch": 0.15, "learning_rate": 2.592890043699067e-07, "logits/chosen": -1.3900766372680664, "logits/rejected": -2.0586559772491455, "logps/chosen": -529.3613891601562, "logps/rejected": -280.0226745605469, "loss": 0.285, "rewards/accuracies": 1.0, "rewards/chosen": -0.645638108253479, "rewards/margins": 1.191230297088623, "rewards/rejected": -1.8368682861328125, "step": 1278 }, { "epoch": 0.15, "learning_rate": 2.592535726939884e-07, "logits/chosen": -2.0372562408447266, "logits/rejected": -2.2701494693756104, "logps/chosen": -329.7569580078125, "logps/rejected": -306.30706787109375, "loss": 0.5051, "rewards/accuracies": 0.75, "rewards/chosen": -0.44235673546791077, "rewards/margins": 0.9151731729507446, "rewards/rejected": -1.3575299978256226, "step": 1279 }, { "epoch": 0.15, "learning_rate": 2.5921814101807017e-07, "logits/chosen": -2.6823337078094482, "logits/rejected": -2.837756872177124, "logps/chosen": -297.3595275878906, "logps/rejected": -250.33102416992188, "loss": 0.2531, "rewards/accuracies": 1.0, "rewards/chosen": -0.11365848779678345, "rewards/margins": 2.0839953422546387, "rewards/rejected": -2.1976537704467773, "step": 1280 }, { "epoch": 0.15, "learning_rate": 2.5918270934215187e-07, "logits/chosen": -2.8172483444213867, "logits/rejected": -2.738985300064087, "logps/chosen": -280.9693603515625, "logps/rejected": -155.98281860351562, "loss": 0.5415, "rewards/accuracies": 0.625, "rewards/chosen": -0.7478078603744507, "rewards/margins": 0.9187616109848022, "rewards/rejected": -1.666569471359253, "step": 1281 }, { "epoch": 0.15, "learning_rate": 2.591472776662336e-07, "logits/chosen": -2.4865734577178955, "logits/rejected": -2.6910340785980225, "logps/chosen": -153.28834533691406, "logps/rejected": -170.6597900390625, "loss": 0.3194, "rewards/accuracies": 0.875, "rewards/chosen": -0.6262564659118652, "rewards/margins": 1.2164052724838257, "rewards/rejected": -1.842661738395691, "step": 1282 }, { "epoch": 0.15, "learning_rate": 2.591118459903153e-07, "logits/chosen": -2.588737964630127, "logits/rejected": -2.7611048221588135, "logps/chosen": -254.65057373046875, "logps/rejected": -225.90975952148438, "loss": 0.18, "rewards/accuracies": 1.0, "rewards/chosen": -0.24436911940574646, "rewards/margins": 2.8849329948425293, "rewards/rejected": -3.1293022632598877, "step": 1283 }, { "epoch": 0.15, "learning_rate": 2.5907641431439706e-07, "logits/chosen": -1.9059133529663086, "logits/rejected": -2.0604407787323, "logps/chosen": -330.07269287109375, "logps/rejected": -272.6793518066406, "loss": 0.5477, "rewards/accuracies": 0.625, "rewards/chosen": -0.1716097593307495, "rewards/margins": 1.5171852111816406, "rewards/rejected": -1.6887949705123901, "step": 1284 }, { "epoch": 0.15, "learning_rate": 2.590409826384788e-07, "logits/chosen": -2.7010927200317383, "logits/rejected": -2.336820602416992, "logps/chosen": -393.81097412109375, "logps/rejected": -324.70880126953125, "loss": 1.1451, "rewards/accuracies": 0.875, "rewards/chosen": -1.014214038848877, "rewards/margins": 0.9166098237037659, "rewards/rejected": -1.9308240413665771, "step": 1285 }, { "epoch": 0.15, "learning_rate": 2.590055509625605e-07, "logits/chosen": -2.9210479259490967, "logits/rejected": -2.9542672634124756, "logps/chosen": -150.58636474609375, "logps/rejected": -185.82281494140625, "loss": 0.3094, "rewards/accuracies": 0.875, "rewards/chosen": -0.4505285620689392, "rewards/margins": 1.8772376775741577, "rewards/rejected": -2.3277664184570312, "step": 1286 }, { "epoch": 0.15, "learning_rate": 2.5897011928664225e-07, "logits/chosen": -1.6713709831237793, "logits/rejected": -1.8420507907867432, "logps/chosen": -336.9733581542969, "logps/rejected": -304.183837890625, "loss": 0.7613, "rewards/accuracies": 0.5, "rewards/chosen": -1.2765471935272217, "rewards/margins": 0.2487141191959381, "rewards/rejected": -1.5252611637115479, "step": 1287 }, { "epoch": 0.15, "learning_rate": 2.5893468761072395e-07, "logits/chosen": -3.0366063117980957, "logits/rejected": -3.0322883129119873, "logps/chosen": -266.9208984375, "logps/rejected": -182.05624389648438, "loss": 0.6769, "rewards/accuracies": 0.875, "rewards/chosen": -0.8897483348846436, "rewards/margins": 1.1671676635742188, "rewards/rejected": -2.0569159984588623, "step": 1288 }, { "epoch": 0.15, "learning_rate": 2.588992559348057e-07, "logits/chosen": -2.128631114959717, "logits/rejected": -2.6411750316619873, "logps/chosen": -339.3894958496094, "logps/rejected": -236.3607177734375, "loss": 0.327, "rewards/accuracies": 0.75, "rewards/chosen": -0.41288837790489197, "rewards/margins": 2.578314781188965, "rewards/rejected": -2.9912033081054688, "step": 1289 }, { "epoch": 0.15, "learning_rate": 2.5886382425888745e-07, "logits/chosen": -1.7747128009796143, "logits/rejected": -1.798820972442627, "logps/chosen": -419.06622314453125, "logps/rejected": -370.3547058105469, "loss": 0.5943, "rewards/accuracies": 0.75, "rewards/chosen": -0.6487704515457153, "rewards/margins": 1.4400386810302734, "rewards/rejected": -2.088809013366699, "step": 1290 }, { "epoch": 0.15, "learning_rate": 2.5882839258296914e-07, "logits/chosen": -2.159764289855957, "logits/rejected": -2.5085325241088867, "logps/chosen": -292.51318359375, "logps/rejected": -194.88055419921875, "loss": 0.9448, "rewards/accuracies": 0.75, "rewards/chosen": -1.1800484657287598, "rewards/margins": 1.1285276412963867, "rewards/rejected": -2.3085761070251465, "step": 1291 }, { "epoch": 0.15, "learning_rate": 2.587929609070509e-07, "logits/chosen": -1.8661205768585205, "logits/rejected": -1.8758271932601929, "logps/chosen": -315.619873046875, "logps/rejected": -403.6959228515625, "loss": 0.3323, "rewards/accuracies": 0.875, "rewards/chosen": -0.20793503522872925, "rewards/margins": 2.5689449310302734, "rewards/rejected": -2.7768802642822266, "step": 1292 }, { "epoch": 0.15, "learning_rate": 2.5875752923113264e-07, "logits/chosen": -2.6157755851745605, "logits/rejected": -2.588261127471924, "logps/chosen": -208.9537353515625, "logps/rejected": -253.08358764648438, "loss": 0.3741, "rewards/accuracies": 0.75, "rewards/chosen": -0.7660631537437439, "rewards/margins": 1.7738869190216064, "rewards/rejected": -2.539950132369995, "step": 1293 }, { "epoch": 0.15, "learning_rate": 2.5872209755521433e-07, "logits/chosen": -2.117156744003296, "logits/rejected": -2.127070188522339, "logps/chosen": -216.6184539794922, "logps/rejected": -235.77740478515625, "loss": 0.5052, "rewards/accuracies": 0.875, "rewards/chosen": -0.6661102771759033, "rewards/margins": 1.7357144355773926, "rewards/rejected": -2.401824712753296, "step": 1294 }, { "epoch": 0.15, "learning_rate": 2.586866658792961e-07, "logits/chosen": -2.4313063621520996, "logits/rejected": -2.4967944622039795, "logps/chosen": -264.153564453125, "logps/rejected": -257.64544677734375, "loss": 1.432, "rewards/accuracies": 0.375, "rewards/chosen": -3.849565267562866, "rewards/margins": -0.31178268790245056, "rewards/rejected": -3.5377824306488037, "step": 1295 }, { "epoch": 0.15, "learning_rate": 2.5865123420337783e-07, "logits/chosen": -2.7848920822143555, "logits/rejected": -2.9087843894958496, "logps/chosen": -386.3840637207031, "logps/rejected": -272.14630126953125, "loss": 0.6555, "rewards/accuracies": 0.625, "rewards/chosen": -1.0581109523773193, "rewards/margins": 0.4849759042263031, "rewards/rejected": -1.5430867671966553, "step": 1296 }, { "epoch": 0.15, "learning_rate": 2.5861580252745953e-07, "logits/chosen": -2.2209672927856445, "logits/rejected": -2.2732667922973633, "logps/chosen": -309.7784118652344, "logps/rejected": -227.14389038085938, "loss": 0.3946, "rewards/accuracies": 1.0, "rewards/chosen": -0.8161208629608154, "rewards/margins": 0.9809682369232178, "rewards/rejected": -1.7970889806747437, "step": 1297 }, { "epoch": 0.15, "learning_rate": 2.585803708515413e-07, "logits/chosen": -2.4885759353637695, "logits/rejected": -2.8750669956207275, "logps/chosen": -380.5934753417969, "logps/rejected": -159.957763671875, "loss": 0.4369, "rewards/accuracies": 0.75, "rewards/chosen": -0.20302613079547882, "rewards/margins": 1.425229549407959, "rewards/rejected": -1.6282557249069214, "step": 1298 }, { "epoch": 0.15, "learning_rate": 2.5854493917562297e-07, "logits/chosen": -2.6753623485565186, "logits/rejected": -2.6322524547576904, "logps/chosen": -206.34133911132812, "logps/rejected": -177.70205688476562, "loss": 0.4729, "rewards/accuracies": 0.75, "rewards/chosen": -0.8837290406227112, "rewards/margins": 1.03102707862854, "rewards/rejected": -1.9147560596466064, "step": 1299 }, { "epoch": 0.15, "learning_rate": 2.585095074997047e-07, "logits/chosen": -2.6090126037597656, "logits/rejected": -2.4821982383728027, "logps/chosen": -237.47515869140625, "logps/rejected": -259.5172119140625, "loss": 0.2584, "rewards/accuracies": 0.875, "rewards/chosen": 0.19561664760112762, "rewards/margins": 1.6479816436767578, "rewards/rejected": -1.4523649215698242, "step": 1300 }, { "epoch": 0.15, "learning_rate": 2.584740758237864e-07, "logits/chosen": -2.201249122619629, "logits/rejected": -2.2727041244506836, "logps/chosen": -239.57505798339844, "logps/rejected": -172.9882049560547, "loss": 0.7372, "rewards/accuracies": 0.625, "rewards/chosen": -1.3061528205871582, "rewards/margins": 0.45246267318725586, "rewards/rejected": -1.758615493774414, "step": 1301 }, { "epoch": 0.15, "learning_rate": 2.5843864414786816e-07, "logits/chosen": -2.3956263065338135, "logits/rejected": -2.4774601459503174, "logps/chosen": -251.79852294921875, "logps/rejected": -280.4782409667969, "loss": 0.5548, "rewards/accuracies": 0.625, "rewards/chosen": -0.9851399660110474, "rewards/margins": 2.0133299827575684, "rewards/rejected": -2.998469829559326, "step": 1302 }, { "epoch": 0.15, "learning_rate": 2.584032124719499e-07, "logits/chosen": -1.9091551303863525, "logits/rejected": -1.9462592601776123, "logps/chosen": -382.4305419921875, "logps/rejected": -323.70904541015625, "loss": 0.5732, "rewards/accuracies": 0.625, "rewards/chosen": -0.4826089143753052, "rewards/margins": 0.8115018606185913, "rewards/rejected": -1.294110655784607, "step": 1303 }, { "epoch": 0.15, "learning_rate": 2.5836778079603166e-07, "logits/chosen": -2.7944390773773193, "logits/rejected": -2.7513723373413086, "logps/chosen": -136.1783447265625, "logps/rejected": -163.79800415039062, "loss": 0.8062, "rewards/accuracies": 0.5, "rewards/chosen": -1.4117655754089355, "rewards/margins": 0.8798112273216248, "rewards/rejected": -2.291576862335205, "step": 1304 }, { "epoch": 0.15, "learning_rate": 2.5833234912011336e-07, "logits/chosen": -2.2910573482513428, "logits/rejected": -2.236543655395508, "logps/chosen": -266.98468017578125, "logps/rejected": -240.9828338623047, "loss": 0.6776, "rewards/accuracies": 0.5, "rewards/chosen": -1.0236725807189941, "rewards/margins": 0.5636706948280334, "rewards/rejected": -1.5873432159423828, "step": 1305 }, { "epoch": 0.15, "learning_rate": 2.582969174441951e-07, "logits/chosen": -2.1433401107788086, "logits/rejected": -2.179068088531494, "logps/chosen": -265.03179931640625, "logps/rejected": -440.089111328125, "loss": 0.2582, "rewards/accuracies": 0.875, "rewards/chosen": -0.39379751682281494, "rewards/margins": 1.749212384223938, "rewards/rejected": -2.143009901046753, "step": 1306 }, { "epoch": 0.15, "learning_rate": 2.5826148576827685e-07, "logits/chosen": -2.9952545166015625, "logits/rejected": -2.9684958457946777, "logps/chosen": -278.02020263671875, "logps/rejected": -241.33447265625, "loss": 0.3047, "rewards/accuracies": 0.875, "rewards/chosen": -0.06552602350711823, "rewards/margins": 1.7741624116897583, "rewards/rejected": -1.8396886587142944, "step": 1307 }, { "epoch": 0.15, "learning_rate": 2.5822605409235855e-07, "logits/chosen": -2.938021183013916, "logits/rejected": -2.882143259048462, "logps/chosen": -262.88726806640625, "logps/rejected": -257.85260009765625, "loss": 0.2466, "rewards/accuracies": 0.875, "rewards/chosen": -0.04030492156744003, "rewards/margins": 2.654791831970215, "rewards/rejected": -2.695096969604492, "step": 1308 }, { "epoch": 0.15, "learning_rate": 2.581906224164403e-07, "logits/chosen": -2.0528976917266846, "logits/rejected": -2.0843005180358887, "logps/chosen": -349.78253173828125, "logps/rejected": -385.2113037109375, "loss": 0.3894, "rewards/accuracies": 0.875, "rewards/chosen": -0.4173968434333801, "rewards/margins": 1.789353370666504, "rewards/rejected": -2.2067501544952393, "step": 1309 }, { "epoch": 0.15, "learning_rate": 2.58155190740522e-07, "logits/chosen": -2.4269938468933105, "logits/rejected": -2.6440858840942383, "logps/chosen": -269.74468994140625, "logps/rejected": -230.79983520507812, "loss": 0.2703, "rewards/accuracies": 1.0, "rewards/chosen": -0.7360973358154297, "rewards/margins": 1.325857400894165, "rewards/rejected": -2.0619544982910156, "step": 1310 }, { "epoch": 0.15, "learning_rate": 2.5811975906460374e-07, "logits/chosen": -2.4051551818847656, "logits/rejected": -2.556385040283203, "logps/chosen": -170.62881469726562, "logps/rejected": -211.15036010742188, "loss": 1.2095, "rewards/accuracies": 0.625, "rewards/chosen": -1.3140572309494019, "rewards/margins": -0.5253822803497314, "rewards/rejected": -0.7886749505996704, "step": 1311 }, { "epoch": 0.15, "learning_rate": 2.5808432738868544e-07, "logits/chosen": -2.218942642211914, "logits/rejected": -2.1835391521453857, "logps/chosen": -357.28985595703125, "logps/rejected": -313.3472900390625, "loss": 0.3957, "rewards/accuracies": 0.875, "rewards/chosen": -1.170084834098816, "rewards/margins": 1.0231269598007202, "rewards/rejected": -2.193211555480957, "step": 1312 }, { "epoch": 0.15, "learning_rate": 2.580488957127672e-07, "logits/chosen": -2.1596434116363525, "logits/rejected": -2.027926445007324, "logps/chosen": -374.4466247558594, "logps/rejected": -249.26625061035156, "loss": 0.2326, "rewards/accuracies": 0.875, "rewards/chosen": 0.023939654231071472, "rewards/margins": 2.0093207359313965, "rewards/rejected": -1.9853808879852295, "step": 1313 }, { "epoch": 0.15, "learning_rate": 2.5801346403684894e-07, "logits/chosen": -2.1539344787597656, "logits/rejected": -2.3923866748809814, "logps/chosen": -523.3306884765625, "logps/rejected": -216.06979370117188, "loss": 0.5128, "rewards/accuracies": 0.625, "rewards/chosen": -0.9596171975135803, "rewards/margins": 1.064602255821228, "rewards/rejected": -2.024219274520874, "step": 1314 }, { "epoch": 0.15, "learning_rate": 2.579780323609307e-07, "logits/chosen": -2.6117448806762695, "logits/rejected": -2.838325023651123, "logps/chosen": -271.9878845214844, "logps/rejected": -177.68978881835938, "loss": 0.3914, "rewards/accuracies": 0.875, "rewards/chosen": -0.6180356740951538, "rewards/margins": 1.081275224685669, "rewards/rejected": -1.6993108987808228, "step": 1315 }, { "epoch": 0.15, "learning_rate": 2.579426006850124e-07, "logits/chosen": -3.0081565380096436, "logits/rejected": -2.921945095062256, "logps/chosen": -300.8717346191406, "logps/rejected": -232.50546264648438, "loss": 0.2556, "rewards/accuracies": 0.875, "rewards/chosen": -0.6478986740112305, "rewards/margins": 2.2287042140960693, "rewards/rejected": -2.8766026496887207, "step": 1316 }, { "epoch": 0.15, "learning_rate": 2.5790716900909413e-07, "logits/chosen": -2.403447389602661, "logits/rejected": -2.7603302001953125, "logps/chosen": -178.54513549804688, "logps/rejected": -237.83572387695312, "loss": 1.5268, "rewards/accuracies": 0.75, "rewards/chosen": -1.9448628425598145, "rewards/margins": 0.6794756054878235, "rewards/rejected": -2.6243386268615723, "step": 1317 }, { "epoch": 0.15, "learning_rate": 2.578717373331759e-07, "logits/chosen": -2.6839637756347656, "logits/rejected": -2.8709754943847656, "logps/chosen": -195.9561767578125, "logps/rejected": -149.57472229003906, "loss": 0.5216, "rewards/accuracies": 0.625, "rewards/chosen": -0.6218922138214111, "rewards/margins": 1.4257190227508545, "rewards/rejected": -2.0476112365722656, "step": 1318 }, { "epoch": 0.15, "learning_rate": 2.5783630565725757e-07, "logits/chosen": -2.5710055828094482, "logits/rejected": -2.729818820953369, "logps/chosen": -243.94366455078125, "logps/rejected": -198.39141845703125, "loss": 0.2747, "rewards/accuracies": 0.875, "rewards/chosen": -1.286311388015747, "rewards/margins": 1.6263314485549927, "rewards/rejected": -2.9126429557800293, "step": 1319 }, { "epoch": 0.15, "learning_rate": 2.578008739813393e-07, "logits/chosen": -2.606369733810425, "logits/rejected": -2.4206910133361816, "logps/chosen": -150.28768920898438, "logps/rejected": -226.32090759277344, "loss": 0.302, "rewards/accuracies": 1.0, "rewards/chosen": -0.9405965209007263, "rewards/margins": 1.6854833364486694, "rewards/rejected": -2.626079797744751, "step": 1320 }, { "epoch": 0.15, "learning_rate": 2.57765442305421e-07, "logits/chosen": -2.5065817832946777, "logits/rejected": -2.4328176975250244, "logps/chosen": -270.79266357421875, "logps/rejected": -174.7091827392578, "loss": 0.6692, "rewards/accuracies": 0.5, "rewards/chosen": -0.8658044338226318, "rewards/margins": 0.844093382358551, "rewards/rejected": -1.7098979949951172, "step": 1321 }, { "epoch": 0.15, "learning_rate": 2.5773001062950277e-07, "logits/chosen": -2.623093605041504, "logits/rejected": -2.3319602012634277, "logps/chosen": -260.4769287109375, "logps/rejected": -421.634033203125, "loss": 0.1187, "rewards/accuracies": 1.0, "rewards/chosen": -0.5434201955795288, "rewards/margins": 3.3658337593078613, "rewards/rejected": -3.9092538356781006, "step": 1322 }, { "epoch": 0.15, "learning_rate": 2.5769457895358446e-07, "logits/chosen": -2.72861909866333, "logits/rejected": -2.633969306945801, "logps/chosen": -252.8408660888672, "logps/rejected": -325.76739501953125, "loss": 0.1924, "rewards/accuracies": 1.0, "rewards/chosen": -0.48445379734039307, "rewards/margins": 2.6676597595214844, "rewards/rejected": -3.152113437652588, "step": 1323 }, { "epoch": 0.15, "learning_rate": 2.576591472776662e-07, "logits/chosen": -2.271425485610962, "logits/rejected": -2.6053483486175537, "logps/chosen": -382.7127685546875, "logps/rejected": -262.7862548828125, "loss": 0.5149, "rewards/accuracies": 0.75, "rewards/chosen": -0.6939293742179871, "rewards/margins": 1.2890352010726929, "rewards/rejected": -1.9829645156860352, "step": 1324 }, { "epoch": 0.15, "learning_rate": 2.5762371560174796e-07, "logits/chosen": -2.2549819946289062, "logits/rejected": -2.463052988052368, "logps/chosen": -190.62564086914062, "logps/rejected": -226.77798461914062, "loss": 0.3452, "rewards/accuracies": 0.75, "rewards/chosen": -0.30566123127937317, "rewards/margins": 1.878478765487671, "rewards/rejected": -2.1841399669647217, "step": 1325 }, { "epoch": 0.15, "learning_rate": 2.5758828392582965e-07, "logits/chosen": -2.437272548675537, "logits/rejected": -2.2199971675872803, "logps/chosen": -190.73500061035156, "logps/rejected": -217.64028930664062, "loss": 0.4331, "rewards/accuracies": 0.75, "rewards/chosen": -1.4925930500030518, "rewards/margins": 1.1181799173355103, "rewards/rejected": -2.6107730865478516, "step": 1326 }, { "epoch": 0.15, "learning_rate": 2.575528522499114e-07, "logits/chosen": -2.4711978435516357, "logits/rejected": -2.6265907287597656, "logps/chosen": -220.8153533935547, "logps/rejected": -142.74517822265625, "loss": 0.3583, "rewards/accuracies": 0.75, "rewards/chosen": -0.5246231555938721, "rewards/margins": 1.7034332752227783, "rewards/rejected": -2.2280566692352295, "step": 1327 }, { "epoch": 0.15, "learning_rate": 2.5751742057399315e-07, "logits/chosen": -2.591740846633911, "logits/rejected": -2.3444879055023193, "logps/chosen": -288.9967346191406, "logps/rejected": -199.04701232910156, "loss": 0.3303, "rewards/accuracies": 1.0, "rewards/chosen": -1.0520128011703491, "rewards/margins": 1.585219383239746, "rewards/rejected": -2.6372323036193848, "step": 1328 }, { "epoch": 0.15, "learning_rate": 2.574819888980749e-07, "logits/chosen": -1.963822364807129, "logits/rejected": -1.9915214776992798, "logps/chosen": -360.9922180175781, "logps/rejected": -268.242919921875, "loss": 0.2897, "rewards/accuracies": 1.0, "rewards/chosen": 0.21962544322013855, "rewards/margins": 1.5154588222503662, "rewards/rejected": -1.2958333492279053, "step": 1329 }, { "epoch": 0.15, "learning_rate": 2.574465572221566e-07, "logits/chosen": -2.7419259548187256, "logits/rejected": -2.723878860473633, "logps/chosen": -220.52293395996094, "logps/rejected": -303.1905822753906, "loss": 0.4512, "rewards/accuracies": 0.75, "rewards/chosen": -0.19251510500907898, "rewards/margins": 2.7715210914611816, "rewards/rejected": -2.964036464691162, "step": 1330 }, { "epoch": 0.15, "learning_rate": 2.5741112554623834e-07, "logits/chosen": -2.5507712364196777, "logits/rejected": -2.513840675354004, "logps/chosen": -123.3845443725586, "logps/rejected": -243.87869262695312, "loss": 0.3827, "rewards/accuracies": 0.625, "rewards/chosen": -0.6389557123184204, "rewards/margins": 1.8937108516693115, "rewards/rejected": -2.5326666831970215, "step": 1331 }, { "epoch": 0.15, "learning_rate": 2.5737569387032004e-07, "logits/chosen": -2.1146020889282227, "logits/rejected": -2.1643099784851074, "logps/chosen": -392.3270568847656, "logps/rejected": -263.0843811035156, "loss": 0.4183, "rewards/accuracies": 0.875, "rewards/chosen": -0.5789353847503662, "rewards/margins": 1.3677681684494019, "rewards/rejected": -1.946703553199768, "step": 1332 }, { "epoch": 0.16, "learning_rate": 2.573402621944018e-07, "logits/chosen": -2.618570327758789, "logits/rejected": -2.6255764961242676, "logps/chosen": -384.5768737792969, "logps/rejected": -272.1915283203125, "loss": 0.2364, "rewards/accuracies": 0.875, "rewards/chosen": -0.2640588879585266, "rewards/margins": 2.0706539154052734, "rewards/rejected": -2.3347127437591553, "step": 1333 }, { "epoch": 0.16, "learning_rate": 2.573048305184835e-07, "logits/chosen": -2.331184148788452, "logits/rejected": -2.4042835235595703, "logps/chosen": -130.33792114257812, "logps/rejected": -156.94134521484375, "loss": 0.6061, "rewards/accuracies": 0.625, "rewards/chosen": -1.297194242477417, "rewards/margins": 1.6097898483276367, "rewards/rejected": -2.9069840908050537, "step": 1334 }, { "epoch": 0.16, "learning_rate": 2.5726939884256523e-07, "logits/chosen": -2.280982732772827, "logits/rejected": -2.605081558227539, "logps/chosen": -256.3509826660156, "logps/rejected": -113.83618927001953, "loss": 1.7714, "rewards/accuracies": 0.5, "rewards/chosen": -1.7609096765518188, "rewards/margins": -1.1171070337295532, "rewards/rejected": -0.6438026428222656, "step": 1335 }, { "epoch": 0.16, "learning_rate": 2.57233967166647e-07, "logits/chosen": -3.0859014987945557, "logits/rejected": -3.128732919692993, "logps/chosen": -152.41734313964844, "logps/rejected": -270.3587646484375, "loss": 0.311, "rewards/accuracies": 0.875, "rewards/chosen": 0.10739638656377792, "rewards/margins": 2.7830190658569336, "rewards/rejected": -2.6756224632263184, "step": 1336 }, { "epoch": 0.16, "learning_rate": 2.571985354907287e-07, "logits/chosen": -2.878413200378418, "logits/rejected": -2.913888454437256, "logps/chosen": -148.760009765625, "logps/rejected": -181.4589385986328, "loss": 0.5932, "rewards/accuracies": 0.5, "rewards/chosen": -0.9090589284896851, "rewards/margins": 1.4192866086959839, "rewards/rejected": -2.328345537185669, "step": 1337 }, { "epoch": 0.16, "learning_rate": 2.571631038148104e-07, "logits/chosen": -2.5668108463287354, "logits/rejected": -2.4721970558166504, "logps/chosen": -313.0288391113281, "logps/rejected": -310.969970703125, "loss": 0.3041, "rewards/accuracies": 1.0, "rewards/chosen": -0.5441226959228516, "rewards/margins": 1.5696065425872803, "rewards/rejected": -2.113729238510132, "step": 1338 }, { "epoch": 0.16, "learning_rate": 2.571276721388922e-07, "logits/chosen": -2.1148698329925537, "logits/rejected": -2.149202346801758, "logps/chosen": -262.81231689453125, "logps/rejected": -295.19342041015625, "loss": 0.4222, "rewards/accuracies": 0.875, "rewards/chosen": -0.39852988719940186, "rewards/margins": 2.0969290733337402, "rewards/rejected": -2.4954588413238525, "step": 1339 }, { "epoch": 0.16, "learning_rate": 2.570922404629739e-07, "logits/chosen": -2.5729949474334717, "logits/rejected": -2.4704928398132324, "logps/chosen": -156.22219848632812, "logps/rejected": -180.4857177734375, "loss": 0.5178, "rewards/accuracies": 0.75, "rewards/chosen": -0.6211029291152954, "rewards/margins": 1.0970302820205688, "rewards/rejected": -1.7181332111358643, "step": 1340 }, { "epoch": 0.16, "learning_rate": 2.570568087870556e-07, "logits/chosen": -2.2308919429779053, "logits/rejected": -2.254133462905884, "logps/chosen": -496.53070068359375, "logps/rejected": -366.00787353515625, "loss": 0.2625, "rewards/accuracies": 0.875, "rewards/chosen": -0.09063857793807983, "rewards/margins": 1.8412785530090332, "rewards/rejected": -1.9319171905517578, "step": 1341 }, { "epoch": 0.16, "learning_rate": 2.5702137711113737e-07, "logits/chosen": -1.6075242757797241, "logits/rejected": -1.7619737386703491, "logps/chosen": -291.399658203125, "logps/rejected": -225.1926727294922, "loss": 0.9244, "rewards/accuracies": 0.625, "rewards/chosen": -0.7909039258956909, "rewards/margins": 0.2895992696285248, "rewards/rejected": -1.080503225326538, "step": 1342 }, { "epoch": 0.16, "learning_rate": 2.5698594543521906e-07, "logits/chosen": -1.9972341060638428, "logits/rejected": -1.772900938987732, "logps/chosen": -270.8129577636719, "logps/rejected": -346.89361572265625, "loss": 0.2179, "rewards/accuracies": 0.875, "rewards/chosen": -1.2643498182296753, "rewards/margins": 2.4869964122772217, "rewards/rejected": -3.7513463497161865, "step": 1343 }, { "epoch": 0.16, "learning_rate": 2.569505137593008e-07, "logits/chosen": -1.8463990688323975, "logits/rejected": -1.9862339496612549, "logps/chosen": -443.3493957519531, "logps/rejected": -355.87371826171875, "loss": 0.5692, "rewards/accuracies": 0.75, "rewards/chosen": -0.15197624266147614, "rewards/margins": 1.3466036319732666, "rewards/rejected": -1.498579978942871, "step": 1344 }, { "epoch": 0.16, "learning_rate": 2.569150820833825e-07, "logits/chosen": -2.4043538570404053, "logits/rejected": -2.223766803741455, "logps/chosen": -198.34397888183594, "logps/rejected": -279.2518310546875, "loss": 0.4058, "rewards/accuracies": 0.625, "rewards/chosen": -0.48655498027801514, "rewards/margins": 1.6732103824615479, "rewards/rejected": -2.1597654819488525, "step": 1345 }, { "epoch": 0.16, "learning_rate": 2.5687965040746426e-07, "logits/chosen": -2.5667433738708496, "logits/rejected": -2.280487060546875, "logps/chosen": -142.64883422851562, "logps/rejected": -205.41995239257812, "loss": 0.2952, "rewards/accuracies": 0.875, "rewards/chosen": -0.28380006551742554, "rewards/margins": 2.515974521636963, "rewards/rejected": -2.799774408340454, "step": 1346 }, { "epoch": 0.16, "learning_rate": 2.56844218731546e-07, "logits/chosen": -2.4165611267089844, "logits/rejected": -2.3871965408325195, "logps/chosen": -193.16836547851562, "logps/rejected": -202.58477783203125, "loss": 0.7506, "rewards/accuracies": 0.5, "rewards/chosen": -1.1418840885162354, "rewards/margins": 1.4332711696624756, "rewards/rejected": -2.57515549659729, "step": 1347 }, { "epoch": 0.16, "learning_rate": 2.568087870556277e-07, "logits/chosen": -2.6768314838409424, "logits/rejected": -2.7175498008728027, "logps/chosen": -254.9682159423828, "logps/rejected": -220.0244903564453, "loss": 0.4285, "rewards/accuracies": 0.625, "rewards/chosen": -0.03170609474182129, "rewards/margins": 1.2717669010162354, "rewards/rejected": -1.3034729957580566, "step": 1348 }, { "epoch": 0.16, "learning_rate": 2.5677335537970945e-07, "logits/chosen": -2.4211204051971436, "logits/rejected": -2.2839698791503906, "logps/chosen": -142.7660675048828, "logps/rejected": -234.14300537109375, "loss": 0.6832, "rewards/accuracies": 0.75, "rewards/chosen": -1.0588620901107788, "rewards/margins": 1.2528676986694336, "rewards/rejected": -2.311729669570923, "step": 1349 }, { "epoch": 0.16, "learning_rate": 2.567379237037912e-07, "logits/chosen": -2.4518966674804688, "logits/rejected": -2.2803406715393066, "logps/chosen": -114.3258056640625, "logps/rejected": -185.38467407226562, "loss": 0.5015, "rewards/accuracies": 0.875, "rewards/chosen": -0.4380975663661957, "rewards/margins": 2.8100509643554688, "rewards/rejected": -3.2481489181518555, "step": 1350 }, { "epoch": 0.16, "learning_rate": 2.5670249202787295e-07, "logits/chosen": -2.5462796688079834, "logits/rejected": -2.256603956222534, "logps/chosen": -180.09469604492188, "logps/rejected": -230.59991455078125, "loss": 0.615, "rewards/accuracies": 0.625, "rewards/chosen": -0.2468372881412506, "rewards/margins": 0.9785282015800476, "rewards/rejected": -1.2253655195236206, "step": 1351 }, { "epoch": 0.16, "learning_rate": 2.5666706035195464e-07, "logits/chosen": -2.766403913497925, "logits/rejected": -2.698084592819214, "logps/chosen": -286.5577697753906, "logps/rejected": -217.39964294433594, "loss": 0.3568, "rewards/accuracies": 0.875, "rewards/chosen": -0.6951740384101868, "rewards/margins": 1.902270793914795, "rewards/rejected": -2.597445011138916, "step": 1352 }, { "epoch": 0.16, "learning_rate": 2.566316286760364e-07, "logits/chosen": -2.8344321250915527, "logits/rejected": -2.8785691261291504, "logps/chosen": -302.2632141113281, "logps/rejected": -224.1578826904297, "loss": 0.5536, "rewards/accuracies": 0.5, "rewards/chosen": -0.9144960641860962, "rewards/margins": 1.2336583137512207, "rewards/rejected": -2.1481542587280273, "step": 1353 }, { "epoch": 0.16, "learning_rate": 2.565961970001181e-07, "logits/chosen": -2.5811986923217773, "logits/rejected": -2.459149122238159, "logps/chosen": -111.94300079345703, "logps/rejected": -245.4078369140625, "loss": 0.3764, "rewards/accuracies": 0.75, "rewards/chosen": -0.3571208715438843, "rewards/margins": 2.2172932624816895, "rewards/rejected": -2.574413776397705, "step": 1354 }, { "epoch": 0.16, "learning_rate": 2.5656076532419983e-07, "logits/chosen": -1.9355041980743408, "logits/rejected": -1.6608161926269531, "logps/chosen": -307.44268798828125, "logps/rejected": -341.14312744140625, "loss": 0.5796, "rewards/accuracies": 0.75, "rewards/chosen": -0.5280605554580688, "rewards/margins": 1.0630884170532227, "rewards/rejected": -1.591149091720581, "step": 1355 }, { "epoch": 0.16, "learning_rate": 2.5652533364828153e-07, "logits/chosen": -2.4808425903320312, "logits/rejected": -2.803968906402588, "logps/chosen": -120.89959716796875, "logps/rejected": -171.71539306640625, "loss": 0.6546, "rewards/accuracies": 0.75, "rewards/chosen": -0.8607321977615356, "rewards/margins": 1.451585292816162, "rewards/rejected": -2.3123176097869873, "step": 1356 }, { "epoch": 0.16, "learning_rate": 2.564899019723633e-07, "logits/chosen": -2.4859161376953125, "logits/rejected": -2.514075756072998, "logps/chosen": -244.73773193359375, "logps/rejected": -359.1509704589844, "loss": 0.2803, "rewards/accuracies": 0.875, "rewards/chosen": -0.4761694073677063, "rewards/margins": 3.351926565170288, "rewards/rejected": -3.8280959129333496, "step": 1357 }, { "epoch": 0.16, "learning_rate": 2.5645447029644503e-07, "logits/chosen": -2.4028098583221436, "logits/rejected": -2.524381637573242, "logps/chosen": -125.20744323730469, "logps/rejected": -190.55715942382812, "loss": 1.0148, "rewards/accuracies": 0.625, "rewards/chosen": -1.1335519552230835, "rewards/margins": 1.6602842807769775, "rewards/rejected": -2.7938363552093506, "step": 1358 }, { "epoch": 0.16, "learning_rate": 2.564190386205267e-07, "logits/chosen": -2.175311326980591, "logits/rejected": -2.1788289546966553, "logps/chosen": -295.306640625, "logps/rejected": -220.8590087890625, "loss": 0.8267, "rewards/accuracies": 0.875, "rewards/chosen": -1.1686832904815674, "rewards/margins": 0.896936297416687, "rewards/rejected": -2.065619707107544, "step": 1359 }, { "epoch": 0.16, "learning_rate": 2.5638360694460847e-07, "logits/chosen": -2.4242844581604004, "logits/rejected": -2.4405691623687744, "logps/chosen": -336.92950439453125, "logps/rejected": -355.7394714355469, "loss": 0.2288, "rewards/accuracies": 1.0, "rewards/chosen": -0.1461772918701172, "rewards/margins": 2.3708856105804443, "rewards/rejected": -2.5170631408691406, "step": 1360 }, { "epoch": 0.16, "learning_rate": 2.5634817526869017e-07, "logits/chosen": -2.347684860229492, "logits/rejected": -2.2065742015838623, "logps/chosen": -234.31336975097656, "logps/rejected": -455.9063720703125, "loss": 0.3685, "rewards/accuracies": 0.625, "rewards/chosen": -0.42973679304122925, "rewards/margins": 1.9545629024505615, "rewards/rejected": -2.3842997550964355, "step": 1361 }, { "epoch": 0.16, "learning_rate": 2.563127435927719e-07, "logits/chosen": -2.2957115173339844, "logits/rejected": -2.381695032119751, "logps/chosen": -228.0542755126953, "logps/rejected": -222.90185546875, "loss": 0.8831, "rewards/accuracies": 0.625, "rewards/chosen": -1.084787130355835, "rewards/margins": 0.5302080512046814, "rewards/rejected": -1.6149951219558716, "step": 1362 }, { "epoch": 0.16, "learning_rate": 2.5627731191685366e-07, "logits/chosen": -2.4535489082336426, "logits/rejected": -2.6556990146636963, "logps/chosen": -322.32257080078125, "logps/rejected": -631.0873413085938, "loss": 0.1926, "rewards/accuracies": 0.875, "rewards/chosen": -0.15538160502910614, "rewards/margins": 2.9914615154266357, "rewards/rejected": -3.1468429565429688, "step": 1363 }, { "epoch": 0.16, "learning_rate": 2.562418802409354e-07, "logits/chosen": -1.9608724117279053, "logits/rejected": -2.2341926097869873, "logps/chosen": -288.7054748535156, "logps/rejected": -326.9286804199219, "loss": 0.5128, "rewards/accuracies": 0.75, "rewards/chosen": -0.8637473583221436, "rewards/margins": 1.4533658027648926, "rewards/rejected": -2.317113161087036, "step": 1364 }, { "epoch": 0.16, "learning_rate": 2.562064485650171e-07, "logits/chosen": -2.2859160900115967, "logits/rejected": -2.3826234340667725, "logps/chosen": -401.4815368652344, "logps/rejected": -371.4460144042969, "loss": 0.4785, "rewards/accuracies": 0.75, "rewards/chosen": -0.10463443398475647, "rewards/margins": 1.0209720134735107, "rewards/rejected": -1.1256064176559448, "step": 1365 }, { "epoch": 0.16, "learning_rate": 2.5617101688909886e-07, "logits/chosen": -1.9445691108703613, "logits/rejected": -2.1344969272613525, "logps/chosen": -462.28594970703125, "logps/rejected": -227.2145538330078, "loss": 0.6172, "rewards/accuracies": 0.75, "rewards/chosen": -0.6244882345199585, "rewards/margins": 0.7379720211029053, "rewards/rejected": -1.3624602556228638, "step": 1366 }, { "epoch": 0.16, "learning_rate": 2.5613558521318055e-07, "logits/chosen": -2.6682169437408447, "logits/rejected": -2.3720436096191406, "logps/chosen": -174.49417114257812, "logps/rejected": -295.71661376953125, "loss": 0.3073, "rewards/accuracies": 0.875, "rewards/chosen": -0.7137288451194763, "rewards/margins": 1.3852548599243164, "rewards/rejected": -2.0989837646484375, "step": 1367 }, { "epoch": 0.16, "learning_rate": 2.561001535372623e-07, "logits/chosen": -2.4442567825317383, "logits/rejected": -2.2579214572906494, "logps/chosen": -228.07669067382812, "logps/rejected": -257.4449462890625, "loss": 0.6508, "rewards/accuracies": 0.625, "rewards/chosen": -0.5556291937828064, "rewards/margins": 0.6714709997177124, "rewards/rejected": -1.2271002531051636, "step": 1368 }, { "epoch": 0.16, "learning_rate": 2.5606472186134405e-07, "logits/chosen": -2.4356842041015625, "logits/rejected": -2.390456438064575, "logps/chosen": -157.12460327148438, "logps/rejected": -259.62945556640625, "loss": 0.2091, "rewards/accuracies": 1.0, "rewards/chosen": -0.5258910655975342, "rewards/margins": 2.343991994857788, "rewards/rejected": -2.869882822036743, "step": 1369 }, { "epoch": 0.16, "learning_rate": 2.5602929018542575e-07, "logits/chosen": -2.737581729888916, "logits/rejected": -2.7239394187927246, "logps/chosen": -265.8567810058594, "logps/rejected": -222.8844451904297, "loss": 0.423, "rewards/accuracies": 0.875, "rewards/chosen": -0.5852282047271729, "rewards/margins": 1.593953251838684, "rewards/rejected": -2.1791815757751465, "step": 1370 }, { "epoch": 0.16, "learning_rate": 2.559938585095075e-07, "logits/chosen": -1.7767667770385742, "logits/rejected": -1.64446222782135, "logps/chosen": -462.2033996582031, "logps/rejected": -507.33270263671875, "loss": 0.6732, "rewards/accuracies": 0.5, "rewards/chosen": -1.0352072715759277, "rewards/margins": 1.3890979290008545, "rewards/rejected": -2.4243052005767822, "step": 1371 }, { "epoch": 0.16, "learning_rate": 2.559584268335892e-07, "logits/chosen": -2.5318613052368164, "logits/rejected": -2.5394906997680664, "logps/chosen": -81.87847900390625, "logps/rejected": -237.6739501953125, "loss": 0.2442, "rewards/accuracies": 0.875, "rewards/chosen": -0.32076162099838257, "rewards/margins": 2.4318761825561523, "rewards/rejected": -2.7526376247406006, "step": 1372 }, { "epoch": 0.16, "learning_rate": 2.5592299515767094e-07, "logits/chosen": -2.315117359161377, "logits/rejected": -2.050536870956421, "logps/chosen": -223.43609619140625, "logps/rejected": -340.232177734375, "loss": 0.2637, "rewards/accuracies": 0.875, "rewards/chosen": -0.6744475364685059, "rewards/margins": 3.0697433948516846, "rewards/rejected": -3.7441906929016113, "step": 1373 }, { "epoch": 0.16, "learning_rate": 2.558875634817527e-07, "logits/chosen": -2.433814525604248, "logits/rejected": -2.3043675422668457, "logps/chosen": -524.0728149414062, "logps/rejected": -313.24139404296875, "loss": 0.5718, "rewards/accuracies": 0.75, "rewards/chosen": -0.876115083694458, "rewards/margins": 0.9196082949638367, "rewards/rejected": -1.79572331905365, "step": 1374 }, { "epoch": 0.16, "learning_rate": 2.5585213180583444e-07, "logits/chosen": -2.665214776992798, "logits/rejected": -2.723144769668579, "logps/chosen": -253.61248779296875, "logps/rejected": -307.5253601074219, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": -1.1106758117675781, "rewards/margins": 3.1171908378601074, "rewards/rejected": -4.2278666496276855, "step": 1375 }, { "epoch": 0.16, "learning_rate": 2.5581670012991613e-07, "logits/chosen": -2.1885123252868652, "logits/rejected": -2.2223191261291504, "logps/chosen": -265.1954650878906, "logps/rejected": -218.85366821289062, "loss": 0.3875, "rewards/accuracies": 0.75, "rewards/chosen": -0.7820048332214355, "rewards/margins": 1.2820836305618286, "rewards/rejected": -2.0640883445739746, "step": 1376 }, { "epoch": 0.16, "learning_rate": 2.557812684539979e-07, "logits/chosen": -2.271019697189331, "logits/rejected": -2.5251927375793457, "logps/chosen": -300.3597106933594, "logps/rejected": -215.67977905273438, "loss": 0.4245, "rewards/accuracies": 0.75, "rewards/chosen": -0.6886817216873169, "rewards/margins": 1.2243361473083496, "rewards/rejected": -1.913017988204956, "step": 1377 }, { "epoch": 0.16, "learning_rate": 2.557458367780796e-07, "logits/chosen": -2.1268584728240967, "logits/rejected": -2.065530776977539, "logps/chosen": -338.9162292480469, "logps/rejected": -451.73406982421875, "loss": 0.2847, "rewards/accuracies": 0.875, "rewards/chosen": -0.807189404964447, "rewards/margins": 2.9132394790649414, "rewards/rejected": -3.720428943634033, "step": 1378 }, { "epoch": 0.16, "learning_rate": 2.557104051021613e-07, "logits/chosen": -2.2316670417785645, "logits/rejected": -2.183450222015381, "logps/chosen": -292.0419616699219, "logps/rejected": -225.8133544921875, "loss": 0.8372, "rewards/accuracies": 0.375, "rewards/chosen": -1.0222045183181763, "rewards/margins": 0.6569688320159912, "rewards/rejected": -1.679173231124878, "step": 1379 }, { "epoch": 0.16, "learning_rate": 2.5567497342624307e-07, "logits/chosen": -2.0154285430908203, "logits/rejected": -2.4534904956817627, "logps/chosen": -242.72067260742188, "logps/rejected": -173.418212890625, "loss": 0.6977, "rewards/accuracies": 0.625, "rewards/chosen": -0.788324236869812, "rewards/margins": 0.409254252910614, "rewards/rejected": -1.1975784301757812, "step": 1380 }, { "epoch": 0.16, "learning_rate": 2.5563954175032477e-07, "logits/chosen": -2.3626084327697754, "logits/rejected": -2.285541534423828, "logps/chosen": -314.45977783203125, "logps/rejected": -246.85671997070312, "loss": 0.3774, "rewards/accuracies": 0.75, "rewards/chosen": -0.3344404101371765, "rewards/margins": 1.343726396560669, "rewards/rejected": -1.6781668663024902, "step": 1381 }, { "epoch": 0.16, "learning_rate": 2.556041100744065e-07, "logits/chosen": -2.135277509689331, "logits/rejected": -1.9412298202514648, "logps/chosen": -162.67222595214844, "logps/rejected": -309.967529296875, "loss": 0.2542, "rewards/accuracies": 1.0, "rewards/chosen": -0.32310378551483154, "rewards/margins": 1.8537085056304932, "rewards/rejected": -2.176812171936035, "step": 1382 }, { "epoch": 0.16, "learning_rate": 2.555686783984882e-07, "logits/chosen": -1.503592610359192, "logits/rejected": -2.3268470764160156, "logps/chosen": -652.925537109375, "logps/rejected": -299.5897216796875, "loss": 0.8479, "rewards/accuracies": 0.5, "rewards/chosen": -1.2690744400024414, "rewards/margins": 0.31684812903404236, "rewards/rejected": -1.5859225988388062, "step": 1383 }, { "epoch": 0.16, "learning_rate": 2.5553324672256996e-07, "logits/chosen": -2.298218250274658, "logits/rejected": -2.4614973068237305, "logps/chosen": -410.8437805175781, "logps/rejected": -306.4954528808594, "loss": 0.4452, "rewards/accuracies": 0.625, "rewards/chosen": -0.4800405502319336, "rewards/margins": 1.0028564929962158, "rewards/rejected": -1.4828970432281494, "step": 1384 }, { "epoch": 0.16, "learning_rate": 2.554978150466517e-07, "logits/chosen": -2.329585552215576, "logits/rejected": -2.800426483154297, "logps/chosen": -309.0374755859375, "logps/rejected": -173.8949737548828, "loss": 0.2383, "rewards/accuracies": 1.0, "rewards/chosen": -0.263191819190979, "rewards/margins": 1.628694772720337, "rewards/rejected": -1.8918864727020264, "step": 1385 }, { "epoch": 0.16, "learning_rate": 2.5546238337073346e-07, "logits/chosen": -2.497685432434082, "logits/rejected": -2.3363919258117676, "logps/chosen": -319.3771667480469, "logps/rejected": -323.34844970703125, "loss": 0.2772, "rewards/accuracies": 1.0, "rewards/chosen": -0.31427067518234253, "rewards/margins": 1.4474875926971436, "rewards/rejected": -1.7617582082748413, "step": 1386 }, { "epoch": 0.16, "learning_rate": 2.5542695169481515e-07, "logits/chosen": -2.2611162662506104, "logits/rejected": -2.261871814727783, "logps/chosen": -208.4417266845703, "logps/rejected": -241.4300994873047, "loss": 0.5067, "rewards/accuracies": 0.875, "rewards/chosen": -0.8845191597938538, "rewards/margins": 1.5092263221740723, "rewards/rejected": -2.3937456607818604, "step": 1387 }, { "epoch": 0.16, "learning_rate": 2.553915200188969e-07, "logits/chosen": -2.4850268363952637, "logits/rejected": -2.5756609439849854, "logps/chosen": -372.37286376953125, "logps/rejected": -369.28857421875, "loss": 0.4674, "rewards/accuracies": 0.875, "rewards/chosen": -0.35601887106895447, "rewards/margins": 2.714913845062256, "rewards/rejected": -3.0709328651428223, "step": 1388 }, { "epoch": 0.16, "learning_rate": 2.553560883429786e-07, "logits/chosen": -2.078963279724121, "logits/rejected": -2.195012092590332, "logps/chosen": -173.0458221435547, "logps/rejected": -174.69119262695312, "loss": 0.9262, "rewards/accuracies": 0.625, "rewards/chosen": -1.8130710124969482, "rewards/margins": 0.16199657320976257, "rewards/rejected": -1.9750676155090332, "step": 1389 }, { "epoch": 0.16, "learning_rate": 2.5532065666706035e-07, "logits/chosen": -2.1735665798187256, "logits/rejected": -2.3815298080444336, "logps/chosen": -209.03826904296875, "logps/rejected": -237.97760009765625, "loss": 0.46, "rewards/accuracies": 0.875, "rewards/chosen": -0.9716988801956177, "rewards/margins": 1.7051472663879395, "rewards/rejected": -2.6768462657928467, "step": 1390 }, { "epoch": 0.16, "learning_rate": 2.5528522499114204e-07, "logits/chosen": -2.5698699951171875, "logits/rejected": -2.5621001720428467, "logps/chosen": -380.8415832519531, "logps/rejected": -294.79547119140625, "loss": 0.1941, "rewards/accuracies": 1.0, "rewards/chosen": -0.4688035845756531, "rewards/margins": 2.826749563217163, "rewards/rejected": -3.29555344581604, "step": 1391 }, { "epoch": 0.16, "learning_rate": 2.552497933152238e-07, "logits/chosen": -2.318331718444824, "logits/rejected": -2.4113733768463135, "logps/chosen": -393.7342529296875, "logps/rejected": -246.3109130859375, "loss": 0.6815, "rewards/accuracies": 0.75, "rewards/chosen": -0.8531315922737122, "rewards/margins": 0.5590286254882812, "rewards/rejected": -1.4121601581573486, "step": 1392 }, { "epoch": 0.16, "learning_rate": 2.5521436163930554e-07, "logits/chosen": -2.1233081817626953, "logits/rejected": -2.1238319873809814, "logps/chosen": -241.42254638671875, "logps/rejected": -193.67308044433594, "loss": 0.4516, "rewards/accuracies": 0.625, "rewards/chosen": -0.4255526661872864, "rewards/margins": 1.3450242280960083, "rewards/rejected": -1.7705767154693604, "step": 1393 }, { "epoch": 0.16, "learning_rate": 2.5517892996338723e-07, "logits/chosen": -2.2593624591827393, "logits/rejected": -1.9347436428070068, "logps/chosen": -197.30935668945312, "logps/rejected": -351.3586730957031, "loss": 0.092, "rewards/accuracies": 1.0, "rewards/chosen": -0.7607911825180054, "rewards/margins": 3.3266098499298096, "rewards/rejected": -4.087401390075684, "step": 1394 }, { "epoch": 0.16, "learning_rate": 2.55143498287469e-07, "logits/chosen": -2.233491897583008, "logits/rejected": -2.3034064769744873, "logps/chosen": -281.9127502441406, "logps/rejected": -178.6834259033203, "loss": 0.1675, "rewards/accuracies": 1.0, "rewards/chosen": -0.8219869136810303, "rewards/margins": 2.246628999710083, "rewards/rejected": -3.0686159133911133, "step": 1395 }, { "epoch": 0.16, "learning_rate": 2.551080666115507e-07, "logits/chosen": -2.4315638542175293, "logits/rejected": -2.3997724056243896, "logps/chosen": -328.5556945800781, "logps/rejected": -253.26925659179688, "loss": 0.3248, "rewards/accuracies": 0.875, "rewards/chosen": -0.6127278208732605, "rewards/margins": 1.7955207824707031, "rewards/rejected": -2.4082484245300293, "step": 1396 }, { "epoch": 0.16, "learning_rate": 2.550726349356325e-07, "logits/chosen": -2.0050408840179443, "logits/rejected": -2.0629847049713135, "logps/chosen": -300.78106689453125, "logps/rejected": -335.582763671875, "loss": 0.8176, "rewards/accuracies": 0.625, "rewards/chosen": -0.9264078140258789, "rewards/margins": 0.7214505672454834, "rewards/rejected": -1.6478583812713623, "step": 1397 }, { "epoch": 0.16, "learning_rate": 2.550372032597142e-07, "logits/chosen": -2.550112009048462, "logits/rejected": -2.543943405151367, "logps/chosen": -160.225341796875, "logps/rejected": -161.807861328125, "loss": 0.6014, "rewards/accuracies": 0.75, "rewards/chosen": -0.8151851892471313, "rewards/margins": 0.847303032875061, "rewards/rejected": -1.662488341331482, "step": 1398 }, { "epoch": 0.16, "learning_rate": 2.550017715837959e-07, "logits/chosen": -2.269137382507324, "logits/rejected": -2.0593338012695312, "logps/chosen": -243.57907104492188, "logps/rejected": -275.1543884277344, "loss": 0.3182, "rewards/accuracies": 1.0, "rewards/chosen": -0.2068558782339096, "rewards/margins": 1.350659728050232, "rewards/rejected": -1.5575157403945923, "step": 1399 }, { "epoch": 0.16, "learning_rate": 2.549663399078776e-07, "logits/chosen": -2.265998363494873, "logits/rejected": -1.954296350479126, "logps/chosen": -226.85997009277344, "logps/rejected": -283.2669372558594, "loss": 0.5612, "rewards/accuracies": 0.625, "rewards/chosen": -0.4435381591320038, "rewards/margins": 1.1067390441894531, "rewards/rejected": -1.5502773523330688, "step": 1400 }, { "epoch": 0.16, "learning_rate": 2.5493090823195937e-07, "logits/chosen": -2.5113468170166016, "logits/rejected": -2.5954315662384033, "logps/chosen": -163.50094604492188, "logps/rejected": -206.63861083984375, "loss": 0.3264, "rewards/accuracies": 0.875, "rewards/chosen": -0.5241639614105225, "rewards/margins": 1.8429217338562012, "rewards/rejected": -2.3670856952667236, "step": 1401 }, { "epoch": 0.16, "learning_rate": 2.5489547655604106e-07, "logits/chosen": -2.233236312866211, "logits/rejected": -2.124704122543335, "logps/chosen": -240.0445556640625, "logps/rejected": -384.36895751953125, "loss": 0.9748, "rewards/accuracies": 0.625, "rewards/chosen": -0.6617593169212341, "rewards/margins": 0.6771878004074097, "rewards/rejected": -1.338947057723999, "step": 1402 }, { "epoch": 0.16, "learning_rate": 2.548600448801228e-07, "logits/chosen": -2.21439266204834, "logits/rejected": -2.3331658840179443, "logps/chosen": -161.24575805664062, "logps/rejected": -209.04949951171875, "loss": 0.1877, "rewards/accuracies": 1.0, "rewards/chosen": -0.021859414875507355, "rewards/margins": 2.559685468673706, "rewards/rejected": -2.581544876098633, "step": 1403 }, { "epoch": 0.16, "learning_rate": 2.5482461320420456e-07, "logits/chosen": -2.147665023803711, "logits/rejected": -2.1107535362243652, "logps/chosen": -279.0518493652344, "logps/rejected": -419.6611022949219, "loss": 0.3079, "rewards/accuracies": 0.75, "rewards/chosen": -0.8689256906509399, "rewards/margins": 3.273087978363037, "rewards/rejected": -4.1420135498046875, "step": 1404 }, { "epoch": 0.16, "learning_rate": 2.5478918152828626e-07, "logits/chosen": -2.8913490772247314, "logits/rejected": -3.001032829284668, "logps/chosen": -290.12628173828125, "logps/rejected": -221.062255859375, "loss": 0.5532, "rewards/accuracies": 0.75, "rewards/chosen": -0.955291748046875, "rewards/margins": 0.7878057956695557, "rewards/rejected": -1.7430974245071411, "step": 1405 }, { "epoch": 0.16, "learning_rate": 2.54753749852368e-07, "logits/chosen": -2.6902358531951904, "logits/rejected": -2.475578784942627, "logps/chosen": -186.46896362304688, "logps/rejected": -219.74148559570312, "loss": 0.4485, "rewards/accuracies": 0.75, "rewards/chosen": -0.6496744751930237, "rewards/margins": 1.7185355424880981, "rewards/rejected": -2.3682098388671875, "step": 1406 }, { "epoch": 0.16, "learning_rate": 2.547183181764497e-07, "logits/chosen": -1.928343653678894, "logits/rejected": -2.0593793392181396, "logps/chosen": -121.61155700683594, "logps/rejected": -112.32550048828125, "loss": 0.5566, "rewards/accuracies": 0.75, "rewards/chosen": -0.062213234603405, "rewards/margins": 0.7572500109672546, "rewards/rejected": -0.819463312625885, "step": 1407 }, { "epoch": 0.16, "learning_rate": 2.546828865005315e-07, "logits/chosen": -2.183595895767212, "logits/rejected": -2.0249199867248535, "logps/chosen": -250.4213104248047, "logps/rejected": -244.65008544921875, "loss": 0.5559, "rewards/accuracies": 0.75, "rewards/chosen": -1.117912769317627, "rewards/margins": 1.2221598625183105, "rewards/rejected": -2.3400726318359375, "step": 1408 }, { "epoch": 0.16, "learning_rate": 2.546474548246132e-07, "logits/chosen": -2.457787036895752, "logits/rejected": -2.091348648071289, "logps/chosen": -158.55740356445312, "logps/rejected": -228.960205078125, "loss": 0.4562, "rewards/accuracies": 0.75, "rewards/chosen": -0.8764073252677917, "rewards/margins": 1.4063568115234375, "rewards/rejected": -2.282764196395874, "step": 1409 }, { "epoch": 0.16, "learning_rate": 2.5461202314869495e-07, "logits/chosen": -2.66800856590271, "logits/rejected": -2.6126551628112793, "logps/chosen": -503.172119140625, "logps/rejected": -299.84820556640625, "loss": 0.2342, "rewards/accuracies": 0.875, "rewards/chosen": -0.5421231985092163, "rewards/margins": 1.9351019859313965, "rewards/rejected": -2.4772253036499023, "step": 1410 }, { "epoch": 0.16, "learning_rate": 2.5457659147277664e-07, "logits/chosen": -2.4586262702941895, "logits/rejected": -2.377124309539795, "logps/chosen": -335.5270690917969, "logps/rejected": -225.5157470703125, "loss": 0.401, "rewards/accuracies": 0.875, "rewards/chosen": -0.12433779239654541, "rewards/margins": 1.3591395616531372, "rewards/rejected": -1.483477234840393, "step": 1411 }, { "epoch": 0.16, "learning_rate": 2.545411597968584e-07, "logits/chosen": -2.5692687034606934, "logits/rejected": -2.4680094718933105, "logps/chosen": -211.6266632080078, "logps/rejected": -270.94122314453125, "loss": 0.2269, "rewards/accuracies": 0.875, "rewards/chosen": -0.47314298152923584, "rewards/margins": 2.4017884731292725, "rewards/rejected": -2.874931573867798, "step": 1412 }, { "epoch": 0.16, "learning_rate": 2.545057281209401e-07, "logits/chosen": -1.3163652420043945, "logits/rejected": -1.7163945436477661, "logps/chosen": -356.2242431640625, "logps/rejected": -352.26458740234375, "loss": 0.532, "rewards/accuracies": 0.625, "rewards/chosen": -0.4944991171360016, "rewards/margins": 1.0069080591201782, "rewards/rejected": -1.501407265663147, "step": 1413 }, { "epoch": 0.16, "learning_rate": 2.5447029644502184e-07, "logits/chosen": -2.384418487548828, "logits/rejected": -2.6034252643585205, "logps/chosen": -452.16510009765625, "logps/rejected": -348.62579345703125, "loss": 0.5851, "rewards/accuracies": 0.75, "rewards/chosen": -1.4056226015090942, "rewards/margins": 1.7280495166778564, "rewards/rejected": -3.1336722373962402, "step": 1414 }, { "epoch": 0.16, "learning_rate": 2.544348647691036e-07, "logits/chosen": -2.114053726196289, "logits/rejected": -2.2562427520751953, "logps/chosen": -383.5710144042969, "logps/rejected": -190.02723693847656, "loss": 0.6396, "rewards/accuracies": 0.75, "rewards/chosen": -0.8270288109779358, "rewards/margins": 0.8645678162574768, "rewards/rejected": -1.6915966272354126, "step": 1415 }, { "epoch": 0.16, "learning_rate": 2.543994330931853e-07, "logits/chosen": -2.4630913734436035, "logits/rejected": -2.5685012340545654, "logps/chosen": -163.06808471679688, "logps/rejected": -181.20407104492188, "loss": 0.3612, "rewards/accuracies": 0.75, "rewards/chosen": -0.3045794665813446, "rewards/margins": 1.7313039302825928, "rewards/rejected": -2.0358831882476807, "step": 1416 }, { "epoch": 0.16, "learning_rate": 2.5436400141726703e-07, "logits/chosen": -1.9290547370910645, "logits/rejected": -2.1793529987335205, "logps/chosen": -268.989501953125, "logps/rejected": -308.44696044921875, "loss": 0.2541, "rewards/accuracies": 0.875, "rewards/chosen": -1.0143487453460693, "rewards/margins": 2.9500749111175537, "rewards/rejected": -3.964423656463623, "step": 1417 }, { "epoch": 0.16, "learning_rate": 2.543285697413487e-07, "logits/chosen": -2.6333277225494385, "logits/rejected": -2.6915454864501953, "logps/chosen": -435.1848449707031, "logps/rejected": -287.0091857910156, "loss": 0.5823, "rewards/accuracies": 0.625, "rewards/chosen": -0.8731905221939087, "rewards/margins": 1.3949429988861084, "rewards/rejected": -2.2681336402893066, "step": 1418 }, { "epoch": 0.17, "learning_rate": 2.5429313806543047e-07, "logits/chosen": -2.6009397506713867, "logits/rejected": -2.6713075637817383, "logps/chosen": -233.719482421875, "logps/rejected": -265.51361083984375, "loss": 0.4733, "rewards/accuracies": 0.875, "rewards/chosen": -0.19008395075798035, "rewards/margins": 1.0172450542449951, "rewards/rejected": -1.2073290348052979, "step": 1419 }, { "epoch": 0.17, "learning_rate": 2.542577063895122e-07, "logits/chosen": -2.3597967624664307, "logits/rejected": -2.5746517181396484, "logps/chosen": -368.05743408203125, "logps/rejected": -260.9460754394531, "loss": 0.5387, "rewards/accuracies": 0.75, "rewards/chosen": -0.7789199352264404, "rewards/margins": 1.4355340003967285, "rewards/rejected": -2.214453935623169, "step": 1420 }, { "epoch": 0.17, "learning_rate": 2.5422227471359397e-07, "logits/chosen": -2.4228570461273193, "logits/rejected": -2.019545555114746, "logps/chosen": -246.39735412597656, "logps/rejected": -329.96337890625, "loss": 1.0397, "rewards/accuracies": 0.5, "rewards/chosen": -0.6318446397781372, "rewards/margins": 0.029366105794906616, "rewards/rejected": -0.6612107753753662, "step": 1421 }, { "epoch": 0.17, "learning_rate": 2.5418684303767567e-07, "logits/chosen": -2.503176689147949, "logits/rejected": -2.5368566513061523, "logps/chosen": -247.5775604248047, "logps/rejected": -186.21414184570312, "loss": 0.8579, "rewards/accuracies": 0.5, "rewards/chosen": -0.9218241572380066, "rewards/margins": 0.4099642336368561, "rewards/rejected": -1.3317883014678955, "step": 1422 }, { "epoch": 0.17, "learning_rate": 2.541514113617574e-07, "logits/chosen": -2.4616856575012207, "logits/rejected": -2.6306047439575195, "logps/chosen": -217.22451782226562, "logps/rejected": -239.80418395996094, "loss": 0.6277, "rewards/accuracies": 0.75, "rewards/chosen": -1.1337361335754395, "rewards/margins": 2.218123435974121, "rewards/rejected": -3.3518595695495605, "step": 1423 }, { "epoch": 0.17, "learning_rate": 2.541159796858391e-07, "logits/chosen": -2.4855356216430664, "logits/rejected": -2.711442232131958, "logps/chosen": -219.52381896972656, "logps/rejected": -214.70159912109375, "loss": 0.7115, "rewards/accuracies": 0.625, "rewards/chosen": -0.8022868037223816, "rewards/margins": 0.6162475347518921, "rewards/rejected": -1.4185343980789185, "step": 1424 }, { "epoch": 0.17, "learning_rate": 2.5408054800992086e-07, "logits/chosen": -2.5001630783081055, "logits/rejected": -2.267730712890625, "logps/chosen": -158.97799682617188, "logps/rejected": -205.411376953125, "loss": 0.4727, "rewards/accuracies": 0.75, "rewards/chosen": -0.6244603395462036, "rewards/margins": 0.6764624118804932, "rewards/rejected": -1.3009227514266968, "step": 1425 }, { "epoch": 0.17, "learning_rate": 2.540451163340026e-07, "logits/chosen": -1.657132863998413, "logits/rejected": -1.9204819202423096, "logps/chosen": -443.85302734375, "logps/rejected": -338.977783203125, "loss": 1.0436, "rewards/accuracies": 0.375, "rewards/chosen": -1.3679190874099731, "rewards/margins": -0.36289939284324646, "rewards/rejected": -1.0050196647644043, "step": 1426 }, { "epoch": 0.17, "learning_rate": 2.540096846580843e-07, "logits/chosen": -2.427781105041504, "logits/rejected": -2.5696349143981934, "logps/chosen": -234.84890747070312, "logps/rejected": -251.01605224609375, "loss": 0.626, "rewards/accuracies": 0.625, "rewards/chosen": -1.1094026565551758, "rewards/margins": 0.5414320230484009, "rewards/rejected": -1.6508347988128662, "step": 1427 }, { "epoch": 0.17, "learning_rate": 2.5397425298216605e-07, "logits/chosen": -2.6352429389953613, "logits/rejected": -2.749058485031128, "logps/chosen": -361.50592041015625, "logps/rejected": -295.8741455078125, "loss": 0.255, "rewards/accuracies": 0.875, "rewards/chosen": -0.5807444453239441, "rewards/margins": 1.774131417274475, "rewards/rejected": -2.3548758029937744, "step": 1428 }, { "epoch": 0.17, "learning_rate": 2.5393882130624775e-07, "logits/chosen": -1.5335426330566406, "logits/rejected": -1.8775932788848877, "logps/chosen": -304.6595458984375, "logps/rejected": -194.12515258789062, "loss": 0.5354, "rewards/accuracies": 0.625, "rewards/chosen": -0.6694098711013794, "rewards/margins": 0.6565424799919128, "rewards/rejected": -1.3259522914886475, "step": 1429 }, { "epoch": 0.17, "learning_rate": 2.539033896303295e-07, "logits/chosen": -1.9526619911193848, "logits/rejected": -2.251145362854004, "logps/chosen": -351.6664123535156, "logps/rejected": -272.32220458984375, "loss": 0.7102, "rewards/accuracies": 0.625, "rewards/chosen": -1.2508903741836548, "rewards/margins": 0.6008719801902771, "rewards/rejected": -1.8517624139785767, "step": 1430 }, { "epoch": 0.17, "learning_rate": 2.538679579544112e-07, "logits/chosen": -2.1874873638153076, "logits/rejected": -2.4208993911743164, "logps/chosen": -213.03195190429688, "logps/rejected": -164.6880645751953, "loss": 0.5484, "rewards/accuracies": 0.625, "rewards/chosen": -0.3417195677757263, "rewards/margins": 0.7480848431587219, "rewards/rejected": -1.0898044109344482, "step": 1431 }, { "epoch": 0.17, "learning_rate": 2.53832526278493e-07, "logits/chosen": -1.9024730920791626, "logits/rejected": -1.9694797992706299, "logps/chosen": -274.88433837890625, "logps/rejected": -332.10333251953125, "loss": 0.373, "rewards/accuracies": 0.75, "rewards/chosen": -0.4101518392562866, "rewards/margins": 1.4350073337554932, "rewards/rejected": -1.8451590538024902, "step": 1432 }, { "epoch": 0.17, "learning_rate": 2.537970946025747e-07, "logits/chosen": -2.4270613193511963, "logits/rejected": -2.2756803035736084, "logps/chosen": -220.02890014648438, "logps/rejected": -306.5950622558594, "loss": 0.4172, "rewards/accuracies": 0.75, "rewards/chosen": -0.4129566550254822, "rewards/margins": 1.4227460622787476, "rewards/rejected": -1.8357027769088745, "step": 1433 }, { "epoch": 0.17, "learning_rate": 2.5376166292665644e-07, "logits/chosen": -2.533468723297119, "logits/rejected": -2.691944122314453, "logps/chosen": -374.1702575683594, "logps/rejected": -174.51168823242188, "loss": 0.2381, "rewards/accuracies": 0.875, "rewards/chosen": 0.19594290852546692, "rewards/margins": 2.793304443359375, "rewards/rejected": -2.5973610877990723, "step": 1434 }, { "epoch": 0.17, "learning_rate": 2.5372623125073813e-07, "logits/chosen": -1.8424973487854004, "logits/rejected": -1.99131178855896, "logps/chosen": -225.1488800048828, "logps/rejected": -230.59222412109375, "loss": 1.1071, "rewards/accuracies": 0.625, "rewards/chosen": -1.6483787298202515, "rewards/margins": -0.12687551975250244, "rewards/rejected": -1.5215030908584595, "step": 1435 }, { "epoch": 0.17, "learning_rate": 2.536907995748199e-07, "logits/chosen": -2.661696195602417, "logits/rejected": -2.8085148334503174, "logps/chosen": -667.4241333007812, "logps/rejected": -493.30181884765625, "loss": 0.4306, "rewards/accuracies": 0.875, "rewards/chosen": -1.160178303718567, "rewards/margins": 2.886575222015381, "rewards/rejected": -4.046753406524658, "step": 1436 }, { "epoch": 0.17, "learning_rate": 2.5365536789890163e-07, "logits/chosen": -2.760230302810669, "logits/rejected": -2.5395493507385254, "logps/chosen": -199.6373291015625, "logps/rejected": -176.92825317382812, "loss": 0.5366, "rewards/accuracies": 0.625, "rewards/chosen": -1.0865223407745361, "rewards/margins": 2.866060256958008, "rewards/rejected": -3.952582359313965, "step": 1437 }, { "epoch": 0.17, "learning_rate": 2.536199362229833e-07, "logits/chosen": -1.9585072994232178, "logits/rejected": -1.838178038597107, "logps/chosen": -330.27825927734375, "logps/rejected": -347.9726867675781, "loss": 0.4702, "rewards/accuracies": 0.75, "rewards/chosen": -0.7503769397735596, "rewards/margins": 1.6042174100875854, "rewards/rejected": -2.3545942306518555, "step": 1438 }, { "epoch": 0.17, "learning_rate": 2.535845045470651e-07, "logits/chosen": -2.203791856765747, "logits/rejected": -2.369785785675049, "logps/chosen": -383.0072021484375, "logps/rejected": -364.023681640625, "loss": 0.2097, "rewards/accuracies": 1.0, "rewards/chosen": -0.5691184401512146, "rewards/margins": 2.0655503273010254, "rewards/rejected": -2.634669065475464, "step": 1439 }, { "epoch": 0.17, "learning_rate": 2.5354907287114677e-07, "logits/chosen": -2.1995997428894043, "logits/rejected": -2.2223525047302246, "logps/chosen": -219.06793212890625, "logps/rejected": -272.837158203125, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": -0.564666748046875, "rewards/margins": 3.3644495010375977, "rewards/rejected": -3.9291162490844727, "step": 1440 }, { "epoch": 0.17, "learning_rate": 2.535136411952285e-07, "logits/chosen": -2.4065871238708496, "logits/rejected": -2.5182597637176514, "logps/chosen": -404.03265380859375, "logps/rejected": -325.8576354980469, "loss": 0.4643, "rewards/accuracies": 0.875, "rewards/chosen": -0.3538506031036377, "rewards/margins": 0.7207666039466858, "rewards/rejected": -1.0746171474456787, "step": 1441 }, { "epoch": 0.17, "learning_rate": 2.534782095193102e-07, "logits/chosen": -2.5117228031158447, "logits/rejected": -2.1108460426330566, "logps/chosen": -147.0909881591797, "logps/rejected": -186.81729125976562, "loss": 0.5254, "rewards/accuracies": 0.75, "rewards/chosen": -0.9482824802398682, "rewards/margins": 1.3818496465682983, "rewards/rejected": -2.330132007598877, "step": 1442 }, { "epoch": 0.17, "learning_rate": 2.53442777843392e-07, "logits/chosen": -2.4570987224578857, "logits/rejected": -2.3983781337738037, "logps/chosen": -242.34097290039062, "logps/rejected": -274.68206787109375, "loss": 0.4763, "rewards/accuracies": 0.75, "rewards/chosen": -0.610363781452179, "rewards/margins": 1.764404058456421, "rewards/rejected": -2.374767780303955, "step": 1443 }, { "epoch": 0.17, "learning_rate": 2.534073461674737e-07, "logits/chosen": -2.4009642601013184, "logits/rejected": -2.6849422454833984, "logps/chosen": -567.9620361328125, "logps/rejected": -240.4154510498047, "loss": 0.2596, "rewards/accuracies": 0.875, "rewards/chosen": -0.547966480255127, "rewards/margins": 2.2115731239318848, "rewards/rejected": -2.7595396041870117, "step": 1444 }, { "epoch": 0.17, "learning_rate": 2.5337191449155546e-07, "logits/chosen": -2.2850019931793213, "logits/rejected": -2.1390790939331055, "logps/chosen": -220.06048583984375, "logps/rejected": -336.79296875, "loss": 0.3418, "rewards/accuracies": 0.75, "rewards/chosen": -0.5401608347892761, "rewards/margins": 1.63780677318573, "rewards/rejected": -2.1779675483703613, "step": 1445 }, { "epoch": 0.17, "learning_rate": 2.5333648281563716e-07, "logits/chosen": -2.5842723846435547, "logits/rejected": -2.622328519821167, "logps/chosen": -227.978515625, "logps/rejected": -337.231689453125, "loss": 0.1047, "rewards/accuracies": 1.0, "rewards/chosen": -0.08642464876174927, "rewards/margins": 3.774301528930664, "rewards/rejected": -3.8607263565063477, "step": 1446 }, { "epoch": 0.17, "learning_rate": 2.533010511397189e-07, "logits/chosen": -2.3159894943237305, "logits/rejected": -2.339876174926758, "logps/chosen": -424.4003601074219, "logps/rejected": -246.01416015625, "loss": 0.6247, "rewards/accuracies": 0.75, "rewards/chosen": -1.890349268913269, "rewards/margins": 0.9741147756576538, "rewards/rejected": -2.864464044570923, "step": 1447 }, { "epoch": 0.17, "learning_rate": 2.5326561946380065e-07, "logits/chosen": -2.5215792655944824, "logits/rejected": -2.537692070007324, "logps/chosen": -143.82635498046875, "logps/rejected": -120.31312561035156, "loss": 0.7959, "rewards/accuracies": 0.5, "rewards/chosen": -1.347123622894287, "rewards/margins": 1.232596755027771, "rewards/rejected": -2.5797204971313477, "step": 1448 }, { "epoch": 0.17, "learning_rate": 2.5323018778788235e-07, "logits/chosen": -2.9211254119873047, "logits/rejected": -2.835559606552124, "logps/chosen": -152.33050537109375, "logps/rejected": -218.90386962890625, "loss": 0.588, "rewards/accuracies": 0.75, "rewards/chosen": -1.2231467962265015, "rewards/margins": 1.9937942028045654, "rewards/rejected": -3.2169408798217773, "step": 1449 }, { "epoch": 0.17, "learning_rate": 2.531947561119641e-07, "logits/chosen": -2.105167865753174, "logits/rejected": -2.3442540168762207, "logps/chosen": -506.58294677734375, "logps/rejected": -351.8052978515625, "loss": 0.5419, "rewards/accuracies": 0.625, "rewards/chosen": -0.22831220924854279, "rewards/margins": 1.669630765914917, "rewards/rejected": -1.8979430198669434, "step": 1450 }, { "epoch": 0.17, "learning_rate": 2.531593244360458e-07, "logits/chosen": -2.279019594192505, "logits/rejected": -2.3845438957214355, "logps/chosen": -438.55230712890625, "logps/rejected": -406.7987976074219, "loss": 0.2837, "rewards/accuracies": 0.875, "rewards/chosen": -0.5728898644447327, "rewards/margins": 1.429671287536621, "rewards/rejected": -2.002561092376709, "step": 1451 }, { "epoch": 0.17, "learning_rate": 2.5312389276012754e-07, "logits/chosen": -1.4532936811447144, "logits/rejected": -1.802952527999878, "logps/chosen": -473.18121337890625, "logps/rejected": -335.4903564453125, "loss": 0.6637, "rewards/accuracies": 0.625, "rewards/chosen": -0.6462604999542236, "rewards/margins": 0.34779414534568787, "rewards/rejected": -0.9940545558929443, "step": 1452 }, { "epoch": 0.17, "learning_rate": 2.5308846108420924e-07, "logits/chosen": -2.804143190383911, "logits/rejected": -2.635232925415039, "logps/chosen": -270.97552490234375, "logps/rejected": -190.22747802734375, "loss": 0.2667, "rewards/accuracies": 0.875, "rewards/chosen": -0.1187671422958374, "rewards/margins": 1.8212106227874756, "rewards/rejected": -1.939977765083313, "step": 1453 }, { "epoch": 0.17, "learning_rate": 2.53053029408291e-07, "logits/chosen": -2.361576557159424, "logits/rejected": -2.020791530609131, "logps/chosen": -186.4981689453125, "logps/rejected": -302.084716796875, "loss": 0.5477, "rewards/accuracies": 0.875, "rewards/chosen": -1.264258623123169, "rewards/margins": 0.8958760499954224, "rewards/rejected": -2.1601345539093018, "step": 1454 }, { "epoch": 0.17, "learning_rate": 2.5301759773237273e-07, "logits/chosen": -2.504761219024658, "logits/rejected": -2.341108560562134, "logps/chosen": -380.6441345214844, "logps/rejected": -370.5119934082031, "loss": 0.3151, "rewards/accuracies": 1.0, "rewards/chosen": -1.039496660232544, "rewards/margins": 1.3657381534576416, "rewards/rejected": -2.4052350521087646, "step": 1455 }, { "epoch": 0.17, "learning_rate": 2.529821660564545e-07, "logits/chosen": -2.1535794734954834, "logits/rejected": -1.9789355993270874, "logps/chosen": -170.31314086914062, "logps/rejected": -259.0187683105469, "loss": 0.6131, "rewards/accuracies": 0.875, "rewards/chosen": -0.8147975206375122, "rewards/margins": 0.7351446747779846, "rewards/rejected": -1.5499422550201416, "step": 1456 }, { "epoch": 0.17, "learning_rate": 2.529467343805362e-07, "logits/chosen": -2.1239097118377686, "logits/rejected": -2.1084089279174805, "logps/chosen": -238.11167907714844, "logps/rejected": -223.52252197265625, "loss": 0.4978, "rewards/accuracies": 0.75, "rewards/chosen": -0.40069636702537537, "rewards/margins": 1.4288387298583984, "rewards/rejected": -1.829534888267517, "step": 1457 }, { "epoch": 0.17, "learning_rate": 2.5291130270461793e-07, "logits/chosen": -2.118220090866089, "logits/rejected": -2.2238495349884033, "logps/chosen": -342.74456787109375, "logps/rejected": -306.6408996582031, "loss": 0.2798, "rewards/accuracies": 0.875, "rewards/chosen": -0.6236253976821899, "rewards/margins": 2.4561071395874023, "rewards/rejected": -3.079732656478882, "step": 1458 }, { "epoch": 0.17, "learning_rate": 2.528758710286997e-07, "logits/chosen": -2.684354305267334, "logits/rejected": -2.5362372398376465, "logps/chosen": -349.3921203613281, "logps/rejected": -269.8887939453125, "loss": 0.2044, "rewards/accuracies": 0.875, "rewards/chosen": -0.28179481625556946, "rewards/margins": 2.8368444442749023, "rewards/rejected": -3.1186392307281494, "step": 1459 }, { "epoch": 0.17, "learning_rate": 2.5284043935278137e-07, "logits/chosen": -2.517568349838257, "logits/rejected": -2.8301491737365723, "logps/chosen": -182.41664123535156, "logps/rejected": -193.70089721679688, "loss": 0.3487, "rewards/accuracies": 0.75, "rewards/chosen": -0.511802613735199, "rewards/margins": 1.7629618644714355, "rewards/rejected": -2.2747645378112793, "step": 1460 }, { "epoch": 0.17, "learning_rate": 2.528050076768631e-07, "logits/chosen": -2.5903003215789795, "logits/rejected": -2.3646419048309326, "logps/chosen": -289.8793029785156, "logps/rejected": -405.5638732910156, "loss": 0.3027, "rewards/accuracies": 0.875, "rewards/chosen": -0.6927700042724609, "rewards/margins": 1.9571034908294678, "rewards/rejected": -2.6498734951019287, "step": 1461 }, { "epoch": 0.17, "learning_rate": 2.527695760009448e-07, "logits/chosen": -2.493424415588379, "logits/rejected": -2.4858362674713135, "logps/chosen": -139.37237548828125, "logps/rejected": -178.51715087890625, "loss": 0.2804, "rewards/accuracies": 0.875, "rewards/chosen": 0.571618914604187, "rewards/margins": 2.6915283203125, "rewards/rejected": -2.1199095249176025, "step": 1462 }, { "epoch": 0.17, "learning_rate": 2.5273414432502656e-07, "logits/chosen": -2.5667154788970947, "logits/rejected": -2.4331793785095215, "logps/chosen": -407.3218994140625, "logps/rejected": -395.99395751953125, "loss": 0.536, "rewards/accuracies": 0.75, "rewards/chosen": -0.4371188282966614, "rewards/margins": 1.0734144449234009, "rewards/rejected": -1.5105332136154175, "step": 1463 }, { "epoch": 0.17, "learning_rate": 2.5269871264910826e-07, "logits/chosen": -2.50931453704834, "logits/rejected": -2.313822031021118, "logps/chosen": -254.01461791992188, "logps/rejected": -428.7002258300781, "loss": 0.7307, "rewards/accuracies": 0.5, "rewards/chosen": -0.560958981513977, "rewards/margins": 0.4432069659233093, "rewards/rejected": -1.0041660070419312, "step": 1464 }, { "epoch": 0.17, "learning_rate": 2.5266328097319e-07, "logits/chosen": -2.5206174850463867, "logits/rejected": -2.7586684226989746, "logps/chosen": -256.38397216796875, "logps/rejected": -162.2035369873047, "loss": 0.6075, "rewards/accuracies": 0.625, "rewards/chosen": -1.0403635501861572, "rewards/margins": 0.5496081709861755, "rewards/rejected": -1.5899717807769775, "step": 1465 }, { "epoch": 0.17, "learning_rate": 2.5262784929727176e-07, "logits/chosen": -2.9470343589782715, "logits/rejected": -2.9561822414398193, "logps/chosen": -140.767578125, "logps/rejected": -216.20858764648438, "loss": 0.5327, "rewards/accuracies": 0.75, "rewards/chosen": -0.9156724810600281, "rewards/margins": 0.7848986387252808, "rewards/rejected": -1.7005711793899536, "step": 1466 }, { "epoch": 0.17, "learning_rate": 2.525924176213535e-07, "logits/chosen": -2.6634774208068848, "logits/rejected": -2.582221269607544, "logps/chosen": -126.55690002441406, "logps/rejected": -262.790283203125, "loss": 0.2676, "rewards/accuracies": 1.0, "rewards/chosen": -0.44727328419685364, "rewards/margins": 2.4207444190979004, "rewards/rejected": -2.8680176734924316, "step": 1467 }, { "epoch": 0.17, "learning_rate": 2.525569859454352e-07, "logits/chosen": -1.8738160133361816, "logits/rejected": -1.6901065111160278, "logps/chosen": -353.93524169921875, "logps/rejected": -404.9190368652344, "loss": 0.462, "rewards/accuracies": 0.875, "rewards/chosen": -0.6788508892059326, "rewards/margins": 2.2588632106781006, "rewards/rejected": -2.937714099884033, "step": 1468 }, { "epoch": 0.17, "learning_rate": 2.5252155426951695e-07, "logits/chosen": -2.218775510787964, "logits/rejected": -2.259687900543213, "logps/chosen": -258.6424560546875, "logps/rejected": -302.3805236816406, "loss": 0.3018, "rewards/accuracies": 1.0, "rewards/chosen": -0.23344296216964722, "rewards/margins": 1.8039829730987549, "rewards/rejected": -2.037425994873047, "step": 1469 }, { "epoch": 0.17, "learning_rate": 2.5248612259359865e-07, "logits/chosen": -2.7824223041534424, "logits/rejected": -2.6774966716766357, "logps/chosen": -451.36932373046875, "logps/rejected": -418.739990234375, "loss": 0.0994, "rewards/accuracies": 1.0, "rewards/chosen": 0.04473362863063812, "rewards/margins": 3.5978102684020996, "rewards/rejected": -3.5530762672424316, "step": 1470 }, { "epoch": 0.17, "learning_rate": 2.524506909176804e-07, "logits/chosen": -2.01977801322937, "logits/rejected": -1.8278520107269287, "logps/chosen": -221.29095458984375, "logps/rejected": -286.4285888671875, "loss": 0.6296, "rewards/accuracies": 0.75, "rewards/chosen": -1.226374864578247, "rewards/margins": 0.9805516004562378, "rewards/rejected": -2.2069263458251953, "step": 1471 }, { "epoch": 0.17, "learning_rate": 2.5241525924176214e-07, "logits/chosen": -2.4640140533447266, "logits/rejected": -2.4418015480041504, "logps/chosen": -175.91354370117188, "logps/rejected": -89.90542602539062, "loss": 0.6366, "rewards/accuracies": 0.625, "rewards/chosen": -0.4747963845729828, "rewards/margins": 0.9292072057723999, "rewards/rejected": -1.404003620147705, "step": 1472 }, { "epoch": 0.17, "learning_rate": 2.5237982756584384e-07, "logits/chosen": -2.6665611267089844, "logits/rejected": -2.36313533782959, "logps/chosen": -298.3760070800781, "logps/rejected": -326.38592529296875, "loss": 0.393, "rewards/accuracies": 0.875, "rewards/chosen": -0.9506881237030029, "rewards/margins": 1.1170557737350464, "rewards/rejected": -2.0677437782287598, "step": 1473 }, { "epoch": 0.17, "learning_rate": 2.523443958899256e-07, "logits/chosen": -2.282341480255127, "logits/rejected": -2.2311012744903564, "logps/chosen": -366.27227783203125, "logps/rejected": -271.2642822265625, "loss": 0.4887, "rewards/accuracies": 0.625, "rewards/chosen": -0.6854420304298401, "rewards/margins": 1.458312749862671, "rewards/rejected": -2.1437549591064453, "step": 1474 }, { "epoch": 0.17, "learning_rate": 2.523089642140073e-07, "logits/chosen": -2.5329833030700684, "logits/rejected": -2.6229522228240967, "logps/chosen": -196.85362243652344, "logps/rejected": -234.38980102539062, "loss": 0.5354, "rewards/accuracies": 0.625, "rewards/chosen": -1.2521753311157227, "rewards/margins": 1.101252555847168, "rewards/rejected": -2.3534281253814697, "step": 1475 }, { "epoch": 0.17, "learning_rate": 2.5227353253808903e-07, "logits/chosen": -2.8712286949157715, "logits/rejected": -2.6965372562408447, "logps/chosen": -265.0549011230469, "logps/rejected": -357.2110290527344, "loss": 0.438, "rewards/accuracies": 0.625, "rewards/chosen": -0.6656264066696167, "rewards/margins": 1.6585111618041992, "rewards/rejected": -2.3241376876831055, "step": 1476 }, { "epoch": 0.17, "learning_rate": 2.522381008621708e-07, "logits/chosen": -2.3227853775024414, "logits/rejected": -2.1487667560577393, "logps/chosen": -374.4190979003906, "logps/rejected": -251.34140014648438, "loss": 0.4249, "rewards/accuracies": 0.875, "rewards/chosen": -0.7997742891311646, "rewards/margins": 1.7518787384033203, "rewards/rejected": -2.5516531467437744, "step": 1477 }, { "epoch": 0.17, "learning_rate": 2.5220266918625253e-07, "logits/chosen": -2.3767035007476807, "logits/rejected": -2.31461501121521, "logps/chosen": -222.5201873779297, "logps/rejected": -286.67578125, "loss": 0.7692, "rewards/accuracies": 0.875, "rewards/chosen": -1.2054404020309448, "rewards/margins": 1.1064964532852173, "rewards/rejected": -2.311936855316162, "step": 1478 }, { "epoch": 0.17, "learning_rate": 2.521672375103342e-07, "logits/chosen": -2.448293685913086, "logits/rejected": -2.4010252952575684, "logps/chosen": -237.29776000976562, "logps/rejected": -248.95242309570312, "loss": 0.326, "rewards/accuracies": 0.875, "rewards/chosen": -0.8617949485778809, "rewards/margins": 2.5980401039123535, "rewards/rejected": -3.4598350524902344, "step": 1479 }, { "epoch": 0.17, "learning_rate": 2.5213180583441597e-07, "logits/chosen": -2.286609172821045, "logits/rejected": -2.2503767013549805, "logps/chosen": -252.99923706054688, "logps/rejected": -294.22064208984375, "loss": 0.3255, "rewards/accuracies": 0.875, "rewards/chosen": -0.6527867913246155, "rewards/margins": 1.88228440284729, "rewards/rejected": -2.5350711345672607, "step": 1480 }, { "epoch": 0.17, "learning_rate": 2.5209637415849767e-07, "logits/chosen": -2.2258667945861816, "logits/rejected": -2.4330830574035645, "logps/chosen": -485.60833740234375, "logps/rejected": -304.7433776855469, "loss": 0.6107, "rewards/accuracies": 0.75, "rewards/chosen": -0.4552658796310425, "rewards/margins": 0.5030224323272705, "rewards/rejected": -0.9582882523536682, "step": 1481 }, { "epoch": 0.17, "learning_rate": 2.520609424825794e-07, "logits/chosen": -1.648511528968811, "logits/rejected": -1.666149377822876, "logps/chosen": -280.4410095214844, "logps/rejected": -294.32696533203125, "loss": 0.4216, "rewards/accuracies": 0.625, "rewards/chosen": -1.2772990465164185, "rewards/margins": 1.4583731889724731, "rewards/rejected": -2.7356722354888916, "step": 1482 }, { "epoch": 0.17, "learning_rate": 2.5202551080666117e-07, "logits/chosen": -1.572220802307129, "logits/rejected": -1.4331250190734863, "logps/chosen": -342.74383544921875, "logps/rejected": -325.2814025878906, "loss": 0.6678, "rewards/accuracies": 0.75, "rewards/chosen": -0.7219731211662292, "rewards/margins": 1.875354290008545, "rewards/rejected": -2.597327470779419, "step": 1483 }, { "epoch": 0.17, "learning_rate": 2.5199007913074286e-07, "logits/chosen": -2.4022998809814453, "logits/rejected": -2.5381269454956055, "logps/chosen": -249.94735717773438, "logps/rejected": -174.20851135253906, "loss": 0.6335, "rewards/accuracies": 0.625, "rewards/chosen": -0.21491488814353943, "rewards/margins": 0.3866581618785858, "rewards/rejected": -0.6015730500221252, "step": 1484 }, { "epoch": 0.17, "learning_rate": 2.519546474548246e-07, "logits/chosen": -2.5062367916107178, "logits/rejected": -2.5851614475250244, "logps/chosen": -171.6474609375, "logps/rejected": -153.98004150390625, "loss": 0.6289, "rewards/accuracies": 0.875, "rewards/chosen": -1.000792384147644, "rewards/margins": 0.7175020575523376, "rewards/rejected": -1.718294382095337, "step": 1485 }, { "epoch": 0.17, "learning_rate": 2.519192157789063e-07, "logits/chosen": -2.7694687843322754, "logits/rejected": -2.8860514163970947, "logps/chosen": -214.14553833007812, "logps/rejected": -153.65623474121094, "loss": 0.5742, "rewards/accuracies": 0.75, "rewards/chosen": -0.9531052112579346, "rewards/margins": 1.4404003620147705, "rewards/rejected": -2.393505573272705, "step": 1486 }, { "epoch": 0.17, "learning_rate": 2.5188378410298805e-07, "logits/chosen": -2.143268585205078, "logits/rejected": -2.3119587898254395, "logps/chosen": -329.4967041015625, "logps/rejected": -239.57135009765625, "loss": 0.4642, "rewards/accuracies": 0.625, "rewards/chosen": -0.7891160249710083, "rewards/margins": 1.988013744354248, "rewards/rejected": -2.777129650115967, "step": 1487 }, { "epoch": 0.17, "learning_rate": 2.518483524270698e-07, "logits/chosen": -2.0523874759674072, "logits/rejected": -2.098930597305298, "logps/chosen": -361.4208984375, "logps/rejected": -363.96978759765625, "loss": 0.2568, "rewards/accuracies": 0.875, "rewards/chosen": -0.7395920753479004, "rewards/margins": 2.080350160598755, "rewards/rejected": -2.8199422359466553, "step": 1488 }, { "epoch": 0.17, "learning_rate": 2.518129207511515e-07, "logits/chosen": -2.91645884513855, "logits/rejected": -2.985250949859619, "logps/chosen": -123.6478271484375, "logps/rejected": -186.11561584472656, "loss": 0.5719, "rewards/accuracies": 0.5, "rewards/chosen": -0.570517361164093, "rewards/margins": 0.915500283241272, "rewards/rejected": -1.4860177040100098, "step": 1489 }, { "epoch": 0.17, "learning_rate": 2.5177748907523325e-07, "logits/chosen": -2.481931209564209, "logits/rejected": -2.4050991535186768, "logps/chosen": -122.435302734375, "logps/rejected": -148.4688720703125, "loss": 1.0949, "rewards/accuracies": 0.75, "rewards/chosen": -1.07850980758667, "rewards/margins": 1.361609697341919, "rewards/rejected": -2.440119504928589, "step": 1490 }, { "epoch": 0.17, "learning_rate": 2.51742057399315e-07, "logits/chosen": -2.098560333251953, "logits/rejected": -1.9676381349563599, "logps/chosen": -258.23199462890625, "logps/rejected": -277.38873291015625, "loss": 0.3262, "rewards/accuracies": 0.875, "rewards/chosen": -0.44052034616470337, "rewards/margins": 1.443532943725586, "rewards/rejected": -1.884053349494934, "step": 1491 }, { "epoch": 0.17, "learning_rate": 2.517066257233967e-07, "logits/chosen": -2.5159173011779785, "logits/rejected": -2.380880832672119, "logps/chosen": -375.6462097167969, "logps/rejected": -298.427978515625, "loss": 0.369, "rewards/accuracies": 0.75, "rewards/chosen": -1.1143500804901123, "rewards/margins": 2.1153600215911865, "rewards/rejected": -3.229710102081299, "step": 1492 }, { "epoch": 0.17, "learning_rate": 2.5167119404747844e-07, "logits/chosen": -2.467053174972534, "logits/rejected": -2.1456637382507324, "logps/chosen": -225.94448852539062, "logps/rejected": -333.5818786621094, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": -0.23798520863056183, "rewards/margins": 3.728442430496216, "rewards/rejected": -3.966427803039551, "step": 1493 }, { "epoch": 0.17, "learning_rate": 2.516357623715602e-07, "logits/chosen": -2.2891793251037598, "logits/rejected": -2.242398738861084, "logps/chosen": -242.3644561767578, "logps/rejected": -213.1519012451172, "loss": 0.433, "rewards/accuracies": 0.875, "rewards/chosen": -0.5102680325508118, "rewards/margins": 1.1294386386871338, "rewards/rejected": -1.6397066116333008, "step": 1494 }, { "epoch": 0.17, "learning_rate": 2.516003306956419e-07, "logits/chosen": -2.814871311187744, "logits/rejected": -2.8710761070251465, "logps/chosen": -205.79241943359375, "logps/rejected": -331.1839599609375, "loss": 0.4704, "rewards/accuracies": 0.75, "rewards/chosen": -1.1354008913040161, "rewards/margins": 1.0878069400787354, "rewards/rejected": -2.223207950592041, "step": 1495 }, { "epoch": 0.17, "learning_rate": 2.5156489901972363e-07, "logits/chosen": -2.330258846282959, "logits/rejected": -2.1349592208862305, "logps/chosen": -176.95285034179688, "logps/rejected": -260.0544128417969, "loss": 0.264, "rewards/accuracies": 0.875, "rewards/chosen": -0.3431151509284973, "rewards/margins": 3.0848309993743896, "rewards/rejected": -3.427946090698242, "step": 1496 }, { "epoch": 0.17, "learning_rate": 2.5152946734380533e-07, "logits/chosen": -2.4142892360687256, "logits/rejected": -2.5818979740142822, "logps/chosen": -245.11949157714844, "logps/rejected": -266.0391540527344, "loss": 0.4893, "rewards/accuracies": 0.75, "rewards/chosen": -1.3396854400634766, "rewards/margins": 1.96656334400177, "rewards/rejected": -3.306248664855957, "step": 1497 }, { "epoch": 0.17, "learning_rate": 2.514940356678871e-07, "logits/chosen": -2.2812745571136475, "logits/rejected": -2.6983373165130615, "logps/chosen": -488.57568359375, "logps/rejected": -215.8365478515625, "loss": 0.948, "rewards/accuracies": 0.75, "rewards/chosen": -1.9289575815200806, "rewards/margins": 0.8433178663253784, "rewards/rejected": -2.77227520942688, "step": 1498 }, { "epoch": 0.17, "learning_rate": 2.5145860399196877e-07, "logits/chosen": -2.6693925857543945, "logits/rejected": -2.780869960784912, "logps/chosen": -201.55117797851562, "logps/rejected": -144.3550567626953, "loss": 0.6541, "rewards/accuracies": 0.625, "rewards/chosen": -0.8537842035293579, "rewards/margins": 1.250685214996338, "rewards/rejected": -2.1044692993164062, "step": 1499 }, { "epoch": 0.17, "learning_rate": 2.514231723160505e-07, "logits/chosen": -2.7454564571380615, "logits/rejected": -2.447235107421875, "logps/chosen": -166.00390625, "logps/rejected": -311.5794372558594, "loss": 0.1824, "rewards/accuracies": 1.0, "rewards/chosen": -0.7722806930541992, "rewards/margins": 3.235421657562256, "rewards/rejected": -4.007702827453613, "step": 1500 }, { "epoch": 0.17, "learning_rate": 2.5138774064013227e-07, "logits/chosen": -2.5107481479644775, "logits/rejected": -2.718550205230713, "logps/chosen": -202.51046752929688, "logps/rejected": -229.15499877929688, "loss": 0.595, "rewards/accuracies": 0.625, "rewards/chosen": -0.7731063365936279, "rewards/margins": 0.5997617840766907, "rewards/rejected": -1.3728680610656738, "step": 1501 }, { "epoch": 0.17, "learning_rate": 2.51352308964214e-07, "logits/chosen": -1.7060024738311768, "logits/rejected": -2.0919294357299805, "logps/chosen": -407.52056884765625, "logps/rejected": -325.4770812988281, "loss": 0.5004, "rewards/accuracies": 0.625, "rewards/chosen": -0.06882903724908829, "rewards/margins": 2.1670894622802734, "rewards/rejected": -2.2359185218811035, "step": 1502 }, { "epoch": 0.17, "learning_rate": 2.513168772882957e-07, "logits/chosen": -2.310089588165283, "logits/rejected": -2.7284066677093506, "logps/chosen": -445.20355224609375, "logps/rejected": -306.43377685546875, "loss": 0.8819, "rewards/accuracies": 0.75, "rewards/chosen": -1.2190284729003906, "rewards/margins": 1.0821205377578735, "rewards/rejected": -2.3011488914489746, "step": 1503 }, { "epoch": 0.17, "learning_rate": 2.5128144561237746e-07, "logits/chosen": -2.676833152770996, "logits/rejected": -2.618711471557617, "logps/chosen": -323.45513916015625, "logps/rejected": -291.5965576171875, "loss": 0.5727, "rewards/accuracies": 0.75, "rewards/chosen": -0.5250710248947144, "rewards/margins": 1.2448660135269165, "rewards/rejected": -1.7699370384216309, "step": 1504 }, { "epoch": 0.18, "learning_rate": 2.512460139364592e-07, "logits/chosen": -2.694737434387207, "logits/rejected": -2.798943519592285, "logps/chosen": -247.27523803710938, "logps/rejected": -242.6188201904297, "loss": 0.605, "rewards/accuracies": 0.75, "rewards/chosen": -1.0828218460083008, "rewards/margins": 0.8403846621513367, "rewards/rejected": -1.9232065677642822, "step": 1505 }, { "epoch": 0.18, "learning_rate": 2.512105822605409e-07, "logits/chosen": -2.408123254776001, "logits/rejected": -2.6716556549072266, "logps/chosen": -303.4869384765625, "logps/rejected": -263.4012145996094, "loss": 0.1863, "rewards/accuracies": 1.0, "rewards/chosen": -1.1410737037658691, "rewards/margins": 2.0144271850585938, "rewards/rejected": -3.1555004119873047, "step": 1506 }, { "epoch": 0.18, "learning_rate": 2.5117515058462266e-07, "logits/chosen": -2.3236141204833984, "logits/rejected": -2.5322561264038086, "logps/chosen": -339.0780944824219, "logps/rejected": -256.5987854003906, "loss": 0.3905, "rewards/accuracies": 0.875, "rewards/chosen": -0.7493884563446045, "rewards/margins": 1.286633014678955, "rewards/rejected": -2.0360214710235596, "step": 1507 }, { "epoch": 0.18, "learning_rate": 2.5113971890870435e-07, "logits/chosen": -2.2775211334228516, "logits/rejected": -2.094860076904297, "logps/chosen": -119.67781829833984, "logps/rejected": -168.27798461914062, "loss": 0.3962, "rewards/accuracies": 0.75, "rewards/chosen": -1.5066086053848267, "rewards/margins": 1.8082300424575806, "rewards/rejected": -3.3148386478424072, "step": 1508 }, { "epoch": 0.18, "learning_rate": 2.511042872327861e-07, "logits/chosen": -2.970726251602173, "logits/rejected": -3.0316827297210693, "logps/chosen": -165.9956817626953, "logps/rejected": -187.49472045898438, "loss": 0.5006, "rewards/accuracies": 0.625, "rewards/chosen": -0.6500924825668335, "rewards/margins": 1.3673573732376099, "rewards/rejected": -2.0174498558044434, "step": 1509 }, { "epoch": 0.18, "learning_rate": 2.510688555568678e-07, "logits/chosen": -2.691666603088379, "logits/rejected": -2.638864517211914, "logps/chosen": -272.77398681640625, "logps/rejected": -287.48046875, "loss": 0.5889, "rewards/accuracies": 0.875, "rewards/chosen": -0.8160590529441833, "rewards/margins": 0.33200401067733765, "rewards/rejected": -1.148063063621521, "step": 1510 }, { "epoch": 0.18, "learning_rate": 2.5103342388094954e-07, "logits/chosen": -1.9297447204589844, "logits/rejected": -2.050508737564087, "logps/chosen": -412.77947998046875, "logps/rejected": -310.130126953125, "loss": 0.6256, "rewards/accuracies": 0.5, "rewards/chosen": -0.4392031729221344, "rewards/margins": 0.4176883399486542, "rewards/rejected": -0.8568915724754333, "step": 1511 }, { "epoch": 0.18, "learning_rate": 2.509979922050313e-07, "logits/chosen": -2.0909669399261475, "logits/rejected": -2.3059537410736084, "logps/chosen": -548.546142578125, "logps/rejected": -271.6788024902344, "loss": 0.3569, "rewards/accuracies": 0.75, "rewards/chosen": -0.28837189078330994, "rewards/margins": 1.3042964935302734, "rewards/rejected": -1.5926682949066162, "step": 1512 }, { "epoch": 0.18, "learning_rate": 2.5096256052911304e-07, "logits/chosen": -2.0764822959899902, "logits/rejected": -2.0307776927948, "logps/chosen": -141.51707458496094, "logps/rejected": -207.7806396484375, "loss": 0.9553, "rewards/accuracies": 0.5, "rewards/chosen": -1.1588444709777832, "rewards/margins": 1.1488786935806274, "rewards/rejected": -2.3077232837677, "step": 1513 }, { "epoch": 0.18, "learning_rate": 2.5092712885319474e-07, "logits/chosen": -2.116191864013672, "logits/rejected": -2.328605890274048, "logps/chosen": -332.21551513671875, "logps/rejected": -208.269287109375, "loss": 0.6543, "rewards/accuracies": 0.75, "rewards/chosen": -0.82952880859375, "rewards/margins": 1.8702086210250854, "rewards/rejected": -2.699737310409546, "step": 1514 }, { "epoch": 0.18, "learning_rate": 2.508916971772765e-07, "logits/chosen": -2.771699905395508, "logits/rejected": -2.7069451808929443, "logps/chosen": -148.96571350097656, "logps/rejected": -241.12701416015625, "loss": 0.3221, "rewards/accuracies": 0.875, "rewards/chosen": -0.2643533945083618, "rewards/margins": 2.9518089294433594, "rewards/rejected": -3.2161622047424316, "step": 1515 }, { "epoch": 0.18, "learning_rate": 2.5085626550135823e-07, "logits/chosen": -2.319643974304199, "logits/rejected": -2.509733200073242, "logps/chosen": -299.4090881347656, "logps/rejected": -195.2904510498047, "loss": 0.6658, "rewards/accuracies": 0.625, "rewards/chosen": -0.5797232389450073, "rewards/margins": 1.4602982997894287, "rewards/rejected": -2.0400214195251465, "step": 1516 }, { "epoch": 0.18, "learning_rate": 2.5082083382543993e-07, "logits/chosen": -2.4056739807128906, "logits/rejected": -2.429441452026367, "logps/chosen": -193.17047119140625, "logps/rejected": -396.156494140625, "loss": 0.2096, "rewards/accuracies": 0.75, "rewards/chosen": -0.11505136638879776, "rewards/margins": 3.0897388458251953, "rewards/rejected": -3.2047903537750244, "step": 1517 }, { "epoch": 0.18, "learning_rate": 2.507854021495217e-07, "logits/chosen": -2.002331018447876, "logits/rejected": -2.284052848815918, "logps/chosen": -229.08644104003906, "logps/rejected": -248.72535705566406, "loss": 0.7581, "rewards/accuracies": 0.75, "rewards/chosen": -1.1977550983428955, "rewards/margins": 0.9308830499649048, "rewards/rejected": -2.1286380290985107, "step": 1518 }, { "epoch": 0.18, "learning_rate": 2.507499704736034e-07, "logits/chosen": -2.3316404819488525, "logits/rejected": -2.146022319793701, "logps/chosen": -263.36114501953125, "logps/rejected": -375.27020263671875, "loss": 0.1333, "rewards/accuracies": 1.0, "rewards/chosen": -0.30984193086624146, "rewards/margins": 2.6746063232421875, "rewards/rejected": -2.9844484329223633, "step": 1519 }, { "epoch": 0.18, "learning_rate": 2.507145387976851e-07, "logits/chosen": -2.1641502380371094, "logits/rejected": -2.245091438293457, "logps/chosen": -200.29647827148438, "logps/rejected": -186.30877685546875, "loss": 0.3509, "rewards/accuracies": 1.0, "rewards/chosen": -0.576932430267334, "rewards/margins": 1.4339933395385742, "rewards/rejected": -2.010925531387329, "step": 1520 }, { "epoch": 0.18, "learning_rate": 2.506791071217668e-07, "logits/chosen": -2.2197673320770264, "logits/rejected": -2.269116163253784, "logps/chosen": -240.0064239501953, "logps/rejected": -172.71014404296875, "loss": 1.5375, "rewards/accuracies": 0.625, "rewards/chosen": -2.226947784423828, "rewards/margins": 0.20662975311279297, "rewards/rejected": -2.4335777759552, "step": 1521 }, { "epoch": 0.18, "learning_rate": 2.5064367544584857e-07, "logits/chosen": -1.9302241802215576, "logits/rejected": -2.5047032833099365, "logps/chosen": -417.939697265625, "logps/rejected": -231.62835693359375, "loss": 0.4084, "rewards/accuracies": 0.75, "rewards/chosen": -0.9730070233345032, "rewards/margins": 1.7290160655975342, "rewards/rejected": -2.7020230293273926, "step": 1522 }, { "epoch": 0.18, "learning_rate": 2.506082437699303e-07, "logits/chosen": -1.79429292678833, "logits/rejected": -2.072009801864624, "logps/chosen": -503.75482177734375, "logps/rejected": -350.647705078125, "loss": 0.6102, "rewards/accuracies": 0.75, "rewards/chosen": -1.7235039472579956, "rewards/margins": 0.7640112638473511, "rewards/rejected": -2.4875152111053467, "step": 1523 }, { "epoch": 0.18, "learning_rate": 2.50572812094012e-07, "logits/chosen": -2.486649751663208, "logits/rejected": -2.353630304336548, "logps/chosen": -415.8222351074219, "logps/rejected": -319.5169372558594, "loss": 0.3025, "rewards/accuracies": 0.75, "rewards/chosen": -0.051511406898498535, "rewards/margins": 1.9093295335769653, "rewards/rejected": -1.9608409404754639, "step": 1524 }, { "epoch": 0.18, "learning_rate": 2.5053738041809376e-07, "logits/chosen": -1.9334419965744019, "logits/rejected": -1.8891215324401855, "logps/chosen": -238.2892608642578, "logps/rejected": -355.1315002441406, "loss": 0.2152, "rewards/accuracies": 1.0, "rewards/chosen": -0.4996374845504761, "rewards/margins": 2.9236738681793213, "rewards/rejected": -3.423311233520508, "step": 1525 }, { "epoch": 0.18, "learning_rate": 2.505019487421755e-07, "logits/chosen": -1.782972812652588, "logits/rejected": -2.0354576110839844, "logps/chosen": -426.4249267578125, "logps/rejected": -413.3320007324219, "loss": 0.3341, "rewards/accuracies": 0.75, "rewards/chosen": -0.021389901638031006, "rewards/margins": 1.850546956062317, "rewards/rejected": -1.8719367980957031, "step": 1526 }, { "epoch": 0.18, "learning_rate": 2.5046651706625726e-07, "logits/chosen": -2.332895517349243, "logits/rejected": -2.4367105960845947, "logps/chosen": -272.3119812011719, "logps/rejected": -290.2843933105469, "loss": 0.2917, "rewards/accuracies": 0.875, "rewards/chosen": 0.10114973783493042, "rewards/margins": 2.8727073669433594, "rewards/rejected": -2.771557569503784, "step": 1527 }, { "epoch": 0.18, "learning_rate": 2.5043108539033895e-07, "logits/chosen": -2.5286829471588135, "logits/rejected": -2.5712099075317383, "logps/chosen": -119.3431396484375, "logps/rejected": -193.16403198242188, "loss": 0.4972, "rewards/accuracies": 0.875, "rewards/chosen": 0.03827935829758644, "rewards/margins": 2.533092498779297, "rewards/rejected": -2.4948129653930664, "step": 1528 }, { "epoch": 0.18, "learning_rate": 2.503956537144207e-07, "logits/chosen": -2.456690788269043, "logits/rejected": -2.5445916652679443, "logps/chosen": -421.7591857910156, "logps/rejected": -218.651611328125, "loss": 0.2689, "rewards/accuracies": 0.875, "rewards/chosen": -0.6789423227310181, "rewards/margins": 1.5002985000610352, "rewards/rejected": -2.1792409420013428, "step": 1529 }, { "epoch": 0.18, "learning_rate": 2.503602220385024e-07, "logits/chosen": -2.3817851543426514, "logits/rejected": -2.317093849182129, "logps/chosen": -342.08489990234375, "logps/rejected": -358.735107421875, "loss": 0.48, "rewards/accuracies": 0.875, "rewards/chosen": -0.26870760321617126, "rewards/margins": 1.1289433240890503, "rewards/rejected": -1.397650957107544, "step": 1530 }, { "epoch": 0.18, "learning_rate": 2.5032479036258415e-07, "logits/chosen": -2.820840358734131, "logits/rejected": -2.798705577850342, "logps/chosen": -112.78612518310547, "logps/rejected": -151.77853393554688, "loss": 0.3404, "rewards/accuracies": 0.875, "rewards/chosen": -0.28455623984336853, "rewards/margins": 1.9404067993164062, "rewards/rejected": -2.2249629497528076, "step": 1531 }, { "epoch": 0.18, "learning_rate": 2.5028935868666584e-07, "logits/chosen": -2.0861053466796875, "logits/rejected": -2.261622905731201, "logps/chosen": -163.0729217529297, "logps/rejected": -126.7846450805664, "loss": 0.2721, "rewards/accuracies": 1.0, "rewards/chosen": -0.1815764605998993, "rewards/margins": 1.646965503692627, "rewards/rejected": -1.8285419940948486, "step": 1532 }, { "epoch": 0.18, "learning_rate": 2.502539270107476e-07, "logits/chosen": -2.575253486633301, "logits/rejected": -2.467475652694702, "logps/chosen": -220.7331085205078, "logps/rejected": -287.87469482421875, "loss": 0.3955, "rewards/accuracies": 0.75, "rewards/chosen": -0.6447773575782776, "rewards/margins": 1.8861191272735596, "rewards/rejected": -2.5308966636657715, "step": 1533 }, { "epoch": 0.18, "learning_rate": 2.5021849533482934e-07, "logits/chosen": -2.555385112762451, "logits/rejected": -2.70149302482605, "logps/chosen": -356.900146484375, "logps/rejected": -274.7266540527344, "loss": 0.3525, "rewards/accuracies": 0.875, "rewards/chosen": -0.560936689376831, "rewards/margins": 1.5035853385925293, "rewards/rejected": -2.0645222663879395, "step": 1534 }, { "epoch": 0.18, "learning_rate": 2.5018306365891103e-07, "logits/chosen": -1.6017833948135376, "logits/rejected": -2.0642645359039307, "logps/chosen": -419.71417236328125, "logps/rejected": -233.4368896484375, "loss": 0.4205, "rewards/accuracies": 0.875, "rewards/chosen": -0.1526518017053604, "rewards/margins": 1.17689049243927, "rewards/rejected": -1.3295423984527588, "step": 1535 }, { "epoch": 0.18, "learning_rate": 2.501476319829928e-07, "logits/chosen": -2.3041820526123047, "logits/rejected": -2.480492353439331, "logps/chosen": -194.6871337890625, "logps/rejected": -186.84197998046875, "loss": 0.5952, "rewards/accuracies": 0.75, "rewards/chosen": -0.9411494731903076, "rewards/margins": 1.057884931564331, "rewards/rejected": -1.9990344047546387, "step": 1536 }, { "epoch": 0.18, "learning_rate": 2.5011220030707453e-07, "logits/chosen": -2.2466225624084473, "logits/rejected": -2.128579616546631, "logps/chosen": -299.0981750488281, "logps/rejected": -321.57757568359375, "loss": 0.3536, "rewards/accuracies": 0.875, "rewards/chosen": -0.16149570047855377, "rewards/margins": 2.5537667274475098, "rewards/rejected": -2.7152624130249023, "step": 1537 }, { "epoch": 0.18, "learning_rate": 2.500767686311563e-07, "logits/chosen": -1.9885157346725464, "logits/rejected": -2.50860595703125, "logps/chosen": -519.5257568359375, "logps/rejected": -193.3264923095703, "loss": 0.7358, "rewards/accuracies": 0.5, "rewards/chosen": -0.562435507774353, "rewards/margins": 0.36027100682258606, "rewards/rejected": -0.9227065443992615, "step": 1538 }, { "epoch": 0.18, "learning_rate": 2.50041336955238e-07, "logits/chosen": -2.0376358032226562, "logits/rejected": -2.376227378845215, "logps/chosen": -252.80661010742188, "logps/rejected": -237.258056640625, "loss": 0.7729, "rewards/accuracies": 0.75, "rewards/chosen": -1.784058690071106, "rewards/margins": 2.472616195678711, "rewards/rejected": -4.256674766540527, "step": 1539 }, { "epoch": 0.18, "learning_rate": 2.500059052793197e-07, "logits/chosen": -2.5512025356292725, "logits/rejected": -2.6009674072265625, "logps/chosen": -247.01156616210938, "logps/rejected": -182.61239624023438, "loss": 0.2311, "rewards/accuracies": 1.0, "rewards/chosen": -0.09920815378427505, "rewards/margins": 1.5679889917373657, "rewards/rejected": -1.6671972274780273, "step": 1540 }, { "epoch": 0.18, "learning_rate": 2.499704736034014e-07, "logits/chosen": -2.2715861797332764, "logits/rejected": -2.4705450534820557, "logps/chosen": -377.1352233886719, "logps/rejected": -197.96771240234375, "loss": 0.4618, "rewards/accuracies": 0.75, "rewards/chosen": -0.5443218350410461, "rewards/margins": 1.310238003730774, "rewards/rejected": -1.8545598983764648, "step": 1541 }, { "epoch": 0.18, "learning_rate": 2.4993504192748317e-07, "logits/chosen": -2.1200127601623535, "logits/rejected": -1.989243507385254, "logps/chosen": -120.77857971191406, "logps/rejected": -208.59091186523438, "loss": 0.4189, "rewards/accuracies": 0.75, "rewards/chosen": -0.2152397632598877, "rewards/margins": 1.7213993072509766, "rewards/rejected": -1.9366391897201538, "step": 1542 }, { "epoch": 0.18, "learning_rate": 2.4989961025156486e-07, "logits/chosen": -2.3126885890960693, "logits/rejected": -2.4105472564697266, "logps/chosen": -293.9986877441406, "logps/rejected": -313.76531982421875, "loss": 0.1994, "rewards/accuracies": 0.875, "rewards/chosen": -0.23705968260765076, "rewards/margins": 2.400604009628296, "rewards/rejected": -2.6376638412475586, "step": 1543 }, { "epoch": 0.18, "learning_rate": 2.498641785756466e-07, "logits/chosen": -2.412146806716919, "logits/rejected": -2.2679452896118164, "logps/chosen": -284.7437744140625, "logps/rejected": -249.28123474121094, "loss": 0.6143, "rewards/accuracies": 0.75, "rewards/chosen": -0.45002251863479614, "rewards/margins": 0.7889208793640137, "rewards/rejected": -1.238943338394165, "step": 1544 }, { "epoch": 0.18, "learning_rate": 2.4982874689972836e-07, "logits/chosen": -2.254892349243164, "logits/rejected": -2.2283618450164795, "logps/chosen": -252.6599578857422, "logps/rejected": -270.6684875488281, "loss": 0.4212, "rewards/accuracies": 0.75, "rewards/chosen": -0.9043716192245483, "rewards/margins": 1.2205901145935059, "rewards/rejected": -2.1249618530273438, "step": 1545 }, { "epoch": 0.18, "learning_rate": 2.4979331522381006e-07, "logits/chosen": -2.9894254207611084, "logits/rejected": -3.0304489135742188, "logps/chosen": -221.2386474609375, "logps/rejected": -275.53564453125, "loss": 0.2116, "rewards/accuracies": 0.875, "rewards/chosen": -0.785041868686676, "rewards/margins": 2.2690277099609375, "rewards/rejected": -3.054069757461548, "step": 1546 }, { "epoch": 0.18, "learning_rate": 2.497578835478918e-07, "logits/chosen": -1.704725980758667, "logits/rejected": -1.6853471994400024, "logps/chosen": -353.46514892578125, "logps/rejected": -442.17962646484375, "loss": 0.7695, "rewards/accuracies": 0.625, "rewards/chosen": -0.8085172176361084, "rewards/margins": 0.3466312289237976, "rewards/rejected": -1.1551483869552612, "step": 1547 }, { "epoch": 0.18, "learning_rate": 2.4972245187197355e-07, "logits/chosen": -2.9490342140197754, "logits/rejected": -3.0080983638763428, "logps/chosen": -256.296142578125, "logps/rejected": -336.89080810546875, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -0.3070848882198334, "rewards/margins": 3.4800562858581543, "rewards/rejected": -3.7871413230895996, "step": 1548 }, { "epoch": 0.18, "learning_rate": 2.496870201960553e-07, "logits/chosen": -1.6188452243804932, "logits/rejected": -1.8767045736312866, "logps/chosen": -288.3042907714844, "logps/rejected": -251.66665649414062, "loss": 0.8084, "rewards/accuracies": 0.5, "rewards/chosen": -1.3664723634719849, "rewards/margins": 0.5599867105484009, "rewards/rejected": -1.9264590740203857, "step": 1549 }, { "epoch": 0.18, "learning_rate": 2.49651588520137e-07, "logits/chosen": -2.3736724853515625, "logits/rejected": -2.232447385787964, "logps/chosen": -147.672119140625, "logps/rejected": -301.1698913574219, "loss": 0.1984, "rewards/accuracies": 0.875, "rewards/chosen": -0.4978691637516022, "rewards/margins": 4.263669013977051, "rewards/rejected": -4.761538028717041, "step": 1550 }, { "epoch": 0.18, "learning_rate": 2.4961615684421875e-07, "logits/chosen": -2.8217196464538574, "logits/rejected": -2.7765698432922363, "logps/chosen": -156.39334106445312, "logps/rejected": -95.13815307617188, "loss": 0.4199, "rewards/accuracies": 0.875, "rewards/chosen": -0.2756620943546295, "rewards/margins": 0.8543390035629272, "rewards/rejected": -1.1300010681152344, "step": 1551 }, { "epoch": 0.18, "learning_rate": 2.4958072516830044e-07, "logits/chosen": -2.1318089962005615, "logits/rejected": -2.181318521499634, "logps/chosen": -336.5406494140625, "logps/rejected": -261.7124328613281, "loss": 0.6391, "rewards/accuracies": 0.5, "rewards/chosen": -0.7594951391220093, "rewards/margins": 0.637673020362854, "rewards/rejected": -1.3971681594848633, "step": 1552 }, { "epoch": 0.18, "learning_rate": 2.495452934923822e-07, "logits/chosen": -2.3397531509399414, "logits/rejected": -2.337611198425293, "logps/chosen": -362.28680419921875, "logps/rejected": -319.5302429199219, "loss": 0.5237, "rewards/accuracies": 0.75, "rewards/chosen": -1.0497620105743408, "rewards/margins": 0.6509684920310974, "rewards/rejected": -1.700730323791504, "step": 1553 }, { "epoch": 0.18, "learning_rate": 2.495098618164639e-07, "logits/chosen": -2.3607804775238037, "logits/rejected": -2.3720505237579346, "logps/chosen": -215.00918579101562, "logps/rejected": -299.317138671875, "loss": 0.5216, "rewards/accuracies": 0.625, "rewards/chosen": -1.0098356008529663, "rewards/margins": 0.6015739440917969, "rewards/rejected": -1.6114094257354736, "step": 1554 }, { "epoch": 0.18, "learning_rate": 2.4947443014054563e-07, "logits/chosen": -2.1997742652893066, "logits/rejected": -2.315394878387451, "logps/chosen": -182.441650390625, "logps/rejected": -265.46026611328125, "loss": 0.3587, "rewards/accuracies": 0.75, "rewards/chosen": -0.34851276874542236, "rewards/margins": 2.4511661529541016, "rewards/rejected": -2.7996788024902344, "step": 1555 }, { "epoch": 0.18, "learning_rate": 2.494389984646274e-07, "logits/chosen": -2.161530017852783, "logits/rejected": -2.1934127807617188, "logps/chosen": -287.91009521484375, "logps/rejected": -365.0387878417969, "loss": 0.2391, "rewards/accuracies": 0.875, "rewards/chosen": -0.46796494722366333, "rewards/margins": 3.2339091300964355, "rewards/rejected": -3.701873779296875, "step": 1556 }, { "epoch": 0.18, "learning_rate": 2.494035667887091e-07, "logits/chosen": -1.258085012435913, "logits/rejected": -1.5871508121490479, "logps/chosen": -341.570068359375, "logps/rejected": -349.7861328125, "loss": 0.7188, "rewards/accuracies": 0.75, "rewards/chosen": -1.4134564399719238, "rewards/margins": 0.8886083960533142, "rewards/rejected": -2.302064895629883, "step": 1557 }, { "epoch": 0.18, "learning_rate": 2.4936813511279083e-07, "logits/chosen": -2.3276209831237793, "logits/rejected": -2.4646005630493164, "logps/chosen": -415.11175537109375, "logps/rejected": -320.3667907714844, "loss": 0.4829, "rewards/accuracies": 0.75, "rewards/chosen": -0.7355742454528809, "rewards/margins": 2.945573568344116, "rewards/rejected": -3.681147813796997, "step": 1558 }, { "epoch": 0.18, "learning_rate": 2.493327034368725e-07, "logits/chosen": -2.492936849594116, "logits/rejected": -2.308199882507324, "logps/chosen": -220.50656127929688, "logps/rejected": -416.9328918457031, "loss": 0.2885, "rewards/accuracies": 1.0, "rewards/chosen": -0.5229282975196838, "rewards/margins": 1.9308075904846191, "rewards/rejected": -2.453735828399658, "step": 1559 }, { "epoch": 0.18, "learning_rate": 2.4929727176095427e-07, "logits/chosen": -2.207489013671875, "logits/rejected": -2.4328033924102783, "logps/chosen": -208.309814453125, "logps/rejected": -186.4434814453125, "loss": 0.5231, "rewards/accuracies": 0.625, "rewards/chosen": -1.767505168914795, "rewards/margins": 1.68399977684021, "rewards/rejected": -3.451504945755005, "step": 1560 }, { "epoch": 0.18, "learning_rate": 2.49261840085036e-07, "logits/chosen": -2.080179452896118, "logits/rejected": -1.813443899154663, "logps/chosen": -338.6604919433594, "logps/rejected": -389.72528076171875, "loss": 0.1733, "rewards/accuracies": 1.0, "rewards/chosen": -0.8504956364631653, "rewards/margins": 2.295844078063965, "rewards/rejected": -3.1463398933410645, "step": 1561 }, { "epoch": 0.18, "learning_rate": 2.4922640840911777e-07, "logits/chosen": -2.216139793395996, "logits/rejected": -2.17146372795105, "logps/chosen": -503.2596740722656, "logps/rejected": -366.78765869140625, "loss": 0.1294, "rewards/accuracies": 1.0, "rewards/chosen": -0.41000109910964966, "rewards/margins": 2.8530595302581787, "rewards/rejected": -3.2630605697631836, "step": 1562 }, { "epoch": 0.18, "learning_rate": 2.4919097673319946e-07, "logits/chosen": -2.6860244274139404, "logits/rejected": -2.766765594482422, "logps/chosen": -200.458740234375, "logps/rejected": -150.7438201904297, "loss": 0.6081, "rewards/accuracies": 0.75, "rewards/chosen": -1.1114439964294434, "rewards/margins": 0.7730604410171509, "rewards/rejected": -1.8845045566558838, "step": 1563 }, { "epoch": 0.18, "learning_rate": 2.491555450572812e-07, "logits/chosen": -2.2474124431610107, "logits/rejected": -2.1233408451080322, "logps/chosen": -210.52536010742188, "logps/rejected": -352.32879638671875, "loss": 0.2111, "rewards/accuracies": 0.875, "rewards/chosen": -0.4565039277076721, "rewards/margins": 2.5767085552215576, "rewards/rejected": -3.033212423324585, "step": 1564 }, { "epoch": 0.18, "learning_rate": 2.491201133813629e-07, "logits/chosen": -2.098848342895508, "logits/rejected": -1.7748303413391113, "logps/chosen": -165.5954132080078, "logps/rejected": -325.849609375, "loss": 0.2319, "rewards/accuracies": 0.875, "rewards/chosen": -0.8838582634925842, "rewards/margins": 1.9544262886047363, "rewards/rejected": -2.838284492492676, "step": 1565 }, { "epoch": 0.18, "learning_rate": 2.4908468170544466e-07, "logits/chosen": -2.384402275085449, "logits/rejected": -2.451805830001831, "logps/chosen": -192.85546875, "logps/rejected": -197.05657958984375, "loss": 0.6436, "rewards/accuracies": 0.75, "rewards/chosen": -1.249718427658081, "rewards/margins": 0.6748732328414917, "rewards/rejected": -1.9245917797088623, "step": 1566 }, { "epoch": 0.18, "learning_rate": 2.490492500295264e-07, "logits/chosen": -1.9718502759933472, "logits/rejected": -2.046419620513916, "logps/chosen": -162.57589721679688, "logps/rejected": -243.692138671875, "loss": 0.2601, "rewards/accuracies": 0.875, "rewards/chosen": -0.03492674231529236, "rewards/margins": 1.9404371976852417, "rewards/rejected": -1.9753642082214355, "step": 1567 }, { "epoch": 0.18, "learning_rate": 2.490138183536081e-07, "logits/chosen": -2.1151649951934814, "logits/rejected": -2.110372304916382, "logps/chosen": -294.4685363769531, "logps/rejected": -276.30523681640625, "loss": 0.4506, "rewards/accuracies": 0.75, "rewards/chosen": -0.1113777682185173, "rewards/margins": 1.663012981414795, "rewards/rejected": -1.774390697479248, "step": 1568 }, { "epoch": 0.18, "learning_rate": 2.4897838667768985e-07, "logits/chosen": -1.8254774808883667, "logits/rejected": -2.0198965072631836, "logps/chosen": -473.74884033203125, "logps/rejected": -358.2305908203125, "loss": 0.7886, "rewards/accuracies": 0.625, "rewards/chosen": -0.9407311677932739, "rewards/margins": 0.2967672348022461, "rewards/rejected": -1.23749840259552, "step": 1569 }, { "epoch": 0.18, "learning_rate": 2.4894295500177155e-07, "logits/chosen": -2.7633907794952393, "logits/rejected": -2.7207157611846924, "logps/chosen": -276.8345031738281, "logps/rejected": -250.54745483398438, "loss": 0.0817, "rewards/accuracies": 1.0, "rewards/chosen": -0.6509556770324707, "rewards/margins": 3.0137288570404053, "rewards/rejected": -3.664684534072876, "step": 1570 }, { "epoch": 0.18, "learning_rate": 2.489075233258533e-07, "logits/chosen": -2.2127819061279297, "logits/rejected": -2.0117361545562744, "logps/chosen": -226.91339111328125, "logps/rejected": -345.9314270019531, "loss": 0.6639, "rewards/accuracies": 0.5, "rewards/chosen": -1.141160011291504, "rewards/margins": 0.25041306018829346, "rewards/rejected": -1.3915729522705078, "step": 1571 }, { "epoch": 0.18, "learning_rate": 2.4887209164993504e-07, "logits/chosen": -2.5478482246398926, "logits/rejected": -2.5732626914978027, "logps/chosen": -382.9358215332031, "logps/rejected": -266.1111145019531, "loss": 0.2062, "rewards/accuracies": 1.0, "rewards/chosen": -0.6796671152114868, "rewards/margins": 2.462045431137085, "rewards/rejected": -3.1417124271392822, "step": 1572 }, { "epoch": 0.18, "learning_rate": 2.488366599740168e-07, "logits/chosen": -2.8376684188842773, "logits/rejected": -2.787668228149414, "logps/chosen": -150.76702880859375, "logps/rejected": -210.85662841796875, "loss": 0.2817, "rewards/accuracies": 0.875, "rewards/chosen": -0.9955230355262756, "rewards/margins": 2.8594465255737305, "rewards/rejected": -3.8549695014953613, "step": 1573 }, { "epoch": 0.18, "learning_rate": 2.488012282980985e-07, "logits/chosen": -1.7406549453735352, "logits/rejected": -2.188563108444214, "logps/chosen": -395.1571960449219, "logps/rejected": -305.38916015625, "loss": 0.4145, "rewards/accuracies": 0.875, "rewards/chosen": -1.3284485340118408, "rewards/margins": 1.1766144037246704, "rewards/rejected": -2.5050628185272217, "step": 1574 }, { "epoch": 0.18, "learning_rate": 2.4876579662218024e-07, "logits/chosen": -2.2789371013641357, "logits/rejected": -2.428443193435669, "logps/chosen": -235.85421752929688, "logps/rejected": -300.2201232910156, "loss": 0.765, "rewards/accuracies": 0.875, "rewards/chosen": -0.7787348031997681, "rewards/margins": 0.5990278720855713, "rewards/rejected": -1.3777626752853394, "step": 1575 }, { "epoch": 0.18, "learning_rate": 2.4873036494626193e-07, "logits/chosen": -2.3398284912109375, "logits/rejected": -2.3455464839935303, "logps/chosen": -317.6192626953125, "logps/rejected": -335.8066101074219, "loss": 0.2223, "rewards/accuracies": 1.0, "rewards/chosen": -0.7553648948669434, "rewards/margins": 1.6736154556274414, "rewards/rejected": -2.4289803504943848, "step": 1576 }, { "epoch": 0.18, "learning_rate": 2.486949332703437e-07, "logits/chosen": -2.6407413482666016, "logits/rejected": -2.3293187618255615, "logps/chosen": -361.5087585449219, "logps/rejected": -410.8941955566406, "loss": 0.3785, "rewards/accuracies": 0.875, "rewards/chosen": -1.2666555643081665, "rewards/margins": 1.2130881547927856, "rewards/rejected": -2.479743719100952, "step": 1577 }, { "epoch": 0.18, "learning_rate": 2.486595015944254e-07, "logits/chosen": -2.0700161457061768, "logits/rejected": -2.2613749504089355, "logps/chosen": -309.083251953125, "logps/rejected": -269.8721923828125, "loss": 0.8522, "rewards/accuracies": 0.625, "rewards/chosen": -0.7533205151557922, "rewards/margins": 0.5553483366966248, "rewards/rejected": -1.308668851852417, "step": 1578 }, { "epoch": 0.18, "learning_rate": 2.486240699185071e-07, "logits/chosen": -2.700705051422119, "logits/rejected": -2.837153911590576, "logps/chosen": -221.9197998046875, "logps/rejected": -246.61788940429688, "loss": 0.3478, "rewards/accuracies": 0.75, "rewards/chosen": -0.051607534289360046, "rewards/margins": 2.577146530151367, "rewards/rejected": -2.628753900527954, "step": 1579 }, { "epoch": 0.18, "learning_rate": 2.4858863824258887e-07, "logits/chosen": -2.209132671356201, "logits/rejected": -2.039478063583374, "logps/chosen": -364.98101806640625, "logps/rejected": -301.44305419921875, "loss": 0.4007, "rewards/accuracies": 0.75, "rewards/chosen": -0.5551789999008179, "rewards/margins": 1.5739115476608276, "rewards/rejected": -2.1290907859802246, "step": 1580 }, { "epoch": 0.18, "learning_rate": 2.4855320656667057e-07, "logits/chosen": -1.939245343208313, "logits/rejected": -2.3522422313690186, "logps/chosen": -312.7124938964844, "logps/rejected": -155.19241333007812, "loss": 0.7534, "rewards/accuracies": 0.75, "rewards/chosen": -0.609461784362793, "rewards/margins": 1.535059928894043, "rewards/rejected": -2.144521713256836, "step": 1581 }, { "epoch": 0.18, "learning_rate": 2.485177748907523e-07, "logits/chosen": -2.3637471199035645, "logits/rejected": -2.4151687622070312, "logps/chosen": -272.49432373046875, "logps/rejected": -357.1744079589844, "loss": 0.4078, "rewards/accuracies": 0.75, "rewards/chosen": -0.6448510885238647, "rewards/margins": 1.9272202253341675, "rewards/rejected": -2.5720713138580322, "step": 1582 }, { "epoch": 0.18, "learning_rate": 2.4848234321483407e-07, "logits/chosen": -2.557943820953369, "logits/rejected": -2.4998703002929688, "logps/chosen": -214.78375244140625, "logps/rejected": -206.8654327392578, "loss": 0.3317, "rewards/accuracies": 0.75, "rewards/chosen": -0.4295068383216858, "rewards/margins": 1.4204964637756348, "rewards/rejected": -1.8500032424926758, "step": 1583 }, { "epoch": 0.18, "learning_rate": 2.484469115389158e-07, "logits/chosen": -2.3122305870056152, "logits/rejected": -2.575441360473633, "logps/chosen": -402.0369567871094, "logps/rejected": -300.9462890625, "loss": 0.5467, "rewards/accuracies": 0.625, "rewards/chosen": -0.622820258140564, "rewards/margins": 0.9086141586303711, "rewards/rejected": -1.5314345359802246, "step": 1584 }, { "epoch": 0.18, "learning_rate": 2.484114798629975e-07, "logits/chosen": -2.761150360107422, "logits/rejected": -2.6542038917541504, "logps/chosen": -278.630126953125, "logps/rejected": -163.2982177734375, "loss": 0.4481, "rewards/accuracies": 0.75, "rewards/chosen": -0.6468818783760071, "rewards/margins": 1.3342843055725098, "rewards/rejected": -1.981166124343872, "step": 1585 }, { "epoch": 0.18, "learning_rate": 2.4837604818707926e-07, "logits/chosen": -2.5496127605438232, "logits/rejected": -2.63279390335083, "logps/chosen": -120.31170654296875, "logps/rejected": -187.0506591796875, "loss": 0.444, "rewards/accuracies": 0.75, "rewards/chosen": -0.3913326859474182, "rewards/margins": 1.3959109783172607, "rewards/rejected": -1.7872437238693237, "step": 1586 }, { "epoch": 0.18, "learning_rate": 2.4834061651116095e-07, "logits/chosen": -1.5563194751739502, "logits/rejected": -2.171633243560791, "logps/chosen": -344.95831298828125, "logps/rejected": -323.2354431152344, "loss": 0.7187, "rewards/accuracies": 0.625, "rewards/chosen": -1.8726016283035278, "rewards/margins": 1.0214619636535645, "rewards/rejected": -2.894063711166382, "step": 1587 }, { "epoch": 0.18, "learning_rate": 2.483051848352427e-07, "logits/chosen": -2.6318774223327637, "logits/rejected": -2.3683998584747314, "logps/chosen": -226.24993896484375, "logps/rejected": -293.6310729980469, "loss": 0.3794, "rewards/accuracies": 1.0, "rewards/chosen": -0.9208877086639404, "rewards/margins": 1.1803008317947388, "rewards/rejected": -2.1011886596679688, "step": 1588 }, { "epoch": 0.18, "learning_rate": 2.482697531593244e-07, "logits/chosen": -2.240182399749756, "logits/rejected": -2.3369972705841064, "logps/chosen": -391.2474670410156, "logps/rejected": -480.9591369628906, "loss": 0.5563, "rewards/accuracies": 0.625, "rewards/chosen": -0.4819144010543823, "rewards/margins": 1.003433346748352, "rewards/rejected": -1.4853477478027344, "step": 1589 }, { "epoch": 0.18, "learning_rate": 2.4823432148340615e-07, "logits/chosen": -2.4501278400421143, "logits/rejected": -2.0264902114868164, "logps/chosen": -110.75640106201172, "logps/rejected": -235.8238067626953, "loss": 0.5417, "rewards/accuracies": 0.75, "rewards/chosen": -0.4876578748226166, "rewards/margins": 1.5461961030960083, "rewards/rejected": -2.0338540077209473, "step": 1590 }, { "epoch": 0.19, "learning_rate": 2.481988898074879e-07, "logits/chosen": -2.4581921100616455, "logits/rejected": -2.372211456298828, "logps/chosen": -163.26498413085938, "logps/rejected": -149.0720672607422, "loss": 0.4934, "rewards/accuracies": 0.625, "rewards/chosen": -0.5460359454154968, "rewards/margins": 1.2152831554412842, "rewards/rejected": -1.7613190412521362, "step": 1591 }, { "epoch": 0.19, "learning_rate": 2.481634581315696e-07, "logits/chosen": -2.2796969413757324, "logits/rejected": -2.4551892280578613, "logps/chosen": -450.71478271484375, "logps/rejected": -369.28179931640625, "loss": 0.0862, "rewards/accuracies": 1.0, "rewards/chosen": -0.3657446503639221, "rewards/margins": 3.0056686401367188, "rewards/rejected": -3.371412992477417, "step": 1592 }, { "epoch": 0.19, "learning_rate": 2.4812802645565134e-07, "logits/chosen": -2.307183027267456, "logits/rejected": -2.3147084712982178, "logps/chosen": -226.97027587890625, "logps/rejected": -216.875, "loss": 0.5567, "rewards/accuracies": 0.75, "rewards/chosen": -0.02982049062848091, "rewards/margins": 1.4486101865768433, "rewards/rejected": -1.4784308671951294, "step": 1593 }, { "epoch": 0.19, "learning_rate": 2.4809259477973304e-07, "logits/chosen": -2.0645434856414795, "logits/rejected": -2.2933509349823, "logps/chosen": -330.84869384765625, "logps/rejected": -300.9118347167969, "loss": 0.4436, "rewards/accuracies": 0.875, "rewards/chosen": -1.295011043548584, "rewards/margins": 1.1833829879760742, "rewards/rejected": -2.478394031524658, "step": 1594 }, { "epoch": 0.19, "learning_rate": 2.4805716310381484e-07, "logits/chosen": -2.3275811672210693, "logits/rejected": -2.3218350410461426, "logps/chosen": -298.8724670410156, "logps/rejected": -232.14610290527344, "loss": 0.3714, "rewards/accuracies": 0.875, "rewards/chosen": -0.4331524968147278, "rewards/margins": 1.6051493883132935, "rewards/rejected": -2.038301944732666, "step": 1595 }, { "epoch": 0.19, "learning_rate": 2.4802173142789653e-07, "logits/chosen": -2.1744062900543213, "logits/rejected": -2.1563806533813477, "logps/chosen": -82.81033325195312, "logps/rejected": -96.90888977050781, "loss": 0.6217, "rewards/accuracies": 0.5, "rewards/chosen": -0.7297319173812866, "rewards/margins": 0.6425690650939941, "rewards/rejected": -1.3723009824752808, "step": 1596 }, { "epoch": 0.19, "learning_rate": 2.479862997519783e-07, "logits/chosen": -2.583070755004883, "logits/rejected": -2.737325668334961, "logps/chosen": -468.8005676269531, "logps/rejected": -343.81292724609375, "loss": 0.3587, "rewards/accuracies": 0.875, "rewards/chosen": 0.1997784972190857, "rewards/margins": 1.5027507543563843, "rewards/rejected": -1.3029723167419434, "step": 1597 }, { "epoch": 0.19, "learning_rate": 2.4795086807606e-07, "logits/chosen": -2.1398887634277344, "logits/rejected": -2.3512613773345947, "logps/chosen": -338.07275390625, "logps/rejected": -294.638671875, "loss": 0.2652, "rewards/accuracies": 0.875, "rewards/chosen": -0.45333316922187805, "rewards/margins": 2.3017525672912598, "rewards/rejected": -2.7550857067108154, "step": 1598 }, { "epoch": 0.19, "learning_rate": 2.479154364001417e-07, "logits/chosen": -2.729024887084961, "logits/rejected": -2.6687569618225098, "logps/chosen": -101.48443603515625, "logps/rejected": -130.80653381347656, "loss": 0.506, "rewards/accuracies": 0.625, "rewards/chosen": -0.724341869354248, "rewards/margins": 0.5748811960220337, "rewards/rejected": -1.2992231845855713, "step": 1599 }, { "epoch": 0.19, "learning_rate": 2.478800047242234e-07, "logits/chosen": -2.228588104248047, "logits/rejected": -2.1324849128723145, "logps/chosen": -277.98114013671875, "logps/rejected": -322.24591064453125, "loss": 0.1962, "rewards/accuracies": 1.0, "rewards/chosen": -0.2622077465057373, "rewards/margins": 2.598194122314453, "rewards/rejected": -2.8604016304016113, "step": 1600 }, { "epoch": 0.19, "learning_rate": 2.4784457304830517e-07, "logits/chosen": -2.204655408859253, "logits/rejected": -2.154161214828491, "logps/chosen": -263.87445068359375, "logps/rejected": -304.49224853515625, "loss": 0.3959, "rewards/accuracies": 0.75, "rewards/chosen": -0.7854706048965454, "rewards/margins": 1.3493664264678955, "rewards/rejected": -2.1348371505737305, "step": 1601 }, { "epoch": 0.19, "learning_rate": 2.478091413723869e-07, "logits/chosen": -2.2698259353637695, "logits/rejected": -2.225752830505371, "logps/chosen": -234.9207305908203, "logps/rejected": -203.18589782714844, "loss": 0.5206, "rewards/accuracies": 0.75, "rewards/chosen": -0.9391934871673584, "rewards/margins": 0.9642164707183838, "rewards/rejected": -1.9034098386764526, "step": 1602 }, { "epoch": 0.19, "learning_rate": 2.477737096964686e-07, "logits/chosen": -2.749161958694458, "logits/rejected": -2.7164313793182373, "logps/chosen": -612.2333984375, "logps/rejected": -378.42803955078125, "loss": 0.5938, "rewards/accuracies": 0.625, "rewards/chosen": -1.110091209411621, "rewards/margins": 1.1412692070007324, "rewards/rejected": -2.2513604164123535, "step": 1603 }, { "epoch": 0.19, "learning_rate": 2.4773827802055036e-07, "logits/chosen": -2.3034439086914062, "logits/rejected": -2.4417028427124023, "logps/chosen": -194.1005859375, "logps/rejected": -203.76385498046875, "loss": 0.7465, "rewards/accuracies": 0.625, "rewards/chosen": -1.3800054788589478, "rewards/margins": 1.4787821769714355, "rewards/rejected": -2.8587875366210938, "step": 1604 }, { "epoch": 0.19, "learning_rate": 2.4770284634463206e-07, "logits/chosen": -1.3297688961029053, "logits/rejected": -1.4826452732086182, "logps/chosen": -408.37188720703125, "logps/rejected": -330.91595458984375, "loss": 0.623, "rewards/accuracies": 0.625, "rewards/chosen": -0.2023015171289444, "rewards/margins": 0.3903564214706421, "rewards/rejected": -0.5926579833030701, "step": 1605 }, { "epoch": 0.19, "learning_rate": 2.476674146687138e-07, "logits/chosen": -2.4535305500030518, "logits/rejected": -2.406219005584717, "logps/chosen": -507.44140625, "logps/rejected": -408.9334411621094, "loss": 1.0871, "rewards/accuracies": 0.5, "rewards/chosen": -1.275648593902588, "rewards/margins": -0.1464366316795349, "rewards/rejected": -1.1292119026184082, "step": 1606 }, { "epoch": 0.19, "learning_rate": 2.4763198299279556e-07, "logits/chosen": -2.068693161010742, "logits/rejected": -2.1565823554992676, "logps/chosen": -494.1838073730469, "logps/rejected": -425.1766357421875, "loss": 0.3269, "rewards/accuracies": 0.75, "rewards/chosen": -0.32804620265960693, "rewards/margins": 2.312948226928711, "rewards/rejected": -2.6409945487976074, "step": 1607 }, { "epoch": 0.19, "learning_rate": 2.475965513168773e-07, "logits/chosen": -2.9662556648254395, "logits/rejected": -2.959144115447998, "logps/chosen": -126.59747314453125, "logps/rejected": -191.7133026123047, "loss": 0.1927, "rewards/accuracies": 1.0, "rewards/chosen": -0.5092255473136902, "rewards/margins": 2.4579687118530273, "rewards/rejected": -2.9671945571899414, "step": 1608 }, { "epoch": 0.19, "learning_rate": 2.47561119640959e-07, "logits/chosen": -1.9998652935028076, "logits/rejected": -2.4058237075805664, "logps/chosen": -337.8708190917969, "logps/rejected": -211.22630310058594, "loss": 0.5098, "rewards/accuracies": 0.625, "rewards/chosen": -0.37040379643440247, "rewards/margins": 0.8850733041763306, "rewards/rejected": -1.2554770708084106, "step": 1609 }, { "epoch": 0.19, "learning_rate": 2.4752568796504075e-07, "logits/chosen": -1.973876953125, "logits/rejected": -1.842827558517456, "logps/chosen": -300.53106689453125, "logps/rejected": -287.74493408203125, "loss": 0.1765, "rewards/accuracies": 0.875, "rewards/chosen": 0.03964511677622795, "rewards/margins": 3.9510371685028076, "rewards/rejected": -3.9113922119140625, "step": 1610 }, { "epoch": 0.19, "learning_rate": 2.4749025628912244e-07, "logits/chosen": -2.706831216812134, "logits/rejected": -2.89486026763916, "logps/chosen": -220.701171875, "logps/rejected": -188.09579467773438, "loss": 0.5161, "rewards/accuracies": 0.625, "rewards/chosen": -0.30700773000717163, "rewards/margins": 2.026865005493164, "rewards/rejected": -2.3338727951049805, "step": 1611 }, { "epoch": 0.19, "learning_rate": 2.474548246132042e-07, "logits/chosen": -2.5341427326202393, "logits/rejected": -2.3706843852996826, "logps/chosen": -345.1861877441406, "logps/rejected": -383.37677001953125, "loss": 0.6979, "rewards/accuracies": 0.625, "rewards/chosen": -1.391100525856018, "rewards/margins": 1.091567039489746, "rewards/rejected": -2.4826674461364746, "step": 1612 }, { "epoch": 0.19, "learning_rate": 2.4741939293728594e-07, "logits/chosen": -2.5054895877838135, "logits/rejected": -2.702303409576416, "logps/chosen": -414.1036376953125, "logps/rejected": -323.4469299316406, "loss": 0.352, "rewards/accuracies": 0.875, "rewards/chosen": -0.33217793703079224, "rewards/margins": 1.9536728858947754, "rewards/rejected": -2.285851001739502, "step": 1613 }, { "epoch": 0.19, "learning_rate": 2.4738396126136764e-07, "logits/chosen": -2.337235927581787, "logits/rejected": -2.364502191543579, "logps/chosen": -167.00189208984375, "logps/rejected": -171.8983612060547, "loss": 0.3064, "rewards/accuracies": 0.875, "rewards/chosen": -0.6457769870758057, "rewards/margins": 1.5658310651779175, "rewards/rejected": -2.2116081714630127, "step": 1614 }, { "epoch": 0.19, "learning_rate": 2.473485295854494e-07, "logits/chosen": -2.6664135456085205, "logits/rejected": -2.4973599910736084, "logps/chosen": -110.15576934814453, "logps/rejected": -206.4669189453125, "loss": 0.4191, "rewards/accuracies": 0.75, "rewards/chosen": -0.48772403597831726, "rewards/margins": 1.1777324676513672, "rewards/rejected": -1.6654565334320068, "step": 1615 }, { "epoch": 0.19, "learning_rate": 2.473130979095311e-07, "logits/chosen": -2.620840549468994, "logits/rejected": -2.348323345184326, "logps/chosen": -270.09375, "logps/rejected": -467.27288818359375, "loss": 0.4726, "rewards/accuracies": 0.625, "rewards/chosen": -0.2675047814846039, "rewards/margins": 1.3290588855743408, "rewards/rejected": -1.5965638160705566, "step": 1616 }, { "epoch": 0.19, "learning_rate": 2.4727766623361283e-07, "logits/chosen": -2.6567177772521973, "logits/rejected": -2.418938398361206, "logps/chosen": -201.27809143066406, "logps/rejected": -192.06259155273438, "loss": 1.1171, "rewards/accuracies": 0.75, "rewards/chosen": -1.2269737720489502, "rewards/margins": 0.3122572898864746, "rewards/rejected": -1.5392310619354248, "step": 1617 }, { "epoch": 0.19, "learning_rate": 2.472422345576945e-07, "logits/chosen": -2.4071056842803955, "logits/rejected": -2.4194586277008057, "logps/chosen": -298.5013122558594, "logps/rejected": -293.9930419921875, "loss": 0.1454, "rewards/accuracies": 1.0, "rewards/chosen": 0.10188588500022888, "rewards/margins": 3.5577855110168457, "rewards/rejected": -3.455899238586426, "step": 1618 }, { "epoch": 0.19, "learning_rate": 2.4720680288177633e-07, "logits/chosen": -1.9060449600219727, "logits/rejected": -2.1322808265686035, "logps/chosen": -426.47613525390625, "logps/rejected": -470.16729736328125, "loss": 0.3165, "rewards/accuracies": 0.875, "rewards/chosen": -0.46652740240097046, "rewards/margins": 2.090425968170166, "rewards/rejected": -2.5569534301757812, "step": 1619 }, { "epoch": 0.19, "learning_rate": 2.47171371205858e-07, "logits/chosen": -1.8772386312484741, "logits/rejected": -1.9901789426803589, "logps/chosen": -371.2685546875, "logps/rejected": -292.62823486328125, "loss": 0.1938, "rewards/accuracies": 1.0, "rewards/chosen": -0.6496948003768921, "rewards/margins": 2.4084155559539795, "rewards/rejected": -3.058109998703003, "step": 1620 }, { "epoch": 0.19, "learning_rate": 2.4713593952993977e-07, "logits/chosen": -2.1469171047210693, "logits/rejected": -2.1494839191436768, "logps/chosen": -733.3466186523438, "logps/rejected": -340.6827392578125, "loss": 0.886, "rewards/accuracies": 0.625, "rewards/chosen": -1.7093323469161987, "rewards/margins": -0.022974446415901184, "rewards/rejected": -1.686357855796814, "step": 1621 }, { "epoch": 0.19, "learning_rate": 2.4710050785402147e-07, "logits/chosen": -2.586357831954956, "logits/rejected": -2.54103946685791, "logps/chosen": -172.69334411621094, "logps/rejected": -233.7097930908203, "loss": 0.464, "rewards/accuracies": 0.625, "rewards/chosen": -0.8136546611785889, "rewards/margins": 1.3482310771942139, "rewards/rejected": -2.1618854999542236, "step": 1622 }, { "epoch": 0.19, "learning_rate": 2.470650761781032e-07, "logits/chosen": -1.8564023971557617, "logits/rejected": -1.7837629318237305, "logps/chosen": -319.5545349121094, "logps/rejected": -362.03765869140625, "loss": 0.4426, "rewards/accuracies": 0.75, "rewards/chosen": -0.6021819710731506, "rewards/margins": 1.3160587549209595, "rewards/rejected": -1.9182407855987549, "step": 1623 }, { "epoch": 0.19, "learning_rate": 2.4702964450218496e-07, "logits/chosen": -2.212630033493042, "logits/rejected": -2.3848471641540527, "logps/chosen": -335.470458984375, "logps/rejected": -296.99810791015625, "loss": 0.1804, "rewards/accuracies": 1.0, "rewards/chosen": -0.1617671549320221, "rewards/margins": 2.6009302139282227, "rewards/rejected": -2.762697219848633, "step": 1624 }, { "epoch": 0.19, "learning_rate": 2.4699421282626666e-07, "logits/chosen": -2.035496711730957, "logits/rejected": -1.8638505935668945, "logps/chosen": -128.8785858154297, "logps/rejected": -226.30331420898438, "loss": 1.1301, "rewards/accuracies": 0.75, "rewards/chosen": -1.4559930562973022, "rewards/margins": 1.1078910827636719, "rewards/rejected": -2.5638840198516846, "step": 1625 }, { "epoch": 0.19, "learning_rate": 2.469587811503484e-07, "logits/chosen": -2.193399667739868, "logits/rejected": -2.013120174407959, "logps/chosen": -139.99827575683594, "logps/rejected": -288.7445983886719, "loss": 0.4829, "rewards/accuracies": 0.625, "rewards/chosen": -0.2497853934764862, "rewards/margins": 1.8501697778701782, "rewards/rejected": -2.0999550819396973, "step": 1626 }, { "epoch": 0.19, "learning_rate": 2.469233494744301e-07, "logits/chosen": -2.6202967166900635, "logits/rejected": -2.762650966644287, "logps/chosen": -255.84738159179688, "logps/rejected": -134.32598876953125, "loss": 0.76, "rewards/accuracies": 0.625, "rewards/chosen": -1.0226532220840454, "rewards/margins": 0.8043995499610901, "rewards/rejected": -1.8270527124404907, "step": 1627 }, { "epoch": 0.19, "learning_rate": 2.4688791779851185e-07, "logits/chosen": -2.3699445724487305, "logits/rejected": -2.646303415298462, "logps/chosen": -317.4638671875, "logps/rejected": -190.20298767089844, "loss": 0.388, "rewards/accuracies": 0.75, "rewards/chosen": -0.8943278193473816, "rewards/margins": 1.3383461236953735, "rewards/rejected": -2.2326736450195312, "step": 1628 }, { "epoch": 0.19, "learning_rate": 2.4685248612259355e-07, "logits/chosen": -2.3288087844848633, "logits/rejected": -2.0339713096618652, "logps/chosen": -215.6480712890625, "logps/rejected": -349.1902160644531, "loss": 0.2556, "rewards/accuracies": 1.0, "rewards/chosen": -0.5649926066398621, "rewards/margins": 2.178818464279175, "rewards/rejected": -2.7438108921051025, "step": 1629 }, { "epoch": 0.19, "learning_rate": 2.4681705444667535e-07, "logits/chosen": -2.1724677085876465, "logits/rejected": -2.3439793586730957, "logps/chosen": -166.39064025878906, "logps/rejected": -233.6205596923828, "loss": 0.7274, "rewards/accuracies": 0.625, "rewards/chosen": -1.1248620748519897, "rewards/margins": 1.6157026290893555, "rewards/rejected": -2.7405645847320557, "step": 1630 }, { "epoch": 0.19, "learning_rate": 2.4678162277075705e-07, "logits/chosen": -1.8891522884368896, "logits/rejected": -1.6884751319885254, "logps/chosen": -329.92291259765625, "logps/rejected": -384.15570068359375, "loss": 0.2456, "rewards/accuracies": 0.875, "rewards/chosen": -0.531204879283905, "rewards/margins": 2.9794764518737793, "rewards/rejected": -3.510681629180908, "step": 1631 }, { "epoch": 0.19, "learning_rate": 2.467461910948388e-07, "logits/chosen": -2.124361753463745, "logits/rejected": -2.388129234313965, "logps/chosen": -218.65716552734375, "logps/rejected": -177.06634521484375, "loss": 0.7028, "rewards/accuracies": 0.625, "rewards/chosen": -1.683070182800293, "rewards/margins": 0.6870852708816528, "rewards/rejected": -2.3701555728912354, "step": 1632 }, { "epoch": 0.19, "learning_rate": 2.467107594189205e-07, "logits/chosen": -2.3786916732788086, "logits/rejected": -2.60400390625, "logps/chosen": -225.3875274658203, "logps/rejected": -274.54571533203125, "loss": 0.4018, "rewards/accuracies": 0.75, "rewards/chosen": -0.6577668190002441, "rewards/margins": 1.3098082542419434, "rewards/rejected": -1.9675750732421875, "step": 1633 }, { "epoch": 0.19, "learning_rate": 2.4667532774300224e-07, "logits/chosen": -2.202277183532715, "logits/rejected": -2.0530648231506348, "logps/chosen": -306.07611083984375, "logps/rejected": -276.73834228515625, "loss": 0.403, "rewards/accuracies": 0.875, "rewards/chosen": -1.0460026264190674, "rewards/margins": 1.4301319122314453, "rewards/rejected": -2.4761343002319336, "step": 1634 }, { "epoch": 0.19, "learning_rate": 2.46639896067084e-07, "logits/chosen": -2.3885772228240967, "logits/rejected": -2.242403984069824, "logps/chosen": -177.55831909179688, "logps/rejected": -174.19418334960938, "loss": 0.5828, "rewards/accuracies": 0.625, "rewards/chosen": -0.6558078527450562, "rewards/margins": 0.270005464553833, "rewards/rejected": -0.9258133769035339, "step": 1635 }, { "epoch": 0.19, "learning_rate": 2.466044643911657e-07, "logits/chosen": -2.704501152038574, "logits/rejected": -2.5327951908111572, "logps/chosen": -454.31817626953125, "logps/rejected": -354.42816162109375, "loss": 0.5104, "rewards/accuracies": 0.75, "rewards/chosen": -1.4679523706436157, "rewards/margins": 1.1737794876098633, "rewards/rejected": -2.6417319774627686, "step": 1636 }, { "epoch": 0.19, "learning_rate": 2.4656903271524743e-07, "logits/chosen": -2.1108548641204834, "logits/rejected": -2.061577796936035, "logps/chosen": -121.50480651855469, "logps/rejected": -176.50103759765625, "loss": 0.471, "rewards/accuracies": 0.75, "rewards/chosen": -0.8984793424606323, "rewards/margins": 1.512269139289856, "rewards/rejected": -2.4107484817504883, "step": 1637 }, { "epoch": 0.19, "learning_rate": 2.4653360103932913e-07, "logits/chosen": -2.217385768890381, "logits/rejected": -2.501033306121826, "logps/chosen": -197.13693237304688, "logps/rejected": -137.0806121826172, "loss": 0.6157, "rewards/accuracies": 0.625, "rewards/chosen": -1.1189005374908447, "rewards/margins": 0.5807891488075256, "rewards/rejected": -1.6996896266937256, "step": 1638 }, { "epoch": 0.19, "learning_rate": 2.464981693634109e-07, "logits/chosen": -2.185270071029663, "logits/rejected": -2.253105401992798, "logps/chosen": -363.3568115234375, "logps/rejected": -346.2731018066406, "loss": 0.4347, "rewards/accuracies": 0.75, "rewards/chosen": -0.8733185529708862, "rewards/margins": 1.4368016719818115, "rewards/rejected": -2.310120105743408, "step": 1639 }, { "epoch": 0.19, "learning_rate": 2.4646273768749257e-07, "logits/chosen": -2.6977291107177734, "logits/rejected": -2.575279712677002, "logps/chosen": -139.70884704589844, "logps/rejected": -201.92620849609375, "loss": 0.4039, "rewards/accuracies": 0.875, "rewards/chosen": -0.6454865336418152, "rewards/margins": 1.1370195150375366, "rewards/rejected": -1.782505989074707, "step": 1640 }, { "epoch": 0.19, "learning_rate": 2.464273060115743e-07, "logits/chosen": -2.3424806594848633, "logits/rejected": -2.342161178588867, "logps/chosen": -302.66943359375, "logps/rejected": -252.23831176757812, "loss": 0.5064, "rewards/accuracies": 0.75, "rewards/chosen": -0.5033154487609863, "rewards/margins": 1.184190034866333, "rewards/rejected": -1.6875056028366089, "step": 1641 }, { "epoch": 0.19, "learning_rate": 2.4639187433565607e-07, "logits/chosen": -2.7281360626220703, "logits/rejected": -2.693927526473999, "logps/chosen": -222.90823364257812, "logps/rejected": -261.49078369140625, "loss": 0.5569, "rewards/accuracies": 0.75, "rewards/chosen": -0.5387765765190125, "rewards/margins": 1.818695306777954, "rewards/rejected": -2.3574721813201904, "step": 1642 }, { "epoch": 0.19, "learning_rate": 2.463564426597378e-07, "logits/chosen": -1.4799219369888306, "logits/rejected": -1.6706809997558594, "logps/chosen": -382.2003479003906, "logps/rejected": -265.8782653808594, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": -1.490817904472351, "rewards/margins": 0.4824899435043335, "rewards/rejected": -1.9733078479766846, "step": 1643 }, { "epoch": 0.19, "learning_rate": 2.463210109838195e-07, "logits/chosen": -2.345161199569702, "logits/rejected": -2.1760034561157227, "logps/chosen": -153.2363739013672, "logps/rejected": -314.918701171875, "loss": 0.5383, "rewards/accuracies": 0.75, "rewards/chosen": -1.2774553298950195, "rewards/margins": 1.6682074069976807, "rewards/rejected": -2.9456629753112793, "step": 1644 }, { "epoch": 0.19, "learning_rate": 2.4628557930790126e-07, "logits/chosen": -2.497775077819824, "logits/rejected": -2.461188554763794, "logps/chosen": -250.63442993164062, "logps/rejected": -219.34765625, "loss": 0.3639, "rewards/accuracies": 0.75, "rewards/chosen": -0.03010420873761177, "rewards/margins": 2.285468339920044, "rewards/rejected": -2.315572500228882, "step": 1645 }, { "epoch": 0.19, "learning_rate": 2.46250147631983e-07, "logits/chosen": -2.8053550720214844, "logits/rejected": -2.863651990890503, "logps/chosen": -216.71046447753906, "logps/rejected": -241.51486206054688, "loss": 0.4356, "rewards/accuracies": 0.75, "rewards/chosen": -0.599645733833313, "rewards/margins": 1.560119867324829, "rewards/rejected": -2.1597657203674316, "step": 1646 }, { "epoch": 0.19, "learning_rate": 2.462147159560647e-07, "logits/chosen": -2.4533729553222656, "logits/rejected": -2.4672818183898926, "logps/chosen": -146.9352569580078, "logps/rejected": -213.19842529296875, "loss": 0.1786, "rewards/accuracies": 1.0, "rewards/chosen": -0.36431729793548584, "rewards/margins": 2.724088191986084, "rewards/rejected": -3.0884053707122803, "step": 1647 }, { "epoch": 0.19, "learning_rate": 2.4617928428014645e-07, "logits/chosen": -2.381521701812744, "logits/rejected": -2.6353683471679688, "logps/chosen": -393.6332702636719, "logps/rejected": -341.0672912597656, "loss": 0.8122, "rewards/accuracies": 0.75, "rewards/chosen": -1.5208781957626343, "rewards/margins": 0.5866612195968628, "rewards/rejected": -2.107539415359497, "step": 1648 }, { "epoch": 0.19, "learning_rate": 2.4614385260422815e-07, "logits/chosen": -1.7442585229873657, "logits/rejected": -1.8897044658660889, "logps/chosen": -301.6856384277344, "logps/rejected": -255.666259765625, "loss": 0.6217, "rewards/accuracies": 0.625, "rewards/chosen": -0.47638359665870667, "rewards/margins": 0.3181024491786957, "rewards/rejected": -0.7944860458374023, "step": 1649 }, { "epoch": 0.19, "learning_rate": 2.461084209283099e-07, "logits/chosen": -2.0834336280822754, "logits/rejected": -1.9874670505523682, "logps/chosen": -431.22625732421875, "logps/rejected": -451.61602783203125, "loss": 0.3955, "rewards/accuracies": 0.875, "rewards/chosen": -0.8054955005645752, "rewards/margins": 1.2469618320465088, "rewards/rejected": -2.052457332611084, "step": 1650 }, { "epoch": 0.19, "learning_rate": 2.460729892523916e-07, "logits/chosen": -2.6949408054351807, "logits/rejected": -2.643350839614868, "logps/chosen": -232.38587951660156, "logps/rejected": -181.9983673095703, "loss": 0.4744, "rewards/accuracies": 0.75, "rewards/chosen": -0.41399484872817993, "rewards/margins": 2.2576088905334473, "rewards/rejected": -2.6716039180755615, "step": 1651 }, { "epoch": 0.19, "learning_rate": 2.4603755757647334e-07, "logits/chosen": -1.906083106994629, "logits/rejected": -2.0011038780212402, "logps/chosen": -134.99073791503906, "logps/rejected": -179.1443328857422, "loss": 0.6521, "rewards/accuracies": 0.625, "rewards/chosen": -0.9401741027832031, "rewards/margins": 1.3125808238983154, "rewards/rejected": -2.2527549266815186, "step": 1652 }, { "epoch": 0.19, "learning_rate": 2.460021259005551e-07, "logits/chosen": -2.2177886962890625, "logits/rejected": -2.2340290546417236, "logps/chosen": -216.84971618652344, "logps/rejected": -298.9881896972656, "loss": 0.5447, "rewards/accuracies": 0.625, "rewards/chosen": -0.7549578547477722, "rewards/margins": 1.1231160163879395, "rewards/rejected": -1.8780736923217773, "step": 1653 }, { "epoch": 0.19, "learning_rate": 2.4596669422463684e-07, "logits/chosen": -2.5723934173583984, "logits/rejected": -2.2472755908966064, "logps/chosen": -236.8076171875, "logps/rejected": -295.8975830078125, "loss": 0.5187, "rewards/accuracies": 0.625, "rewards/chosen": -0.6546620726585388, "rewards/margins": 1.9492071866989136, "rewards/rejected": -2.6038694381713867, "step": 1654 }, { "epoch": 0.19, "learning_rate": 2.4593126254871854e-07, "logits/chosen": -2.250518321990967, "logits/rejected": -2.2545807361602783, "logps/chosen": -331.33544921875, "logps/rejected": -317.98345947265625, "loss": 0.2505, "rewards/accuracies": 1.0, "rewards/chosen": -0.6543898582458496, "rewards/margins": 1.596533179283142, "rewards/rejected": -2.2509231567382812, "step": 1655 }, { "epoch": 0.19, "learning_rate": 2.458958308728003e-07, "logits/chosen": -2.544801712036133, "logits/rejected": -2.5372486114501953, "logps/chosen": -276.8548889160156, "logps/rejected": -282.4296875, "loss": 0.0917, "rewards/accuracies": 1.0, "rewards/chosen": -0.291813462972641, "rewards/margins": 3.296168804168701, "rewards/rejected": -3.587982177734375, "step": 1656 }, { "epoch": 0.19, "learning_rate": 2.4586039919688203e-07, "logits/chosen": -2.466866970062256, "logits/rejected": -2.5726068019866943, "logps/chosen": -362.256103515625, "logps/rejected": -311.67041015625, "loss": 0.5525, "rewards/accuracies": 0.625, "rewards/chosen": -0.6920166015625, "rewards/margins": 1.1808189153671265, "rewards/rejected": -1.872835397720337, "step": 1657 }, { "epoch": 0.19, "learning_rate": 2.4582496752096373e-07, "logits/chosen": -2.522550582885742, "logits/rejected": -2.6290950775146484, "logps/chosen": -272.4031677246094, "logps/rejected": -217.59832763671875, "loss": 0.3586, "rewards/accuracies": 0.75, "rewards/chosen": -0.5821040868759155, "rewards/margins": 1.3093690872192383, "rewards/rejected": -1.8914731740951538, "step": 1658 }, { "epoch": 0.19, "learning_rate": 2.457895358450455e-07, "logits/chosen": -2.2223665714263916, "logits/rejected": -2.523585319519043, "logps/chosen": -392.3723449707031, "logps/rejected": -327.9945068359375, "loss": 0.3464, "rewards/accuracies": 0.875, "rewards/chosen": -0.03784913569688797, "rewards/margins": 1.205685019493103, "rewards/rejected": -1.2435342073440552, "step": 1659 }, { "epoch": 0.19, "learning_rate": 2.4575410416912717e-07, "logits/chosen": -2.2344162464141846, "logits/rejected": -2.3971595764160156, "logps/chosen": -261.149169921875, "logps/rejected": -270.95501708984375, "loss": 0.2615, "rewards/accuracies": 0.875, "rewards/chosen": -0.02218186855316162, "rewards/margins": 1.8940696716308594, "rewards/rejected": -1.9162514209747314, "step": 1660 }, { "epoch": 0.19, "learning_rate": 2.457186724932089e-07, "logits/chosen": -2.402071714401245, "logits/rejected": -2.302401542663574, "logps/chosen": -365.9566345214844, "logps/rejected": -309.6323547363281, "loss": 0.2403, "rewards/accuracies": 0.875, "rewards/chosen": -0.4284933805465698, "rewards/margins": 1.7880432605743408, "rewards/rejected": -2.2165367603302, "step": 1661 }, { "epoch": 0.19, "learning_rate": 2.456832408172906e-07, "logits/chosen": -2.629833698272705, "logits/rejected": -2.5907821655273438, "logps/chosen": -170.3055419921875, "logps/rejected": -206.78289794921875, "loss": 0.4387, "rewards/accuracies": 0.75, "rewards/chosen": -0.15459054708480835, "rewards/margins": 1.1244741678237915, "rewards/rejected": -1.279064655303955, "step": 1662 }, { "epoch": 0.19, "learning_rate": 2.4564780914137237e-07, "logits/chosen": -1.9750040769577026, "logits/rejected": -2.087946891784668, "logps/chosen": -222.98129272460938, "logps/rejected": -227.36981201171875, "loss": 0.4206, "rewards/accuracies": 0.875, "rewards/chosen": -1.0134376287460327, "rewards/margins": 0.8043762445449829, "rewards/rejected": -1.8178138732910156, "step": 1663 }, { "epoch": 0.19, "learning_rate": 2.456123774654541e-07, "logits/chosen": -2.5260679721832275, "logits/rejected": -2.8178257942199707, "logps/chosen": -180.5194091796875, "logps/rejected": -196.1873016357422, "loss": 0.5166, "rewards/accuracies": 0.75, "rewards/chosen": -0.48911577463150024, "rewards/margins": 2.1488428115844727, "rewards/rejected": -2.637958288192749, "step": 1664 }, { "epoch": 0.19, "learning_rate": 2.4557694578953586e-07, "logits/chosen": -2.1998372077941895, "logits/rejected": -2.041743755340576, "logps/chosen": -171.25265502929688, "logps/rejected": -278.51043701171875, "loss": 0.6175, "rewards/accuracies": 0.75, "rewards/chosen": -0.6311626434326172, "rewards/margins": 1.034217357635498, "rewards/rejected": -1.6653801202774048, "step": 1665 }, { "epoch": 0.19, "learning_rate": 2.4554151411361756e-07, "logits/chosen": -1.9722144603729248, "logits/rejected": -2.081778049468994, "logps/chosen": -252.91864013671875, "logps/rejected": -268.276123046875, "loss": 0.4252, "rewards/accuracies": 0.75, "rewards/chosen": -1.319704532623291, "rewards/margins": 1.5936336517333984, "rewards/rejected": -2.9133381843566895, "step": 1666 }, { "epoch": 0.19, "learning_rate": 2.455060824376993e-07, "logits/chosen": -2.25974702835083, "logits/rejected": -2.577329635620117, "logps/chosen": -366.6850280761719, "logps/rejected": -280.7003173828125, "loss": 0.3052, "rewards/accuracies": 0.875, "rewards/chosen": -0.6532570719718933, "rewards/margins": 2.4932761192321777, "rewards/rejected": -3.146533250808716, "step": 1667 }, { "epoch": 0.19, "learning_rate": 2.45470650761781e-07, "logits/chosen": -2.4962587356567383, "logits/rejected": -2.619797468185425, "logps/chosen": -374.7917785644531, "logps/rejected": -343.32672119140625, "loss": 0.8062, "rewards/accuracies": 0.625, "rewards/chosen": -1.2500332593917847, "rewards/margins": 0.9725916981697083, "rewards/rejected": -2.2226247787475586, "step": 1668 }, { "epoch": 0.19, "learning_rate": 2.4543521908586275e-07, "logits/chosen": -2.8976550102233887, "logits/rejected": -2.7989583015441895, "logps/chosen": -79.97217559814453, "logps/rejected": -212.48098754882812, "loss": 0.1916, "rewards/accuracies": 0.875, "rewards/chosen": -0.1216549202799797, "rewards/margins": 3.2446837425231934, "rewards/rejected": -3.3663384914398193, "step": 1669 }, { "epoch": 0.19, "learning_rate": 2.453997874099445e-07, "logits/chosen": -2.394810914993286, "logits/rejected": -2.648609161376953, "logps/chosen": -387.6202392578125, "logps/rejected": -223.49432373046875, "loss": 0.6886, "rewards/accuracies": 0.875, "rewards/chosen": -0.9422688484191895, "rewards/margins": 0.30546462535858154, "rewards/rejected": -1.247733473777771, "step": 1670 }, { "epoch": 0.19, "learning_rate": 2.453643557340262e-07, "logits/chosen": -1.7729119062423706, "logits/rejected": -1.8861068487167358, "logps/chosen": -343.9400939941406, "logps/rejected": -301.35186767578125, "loss": 0.3627, "rewards/accuracies": 0.875, "rewards/chosen": -0.48091018199920654, "rewards/margins": 1.2555638551712036, "rewards/rejected": -1.7364740371704102, "step": 1671 }, { "epoch": 0.19, "learning_rate": 2.4532892405810794e-07, "logits/chosen": -2.526780128479004, "logits/rejected": -2.3439173698425293, "logps/chosen": -110.61292266845703, "logps/rejected": -217.36231994628906, "loss": 0.4063, "rewards/accuracies": 0.75, "rewards/chosen": -0.654490053653717, "rewards/margins": 1.2306182384490967, "rewards/rejected": -1.885108232498169, "step": 1672 }, { "epoch": 0.19, "learning_rate": 2.4529349238218964e-07, "logits/chosen": -2.3020904064178467, "logits/rejected": -2.290064811706543, "logps/chosen": -234.40968322753906, "logps/rejected": -259.13128662109375, "loss": 0.3523, "rewards/accuracies": 0.75, "rewards/chosen": -0.46267086267471313, "rewards/margins": 2.5941081047058105, "rewards/rejected": -3.056779384613037, "step": 1673 }, { "epoch": 0.19, "learning_rate": 2.452580607062714e-07, "logits/chosen": -2.213320732116699, "logits/rejected": -2.45706844329834, "logps/chosen": -439.188232421875, "logps/rejected": -267.59515380859375, "loss": 0.2236, "rewards/accuracies": 0.875, "rewards/chosen": -0.41147154569625854, "rewards/margins": 2.104182481765747, "rewards/rejected": -2.5156538486480713, "step": 1674 }, { "epoch": 0.19, "learning_rate": 2.4522262903035314e-07, "logits/chosen": -1.7336218357086182, "logits/rejected": -2.2416436672210693, "logps/chosen": -522.2005004882812, "logps/rejected": -300.19049072265625, "loss": 0.4166, "rewards/accuracies": 0.75, "rewards/chosen": -0.23829028010368347, "rewards/margins": 1.2266545295715332, "rewards/rejected": -1.464944839477539, "step": 1675 }, { "epoch": 0.19, "learning_rate": 2.4518719735443483e-07, "logits/chosen": -2.3902876377105713, "logits/rejected": -2.3526713848114014, "logps/chosen": -266.8745422363281, "logps/rejected": -222.37503051757812, "loss": 0.5636, "rewards/accuracies": 0.75, "rewards/chosen": -0.6263242959976196, "rewards/margins": 1.5320783853530884, "rewards/rejected": -2.158402681350708, "step": 1676 }, { "epoch": 0.2, "learning_rate": 2.451517656785166e-07, "logits/chosen": -2.2450807094573975, "logits/rejected": -1.9490177631378174, "logps/chosen": -215.37759399414062, "logps/rejected": -284.7712707519531, "loss": 1.0238, "rewards/accuracies": 0.75, "rewards/chosen": -1.4993760585784912, "rewards/margins": -0.12135636806488037, "rewards/rejected": -1.3780198097229004, "step": 1677 }, { "epoch": 0.2, "learning_rate": 2.4511633400259833e-07, "logits/chosen": -1.7775871753692627, "logits/rejected": -1.8299651145935059, "logps/chosen": -213.48779296875, "logps/rejected": -259.5183410644531, "loss": 0.6161, "rewards/accuracies": 0.625, "rewards/chosen": -0.5519853830337524, "rewards/margins": 0.9574552178382874, "rewards/rejected": -1.509440541267395, "step": 1678 }, { "epoch": 0.2, "learning_rate": 2.4508090232668e-07, "logits/chosen": -2.671874761581421, "logits/rejected": -2.7615647315979004, "logps/chosen": -232.97877502441406, "logps/rejected": -269.1032409667969, "loss": 0.5784, "rewards/accuracies": 0.625, "rewards/chosen": -0.9884816408157349, "rewards/margins": 2.013230562210083, "rewards/rejected": -3.0017120838165283, "step": 1679 }, { "epoch": 0.2, "learning_rate": 2.450454706507618e-07, "logits/chosen": -2.2875802516937256, "logits/rejected": -2.2927958965301514, "logps/chosen": -170.98037719726562, "logps/rejected": -205.88623046875, "loss": 0.3948, "rewards/accuracies": 0.875, "rewards/chosen": -0.8095455765724182, "rewards/margins": 2.273000717163086, "rewards/rejected": -3.0825462341308594, "step": 1680 }, { "epoch": 0.2, "learning_rate": 2.450100389748435e-07, "logits/chosen": -2.0003745555877686, "logits/rejected": -1.6723541021347046, "logps/chosen": -156.29800415039062, "logps/rejected": -292.14337158203125, "loss": 0.6547, "rewards/accuracies": 0.625, "rewards/chosen": -0.8538736701011658, "rewards/margins": 1.4568774700164795, "rewards/rejected": -2.31075119972229, "step": 1681 }, { "epoch": 0.2, "learning_rate": 2.449746072989252e-07, "logits/chosen": -1.869611382484436, "logits/rejected": -1.941335916519165, "logps/chosen": -413.1986083984375, "logps/rejected": -372.622314453125, "loss": 0.4483, "rewards/accuracies": 0.625, "rewards/chosen": -0.547063946723938, "rewards/margins": 1.2055723667144775, "rewards/rejected": -1.752636194229126, "step": 1682 }, { "epoch": 0.2, "learning_rate": 2.4493917562300697e-07, "logits/chosen": -2.2228286266326904, "logits/rejected": -2.206742525100708, "logps/chosen": -199.76353454589844, "logps/rejected": -235.65126037597656, "loss": 0.4029, "rewards/accuracies": 0.625, "rewards/chosen": -0.7561392784118652, "rewards/margins": 1.791888952255249, "rewards/rejected": -2.5480282306671143, "step": 1683 }, { "epoch": 0.2, "learning_rate": 2.4490374394708866e-07, "logits/chosen": -1.8739206790924072, "logits/rejected": -2.337437152862549, "logps/chosen": -295.287841796875, "logps/rejected": -192.88650512695312, "loss": 0.3685, "rewards/accuracies": 0.625, "rewards/chosen": -0.3055373430252075, "rewards/margins": 1.8031154870986938, "rewards/rejected": -2.1086528301239014, "step": 1684 }, { "epoch": 0.2, "learning_rate": 2.448683122711704e-07, "logits/chosen": -2.243102550506592, "logits/rejected": -2.0844855308532715, "logps/chosen": -127.83473205566406, "logps/rejected": -136.898681640625, "loss": 0.7257, "rewards/accuracies": 0.5, "rewards/chosen": -1.3347032070159912, "rewards/margins": 0.23142650723457336, "rewards/rejected": -1.5661296844482422, "step": 1685 }, { "epoch": 0.2, "learning_rate": 2.4483288059525216e-07, "logits/chosen": -2.5224738121032715, "logits/rejected": -2.3596463203430176, "logps/chosen": -229.8944549560547, "logps/rejected": -501.04241943359375, "loss": 0.4177, "rewards/accuracies": 0.875, "rewards/chosen": -0.7905840277671814, "rewards/margins": 1.1842104196548462, "rewards/rejected": -1.9747943878173828, "step": 1686 }, { "epoch": 0.2, "learning_rate": 2.4479744891933386e-07, "logits/chosen": -2.397657632827759, "logits/rejected": -2.67763090133667, "logps/chosen": -332.201904296875, "logps/rejected": -306.04638671875, "loss": 0.2177, "rewards/accuracies": 0.875, "rewards/chosen": -0.1924385130405426, "rewards/margins": 2.966355323791504, "rewards/rejected": -3.1587939262390137, "step": 1687 }, { "epoch": 0.2, "learning_rate": 2.447620172434156e-07, "logits/chosen": -2.4252774715423584, "logits/rejected": -2.042975664138794, "logps/chosen": -213.68051147460938, "logps/rejected": -332.4058532714844, "loss": 0.1683, "rewards/accuracies": 1.0, "rewards/chosen": -0.5314593315124512, "rewards/margins": 2.5401315689086914, "rewards/rejected": -3.0715906620025635, "step": 1688 }, { "epoch": 0.2, "learning_rate": 2.4472658556749735e-07, "logits/chosen": -2.3236591815948486, "logits/rejected": -2.1976144313812256, "logps/chosen": -224.82814025878906, "logps/rejected": -285.0279541015625, "loss": 0.1829, "rewards/accuracies": 1.0, "rewards/chosen": -0.5715979337692261, "rewards/margins": 2.3901619911193848, "rewards/rejected": -2.9617598056793213, "step": 1689 }, { "epoch": 0.2, "learning_rate": 2.4469115389157905e-07, "logits/chosen": -2.4370248317718506, "logits/rejected": -2.330864667892456, "logps/chosen": -195.62496948242188, "logps/rejected": -172.05862426757812, "loss": 0.1027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6914937496185303, "rewards/margins": 3.0850682258605957, "rewards/rejected": -3.776562213897705, "step": 1690 }, { "epoch": 0.2, "learning_rate": 2.446557222156608e-07, "logits/chosen": -2.6006290912628174, "logits/rejected": -2.378993511199951, "logps/chosen": -300.4176330566406, "logps/rejected": -288.8329772949219, "loss": 0.5316, "rewards/accuracies": 0.625, "rewards/chosen": -0.6617050170898438, "rewards/margins": 1.3923941850662231, "rewards/rejected": -2.0540993213653564, "step": 1691 }, { "epoch": 0.2, "learning_rate": 2.4462029053974255e-07, "logits/chosen": -2.4011642932891846, "logits/rejected": -2.38321590423584, "logps/chosen": -274.719482421875, "logps/rejected": -334.5320739746094, "loss": 0.1688, "rewards/accuracies": 1.0, "rewards/chosen": -0.8670014142990112, "rewards/margins": 2.6068618297576904, "rewards/rejected": -3.473863124847412, "step": 1692 }, { "epoch": 0.2, "learning_rate": 2.4458485886382424e-07, "logits/chosen": -2.7992653846740723, "logits/rejected": -2.691972255706787, "logps/chosen": -221.02493286132812, "logps/rejected": -216.6522979736328, "loss": 0.5473, "rewards/accuracies": 0.75, "rewards/chosen": -1.495931625366211, "rewards/margins": 0.8376299142837524, "rewards/rejected": -2.333561658859253, "step": 1693 }, { "epoch": 0.2, "learning_rate": 2.44549427187906e-07, "logits/chosen": -2.4074881076812744, "logits/rejected": -2.5063364505767822, "logps/chosen": -427.97503662109375, "logps/rejected": -287.05535888671875, "loss": 0.5613, "rewards/accuracies": 0.75, "rewards/chosen": -1.0978846549987793, "rewards/margins": 2.203669786453247, "rewards/rejected": -3.3015546798706055, "step": 1694 }, { "epoch": 0.2, "learning_rate": 2.445139955119877e-07, "logits/chosen": -2.5927371978759766, "logits/rejected": -2.790435791015625, "logps/chosen": -257.138916015625, "logps/rejected": -224.1407470703125, "loss": 0.8213, "rewards/accuracies": 0.875, "rewards/chosen": -1.3167378902435303, "rewards/margins": 2.021420478820801, "rewards/rejected": -3.33815860748291, "step": 1695 }, { "epoch": 0.2, "learning_rate": 2.4447856383606943e-07, "logits/chosen": -2.9457216262817383, "logits/rejected": -2.7275028228759766, "logps/chosen": -178.66099548339844, "logps/rejected": -268.76544189453125, "loss": 0.3225, "rewards/accuracies": 1.0, "rewards/chosen": -0.5670561790466309, "rewards/margins": 1.6274358034133911, "rewards/rejected": -2.1944918632507324, "step": 1696 }, { "epoch": 0.2, "learning_rate": 2.4444313216015113e-07, "logits/chosen": -2.6083035469055176, "logits/rejected": -2.3719499111175537, "logps/chosen": -431.0684814453125, "logps/rejected": -250.6070556640625, "loss": 0.2972, "rewards/accuracies": 0.875, "rewards/chosen": -0.4644041955471039, "rewards/margins": 1.6125348806381226, "rewards/rejected": -2.076939105987549, "step": 1697 }, { "epoch": 0.2, "learning_rate": 2.444077004842329e-07, "logits/chosen": -2.3073740005493164, "logits/rejected": -2.217200994491577, "logps/chosen": -283.94134521484375, "logps/rejected": -496.77032470703125, "loss": 0.6763, "rewards/accuracies": 0.625, "rewards/chosen": -0.4758017659187317, "rewards/margins": 0.22290386259555817, "rewards/rejected": -0.6987056136131287, "step": 1698 }, { "epoch": 0.2, "learning_rate": 2.4437226880831463e-07, "logits/chosen": -2.6404643058776855, "logits/rejected": -2.5165669918060303, "logps/chosen": -246.29473876953125, "logps/rejected": -383.0121765136719, "loss": 0.533, "rewards/accuracies": 0.5, "rewards/chosen": -1.3632125854492188, "rewards/margins": 0.9686402082443237, "rewards/rejected": -2.331852674484253, "step": 1699 }, { "epoch": 0.2, "learning_rate": 2.443368371323964e-07, "logits/chosen": -2.692833185195923, "logits/rejected": -2.749985694885254, "logps/chosen": -288.43060302734375, "logps/rejected": -226.47560119628906, "loss": 0.1727, "rewards/accuracies": 1.0, "rewards/chosen": -0.6061241626739502, "rewards/margins": 2.48777174949646, "rewards/rejected": -3.093895673751831, "step": 1700 }, { "epoch": 0.2, "learning_rate": 2.4430140545647807e-07, "logits/chosen": -2.6619269847869873, "logits/rejected": -2.515246629714966, "logps/chosen": -437.6355895996094, "logps/rejected": -348.0819396972656, "loss": 0.1423, "rewards/accuracies": 1.0, "rewards/chosen": -0.1193317174911499, "rewards/margins": 2.6971993446350098, "rewards/rejected": -2.816531181335449, "step": 1701 }, { "epoch": 0.2, "learning_rate": 2.442659737805598e-07, "logits/chosen": -2.635606288909912, "logits/rejected": -2.6611692905426025, "logps/chosen": -217.91265869140625, "logps/rejected": -276.42266845703125, "loss": 0.2388, "rewards/accuracies": 0.875, "rewards/chosen": -1.0138905048370361, "rewards/margins": 2.4784152507781982, "rewards/rejected": -3.4923057556152344, "step": 1702 }, { "epoch": 0.2, "learning_rate": 2.4423054210464157e-07, "logits/chosen": -1.7425665855407715, "logits/rejected": -1.7193001508712769, "logps/chosen": -290.45196533203125, "logps/rejected": -254.73318481445312, "loss": 0.3901, "rewards/accuracies": 0.875, "rewards/chosen": -0.5442890524864197, "rewards/margins": 0.9033942222595215, "rewards/rejected": -1.447683334350586, "step": 1703 }, { "epoch": 0.2, "learning_rate": 2.4419511042872326e-07, "logits/chosen": -2.8911690711975098, "logits/rejected": -2.672600746154785, "logps/chosen": -64.15889739990234, "logps/rejected": -203.60073852539062, "loss": 0.2929, "rewards/accuracies": 0.875, "rewards/chosen": -0.5320387482643127, "rewards/margins": 1.5270730257034302, "rewards/rejected": -2.0591118335723877, "step": 1704 }, { "epoch": 0.2, "learning_rate": 2.44159678752805e-07, "logits/chosen": -2.687408447265625, "logits/rejected": -2.487170934677124, "logps/chosen": -192.8691864013672, "logps/rejected": -398.520751953125, "loss": 0.6108, "rewards/accuracies": 0.75, "rewards/chosen": -1.3932178020477295, "rewards/margins": 1.695877194404602, "rewards/rejected": -3.089094877243042, "step": 1705 }, { "epoch": 0.2, "learning_rate": 2.441242470768867e-07, "logits/chosen": -2.171093463897705, "logits/rejected": -2.3537776470184326, "logps/chosen": -437.00201416015625, "logps/rejected": -279.71221923828125, "loss": 0.5039, "rewards/accuracies": 0.75, "rewards/chosen": -1.1990809440612793, "rewards/margins": 0.5425747632980347, "rewards/rejected": -1.7416558265686035, "step": 1706 }, { "epoch": 0.2, "learning_rate": 2.4408881540096846e-07, "logits/chosen": -2.5590147972106934, "logits/rejected": -2.745074510574341, "logps/chosen": -236.73760986328125, "logps/rejected": -228.22027587890625, "loss": 0.3705, "rewards/accuracies": 0.75, "rewards/chosen": -0.9400160908699036, "rewards/margins": 2.9647109508514404, "rewards/rejected": -3.904726982116699, "step": 1707 }, { "epoch": 0.2, "learning_rate": 2.4405338372505015e-07, "logits/chosen": -2.1368656158447266, "logits/rejected": -2.1610143184661865, "logps/chosen": -165.5930938720703, "logps/rejected": -204.6591339111328, "loss": 0.249, "rewards/accuracies": 0.875, "rewards/chosen": -0.3611239194869995, "rewards/margins": 2.2286953926086426, "rewards/rejected": -2.5898194313049316, "step": 1708 }, { "epoch": 0.2, "learning_rate": 2.440179520491319e-07, "logits/chosen": -2.3005166053771973, "logits/rejected": -2.191126585006714, "logps/chosen": -198.12051391601562, "logps/rejected": -221.3509063720703, "loss": 0.7289, "rewards/accuracies": 0.625, "rewards/chosen": -1.7328636646270752, "rewards/margins": 0.2828371524810791, "rewards/rejected": -2.0157008171081543, "step": 1709 }, { "epoch": 0.2, "learning_rate": 2.4398252037321365e-07, "logits/chosen": -2.8378093242645264, "logits/rejected": -2.832202911376953, "logps/chosen": -306.0372314453125, "logps/rejected": -284.5621643066406, "loss": 0.1019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6866308450698853, "rewards/margins": 3.0838046073913574, "rewards/rejected": -3.7704360485076904, "step": 1710 }, { "epoch": 0.2, "learning_rate": 2.4394708869729535e-07, "logits/chosen": -2.4000842571258545, "logits/rejected": -2.225205421447754, "logps/chosen": -216.18511962890625, "logps/rejected": -300.9520263671875, "loss": 0.7521, "rewards/accuracies": 0.75, "rewards/chosen": -1.1064001321792603, "rewards/margins": 1.5065675973892212, "rewards/rejected": -2.6129677295684814, "step": 1711 }, { "epoch": 0.2, "learning_rate": 2.439116570213771e-07, "logits/chosen": -2.0285186767578125, "logits/rejected": -1.8265560865402222, "logps/chosen": -202.61000061035156, "logps/rejected": -271.3106689453125, "loss": 0.2628, "rewards/accuracies": 1.0, "rewards/chosen": -0.7479815483093262, "rewards/margins": 2.069674253463745, "rewards/rejected": -2.8176558017730713, "step": 1712 }, { "epoch": 0.2, "learning_rate": 2.4387622534545884e-07, "logits/chosen": -2.3101818561553955, "logits/rejected": -2.424459934234619, "logps/chosen": -344.6812744140625, "logps/rejected": -302.7609558105469, "loss": 0.3615, "rewards/accuracies": 1.0, "rewards/chosen": -1.123995065689087, "rewards/margins": 1.4284008741378784, "rewards/rejected": -2.552396059036255, "step": 1713 }, { "epoch": 0.2, "learning_rate": 2.438407936695406e-07, "logits/chosen": -2.2773690223693848, "logits/rejected": -2.1974005699157715, "logps/chosen": -279.6014404296875, "logps/rejected": -315.0476379394531, "loss": 0.1055, "rewards/accuracies": 1.0, "rewards/chosen": -0.09783801436424255, "rewards/margins": 2.8355906009674072, "rewards/rejected": -2.9334287643432617, "step": 1714 }, { "epoch": 0.2, "learning_rate": 2.438053619936223e-07, "logits/chosen": -2.0184121131896973, "logits/rejected": -2.0313773155212402, "logps/chosen": -219.04071044921875, "logps/rejected": -188.76881408691406, "loss": 0.2332, "rewards/accuracies": 1.0, "rewards/chosen": -0.17230436205863953, "rewards/margins": 2.386626720428467, "rewards/rejected": -2.5589311122894287, "step": 1715 }, { "epoch": 0.2, "learning_rate": 2.4376993031770404e-07, "logits/chosen": -2.68499755859375, "logits/rejected": -2.846951961517334, "logps/chosen": -123.61669158935547, "logps/rejected": -182.27523803710938, "loss": 0.3136, "rewards/accuracies": 0.875, "rewards/chosen": -0.41845184564590454, "rewards/margins": 2.9919564723968506, "rewards/rejected": -3.4104082584381104, "step": 1716 }, { "epoch": 0.2, "learning_rate": 2.4373449864178573e-07, "logits/chosen": -1.7609859704971313, "logits/rejected": -2.1357250213623047, "logps/chosen": -267.39202880859375, "logps/rejected": -194.7054901123047, "loss": 0.9597, "rewards/accuracies": 0.375, "rewards/chosen": -1.4059154987335205, "rewards/margins": 0.09404361248016357, "rewards/rejected": -1.499959111213684, "step": 1717 }, { "epoch": 0.2, "learning_rate": 2.436990669658675e-07, "logits/chosen": -2.9427547454833984, "logits/rejected": -2.8305983543395996, "logps/chosen": -282.66192626953125, "logps/rejected": -212.61468505859375, "loss": 0.4891, "rewards/accuracies": 0.75, "rewards/chosen": -0.9498907327651978, "rewards/margins": 2.364185333251953, "rewards/rejected": -3.3140759468078613, "step": 1718 }, { "epoch": 0.2, "learning_rate": 2.436636352899492e-07, "logits/chosen": -2.3011960983276367, "logits/rejected": -2.2079832553863525, "logps/chosen": -302.9136047363281, "logps/rejected": -407.46063232421875, "loss": 0.2254, "rewards/accuracies": 0.875, "rewards/chosen": -1.1067568063735962, "rewards/margins": 3.2774291038513184, "rewards/rejected": -4.384185791015625, "step": 1719 }, { "epoch": 0.2, "learning_rate": 2.436282036140309e-07, "logits/chosen": -1.4618102312088013, "logits/rejected": -1.0709110498428345, "logps/chosen": -415.23162841796875, "logps/rejected": -532.1278686523438, "loss": 0.2288, "rewards/accuracies": 0.875, "rewards/chosen": -0.42020225524902344, "rewards/margins": 2.2518253326416016, "rewards/rejected": -2.672027587890625, "step": 1720 }, { "epoch": 0.2, "learning_rate": 2.4359277193811267e-07, "logits/chosen": -2.912400007247925, "logits/rejected": -2.772385358810425, "logps/chosen": -181.66525268554688, "logps/rejected": -224.1291961669922, "loss": 0.092, "rewards/accuracies": 1.0, "rewards/chosen": -0.6237964630126953, "rewards/margins": 3.556155204772949, "rewards/rejected": -4.179951190948486, "step": 1721 }, { "epoch": 0.2, "learning_rate": 2.4355734026219437e-07, "logits/chosen": -2.3696835041046143, "logits/rejected": -2.6130361557006836, "logps/chosen": -264.657958984375, "logps/rejected": -206.3082275390625, "loss": 0.3611, "rewards/accuracies": 0.75, "rewards/chosen": -0.6540833711624146, "rewards/margins": 1.9865055084228516, "rewards/rejected": -2.6405887603759766, "step": 1722 }, { "epoch": 0.2, "learning_rate": 2.435219085862761e-07, "logits/chosen": -2.2988734245300293, "logits/rejected": -2.220130443572998, "logps/chosen": -213.69557189941406, "logps/rejected": -308.3348083496094, "loss": 0.2898, "rewards/accuracies": 1.0, "rewards/chosen": -0.554688036441803, "rewards/margins": 2.5775656700134277, "rewards/rejected": -3.132253885269165, "step": 1723 }, { "epoch": 0.2, "learning_rate": 2.4348647691035787e-07, "logits/chosen": -2.378164768218994, "logits/rejected": -2.1779351234436035, "logps/chosen": -308.5841979980469, "logps/rejected": -449.5793151855469, "loss": 0.0617, "rewards/accuracies": 1.0, "rewards/chosen": -0.33357128500938416, "rewards/margins": 3.676621437072754, "rewards/rejected": -4.01019287109375, "step": 1724 }, { "epoch": 0.2, "learning_rate": 2.434510452344396e-07, "logits/chosen": -2.0856363773345947, "logits/rejected": -2.1840147972106934, "logps/chosen": -346.25537109375, "logps/rejected": -262.4714660644531, "loss": 0.2715, "rewards/accuracies": 0.875, "rewards/chosen": -0.20403826236724854, "rewards/margins": 2.0922205448150635, "rewards/rejected": -2.2962586879730225, "step": 1725 }, { "epoch": 0.2, "learning_rate": 2.434156135585213e-07, "logits/chosen": -2.3825149536132812, "logits/rejected": -2.3598337173461914, "logps/chosen": -332.9206237792969, "logps/rejected": -318.6062927246094, "loss": 0.3573, "rewards/accuracies": 0.875, "rewards/chosen": -0.9182248711585999, "rewards/margins": 2.081834077835083, "rewards/rejected": -3.000059127807617, "step": 1726 }, { "epoch": 0.2, "learning_rate": 2.4338018188260306e-07, "logits/chosen": -2.05372953414917, "logits/rejected": -2.3898844718933105, "logps/chosen": -401.6635437011719, "logps/rejected": -204.50555419921875, "loss": 0.9492, "rewards/accuracies": 0.5, "rewards/chosen": -0.8602542281150818, "rewards/margins": 0.16262242197990417, "rewards/rejected": -1.0228767395019531, "step": 1727 }, { "epoch": 0.2, "learning_rate": 2.4334475020668475e-07, "logits/chosen": -3.0013933181762695, "logits/rejected": -3.0418941974639893, "logps/chosen": -333.9551086425781, "logps/rejected": -355.92230224609375, "loss": 0.4858, "rewards/accuracies": 0.75, "rewards/chosen": -0.8301363587379456, "rewards/margins": 1.1164392232894897, "rewards/rejected": -1.94657564163208, "step": 1728 }, { "epoch": 0.2, "learning_rate": 2.433093185307665e-07, "logits/chosen": -2.230476140975952, "logits/rejected": -2.489081859588623, "logps/chosen": -566.677734375, "logps/rejected": -404.4932556152344, "loss": 0.3496, "rewards/accuracies": 0.75, "rewards/chosen": -0.5814450979232788, "rewards/margins": 1.8401529788970947, "rewards/rejected": -2.421597957611084, "step": 1729 }, { "epoch": 0.2, "learning_rate": 2.432738868548482e-07, "logits/chosen": -2.016127824783325, "logits/rejected": -2.083613872528076, "logps/chosen": -297.24267578125, "logps/rejected": -208.52816772460938, "loss": 0.4982, "rewards/accuracies": 0.875, "rewards/chosen": -0.2986968457698822, "rewards/margins": 0.9703996181488037, "rewards/rejected": -1.2690964937210083, "step": 1730 }, { "epoch": 0.2, "learning_rate": 2.4323845517892995e-07, "logits/chosen": -1.9601771831512451, "logits/rejected": -2.4126944541931152, "logps/chosen": -478.25213623046875, "logps/rejected": -438.8279113769531, "loss": 0.266, "rewards/accuracies": 0.875, "rewards/chosen": -0.23452526330947876, "rewards/margins": 2.1234560012817383, "rewards/rejected": -2.3579814434051514, "step": 1731 }, { "epoch": 0.2, "learning_rate": 2.432030235030117e-07, "logits/chosen": -2.0786635875701904, "logits/rejected": -2.107469081878662, "logps/chosen": -383.38592529296875, "logps/rejected": -344.9381103515625, "loss": 0.3009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9960392713546753, "rewards/margins": 3.0186519622802734, "rewards/rejected": -4.014691352844238, "step": 1732 }, { "epoch": 0.2, "learning_rate": 2.431675918270934e-07, "logits/chosen": -2.1233458518981934, "logits/rejected": -2.3950748443603516, "logps/chosen": -426.977294921875, "logps/rejected": -368.4346008300781, "loss": 0.2884, "rewards/accuracies": 0.875, "rewards/chosen": -0.09678052365779877, "rewards/margins": 1.8052647113800049, "rewards/rejected": -1.9020452499389648, "step": 1733 }, { "epoch": 0.2, "learning_rate": 2.4313216015117514e-07, "logits/chosen": -2.070875883102417, "logits/rejected": -2.2786788940429688, "logps/chosen": -222.96792602539062, "logps/rejected": -244.6705780029297, "loss": 0.8561, "rewards/accuracies": 0.625, "rewards/chosen": -1.727665901184082, "rewards/margins": 1.8865258693695068, "rewards/rejected": -3.614192247390747, "step": 1734 }, { "epoch": 0.2, "learning_rate": 2.430967284752569e-07, "logits/chosen": -2.0697665214538574, "logits/rejected": -2.15297794342041, "logps/chosen": -223.64581298828125, "logps/rejected": -301.3560791015625, "loss": 0.2995, "rewards/accuracies": 0.875, "rewards/chosen": -0.33793821930885315, "rewards/margins": 2.382056713104248, "rewards/rejected": -2.7199950218200684, "step": 1735 }, { "epoch": 0.2, "learning_rate": 2.4306129679933864e-07, "logits/chosen": -1.9906092882156372, "logits/rejected": -1.9184799194335938, "logps/chosen": -297.36273193359375, "logps/rejected": -299.9766845703125, "loss": 0.2249, "rewards/accuracies": 1.0, "rewards/chosen": -0.8131616115570068, "rewards/margins": 1.6239393949508667, "rewards/rejected": -2.437101125717163, "step": 1736 }, { "epoch": 0.2, "learning_rate": 2.4302586512342033e-07, "logits/chosen": -1.6368451118469238, "logits/rejected": -2.1341922283172607, "logps/chosen": -253.68731689453125, "logps/rejected": -160.78619384765625, "loss": 0.4431, "rewards/accuracies": 0.75, "rewards/chosen": -0.45115694403648376, "rewards/margins": 1.5704196691513062, "rewards/rejected": -2.0215766429901123, "step": 1737 }, { "epoch": 0.2, "learning_rate": 2.429904334475021e-07, "logits/chosen": -2.172466993331909, "logits/rejected": -2.4013924598693848, "logps/chosen": -292.63861083984375, "logps/rejected": -255.68191528320312, "loss": 0.5456, "rewards/accuracies": 0.875, "rewards/chosen": -0.6883822083473206, "rewards/margins": 1.114113211631775, "rewards/rejected": -1.8024954795837402, "step": 1738 }, { "epoch": 0.2, "learning_rate": 2.429550017715838e-07, "logits/chosen": -2.3342628479003906, "logits/rejected": -2.4550535678863525, "logps/chosen": -387.2298889160156, "logps/rejected": -298.09002685546875, "loss": 0.2482, "rewards/accuracies": 1.0, "rewards/chosen": -0.24177367985248566, "rewards/margins": 2.0086538791656494, "rewards/rejected": -2.250427722930908, "step": 1739 }, { "epoch": 0.2, "learning_rate": 2.429195700956655e-07, "logits/chosen": -2.771385908126831, "logits/rejected": -2.8684158325195312, "logps/chosen": -150.4694061279297, "logps/rejected": -164.94189453125, "loss": 0.3297, "rewards/accuracies": 0.75, "rewards/chosen": -0.6034387946128845, "rewards/margins": 2.2737200260162354, "rewards/rejected": -2.8771588802337646, "step": 1740 }, { "epoch": 0.2, "learning_rate": 2.428841384197472e-07, "logits/chosen": -2.265669584274292, "logits/rejected": -2.398066520690918, "logps/chosen": -104.03521728515625, "logps/rejected": -161.91632080078125, "loss": 0.3678, "rewards/accuracies": 0.875, "rewards/chosen": -0.517264723777771, "rewards/margins": 1.8584647178649902, "rewards/rejected": -2.3757293224334717, "step": 1741 }, { "epoch": 0.2, "learning_rate": 2.4284870674382897e-07, "logits/chosen": -2.5113914012908936, "logits/rejected": -2.400177240371704, "logps/chosen": -377.9798583984375, "logps/rejected": -319.6923828125, "loss": 0.2899, "rewards/accuracies": 0.875, "rewards/chosen": -0.2727707028388977, "rewards/margins": 1.5354102849960327, "rewards/rejected": -1.8081809282302856, "step": 1742 }, { "epoch": 0.2, "learning_rate": 2.428132750679107e-07, "logits/chosen": -2.472517490386963, "logits/rejected": -2.654442310333252, "logps/chosen": -259.4212646484375, "logps/rejected": -281.5700988769531, "loss": 0.6438, "rewards/accuracies": 0.5, "rewards/chosen": -0.9785155653953552, "rewards/margins": 1.7906217575073242, "rewards/rejected": -2.769137382507324, "step": 1743 }, { "epoch": 0.2, "learning_rate": 2.427778433919924e-07, "logits/chosen": -1.9710922241210938, "logits/rejected": -1.8633582592010498, "logps/chosen": -188.33795166015625, "logps/rejected": -204.1896209716797, "loss": 0.5351, "rewards/accuracies": 0.625, "rewards/chosen": -0.922791600227356, "rewards/margins": 1.250744342803955, "rewards/rejected": -2.1735358238220215, "step": 1744 }, { "epoch": 0.2, "learning_rate": 2.4274241171607416e-07, "logits/chosen": -1.8367559909820557, "logits/rejected": -1.5992932319641113, "logps/chosen": -257.70367431640625, "logps/rejected": -322.8680114746094, "loss": 0.3123, "rewards/accuracies": 0.75, "rewards/chosen": -0.879590630531311, "rewards/margins": 2.3005189895629883, "rewards/rejected": -3.1801095008850098, "step": 1745 }, { "epoch": 0.2, "learning_rate": 2.4270698004015586e-07, "logits/chosen": -1.7887511253356934, "logits/rejected": -1.8966578245162964, "logps/chosen": -276.3487548828125, "logps/rejected": -373.684326171875, "loss": 0.53, "rewards/accuracies": 0.625, "rewards/chosen": -0.8763235211372375, "rewards/margins": 1.4357131719589233, "rewards/rejected": -2.3120367527008057, "step": 1746 }, { "epoch": 0.2, "learning_rate": 2.426715483642376e-07, "logits/chosen": -2.656315565109253, "logits/rejected": -2.579333782196045, "logps/chosen": -315.7026062011719, "logps/rejected": -255.6226043701172, "loss": 0.211, "rewards/accuracies": 1.0, "rewards/chosen": -0.47250866889953613, "rewards/margins": 2.1694719791412354, "rewards/rejected": -2.6419806480407715, "step": 1747 }, { "epoch": 0.2, "learning_rate": 2.4263611668831935e-07, "logits/chosen": -2.2009036540985107, "logits/rejected": -2.043870687484741, "logps/chosen": -120.70841217041016, "logps/rejected": -248.37046813964844, "loss": 0.4674, "rewards/accuracies": 0.75, "rewards/chosen": -0.5444543361663818, "rewards/margins": 1.8763632774353027, "rewards/rejected": -2.4208176136016846, "step": 1748 }, { "epoch": 0.2, "learning_rate": 2.426006850124011e-07, "logits/chosen": -1.8161256313323975, "logits/rejected": -1.8112542629241943, "logps/chosen": -337.2427978515625, "logps/rejected": -284.72509765625, "loss": 0.2282, "rewards/accuracies": 1.0, "rewards/chosen": -0.07423257827758789, "rewards/margins": 1.8466020822525024, "rewards/rejected": -1.9208346605300903, "step": 1749 }, { "epoch": 0.2, "learning_rate": 2.425652533364828e-07, "logits/chosen": -2.2315027713775635, "logits/rejected": -2.217707395553589, "logps/chosen": -246.38601684570312, "logps/rejected": -317.8531494140625, "loss": 0.4543, "rewards/accuracies": 0.875, "rewards/chosen": -0.8921371698379517, "rewards/margins": 1.5083684921264648, "rewards/rejected": -2.400505542755127, "step": 1750 }, { "epoch": 0.2, "learning_rate": 2.4252982166056455e-07, "logits/chosen": -2.1611251831054688, "logits/rejected": -2.232698917388916, "logps/chosen": -332.2293701171875, "logps/rejected": -619.751953125, "loss": 0.7693, "rewards/accuracies": 0.75, "rewards/chosen": -0.820287823677063, "rewards/margins": 2.303734064102173, "rewards/rejected": -3.124021530151367, "step": 1751 }, { "epoch": 0.2, "learning_rate": 2.4249438998464624e-07, "logits/chosen": -2.4108920097351074, "logits/rejected": -2.2093849182128906, "logps/chosen": -165.78768920898438, "logps/rejected": -231.1561279296875, "loss": 0.1882, "rewards/accuracies": 1.0, "rewards/chosen": -0.3740214407444, "rewards/margins": 2.5665183067321777, "rewards/rejected": -2.940539598464966, "step": 1752 }, { "epoch": 0.2, "learning_rate": 2.42458958308728e-07, "logits/chosen": -2.602820873260498, "logits/rejected": -2.354285955429077, "logps/chosen": -268.86834716796875, "logps/rejected": -301.0560302734375, "loss": 0.5991, "rewards/accuracies": 0.875, "rewards/chosen": -1.9442713260650635, "rewards/margins": 1.6500985622406006, "rewards/rejected": -3.594370126724243, "step": 1753 }, { "epoch": 0.2, "learning_rate": 2.4242352663280974e-07, "logits/chosen": -2.9923768043518066, "logits/rejected": -3.011714220046997, "logps/chosen": -254.20814514160156, "logps/rejected": -225.48373413085938, "loss": 0.191, "rewards/accuracies": 0.875, "rewards/chosen": -0.5736883878707886, "rewards/margins": 2.286609649658203, "rewards/rejected": -2.8602981567382812, "step": 1754 }, { "epoch": 0.2, "learning_rate": 2.4238809495689144e-07, "logits/chosen": -2.4682748317718506, "logits/rejected": -2.3028483390808105, "logps/chosen": -319.2618103027344, "logps/rejected": -280.13507080078125, "loss": 0.3517, "rewards/accuracies": 0.875, "rewards/chosen": -0.8028172254562378, "rewards/margins": 1.7629976272583008, "rewards/rejected": -2.565814971923828, "step": 1755 }, { "epoch": 0.2, "learning_rate": 2.423526632809732e-07, "logits/chosen": -2.122105836868286, "logits/rejected": -2.2178194522857666, "logps/chosen": -522.3229370117188, "logps/rejected": -415.02410888671875, "loss": 0.1862, "rewards/accuracies": 0.875, "rewards/chosen": -0.35548821091651917, "rewards/margins": 2.657902240753174, "rewards/rejected": -3.01339054107666, "step": 1756 }, { "epoch": 0.2, "learning_rate": 2.423172316050549e-07, "logits/chosen": -2.711503505706787, "logits/rejected": -2.771531820297241, "logps/chosen": -221.61416625976562, "logps/rejected": -331.5538024902344, "loss": 0.3237, "rewards/accuracies": 0.75, "rewards/chosen": -1.090427279472351, "rewards/margins": 2.226104736328125, "rewards/rejected": -3.3165318965911865, "step": 1757 }, { "epoch": 0.2, "learning_rate": 2.4228179992913663e-07, "logits/chosen": -2.7729597091674805, "logits/rejected": -2.800952196121216, "logps/chosen": -112.35714721679688, "logps/rejected": -215.96092224121094, "loss": 0.1733, "rewards/accuracies": 0.875, "rewards/chosen": 0.006350010633468628, "rewards/margins": 3.6231441497802734, "rewards/rejected": -3.6167938709259033, "step": 1758 }, { "epoch": 0.2, "learning_rate": 2.422463682532184e-07, "logits/chosen": -2.522773504257202, "logits/rejected": -2.6411361694335938, "logps/chosen": -350.42974853515625, "logps/rejected": -279.61151123046875, "loss": 1.4841, "rewards/accuracies": 0.5, "rewards/chosen": -2.69384765625, "rewards/margins": -0.13438910245895386, "rewards/rejected": -2.5594584941864014, "step": 1759 }, { "epoch": 0.2, "learning_rate": 2.422109365773001e-07, "logits/chosen": -2.0582802295684814, "logits/rejected": -1.9488418102264404, "logps/chosen": -331.8688049316406, "logps/rejected": -379.86431884765625, "loss": 0.4142, "rewards/accuracies": 0.875, "rewards/chosen": -1.3662176132202148, "rewards/margins": 2.1332383155822754, "rewards/rejected": -3.4994559288024902, "step": 1760 }, { "epoch": 0.2, "learning_rate": 2.421755049013818e-07, "logits/chosen": -2.588632822036743, "logits/rejected": -2.7747089862823486, "logps/chosen": -215.11990356445312, "logps/rejected": -201.19464111328125, "loss": 0.4848, "rewards/accuracies": 0.75, "rewards/chosen": -0.5133243203163147, "rewards/margins": 1.2404953241348267, "rewards/rejected": -1.7538195848464966, "step": 1761 }, { "epoch": 0.2, "learning_rate": 2.4214007322546357e-07, "logits/chosen": -2.286323070526123, "logits/rejected": -1.9844850301742554, "logps/chosen": -239.16629028320312, "logps/rejected": -264.3179931640625, "loss": 0.2999, "rewards/accuracies": 0.75, "rewards/chosen": -0.7090801000595093, "rewards/margins": 3.5325169563293457, "rewards/rejected": -4.2415971755981445, "step": 1762 }, { "epoch": 0.21, "learning_rate": 2.4210464154954527e-07, "logits/chosen": -2.5698742866516113, "logits/rejected": -2.4063408374786377, "logps/chosen": -335.230224609375, "logps/rejected": -375.5904541015625, "loss": 0.2984, "rewards/accuracies": 0.75, "rewards/chosen": -0.19878751039505005, "rewards/margins": 2.9006171226501465, "rewards/rejected": -3.0994043350219727, "step": 1763 }, { "epoch": 0.21, "learning_rate": 2.42069209873627e-07, "logits/chosen": -2.5678648948669434, "logits/rejected": -2.040738105773926, "logps/chosen": -111.26048278808594, "logps/rejected": -378.9677734375, "loss": 0.3701, "rewards/accuracies": 0.875, "rewards/chosen": -0.9800238609313965, "rewards/margins": 2.095574378967285, "rewards/rejected": -3.0755982398986816, "step": 1764 }, { "epoch": 0.21, "learning_rate": 2.4203377819770876e-07, "logits/chosen": -2.012361526489258, "logits/rejected": -2.108659505844116, "logps/chosen": -116.91748809814453, "logps/rejected": -198.938232421875, "loss": 0.2235, "rewards/accuracies": 1.0, "rewards/chosen": -0.9344013929367065, "rewards/margins": 1.8535056114196777, "rewards/rejected": -2.787907123565674, "step": 1765 }, { "epoch": 0.21, "learning_rate": 2.4199834652179046e-07, "logits/chosen": -2.162045955657959, "logits/rejected": -2.70467209815979, "logps/chosen": -532.46826171875, "logps/rejected": -295.8982238769531, "loss": 0.5583, "rewards/accuracies": 0.75, "rewards/chosen": -0.8712948560714722, "rewards/margins": 1.359626293182373, "rewards/rejected": -2.2309212684631348, "step": 1766 }, { "epoch": 0.21, "learning_rate": 2.419629148458722e-07, "logits/chosen": -2.1358556747436523, "logits/rejected": -2.148402690887451, "logps/chosen": -250.0975341796875, "logps/rejected": -268.36871337890625, "loss": 0.3385, "rewards/accuracies": 0.875, "rewards/chosen": -0.290607750415802, "rewards/margins": 1.5199196338653564, "rewards/rejected": -1.8105273246765137, "step": 1767 }, { "epoch": 0.21, "learning_rate": 2.419274831699539e-07, "logits/chosen": -2.3079538345336914, "logits/rejected": -2.667905330657959, "logps/chosen": -251.67478942871094, "logps/rejected": -176.84173583984375, "loss": 0.4451, "rewards/accuracies": 0.75, "rewards/chosen": -0.6356084942817688, "rewards/margins": 1.8497332334518433, "rewards/rejected": -2.4853415489196777, "step": 1768 }, { "epoch": 0.21, "learning_rate": 2.4189205149403565e-07, "logits/chosen": -3.1554512977600098, "logits/rejected": -3.1036746501922607, "logps/chosen": -356.59979248046875, "logps/rejected": -308.8307189941406, "loss": 0.4177, "rewards/accuracies": 0.875, "rewards/chosen": -0.8076197504997253, "rewards/margins": 1.043885588645935, "rewards/rejected": -1.8515052795410156, "step": 1769 }, { "epoch": 0.21, "learning_rate": 2.418566198181174e-07, "logits/chosen": -1.6423438787460327, "logits/rejected": -1.8809717893600464, "logps/chosen": -471.5633850097656, "logps/rejected": -376.1085510253906, "loss": 0.3424, "rewards/accuracies": 0.875, "rewards/chosen": -0.6496620774269104, "rewards/margins": 2.1963248252868652, "rewards/rejected": -2.845987319946289, "step": 1770 }, { "epoch": 0.21, "learning_rate": 2.4182118814219915e-07, "logits/chosen": -2.3699305057525635, "logits/rejected": -2.3077030181884766, "logps/chosen": -152.7406463623047, "logps/rejected": -99.40492248535156, "loss": 0.3901, "rewards/accuracies": 0.875, "rewards/chosen": -0.7054970264434814, "rewards/margins": 1.1497149467468262, "rewards/rejected": -1.8552119731903076, "step": 1771 }, { "epoch": 0.21, "learning_rate": 2.4178575646628084e-07, "logits/chosen": -2.260051727294922, "logits/rejected": -2.225996494293213, "logps/chosen": -182.92218017578125, "logps/rejected": -171.13858032226562, "loss": 0.7437, "rewards/accuracies": 0.625, "rewards/chosen": -0.9591944217681885, "rewards/margins": 1.1684916019439697, "rewards/rejected": -2.127686023712158, "step": 1772 }, { "epoch": 0.21, "learning_rate": 2.417503247903626e-07, "logits/chosen": -2.965254545211792, "logits/rejected": -2.9681715965270996, "logps/chosen": -477.01141357421875, "logps/rejected": -319.0557861328125, "loss": 0.4576, "rewards/accuracies": 0.75, "rewards/chosen": -1.2657482624053955, "rewards/margins": 2.438108444213867, "rewards/rejected": -3.7038567066192627, "step": 1773 }, { "epoch": 0.21, "learning_rate": 2.417148931144443e-07, "logits/chosen": -2.0817630290985107, "logits/rejected": -2.1232998371124268, "logps/chosen": -216.533447265625, "logps/rejected": -231.09722900390625, "loss": 0.5355, "rewards/accuracies": 0.75, "rewards/chosen": -0.630585253238678, "rewards/margins": 1.9499613046646118, "rewards/rejected": -2.5805463790893555, "step": 1774 }, { "epoch": 0.21, "learning_rate": 2.4167946143852604e-07, "logits/chosen": -2.634916305541992, "logits/rejected": -2.901615858078003, "logps/chosen": -222.041748046875, "logps/rejected": -229.88272094726562, "loss": 0.6299, "rewards/accuracies": 0.75, "rewards/chosen": -0.9937788248062134, "rewards/margins": 1.422478437423706, "rewards/rejected": -2.41625714302063, "step": 1775 }, { "epoch": 0.21, "learning_rate": 2.4164402976260773e-07, "logits/chosen": -2.301354169845581, "logits/rejected": -2.17429256439209, "logps/chosen": -323.59344482421875, "logps/rejected": -327.7535705566406, "loss": 0.3608, "rewards/accuracies": 0.75, "rewards/chosen": -0.29222571849823, "rewards/margins": 1.6469990015029907, "rewards/rejected": -1.9392247200012207, "step": 1776 }, { "epoch": 0.21, "learning_rate": 2.416085980866895e-07, "logits/chosen": -2.3338112831115723, "logits/rejected": -2.3443639278411865, "logps/chosen": -278.14990234375, "logps/rejected": -229.26962280273438, "loss": 0.3551, "rewards/accuracies": 0.75, "rewards/chosen": -1.157565951347351, "rewards/margins": 1.7625893354415894, "rewards/rejected": -2.9201552867889404, "step": 1777 }, { "epoch": 0.21, "learning_rate": 2.4157316641077123e-07, "logits/chosen": -1.6919491291046143, "logits/rejected": -2.1408681869506836, "logps/chosen": -296.6734619140625, "logps/rejected": -270.4972839355469, "loss": 0.6078, "rewards/accuracies": 0.625, "rewards/chosen": -1.4082125425338745, "rewards/margins": 1.1235957145690918, "rewards/rejected": -2.531808376312256, "step": 1778 }, { "epoch": 0.21, "learning_rate": 2.415377347348529e-07, "logits/chosen": -2.616039276123047, "logits/rejected": -2.743342876434326, "logps/chosen": -260.18988037109375, "logps/rejected": -270.3879699707031, "loss": 0.2968, "rewards/accuracies": 0.875, "rewards/chosen": -0.6332108974456787, "rewards/margins": 1.984546184539795, "rewards/rejected": -2.6177573204040527, "step": 1779 }, { "epoch": 0.21, "learning_rate": 2.415023030589347e-07, "logits/chosen": -2.79740047454834, "logits/rejected": -2.512173652648926, "logps/chosen": -205.47618103027344, "logps/rejected": -259.50445556640625, "loss": 0.2686, "rewards/accuracies": 0.875, "rewards/chosen": -0.17176327109336853, "rewards/margins": 2.763949394226074, "rewards/rejected": -2.9357125759124756, "step": 1780 }, { "epoch": 0.21, "learning_rate": 2.4146687138301637e-07, "logits/chosen": -2.6682446002960205, "logits/rejected": -2.6388533115386963, "logps/chosen": -308.41558837890625, "logps/rejected": -203.4622344970703, "loss": 0.2529, "rewards/accuracies": 0.75, "rewards/chosen": -0.7370229363441467, "rewards/margins": 2.0111355781555176, "rewards/rejected": -2.7481584548950195, "step": 1781 }, { "epoch": 0.21, "learning_rate": 2.4143143970709817e-07, "logits/chosen": -1.6870448589324951, "logits/rejected": -2.1154861450195312, "logps/chosen": -452.85601806640625, "logps/rejected": -342.8895263671875, "loss": 0.9876, "rewards/accuracies": 0.75, "rewards/chosen": -1.5582736730575562, "rewards/margins": 2.331254005432129, "rewards/rejected": -3.8895277976989746, "step": 1782 }, { "epoch": 0.21, "learning_rate": 2.4139600803117987e-07, "logits/chosen": -2.5333008766174316, "logits/rejected": -2.8384664058685303, "logps/chosen": -314.5324401855469, "logps/rejected": -202.55662536621094, "loss": 0.4603, "rewards/accuracies": 0.75, "rewards/chosen": -0.46156880259513855, "rewards/margins": 2.368114709854126, "rewards/rejected": -2.829683780670166, "step": 1783 }, { "epoch": 0.21, "learning_rate": 2.413605763552616e-07, "logits/chosen": -3.010484218597412, "logits/rejected": -2.9420018196105957, "logps/chosen": -310.7889709472656, "logps/rejected": -273.9451599121094, "loss": 0.3769, "rewards/accuracies": 0.625, "rewards/chosen": -1.0498713254928589, "rewards/margins": 2.0087215900421143, "rewards/rejected": -3.058593273162842, "step": 1784 }, { "epoch": 0.21, "learning_rate": 2.413251446793433e-07, "logits/chosen": -2.360142230987549, "logits/rejected": -2.3157296180725098, "logps/chosen": -490.0210266113281, "logps/rejected": -398.24127197265625, "loss": 0.2676, "rewards/accuracies": 1.0, "rewards/chosen": -0.4722224175930023, "rewards/margins": 1.8734838962554932, "rewards/rejected": -2.345705986022949, "step": 1785 }, { "epoch": 0.21, "learning_rate": 2.4128971300342506e-07, "logits/chosen": -2.257172107696533, "logits/rejected": -2.4716625213623047, "logps/chosen": -417.0865783691406, "logps/rejected": -339.0855407714844, "loss": 0.6129, "rewards/accuracies": 0.75, "rewards/chosen": -0.5448400378227234, "rewards/margins": 0.804780900478363, "rewards/rejected": -1.3496208190917969, "step": 1786 }, { "epoch": 0.21, "learning_rate": 2.4125428132750676e-07, "logits/chosen": -2.949373245239258, "logits/rejected": -2.935990810394287, "logps/chosen": -314.9571838378906, "logps/rejected": -222.14329528808594, "loss": 0.4192, "rewards/accuracies": 0.875, "rewards/chosen": -0.5069339275360107, "rewards/margins": 2.4671695232391357, "rewards/rejected": -2.9741034507751465, "step": 1787 }, { "epoch": 0.21, "learning_rate": 2.412188496515885e-07, "logits/chosen": -2.1952579021453857, "logits/rejected": -2.5754575729370117, "logps/chosen": -438.81036376953125, "logps/rejected": -258.2390441894531, "loss": 0.6286, "rewards/accuracies": 0.5, "rewards/chosen": -1.1901628971099854, "rewards/margins": 0.31962358951568604, "rewards/rejected": -1.509786605834961, "step": 1788 }, { "epoch": 0.21, "learning_rate": 2.4118341797567025e-07, "logits/chosen": -2.2454657554626465, "logits/rejected": -1.98702073097229, "logps/chosen": -161.27996826171875, "logps/rejected": -250.56195068359375, "loss": 0.1593, "rewards/accuracies": 1.0, "rewards/chosen": 0.21291275322437286, "rewards/margins": 2.3598310947418213, "rewards/rejected": -2.146918296813965, "step": 1789 }, { "epoch": 0.21, "learning_rate": 2.4114798629975195e-07, "logits/chosen": -2.7658071517944336, "logits/rejected": -2.9031317234039307, "logps/chosen": -196.84999084472656, "logps/rejected": -393.04248046875, "loss": 0.0727, "rewards/accuracies": 1.0, "rewards/chosen": -0.5813136100769043, "rewards/margins": 4.383693695068359, "rewards/rejected": -4.965007781982422, "step": 1790 }, { "epoch": 0.21, "learning_rate": 2.411125546238337e-07, "logits/chosen": -2.118443489074707, "logits/rejected": -1.9644920825958252, "logps/chosen": -183.48013305664062, "logps/rejected": -191.82369995117188, "loss": 0.7392, "rewards/accuracies": 0.5, "rewards/chosen": -2.3673465251922607, "rewards/margins": 0.4686541259288788, "rewards/rejected": -2.836000680923462, "step": 1791 }, { "epoch": 0.21, "learning_rate": 2.410771229479154e-07, "logits/chosen": -2.101119041442871, "logits/rejected": -2.319307565689087, "logps/chosen": -491.99066162109375, "logps/rejected": -331.24652099609375, "loss": 0.3094, "rewards/accuracies": 0.875, "rewards/chosen": -0.33622315526008606, "rewards/margins": 2.8945205211639404, "rewards/rejected": -3.230743646621704, "step": 1792 }, { "epoch": 0.21, "learning_rate": 2.4104169127199714e-07, "logits/chosen": -2.5244228839874268, "logits/rejected": -2.6231677532196045, "logps/chosen": -281.930908203125, "logps/rejected": -297.80487060546875, "loss": 0.3301, "rewards/accuracies": 0.75, "rewards/chosen": -0.6820855736732483, "rewards/margins": 1.7326068878173828, "rewards/rejected": -2.4146924018859863, "step": 1793 }, { "epoch": 0.21, "learning_rate": 2.410062595960789e-07, "logits/chosen": -2.1775941848754883, "logits/rejected": -2.140559196472168, "logps/chosen": -265.06561279296875, "logps/rejected": -257.12774658203125, "loss": 0.445, "rewards/accuracies": 0.75, "rewards/chosen": -0.4523833692073822, "rewards/margins": 1.183227300643921, "rewards/rejected": -1.635610818862915, "step": 1794 }, { "epoch": 0.21, "learning_rate": 2.4097082792016064e-07, "logits/chosen": -2.1056160926818848, "logits/rejected": -2.1519041061401367, "logps/chosen": -256.8416442871094, "logps/rejected": -261.8810729980469, "loss": 0.1848, "rewards/accuracies": 0.875, "rewards/chosen": -0.434116005897522, "rewards/margins": 2.165034532546997, "rewards/rejected": -2.5991506576538086, "step": 1795 }, { "epoch": 0.21, "learning_rate": 2.4093539624424233e-07, "logits/chosen": -2.406691074371338, "logits/rejected": -2.403996706008911, "logps/chosen": -210.34896850585938, "logps/rejected": -205.80401611328125, "loss": 0.9085, "rewards/accuracies": 0.375, "rewards/chosen": -1.448098063468933, "rewards/margins": 0.10742287337779999, "rewards/rejected": -1.555521011352539, "step": 1796 }, { "epoch": 0.21, "learning_rate": 2.408999645683241e-07, "logits/chosen": -2.3813364505767822, "logits/rejected": -2.5804226398468018, "logps/chosen": -229.07826232910156, "logps/rejected": -206.28733825683594, "loss": 0.6565, "rewards/accuracies": 0.75, "rewards/chosen": -0.5977106094360352, "rewards/margins": 0.8167107105255127, "rewards/rejected": -1.4144212007522583, "step": 1797 }, { "epoch": 0.21, "learning_rate": 2.408645328924058e-07, "logits/chosen": -2.222550868988037, "logits/rejected": -2.104487180709839, "logps/chosen": -200.4759979248047, "logps/rejected": -251.73898315429688, "loss": 0.1941, "rewards/accuracies": 0.875, "rewards/chosen": -1.067495346069336, "rewards/margins": 2.917901039123535, "rewards/rejected": -3.985396385192871, "step": 1798 }, { "epoch": 0.21, "learning_rate": 2.4082910121648753e-07, "logits/chosen": -2.2080435752868652, "logits/rejected": -1.9050899744033813, "logps/chosen": -214.06280517578125, "logps/rejected": -409.6534118652344, "loss": 0.2256, "rewards/accuracies": 1.0, "rewards/chosen": 0.07568755745887756, "rewards/margins": 1.724006175994873, "rewards/rejected": -1.6483186483383179, "step": 1799 }, { "epoch": 0.21, "learning_rate": 2.407936695405693e-07, "logits/chosen": -2.782255172729492, "logits/rejected": -2.5283596515655518, "logps/chosen": -290.778076171875, "logps/rejected": -332.7655334472656, "loss": 0.5626, "rewards/accuracies": 0.75, "rewards/chosen": -0.6284094452857971, "rewards/margins": 1.9699878692626953, "rewards/rejected": -2.5983974933624268, "step": 1800 }, { "epoch": 0.21, "learning_rate": 2.4075823786465097e-07, "logits/chosen": -2.087553024291992, "logits/rejected": -2.0506093502044678, "logps/chosen": -495.3292236328125, "logps/rejected": -430.4928283691406, "loss": 0.5496, "rewards/accuracies": 0.75, "rewards/chosen": -0.7543399333953857, "rewards/margins": 0.6078133583068848, "rewards/rejected": -1.3621532917022705, "step": 1801 }, { "epoch": 0.21, "learning_rate": 2.407228061887327e-07, "logits/chosen": -2.0319645404815674, "logits/rejected": -2.2050719261169434, "logps/chosen": -441.437255859375, "logps/rejected": -355.9711608886719, "loss": 0.4962, "rewards/accuracies": 0.75, "rewards/chosen": -0.9836417436599731, "rewards/margins": 0.9870733022689819, "rewards/rejected": -1.970715045928955, "step": 1802 }, { "epoch": 0.21, "learning_rate": 2.406873745128144e-07, "logits/chosen": -1.780432939529419, "logits/rejected": -1.7204499244689941, "logps/chosen": -320.8402099609375, "logps/rejected": -277.77276611328125, "loss": 0.4205, "rewards/accuracies": 0.75, "rewards/chosen": -1.1337292194366455, "rewards/margins": 1.7460973262786865, "rewards/rejected": -2.879826545715332, "step": 1803 }, { "epoch": 0.21, "learning_rate": 2.4065194283689616e-07, "logits/chosen": -2.732217788696289, "logits/rejected": -2.3263778686523438, "logps/chosen": -412.921630859375, "logps/rejected": -264.34619140625, "loss": 0.1524, "rewards/accuracies": 1.0, "rewards/chosen": -0.6698638796806335, "rewards/margins": 2.2237887382507324, "rewards/rejected": -2.8936524391174316, "step": 1804 }, { "epoch": 0.21, "learning_rate": 2.406165111609779e-07, "logits/chosen": -2.475923538208008, "logits/rejected": -2.8211891651153564, "logps/chosen": -290.59576416015625, "logps/rejected": -168.78836059570312, "loss": 0.9308, "rewards/accuracies": 0.625, "rewards/chosen": -1.0191001892089844, "rewards/margins": 1.2628498077392578, "rewards/rejected": -2.281949996948242, "step": 1805 }, { "epoch": 0.21, "learning_rate": 2.4058107948505966e-07, "logits/chosen": -2.2517244815826416, "logits/rejected": -2.577059745788574, "logps/chosen": -356.13153076171875, "logps/rejected": -245.3402099609375, "loss": 0.3627, "rewards/accuracies": 0.875, "rewards/chosen": -0.6635759472846985, "rewards/margins": 1.917927622795105, "rewards/rejected": -2.581503391265869, "step": 1806 }, { "epoch": 0.21, "learning_rate": 2.4054564780914136e-07, "logits/chosen": -2.65557861328125, "logits/rejected": -2.810976982116699, "logps/chosen": -346.8943176269531, "logps/rejected": -213.51902770996094, "loss": 0.7772, "rewards/accuracies": 0.5, "rewards/chosen": -1.011732816696167, "rewards/margins": 0.16379913687705994, "rewards/rejected": -1.1755318641662598, "step": 1807 }, { "epoch": 0.21, "learning_rate": 2.405102161332231e-07, "logits/chosen": -2.545552968978882, "logits/rejected": -2.1742825508117676, "logps/chosen": -266.0611572265625, "logps/rejected": -246.55587768554688, "loss": 0.7344, "rewards/accuracies": 0.625, "rewards/chosen": -1.0296876430511475, "rewards/margins": 0.5939750671386719, "rewards/rejected": -1.6236627101898193, "step": 1808 }, { "epoch": 0.21, "learning_rate": 2.404747844573048e-07, "logits/chosen": -2.8086862564086914, "logits/rejected": -2.844008445739746, "logps/chosen": -685.3878173828125, "logps/rejected": -469.28662109375, "loss": 0.225, "rewards/accuracies": 0.875, "rewards/chosen": -0.28446730971336365, "rewards/margins": 2.5609359741210938, "rewards/rejected": -2.8454034328460693, "step": 1809 }, { "epoch": 0.21, "learning_rate": 2.4043935278138655e-07, "logits/chosen": -1.9144043922424316, "logits/rejected": -1.7782530784606934, "logps/chosen": -401.7034606933594, "logps/rejected": -332.67059326171875, "loss": 0.4218, "rewards/accuracies": 0.75, "rewards/chosen": -0.9646287560462952, "rewards/margins": 1.4903197288513184, "rewards/rejected": -2.454948663711548, "step": 1810 }, { "epoch": 0.21, "learning_rate": 2.404039211054683e-07, "logits/chosen": -1.3119378089904785, "logits/rejected": -1.9025933742523193, "logps/chosen": -336.310791015625, "logps/rejected": -204.52452087402344, "loss": 0.6543, "rewards/accuracies": 0.75, "rewards/chosen": -1.0531299114227295, "rewards/margins": 2.0119895935058594, "rewards/rejected": -3.0651192665100098, "step": 1811 }, { "epoch": 0.21, "learning_rate": 2.4036848942955e-07, "logits/chosen": -2.1743083000183105, "logits/rejected": -2.46732759475708, "logps/chosen": -443.3954772949219, "logps/rejected": -270.8285217285156, "loss": 0.2361, "rewards/accuracies": 1.0, "rewards/chosen": -0.6953043937683105, "rewards/margins": 3.064387798309326, "rewards/rejected": -3.7596921920776367, "step": 1812 }, { "epoch": 0.21, "learning_rate": 2.4033305775363174e-07, "logits/chosen": -2.3740384578704834, "logits/rejected": -2.2105555534362793, "logps/chosen": -197.86326599121094, "logps/rejected": -273.10223388671875, "loss": 0.3574, "rewards/accuracies": 0.875, "rewards/chosen": -0.5061732530593872, "rewards/margins": 1.3640061616897583, "rewards/rejected": -1.8701794147491455, "step": 1813 }, { "epoch": 0.21, "learning_rate": 2.4029762607771344e-07, "logits/chosen": -2.092151641845703, "logits/rejected": -2.1698620319366455, "logps/chosen": -274.0726318359375, "logps/rejected": -244.6338348388672, "loss": 1.0721, "rewards/accuracies": 0.625, "rewards/chosen": -2.246246099472046, "rewards/margins": 1.3149744272232056, "rewards/rejected": -3.561220645904541, "step": 1814 }, { "epoch": 0.21, "learning_rate": 2.402621944017952e-07, "logits/chosen": -2.3072686195373535, "logits/rejected": -2.0539426803588867, "logps/chosen": -117.81233978271484, "logps/rejected": -193.07135009765625, "loss": 0.4486, "rewards/accuracies": 0.75, "rewards/chosen": -0.5918164253234863, "rewards/margins": 2.0979573726654053, "rewards/rejected": -2.6897740364074707, "step": 1815 }, { "epoch": 0.21, "learning_rate": 2.402267627258769e-07, "logits/chosen": -1.7902355194091797, "logits/rejected": -1.8418197631835938, "logps/chosen": -307.3411865234375, "logps/rejected": -366.6976623535156, "loss": 0.4356, "rewards/accuracies": 0.875, "rewards/chosen": -1.4473026990890503, "rewards/margins": 2.647409200668335, "rewards/rejected": -4.094712257385254, "step": 1816 }, { "epoch": 0.21, "learning_rate": 2.401913310499587e-07, "logits/chosen": -2.324477195739746, "logits/rejected": -2.446094036102295, "logps/chosen": -359.21539306640625, "logps/rejected": -344.5244445800781, "loss": 0.746, "rewards/accuracies": 0.625, "rewards/chosen": -1.0030767917633057, "rewards/margins": 0.9773438572883606, "rewards/rejected": -1.980420708656311, "step": 1817 }, { "epoch": 0.21, "learning_rate": 2.401558993740404e-07, "logits/chosen": -2.537219285964966, "logits/rejected": -2.709055185317993, "logps/chosen": -383.7568054199219, "logps/rejected": -247.7540283203125, "loss": 0.2207, "rewards/accuracies": 0.875, "rewards/chosen": -0.8268507719039917, "rewards/margins": 2.226870059967041, "rewards/rejected": -3.053720712661743, "step": 1818 }, { "epoch": 0.21, "learning_rate": 2.4012046769812213e-07, "logits/chosen": -1.9707705974578857, "logits/rejected": -2.0588130950927734, "logps/chosen": -215.45407104492188, "logps/rejected": -239.27899169921875, "loss": 0.3587, "rewards/accuracies": 0.875, "rewards/chosen": -0.959610641002655, "rewards/margins": 1.1813466548919678, "rewards/rejected": -2.1409573554992676, "step": 1819 }, { "epoch": 0.21, "learning_rate": 2.400850360222038e-07, "logits/chosen": -2.424398899078369, "logits/rejected": -2.6035056114196777, "logps/chosen": -435.84649658203125, "logps/rejected": -301.8731384277344, "loss": 0.2954, "rewards/accuracies": 0.875, "rewards/chosen": 0.05078011006116867, "rewards/margins": 2.353275775909424, "rewards/rejected": -2.3024957180023193, "step": 1820 }, { "epoch": 0.21, "learning_rate": 2.4004960434628557e-07, "logits/chosen": -2.684037685394287, "logits/rejected": -2.6559371948242188, "logps/chosen": -247.36549377441406, "logps/rejected": -201.24359130859375, "loss": 1.5475, "rewards/accuracies": 0.5, "rewards/chosen": -1.5226982831954956, "rewards/margins": -0.05223196744918823, "rewards/rejected": -1.4704663753509521, "step": 1821 }, { "epoch": 0.21, "learning_rate": 2.400141726703673e-07, "logits/chosen": -2.238429546356201, "logits/rejected": -2.1038031578063965, "logps/chosen": -233.472412109375, "logps/rejected": -350.8326416015625, "loss": 0.3534, "rewards/accuracies": 0.875, "rewards/chosen": -0.7716639041900635, "rewards/margins": 2.766087293624878, "rewards/rejected": -3.5377514362335205, "step": 1822 }, { "epoch": 0.21, "learning_rate": 2.39978740994449e-07, "logits/chosen": -2.219170093536377, "logits/rejected": -2.0593149662017822, "logps/chosen": -275.6041564941406, "logps/rejected": -345.3872985839844, "loss": 0.3921, "rewards/accuracies": 0.75, "rewards/chosen": -0.44098731875419617, "rewards/margins": 1.1421345472335815, "rewards/rejected": -1.5831218957901, "step": 1823 }, { "epoch": 0.21, "learning_rate": 2.3994330931853077e-07, "logits/chosen": -2.1841928958892822, "logits/rejected": -2.2610769271850586, "logps/chosen": -140.24583435058594, "logps/rejected": -162.65199279785156, "loss": 0.6778, "rewards/accuracies": 0.5, "rewards/chosen": -1.3847140073776245, "rewards/margins": 0.8519527316093445, "rewards/rejected": -2.2366669178009033, "step": 1824 }, { "epoch": 0.21, "learning_rate": 2.3990787764261246e-07, "logits/chosen": -2.0864861011505127, "logits/rejected": -2.011284351348877, "logps/chosen": -292.2127685546875, "logps/rejected": -524.74560546875, "loss": 0.6519, "rewards/accuracies": 0.625, "rewards/chosen": -1.25495183467865, "rewards/margins": 1.6638132333755493, "rewards/rejected": -2.918765068054199, "step": 1825 }, { "epoch": 0.21, "learning_rate": 2.398724459666942e-07, "logits/chosen": -2.393972158432007, "logits/rejected": -2.4124274253845215, "logps/chosen": -135.2225341796875, "logps/rejected": -185.97543334960938, "loss": 0.6378, "rewards/accuracies": 0.625, "rewards/chosen": -1.5506224632263184, "rewards/margins": 0.856365442276001, "rewards/rejected": -2.4069879055023193, "step": 1826 }, { "epoch": 0.21, "learning_rate": 2.398370142907759e-07, "logits/chosen": -2.539834976196289, "logits/rejected": -2.832745313644409, "logps/chosen": -198.1143798828125, "logps/rejected": -236.12112426757812, "loss": 0.2266, "rewards/accuracies": 1.0, "rewards/chosen": -0.4517902135848999, "rewards/margins": 1.8995400667190552, "rewards/rejected": -2.351330280303955, "step": 1827 }, { "epoch": 0.21, "learning_rate": 2.3980158261485765e-07, "logits/chosen": -2.160886287689209, "logits/rejected": -2.6089634895324707, "logps/chosen": -244.024658203125, "logps/rejected": -189.86734008789062, "loss": 0.8517, "rewards/accuracies": 0.5, "rewards/chosen": -1.0040578842163086, "rewards/margins": 0.3868162930011749, "rewards/rejected": -1.3908742666244507, "step": 1828 }, { "epoch": 0.21, "learning_rate": 2.397661509389394e-07, "logits/chosen": -2.125411033630371, "logits/rejected": -2.2947494983673096, "logps/chosen": -454.7753601074219, "logps/rejected": -430.8077087402344, "loss": 0.4029, "rewards/accuracies": 0.75, "rewards/chosen": -0.6187058091163635, "rewards/margins": 1.2236459255218506, "rewards/rejected": -1.8423516750335693, "step": 1829 }, { "epoch": 0.21, "learning_rate": 2.3973071926302115e-07, "logits/chosen": -2.740480422973633, "logits/rejected": -2.67659330368042, "logps/chosen": -225.91355895996094, "logps/rejected": -223.62881469726562, "loss": 0.2971, "rewards/accuracies": 1.0, "rewards/chosen": -0.6065132021903992, "rewards/margins": 1.2455552816390991, "rewards/rejected": -1.8520684242248535, "step": 1830 }, { "epoch": 0.21, "learning_rate": 2.3969528758710285e-07, "logits/chosen": -2.2650599479675293, "logits/rejected": -2.3431594371795654, "logps/chosen": -220.01846313476562, "logps/rejected": -223.2489013671875, "loss": 0.4305, "rewards/accuracies": 0.75, "rewards/chosen": -0.5444383025169373, "rewards/margins": 1.2333711385726929, "rewards/rejected": -1.7778096199035645, "step": 1831 }, { "epoch": 0.21, "learning_rate": 2.396598559111846e-07, "logits/chosen": -2.4323949813842773, "logits/rejected": -2.3808979988098145, "logps/chosen": -205.6292724609375, "logps/rejected": -196.97604370117188, "loss": 0.4376, "rewards/accuracies": 0.625, "rewards/chosen": -0.7448318600654602, "rewards/margins": 1.1018013954162598, "rewards/rejected": -1.8466330766677856, "step": 1832 }, { "epoch": 0.21, "learning_rate": 2.3962442423526634e-07, "logits/chosen": -2.2598514556884766, "logits/rejected": -2.7316391468048096, "logps/chosen": -260.2677917480469, "logps/rejected": -295.92901611328125, "loss": 0.8556, "rewards/accuracies": 0.625, "rewards/chosen": -1.7733219861984253, "rewards/margins": 1.5683618783950806, "rewards/rejected": -3.3416836261749268, "step": 1833 }, { "epoch": 0.21, "learning_rate": 2.3958899255934804e-07, "logits/chosen": -1.938433051109314, "logits/rejected": -2.050424575805664, "logps/chosen": -267.85443115234375, "logps/rejected": -219.1025390625, "loss": 0.3592, "rewards/accuracies": 0.875, "rewards/chosen": -0.6399181485176086, "rewards/margins": 1.650614857673645, "rewards/rejected": -2.2905330657958984, "step": 1834 }, { "epoch": 0.21, "learning_rate": 2.395535608834298e-07, "logits/chosen": -2.028898239135742, "logits/rejected": -2.1120588779449463, "logps/chosen": -286.2508544921875, "logps/rejected": -265.2257385253906, "loss": 0.4171, "rewards/accuracies": 0.875, "rewards/chosen": -0.8999100923538208, "rewards/margins": 1.5799853801727295, "rewards/rejected": -2.4798953533172607, "step": 1835 }, { "epoch": 0.21, "learning_rate": 2.395181292075115e-07, "logits/chosen": -2.1489248275756836, "logits/rejected": -2.2173194885253906, "logps/chosen": -399.3984375, "logps/rejected": -279.0289306640625, "loss": 0.7069, "rewards/accuracies": 0.75, "rewards/chosen": -0.9595643281936646, "rewards/margins": 0.9332965016365051, "rewards/rejected": -1.892860770225525, "step": 1836 }, { "epoch": 0.21, "learning_rate": 2.3948269753159323e-07, "logits/chosen": -1.767270803451538, "logits/rejected": -1.9848899841308594, "logps/chosen": -334.19915771484375, "logps/rejected": -292.3046569824219, "loss": 0.633, "rewards/accuracies": 0.75, "rewards/chosen": -0.3692512512207031, "rewards/margins": 0.6491051912307739, "rewards/rejected": -1.0183563232421875, "step": 1837 }, { "epoch": 0.21, "learning_rate": 2.3944726585567493e-07, "logits/chosen": -2.1437666416168213, "logits/rejected": -2.232595443725586, "logps/chosen": -316.97711181640625, "logps/rejected": -200.85821533203125, "loss": 0.3645, "rewards/accuracies": 0.875, "rewards/chosen": -0.3444887697696686, "rewards/margins": 2.1315860748291016, "rewards/rejected": -2.476074457168579, "step": 1838 }, { "epoch": 0.21, "learning_rate": 2.394118341797567e-07, "logits/chosen": -2.4141809940338135, "logits/rejected": -2.471381902694702, "logps/chosen": -327.2308044433594, "logps/rejected": -280.989013671875, "loss": 0.3944, "rewards/accuracies": 0.75, "rewards/chosen": -0.9609078168869019, "rewards/margins": 1.5989973545074463, "rewards/rejected": -2.5599050521850586, "step": 1839 }, { "epoch": 0.21, "learning_rate": 2.393764025038384e-07, "logits/chosen": -1.2056314945220947, "logits/rejected": -2.028480291366577, "logps/chosen": -510.6068420410156, "logps/rejected": -229.74786376953125, "loss": 0.58, "rewards/accuracies": 0.75, "rewards/chosen": -0.6691687703132629, "rewards/margins": 0.607108473777771, "rewards/rejected": -1.2762773036956787, "step": 1840 }, { "epoch": 0.21, "learning_rate": 2.393409708279202e-07, "logits/chosen": -2.1948962211608887, "logits/rejected": -2.531216621398926, "logps/chosen": -237.39706420898438, "logps/rejected": -186.1221160888672, "loss": 1.0606, "rewards/accuracies": 0.375, "rewards/chosen": -1.315165400505066, "rewards/margins": -0.23163765668869019, "rewards/rejected": -1.0835278034210205, "step": 1841 }, { "epoch": 0.21, "learning_rate": 2.3930553915200187e-07, "logits/chosen": -2.106248378753662, "logits/rejected": -2.3820412158966064, "logps/chosen": -398.3311767578125, "logps/rejected": -197.82061767578125, "loss": 0.1997, "rewards/accuracies": 1.0, "rewards/chosen": -0.46229487657546997, "rewards/margins": 2.288001775741577, "rewards/rejected": -2.7502965927124023, "step": 1842 }, { "epoch": 0.21, "learning_rate": 2.392701074760836e-07, "logits/chosen": -2.2125391960144043, "logits/rejected": -2.2547972202301025, "logps/chosen": -284.9970703125, "logps/rejected": -291.548828125, "loss": 0.869, "rewards/accuracies": 0.625, "rewards/chosen": -1.1225461959838867, "rewards/margins": 0.5770548582077026, "rewards/rejected": -1.6996010541915894, "step": 1843 }, { "epoch": 0.21, "learning_rate": 2.3923467580016537e-07, "logits/chosen": -2.118744134902954, "logits/rejected": -1.8897547721862793, "logps/chosen": -219.31349182128906, "logps/rejected": -196.26116943359375, "loss": 0.476, "rewards/accuracies": 0.875, "rewards/chosen": -0.7955001592636108, "rewards/margins": 1.586808681488037, "rewards/rejected": -2.3823089599609375, "step": 1844 }, { "epoch": 0.21, "learning_rate": 2.3919924412424706e-07, "logits/chosen": -2.5527396202087402, "logits/rejected": -2.2266621589660645, "logps/chosen": -120.480224609375, "logps/rejected": -310.6474304199219, "loss": 0.268, "rewards/accuracies": 1.0, "rewards/chosen": -0.1293451189994812, "rewards/margins": 2.0820906162261963, "rewards/rejected": -2.2114357948303223, "step": 1845 }, { "epoch": 0.21, "learning_rate": 2.391638124483288e-07, "logits/chosen": -2.293996572494507, "logits/rejected": -2.3616414070129395, "logps/chosen": -388.346435546875, "logps/rejected": -233.2850341796875, "loss": 1.168, "rewards/accuracies": 0.5, "rewards/chosen": -1.548737645149231, "rewards/margins": -0.4776061773300171, "rewards/rejected": -1.0711315870285034, "step": 1846 }, { "epoch": 0.21, "learning_rate": 2.391283807724105e-07, "logits/chosen": -1.8465558290481567, "logits/rejected": -1.9513492584228516, "logps/chosen": -404.521728515625, "logps/rejected": -302.3456115722656, "loss": 0.2203, "rewards/accuracies": 0.875, "rewards/chosen": -0.06522838771343231, "rewards/margins": 2.3514344692230225, "rewards/rejected": -2.4166626930236816, "step": 1847 }, { "epoch": 0.21, "learning_rate": 2.3909294909649226e-07, "logits/chosen": -1.9971561431884766, "logits/rejected": -2.1445775032043457, "logps/chosen": -195.35919189453125, "logps/rejected": -338.66632080078125, "loss": 0.1913, "rewards/accuracies": 1.0, "rewards/chosen": -0.15782007575035095, "rewards/margins": 2.248278856277466, "rewards/rejected": -2.4060990810394287, "step": 1848 }, { "epoch": 0.22, "learning_rate": 2.3905751742057395e-07, "logits/chosen": -2.793036460876465, "logits/rejected": -2.6389265060424805, "logps/chosen": -196.2069091796875, "logps/rejected": -215.1942901611328, "loss": 0.6496, "rewards/accuracies": 0.875, "rewards/chosen": -1.0071046352386475, "rewards/margins": 1.3775696754455566, "rewards/rejected": -2.384674310684204, "step": 1849 }, { "epoch": 0.22, "learning_rate": 2.390220857446557e-07, "logits/chosen": -2.6398863792419434, "logits/rejected": -2.6811814308166504, "logps/chosen": -220.59967041015625, "logps/rejected": -232.10797119140625, "loss": 0.1671, "rewards/accuracies": 1.0, "rewards/chosen": -0.11532637476921082, "rewards/margins": 3.08892822265625, "rewards/rejected": -3.204254627227783, "step": 1850 }, { "epoch": 0.22, "learning_rate": 2.3898665406873745e-07, "logits/chosen": -2.6183342933654785, "logits/rejected": -2.753331184387207, "logps/chosen": -242.7398223876953, "logps/rejected": -194.97073364257812, "loss": 0.6219, "rewards/accuracies": 0.5, "rewards/chosen": -0.981342613697052, "rewards/margins": 1.2197438478469849, "rewards/rejected": -2.2010862827301025, "step": 1851 }, { "epoch": 0.22, "learning_rate": 2.389512223928192e-07, "logits/chosen": -2.4667465686798096, "logits/rejected": -2.49292254447937, "logps/chosen": -267.1186218261719, "logps/rejected": -229.34974670410156, "loss": 0.2449, "rewards/accuracies": 0.875, "rewards/chosen": -0.4146808683872223, "rewards/margins": 2.0976667404174805, "rewards/rejected": -2.512347459793091, "step": 1852 }, { "epoch": 0.22, "learning_rate": 2.389157907169009e-07, "logits/chosen": -2.541045904159546, "logits/rejected": -2.487985372543335, "logps/chosen": -357.8911437988281, "logps/rejected": -431.518310546875, "loss": 0.3187, "rewards/accuracies": 0.875, "rewards/chosen": -0.9771466851234436, "rewards/margins": 2.05190372467041, "rewards/rejected": -3.029050350189209, "step": 1853 }, { "epoch": 0.22, "learning_rate": 2.3888035904098264e-07, "logits/chosen": -1.7816026210784912, "logits/rejected": -1.9180233478546143, "logps/chosen": -382.764404296875, "logps/rejected": -357.87701416015625, "loss": 0.4381, "rewards/accuracies": 0.75, "rewards/chosen": -1.013568639755249, "rewards/margins": 1.7097814083099365, "rewards/rejected": -2.7233500480651855, "step": 1854 }, { "epoch": 0.22, "learning_rate": 2.388449273650644e-07, "logits/chosen": -2.483443021774292, "logits/rejected": -2.617896556854248, "logps/chosen": -152.0458526611328, "logps/rejected": -156.21456909179688, "loss": 0.5595, "rewards/accuracies": 0.75, "rewards/chosen": -0.6136979460716248, "rewards/margins": 0.8004008531570435, "rewards/rejected": -1.4140987396240234, "step": 1855 }, { "epoch": 0.22, "learning_rate": 2.388094956891461e-07, "logits/chosen": -2.2962586879730225, "logits/rejected": -2.2386789321899414, "logps/chosen": -232.10865783691406, "logps/rejected": -269.0420837402344, "loss": 0.5778, "rewards/accuracies": 0.75, "rewards/chosen": -1.3749765157699585, "rewards/margins": 1.1745188236236572, "rewards/rejected": -2.5494954586029053, "step": 1856 }, { "epoch": 0.22, "learning_rate": 2.3877406401322783e-07, "logits/chosen": -2.349285364151001, "logits/rejected": -2.4561026096343994, "logps/chosen": -248.56243896484375, "logps/rejected": -312.0973815917969, "loss": 0.3199, "rewards/accuracies": 0.75, "rewards/chosen": -0.6428518891334534, "rewards/margins": 2.0816967487335205, "rewards/rejected": -2.724548578262329, "step": 1857 }, { "epoch": 0.22, "learning_rate": 2.3873863233730953e-07, "logits/chosen": -2.654658079147339, "logits/rejected": -2.70839262008667, "logps/chosen": -263.2142639160156, "logps/rejected": -232.64837646484375, "loss": 0.3819, "rewards/accuracies": 0.875, "rewards/chosen": -0.24215373396873474, "rewards/margins": 2.618910074234009, "rewards/rejected": -2.8610637187957764, "step": 1858 }, { "epoch": 0.22, "learning_rate": 2.387032006613913e-07, "logits/chosen": -2.1176605224609375, "logits/rejected": -1.8720145225524902, "logps/chosen": -293.8715515136719, "logps/rejected": -356.38323974609375, "loss": 0.7794, "rewards/accuracies": 0.625, "rewards/chosen": -1.1942709684371948, "rewards/margins": 0.8505164384841919, "rewards/rejected": -2.0447874069213867, "step": 1859 }, { "epoch": 0.22, "learning_rate": 2.38667768985473e-07, "logits/chosen": -2.1390013694763184, "logits/rejected": -2.161271810531616, "logps/chosen": -243.033935546875, "logps/rejected": -226.15463256835938, "loss": 0.5114, "rewards/accuracies": 0.75, "rewards/chosen": -1.1890732049942017, "rewards/margins": 2.2810869216918945, "rewards/rejected": -3.4701602458953857, "step": 1860 }, { "epoch": 0.22, "learning_rate": 2.386323373095547e-07, "logits/chosen": -2.2839748859405518, "logits/rejected": -2.6485610008239746, "logps/chosen": -315.6589050292969, "logps/rejected": -172.8271026611328, "loss": 0.3502, "rewards/accuracies": 0.75, "rewards/chosen": -0.37328648567199707, "rewards/margins": 1.629736304283142, "rewards/rejected": -2.0030229091644287, "step": 1861 }, { "epoch": 0.22, "learning_rate": 2.3859690563363647e-07, "logits/chosen": -2.7193374633789062, "logits/rejected": -2.3855364322662354, "logps/chosen": -223.84414672851562, "logps/rejected": -304.731201171875, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": -0.285675048828125, "rewards/margins": 2.962185859680176, "rewards/rejected": -3.24786114692688, "step": 1862 }, { "epoch": 0.22, "learning_rate": 2.3856147395771817e-07, "logits/chosen": -1.8834855556488037, "logits/rejected": -2.037998914718628, "logps/chosen": -445.2252502441406, "logps/rejected": -393.1165771484375, "loss": 0.4185, "rewards/accuracies": 0.75, "rewards/chosen": -0.8777363300323486, "rewards/margins": 2.065230369567871, "rewards/rejected": -2.942966938018799, "step": 1863 }, { "epoch": 0.22, "learning_rate": 2.385260422817999e-07, "logits/chosen": -2.3141584396362305, "logits/rejected": -2.650653600692749, "logps/chosen": -348.17987060546875, "logps/rejected": -212.69046020507812, "loss": 1.9008, "rewards/accuracies": 0.5, "rewards/chosen": -2.8680331707000732, "rewards/margins": -0.04981809854507446, "rewards/rejected": -2.8182151317596436, "step": 1864 }, { "epoch": 0.22, "learning_rate": 2.3849061060588166e-07, "logits/chosen": -2.130000114440918, "logits/rejected": -2.181450366973877, "logps/chosen": -441.27691650390625, "logps/rejected": -479.3074951171875, "loss": 0.1648, "rewards/accuracies": 0.875, "rewards/chosen": -1.0422428846359253, "rewards/margins": 3.426182746887207, "rewards/rejected": -4.468425750732422, "step": 1865 }, { "epoch": 0.22, "learning_rate": 2.3845517892996336e-07, "logits/chosen": -3.0332584381103516, "logits/rejected": -2.930889129638672, "logps/chosen": -331.23699951171875, "logps/rejected": -201.7147216796875, "loss": 0.9153, "rewards/accuracies": 0.5, "rewards/chosen": -1.2843034267425537, "rewards/margins": 0.42358124256134033, "rewards/rejected": -1.7078847885131836, "step": 1866 }, { "epoch": 0.22, "learning_rate": 2.384197472540451e-07, "logits/chosen": -2.251643419265747, "logits/rejected": -2.4208426475524902, "logps/chosen": -249.50155639648438, "logps/rejected": -276.2348327636719, "loss": 0.2441, "rewards/accuracies": 0.875, "rewards/chosen": -1.2371821403503418, "rewards/margins": 1.7915068864822388, "rewards/rejected": -3.028688907623291, "step": 1867 }, { "epoch": 0.22, "learning_rate": 2.3838431557812683e-07, "logits/chosen": -2.7052016258239746, "logits/rejected": -2.7283475399017334, "logps/chosen": -279.57769775390625, "logps/rejected": -117.5062026977539, "loss": 0.375, "rewards/accuracies": 0.75, "rewards/chosen": -0.7914484739303589, "rewards/margins": 1.1875109672546387, "rewards/rejected": -1.978959560394287, "step": 1868 }, { "epoch": 0.22, "learning_rate": 2.3834888390220855e-07, "logits/chosen": -2.074162244796753, "logits/rejected": -2.1329944133758545, "logps/chosen": -326.43621826171875, "logps/rejected": -305.2041015625, "loss": 0.2498, "rewards/accuracies": 0.875, "rewards/chosen": -0.7786694765090942, "rewards/margins": 2.6644654273986816, "rewards/rejected": -3.4431350231170654, "step": 1869 }, { "epoch": 0.22, "learning_rate": 2.383134522262903e-07, "logits/chosen": -2.163203716278076, "logits/rejected": -2.1183652877807617, "logps/chosen": -229.25927734375, "logps/rejected": -267.69451904296875, "loss": 0.7225, "rewards/accuracies": 0.75, "rewards/chosen": -0.9101919531822205, "rewards/margins": 1.0462372303009033, "rewards/rejected": -1.956429362297058, "step": 1870 }, { "epoch": 0.22, "learning_rate": 2.3827802055037202e-07, "logits/chosen": -2.105525493621826, "logits/rejected": -2.397615909576416, "logps/chosen": -223.27398681640625, "logps/rejected": -142.81784057617188, "loss": 0.4884, "rewards/accuracies": 0.75, "rewards/chosen": -0.5663559436798096, "rewards/margins": 0.8729592561721802, "rewards/rejected": -1.4393153190612793, "step": 1871 }, { "epoch": 0.22, "learning_rate": 2.3824258887445375e-07, "logits/chosen": -2.2896339893341064, "logits/rejected": -2.2466232776641846, "logps/chosen": -267.2420349121094, "logps/rejected": -310.07586669921875, "loss": 0.3021, "rewards/accuracies": 0.875, "rewards/chosen": -0.3726680278778076, "rewards/margins": 2.13045597076416, "rewards/rejected": -2.5031239986419678, "step": 1872 }, { "epoch": 0.22, "learning_rate": 2.3820715719853547e-07, "logits/chosen": -2.0047855377197266, "logits/rejected": -2.0819265842437744, "logps/chosen": -312.0521240234375, "logps/rejected": -314.2274169921875, "loss": 0.2373, "rewards/accuracies": 0.875, "rewards/chosen": -0.35509783029556274, "rewards/margins": 2.9787514209747314, "rewards/rejected": -3.3338494300842285, "step": 1873 }, { "epoch": 0.22, "learning_rate": 2.381717255226172e-07, "logits/chosen": -2.528146982192993, "logits/rejected": -2.373764753341675, "logps/chosen": -254.23388671875, "logps/rejected": -434.03778076171875, "loss": 0.2513, "rewards/accuracies": 0.875, "rewards/chosen": -1.0806447267532349, "rewards/margins": 2.702732801437378, "rewards/rejected": -3.7833776473999023, "step": 1874 }, { "epoch": 0.22, "learning_rate": 2.3813629384669896e-07, "logits/chosen": -2.2492728233337402, "logits/rejected": -2.5492730140686035, "logps/chosen": -287.4822692871094, "logps/rejected": -241.01199340820312, "loss": 1.2629, "rewards/accuracies": 0.75, "rewards/chosen": -1.9821091890335083, "rewards/margins": 1.0684847831726074, "rewards/rejected": -3.050593852996826, "step": 1875 }, { "epoch": 0.22, "learning_rate": 2.3810086217078069e-07, "logits/chosen": -1.9452821016311646, "logits/rejected": -1.8459829092025757, "logps/chosen": -541.5878295898438, "logps/rejected": -586.473876953125, "loss": 0.5774, "rewards/accuracies": 0.875, "rewards/chosen": -0.985153079032898, "rewards/margins": 1.1230075359344482, "rewards/rejected": -2.1081604957580566, "step": 1876 }, { "epoch": 0.22, "learning_rate": 2.380654304948624e-07, "logits/chosen": -1.9148962497711182, "logits/rejected": -2.084139823913574, "logps/chosen": -462.7070617675781, "logps/rejected": -379.54718017578125, "loss": 0.523, "rewards/accuracies": 0.75, "rewards/chosen": -0.6314287781715393, "rewards/margins": 1.5687813758850098, "rewards/rejected": -2.2002103328704834, "step": 1877 }, { "epoch": 0.22, "learning_rate": 2.3802999881894413e-07, "logits/chosen": -2.085134506225586, "logits/rejected": -2.197228193283081, "logps/chosen": -497.1221008300781, "logps/rejected": -362.1059875488281, "loss": 0.353, "rewards/accuracies": 0.875, "rewards/chosen": -0.8373152613639832, "rewards/margins": 1.9410117864608765, "rewards/rejected": -2.778326988220215, "step": 1878 }, { "epoch": 0.22, "learning_rate": 2.3799456714302585e-07, "logits/chosen": -1.9802736043930054, "logits/rejected": -2.198188304901123, "logps/chosen": -217.31658935546875, "logps/rejected": -150.99002075195312, "loss": 0.6217, "rewards/accuracies": 0.5, "rewards/chosen": -1.6188395023345947, "rewards/margins": 0.4184085726737976, "rewards/rejected": -2.037248134613037, "step": 1879 }, { "epoch": 0.22, "learning_rate": 2.3795913546710758e-07, "logits/chosen": -2.2546474933624268, "logits/rejected": -2.3717594146728516, "logps/chosen": -176.18020629882812, "logps/rejected": -300.4624328613281, "loss": 0.4854, "rewards/accuracies": 0.625, "rewards/chosen": -0.41547009348869324, "rewards/margins": 0.9045188426971436, "rewards/rejected": -1.3199889659881592, "step": 1880 }, { "epoch": 0.22, "learning_rate": 2.3792370379118932e-07, "logits/chosen": -2.157809257507324, "logits/rejected": -2.3635897636413574, "logps/chosen": -298.9660339355469, "logps/rejected": -222.71099853515625, "loss": 0.2591, "rewards/accuracies": 0.875, "rewards/chosen": -0.5718898773193359, "rewards/margins": 2.52756667137146, "rewards/rejected": -3.099456548690796, "step": 1881 }, { "epoch": 0.22, "learning_rate": 2.3788827211527105e-07, "logits/chosen": -2.6111104488372803, "logits/rejected": -2.764491081237793, "logps/chosen": -316.73480224609375, "logps/rejected": -301.009521484375, "loss": 0.2237, "rewards/accuracies": 1.0, "rewards/chosen": -0.7221324443817139, "rewards/margins": 2.25185227394104, "rewards/rejected": -2.973984718322754, "step": 1882 }, { "epoch": 0.22, "learning_rate": 2.3785284043935277e-07, "logits/chosen": -2.3831427097320557, "logits/rejected": -2.3915817737579346, "logps/chosen": -441.51751708984375, "logps/rejected": -417.6058044433594, "loss": 0.4829, "rewards/accuracies": 0.75, "rewards/chosen": -0.7003589868545532, "rewards/margins": 0.9849268198013306, "rewards/rejected": -1.6852858066558838, "step": 1883 }, { "epoch": 0.22, "learning_rate": 2.378174087634345e-07, "logits/chosen": -2.1098759174346924, "logits/rejected": -2.2708804607391357, "logps/chosen": -359.12237548828125, "logps/rejected": -263.175537109375, "loss": 0.4785, "rewards/accuracies": 0.75, "rewards/chosen": -1.4044111967086792, "rewards/margins": 1.0543668270111084, "rewards/rejected": -2.458777904510498, "step": 1884 }, { "epoch": 0.22, "learning_rate": 2.377819770875162e-07, "logits/chosen": -2.520573377609253, "logits/rejected": -2.5162525177001953, "logps/chosen": -261.03619384765625, "logps/rejected": -226.56716918945312, "loss": 0.2973, "rewards/accuracies": 0.875, "rewards/chosen": -0.8558923602104187, "rewards/margins": 1.6590983867645264, "rewards/rejected": -2.51499080657959, "step": 1885 }, { "epoch": 0.22, "learning_rate": 2.3774654541159793e-07, "logits/chosen": -2.594071388244629, "logits/rejected": -2.5104005336761475, "logps/chosen": -165.8953094482422, "logps/rejected": -121.86151885986328, "loss": 0.5706, "rewards/accuracies": 0.625, "rewards/chosen": -0.8685418963432312, "rewards/margins": 1.0428014993667603, "rewards/rejected": -1.9113434553146362, "step": 1886 }, { "epoch": 0.22, "learning_rate": 2.377111137356797e-07, "logits/chosen": -2.0869667530059814, "logits/rejected": -1.713941216468811, "logps/chosen": -218.2683868408203, "logps/rejected": -353.7004699707031, "loss": 0.5165, "rewards/accuracies": 0.75, "rewards/chosen": -0.7024774551391602, "rewards/margins": 1.7456401586532593, "rewards/rejected": -2.448117733001709, "step": 1887 }, { "epoch": 0.22, "learning_rate": 2.3767568205976143e-07, "logits/chosen": -2.164726972579956, "logits/rejected": -2.13382625579834, "logps/chosen": -496.8125915527344, "logps/rejected": -539.302978515625, "loss": 0.3261, "rewards/accuracies": 0.75, "rewards/chosen": -0.7606878280639648, "rewards/margins": 1.917716145515442, "rewards/rejected": -2.678403854370117, "step": 1888 }, { "epoch": 0.22, "learning_rate": 2.3764025038384315e-07, "logits/chosen": -2.454073429107666, "logits/rejected": -2.3721110820770264, "logps/chosen": -435.46527099609375, "logps/rejected": -462.8751220703125, "loss": 0.6519, "rewards/accuracies": 0.5, "rewards/chosen": -0.3418333828449249, "rewards/margins": 1.033921718597412, "rewards/rejected": -1.375754952430725, "step": 1889 }, { "epoch": 0.22, "learning_rate": 2.3760481870792488e-07, "logits/chosen": -2.7343053817749023, "logits/rejected": -2.5915627479553223, "logps/chosen": -232.17544555664062, "logps/rejected": -186.90301513671875, "loss": 0.5301, "rewards/accuracies": 0.75, "rewards/chosen": -1.1238192319869995, "rewards/margins": 0.8672865629196167, "rewards/rejected": -1.9911059141159058, "step": 1890 }, { "epoch": 0.22, "learning_rate": 2.375693870320066e-07, "logits/chosen": -2.536543607711792, "logits/rejected": -2.6756515502929688, "logps/chosen": -148.28323364257812, "logps/rejected": -197.31802368164062, "loss": 0.5705, "rewards/accuracies": 0.75, "rewards/chosen": -0.7416296601295471, "rewards/margins": 1.5229395627975464, "rewards/rejected": -2.2645692825317383, "step": 1891 }, { "epoch": 0.22, "learning_rate": 2.3753395535608835e-07, "logits/chosen": -2.717271089553833, "logits/rejected": -2.8955729007720947, "logps/chosen": -217.17567443847656, "logps/rejected": -211.36114501953125, "loss": 0.1937, "rewards/accuracies": 1.0, "rewards/chosen": -0.34070247411727905, "rewards/margins": 2.527327537536621, "rewards/rejected": -2.868029832839966, "step": 1892 }, { "epoch": 0.22, "learning_rate": 2.3749852368017007e-07, "logits/chosen": -2.5037477016448975, "logits/rejected": -2.6694488525390625, "logps/chosen": -273.09356689453125, "logps/rejected": -174.6703643798828, "loss": 0.2878, "rewards/accuracies": 0.875, "rewards/chosen": 0.14729967713356018, "rewards/margins": 1.5316331386566162, "rewards/rejected": -1.3843333721160889, "step": 1893 }, { "epoch": 0.22, "learning_rate": 2.374630920042518e-07, "logits/chosen": -2.7910709381103516, "logits/rejected": -2.6893210411071777, "logps/chosen": -128.33018493652344, "logps/rejected": -184.21444702148438, "loss": 1.5366, "rewards/accuracies": 0.25, "rewards/chosen": -1.9125924110412598, "rewards/margins": -0.40597808361053467, "rewards/rejected": -1.5066144466400146, "step": 1894 }, { "epoch": 0.22, "learning_rate": 2.374276603283335e-07, "logits/chosen": -2.7422919273376465, "logits/rejected": -2.6861977577209473, "logps/chosen": -187.099609375, "logps/rejected": -196.91485595703125, "loss": 0.2518, "rewards/accuracies": 0.875, "rewards/chosen": -0.6197161078453064, "rewards/margins": 1.7339483499526978, "rewards/rejected": -2.3536643981933594, "step": 1895 }, { "epoch": 0.22, "learning_rate": 2.3739222865241524e-07, "logits/chosen": -2.1273694038391113, "logits/rejected": -2.6363954544067383, "logps/chosen": -436.3154602050781, "logps/rejected": -273.14404296875, "loss": 0.2662, "rewards/accuracies": 0.875, "rewards/chosen": -0.7139080762863159, "rewards/margins": 2.351698398590088, "rewards/rejected": -3.0656065940856934, "step": 1896 }, { "epoch": 0.22, "learning_rate": 2.3735679697649696e-07, "logits/chosen": -1.9425008296966553, "logits/rejected": -2.4603288173675537, "logps/chosen": -532.032470703125, "logps/rejected": -298.94549560546875, "loss": 0.5571, "rewards/accuracies": 0.625, "rewards/chosen": -0.990544855594635, "rewards/margins": 1.7373557090759277, "rewards/rejected": -2.727900505065918, "step": 1897 }, { "epoch": 0.22, "learning_rate": 2.3732136530057868e-07, "logits/chosen": -2.8450167179107666, "logits/rejected": -2.76806902885437, "logps/chosen": -231.7842559814453, "logps/rejected": -215.12518310546875, "loss": 0.2112, "rewards/accuracies": 1.0, "rewards/chosen": -0.19720341265201569, "rewards/margins": 1.9906363487243652, "rewards/rejected": -2.1878397464752197, "step": 1898 }, { "epoch": 0.22, "learning_rate": 2.3728593362466045e-07, "logits/chosen": -2.5170092582702637, "logits/rejected": -2.3557422161102295, "logps/chosen": -190.64340209960938, "logps/rejected": -473.658203125, "loss": 0.2103, "rewards/accuracies": 0.875, "rewards/chosen": -0.32688775658607483, "rewards/margins": 2.3585994243621826, "rewards/rejected": -2.6854870319366455, "step": 1899 }, { "epoch": 0.22, "learning_rate": 2.3725050194874218e-07, "logits/chosen": -2.546168327331543, "logits/rejected": -2.6860127449035645, "logps/chosen": -323.1658020019531, "logps/rejected": -319.9196472167969, "loss": 0.355, "rewards/accuracies": 0.875, "rewards/chosen": -0.6752281188964844, "rewards/margins": 2.7249855995178223, "rewards/rejected": -3.4002134799957275, "step": 1900 }, { "epoch": 0.22, "learning_rate": 2.372150702728239e-07, "logits/chosen": -2.2880942821502686, "logits/rejected": -2.0898585319519043, "logps/chosen": -347.9389343261719, "logps/rejected": -303.85595703125, "loss": 0.2869, "rewards/accuracies": 0.875, "rewards/chosen": -0.48539644479751587, "rewards/margins": 3.4133224487304688, "rewards/rejected": -3.89871883392334, "step": 1901 }, { "epoch": 0.22, "learning_rate": 2.3717963859690562e-07, "logits/chosen": -1.71338951587677, "logits/rejected": -1.8896095752716064, "logps/chosen": -401.1963195800781, "logps/rejected": -244.762939453125, "loss": 0.6482, "rewards/accuracies": 0.625, "rewards/chosen": -0.6270143985748291, "rewards/margins": 0.48526421189308167, "rewards/rejected": -1.1122785806655884, "step": 1902 }, { "epoch": 0.22, "learning_rate": 2.3714420692098734e-07, "logits/chosen": -2.188588857650757, "logits/rejected": -2.1990718841552734, "logps/chosen": -206.90773010253906, "logps/rejected": -206.59719848632812, "loss": 0.3158, "rewards/accuracies": 1.0, "rewards/chosen": -0.5991261005401611, "rewards/margins": 1.3950114250183105, "rewards/rejected": -1.9941375255584717, "step": 1903 }, { "epoch": 0.22, "learning_rate": 2.371087752450691e-07, "logits/chosen": -2.695085048675537, "logits/rejected": -2.350358486175537, "logps/chosen": -246.888916015625, "logps/rejected": -285.4039306640625, "loss": 0.4253, "rewards/accuracies": 0.875, "rewards/chosen": -0.49656784534454346, "rewards/margins": 2.3885788917541504, "rewards/rejected": -2.8851468563079834, "step": 1904 }, { "epoch": 0.22, "learning_rate": 2.3707334356915081e-07, "logits/chosen": -2.0312933921813965, "logits/rejected": -2.07850980758667, "logps/chosen": -317.5693359375, "logps/rejected": -287.5160217285156, "loss": 0.3495, "rewards/accuracies": 0.75, "rewards/chosen": -0.30298808217048645, "rewards/margins": 1.3230944871902466, "rewards/rejected": -1.6260825395584106, "step": 1905 }, { "epoch": 0.22, "learning_rate": 2.3703791189323254e-07, "logits/chosen": -2.5914878845214844, "logits/rejected": -2.727274179458618, "logps/chosen": -302.0438537597656, "logps/rejected": -407.56207275390625, "loss": 0.1321, "rewards/accuracies": 0.875, "rewards/chosen": -0.5776203870773315, "rewards/margins": 4.692588806152344, "rewards/rejected": -5.270208835601807, "step": 1906 }, { "epoch": 0.22, "learning_rate": 2.3700248021731426e-07, "logits/chosen": -2.3409159183502197, "logits/rejected": -2.3960700035095215, "logps/chosen": -288.47283935546875, "logps/rejected": -378.9532775878906, "loss": 1.0877, "rewards/accuracies": 0.5, "rewards/chosen": -1.1508336067199707, "rewards/margins": 0.8084985613822937, "rewards/rejected": -1.9593323469161987, "step": 1907 }, { "epoch": 0.22, "learning_rate": 2.3696704854139598e-07, "logits/chosen": -2.4041659832000732, "logits/rejected": -2.312610149383545, "logps/chosen": -219.34585571289062, "logps/rejected": -146.05990600585938, "loss": 0.5602, "rewards/accuracies": 0.625, "rewards/chosen": -0.6781108975410461, "rewards/margins": 1.097469687461853, "rewards/rejected": -1.7755805253982544, "step": 1908 }, { "epoch": 0.22, "learning_rate": 2.369316168654777e-07, "logits/chosen": -2.5388309955596924, "logits/rejected": -2.544203281402588, "logps/chosen": -344.69866943359375, "logps/rejected": -198.77029418945312, "loss": 0.565, "rewards/accuracies": 0.625, "rewards/chosen": -1.3352962732315063, "rewards/margins": 1.5767372846603394, "rewards/rejected": -2.9120335578918457, "step": 1909 }, { "epoch": 0.22, "learning_rate": 2.3689618518955948e-07, "logits/chosen": -2.34572696685791, "logits/rejected": -1.9676403999328613, "logps/chosen": -203.9527587890625, "logps/rejected": -292.52044677734375, "loss": 0.4774, "rewards/accuracies": 0.75, "rewards/chosen": -0.7269752621650696, "rewards/margins": 1.6519368886947632, "rewards/rejected": -2.3789122104644775, "step": 1910 }, { "epoch": 0.22, "learning_rate": 2.368607535136412e-07, "logits/chosen": -3.0167646408081055, "logits/rejected": -2.9568276405334473, "logps/chosen": -166.76547241210938, "logps/rejected": -158.6016082763672, "loss": 0.8436, "rewards/accuracies": 0.375, "rewards/chosen": -0.8339132070541382, "rewards/margins": 1.053346872329712, "rewards/rejected": -1.88726007938385, "step": 1911 }, { "epoch": 0.22, "learning_rate": 2.3682532183772292e-07, "logits/chosen": -2.1457624435424805, "logits/rejected": -1.905735969543457, "logps/chosen": -264.6837158203125, "logps/rejected": -288.00994873046875, "loss": 0.3102, "rewards/accuracies": 0.875, "rewards/chosen": -1.8385372161865234, "rewards/margins": 1.6179676055908203, "rewards/rejected": -3.4565048217773438, "step": 1912 }, { "epoch": 0.22, "learning_rate": 2.3678989016180464e-07, "logits/chosen": -2.1915013790130615, "logits/rejected": -1.9362857341766357, "logps/chosen": -294.81787109375, "logps/rejected": -371.5819091796875, "loss": 0.2749, "rewards/accuracies": 1.0, "rewards/chosen": -0.7099554538726807, "rewards/margins": 1.3402166366577148, "rewards/rejected": -2.0501720905303955, "step": 1913 }, { "epoch": 0.22, "learning_rate": 2.3675445848588637e-07, "logits/chosen": -2.6588778495788574, "logits/rejected": -2.423527479171753, "logps/chosen": -82.45245361328125, "logps/rejected": -239.75526428222656, "loss": 0.1992, "rewards/accuracies": 1.0, "rewards/chosen": -0.029128743335604668, "rewards/margins": 2.595555543899536, "rewards/rejected": -2.6246840953826904, "step": 1914 }, { "epoch": 0.22, "learning_rate": 2.3671902680996811e-07, "logits/chosen": -2.0048234462738037, "logits/rejected": -2.0521535873413086, "logps/chosen": -333.2558898925781, "logps/rejected": -279.37567138671875, "loss": 0.1844, "rewards/accuracies": 1.0, "rewards/chosen": -0.2931111454963684, "rewards/margins": 2.1779022216796875, "rewards/rejected": -2.471013069152832, "step": 1915 }, { "epoch": 0.22, "learning_rate": 2.3668359513404984e-07, "logits/chosen": -2.118030548095703, "logits/rejected": -2.0117831230163574, "logps/chosen": -345.56512451171875, "logps/rejected": -250.82711791992188, "loss": 0.5729, "rewards/accuracies": 0.625, "rewards/chosen": -0.46400970220565796, "rewards/margins": 1.0468201637268066, "rewards/rejected": -1.5108299255371094, "step": 1916 }, { "epoch": 0.22, "learning_rate": 2.3664816345813156e-07, "logits/chosen": -2.4285521507263184, "logits/rejected": -2.483677864074707, "logps/chosen": -450.41802978515625, "logps/rejected": -507.4884033203125, "loss": 0.5013, "rewards/accuracies": 0.75, "rewards/chosen": -1.101522445678711, "rewards/margins": 1.6162968873977661, "rewards/rejected": -2.7178192138671875, "step": 1917 }, { "epoch": 0.22, "learning_rate": 2.3661273178221328e-07, "logits/chosen": -2.5896239280700684, "logits/rejected": -2.7026164531707764, "logps/chosen": -334.0228271484375, "logps/rejected": -293.3704833984375, "loss": 0.4221, "rewards/accuracies": 0.75, "rewards/chosen": -0.5501595735549927, "rewards/margins": 1.6361674070358276, "rewards/rejected": -2.1863269805908203, "step": 1918 }, { "epoch": 0.22, "learning_rate": 2.36577300106295e-07, "logits/chosen": -2.4875712394714355, "logits/rejected": -1.9761066436767578, "logps/chosen": -275.9391174316406, "logps/rejected": -327.94244384765625, "loss": 0.2748, "rewards/accuracies": 0.875, "rewards/chosen": -0.45149117708206177, "rewards/margins": 2.6281967163085938, "rewards/rejected": -3.0796875953674316, "step": 1919 }, { "epoch": 0.22, "learning_rate": 2.3654186843037672e-07, "logits/chosen": -2.7367970943450928, "logits/rejected": -2.667642593383789, "logps/chosen": -394.35650634765625, "logps/rejected": -374.9109802246094, "loss": 0.2971, "rewards/accuracies": 1.0, "rewards/chosen": -1.0935614109039307, "rewards/margins": 2.049429416656494, "rewards/rejected": -3.1429905891418457, "step": 1920 }, { "epoch": 0.22, "learning_rate": 2.3650643675445847e-07, "logits/chosen": -2.410553455352783, "logits/rejected": -2.764514923095703, "logps/chosen": -349.839111328125, "logps/rejected": -339.2150573730469, "loss": 0.4616, "rewards/accuracies": 0.75, "rewards/chosen": -0.505952000617981, "rewards/margins": 1.3651684522628784, "rewards/rejected": -1.8711204528808594, "step": 1921 }, { "epoch": 0.22, "learning_rate": 2.3647100507854022e-07, "logits/chosen": -2.1740853786468506, "logits/rejected": -2.0532174110412598, "logps/chosen": -203.07275390625, "logps/rejected": -221.76226806640625, "loss": 0.2656, "rewards/accuracies": 1.0, "rewards/chosen": -0.39534902572631836, "rewards/margins": 1.7812209129333496, "rewards/rejected": -2.176569938659668, "step": 1922 }, { "epoch": 0.22, "learning_rate": 2.3643557340262194e-07, "logits/chosen": -2.423549175262451, "logits/rejected": -2.443378448486328, "logps/chosen": -266.1633605957031, "logps/rejected": -336.642578125, "loss": 0.5955, "rewards/accuracies": 0.75, "rewards/chosen": -0.6327597498893738, "rewards/margins": 1.6455696821212769, "rewards/rejected": -2.278329372406006, "step": 1923 }, { "epoch": 0.22, "learning_rate": 2.3640014172670367e-07, "logits/chosen": -2.4912524223327637, "logits/rejected": -2.5601677894592285, "logps/chosen": -287.7222900390625, "logps/rejected": -179.62554931640625, "loss": 0.436, "rewards/accuracies": 0.75, "rewards/chosen": -0.5092442035675049, "rewards/margins": 1.5516278743743896, "rewards/rejected": -2.0608720779418945, "step": 1924 }, { "epoch": 0.22, "learning_rate": 2.363647100507854e-07, "logits/chosen": -2.261140823364258, "logits/rejected": -2.7423696517944336, "logps/chosen": -230.692138671875, "logps/rejected": -211.391845703125, "loss": 0.6102, "rewards/accuracies": 0.75, "rewards/chosen": -1.1278530359268188, "rewards/margins": 1.3393990993499756, "rewards/rejected": -2.467252016067505, "step": 1925 }, { "epoch": 0.22, "learning_rate": 2.3632927837486714e-07, "logits/chosen": -2.661656618118286, "logits/rejected": -2.6082255840301514, "logps/chosen": -126.98728942871094, "logps/rejected": -172.0341796875, "loss": 0.3999, "rewards/accuracies": 0.875, "rewards/chosen": -0.38309431076049805, "rewards/margins": 1.4705787897109985, "rewards/rejected": -1.8536731004714966, "step": 1926 }, { "epoch": 0.22, "learning_rate": 2.3629384669894886e-07, "logits/chosen": -2.1700048446655273, "logits/rejected": -2.533985137939453, "logps/chosen": -405.62750244140625, "logps/rejected": -283.95703125, "loss": 0.4207, "rewards/accuracies": 0.75, "rewards/chosen": -0.9974756240844727, "rewards/margins": 1.5932140350341797, "rewards/rejected": -2.5906896591186523, "step": 1927 }, { "epoch": 0.22, "learning_rate": 2.3625841502303058e-07, "logits/chosen": -2.6873040199279785, "logits/rejected": -2.136427402496338, "logps/chosen": -193.7666473388672, "logps/rejected": -244.97193908691406, "loss": 0.531, "rewards/accuracies": 0.75, "rewards/chosen": -0.8325738310813904, "rewards/margins": 2.3003652095794678, "rewards/rejected": -3.132938861846924, "step": 1928 }, { "epoch": 0.22, "learning_rate": 2.362229833471123e-07, "logits/chosen": -2.473618507385254, "logits/rejected": -2.4008960723876953, "logps/chosen": -139.99038696289062, "logps/rejected": -245.99298095703125, "loss": 0.2713, "rewards/accuracies": 0.875, "rewards/chosen": -0.4486868679523468, "rewards/margins": 2.5053346157073975, "rewards/rejected": -2.9540212154388428, "step": 1929 }, { "epoch": 0.22, "learning_rate": 2.3618755167119403e-07, "logits/chosen": -2.0137226581573486, "logits/rejected": -2.099252700805664, "logps/chosen": -379.299560546875, "logps/rejected": -202.45355224609375, "loss": 0.4251, "rewards/accuracies": 0.625, "rewards/chosen": -1.094517469406128, "rewards/margins": 1.51676344871521, "rewards/rejected": -2.611281156539917, "step": 1930 }, { "epoch": 0.22, "learning_rate": 2.3615211999527575e-07, "logits/chosen": -2.228405475616455, "logits/rejected": -2.389601707458496, "logps/chosen": -728.1940307617188, "logps/rejected": -276.8704833984375, "loss": 0.4111, "rewards/accuracies": 0.875, "rewards/chosen": -1.1772117614746094, "rewards/margins": 1.1355983018875122, "rewards/rejected": -2.312810182571411, "step": 1931 }, { "epoch": 0.22, "learning_rate": 2.3611668831935747e-07, "logits/chosen": -2.2375380992889404, "logits/rejected": -2.3524980545043945, "logps/chosen": -263.7438659667969, "logps/rejected": -262.020751953125, "loss": 0.2629, "rewards/accuracies": 0.875, "rewards/chosen": -1.324380874633789, "rewards/margins": 2.5025112628936768, "rewards/rejected": -3.8268918991088867, "step": 1932 }, { "epoch": 0.22, "learning_rate": 2.3608125664343922e-07, "logits/chosen": -2.625051259994507, "logits/rejected": -2.6038501262664795, "logps/chosen": -293.1787109375, "logps/rejected": -164.8754119873047, "loss": 0.207, "rewards/accuracies": 1.0, "rewards/chosen": 0.1285419464111328, "rewards/margins": 2.102248430252075, "rewards/rejected": -1.9737063646316528, "step": 1933 }, { "epoch": 0.22, "learning_rate": 2.3604582496752097e-07, "logits/chosen": -2.1377415657043457, "logits/rejected": -2.231637716293335, "logps/chosen": -173.719970703125, "logps/rejected": -148.34371948242188, "loss": 0.9323, "rewards/accuracies": 0.625, "rewards/chosen": -1.1020605564117432, "rewards/margins": 0.2662084996700287, "rewards/rejected": -1.3682689666748047, "step": 1934 }, { "epoch": 0.23, "learning_rate": 2.360103932916027e-07, "logits/chosen": -2.0937788486480713, "logits/rejected": -1.9793736934661865, "logps/chosen": -156.51296997070312, "logps/rejected": -262.4140625, "loss": 1.0533, "rewards/accuracies": 0.75, "rewards/chosen": -2.027639865875244, "rewards/margins": 1.5333921909332275, "rewards/rejected": -3.5610318183898926, "step": 1935 }, { "epoch": 0.23, "learning_rate": 2.359749616156844e-07, "logits/chosen": -1.7152124643325806, "logits/rejected": -1.708038091659546, "logps/chosen": -463.2947082519531, "logps/rejected": -535.9517211914062, "loss": 0.7962, "rewards/accuracies": 0.625, "rewards/chosen": -0.4877917766571045, "rewards/margins": 1.05384361743927, "rewards/rejected": -1.541635274887085, "step": 1936 }, { "epoch": 0.23, "learning_rate": 2.3593952993976613e-07, "logits/chosen": -1.968490719795227, "logits/rejected": -2.457221746444702, "logps/chosen": -319.496826171875, "logps/rejected": -254.2911834716797, "loss": 0.2835, "rewards/accuracies": 0.875, "rewards/chosen": -0.14511051774024963, "rewards/margins": 2.079331874847412, "rewards/rejected": -2.224442481994629, "step": 1937 }, { "epoch": 0.23, "learning_rate": 2.3590409826384788e-07, "logits/chosen": -1.9732520580291748, "logits/rejected": -2.133970260620117, "logps/chosen": -355.1065979003906, "logps/rejected": -320.26568603515625, "loss": 0.5496, "rewards/accuracies": 0.625, "rewards/chosen": -1.7588386535644531, "rewards/margins": 1.116028070449829, "rewards/rejected": -2.8748669624328613, "step": 1938 }, { "epoch": 0.23, "learning_rate": 2.358686665879296e-07, "logits/chosen": -2.4308066368103027, "logits/rejected": -2.509688138961792, "logps/chosen": -265.9458312988281, "logps/rejected": -215.50738525390625, "loss": 0.5021, "rewards/accuracies": 0.75, "rewards/chosen": -0.5390626192092896, "rewards/margins": 1.0280547142028809, "rewards/rejected": -1.56711745262146, "step": 1939 }, { "epoch": 0.23, "learning_rate": 2.3583323491201133e-07, "logits/chosen": -2.268925905227661, "logits/rejected": -2.3927552700042725, "logps/chosen": -147.5147705078125, "logps/rejected": -249.91552734375, "loss": 0.4324, "rewards/accuracies": 0.75, "rewards/chosen": -0.537254810333252, "rewards/margins": 1.823635458946228, "rewards/rejected": -2.3608901500701904, "step": 1940 }, { "epoch": 0.23, "learning_rate": 2.3579780323609305e-07, "logits/chosen": -2.2943806648254395, "logits/rejected": -2.589171886444092, "logps/chosen": -316.0751037597656, "logps/rejected": -225.43399047851562, "loss": 0.251, "rewards/accuracies": 1.0, "rewards/chosen": -0.49682310223579407, "rewards/margins": 1.7436914443969727, "rewards/rejected": -2.2405142784118652, "step": 1941 }, { "epoch": 0.23, "learning_rate": 2.3576237156017477e-07, "logits/chosen": -1.8934153318405151, "logits/rejected": -2.3351941108703613, "logps/chosen": -364.35992431640625, "logps/rejected": -278.6876220703125, "loss": 0.5707, "rewards/accuracies": 0.625, "rewards/chosen": -0.5959425568580627, "rewards/margins": 1.0454621315002441, "rewards/rejected": -1.641404628753662, "step": 1942 }, { "epoch": 0.23, "learning_rate": 2.357269398842565e-07, "logits/chosen": -2.1956911087036133, "logits/rejected": -2.0930843353271484, "logps/chosen": -310.4448547363281, "logps/rejected": -368.12750244140625, "loss": 0.936, "rewards/accuracies": 0.625, "rewards/chosen": -1.2604751586914062, "rewards/margins": -0.1302127093076706, "rewards/rejected": -1.1302623748779297, "step": 1943 }, { "epoch": 0.23, "learning_rate": 2.3569150820833824e-07, "logits/chosen": -2.2690470218658447, "logits/rejected": -2.1416778564453125, "logps/chosen": -278.9833679199219, "logps/rejected": -265.6446533203125, "loss": 0.4253, "rewards/accuracies": 0.75, "rewards/chosen": -0.32209083437919617, "rewards/margins": 1.1877819299697876, "rewards/rejected": -1.5098727941513062, "step": 1944 }, { "epoch": 0.23, "learning_rate": 2.3565607653241996e-07, "logits/chosen": -2.8202309608459473, "logits/rejected": -2.90403151512146, "logps/chosen": -115.6203842163086, "logps/rejected": -155.35916137695312, "loss": 0.4547, "rewards/accuracies": 0.75, "rewards/chosen": -0.6795730590820312, "rewards/margins": 1.7632499933242798, "rewards/rejected": -2.4428229331970215, "step": 1945 }, { "epoch": 0.23, "learning_rate": 2.356206448565017e-07, "logits/chosen": -3.018399953842163, "logits/rejected": -2.945033550262451, "logps/chosen": -233.0406951904297, "logps/rejected": -225.58056640625, "loss": 0.2849, "rewards/accuracies": 0.875, "rewards/chosen": -0.11904823780059814, "rewards/margins": 2.310420513153076, "rewards/rejected": -2.4294686317443848, "step": 1946 }, { "epoch": 0.23, "learning_rate": 2.3558521318058343e-07, "logits/chosen": -2.167405128479004, "logits/rejected": -2.1109790802001953, "logps/chosen": -190.81240844726562, "logps/rejected": -250.09608459472656, "loss": 0.4934, "rewards/accuracies": 0.875, "rewards/chosen": -1.6616719961166382, "rewards/margins": 0.6092509031295776, "rewards/rejected": -2.270923137664795, "step": 1947 }, { "epoch": 0.23, "learning_rate": 2.3554978150466516e-07, "logits/chosen": -1.8608980178833008, "logits/rejected": -1.9582914113998413, "logps/chosen": -297.0230712890625, "logps/rejected": -319.9097900390625, "loss": 0.4985, "rewards/accuracies": 0.75, "rewards/chosen": -1.5452778339385986, "rewards/margins": 1.4580882787704468, "rewards/rejected": -3.003365993499756, "step": 1948 }, { "epoch": 0.23, "learning_rate": 2.355143498287469e-07, "logits/chosen": -2.5621790885925293, "logits/rejected": -2.434837818145752, "logps/chosen": -118.51498413085938, "logps/rejected": -211.962158203125, "loss": 0.408, "rewards/accuracies": 0.75, "rewards/chosen": -0.26106390357017517, "rewards/margins": 2.9498674869537354, "rewards/rejected": -3.2109313011169434, "step": 1949 }, { "epoch": 0.23, "learning_rate": 2.3547891815282863e-07, "logits/chosen": -2.096292018890381, "logits/rejected": -2.5016775131225586, "logps/chosen": -253.35401916503906, "logps/rejected": -220.43362426757812, "loss": 0.3765, "rewards/accuracies": 1.0, "rewards/chosen": -1.3263643980026245, "rewards/margins": 0.9609323740005493, "rewards/rejected": -2.287296772003174, "step": 1950 }, { "epoch": 0.23, "learning_rate": 2.3544348647691035e-07, "logits/chosen": -1.7153205871582031, "logits/rejected": -2.0528337955474854, "logps/chosen": -437.9040222167969, "logps/rejected": -416.1257019042969, "loss": 0.1373, "rewards/accuracies": 1.0, "rewards/chosen": -0.504633367061615, "rewards/margins": 3.1203224658966064, "rewards/rejected": -3.624955892562866, "step": 1951 }, { "epoch": 0.23, "learning_rate": 2.3540805480099207e-07, "logits/chosen": -1.8872873783111572, "logits/rejected": -1.938816785812378, "logps/chosen": -427.9764709472656, "logps/rejected": -277.7381591796875, "loss": 0.7244, "rewards/accuracies": 0.5, "rewards/chosen": -1.0255331993103027, "rewards/margins": 0.7432510256767273, "rewards/rejected": -1.7687842845916748, "step": 1952 }, { "epoch": 0.23, "learning_rate": 2.353726231250738e-07, "logits/chosen": -2.3966822624206543, "logits/rejected": -2.498239278793335, "logps/chosen": -234.72531127929688, "logps/rejected": -315.43548583984375, "loss": 0.3135, "rewards/accuracies": 0.875, "rewards/chosen": -0.548599362373352, "rewards/margins": 1.6499407291412354, "rewards/rejected": -2.198539972305298, "step": 1953 }, { "epoch": 0.23, "learning_rate": 2.3533719144915552e-07, "logits/chosen": -2.9425292015075684, "logits/rejected": -2.8486223220825195, "logps/chosen": -297.0843505859375, "logps/rejected": -363.0697021484375, "loss": 0.2319, "rewards/accuracies": 1.0, "rewards/chosen": -0.09073683619499207, "rewards/margins": 2.4803364276885986, "rewards/rejected": -2.571073293685913, "step": 1954 }, { "epoch": 0.23, "learning_rate": 2.3530175977323726e-07, "logits/chosen": -2.3794262409210205, "logits/rejected": -2.4866795539855957, "logps/chosen": -289.1757507324219, "logps/rejected": -340.517333984375, "loss": 1.2129, "rewards/accuracies": 0.625, "rewards/chosen": -1.3949239253997803, "rewards/margins": 1.0033807754516602, "rewards/rejected": -2.3983047008514404, "step": 1955 }, { "epoch": 0.23, "learning_rate": 2.3526632809731899e-07, "logits/chosen": -2.2765073776245117, "logits/rejected": -2.6103739738464355, "logps/chosen": -437.8962097167969, "logps/rejected": -234.457275390625, "loss": 0.4078, "rewards/accuracies": 0.625, "rewards/chosen": -0.5877244472503662, "rewards/margins": 1.8323813676834106, "rewards/rejected": -2.4201059341430664, "step": 1956 }, { "epoch": 0.23, "learning_rate": 2.3523089642140073e-07, "logits/chosen": -1.8146144151687622, "logits/rejected": -1.6830052137374878, "logps/chosen": -165.2733154296875, "logps/rejected": -289.4575500488281, "loss": 0.3389, "rewards/accuracies": 0.75, "rewards/chosen": -1.2034711837768555, "rewards/margins": 3.3609015941619873, "rewards/rejected": -4.564373016357422, "step": 1957 }, { "epoch": 0.23, "learning_rate": 2.3519546474548246e-07, "logits/chosen": -2.467728614807129, "logits/rejected": -2.5623514652252197, "logps/chosen": -155.88412475585938, "logps/rejected": -266.7792053222656, "loss": 0.1203, "rewards/accuracies": 1.0, "rewards/chosen": 0.181828111410141, "rewards/margins": 3.773216485977173, "rewards/rejected": -3.59138822555542, "step": 1958 }, { "epoch": 0.23, "learning_rate": 2.3516003306956418e-07, "logits/chosen": -2.1743507385253906, "logits/rejected": -2.415201187133789, "logps/chosen": -434.0635986328125, "logps/rejected": -273.3455810546875, "loss": 0.8771, "rewards/accuracies": 0.625, "rewards/chosen": -1.6704363822937012, "rewards/margins": 0.5951224565505981, "rewards/rejected": -2.265558958053589, "step": 1959 }, { "epoch": 0.23, "learning_rate": 2.3512460139364593e-07, "logits/chosen": -2.2718307971954346, "logits/rejected": -2.4912564754486084, "logps/chosen": -449.3606262207031, "logps/rejected": -346.3267822265625, "loss": 0.2178, "rewards/accuracies": 0.875, "rewards/chosen": -0.021274691447615623, "rewards/margins": 2.50516939163208, "rewards/rejected": -2.5264439582824707, "step": 1960 }, { "epoch": 0.23, "learning_rate": 2.3508916971772765e-07, "logits/chosen": -2.204979419708252, "logits/rejected": -2.037120819091797, "logps/chosen": -216.04345703125, "logps/rejected": -308.4803466796875, "loss": 0.1049, "rewards/accuracies": 1.0, "rewards/chosen": -0.8284422159194946, "rewards/margins": 3.37876558303833, "rewards/rejected": -4.207207679748535, "step": 1961 }, { "epoch": 0.23, "learning_rate": 2.3505373804180937e-07, "logits/chosen": -2.1463544368743896, "logits/rejected": -2.1713199615478516, "logps/chosen": -254.8804931640625, "logps/rejected": -267.84039306640625, "loss": 0.5483, "rewards/accuracies": 0.75, "rewards/chosen": -0.5927608013153076, "rewards/margins": 1.1968047618865967, "rewards/rejected": -1.7895654439926147, "step": 1962 }, { "epoch": 0.23, "learning_rate": 2.350183063658911e-07, "logits/chosen": -2.13004732131958, "logits/rejected": -2.4813008308410645, "logps/chosen": -232.4341583251953, "logps/rejected": -179.20472717285156, "loss": 0.6793, "rewards/accuracies": 0.625, "rewards/chosen": -1.2120013236999512, "rewards/margins": 0.7421838045120239, "rewards/rejected": -1.954185128211975, "step": 1963 }, { "epoch": 0.23, "learning_rate": 2.3498287468997282e-07, "logits/chosen": -2.0552053451538086, "logits/rejected": -2.465153217315674, "logps/chosen": -370.2318115234375, "logps/rejected": -184.74234008789062, "loss": 0.2956, "rewards/accuracies": 0.875, "rewards/chosen": -0.6746222972869873, "rewards/margins": 1.4199590682983398, "rewards/rejected": -2.094581365585327, "step": 1964 }, { "epoch": 0.23, "learning_rate": 2.3494744301405454e-07, "logits/chosen": -2.3425588607788086, "logits/rejected": -2.1580755710601807, "logps/chosen": -233.93360900878906, "logps/rejected": -224.84085083007812, "loss": 0.3867, "rewards/accuracies": 0.875, "rewards/chosen": -0.4941990077495575, "rewards/margins": 1.095848798751831, "rewards/rejected": -1.5900477170944214, "step": 1965 }, { "epoch": 0.23, "learning_rate": 2.3491201133813626e-07, "logits/chosen": -2.7572269439697266, "logits/rejected": -2.6859066486358643, "logps/chosen": -432.6898193359375, "logps/rejected": -297.7322998046875, "loss": 0.23, "rewards/accuracies": 1.0, "rewards/chosen": -0.562412679195404, "rewards/margins": 2.265326976776123, "rewards/rejected": -2.8277394771575928, "step": 1966 }, { "epoch": 0.23, "learning_rate": 2.34876579662218e-07, "logits/chosen": -2.519235849380493, "logits/rejected": -2.2266769409179688, "logps/chosen": -155.7998504638672, "logps/rejected": -275.9801330566406, "loss": 0.2644, "rewards/accuracies": 1.0, "rewards/chosen": -0.7779420018196106, "rewards/margins": 1.891699194908142, "rewards/rejected": -2.6696410179138184, "step": 1967 }, { "epoch": 0.23, "learning_rate": 2.3484114798629973e-07, "logits/chosen": -2.0956647396087646, "logits/rejected": -2.4342117309570312, "logps/chosen": -292.76593017578125, "logps/rejected": -222.981689453125, "loss": 0.4182, "rewards/accuracies": 0.75, "rewards/chosen": -0.7448635697364807, "rewards/margins": 1.3684239387512207, "rewards/rejected": -2.1132874488830566, "step": 1968 }, { "epoch": 0.23, "learning_rate": 2.3480571631038148e-07, "logits/chosen": -1.8876087665557861, "logits/rejected": -1.982627034187317, "logps/chosen": -394.96636962890625, "logps/rejected": -292.7491149902344, "loss": 0.5063, "rewards/accuracies": 0.875, "rewards/chosen": -1.5040005445480347, "rewards/margins": 1.3237920999526978, "rewards/rejected": -2.8277926445007324, "step": 1969 }, { "epoch": 0.23, "learning_rate": 2.347702846344632e-07, "logits/chosen": -2.6826772689819336, "logits/rejected": -2.7892727851867676, "logps/chosen": -373.95941162109375, "logps/rejected": -299.981201171875, "loss": 0.1881, "rewards/accuracies": 1.0, "rewards/chosen": -0.6756617426872253, "rewards/margins": 2.263915777206421, "rewards/rejected": -2.939577579498291, "step": 1970 }, { "epoch": 0.23, "learning_rate": 2.3473485295854495e-07, "logits/chosen": -2.7992630004882812, "logits/rejected": -2.9575328826904297, "logps/chosen": -230.15194702148438, "logps/rejected": -293.1745300292969, "loss": 0.3331, "rewards/accuracies": 0.875, "rewards/chosen": -0.303811252117157, "rewards/margins": 2.593111515045166, "rewards/rejected": -2.8969228267669678, "step": 1971 }, { "epoch": 0.23, "learning_rate": 2.3469942128262667e-07, "logits/chosen": -2.6200759410858154, "logits/rejected": -2.640418767929077, "logps/chosen": -342.5304870605469, "logps/rejected": -345.995361328125, "loss": 0.3214, "rewards/accuracies": 0.875, "rewards/chosen": -0.2785898745059967, "rewards/margins": 1.940861463546753, "rewards/rejected": -2.219451427459717, "step": 1972 }, { "epoch": 0.23, "learning_rate": 2.346639896067084e-07, "logits/chosen": -2.1326887607574463, "logits/rejected": -2.063842296600342, "logps/chosen": -210.5382080078125, "logps/rejected": -274.0124816894531, "loss": 0.5267, "rewards/accuracies": 0.75, "rewards/chosen": -0.6867938041687012, "rewards/margins": 2.4064230918884277, "rewards/rejected": -3.0932164192199707, "step": 1973 }, { "epoch": 0.23, "learning_rate": 2.3462855793079012e-07, "logits/chosen": -2.5352542400360107, "logits/rejected": -2.451993227005005, "logps/chosen": -327.9621276855469, "logps/rejected": -298.9266357421875, "loss": 0.4011, "rewards/accuracies": 0.625, "rewards/chosen": -0.16905011236667633, "rewards/margins": 1.6964938640594482, "rewards/rejected": -1.8655438423156738, "step": 1974 }, { "epoch": 0.23, "learning_rate": 2.3459312625487184e-07, "logits/chosen": -2.5143871307373047, "logits/rejected": -2.6486411094665527, "logps/chosen": -317.9917907714844, "logps/rejected": -165.66217041015625, "loss": 0.7759, "rewards/accuracies": 0.625, "rewards/chosen": -0.9655543565750122, "rewards/margins": 0.6170029640197754, "rewards/rejected": -1.582557201385498, "step": 1975 }, { "epoch": 0.23, "learning_rate": 2.3455769457895356e-07, "logits/chosen": -2.1544029712677, "logits/rejected": -2.1629600524902344, "logps/chosen": -313.361328125, "logps/rejected": -323.8248291015625, "loss": 0.1721, "rewards/accuracies": 1.0, "rewards/chosen": -0.6415922045707703, "rewards/margins": 2.151778221130371, "rewards/rejected": -2.793370246887207, "step": 1976 }, { "epoch": 0.23, "learning_rate": 2.3452226290303528e-07, "logits/chosen": -2.2814230918884277, "logits/rejected": -2.280259132385254, "logps/chosen": -310.88861083984375, "logps/rejected": -292.0172119140625, "loss": 1.1235, "rewards/accuracies": 0.875, "rewards/chosen": -2.7768259048461914, "rewards/margins": 0.9707800149917603, "rewards/rejected": -3.7476062774658203, "step": 1977 }, { "epoch": 0.23, "learning_rate": 2.3448683122711703e-07, "logits/chosen": -2.8088107109069824, "logits/rejected": -2.6930596828460693, "logps/chosen": -437.1911926269531, "logps/rejected": -254.23907470703125, "loss": 0.1625, "rewards/accuracies": 1.0, "rewards/chosen": -0.6103285551071167, "rewards/margins": 2.8429336547851562, "rewards/rejected": -3.4532623291015625, "step": 1978 }, { "epoch": 0.23, "learning_rate": 2.3445139955119875e-07, "logits/chosen": -2.281198263168335, "logits/rejected": -2.3766019344329834, "logps/chosen": -300.1064147949219, "logps/rejected": -228.39898681640625, "loss": 0.7406, "rewards/accuracies": 0.625, "rewards/chosen": -1.0748196840286255, "rewards/margins": 0.38440003991127014, "rewards/rejected": -1.4592196941375732, "step": 1979 }, { "epoch": 0.23, "learning_rate": 2.3441596787528048e-07, "logits/chosen": -2.1105713844299316, "logits/rejected": -2.339521884918213, "logps/chosen": -453.9054260253906, "logps/rejected": -246.70291137695312, "loss": 0.2837, "rewards/accuracies": 0.875, "rewards/chosen": -0.15994839370250702, "rewards/margins": 1.74416184425354, "rewards/rejected": -1.9041101932525635, "step": 1980 }, { "epoch": 0.23, "learning_rate": 2.3438053619936222e-07, "logits/chosen": -2.2441442012786865, "logits/rejected": -2.398287534713745, "logps/chosen": -306.375244140625, "logps/rejected": -322.2066345214844, "loss": 0.2648, "rewards/accuracies": 0.875, "rewards/chosen": -0.007738925516605377, "rewards/margins": 2.2194318771362305, "rewards/rejected": -2.227170705795288, "step": 1981 }, { "epoch": 0.23, "learning_rate": 2.3434510452344395e-07, "logits/chosen": -2.2389562129974365, "logits/rejected": -2.1819119453430176, "logps/chosen": -282.5087890625, "logps/rejected": -339.15948486328125, "loss": 0.8624, "rewards/accuracies": 0.625, "rewards/chosen": -1.5152976512908936, "rewards/margins": 2.8148577213287354, "rewards/rejected": -4.330155372619629, "step": 1982 }, { "epoch": 0.23, "learning_rate": 2.343096728475257e-07, "logits/chosen": -1.6608740091323853, "logits/rejected": -2.1132619380950928, "logps/chosen": -344.1090087890625, "logps/rejected": -279.04180908203125, "loss": 0.366, "rewards/accuracies": 0.875, "rewards/chosen": -0.6082732081413269, "rewards/margins": 2.6341352462768555, "rewards/rejected": -3.242408275604248, "step": 1983 }, { "epoch": 0.23, "learning_rate": 2.3427424117160742e-07, "logits/chosen": -1.8645031452178955, "logits/rejected": -2.065497398376465, "logps/chosen": -180.1361846923828, "logps/rejected": -198.21923828125, "loss": 0.382, "rewards/accuracies": 0.75, "rewards/chosen": -0.49544787406921387, "rewards/margins": 2.2168498039245605, "rewards/rejected": -2.7122976779937744, "step": 1984 }, { "epoch": 0.23, "learning_rate": 2.3423880949568914e-07, "logits/chosen": -2.149522304534912, "logits/rejected": -2.6492624282836914, "logps/chosen": -512.5096435546875, "logps/rejected": -283.8351135253906, "loss": 0.9904, "rewards/accuracies": 0.625, "rewards/chosen": -1.106372356414795, "rewards/margins": 0.4951530694961548, "rewards/rejected": -1.6015253067016602, "step": 1985 }, { "epoch": 0.23, "learning_rate": 2.3420337781977086e-07, "logits/chosen": -2.1917884349823, "logits/rejected": -2.109611988067627, "logps/chosen": -282.18646240234375, "logps/rejected": -329.8034973144531, "loss": 0.4203, "rewards/accuracies": 0.75, "rewards/chosen": -0.655138373374939, "rewards/margins": 1.4564317464828491, "rewards/rejected": -2.111570119857788, "step": 1986 }, { "epoch": 0.23, "learning_rate": 2.3416794614385258e-07, "logits/chosen": -2.290922164916992, "logits/rejected": -2.383075475692749, "logps/chosen": -311.380615234375, "logps/rejected": -415.647216796875, "loss": 0.833, "rewards/accuracies": 0.625, "rewards/chosen": -1.3233685493469238, "rewards/margins": 1.2943097352981567, "rewards/rejected": -2.617678165435791, "step": 1987 }, { "epoch": 0.23, "learning_rate": 2.341325144679343e-07, "logits/chosen": -2.421081066131592, "logits/rejected": -2.6070947647094727, "logps/chosen": -318.7978515625, "logps/rejected": -294.3724670410156, "loss": 0.553, "rewards/accuracies": 0.875, "rewards/chosen": -0.4900839924812317, "rewards/margins": 1.3121429681777954, "rewards/rejected": -1.8022270202636719, "step": 1988 }, { "epoch": 0.23, "learning_rate": 2.3409708279201605e-07, "logits/chosen": -1.7563356161117554, "logits/rejected": -1.8937163352966309, "logps/chosen": -418.55633544921875, "logps/rejected": -405.0843811035156, "loss": 0.7327, "rewards/accuracies": 0.625, "rewards/chosen": -0.8827035427093506, "rewards/margins": 0.8655978441238403, "rewards/rejected": -1.748301386833191, "step": 1989 }, { "epoch": 0.23, "learning_rate": 2.3406165111609778e-07, "logits/chosen": -2.4359049797058105, "logits/rejected": -2.4992260932922363, "logps/chosen": -263.842529296875, "logps/rejected": -240.83753967285156, "loss": 0.3228, "rewards/accuracies": 1.0, "rewards/chosen": -0.5593858957290649, "rewards/margins": 1.442859411239624, "rewards/rejected": -2.0022454261779785, "step": 1990 }, { "epoch": 0.23, "learning_rate": 2.340262194401795e-07, "logits/chosen": -1.9965806007385254, "logits/rejected": -1.7110234498977661, "logps/chosen": -283.6479797363281, "logps/rejected": -451.0930480957031, "loss": 0.6586, "rewards/accuracies": 0.75, "rewards/chosen": -1.4226959943771362, "rewards/margins": 0.2742460370063782, "rewards/rejected": -1.6969420909881592, "step": 1991 }, { "epoch": 0.23, "learning_rate": 2.3399078776426125e-07, "logits/chosen": -2.2080020904541016, "logits/rejected": -2.383505344390869, "logps/chosen": -280.75390625, "logps/rejected": -316.1920471191406, "loss": 0.4667, "rewards/accuracies": 0.75, "rewards/chosen": -1.0683393478393555, "rewards/margins": 2.130797863006592, "rewards/rejected": -3.199136972427368, "step": 1992 }, { "epoch": 0.23, "learning_rate": 2.3395535608834297e-07, "logits/chosen": -2.60107421875, "logits/rejected": -2.6125307083129883, "logps/chosen": -300.8184509277344, "logps/rejected": -224.2725830078125, "loss": 0.2961, "rewards/accuracies": 0.875, "rewards/chosen": -0.8431633710861206, "rewards/margins": 2.1309609413146973, "rewards/rejected": -2.9741241931915283, "step": 1993 }, { "epoch": 0.23, "learning_rate": 2.3391992441242472e-07, "logits/chosen": -2.338379383087158, "logits/rejected": -2.4408397674560547, "logps/chosen": -232.31024169921875, "logps/rejected": -260.48931884765625, "loss": 0.3086, "rewards/accuracies": 0.875, "rewards/chosen": -0.34689652919769287, "rewards/margins": 2.1600465774536133, "rewards/rejected": -2.5069432258605957, "step": 1994 }, { "epoch": 0.23, "learning_rate": 2.3388449273650644e-07, "logits/chosen": -2.1882948875427246, "logits/rejected": -2.266958475112915, "logps/chosen": -202.89471435546875, "logps/rejected": -314.1727600097656, "loss": 0.2779, "rewards/accuracies": 0.875, "rewards/chosen": -0.5279630422592163, "rewards/margins": 2.4742531776428223, "rewards/rejected": -3.002216339111328, "step": 1995 }, { "epoch": 0.23, "learning_rate": 2.3384906106058816e-07, "logits/chosen": -1.9148709774017334, "logits/rejected": -1.682118535041809, "logps/chosen": -323.4260559082031, "logps/rejected": -398.777587890625, "loss": 0.3918, "rewards/accuracies": 0.75, "rewards/chosen": -0.8555980920791626, "rewards/margins": 2.0248217582702637, "rewards/rejected": -2.880419969558716, "step": 1996 }, { "epoch": 0.23, "learning_rate": 2.3381362938466988e-07, "logits/chosen": -2.2660160064697266, "logits/rejected": -2.798758029937744, "logps/chosen": -439.35076904296875, "logps/rejected": -192.93997192382812, "loss": 0.3105, "rewards/accuracies": 0.875, "rewards/chosen": -0.8983439207077026, "rewards/margins": 1.3224809169769287, "rewards/rejected": -2.220824956893921, "step": 1997 }, { "epoch": 0.23, "learning_rate": 2.337781977087516e-07, "logits/chosen": -2.2869553565979004, "logits/rejected": -2.3743093013763428, "logps/chosen": -316.4087219238281, "logps/rejected": -177.6245574951172, "loss": 0.5853, "rewards/accuracies": 0.75, "rewards/chosen": -0.47578078508377075, "rewards/margins": 0.6859957575798035, "rewards/rejected": -1.1617765426635742, "step": 1998 }, { "epoch": 0.23, "learning_rate": 2.3374276603283333e-07, "logits/chosen": -2.9616076946258545, "logits/rejected": -2.9450843334198, "logps/chosen": -206.57513427734375, "logps/rejected": -214.69483947753906, "loss": 0.5622, "rewards/accuracies": 0.875, "rewards/chosen": -0.8950778245925903, "rewards/margins": 1.3593403100967407, "rewards/rejected": -2.254418134689331, "step": 1999 }, { "epoch": 0.23, "learning_rate": 2.3370733435691508e-07, "logits/chosen": -1.951313853263855, "logits/rejected": -1.8784806728363037, "logps/chosen": -299.53179931640625, "logps/rejected": -327.47607421875, "loss": 0.5171, "rewards/accuracies": 0.875, "rewards/chosen": -0.5280905961990356, "rewards/margins": 1.1732994318008423, "rewards/rejected": -1.701390027999878, "step": 2000 }, { "epoch": 0.23, "eval_logits/chosen": -1.735522747039795, "eval_logits/rejected": -1.734364628791809, "eval_logps/chosen": -276.79437255859375, "eval_logps/rejected": -272.12823486328125, "eval_loss": 0.39564093947410583, "eval_rewards/accuracies": 0.8275862336158752, "eval_rewards/chosen": -0.46289071440696716, "eval_rewards/margins": 1.6492795944213867, "eval_rewards/rejected": -2.112170457839966, "eval_runtime": 237.1853, "eval_samples_per_second": 2.93, "eval_steps_per_second": 1.467, "step": 2000 }, { "epoch": 0.23, "learning_rate": 2.336719026809968e-07, "logits/chosen": -2.3690707683563232, "logits/rejected": -2.2364253997802734, "logps/chosen": -323.9754943847656, "logps/rejected": -361.447509765625, "loss": 0.3575, "rewards/accuracies": 0.875, "rewards/chosen": -0.6039774417877197, "rewards/margins": 1.4217994213104248, "rewards/rejected": -2.0257768630981445, "step": 2001 }, { "epoch": 0.23, "learning_rate": 2.3363647100507852e-07, "logits/chosen": -2.474982500076294, "logits/rejected": -2.5558054447174072, "logps/chosen": -322.45361328125, "logps/rejected": -304.1692199707031, "loss": 0.3839, "rewards/accuracies": 0.875, "rewards/chosen": -0.8651862144470215, "rewards/margins": 2.4731554985046387, "rewards/rejected": -3.33834171295166, "step": 2002 }, { "epoch": 0.23, "learning_rate": 2.3360103932916024e-07, "logits/chosen": -1.9826539754867554, "logits/rejected": -1.961648941040039, "logps/chosen": -189.33905029296875, "logps/rejected": -184.55433654785156, "loss": 0.2725, "rewards/accuracies": 0.875, "rewards/chosen": -0.17802608013153076, "rewards/margins": 2.0691685676574707, "rewards/rejected": -2.247194766998291, "step": 2003 }, { "epoch": 0.23, "learning_rate": 2.33565607653242e-07, "logits/chosen": -2.3278913497924805, "logits/rejected": -2.4057881832122803, "logps/chosen": -411.4052429199219, "logps/rejected": -282.1052551269531, "loss": 0.256, "rewards/accuracies": 0.875, "rewards/chosen": -0.6539748907089233, "rewards/margins": 1.9738472700119019, "rewards/rejected": -2.6278223991394043, "step": 2004 }, { "epoch": 0.23, "learning_rate": 2.3353017597732374e-07, "logits/chosen": -2.5039446353912354, "logits/rejected": -2.599365472793579, "logps/chosen": -254.27488708496094, "logps/rejected": -146.72772216796875, "loss": 0.2708, "rewards/accuracies": 1.0, "rewards/chosen": -0.8487481474876404, "rewards/margins": 1.592360019683838, "rewards/rejected": -2.441108226776123, "step": 2005 }, { "epoch": 0.23, "learning_rate": 2.3349474430140546e-07, "logits/chosen": -2.6036622524261475, "logits/rejected": -2.564223289489746, "logps/chosen": -422.2083740234375, "logps/rejected": -424.9235534667969, "loss": 0.1798, "rewards/accuracies": 1.0, "rewards/chosen": -0.7821944355964661, "rewards/margins": 1.8510080575942993, "rewards/rejected": -2.63320255279541, "step": 2006 }, { "epoch": 0.23, "learning_rate": 2.3345931262548718e-07, "logits/chosen": -2.4397385120391846, "logits/rejected": -2.354215383529663, "logps/chosen": -186.90576171875, "logps/rejected": -318.61517333984375, "loss": 1.0345, "rewards/accuracies": 0.625, "rewards/chosen": -1.5632215738296509, "rewards/margins": 1.6802515983581543, "rewards/rejected": -3.2434730529785156, "step": 2007 }, { "epoch": 0.23, "learning_rate": 2.334238809495689e-07, "logits/chosen": -1.9265174865722656, "logits/rejected": -2.0805206298828125, "logps/chosen": -295.6748962402344, "logps/rejected": -193.3943328857422, "loss": 0.4987, "rewards/accuracies": 0.75, "rewards/chosen": -0.7567787766456604, "rewards/margins": 1.4953749179840088, "rewards/rejected": -2.2521536350250244, "step": 2008 }, { "epoch": 0.23, "learning_rate": 2.3338844927365063e-07, "logits/chosen": -2.1402738094329834, "logits/rejected": -2.12782883644104, "logps/chosen": -223.82821655273438, "logps/rejected": -219.71551513671875, "loss": 0.3436, "rewards/accuracies": 0.875, "rewards/chosen": -0.41703885793685913, "rewards/margins": 2.0865871906280518, "rewards/rejected": -2.5036258697509766, "step": 2009 }, { "epoch": 0.23, "learning_rate": 2.3335301759773235e-07, "logits/chosen": -2.6035897731781006, "logits/rejected": -2.533144474029541, "logps/chosen": -185.51358032226562, "logps/rejected": -318.8678283691406, "loss": 0.1707, "rewards/accuracies": 0.875, "rewards/chosen": -0.03219519555568695, "rewards/margins": 3.306462049484253, "rewards/rejected": -3.3386573791503906, "step": 2010 }, { "epoch": 0.23, "learning_rate": 2.3331758592181407e-07, "logits/chosen": -1.7301537990570068, "logits/rejected": -1.8537843227386475, "logps/chosen": -440.43804931640625, "logps/rejected": -424.9377746582031, "loss": 0.4584, "rewards/accuracies": 0.75, "rewards/chosen": -0.3460497260093689, "rewards/margins": 0.6747007369995117, "rewards/rejected": -1.0207504034042358, "step": 2011 }, { "epoch": 0.23, "learning_rate": 2.3328215424589582e-07, "logits/chosen": -2.189547061920166, "logits/rejected": -2.338770627975464, "logps/chosen": -367.4891357421875, "logps/rejected": -217.9007110595703, "loss": 0.4429, "rewards/accuracies": 0.75, "rewards/chosen": -0.8467769026756287, "rewards/margins": 1.5399144887924194, "rewards/rejected": -2.3866913318634033, "step": 2012 }, { "epoch": 0.23, "learning_rate": 2.3324672256997754e-07, "logits/chosen": -1.6091705560684204, "logits/rejected": -2.2894623279571533, "logps/chosen": -495.65777587890625, "logps/rejected": -186.2374267578125, "loss": 1.7759, "rewards/accuracies": 0.75, "rewards/chosen": -3.099937677383423, "rewards/margins": 0.2060619592666626, "rewards/rejected": -3.305999755859375, "step": 2013 }, { "epoch": 0.23, "learning_rate": 2.3321129089405927e-07, "logits/chosen": -2.8091211318969727, "logits/rejected": -2.8256993293762207, "logps/chosen": -255.82662963867188, "logps/rejected": -230.39810180664062, "loss": 0.4307, "rewards/accuracies": 0.75, "rewards/chosen": -1.0929681062698364, "rewards/margins": 1.9111194610595703, "rewards/rejected": -3.004087448120117, "step": 2014 }, { "epoch": 0.23, "learning_rate": 2.33175859218141e-07, "logits/chosen": -2.4361212253570557, "logits/rejected": -2.5535519123077393, "logps/chosen": -201.01461791992188, "logps/rejected": -214.1597442626953, "loss": 0.8398, "rewards/accuracies": 0.75, "rewards/chosen": -1.7802462577819824, "rewards/margins": 1.396444320678711, "rewards/rejected": -3.1766905784606934, "step": 2015 }, { "epoch": 0.23, "learning_rate": 2.3314042754222276e-07, "logits/chosen": -2.5939416885375977, "logits/rejected": -2.6764373779296875, "logps/chosen": -184.04202270507812, "logps/rejected": -255.7120361328125, "loss": 0.205, "rewards/accuracies": 0.875, "rewards/chosen": -0.08808886259794235, "rewards/margins": 2.2749335765838623, "rewards/rejected": -2.363022565841675, "step": 2016 }, { "epoch": 0.23, "learning_rate": 2.3310499586630449e-07, "logits/chosen": -2.580514907836914, "logits/rejected": -2.515012502670288, "logps/chosen": -160.08331298828125, "logps/rejected": -161.6158447265625, "loss": 0.3607, "rewards/accuracies": 0.875, "rewards/chosen": -0.9722243547439575, "rewards/margins": 1.1654281616210938, "rewards/rejected": -2.137652635574341, "step": 2017 }, { "epoch": 0.23, "learning_rate": 2.330695641903862e-07, "logits/chosen": -2.553687572479248, "logits/rejected": -2.749514579772949, "logps/chosen": -215.03172302246094, "logps/rejected": -147.36903381347656, "loss": 0.3162, "rewards/accuracies": 1.0, "rewards/chosen": -0.38330480456352234, "rewards/margins": 1.4313088655471802, "rewards/rejected": -1.8146135807037354, "step": 2018 }, { "epoch": 0.23, "learning_rate": 2.3303413251446793e-07, "logits/chosen": -2.0398356914520264, "logits/rejected": -2.089282512664795, "logps/chosen": -247.7024383544922, "logps/rejected": -266.53082275390625, "loss": 0.4535, "rewards/accuracies": 0.75, "rewards/chosen": -0.3406440019607544, "rewards/margins": 1.3034788370132446, "rewards/rejected": -1.6441229581832886, "step": 2019 }, { "epoch": 0.23, "learning_rate": 2.3299870083854965e-07, "logits/chosen": -2.074162721633911, "logits/rejected": -2.5535104274749756, "logps/chosen": -314.71136474609375, "logps/rejected": -236.4280548095703, "loss": 0.9495, "rewards/accuracies": 0.875, "rewards/chosen": -0.9658845663070679, "rewards/margins": 0.9747937917709351, "rewards/rejected": -1.940678358078003, "step": 2020 }, { "epoch": 0.24, "learning_rate": 2.3296326916263137e-07, "logits/chosen": -2.3555169105529785, "logits/rejected": -2.1334614753723145, "logps/chosen": -189.9724578857422, "logps/rejected": -262.4036865234375, "loss": 0.3364, "rewards/accuracies": 0.75, "rewards/chosen": -0.4933680295944214, "rewards/margins": 1.6365783214569092, "rewards/rejected": -2.129946231842041, "step": 2021 }, { "epoch": 0.24, "learning_rate": 2.329278374867131e-07, "logits/chosen": -2.0900139808654785, "logits/rejected": -2.155226469039917, "logps/chosen": -297.7610778808594, "logps/rejected": -200.39273071289062, "loss": 0.2205, "rewards/accuracies": 1.0, "rewards/chosen": -1.5367335081100464, "rewards/margins": 1.7805781364440918, "rewards/rejected": -3.3173115253448486, "step": 2022 }, { "epoch": 0.24, "learning_rate": 2.3289240581079484e-07, "logits/chosen": -2.1037867069244385, "logits/rejected": -2.1060948371887207, "logps/chosen": -402.76129150390625, "logps/rejected": -390.22613525390625, "loss": 0.7194, "rewards/accuracies": 0.75, "rewards/chosen": -0.8463070392608643, "rewards/margins": 1.1483607292175293, "rewards/rejected": -1.994667649269104, "step": 2023 }, { "epoch": 0.24, "learning_rate": 2.3285697413487657e-07, "logits/chosen": -2.7792553901672363, "logits/rejected": -2.5888612270355225, "logps/chosen": -426.4296875, "logps/rejected": -374.66754150390625, "loss": 0.4048, "rewards/accuracies": 0.75, "rewards/chosen": -0.8128154277801514, "rewards/margins": 1.6679027080535889, "rewards/rejected": -2.4807181358337402, "step": 2024 }, { "epoch": 0.24, "learning_rate": 2.328215424589583e-07, "logits/chosen": -2.588773250579834, "logits/rejected": -2.525178909301758, "logps/chosen": -281.7531433105469, "logps/rejected": -160.92002868652344, "loss": 0.4545, "rewards/accuracies": 0.875, "rewards/chosen": -0.8186485767364502, "rewards/margins": 1.5059287548065186, "rewards/rejected": -2.3245773315429688, "step": 2025 }, { "epoch": 0.24, "learning_rate": 2.3278611078304e-07, "logits/chosen": -2.3077287673950195, "logits/rejected": -2.3561182022094727, "logps/chosen": -160.13929748535156, "logps/rejected": -121.38096618652344, "loss": 0.8238, "rewards/accuracies": 0.625, "rewards/chosen": -0.7729524374008179, "rewards/margins": 0.37409037351608276, "rewards/rejected": -1.1470427513122559, "step": 2026 }, { "epoch": 0.24, "learning_rate": 2.3275067910712176e-07, "logits/chosen": -2.769462823867798, "logits/rejected": -2.6716341972351074, "logps/chosen": -318.64117431640625, "logps/rejected": -276.5351257324219, "loss": 0.7202, "rewards/accuracies": 0.75, "rewards/chosen": -1.2279462814331055, "rewards/margins": 0.9949378967285156, "rewards/rejected": -2.222884178161621, "step": 2027 }, { "epoch": 0.24, "learning_rate": 2.327152474312035e-07, "logits/chosen": -2.302131414413452, "logits/rejected": -2.2596590518951416, "logps/chosen": -175.794677734375, "logps/rejected": -301.22906494140625, "loss": 0.6166, "rewards/accuracies": 0.5, "rewards/chosen": -1.6479148864746094, "rewards/margins": 0.2617035508155823, "rewards/rejected": -1.9096184968948364, "step": 2028 }, { "epoch": 0.24, "learning_rate": 2.3267981575528523e-07, "logits/chosen": -1.9586384296417236, "logits/rejected": -2.2462337017059326, "logps/chosen": -299.5837097167969, "logps/rejected": -315.59246826171875, "loss": 0.3702, "rewards/accuracies": 0.75, "rewards/chosen": -0.28985777497291565, "rewards/margins": 2.7063097953796387, "rewards/rejected": -2.9961676597595215, "step": 2029 }, { "epoch": 0.24, "learning_rate": 2.3264438407936695e-07, "logits/chosen": -1.7195850610733032, "logits/rejected": -1.730041265487671, "logps/chosen": -349.7305603027344, "logps/rejected": -293.2488708496094, "loss": 0.7856, "rewards/accuracies": 0.875, "rewards/chosen": -1.515875220298767, "rewards/margins": 0.4168912172317505, "rewards/rejected": -1.932766318321228, "step": 2030 }, { "epoch": 0.24, "learning_rate": 2.3260895240344867e-07, "logits/chosen": -2.7275562286376953, "logits/rejected": -2.449794054031372, "logps/chosen": -149.19786071777344, "logps/rejected": -242.50421142578125, "loss": 0.3971, "rewards/accuracies": 0.75, "rewards/chosen": -0.4693740904331207, "rewards/margins": 1.6094392538070679, "rewards/rejected": -2.078813314437866, "step": 2031 }, { "epoch": 0.24, "learning_rate": 2.325735207275304e-07, "logits/chosen": -2.406891345977783, "logits/rejected": -2.341055154800415, "logps/chosen": -496.196533203125, "logps/rejected": -371.0252685546875, "loss": 0.1581, "rewards/accuracies": 1.0, "rewards/chosen": 0.5625265836715698, "rewards/margins": 3.1403613090515137, "rewards/rejected": -2.5778346061706543, "step": 2032 }, { "epoch": 0.24, "learning_rate": 2.3253808905161212e-07, "logits/chosen": -2.828106641769409, "logits/rejected": -2.9514827728271484, "logps/chosen": -256.3870544433594, "logps/rejected": -244.3540802001953, "loss": 0.5676, "rewards/accuracies": 0.75, "rewards/chosen": -1.174810528755188, "rewards/margins": 1.7223913669586182, "rewards/rejected": -2.8972015380859375, "step": 2033 }, { "epoch": 0.24, "learning_rate": 2.3250265737569387e-07, "logits/chosen": -2.842668056488037, "logits/rejected": -2.7221367359161377, "logps/chosen": -243.26437377929688, "logps/rejected": -200.65142822265625, "loss": 0.1688, "rewards/accuracies": 0.875, "rewards/chosen": -0.6657168865203857, "rewards/margins": 2.4617412090301514, "rewards/rejected": -3.127458095550537, "step": 2034 }, { "epoch": 0.24, "learning_rate": 2.324672256997756e-07, "logits/chosen": -2.2707204818725586, "logits/rejected": -2.093201160430908, "logps/chosen": -214.98446655273438, "logps/rejected": -341.29180908203125, "loss": 0.4165, "rewards/accuracies": 0.75, "rewards/chosen": -0.9320346117019653, "rewards/margins": 1.3567686080932617, "rewards/rejected": -2.2888031005859375, "step": 2035 }, { "epoch": 0.24, "learning_rate": 2.324317940238573e-07, "logits/chosen": -2.769197702407837, "logits/rejected": -2.6296226978302, "logps/chosen": -151.77188110351562, "logps/rejected": -212.40762329101562, "loss": 0.369, "rewards/accuracies": 0.75, "rewards/chosen": -0.21906369924545288, "rewards/margins": 1.6712594032287598, "rewards/rejected": -1.8903231620788574, "step": 2036 }, { "epoch": 0.24, "learning_rate": 2.3239636234793903e-07, "logits/chosen": -2.212777614593506, "logits/rejected": -2.278111696243286, "logps/chosen": -146.70469665527344, "logps/rejected": -138.28033447265625, "loss": 0.4649, "rewards/accuracies": 0.875, "rewards/chosen": -0.22962410748004913, "rewards/margins": 1.7458806037902832, "rewards/rejected": -1.975504755973816, "step": 2037 }, { "epoch": 0.24, "learning_rate": 2.3236093067202076e-07, "logits/chosen": -2.291670083999634, "logits/rejected": -2.3384108543395996, "logps/chosen": -366.8844299316406, "logps/rejected": -294.0512390136719, "loss": 0.1504, "rewards/accuracies": 0.875, "rewards/chosen": -0.8982610702514648, "rewards/margins": 4.005279541015625, "rewards/rejected": -4.903540134429932, "step": 2038 }, { "epoch": 0.24, "learning_rate": 2.3232549899610253e-07, "logits/chosen": -1.968712329864502, "logits/rejected": -2.3144710063934326, "logps/chosen": -384.4434814453125, "logps/rejected": -253.3641815185547, "loss": 0.3642, "rewards/accuracies": 0.75, "rewards/chosen": -0.9081239700317383, "rewards/margins": 2.717484951019287, "rewards/rejected": -3.6256091594696045, "step": 2039 }, { "epoch": 0.24, "learning_rate": 2.3229006732018425e-07, "logits/chosen": -2.510974884033203, "logits/rejected": -2.609832286834717, "logps/chosen": -220.684326171875, "logps/rejected": -263.5408630371094, "loss": 0.4068, "rewards/accuracies": 0.875, "rewards/chosen": -0.3319597840309143, "rewards/margins": 2.481935977935791, "rewards/rejected": -2.8138959407806396, "step": 2040 }, { "epoch": 0.24, "learning_rate": 2.3225463564426598e-07, "logits/chosen": -2.5460615158081055, "logits/rejected": -2.557543992996216, "logps/chosen": -360.0933837890625, "logps/rejected": -213.01080322265625, "loss": 0.2613, "rewards/accuracies": 1.0, "rewards/chosen": -0.2244347482919693, "rewards/margins": 1.4214119911193848, "rewards/rejected": -1.6458466053009033, "step": 2041 }, { "epoch": 0.24, "learning_rate": 2.322192039683477e-07, "logits/chosen": -2.2325141429901123, "logits/rejected": -2.304518222808838, "logps/chosen": -110.122314453125, "logps/rejected": -119.24663543701172, "loss": 0.2999, "rewards/accuracies": 0.875, "rewards/chosen": -0.3240876793861389, "rewards/margins": 1.9563941955566406, "rewards/rejected": -2.2804818153381348, "step": 2042 }, { "epoch": 0.24, "learning_rate": 2.3218377229242942e-07, "logits/chosen": -2.4074838161468506, "logits/rejected": -2.5174002647399902, "logps/chosen": -201.61880493164062, "logps/rejected": -149.50794982910156, "loss": 1.0537, "rewards/accuracies": 0.625, "rewards/chosen": -1.4754530191421509, "rewards/margins": 0.2362002283334732, "rewards/rejected": -1.7116531133651733, "step": 2043 }, { "epoch": 0.24, "learning_rate": 2.3214834061651114e-07, "logits/chosen": -2.812570571899414, "logits/rejected": -2.731611967086792, "logps/chosen": -143.41311645507812, "logps/rejected": -195.87518310546875, "loss": 0.2186, "rewards/accuracies": 0.875, "rewards/chosen": -0.6606496572494507, "rewards/margins": 2.649125576019287, "rewards/rejected": -3.309774875640869, "step": 2044 }, { "epoch": 0.24, "learning_rate": 2.321129089405929e-07, "logits/chosen": -2.150832414627075, "logits/rejected": -2.5207042694091797, "logps/chosen": -354.78961181640625, "logps/rejected": -223.37905883789062, "loss": 0.2641, "rewards/accuracies": 1.0, "rewards/chosen": -0.3215101659297943, "rewards/margins": 1.7496140003204346, "rewards/rejected": -2.071124315261841, "step": 2045 }, { "epoch": 0.24, "learning_rate": 2.320774772646746e-07, "logits/chosen": -2.068253993988037, "logits/rejected": -2.0605204105377197, "logps/chosen": -510.9864196777344, "logps/rejected": -365.5048828125, "loss": 0.7527, "rewards/accuracies": 0.625, "rewards/chosen": -0.7201324701309204, "rewards/margins": 0.29562777280807495, "rewards/rejected": -1.0157601833343506, "step": 2046 }, { "epoch": 0.24, "learning_rate": 2.3204204558875633e-07, "logits/chosen": -2.3502566814422607, "logits/rejected": -2.396901845932007, "logps/chosen": -319.370361328125, "logps/rejected": -228.249267578125, "loss": 0.3779, "rewards/accuracies": 0.75, "rewards/chosen": -0.49296289682388306, "rewards/margins": 1.6559906005859375, "rewards/rejected": -2.148953437805176, "step": 2047 }, { "epoch": 0.24, "learning_rate": 2.3200661391283806e-07, "logits/chosen": -2.3833396434783936, "logits/rejected": -2.4190738201141357, "logps/chosen": -378.88348388671875, "logps/rejected": -221.31948852539062, "loss": 0.439, "rewards/accuracies": 0.75, "rewards/chosen": -0.9592045545578003, "rewards/margins": 1.101942539215088, "rewards/rejected": -2.0611469745635986, "step": 2048 }, { "epoch": 0.24, "learning_rate": 2.3197118223691978e-07, "logits/chosen": -1.9409353733062744, "logits/rejected": -1.886033535003662, "logps/chosen": -148.40745544433594, "logps/rejected": -255.3448486328125, "loss": 0.5596, "rewards/accuracies": 0.5, "rewards/chosen": -0.3841799199581146, "rewards/margins": 1.4257230758666992, "rewards/rejected": -1.8099030256271362, "step": 2049 }, { "epoch": 0.24, "learning_rate": 2.319357505610015e-07, "logits/chosen": -1.6990395784378052, "logits/rejected": -2.2230329513549805, "logps/chosen": -297.362548828125, "logps/rejected": -228.18093872070312, "loss": 0.7327, "rewards/accuracies": 0.75, "rewards/chosen": -1.1835384368896484, "rewards/margins": 1.3047230243682861, "rewards/rejected": -2.4882614612579346, "step": 2050 }, { "epoch": 0.24, "learning_rate": 2.3190031888508328e-07, "logits/chosen": -2.537649631500244, "logits/rejected": -2.6452975273132324, "logps/chosen": -619.759765625, "logps/rejected": -353.56005859375, "loss": 0.3772, "rewards/accuracies": 0.75, "rewards/chosen": -0.49194395542144775, "rewards/margins": 1.7878797054290771, "rewards/rejected": -2.2798235416412354, "step": 2051 }, { "epoch": 0.24, "learning_rate": 2.31864887209165e-07, "logits/chosen": -2.764446973800659, "logits/rejected": -2.6761300563812256, "logps/chosen": -263.1351623535156, "logps/rejected": -254.53260803222656, "loss": 0.5305, "rewards/accuracies": 0.625, "rewards/chosen": -1.9918043613433838, "rewards/margins": 1.6356933116912842, "rewards/rejected": -3.627497673034668, "step": 2052 }, { "epoch": 0.24, "learning_rate": 2.3182945553324672e-07, "logits/chosen": -2.69112229347229, "logits/rejected": -3.0157575607299805, "logps/chosen": -408.39666748046875, "logps/rejected": -309.54595947265625, "loss": 0.1826, "rewards/accuracies": 1.0, "rewards/chosen": -0.07711735367774963, "rewards/margins": 2.1510908603668213, "rewards/rejected": -2.228208065032959, "step": 2053 }, { "epoch": 0.24, "learning_rate": 2.3179402385732844e-07, "logits/chosen": -2.081737518310547, "logits/rejected": -2.133505344390869, "logps/chosen": -359.0486755371094, "logps/rejected": -281.8216857910156, "loss": 0.7259, "rewards/accuracies": 0.5, "rewards/chosen": -0.5981441736221313, "rewards/margins": 0.981544017791748, "rewards/rejected": -1.579688310623169, "step": 2054 }, { "epoch": 0.24, "learning_rate": 2.3175859218141016e-07, "logits/chosen": -1.7702155113220215, "logits/rejected": -2.0965635776519775, "logps/chosen": -402.19921875, "logps/rejected": -220.8493194580078, "loss": 0.7826, "rewards/accuracies": 0.625, "rewards/chosen": -1.4614759683609009, "rewards/margins": 0.63457852602005, "rewards/rejected": -2.0960545539855957, "step": 2055 }, { "epoch": 0.24, "learning_rate": 2.3172316050549189e-07, "logits/chosen": -2.2324156761169434, "logits/rejected": -2.329479932785034, "logps/chosen": -373.3224182128906, "logps/rejected": -341.1250305175781, "loss": 0.4742, "rewards/accuracies": 0.75, "rewards/chosen": -1.242738962173462, "rewards/margins": 1.1859095096588135, "rewards/rejected": -2.4286484718322754, "step": 2056 }, { "epoch": 0.24, "learning_rate": 2.3168772882957364e-07, "logits/chosen": -2.249872922897339, "logits/rejected": -2.153383493423462, "logps/chosen": -233.1608428955078, "logps/rejected": -258.5486145019531, "loss": 0.6108, "rewards/accuracies": 0.625, "rewards/chosen": -1.123549222946167, "rewards/margins": 1.4703047275543213, "rewards/rejected": -2.5938539505004883, "step": 2057 }, { "epoch": 0.24, "learning_rate": 2.3165229715365536e-07, "logits/chosen": -2.293715000152588, "logits/rejected": -2.3873379230499268, "logps/chosen": -162.99310302734375, "logps/rejected": -145.78494262695312, "loss": 0.6584, "rewards/accuracies": 0.75, "rewards/chosen": -0.5111904144287109, "rewards/margins": 0.13250459730625153, "rewards/rejected": -0.643695056438446, "step": 2058 }, { "epoch": 0.24, "learning_rate": 2.3161686547773708e-07, "logits/chosen": -2.834343910217285, "logits/rejected": -2.8110439777374268, "logps/chosen": -252.72042846679688, "logps/rejected": -183.12646484375, "loss": 0.5396, "rewards/accuracies": 0.75, "rewards/chosen": -0.23687627911567688, "rewards/margins": 1.654449701309204, "rewards/rejected": -1.8913260698318481, "step": 2059 }, { "epoch": 0.24, "learning_rate": 2.315814338018188e-07, "logits/chosen": -2.174685478210449, "logits/rejected": -2.1113905906677246, "logps/chosen": -210.0044708251953, "logps/rejected": -308.68798828125, "loss": 0.9212, "rewards/accuracies": 0.75, "rewards/chosen": -1.1340367794036865, "rewards/margins": 0.7136525511741638, "rewards/rejected": -1.8476893901824951, "step": 2060 }, { "epoch": 0.24, "learning_rate": 2.3154600212590052e-07, "logits/chosen": -2.350801706314087, "logits/rejected": -2.3886895179748535, "logps/chosen": -283.32476806640625, "logps/rejected": -346.052734375, "loss": 1.0974, "rewards/accuracies": 0.375, "rewards/chosen": -1.3154613971710205, "rewards/margins": -0.07462924718856812, "rewards/rejected": -1.2408322095870972, "step": 2061 }, { "epoch": 0.24, "learning_rate": 2.315105704499823e-07, "logits/chosen": -2.6257810592651367, "logits/rejected": -2.564004898071289, "logps/chosen": -230.0107879638672, "logps/rejected": -203.06492614746094, "loss": 0.602, "rewards/accuracies": 0.75, "rewards/chosen": -1.0469653606414795, "rewards/margins": 1.104737401008606, "rewards/rejected": -2.151702880859375, "step": 2062 }, { "epoch": 0.24, "learning_rate": 2.3147513877406402e-07, "logits/chosen": -2.4696969985961914, "logits/rejected": -2.5073649883270264, "logps/chosen": -111.795654296875, "logps/rejected": -120.26400756835938, "loss": 0.5582, "rewards/accuracies": 0.75, "rewards/chosen": -1.3880670070648193, "rewards/margins": 0.3678975999355316, "rewards/rejected": -1.7559646368026733, "step": 2063 }, { "epoch": 0.24, "learning_rate": 2.3143970709814574e-07, "logits/chosen": -2.7043700218200684, "logits/rejected": -2.7811365127563477, "logps/chosen": -332.83892822265625, "logps/rejected": -295.2342529296875, "loss": 0.2479, "rewards/accuracies": 0.75, "rewards/chosen": 0.2664511203765869, "rewards/margins": 2.4638257026672363, "rewards/rejected": -2.1973745822906494, "step": 2064 }, { "epoch": 0.24, "learning_rate": 2.3140427542222747e-07, "logits/chosen": -2.7371633052825928, "logits/rejected": -2.7435383796691895, "logps/chosen": -280.2341003417969, "logps/rejected": -314.576416015625, "loss": 0.2323, "rewards/accuracies": 0.75, "rewards/chosen": -0.9098036289215088, "rewards/margins": 2.62107515335083, "rewards/rejected": -3.5308785438537598, "step": 2065 }, { "epoch": 0.24, "learning_rate": 2.313688437463092e-07, "logits/chosen": -2.5510506629943848, "logits/rejected": -2.7809622287750244, "logps/chosen": -184.58187866210938, "logps/rejected": -231.81297302246094, "loss": 0.4834, "rewards/accuracies": 0.75, "rewards/chosen": -0.46479663252830505, "rewards/margins": 1.634385108947754, "rewards/rejected": -2.09918212890625, "step": 2066 }, { "epoch": 0.24, "learning_rate": 2.313334120703909e-07, "logits/chosen": -2.286210536956787, "logits/rejected": -2.2372682094573975, "logps/chosen": -380.9281311035156, "logps/rejected": -415.9924621582031, "loss": 0.6469, "rewards/accuracies": 0.75, "rewards/chosen": -1.122817039489746, "rewards/margins": 1.3784973621368408, "rewards/rejected": -2.501314640045166, "step": 2067 }, { "epoch": 0.24, "learning_rate": 2.3129798039447266e-07, "logits/chosen": -1.848660945892334, "logits/rejected": -2.0682692527770996, "logps/chosen": -363.060546875, "logps/rejected": -296.3851623535156, "loss": 0.5715, "rewards/accuracies": 0.75, "rewards/chosen": -0.9150069952011108, "rewards/margins": 0.4359191060066223, "rewards/rejected": -1.350926160812378, "step": 2068 }, { "epoch": 0.24, "learning_rate": 2.3126254871855438e-07, "logits/chosen": -2.7038397789001465, "logits/rejected": -2.680751085281372, "logps/chosen": -261.6009826660156, "logps/rejected": -305.29888916015625, "loss": 0.1515, "rewards/accuracies": 1.0, "rewards/chosen": -0.40541332960128784, "rewards/margins": 3.810908317565918, "rewards/rejected": -4.21632194519043, "step": 2069 }, { "epoch": 0.24, "learning_rate": 2.312271170426361e-07, "logits/chosen": -2.157916307449341, "logits/rejected": -2.4135901927948, "logps/chosen": -383.1419372558594, "logps/rejected": -267.9462890625, "loss": 0.6058, "rewards/accuracies": 0.75, "rewards/chosen": -1.081172227859497, "rewards/margins": 1.9821442365646362, "rewards/rejected": -3.0633163452148438, "step": 2070 }, { "epoch": 0.24, "learning_rate": 2.3119168536671782e-07, "logits/chosen": -1.894946575164795, "logits/rejected": -2.351252794265747, "logps/chosen": -393.8211669921875, "logps/rejected": -375.05755615234375, "loss": 0.3264, "rewards/accuracies": 0.875, "rewards/chosen": -0.8408360481262207, "rewards/margins": 1.4590222835540771, "rewards/rejected": -2.299858331680298, "step": 2071 }, { "epoch": 0.24, "learning_rate": 2.3115625369079955e-07, "logits/chosen": -2.423804759979248, "logits/rejected": -2.4525179862976074, "logps/chosen": -287.20611572265625, "logps/rejected": -300.79193115234375, "loss": 0.4869, "rewards/accuracies": 0.75, "rewards/chosen": -1.6825218200683594, "rewards/margins": 1.355987548828125, "rewards/rejected": -3.0385093688964844, "step": 2072 }, { "epoch": 0.24, "learning_rate": 2.3112082201488127e-07, "logits/chosen": -2.0599029064178467, "logits/rejected": -2.065988302230835, "logps/chosen": -741.3494262695312, "logps/rejected": -731.4830322265625, "loss": 0.2933, "rewards/accuracies": 0.875, "rewards/chosen": -1.0429275035858154, "rewards/margins": 2.9162943363189697, "rewards/rejected": -3.9592220783233643, "step": 2073 }, { "epoch": 0.24, "learning_rate": 2.3108539033896304e-07, "logits/chosen": -2.25712513923645, "logits/rejected": -2.23083233833313, "logps/chosen": -284.7942199707031, "logps/rejected": -268.1402587890625, "loss": 0.2225, "rewards/accuracies": 0.875, "rewards/chosen": -0.9001434445381165, "rewards/margins": 2.5516343116760254, "rewards/rejected": -3.451777696609497, "step": 2074 }, { "epoch": 0.24, "learning_rate": 2.3104995866304477e-07, "logits/chosen": -2.1770951747894287, "logits/rejected": -2.2381420135498047, "logps/chosen": -298.89630126953125, "logps/rejected": -274.75726318359375, "loss": 0.3309, "rewards/accuracies": 1.0, "rewards/chosen": -0.5474511384963989, "rewards/margins": 1.3452699184417725, "rewards/rejected": -1.8927209377288818, "step": 2075 }, { "epoch": 0.24, "learning_rate": 2.310145269871265e-07, "logits/chosen": -2.7789416313171387, "logits/rejected": -2.640777587890625, "logps/chosen": -276.1231689453125, "logps/rejected": -177.1703338623047, "loss": 0.2902, "rewards/accuracies": 0.875, "rewards/chosen": -0.6109340190887451, "rewards/margins": 3.7640726566314697, "rewards/rejected": -4.375006675720215, "step": 2076 }, { "epoch": 0.24, "learning_rate": 2.309790953112082e-07, "logits/chosen": -2.224620819091797, "logits/rejected": -2.350910186767578, "logps/chosen": -422.27069091796875, "logps/rejected": -321.5037841796875, "loss": 0.3775, "rewards/accuracies": 0.875, "rewards/chosen": -0.8151144981384277, "rewards/margins": 2.2183737754821777, "rewards/rejected": -3.0334882736206055, "step": 2077 }, { "epoch": 0.24, "learning_rate": 2.3094366363528993e-07, "logits/chosen": -1.8314849138259888, "logits/rejected": -2.0104141235351562, "logps/chosen": -375.28900146484375, "logps/rejected": -277.774169921875, "loss": 0.6302, "rewards/accuracies": 0.625, "rewards/chosen": -0.8396937847137451, "rewards/margins": 1.0412566661834717, "rewards/rejected": -1.8809503316879272, "step": 2078 }, { "epoch": 0.24, "learning_rate": 2.3090823195937168e-07, "logits/chosen": -2.5636136531829834, "logits/rejected": -2.265578269958496, "logps/chosen": -244.00924682617188, "logps/rejected": -351.1802978515625, "loss": 0.497, "rewards/accuracies": 0.625, "rewards/chosen": -0.9172402024269104, "rewards/margins": 1.0423904657363892, "rewards/rejected": -1.9596307277679443, "step": 2079 }, { "epoch": 0.24, "learning_rate": 2.308728002834534e-07, "logits/chosen": -2.590925693511963, "logits/rejected": -2.4717209339141846, "logps/chosen": -252.8572998046875, "logps/rejected": -226.3404541015625, "loss": 0.2072, "rewards/accuracies": 1.0, "rewards/chosen": -0.27060365676879883, "rewards/margins": 2.2467408180236816, "rewards/rejected": -2.5173444747924805, "step": 2080 }, { "epoch": 0.24, "learning_rate": 2.3083736860753513e-07, "logits/chosen": -2.6463701725006104, "logits/rejected": -2.631129741668701, "logps/chosen": -254.4882049560547, "logps/rejected": -146.6963348388672, "loss": 0.3981, "rewards/accuracies": 0.875, "rewards/chosen": -0.6959709525108337, "rewards/margins": 1.3422380685806274, "rewards/rejected": -2.0382089614868164, "step": 2081 }, { "epoch": 0.24, "learning_rate": 2.3080193693161685e-07, "logits/chosen": -1.93552565574646, "logits/rejected": -2.299076795578003, "logps/chosen": -249.86318969726562, "logps/rejected": -191.1592254638672, "loss": 0.4961, "rewards/accuracies": 0.75, "rewards/chosen": -0.617728590965271, "rewards/margins": 2.052072525024414, "rewards/rejected": -2.6698012351989746, "step": 2082 }, { "epoch": 0.24, "learning_rate": 2.3076650525569857e-07, "logits/chosen": -2.4341533184051514, "logits/rejected": -2.0779552459716797, "logps/chosen": -448.59710693359375, "logps/rejected": -466.44964599609375, "loss": 0.2894, "rewards/accuracies": 1.0, "rewards/chosen": -0.6883320808410645, "rewards/margins": 1.6192301511764526, "rewards/rejected": -2.3075623512268066, "step": 2083 }, { "epoch": 0.24, "learning_rate": 2.307310735797803e-07, "logits/chosen": -3.071538209915161, "logits/rejected": -3.023592233657837, "logps/chosen": -412.66546630859375, "logps/rejected": -214.17709350585938, "loss": 0.9459, "rewards/accuracies": 0.75, "rewards/chosen": -0.8521295189857483, "rewards/margins": 1.0258171558380127, "rewards/rejected": -1.8779468536376953, "step": 2084 }, { "epoch": 0.24, "learning_rate": 2.3069564190386201e-07, "logits/chosen": -2.1012156009674072, "logits/rejected": -2.3568899631500244, "logps/chosen": -223.47088623046875, "logps/rejected": -202.3527069091797, "loss": 2.0635, "rewards/accuracies": 0.5, "rewards/chosen": -2.4038147926330566, "rewards/margins": -1.3419294357299805, "rewards/rejected": -1.0618852376937866, "step": 2085 }, { "epoch": 0.24, "learning_rate": 2.306602102279438e-07, "logits/chosen": -2.7428174018859863, "logits/rejected": -2.579439878463745, "logps/chosen": -256.74359130859375, "logps/rejected": -292.8490295410156, "loss": 0.8754, "rewards/accuracies": 0.75, "rewards/chosen": -1.502882957458496, "rewards/margins": 1.1394391059875488, "rewards/rejected": -2.642321825027466, "step": 2086 }, { "epoch": 0.24, "learning_rate": 2.306247785520255e-07, "logits/chosen": -2.425887107849121, "logits/rejected": -2.408629894256592, "logps/chosen": -243.2975311279297, "logps/rejected": -291.95831298828125, "loss": 0.3789, "rewards/accuracies": 0.625, "rewards/chosen": -1.1353849172592163, "rewards/margins": 1.7271255254745483, "rewards/rejected": -2.8625104427337646, "step": 2087 }, { "epoch": 0.24, "learning_rate": 2.3058934687610723e-07, "logits/chosen": -2.4236881732940674, "logits/rejected": -2.7078330516815186, "logps/chosen": -253.06715393066406, "logps/rejected": -166.00115966796875, "loss": 0.8406, "rewards/accuracies": 0.625, "rewards/chosen": -0.8810558915138245, "rewards/margins": -0.051469504833221436, "rewards/rejected": -0.8295864462852478, "step": 2088 }, { "epoch": 0.24, "learning_rate": 2.3055391520018895e-07, "logits/chosen": -2.336369276046753, "logits/rejected": -2.4930810928344727, "logps/chosen": -309.11517333984375, "logps/rejected": -280.1531066894531, "loss": 0.8765, "rewards/accuracies": 0.5, "rewards/chosen": -1.0990009307861328, "rewards/margins": 1.2734899520874023, "rewards/rejected": -2.372490882873535, "step": 2089 }, { "epoch": 0.24, "learning_rate": 2.305184835242707e-07, "logits/chosen": -2.08042573928833, "logits/rejected": -2.172767162322998, "logps/chosen": -380.06939697265625, "logps/rejected": -305.51788330078125, "loss": 0.1407, "rewards/accuracies": 1.0, "rewards/chosen": -0.619519829750061, "rewards/margins": 2.6680397987365723, "rewards/rejected": -3.287559747695923, "step": 2090 }, { "epoch": 0.24, "learning_rate": 2.3048305184835243e-07, "logits/chosen": -2.351177215576172, "logits/rejected": -2.3011081218719482, "logps/chosen": -266.94195556640625, "logps/rejected": -361.29876708984375, "loss": 0.1486, "rewards/accuracies": 1.0, "rewards/chosen": -0.5726664662361145, "rewards/margins": 2.9676342010498047, "rewards/rejected": -3.5403010845184326, "step": 2091 }, { "epoch": 0.24, "learning_rate": 2.3044762017243415e-07, "logits/chosen": -1.9984383583068848, "logits/rejected": -2.188103675842285, "logps/chosen": -374.37518310546875, "logps/rejected": -255.70880126953125, "loss": 0.3173, "rewards/accuracies": 0.875, "rewards/chosen": -0.048550959676504135, "rewards/margins": 1.3606481552124023, "rewards/rejected": -1.4091991186141968, "step": 2092 }, { "epoch": 0.24, "learning_rate": 2.3041218849651587e-07, "logits/chosen": -2.6444292068481445, "logits/rejected": -2.693577289581299, "logps/chosen": -358.3814697265625, "logps/rejected": -341.25885009765625, "loss": 0.0941, "rewards/accuracies": 1.0, "rewards/chosen": -0.562543511390686, "rewards/margins": 2.9499905109405518, "rewards/rejected": -3.5125343799591064, "step": 2093 }, { "epoch": 0.24, "learning_rate": 2.303767568205976e-07, "logits/chosen": -1.6545289754867554, "logits/rejected": -2.066967248916626, "logps/chosen": -594.4368286132812, "logps/rejected": -363.7176208496094, "loss": 0.637, "rewards/accuracies": 0.75, "rewards/chosen": -0.6513514518737793, "rewards/margins": 1.0310990810394287, "rewards/rejected": -1.6824506521224976, "step": 2094 }, { "epoch": 0.24, "learning_rate": 2.3034132514467931e-07, "logits/chosen": -2.0807619094848633, "logits/rejected": -1.957549810409546, "logps/chosen": -184.15469360351562, "logps/rejected": -188.47393798828125, "loss": 0.7225, "rewards/accuracies": 0.5, "rewards/chosen": -0.7239547967910767, "rewards/margins": 1.2631405591964722, "rewards/rejected": -1.9870953559875488, "step": 2095 }, { "epoch": 0.24, "learning_rate": 2.3030589346876104e-07, "logits/chosen": -2.6687190532684326, "logits/rejected": -2.6768581867218018, "logps/chosen": -399.9473876953125, "logps/rejected": -389.5585632324219, "loss": 0.2147, "rewards/accuracies": 0.875, "rewards/chosen": -0.25516819953918457, "rewards/margins": 2.385730743408203, "rewards/rejected": -2.640899181365967, "step": 2096 }, { "epoch": 0.24, "learning_rate": 2.302704617928428e-07, "logits/chosen": -2.547635555267334, "logits/rejected": -2.5019633769989014, "logps/chosen": -421.4581298828125, "logps/rejected": -286.7607727050781, "loss": 0.5365, "rewards/accuracies": 0.75, "rewards/chosen": -0.2664855718612671, "rewards/margins": 0.6848435401916504, "rewards/rejected": -0.9513291120529175, "step": 2097 }, { "epoch": 0.24, "learning_rate": 2.3023503011692453e-07, "logits/chosen": -1.9565714597702026, "logits/rejected": -2.28084659576416, "logps/chosen": -240.66305541992188, "logps/rejected": -150.10205078125, "loss": 0.653, "rewards/accuracies": 0.625, "rewards/chosen": -0.4271824061870575, "rewards/margins": 0.35893547534942627, "rewards/rejected": -0.7861179113388062, "step": 2098 }, { "epoch": 0.24, "learning_rate": 2.3019959844100626e-07, "logits/chosen": -2.659397602081299, "logits/rejected": -2.739529609680176, "logps/chosen": -242.95175170898438, "logps/rejected": -167.45614624023438, "loss": 0.4208, "rewards/accuracies": 0.875, "rewards/chosen": -0.819594144821167, "rewards/margins": 1.6537340879440308, "rewards/rejected": -2.473328113555908, "step": 2099 }, { "epoch": 0.24, "learning_rate": 2.3016416676508798e-07, "logits/chosen": -1.6965988874435425, "logits/rejected": -2.2584781646728516, "logps/chosen": -500.3504943847656, "logps/rejected": -179.69728088378906, "loss": 0.3855, "rewards/accuracies": 0.875, "rewards/chosen": -0.19545957446098328, "rewards/margins": 1.5131888389587402, "rewards/rejected": -1.708648443222046, "step": 2100 }, { "epoch": 0.24, "learning_rate": 2.301287350891697e-07, "logits/chosen": -2.114398717880249, "logits/rejected": -2.162353038787842, "logps/chosen": -525.50634765625, "logps/rejected": -471.1441345214844, "loss": 0.6405, "rewards/accuracies": 0.75, "rewards/chosen": -0.01101897656917572, "rewards/margins": 1.8904545307159424, "rewards/rejected": -1.9014735221862793, "step": 2101 }, { "epoch": 0.24, "learning_rate": 2.3009330341325145e-07, "logits/chosen": -2.7504093647003174, "logits/rejected": -2.8256869316101074, "logps/chosen": -164.28782653808594, "logps/rejected": -250.89205932617188, "loss": 0.1963, "rewards/accuracies": 0.875, "rewards/chosen": -0.5459045171737671, "rewards/margins": 2.1295342445373535, "rewards/rejected": -2.67543888092041, "step": 2102 }, { "epoch": 0.24, "learning_rate": 2.3005787173733317e-07, "logits/chosen": -2.526620388031006, "logits/rejected": -2.308732032775879, "logps/chosen": -181.3770294189453, "logps/rejected": -301.6756286621094, "loss": 0.3151, "rewards/accuracies": 0.875, "rewards/chosen": -0.8538342714309692, "rewards/margins": 2.393160343170166, "rewards/rejected": -3.2469944953918457, "step": 2103 }, { "epoch": 0.24, "learning_rate": 2.300224400614149e-07, "logits/chosen": -1.8640164136886597, "logits/rejected": -2.5301032066345215, "logps/chosen": -557.3271484375, "logps/rejected": -228.86883544921875, "loss": 0.6514, "rewards/accuracies": 0.75, "rewards/chosen": -0.7960251569747925, "rewards/margins": 0.8735105991363525, "rewards/rejected": -1.6695358753204346, "step": 2104 }, { "epoch": 0.24, "learning_rate": 2.2998700838549661e-07, "logits/chosen": -2.514897346496582, "logits/rejected": -2.7236521244049072, "logps/chosen": -417.3526611328125, "logps/rejected": -199.49081420898438, "loss": 16.4606, "rewards/accuracies": 0.625, "rewards/chosen": -17.218109130859375, "rewards/margins": -14.884782791137695, "rewards/rejected": -2.3333230018615723, "step": 2105 }, { "epoch": 0.24, "learning_rate": 2.2995157670957834e-07, "logits/chosen": -2.3351211547851562, "logits/rejected": -2.4225146770477295, "logps/chosen": -189.0, "logps/rejected": -291.29022216796875, "loss": 0.378, "rewards/accuracies": 0.75, "rewards/chosen": -0.8758213520050049, "rewards/margins": 2.2854561805725098, "rewards/rejected": -3.1612775325775146, "step": 2106 }, { "epoch": 0.25, "learning_rate": 2.2991614503366006e-07, "logits/chosen": -1.9343101978302002, "logits/rejected": -2.377197742462158, "logps/chosen": -209.54122924804688, "logps/rejected": -112.00872039794922, "loss": 0.395, "rewards/accuracies": 1.0, "rewards/chosen": -0.38213610649108887, "rewards/margins": 1.0223580598831177, "rewards/rejected": -1.4044941663742065, "step": 2107 }, { "epoch": 0.25, "learning_rate": 2.298807133577418e-07, "logits/chosen": -1.549855351448059, "logits/rejected": -1.805354118347168, "logps/chosen": -521.0791015625, "logps/rejected": -362.57257080078125, "loss": 0.6282, "rewards/accuracies": 0.75, "rewards/chosen": -1.171830415725708, "rewards/margins": 0.8442789316177368, "rewards/rejected": -2.0161094665527344, "step": 2108 }, { "epoch": 0.25, "learning_rate": 2.2984528168182356e-07, "logits/chosen": -1.7286882400512695, "logits/rejected": -1.9006962776184082, "logps/chosen": -503.29583740234375, "logps/rejected": -304.79986572265625, "loss": 0.8084, "rewards/accuracies": 0.75, "rewards/chosen": -1.1845356225967407, "rewards/margins": 0.34847450256347656, "rewards/rejected": -1.5330100059509277, "step": 2109 }, { "epoch": 0.25, "learning_rate": 2.2980985000590528e-07, "logits/chosen": -2.5560271739959717, "logits/rejected": -2.4566686153411865, "logps/chosen": -328.2701416015625, "logps/rejected": -252.1268768310547, "loss": 0.3875, "rewards/accuracies": 0.75, "rewards/chosen": -1.1438974142074585, "rewards/margins": 1.2961591482162476, "rewards/rejected": -2.440056324005127, "step": 2110 }, { "epoch": 0.25, "learning_rate": 2.29774418329987e-07, "logits/chosen": -2.1737284660339355, "logits/rejected": -2.262253761291504, "logps/chosen": -204.8711395263672, "logps/rejected": -211.63177490234375, "loss": 0.4168, "rewards/accuracies": 0.75, "rewards/chosen": 0.18141092360019684, "rewards/margins": 1.2299103736877441, "rewards/rejected": -1.0484994649887085, "step": 2111 }, { "epoch": 0.25, "learning_rate": 2.2973898665406872e-07, "logits/chosen": -2.201474189758301, "logits/rejected": -2.180999755859375, "logps/chosen": -381.7806091308594, "logps/rejected": -644.9986572265625, "loss": 0.0948, "rewards/accuracies": 1.0, "rewards/chosen": -0.7228696942329407, "rewards/margins": 3.134190797805786, "rewards/rejected": -3.857060432434082, "step": 2112 }, { "epoch": 0.25, "learning_rate": 2.2970355497815047e-07, "logits/chosen": -2.541313886642456, "logits/rejected": -2.6406726837158203, "logps/chosen": -104.00942993164062, "logps/rejected": -187.33856201171875, "loss": 0.6473, "rewards/accuracies": 0.75, "rewards/chosen": -1.075761318206787, "rewards/margins": 1.0872206687927246, "rewards/rejected": -2.1629819869995117, "step": 2113 }, { "epoch": 0.25, "learning_rate": 2.296681233022322e-07, "logits/chosen": -2.585556983947754, "logits/rejected": -2.6896328926086426, "logps/chosen": -312.0445556640625, "logps/rejected": -217.39840698242188, "loss": 0.5744, "rewards/accuracies": 0.75, "rewards/chosen": -0.7982922792434692, "rewards/margins": 2.5577845573425293, "rewards/rejected": -3.356076717376709, "step": 2114 }, { "epoch": 0.25, "learning_rate": 2.2963269162631392e-07, "logits/chosen": -2.1246178150177, "logits/rejected": -2.2158761024475098, "logps/chosen": -341.9050598144531, "logps/rejected": -263.41668701171875, "loss": 0.3931, "rewards/accuracies": 0.75, "rewards/chosen": -0.49661314487457275, "rewards/margins": 1.7788549661636353, "rewards/rejected": -2.275468111038208, "step": 2115 }, { "epoch": 0.25, "learning_rate": 2.2959725995039564e-07, "logits/chosen": -2.5187501907348633, "logits/rejected": -2.4576497077941895, "logps/chosen": -258.60784912109375, "logps/rejected": -215.1470947265625, "loss": 0.2985, "rewards/accuracies": 1.0, "rewards/chosen": -0.3413189947605133, "rewards/margins": 1.5360360145568848, "rewards/rejected": -1.8773548603057861, "step": 2116 }, { "epoch": 0.25, "learning_rate": 2.2956182827447736e-07, "logits/chosen": -2.502315044403076, "logits/rejected": -2.363102436065674, "logps/chosen": -171.89984130859375, "logps/rejected": -247.88565063476562, "loss": 0.5386, "rewards/accuracies": 0.75, "rewards/chosen": -1.1271822452545166, "rewards/margins": 1.1379424333572388, "rewards/rejected": -2.265124559402466, "step": 2117 }, { "epoch": 0.25, "learning_rate": 2.2952639659855908e-07, "logits/chosen": -2.2036654949188232, "logits/rejected": -2.236550807952881, "logps/chosen": -230.99203491210938, "logps/rejected": -237.07241821289062, "loss": 0.4014, "rewards/accuracies": 0.875, "rewards/chosen": -0.44174614548683167, "rewards/margins": 1.165037751197815, "rewards/rejected": -1.6067838668823242, "step": 2118 }, { "epoch": 0.25, "learning_rate": 2.2949096492264083e-07, "logits/chosen": -2.558526039123535, "logits/rejected": -2.632307291030884, "logps/chosen": -205.89813232421875, "logps/rejected": -215.0721435546875, "loss": 1.8303, "rewards/accuracies": 0.625, "rewards/chosen": -2.262101888656616, "rewards/margins": 0.5201675891876221, "rewards/rejected": -2.7822694778442383, "step": 2119 }, { "epoch": 0.25, "learning_rate": 2.2945553324672255e-07, "logits/chosen": -1.5845973491668701, "logits/rejected": -1.8394256830215454, "logps/chosen": -331.51104736328125, "logps/rejected": -358.0188293457031, "loss": 0.5168, "rewards/accuracies": 0.75, "rewards/chosen": -0.6169475317001343, "rewards/margins": 0.8839913010597229, "rewards/rejected": -1.5009386539459229, "step": 2120 }, { "epoch": 0.25, "learning_rate": 2.294201015708043e-07, "logits/chosen": -2.456845998764038, "logits/rejected": -2.316467761993408, "logps/chosen": -273.5861511230469, "logps/rejected": -349.6385498046875, "loss": 0.2059, "rewards/accuracies": 1.0, "rewards/chosen": -1.1065634489059448, "rewards/margins": 1.7222654819488525, "rewards/rejected": -2.828828811645508, "step": 2121 }, { "epoch": 0.25, "learning_rate": 2.2938466989488602e-07, "logits/chosen": -2.3535051345825195, "logits/rejected": -2.4179320335388184, "logps/chosen": -281.8887939453125, "logps/rejected": -339.3114929199219, "loss": 0.8535, "rewards/accuracies": 0.5, "rewards/chosen": -1.4515576362609863, "rewards/margins": 0.47012099623680115, "rewards/rejected": -1.9216787815093994, "step": 2122 }, { "epoch": 0.25, "learning_rate": 2.2934923821896775e-07, "logits/chosen": -2.4629600048065186, "logits/rejected": -2.5789308547973633, "logps/chosen": -364.0344543457031, "logps/rejected": -281.83563232421875, "loss": 0.2476, "rewards/accuracies": 1.0, "rewards/chosen": -0.6804347038269043, "rewards/margins": 2.183723211288452, "rewards/rejected": -2.8641576766967773, "step": 2123 }, { "epoch": 0.25, "learning_rate": 2.293138065430495e-07, "logits/chosen": -2.014516830444336, "logits/rejected": -2.265544891357422, "logps/chosen": -426.75164794921875, "logps/rejected": -272.9317626953125, "loss": 0.4549, "rewards/accuracies": 0.75, "rewards/chosen": -0.9944907426834106, "rewards/margins": 0.7225480675697327, "rewards/rejected": -1.7170387506484985, "step": 2124 }, { "epoch": 0.25, "learning_rate": 2.2927837486713122e-07, "logits/chosen": -1.9910438060760498, "logits/rejected": -2.2715377807617188, "logps/chosen": -165.43820190429688, "logps/rejected": -150.1271209716797, "loss": 0.276, "rewards/accuracies": 0.875, "rewards/chosen": -0.31181901693344116, "rewards/margins": 1.5795241594314575, "rewards/rejected": -1.891343355178833, "step": 2125 }, { "epoch": 0.25, "learning_rate": 2.2924294319121294e-07, "logits/chosen": -2.6272196769714355, "logits/rejected": -2.5384135246276855, "logps/chosen": -145.94424438476562, "logps/rejected": -256.4827575683594, "loss": 0.2319, "rewards/accuracies": 1.0, "rewards/chosen": -0.4750710725784302, "rewards/margins": 1.81942880153656, "rewards/rejected": -2.2944998741149902, "step": 2126 }, { "epoch": 0.25, "learning_rate": 2.2920751151529466e-07, "logits/chosen": -2.496472120285034, "logits/rejected": -2.3808064460754395, "logps/chosen": -330.91802978515625, "logps/rejected": -295.9268798828125, "loss": 0.4974, "rewards/accuracies": 0.875, "rewards/chosen": -1.1536391973495483, "rewards/margins": 1.3214571475982666, "rewards/rejected": -2.4750962257385254, "step": 2127 }, { "epoch": 0.25, "learning_rate": 2.2917207983937638e-07, "logits/chosen": -2.190812826156616, "logits/rejected": -2.4244284629821777, "logps/chosen": -338.1206359863281, "logps/rejected": -233.25277709960938, "loss": 0.4055, "rewards/accuracies": 0.875, "rewards/chosen": -0.4596202075481415, "rewards/margins": 1.4400889873504639, "rewards/rejected": -1.8997092247009277, "step": 2128 }, { "epoch": 0.25, "learning_rate": 2.291366481634581e-07, "logits/chosen": -2.2277932167053223, "logits/rejected": -2.407660484313965, "logps/chosen": -304.5791015625, "logps/rejected": -300.0853271484375, "loss": 0.2368, "rewards/accuracies": 0.875, "rewards/chosen": -0.43349504470825195, "rewards/margins": 2.2916736602783203, "rewards/rejected": -2.7251687049865723, "step": 2129 }, { "epoch": 0.25, "learning_rate": 2.2910121648753983e-07, "logits/chosen": -2.237928867340088, "logits/rejected": -2.6818368434906006, "logps/chosen": -375.8564758300781, "logps/rejected": -240.56015014648438, "loss": 0.2772, "rewards/accuracies": 0.875, "rewards/chosen": -0.835185170173645, "rewards/margins": 2.009805202484131, "rewards/rejected": -2.8449904918670654, "step": 2130 }, { "epoch": 0.25, "learning_rate": 2.2906578481162158e-07, "logits/chosen": -1.8794376850128174, "logits/rejected": -2.2117056846618652, "logps/chosen": -414.12359619140625, "logps/rejected": -203.04776000976562, "loss": 0.4977, "rewards/accuracies": 0.75, "rewards/chosen": -0.44557681679725647, "rewards/margins": 1.6291123628616333, "rewards/rejected": -2.0746891498565674, "step": 2131 }, { "epoch": 0.25, "learning_rate": 2.2903035313570332e-07, "logits/chosen": -2.604048252105713, "logits/rejected": -2.703619956970215, "logps/chosen": -168.92449951171875, "logps/rejected": -244.16683959960938, "loss": 0.4155, "rewards/accuracies": 0.875, "rewards/chosen": -0.7219383716583252, "rewards/margins": 1.4182631969451904, "rewards/rejected": -2.1402015686035156, "step": 2132 }, { "epoch": 0.25, "learning_rate": 2.2899492145978505e-07, "logits/chosen": -2.058744430541992, "logits/rejected": -2.150193452835083, "logps/chosen": -228.8919677734375, "logps/rejected": -253.7061004638672, "loss": 0.2413, "rewards/accuracies": 1.0, "rewards/chosen": -0.9216313362121582, "rewards/margins": 1.5907222032546997, "rewards/rejected": -2.5123536586761475, "step": 2133 }, { "epoch": 0.25, "learning_rate": 2.2895948978386677e-07, "logits/chosen": -2.0887513160705566, "logits/rejected": -1.9864083528518677, "logps/chosen": -347.41241455078125, "logps/rejected": -340.7961120605469, "loss": 0.4651, "rewards/accuracies": 0.75, "rewards/chosen": -0.927433967590332, "rewards/margins": 2.149841547012329, "rewards/rejected": -3.0772757530212402, "step": 2134 }, { "epoch": 0.25, "learning_rate": 2.289240581079485e-07, "logits/chosen": -2.3944852352142334, "logits/rejected": -2.4618420600891113, "logps/chosen": -307.84527587890625, "logps/rejected": -362.5429992675781, "loss": 0.2097, "rewards/accuracies": 1.0, "rewards/chosen": -0.24313297867774963, "rewards/margins": 3.026973009109497, "rewards/rejected": -3.2701058387756348, "step": 2135 }, { "epoch": 0.25, "learning_rate": 2.2888862643203024e-07, "logits/chosen": -2.4910762310028076, "logits/rejected": -2.3431849479675293, "logps/chosen": -397.3056945800781, "logps/rejected": -358.9815368652344, "loss": 0.8569, "rewards/accuracies": 0.5, "rewards/chosen": -0.4302827715873718, "rewards/margins": 1.3840219974517822, "rewards/rejected": -1.8143047094345093, "step": 2136 }, { "epoch": 0.25, "learning_rate": 2.2885319475611196e-07, "logits/chosen": -2.7177534103393555, "logits/rejected": -2.8236515522003174, "logps/chosen": -166.70004272460938, "logps/rejected": -251.80120849609375, "loss": 0.3298, "rewards/accuracies": 0.875, "rewards/chosen": -0.5478538274765015, "rewards/margins": 2.876509666442871, "rewards/rejected": -3.424363613128662, "step": 2137 }, { "epoch": 0.25, "learning_rate": 2.2881776308019368e-07, "logits/chosen": -2.3688271045684814, "logits/rejected": -2.226384162902832, "logps/chosen": -386.25531005859375, "logps/rejected": -346.11669921875, "loss": 0.4517, "rewards/accuracies": 0.875, "rewards/chosen": -0.14075793325901031, "rewards/margins": 2.106947183609009, "rewards/rejected": -2.2477052211761475, "step": 2138 }, { "epoch": 0.25, "learning_rate": 2.287823314042754e-07, "logits/chosen": -2.571503162384033, "logits/rejected": -2.8303749561309814, "logps/chosen": -496.48614501953125, "logps/rejected": -315.65814208984375, "loss": 0.3743, "rewards/accuracies": 0.875, "rewards/chosen": -0.15027162432670593, "rewards/margins": 1.311789631843567, "rewards/rejected": -1.4620612859725952, "step": 2139 }, { "epoch": 0.25, "learning_rate": 2.2874689972835713e-07, "logits/chosen": -2.4124035835266113, "logits/rejected": -2.544478416442871, "logps/chosen": -237.2946014404297, "logps/rejected": -255.879638671875, "loss": 0.6711, "rewards/accuracies": 0.5, "rewards/chosen": -1.050889253616333, "rewards/margins": 1.1060473918914795, "rewards/rejected": -2.1569366455078125, "step": 2140 }, { "epoch": 0.25, "learning_rate": 2.2871146805243885e-07, "logits/chosen": -2.790017604827881, "logits/rejected": -2.8436005115509033, "logps/chosen": -129.85610961914062, "logps/rejected": -243.6861572265625, "loss": 0.3345, "rewards/accuracies": 0.875, "rewards/chosen": -0.023372262716293335, "rewards/margins": 2.127570629119873, "rewards/rejected": -2.150942802429199, "step": 2141 }, { "epoch": 0.25, "learning_rate": 2.286760363765206e-07, "logits/chosen": -2.323880672454834, "logits/rejected": -2.3414485454559326, "logps/chosen": -150.3916015625, "logps/rejected": -154.37591552734375, "loss": 0.4648, "rewards/accuracies": 0.75, "rewards/chosen": -1.2354729175567627, "rewards/margins": 1.809091567993164, "rewards/rejected": -3.0445644855499268, "step": 2142 }, { "epoch": 0.25, "learning_rate": 2.2864060470060232e-07, "logits/chosen": -2.4641032218933105, "logits/rejected": -2.1864349842071533, "logps/chosen": -172.58273315429688, "logps/rejected": -237.34593200683594, "loss": 0.6329, "rewards/accuracies": 0.875, "rewards/chosen": -0.867960512638092, "rewards/margins": 1.3109456300735474, "rewards/rejected": -2.178906202316284, "step": 2143 }, { "epoch": 0.25, "learning_rate": 2.2860517302468407e-07, "logits/chosen": -2.2381465435028076, "logits/rejected": -2.1028833389282227, "logps/chosen": -515.9766845703125, "logps/rejected": -384.5388488769531, "loss": 0.0903, "rewards/accuracies": 1.0, "rewards/chosen": -1.264719009399414, "rewards/margins": 3.6574082374572754, "rewards/rejected": -4.922127723693848, "step": 2144 }, { "epoch": 0.25, "learning_rate": 2.285697413487658e-07, "logits/chosen": -2.3454036712646484, "logits/rejected": -2.3121447563171387, "logps/chosen": -355.50030517578125, "logps/rejected": -258.6581726074219, "loss": 0.2716, "rewards/accuracies": 0.875, "rewards/chosen": -0.17693877220153809, "rewards/margins": 1.9134660959243774, "rewards/rejected": -2.090404987335205, "step": 2145 }, { "epoch": 0.25, "learning_rate": 2.285343096728475e-07, "logits/chosen": -2.735893726348877, "logits/rejected": -2.8115148544311523, "logps/chosen": -192.97540283203125, "logps/rejected": -150.6136932373047, "loss": 0.3932, "rewards/accuracies": 0.875, "rewards/chosen": -0.5548878908157349, "rewards/margins": 1.6299331188201904, "rewards/rejected": -2.184821128845215, "step": 2146 }, { "epoch": 0.25, "learning_rate": 2.2849887799692926e-07, "logits/chosen": -2.314082622528076, "logits/rejected": -2.6197214126586914, "logps/chosen": -377.728759765625, "logps/rejected": -231.7926025390625, "loss": 0.2437, "rewards/accuracies": 1.0, "rewards/chosen": -0.8115788698196411, "rewards/margins": 1.5890796184539795, "rewards/rejected": -2.40065860748291, "step": 2147 }, { "epoch": 0.25, "learning_rate": 2.2846344632101098e-07, "logits/chosen": -1.670051097869873, "logits/rejected": -2.135693073272705, "logps/chosen": -306.4853515625, "logps/rejected": -270.49725341796875, "loss": 0.7444, "rewards/accuracies": 0.625, "rewards/chosen": -1.326229453086853, "rewards/margins": 0.1724797934293747, "rewards/rejected": -1.4987092018127441, "step": 2148 }, { "epoch": 0.25, "learning_rate": 2.284280146450927e-07, "logits/chosen": -2.5106849670410156, "logits/rejected": -2.512481451034546, "logps/chosen": -239.30255126953125, "logps/rejected": -186.81539916992188, "loss": 0.2753, "rewards/accuracies": 1.0, "rewards/chosen": -0.5068041086196899, "rewards/margins": 1.5255060195922852, "rewards/rejected": -2.0323102474212646, "step": 2149 }, { "epoch": 0.25, "learning_rate": 2.2839258296917443e-07, "logits/chosen": -2.034332752227783, "logits/rejected": -2.2236948013305664, "logps/chosen": -417.65570068359375, "logps/rejected": -199.47605895996094, "loss": 0.3308, "rewards/accuracies": 0.75, "rewards/chosen": -0.6438850164413452, "rewards/margins": 1.8943629264831543, "rewards/rejected": -2.538248062133789, "step": 2150 }, { "epoch": 0.25, "learning_rate": 2.2835715129325615e-07, "logits/chosen": -2.150095224380493, "logits/rejected": -2.4181201457977295, "logps/chosen": -316.0111083984375, "logps/rejected": -291.03912353515625, "loss": 0.7188, "rewards/accuracies": 0.75, "rewards/chosen": -0.8399404883384705, "rewards/margins": 2.407923698425293, "rewards/rejected": -3.247864246368408, "step": 2151 }, { "epoch": 0.25, "learning_rate": 2.2832171961733787e-07, "logits/chosen": -2.6252546310424805, "logits/rejected": -2.8130970001220703, "logps/chosen": -291.6003723144531, "logps/rejected": -299.0618591308594, "loss": 0.3524, "rewards/accuracies": 0.875, "rewards/chosen": -0.3494405746459961, "rewards/margins": 1.6680450439453125, "rewards/rejected": -2.0174856185913086, "step": 2152 }, { "epoch": 0.25, "learning_rate": 2.2828628794141962e-07, "logits/chosen": -2.400312662124634, "logits/rejected": -2.5134358406066895, "logps/chosen": -344.0612487792969, "logps/rejected": -244.72296142578125, "loss": 1.0196, "rewards/accuracies": 0.625, "rewards/chosen": -1.1275907754898071, "rewards/margins": 1.1151525974273682, "rewards/rejected": -2.2427432537078857, "step": 2153 }, { "epoch": 0.25, "learning_rate": 2.2825085626550134e-07, "logits/chosen": -2.3547616004943848, "logits/rejected": -2.119281053543091, "logps/chosen": -331.4162292480469, "logps/rejected": -289.97186279296875, "loss": 0.5657, "rewards/accuracies": 0.75, "rewards/chosen": -1.0001826286315918, "rewards/margins": 1.0805103778839111, "rewards/rejected": -2.080693006515503, "step": 2154 }, { "epoch": 0.25, "learning_rate": 2.2821542458958307e-07, "logits/chosen": -2.3150649070739746, "logits/rejected": -2.1082205772399902, "logps/chosen": -232.9322052001953, "logps/rejected": -260.71600341796875, "loss": 0.5042, "rewards/accuracies": 0.75, "rewards/chosen": -1.1300795078277588, "rewards/margins": 2.1710898876190186, "rewards/rejected": -3.3011693954467773, "step": 2155 }, { "epoch": 0.25, "learning_rate": 2.2817999291366481e-07, "logits/chosen": -2.336153507232666, "logits/rejected": -2.715815544128418, "logps/chosen": -723.7492065429688, "logps/rejected": -231.21392822265625, "loss": 0.5667, "rewards/accuracies": 0.75, "rewards/chosen": -1.0666197538375854, "rewards/margins": 0.7664984464645386, "rewards/rejected": -1.8331180810928345, "step": 2156 }, { "epoch": 0.25, "learning_rate": 2.2814456123774654e-07, "logits/chosen": -2.0084612369537354, "logits/rejected": -1.59686279296875, "logps/chosen": -285.2032165527344, "logps/rejected": -362.973388671875, "loss": 0.5674, "rewards/accuracies": 0.75, "rewards/chosen": -0.6827245354652405, "rewards/margins": 0.9338477849960327, "rewards/rejected": -1.6165724992752075, "step": 2157 }, { "epoch": 0.25, "learning_rate": 2.2810912956182828e-07, "logits/chosen": -2.693225383758545, "logits/rejected": -2.6949892044067383, "logps/chosen": -332.5247497558594, "logps/rejected": -278.5345458984375, "loss": 0.3053, "rewards/accuracies": 0.75, "rewards/chosen": -0.3524531424045563, "rewards/margins": 1.9375313520431519, "rewards/rejected": -2.289984703063965, "step": 2158 }, { "epoch": 0.25, "learning_rate": 2.2807369788591e-07, "logits/chosen": -2.1743569374084473, "logits/rejected": -2.186223030090332, "logps/chosen": -249.9532470703125, "logps/rejected": -356.5863037109375, "loss": 0.2661, "rewards/accuracies": 1.0, "rewards/chosen": -1.1279939413070679, "rewards/margins": 2.0470759868621826, "rewards/rejected": -3.175069570541382, "step": 2159 }, { "epoch": 0.25, "learning_rate": 2.2803826620999173e-07, "logits/chosen": -2.0684783458709717, "logits/rejected": -2.278337001800537, "logps/chosen": -216.55099487304688, "logps/rejected": -205.36614990234375, "loss": 0.5008, "rewards/accuracies": 0.75, "rewards/chosen": -0.8345708250999451, "rewards/margins": 1.2827980518341064, "rewards/rejected": -2.117368698120117, "step": 2160 }, { "epoch": 0.25, "learning_rate": 2.2800283453407345e-07, "logits/chosen": -1.887197732925415, "logits/rejected": -2.0081796646118164, "logps/chosen": -323.2264404296875, "logps/rejected": -244.2742919921875, "loss": 1.0444, "rewards/accuracies": 0.375, "rewards/chosen": -1.0651228427886963, "rewards/margins": -0.26668572425842285, "rewards/rejected": -0.7984371781349182, "step": 2161 }, { "epoch": 0.25, "learning_rate": 2.2796740285815517e-07, "logits/chosen": -2.2592294216156006, "logits/rejected": -2.133533239364624, "logps/chosen": -255.3863525390625, "logps/rejected": -273.91229248046875, "loss": 0.4766, "rewards/accuracies": 0.625, "rewards/chosen": -0.7411010265350342, "rewards/margins": 1.3481422662734985, "rewards/rejected": -2.0892434120178223, "step": 2162 }, { "epoch": 0.25, "learning_rate": 2.279319711822369e-07, "logits/chosen": -2.39513897895813, "logits/rejected": -2.6418890953063965, "logps/chosen": -387.3426818847656, "logps/rejected": -211.67279052734375, "loss": 0.3396, "rewards/accuracies": 0.75, "rewards/chosen": -0.7786517143249512, "rewards/margins": 2.430246353149414, "rewards/rejected": -3.2088980674743652, "step": 2163 }, { "epoch": 0.25, "learning_rate": 2.2789653950631862e-07, "logits/chosen": -2.5594708919525146, "logits/rejected": -2.400991916656494, "logps/chosen": -305.7545471191406, "logps/rejected": -338.04656982421875, "loss": 0.2144, "rewards/accuracies": 0.875, "rewards/chosen": -0.8824601769447327, "rewards/margins": 2.3318915367126465, "rewards/rejected": -3.2143516540527344, "step": 2164 }, { "epoch": 0.25, "learning_rate": 2.2786110783040037e-07, "logits/chosen": -2.7051806449890137, "logits/rejected": -2.6197474002838135, "logps/chosen": -152.98471069335938, "logps/rejected": -351.36248779296875, "loss": 0.2955, "rewards/accuracies": 0.875, "rewards/chosen": -0.303146094083786, "rewards/margins": 2.4990811347961426, "rewards/rejected": -2.80222749710083, "step": 2165 }, { "epoch": 0.25, "learning_rate": 2.278256761544821e-07, "logits/chosen": -2.1872992515563965, "logits/rejected": -2.317622184753418, "logps/chosen": -378.1176452636719, "logps/rejected": -418.0537414550781, "loss": 0.5468, "rewards/accuracies": 0.625, "rewards/chosen": -0.20752966403961182, "rewards/margins": 1.941022276878357, "rewards/rejected": -2.1485519409179688, "step": 2166 }, { "epoch": 0.25, "learning_rate": 2.2779024447856384e-07, "logits/chosen": -2.4930427074432373, "logits/rejected": -2.4171035289764404, "logps/chosen": -235.86534118652344, "logps/rejected": -290.5405578613281, "loss": 0.2915, "rewards/accuracies": 0.75, "rewards/chosen": -0.18631824851036072, "rewards/margins": 3.558704376220703, "rewards/rejected": -3.7450222969055176, "step": 2167 }, { "epoch": 0.25, "learning_rate": 2.2775481280264556e-07, "logits/chosen": -2.1766223907470703, "logits/rejected": -2.381653308868408, "logps/chosen": -334.9700012207031, "logps/rejected": -228.99057006835938, "loss": 0.4758, "rewards/accuracies": 0.875, "rewards/chosen": -1.0367393493652344, "rewards/margins": 0.8552526235580444, "rewards/rejected": -1.8919920921325684, "step": 2168 }, { "epoch": 0.25, "learning_rate": 2.277193811267273e-07, "logits/chosen": -2.0178306102752686, "logits/rejected": -2.189450740814209, "logps/chosen": -275.6355895996094, "logps/rejected": -281.29693603515625, "loss": 0.6212, "rewards/accuracies": 0.625, "rewards/chosen": -0.7972977757453918, "rewards/margins": 1.1546599864959717, "rewards/rejected": -1.9519578218460083, "step": 2169 }, { "epoch": 0.25, "learning_rate": 2.2768394945080903e-07, "logits/chosen": -2.2734668254852295, "logits/rejected": -2.4732158184051514, "logps/chosen": -187.61366271972656, "logps/rejected": -185.02780151367188, "loss": 0.9356, "rewards/accuracies": 0.75, "rewards/chosen": -1.1494472026824951, "rewards/margins": 1.1912405490875244, "rewards/rejected": -2.3406877517700195, "step": 2170 }, { "epoch": 0.25, "learning_rate": 2.2764851777489075e-07, "logits/chosen": -2.298065662384033, "logits/rejected": -2.368726968765259, "logps/chosen": -233.78732299804688, "logps/rejected": -232.2427215576172, "loss": 0.7904, "rewards/accuracies": 0.625, "rewards/chosen": -0.7596050500869751, "rewards/margins": 0.35640284419059753, "rewards/rejected": -1.1160078048706055, "step": 2171 }, { "epoch": 0.25, "learning_rate": 2.2761308609897247e-07, "logits/chosen": -1.6020190715789795, "logits/rejected": -2.079550266265869, "logps/chosen": -403.8466796875, "logps/rejected": -250.55337524414062, "loss": 0.4163, "rewards/accuracies": 0.875, "rewards/chosen": -0.16269069910049438, "rewards/margins": 0.8778970241546631, "rewards/rejected": -1.0405877828598022, "step": 2172 }, { "epoch": 0.25, "learning_rate": 2.275776544230542e-07, "logits/chosen": -2.381019353866577, "logits/rejected": -2.465379476547241, "logps/chosen": -282.2665100097656, "logps/rejected": -376.0032958984375, "loss": 0.4417, "rewards/accuracies": 0.75, "rewards/chosen": -0.12151315808296204, "rewards/margins": 1.1937079429626465, "rewards/rejected": -1.3152210712432861, "step": 2173 }, { "epoch": 0.25, "learning_rate": 2.2754222274713592e-07, "logits/chosen": -1.6910005807876587, "logits/rejected": -1.7976192235946655, "logps/chosen": -452.1587219238281, "logps/rejected": -310.80059814453125, "loss": 0.4499, "rewards/accuracies": 0.75, "rewards/chosen": -0.7068763375282288, "rewards/margins": 1.420177936553955, "rewards/rejected": -2.127054214477539, "step": 2174 }, { "epoch": 0.25, "learning_rate": 2.2750679107121764e-07, "logits/chosen": -2.0479683876037598, "logits/rejected": -1.8563112020492554, "logps/chosen": -308.58099365234375, "logps/rejected": -300.50927734375, "loss": 0.3251, "rewards/accuracies": 0.75, "rewards/chosen": -1.0412843227386475, "rewards/margins": 1.8914991617202759, "rewards/rejected": -2.932783603668213, "step": 2175 }, { "epoch": 0.25, "learning_rate": 2.274713593952994e-07, "logits/chosen": -2.4977524280548096, "logits/rejected": -2.725266695022583, "logps/chosen": -316.5888366699219, "logps/rejected": -230.71267700195312, "loss": 0.4248, "rewards/accuracies": 0.625, "rewards/chosen": -0.4561302661895752, "rewards/margins": 1.786384105682373, "rewards/rejected": -2.2425143718719482, "step": 2176 }, { "epoch": 0.25, "learning_rate": 2.274359277193811e-07, "logits/chosen": -2.2812459468841553, "logits/rejected": -2.227173328399658, "logps/chosen": -202.62049865722656, "logps/rejected": -277.80438232421875, "loss": 0.4856, "rewards/accuracies": 0.75, "rewards/chosen": -0.9735349416732788, "rewards/margins": 1.0427041053771973, "rewards/rejected": -2.0162389278411865, "step": 2177 }, { "epoch": 0.25, "learning_rate": 2.2740049604346283e-07, "logits/chosen": -2.3988869190216064, "logits/rejected": -2.5430619716644287, "logps/chosen": -350.46612548828125, "logps/rejected": -285.5378723144531, "loss": 0.3341, "rewards/accuracies": 0.875, "rewards/chosen": -0.5103209018707275, "rewards/margins": 1.8445733785629272, "rewards/rejected": -2.3548941612243652, "step": 2178 }, { "epoch": 0.25, "learning_rate": 2.2736506436754458e-07, "logits/chosen": -2.0520081520080566, "logits/rejected": -2.3029398918151855, "logps/chosen": -203.90997314453125, "logps/rejected": -132.52655029296875, "loss": 0.4222, "rewards/accuracies": 0.75, "rewards/chosen": -0.7518880367279053, "rewards/margins": 1.1252784729003906, "rewards/rejected": -1.8771663904190063, "step": 2179 }, { "epoch": 0.25, "learning_rate": 2.273296326916263e-07, "logits/chosen": -2.256568670272827, "logits/rejected": -2.358093500137329, "logps/chosen": -279.9107971191406, "logps/rejected": -406.9961853027344, "loss": 0.1796, "rewards/accuracies": 1.0, "rewards/chosen": -0.6932511925697327, "rewards/margins": 2.5141732692718506, "rewards/rejected": -3.2074244022369385, "step": 2180 }, { "epoch": 0.25, "learning_rate": 2.2729420101570805e-07, "logits/chosen": -1.9170485734939575, "logits/rejected": -2.6212596893310547, "logps/chosen": -416.2386169433594, "logps/rejected": -299.912109375, "loss": 0.0732, "rewards/accuracies": 1.0, "rewards/chosen": 0.06405384838581085, "rewards/margins": 4.170487880706787, "rewards/rejected": -4.106434345245361, "step": 2181 }, { "epoch": 0.25, "learning_rate": 2.2725876933978977e-07, "logits/chosen": -2.1969571113586426, "logits/rejected": -2.1868491172790527, "logps/chosen": -204.36643981933594, "logps/rejected": -231.0072021484375, "loss": 0.8245, "rewards/accuracies": 0.75, "rewards/chosen": -1.7387568950653076, "rewards/margins": 0.8371239900588989, "rewards/rejected": -2.575880527496338, "step": 2182 }, { "epoch": 0.25, "learning_rate": 2.272233376638715e-07, "logits/chosen": -2.528383255004883, "logits/rejected": -2.509833812713623, "logps/chosen": -137.77626037597656, "logps/rejected": -222.89300537109375, "loss": 0.3118, "rewards/accuracies": 1.0, "rewards/chosen": -0.1394088715314865, "rewards/margins": 1.960296869277954, "rewards/rejected": -2.099705696105957, "step": 2183 }, { "epoch": 0.25, "learning_rate": 2.2718790598795322e-07, "logits/chosen": -2.4688563346862793, "logits/rejected": -2.467459201812744, "logps/chosen": -192.42608642578125, "logps/rejected": -174.33621215820312, "loss": 0.4512, "rewards/accuracies": 0.875, "rewards/chosen": -0.45962536334991455, "rewards/margins": 1.3100085258483887, "rewards/rejected": -1.7696338891983032, "step": 2184 }, { "epoch": 0.25, "learning_rate": 2.2715247431203494e-07, "logits/chosen": -2.521930456161499, "logits/rejected": -2.4143190383911133, "logps/chosen": -233.11837768554688, "logps/rejected": -340.8379821777344, "loss": 0.6535, "rewards/accuracies": 0.625, "rewards/chosen": -1.5772802829742432, "rewards/margins": 0.8542445302009583, "rewards/rejected": -2.4315247535705566, "step": 2185 }, { "epoch": 0.25, "learning_rate": 2.2711704263611666e-07, "logits/chosen": -2.4585909843444824, "logits/rejected": -2.1747941970825195, "logps/chosen": -180.0541534423828, "logps/rejected": -399.7169189453125, "loss": 0.4061, "rewards/accuracies": 0.875, "rewards/chosen": -0.4606156051158905, "rewards/margins": 1.515753149986267, "rewards/rejected": -1.9763686656951904, "step": 2186 }, { "epoch": 0.25, "learning_rate": 2.270816109601984e-07, "logits/chosen": -1.9671661853790283, "logits/rejected": -2.0288028717041016, "logps/chosen": -570.3203735351562, "logps/rejected": -288.6346130371094, "loss": 0.3564, "rewards/accuracies": 0.75, "rewards/chosen": -0.48816007375717163, "rewards/margins": 1.9157480001449585, "rewards/rejected": -2.4039080142974854, "step": 2187 }, { "epoch": 0.25, "learning_rate": 2.2704617928428013e-07, "logits/chosen": -2.7198400497436523, "logits/rejected": -2.52475905418396, "logps/chosen": -285.28680419921875, "logps/rejected": -218.46337890625, "loss": 0.4109, "rewards/accuracies": 0.75, "rewards/chosen": -0.9212007522583008, "rewards/margins": 1.336998701095581, "rewards/rejected": -2.258199691772461, "step": 2188 }, { "epoch": 0.25, "learning_rate": 2.2701074760836186e-07, "logits/chosen": -2.2880024909973145, "logits/rejected": -2.521951675415039, "logps/chosen": -379.365234375, "logps/rejected": -174.3182373046875, "loss": 0.8191, "rewards/accuracies": 0.625, "rewards/chosen": -0.3549180030822754, "rewards/margins": 0.5332018136978149, "rewards/rejected": -0.8881198167800903, "step": 2189 }, { "epoch": 0.25, "learning_rate": 2.2697531593244358e-07, "logits/chosen": -2.1973929405212402, "logits/rejected": -2.365079879760742, "logps/chosen": -458.2642822265625, "logps/rejected": -294.8658142089844, "loss": 0.246, "rewards/accuracies": 0.875, "rewards/chosen": -0.3687232732772827, "rewards/margins": 1.8982001543045044, "rewards/rejected": -2.266923427581787, "step": 2190 }, { "epoch": 0.25, "learning_rate": 2.2693988425652533e-07, "logits/chosen": -2.3547797203063965, "logits/rejected": -2.307621479034424, "logps/chosen": -322.23980712890625, "logps/rejected": -333.84039306640625, "loss": 0.4692, "rewards/accuracies": 0.75, "rewards/chosen": -0.055474333465099335, "rewards/margins": 2.1145195960998535, "rewards/rejected": -2.1699936389923096, "step": 2191 }, { "epoch": 0.25, "learning_rate": 2.2690445258060707e-07, "logits/chosen": -2.077676773071289, "logits/rejected": -2.034428596496582, "logps/chosen": -186.52151489257812, "logps/rejected": -370.5426025390625, "loss": 0.588, "rewards/accuracies": 0.625, "rewards/chosen": -0.4094897508621216, "rewards/margins": 1.5558617115020752, "rewards/rejected": -1.9653514623641968, "step": 2192 }, { "epoch": 0.26, "learning_rate": 2.268690209046888e-07, "logits/chosen": -1.9660917520523071, "logits/rejected": -2.412707805633545, "logps/chosen": -303.42559814453125, "logps/rejected": -375.7781982421875, "loss": 0.8299, "rewards/accuracies": 0.75, "rewards/chosen": -1.0103862285614014, "rewards/margins": 1.102613925933838, "rewards/rejected": -2.11299991607666, "step": 2193 }, { "epoch": 0.26, "learning_rate": 2.2683358922877052e-07, "logits/chosen": -1.8946905136108398, "logits/rejected": -1.6209267377853394, "logps/chosen": -245.49798583984375, "logps/rejected": -361.66046142578125, "loss": 0.3415, "rewards/accuracies": 0.875, "rewards/chosen": -1.96596097946167, "rewards/margins": 2.207536458969116, "rewards/rejected": -4.173497200012207, "step": 2194 }, { "epoch": 0.26, "learning_rate": 2.2679815755285224e-07, "logits/chosen": -2.6899492740631104, "logits/rejected": -2.829401731491089, "logps/chosen": -550.650390625, "logps/rejected": -421.2415771484375, "loss": 0.6853, "rewards/accuracies": 0.75, "rewards/chosen": -1.443908452987671, "rewards/margins": 1.0611375570297241, "rewards/rejected": -2.5050458908081055, "step": 2195 }, { "epoch": 0.26, "learning_rate": 2.2676272587693396e-07, "logits/chosen": -2.27907395362854, "logits/rejected": -2.3962414264678955, "logps/chosen": -187.3654022216797, "logps/rejected": -214.6670684814453, "loss": 0.305, "rewards/accuracies": 0.875, "rewards/chosen": -0.5460546612739563, "rewards/margins": 1.57201087474823, "rewards/rejected": -2.118065357208252, "step": 2196 }, { "epoch": 0.26, "learning_rate": 2.2672729420101569e-07, "logits/chosen": -2.7213053703308105, "logits/rejected": -2.679159164428711, "logps/chosen": -126.7208251953125, "logps/rejected": -137.71084594726562, "loss": 1.0016, "rewards/accuracies": 0.625, "rewards/chosen": -1.8497123718261719, "rewards/margins": 1.409379243850708, "rewards/rejected": -3.259091377258301, "step": 2197 }, { "epoch": 0.26, "learning_rate": 2.2669186252509743e-07, "logits/chosen": -2.4415602684020996, "logits/rejected": -2.539029359817505, "logps/chosen": -318.6957092285156, "logps/rejected": -338.78985595703125, "loss": 0.5346, "rewards/accuracies": 0.75, "rewards/chosen": -0.8570128679275513, "rewards/margins": 0.7651805281639099, "rewards/rejected": -1.622193455696106, "step": 2198 }, { "epoch": 0.26, "learning_rate": 2.2665643084917916e-07, "logits/chosen": -2.4161999225616455, "logits/rejected": -2.4458882808685303, "logps/chosen": -100.66584014892578, "logps/rejected": -156.87957763671875, "loss": 0.4165, "rewards/accuracies": 0.75, "rewards/chosen": -0.7642151117324829, "rewards/margins": 1.532168984413147, "rewards/rejected": -2.29638409614563, "step": 2199 }, { "epoch": 0.26, "learning_rate": 2.2662099917326088e-07, "logits/chosen": -2.0434110164642334, "logits/rejected": -2.3836755752563477, "logps/chosen": -305.6235046386719, "logps/rejected": -268.07110595703125, "loss": 0.7506, "rewards/accuracies": 0.625, "rewards/chosen": -1.291447639465332, "rewards/margins": 0.857170581817627, "rewards/rejected": -2.14861798286438, "step": 2200 }, { "epoch": 0.26, "learning_rate": 2.265855674973426e-07, "logits/chosen": -2.269157886505127, "logits/rejected": -2.276824712753296, "logps/chosen": -161.12884521484375, "logps/rejected": -164.8707733154297, "loss": 0.2724, "rewards/accuracies": 0.875, "rewards/chosen": -0.3817580044269562, "rewards/margins": 2.5294058322906494, "rewards/rejected": -2.911163806915283, "step": 2201 }, { "epoch": 0.26, "learning_rate": 2.2655013582142435e-07, "logits/chosen": -2.4443743228912354, "logits/rejected": -2.704706907272339, "logps/chosen": -219.93496704101562, "logps/rejected": -129.1767120361328, "loss": 0.4081, "rewards/accuracies": 0.75, "rewards/chosen": -0.4428876042366028, "rewards/margins": 1.0850483179092407, "rewards/rejected": -1.5279359817504883, "step": 2202 }, { "epoch": 0.26, "learning_rate": 2.265147041455061e-07, "logits/chosen": -1.9535980224609375, "logits/rejected": -2.116917133331299, "logps/chosen": -237.8778076171875, "logps/rejected": -182.68408203125, "loss": 0.4124, "rewards/accuracies": 0.875, "rewards/chosen": -0.20374450087547302, "rewards/margins": 1.2534356117248535, "rewards/rejected": -1.4571800231933594, "step": 2203 }, { "epoch": 0.26, "learning_rate": 2.2647927246958782e-07, "logits/chosen": -2.682274580001831, "logits/rejected": -2.4723587036132812, "logps/chosen": -234.87371826171875, "logps/rejected": -360.26568603515625, "loss": 0.2897, "rewards/accuracies": 1.0, "rewards/chosen": -1.3790254592895508, "rewards/margins": 2.2587177753448486, "rewards/rejected": -3.6377432346343994, "step": 2204 }, { "epoch": 0.26, "learning_rate": 2.2644384079366954e-07, "logits/chosen": -2.148134469985962, "logits/rejected": -2.590517282485962, "logps/chosen": -296.2895812988281, "logps/rejected": -252.99815368652344, "loss": 0.8687, "rewards/accuracies": 0.75, "rewards/chosen": -0.8539435863494873, "rewards/margins": 0.5031282305717468, "rewards/rejected": -1.3570716381072998, "step": 2205 }, { "epoch": 0.26, "learning_rate": 2.2640840911775126e-07, "logits/chosen": -2.566002607345581, "logits/rejected": -2.6361191272735596, "logps/chosen": -194.16429138183594, "logps/rejected": -253.6163330078125, "loss": 0.1997, "rewards/accuracies": 1.0, "rewards/chosen": 0.12994100153446198, "rewards/margins": 3.005037307739258, "rewards/rejected": -2.875096321105957, "step": 2206 }, { "epoch": 0.26, "learning_rate": 2.2637297744183299e-07, "logits/chosen": -1.7851834297180176, "logits/rejected": -2.045300006866455, "logps/chosen": -405.2264709472656, "logps/rejected": -397.5347900390625, "loss": 0.6084, "rewards/accuracies": 0.625, "rewards/chosen": -0.6460085511207581, "rewards/margins": 0.49397343397140503, "rewards/rejected": -1.139981985092163, "step": 2207 }, { "epoch": 0.26, "learning_rate": 2.263375457659147e-07, "logits/chosen": -2.2607016563415527, "logits/rejected": -2.3870835304260254, "logps/chosen": -317.15496826171875, "logps/rejected": -346.7872314453125, "loss": 0.2319, "rewards/accuracies": 0.875, "rewards/chosen": -0.5566080808639526, "rewards/margins": 2.465899705886841, "rewards/rejected": -3.022507905960083, "step": 2208 }, { "epoch": 0.26, "learning_rate": 2.2630211408999643e-07, "logits/chosen": -2.788239002227783, "logits/rejected": -2.683950901031494, "logps/chosen": -297.99078369140625, "logps/rejected": -248.7438507080078, "loss": 0.3137, "rewards/accuracies": 0.875, "rewards/chosen": -0.39687103033065796, "rewards/margins": 2.641606092453003, "rewards/rejected": -3.0384771823883057, "step": 2209 }, { "epoch": 0.26, "learning_rate": 2.2626668241407818e-07, "logits/chosen": -1.77592933177948, "logits/rejected": -1.803506851196289, "logps/chosen": -347.27459716796875, "logps/rejected": -389.534423828125, "loss": 0.3034, "rewards/accuracies": 0.875, "rewards/chosen": -0.6428102254867554, "rewards/margins": 1.9889980554580688, "rewards/rejected": -2.631808280944824, "step": 2210 }, { "epoch": 0.26, "learning_rate": 2.262312507381599e-07, "logits/chosen": -2.326338768005371, "logits/rejected": -2.5560641288757324, "logps/chosen": -208.26580810546875, "logps/rejected": -238.36288452148438, "loss": 0.3188, "rewards/accuracies": 0.875, "rewards/chosen": -1.5791000127792358, "rewards/margins": 1.9776796102523804, "rewards/rejected": -3.556779384613037, "step": 2211 }, { "epoch": 0.26, "learning_rate": 2.2619581906224162e-07, "logits/chosen": -2.4291164875030518, "logits/rejected": -2.549701690673828, "logps/chosen": -301.35821533203125, "logps/rejected": -339.755859375, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": 0.0733058825135231, "rewards/margins": 3.8136773109436035, "rewards/rejected": -3.7403712272644043, "step": 2212 }, { "epoch": 0.26, "learning_rate": 2.2616038738632335e-07, "logits/chosen": -2.618539810180664, "logits/rejected": -2.4691076278686523, "logps/chosen": -241.77700805664062, "logps/rejected": -300.0421447753906, "loss": 0.3953, "rewards/accuracies": 0.75, "rewards/chosen": -0.5121986865997314, "rewards/margins": 1.188902735710144, "rewards/rejected": -1.701101541519165, "step": 2213 }, { "epoch": 0.26, "learning_rate": 2.2612495571040512e-07, "logits/chosen": -2.6005430221557617, "logits/rejected": -2.5233426094055176, "logps/chosen": -286.72528076171875, "logps/rejected": -203.00558471679688, "loss": 0.5138, "rewards/accuracies": 0.625, "rewards/chosen": -0.6384978890419006, "rewards/margins": 2.0146634578704834, "rewards/rejected": -2.6531615257263184, "step": 2214 }, { "epoch": 0.26, "learning_rate": 2.2608952403448684e-07, "logits/chosen": -2.1191959381103516, "logits/rejected": -2.606194257736206, "logps/chosen": -351.64996337890625, "logps/rejected": -199.28225708007812, "loss": 0.8352, "rewards/accuracies": 0.625, "rewards/chosen": -1.02115797996521, "rewards/margins": 0.7858879566192627, "rewards/rejected": -1.8070459365844727, "step": 2215 }, { "epoch": 0.26, "learning_rate": 2.2605409235856856e-07, "logits/chosen": -2.7401115894317627, "logits/rejected": -2.407297134399414, "logps/chosen": -238.26165771484375, "logps/rejected": -239.33822631835938, "loss": 0.4776, "rewards/accuracies": 0.75, "rewards/chosen": -0.819509744644165, "rewards/margins": 1.4317800998687744, "rewards/rejected": -2.2512898445129395, "step": 2216 }, { "epoch": 0.26, "learning_rate": 2.260186606826503e-07, "logits/chosen": -1.8954682350158691, "logits/rejected": -2.068930149078369, "logps/chosen": -226.3697509765625, "logps/rejected": -277.2089538574219, "loss": 0.4133, "rewards/accuracies": 0.75, "rewards/chosen": -0.5585901737213135, "rewards/margins": 1.5252166986465454, "rewards/rejected": -2.0838069915771484, "step": 2217 }, { "epoch": 0.26, "learning_rate": 2.25983229006732e-07, "logits/chosen": -1.775301456451416, "logits/rejected": -1.7959669828414917, "logps/chosen": -329.22589111328125, "logps/rejected": -392.1826171875, "loss": 0.24, "rewards/accuracies": 0.875, "rewards/chosen": -0.7417924404144287, "rewards/margins": 2.6951303482055664, "rewards/rejected": -3.436922788619995, "step": 2218 }, { "epoch": 0.26, "learning_rate": 2.2594779733081373e-07, "logits/chosen": -2.3922863006591797, "logits/rejected": -2.254333734512329, "logps/chosen": -271.20068359375, "logps/rejected": -290.08245849609375, "loss": 0.6595, "rewards/accuracies": 0.625, "rewards/chosen": -1.157062292098999, "rewards/margins": 1.2331371307373047, "rewards/rejected": -2.3901994228363037, "step": 2219 }, { "epoch": 0.26, "learning_rate": 2.2591236565489545e-07, "logits/chosen": -2.2101669311523438, "logits/rejected": -2.2030749320983887, "logps/chosen": -148.06199645996094, "logps/rejected": -238.65240478515625, "loss": 0.2563, "rewards/accuracies": 0.875, "rewards/chosen": -0.7841014862060547, "rewards/margins": 3.0515024662017822, "rewards/rejected": -3.835604190826416, "step": 2220 }, { "epoch": 0.26, "learning_rate": 2.258769339789772e-07, "logits/chosen": -2.9097933769226074, "logits/rejected": -2.8605053424835205, "logps/chosen": -273.4096374511719, "logps/rejected": -289.3459167480469, "loss": 0.1587, "rewards/accuracies": 1.0, "rewards/chosen": -0.5949381589889526, "rewards/margins": 2.1197361946105957, "rewards/rejected": -2.714674472808838, "step": 2221 }, { "epoch": 0.26, "learning_rate": 2.2584150230305892e-07, "logits/chosen": -2.0957529544830322, "logits/rejected": -2.2686238288879395, "logps/chosen": -495.4945068359375, "logps/rejected": -324.6546630859375, "loss": 0.5289, "rewards/accuracies": 0.75, "rewards/chosen": -1.6759283542633057, "rewards/margins": 1.2904428243637085, "rewards/rejected": -2.9663710594177246, "step": 2222 }, { "epoch": 0.26, "learning_rate": 2.2580607062714065e-07, "logits/chosen": -2.495971202850342, "logits/rejected": -2.5593466758728027, "logps/chosen": -398.02996826171875, "logps/rejected": -442.7575378417969, "loss": 0.5096, "rewards/accuracies": 0.625, "rewards/chosen": -0.28825056552886963, "rewards/margins": 0.725037157535553, "rewards/rejected": -1.0132877826690674, "step": 2223 }, { "epoch": 0.26, "learning_rate": 2.2577063895122237e-07, "logits/chosen": -1.965828537940979, "logits/rejected": -2.0859873294830322, "logps/chosen": -445.4443054199219, "logps/rejected": -285.328369140625, "loss": 0.3427, "rewards/accuracies": 0.875, "rewards/chosen": -0.6924425363540649, "rewards/margins": 2.374490261077881, "rewards/rejected": -3.066932439804077, "step": 2224 }, { "epoch": 0.26, "learning_rate": 2.257352072753041e-07, "logits/chosen": -2.3780739307403564, "logits/rejected": -2.2002902030944824, "logps/chosen": -372.5032043457031, "logps/rejected": -531.9457397460938, "loss": 0.6927, "rewards/accuracies": 0.75, "rewards/chosen": -0.838510274887085, "rewards/margins": 1.7599385976791382, "rewards/rejected": -2.5984487533569336, "step": 2225 }, { "epoch": 0.26, "learning_rate": 2.2569977559938587e-07, "logits/chosen": -2.2050116062164307, "logits/rejected": -2.1608080863952637, "logps/chosen": -272.6571350097656, "logps/rejected": -260.0111083984375, "loss": 0.4261, "rewards/accuracies": 0.625, "rewards/chosen": -0.35595569014549255, "rewards/margins": 1.4814578294754028, "rewards/rejected": -1.8374134302139282, "step": 2226 }, { "epoch": 0.26, "learning_rate": 2.256643439234676e-07, "logits/chosen": -1.8067381381988525, "logits/rejected": -2.125800132751465, "logps/chosen": -273.30633544921875, "logps/rejected": -261.3936767578125, "loss": 0.3367, "rewards/accuracies": 0.75, "rewards/chosen": -0.7177894115447998, "rewards/margins": 2.1182758808135986, "rewards/rejected": -2.8360652923583984, "step": 2227 }, { "epoch": 0.26, "learning_rate": 2.256289122475493e-07, "logits/chosen": -2.0081374645233154, "logits/rejected": -2.266482353210449, "logps/chosen": -438.4326171875, "logps/rejected": -410.3953857421875, "loss": 0.3921, "rewards/accuracies": 0.875, "rewards/chosen": -0.2940959334373474, "rewards/margins": 1.4941316843032837, "rewards/rejected": -1.7882274389266968, "step": 2228 }, { "epoch": 0.26, "learning_rate": 2.2559348057163103e-07, "logits/chosen": -2.166203498840332, "logits/rejected": -2.0084404945373535, "logps/chosen": -187.4442901611328, "logps/rejected": -260.0781555175781, "loss": 0.3293, "rewards/accuracies": 0.875, "rewards/chosen": -0.20120257139205933, "rewards/margins": 1.6559299230575562, "rewards/rejected": -1.8571325540542603, "step": 2229 }, { "epoch": 0.26, "learning_rate": 2.2555804889571275e-07, "logits/chosen": -2.450007438659668, "logits/rejected": -2.6020655632019043, "logps/chosen": -329.8806457519531, "logps/rejected": -344.6948547363281, "loss": 0.5299, "rewards/accuracies": 0.75, "rewards/chosen": -0.9587228298187256, "rewards/margins": 1.1871380805969238, "rewards/rejected": -2.1458609104156494, "step": 2230 }, { "epoch": 0.26, "learning_rate": 2.2552261721979448e-07, "logits/chosen": -2.6287450790405273, "logits/rejected": -2.438342332839966, "logps/chosen": -298.167724609375, "logps/rejected": -260.5318298339844, "loss": 0.2594, "rewards/accuracies": 0.875, "rewards/chosen": -0.7182356119155884, "rewards/margins": 2.13051438331604, "rewards/rejected": -2.848750114440918, "step": 2231 }, { "epoch": 0.26, "learning_rate": 2.2548718554387622e-07, "logits/chosen": -2.962860345840454, "logits/rejected": -2.952655076980591, "logps/chosen": -115.7410888671875, "logps/rejected": -138.054931640625, "loss": 0.2907, "rewards/accuracies": 0.75, "rewards/chosen": -0.5736470222473145, "rewards/margins": 2.3354883193969727, "rewards/rejected": -2.909135103225708, "step": 2232 }, { "epoch": 0.26, "learning_rate": 2.2545175386795795e-07, "logits/chosen": -2.0351409912109375, "logits/rejected": -2.147841215133667, "logps/chosen": -378.5544738769531, "logps/rejected": -290.6115417480469, "loss": 0.5648, "rewards/accuracies": 0.625, "rewards/chosen": -1.2123260498046875, "rewards/margins": 2.6956775188446045, "rewards/rejected": -3.908003568649292, "step": 2233 }, { "epoch": 0.26, "learning_rate": 2.2541632219203967e-07, "logits/chosen": -2.39721941947937, "logits/rejected": -2.4118902683258057, "logps/chosen": -234.1080780029297, "logps/rejected": -228.02670288085938, "loss": 0.5131, "rewards/accuracies": 0.875, "rewards/chosen": -0.425875723361969, "rewards/margins": 1.2953789234161377, "rewards/rejected": -1.7212547063827515, "step": 2234 }, { "epoch": 0.26, "learning_rate": 2.253808905161214e-07, "logits/chosen": -1.7584824562072754, "logits/rejected": -1.499441385269165, "logps/chosen": -222.8790283203125, "logps/rejected": -250.87045288085938, "loss": 0.7079, "rewards/accuracies": 0.75, "rewards/chosen": -2.2404229640960693, "rewards/margins": 2.239509344100952, "rewards/rejected": -4.479931831359863, "step": 2235 }, { "epoch": 0.26, "learning_rate": 2.253454588402031e-07, "logits/chosen": -1.4448570013046265, "logits/rejected": -1.9737565517425537, "logps/chosen": -631.2334594726562, "logps/rejected": -440.1190185546875, "loss": 0.2786, "rewards/accuracies": 0.875, "rewards/chosen": 0.5250144004821777, "rewards/margins": 1.8212811946868896, "rewards/rejected": -1.296266794204712, "step": 2236 }, { "epoch": 0.26, "learning_rate": 2.253100271642849e-07, "logits/chosen": -2.993021011352539, "logits/rejected": -2.9816830158233643, "logps/chosen": -249.64254760742188, "logps/rejected": -311.1166076660156, "loss": 0.2685, "rewards/accuracies": 0.875, "rewards/chosen": -0.5826046466827393, "rewards/margins": 2.261411190032959, "rewards/rejected": -2.844015598297119, "step": 2237 }, { "epoch": 0.26, "learning_rate": 2.252745954883666e-07, "logits/chosen": -2.2093067169189453, "logits/rejected": -2.5027079582214355, "logps/chosen": -303.1380615234375, "logps/rejected": -216.9829559326172, "loss": 0.2033, "rewards/accuracies": 0.875, "rewards/chosen": -0.21893131732940674, "rewards/margins": 2.499175786972046, "rewards/rejected": -2.718106985092163, "step": 2238 }, { "epoch": 0.26, "learning_rate": 2.2523916381244833e-07, "logits/chosen": -2.306025743484497, "logits/rejected": -2.240424156188965, "logps/chosen": -127.5557861328125, "logps/rejected": -136.601318359375, "loss": 0.6827, "rewards/accuracies": 0.625, "rewards/chosen": -0.8704906105995178, "rewards/margins": 1.0905183553695679, "rewards/rejected": -1.9610090255737305, "step": 2239 }, { "epoch": 0.26, "learning_rate": 2.2520373213653005e-07, "logits/chosen": -2.478996753692627, "logits/rejected": -2.3323349952697754, "logps/chosen": -264.19854736328125, "logps/rejected": -284.23291015625, "loss": 0.5327, "rewards/accuracies": 0.75, "rewards/chosen": -0.6179327964782715, "rewards/margins": 1.0518726110458374, "rewards/rejected": -1.6698055267333984, "step": 2240 }, { "epoch": 0.26, "learning_rate": 2.2516830046061178e-07, "logits/chosen": -2.162780284881592, "logits/rejected": -2.033653736114502, "logps/chosen": -266.3164367675781, "logps/rejected": -373.35113525390625, "loss": 0.1597, "rewards/accuracies": 1.0, "rewards/chosen": 0.4184514284133911, "rewards/margins": 2.8973429203033447, "rewards/rejected": -2.478891372680664, "step": 2241 }, { "epoch": 0.26, "learning_rate": 2.251328687846935e-07, "logits/chosen": -2.7889578342437744, "logits/rejected": -2.7332167625427246, "logps/chosen": -344.39300537109375, "logps/rejected": -366.85821533203125, "loss": 0.4666, "rewards/accuracies": 0.75, "rewards/chosen": -1.529309630393982, "rewards/margins": 2.1084322929382324, "rewards/rejected": -3.637742042541504, "step": 2242 }, { "epoch": 0.26, "learning_rate": 2.2509743710877525e-07, "logits/chosen": -2.9781546592712402, "logits/rejected": -2.9168171882629395, "logps/chosen": -379.44952392578125, "logps/rejected": -323.8695068359375, "loss": 0.2293, "rewards/accuracies": 0.875, "rewards/chosen": -0.23599907755851746, "rewards/margins": 2.6260149478912354, "rewards/rejected": -2.8620142936706543, "step": 2243 }, { "epoch": 0.26, "learning_rate": 2.2506200543285697e-07, "logits/chosen": -1.9151602983474731, "logits/rejected": -2.1762866973876953, "logps/chosen": -246.69979858398438, "logps/rejected": -294.57879638671875, "loss": 0.3734, "rewards/accuracies": 0.875, "rewards/chosen": -0.8125112652778625, "rewards/margins": 1.1405081748962402, "rewards/rejected": -1.953019380569458, "step": 2244 }, { "epoch": 0.26, "learning_rate": 2.250265737569387e-07, "logits/chosen": -2.312774896621704, "logits/rejected": -1.924754023551941, "logps/chosen": -89.41317749023438, "logps/rejected": -222.2435302734375, "loss": 0.3719, "rewards/accuracies": 0.875, "rewards/chosen": -0.5856520533561707, "rewards/margins": 2.412468910217285, "rewards/rejected": -2.9981207847595215, "step": 2245 }, { "epoch": 0.26, "learning_rate": 2.2499114208102041e-07, "logits/chosen": -1.533930778503418, "logits/rejected": -1.6927227973937988, "logps/chosen": -314.80023193359375, "logps/rejected": -337.54248046875, "loss": 0.1374, "rewards/accuracies": 1.0, "rewards/chosen": -0.36502864956855774, "rewards/margins": 2.5815398693084717, "rewards/rejected": -2.946568489074707, "step": 2246 }, { "epoch": 0.26, "learning_rate": 2.2495571040510214e-07, "logits/chosen": -2.655251979827881, "logits/rejected": -2.5738022327423096, "logps/chosen": -127.62313842773438, "logps/rejected": -222.14505004882812, "loss": 0.4241, "rewards/accuracies": 0.625, "rewards/chosen": -1.0949407815933228, "rewards/margins": 2.801884174346924, "rewards/rejected": -3.896825075149536, "step": 2247 }, { "epoch": 0.26, "learning_rate": 2.2492027872918386e-07, "logits/chosen": -2.5697529315948486, "logits/rejected": -2.6344428062438965, "logps/chosen": -344.04644775390625, "logps/rejected": -284.2572021484375, "loss": 0.5911, "rewards/accuracies": 0.625, "rewards/chosen": -0.9132232069969177, "rewards/margins": 1.0355985164642334, "rewards/rejected": -1.9488215446472168, "step": 2248 }, { "epoch": 0.26, "learning_rate": 2.2488484705326563e-07, "logits/chosen": -2.3982791900634766, "logits/rejected": -2.4744508266448975, "logps/chosen": -248.84963989257812, "logps/rejected": -342.4407958984375, "loss": 0.172, "rewards/accuracies": 0.875, "rewards/chosen": -0.4169912040233612, "rewards/margins": 3.298081874847412, "rewards/rejected": -3.715073347091675, "step": 2249 }, { "epoch": 0.26, "learning_rate": 2.2484941537734736e-07, "logits/chosen": -2.700608253479004, "logits/rejected": -2.7071120738983154, "logps/chosen": -244.4542694091797, "logps/rejected": -264.46636962890625, "loss": 0.5948, "rewards/accuracies": 0.625, "rewards/chosen": -1.0091311931610107, "rewards/margins": 1.8171807527542114, "rewards/rejected": -2.8263120651245117, "step": 2250 }, { "epoch": 0.26, "learning_rate": 2.2481398370142908e-07, "logits/chosen": -2.5963611602783203, "logits/rejected": -2.4546470642089844, "logps/chosen": -193.46160888671875, "logps/rejected": -162.83229064941406, "loss": 0.3798, "rewards/accuracies": 0.75, "rewards/chosen": -1.1353093385696411, "rewards/margins": 1.5132030248641968, "rewards/rejected": -2.648512363433838, "step": 2251 }, { "epoch": 0.26, "learning_rate": 2.247785520255108e-07, "logits/chosen": -2.265582323074341, "logits/rejected": -2.2264320850372314, "logps/chosen": -172.97080993652344, "logps/rejected": -187.93341064453125, "loss": 0.4949, "rewards/accuracies": 0.625, "rewards/chosen": -1.1714026927947998, "rewards/margins": 1.2106976509094238, "rewards/rejected": -2.3821005821228027, "step": 2252 }, { "epoch": 0.26, "learning_rate": 2.2474312034959252e-07, "logits/chosen": -2.2259368896484375, "logits/rejected": -2.253335475921631, "logps/chosen": -165.5911102294922, "logps/rejected": -397.0439453125, "loss": 0.2284, "rewards/accuracies": 0.875, "rewards/chosen": -0.3213706612586975, "rewards/margins": 3.548773765563965, "rewards/rejected": -3.870144844055176, "step": 2253 }, { "epoch": 0.26, "learning_rate": 2.2470768867367424e-07, "logits/chosen": -2.4262919425964355, "logits/rejected": -2.0825517177581787, "logps/chosen": -310.81219482421875, "logps/rejected": -405.16412353515625, "loss": 0.4189, "rewards/accuracies": 0.625, "rewards/chosen": -1.1101151704788208, "rewards/margins": 3.710494041442871, "rewards/rejected": -4.820609092712402, "step": 2254 }, { "epoch": 0.26, "learning_rate": 2.24672256997756e-07, "logits/chosen": -2.6745476722717285, "logits/rejected": -2.8498430252075195, "logps/chosen": -444.4673767089844, "logps/rejected": -306.2907409667969, "loss": 0.2535, "rewards/accuracies": 0.875, "rewards/chosen": -0.46698835492134094, "rewards/margins": 2.3085343837738037, "rewards/rejected": -2.775522470474243, "step": 2255 }, { "epoch": 0.26, "learning_rate": 2.2463682532183771e-07, "logits/chosen": -2.300407648086548, "logits/rejected": -2.2511019706726074, "logps/chosen": -275.4330139160156, "logps/rejected": -201.09078979492188, "loss": 0.4219, "rewards/accuracies": 0.875, "rewards/chosen": -0.49023908376693726, "rewards/margins": 1.4753100872039795, "rewards/rejected": -1.9655492305755615, "step": 2256 }, { "epoch": 0.26, "learning_rate": 2.2460139364591944e-07, "logits/chosen": -2.424161195755005, "logits/rejected": -2.2094078063964844, "logps/chosen": -200.67315673828125, "logps/rejected": -300.233154296875, "loss": 0.4376, "rewards/accuracies": 0.75, "rewards/chosen": -0.6835014820098877, "rewards/margins": 0.9579112529754639, "rewards/rejected": -1.6414127349853516, "step": 2257 }, { "epoch": 0.26, "learning_rate": 2.2456596197000116e-07, "logits/chosen": -2.162682294845581, "logits/rejected": -2.2718005180358887, "logps/chosen": -366.96038818359375, "logps/rejected": -309.9407043457031, "loss": 0.4857, "rewards/accuracies": 0.75, "rewards/chosen": -0.45616716146469116, "rewards/margins": 0.8845772743225098, "rewards/rejected": -1.3407444953918457, "step": 2258 }, { "epoch": 0.26, "learning_rate": 2.2453053029408288e-07, "logits/chosen": -1.717231273651123, "logits/rejected": -1.729622483253479, "logps/chosen": -304.7221374511719, "logps/rejected": -224.7342071533203, "loss": 0.4007, "rewards/accuracies": 0.875, "rewards/chosen": -1.0255309343338013, "rewards/margins": 1.4945279359817505, "rewards/rejected": -2.5200588703155518, "step": 2259 }, { "epoch": 0.26, "learning_rate": 2.244950986181646e-07, "logits/chosen": -2.903620481491089, "logits/rejected": -3.0065786838531494, "logps/chosen": -126.12030792236328, "logps/rejected": -124.84638214111328, "loss": 0.4099, "rewards/accuracies": 0.75, "rewards/chosen": -0.7668179869651794, "rewards/margins": 1.0439938306808472, "rewards/rejected": -1.8108117580413818, "step": 2260 }, { "epoch": 0.26, "learning_rate": 2.2445966694224638e-07, "logits/chosen": -1.4250168800354004, "logits/rejected": -1.6482179164886475, "logps/chosen": -446.3251647949219, "logps/rejected": -386.45269775390625, "loss": 0.844, "rewards/accuracies": 0.625, "rewards/chosen": -1.22188401222229, "rewards/margins": 0.550480842590332, "rewards/rejected": -1.772364616394043, "step": 2261 }, { "epoch": 0.26, "learning_rate": 2.244242352663281e-07, "logits/chosen": -2.417090892791748, "logits/rejected": -2.3535163402557373, "logps/chosen": -301.97283935546875, "logps/rejected": -235.0905303955078, "loss": 0.6547, "rewards/accuracies": 0.625, "rewards/chosen": -0.9387972950935364, "rewards/margins": 1.1422076225280762, "rewards/rejected": -2.0810048580169678, "step": 2262 }, { "epoch": 0.26, "learning_rate": 2.2438880359040982e-07, "logits/chosen": -2.4417824745178223, "logits/rejected": -2.3752763271331787, "logps/chosen": -297.52081298828125, "logps/rejected": -355.20562744140625, "loss": 0.2223, "rewards/accuracies": 1.0, "rewards/chosen": -0.40751081705093384, "rewards/margins": 2.5277185440063477, "rewards/rejected": -2.935229539871216, "step": 2263 }, { "epoch": 0.26, "learning_rate": 2.2435337191449154e-07, "logits/chosen": -2.4068915843963623, "logits/rejected": -2.3153491020202637, "logps/chosen": -164.78616333007812, "logps/rejected": -276.5911865234375, "loss": 0.3633, "rewards/accuracies": 0.875, "rewards/chosen": -0.7363844513893127, "rewards/margins": 1.9175992012023926, "rewards/rejected": -2.6539835929870605, "step": 2264 }, { "epoch": 0.26, "learning_rate": 2.2431794023857327e-07, "logits/chosen": -2.3043301105499268, "logits/rejected": -2.311607837677002, "logps/chosen": -217.195068359375, "logps/rejected": -259.3789978027344, "loss": 0.356, "rewards/accuracies": 1.0, "rewards/chosen": -0.5587706565856934, "rewards/margins": 1.8194465637207031, "rewards/rejected": -2.3782172203063965, "step": 2265 }, { "epoch": 0.26, "learning_rate": 2.2428250856265502e-07, "logits/chosen": -2.1303837299346924, "logits/rejected": -2.335303544998169, "logps/chosen": -303.8712158203125, "logps/rejected": -202.3148193359375, "loss": 0.5488, "rewards/accuracies": 0.75, "rewards/chosen": -1.6040847301483154, "rewards/margins": 0.8857783079147339, "rewards/rejected": -2.4898629188537598, "step": 2266 }, { "epoch": 0.26, "learning_rate": 2.2424707688673674e-07, "logits/chosen": -2.758913993835449, "logits/rejected": -2.69862699508667, "logps/chosen": -222.27133178710938, "logps/rejected": -203.4457550048828, "loss": 0.2709, "rewards/accuracies": 0.875, "rewards/chosen": 0.008799456059932709, "rewards/margins": 1.7193584442138672, "rewards/rejected": -1.7105588912963867, "step": 2267 }, { "epoch": 0.26, "learning_rate": 2.2421164521081846e-07, "logits/chosen": -2.3648898601531982, "logits/rejected": -2.106973171234131, "logps/chosen": -247.87933349609375, "logps/rejected": -251.3011474609375, "loss": 0.5586, "rewards/accuracies": 0.5, "rewards/chosen": -0.7966480255126953, "rewards/margins": 1.0676133632659912, "rewards/rejected": -1.8642613887786865, "step": 2268 }, { "epoch": 0.26, "learning_rate": 2.2417621353490018e-07, "logits/chosen": -2.3848726749420166, "logits/rejected": -2.3342785835266113, "logps/chosen": -170.41616821289062, "logps/rejected": -354.4142761230469, "loss": 0.2298, "rewards/accuracies": 0.875, "rewards/chosen": -0.7970758676528931, "rewards/margins": 4.756030082702637, "rewards/rejected": -5.553106307983398, "step": 2269 }, { "epoch": 0.26, "learning_rate": 2.241407818589819e-07, "logits/chosen": -2.512169599533081, "logits/rejected": -2.5739758014678955, "logps/chosen": -263.180908203125, "logps/rejected": -251.89097595214844, "loss": 1.0385, "rewards/accuracies": 0.625, "rewards/chosen": -1.8877309560775757, "rewards/margins": 1.2428138256072998, "rewards/rejected": -3.130544662475586, "step": 2270 }, { "epoch": 0.26, "learning_rate": 2.2410535018306363e-07, "logits/chosen": -2.392455816268921, "logits/rejected": -2.6473910808563232, "logps/chosen": -302.7626647949219, "logps/rejected": -271.96136474609375, "loss": 0.3789, "rewards/accuracies": 0.875, "rewards/chosen": -0.6568416357040405, "rewards/margins": 1.130077600479126, "rewards/rejected": -1.7869189977645874, "step": 2271 }, { "epoch": 0.26, "learning_rate": 2.2406991850714537e-07, "logits/chosen": -2.5354835987091064, "logits/rejected": -2.6908912658691406, "logps/chosen": -331.61590576171875, "logps/rejected": -454.5199890136719, "loss": 0.1704, "rewards/accuracies": 0.875, "rewards/chosen": -0.1614624261856079, "rewards/margins": 3.1160359382629395, "rewards/rejected": -3.277498245239258, "step": 2272 }, { "epoch": 0.26, "learning_rate": 2.2403448683122712e-07, "logits/chosen": -2.0652294158935547, "logits/rejected": -2.34183931350708, "logps/chosen": -375.20361328125, "logps/rejected": -317.8718566894531, "loss": 0.3539, "rewards/accuracies": 0.75, "rewards/chosen": -0.06756527721881866, "rewards/margins": 1.2261501550674438, "rewards/rejected": -1.293715238571167, "step": 2273 }, { "epoch": 0.26, "learning_rate": 2.2399905515530884e-07, "logits/chosen": -2.223048210144043, "logits/rejected": -2.210000514984131, "logps/chosen": -414.92633056640625, "logps/rejected": -394.0779113769531, "loss": 0.4788, "rewards/accuracies": 0.75, "rewards/chosen": -0.5493685007095337, "rewards/margins": 1.5945743322372437, "rewards/rejected": -2.1439428329467773, "step": 2274 }, { "epoch": 0.26, "learning_rate": 2.2396362347939057e-07, "logits/chosen": -2.528329849243164, "logits/rejected": -2.5360496044158936, "logps/chosen": -412.5983581542969, "logps/rejected": -259.8576965332031, "loss": 0.2826, "rewards/accuracies": 0.875, "rewards/chosen": -0.9218227863311768, "rewards/margins": 2.051978826522827, "rewards/rejected": -2.973801612854004, "step": 2275 }, { "epoch": 0.26, "learning_rate": 2.239281918034723e-07, "logits/chosen": -2.328158378601074, "logits/rejected": -2.3716049194335938, "logps/chosen": -320.9047546386719, "logps/rejected": -324.80419921875, "loss": 0.3286, "rewards/accuracies": 0.875, "rewards/chosen": -0.65273118019104, "rewards/margins": 1.0278904438018799, "rewards/rejected": -1.68062162399292, "step": 2276 }, { "epoch": 0.26, "learning_rate": 2.2389276012755404e-07, "logits/chosen": -2.9384543895721436, "logits/rejected": -2.8506901264190674, "logps/chosen": -428.9738464355469, "logps/rejected": -195.60301208496094, "loss": 0.1492, "rewards/accuracies": 1.0, "rewards/chosen": -0.2437121570110321, "rewards/margins": 2.480186939239502, "rewards/rejected": -2.7238991260528564, "step": 2277 }, { "epoch": 0.26, "learning_rate": 2.2385732845163576e-07, "logits/chosen": -2.690838575363159, "logits/rejected": -2.635251045227051, "logps/chosen": -154.67420959472656, "logps/rejected": -209.71510314941406, "loss": 0.3206, "rewards/accuracies": 0.875, "rewards/chosen": -0.3113667666912079, "rewards/margins": 2.510406494140625, "rewards/rejected": -2.821773052215576, "step": 2278 }, { "epoch": 0.27, "learning_rate": 2.2382189677571748e-07, "logits/chosen": -2.4789392948150635, "logits/rejected": -2.2035329341888428, "logps/chosen": -291.5769958496094, "logps/rejected": -308.45428466796875, "loss": 0.2673, "rewards/accuracies": 0.875, "rewards/chosen": -1.5610976219177246, "rewards/margins": 2.3766348361968994, "rewards/rejected": -3.937732219696045, "step": 2279 }, { "epoch": 0.27, "learning_rate": 2.237864650997992e-07, "logits/chosen": -2.6111907958984375, "logits/rejected": -2.5697999000549316, "logps/chosen": -260.03460693359375, "logps/rejected": -225.35806274414062, "loss": 0.7894, "rewards/accuracies": 0.625, "rewards/chosen": -1.3287396430969238, "rewards/margins": 1.2374447584152222, "rewards/rejected": -2.5661842823028564, "step": 2280 }, { "epoch": 0.27, "learning_rate": 2.2375103342388093e-07, "logits/chosen": -2.7989232540130615, "logits/rejected": -2.6479830741882324, "logps/chosen": -186.59169006347656, "logps/rejected": -280.64556884765625, "loss": 0.261, "rewards/accuracies": 0.75, "rewards/chosen": -0.6203424334526062, "rewards/margins": 2.5256166458129883, "rewards/rejected": -3.1459593772888184, "step": 2281 }, { "epoch": 0.27, "learning_rate": 2.2371560174796265e-07, "logits/chosen": -2.494370460510254, "logits/rejected": -2.5689845085144043, "logps/chosen": -379.5869140625, "logps/rejected": -355.1335144042969, "loss": 0.237, "rewards/accuracies": 1.0, "rewards/chosen": -0.5111453533172607, "rewards/margins": 2.257485866546631, "rewards/rejected": -2.7686314582824707, "step": 2282 }, { "epoch": 0.27, "learning_rate": 2.2368017007204437e-07, "logits/chosen": -2.5488228797912598, "logits/rejected": -2.722444534301758, "logps/chosen": -433.7989501953125, "logps/rejected": -203.48477172851562, "loss": 0.3906, "rewards/accuracies": 0.75, "rewards/chosen": -0.6474804878234863, "rewards/margins": 1.3359726667404175, "rewards/rejected": -1.9834531545639038, "step": 2283 }, { "epoch": 0.27, "learning_rate": 2.2364473839612615e-07, "logits/chosen": -1.9583320617675781, "logits/rejected": -1.7748161554336548, "logps/chosen": -261.85101318359375, "logps/rejected": -303.9474182128906, "loss": 0.3652, "rewards/accuracies": 0.875, "rewards/chosen": -0.4710744023323059, "rewards/margins": 1.6275522708892822, "rewards/rejected": -2.0986266136169434, "step": 2284 }, { "epoch": 0.27, "learning_rate": 2.2360930672020787e-07, "logits/chosen": -2.562701463699341, "logits/rejected": -2.455134630203247, "logps/chosen": -183.12767028808594, "logps/rejected": -249.51431274414062, "loss": 0.1462, "rewards/accuracies": 1.0, "rewards/chosen": -0.1461396962404251, "rewards/margins": 2.270303249359131, "rewards/rejected": -2.41644287109375, "step": 2285 }, { "epoch": 0.27, "learning_rate": 2.235738750442896e-07, "logits/chosen": -2.279376983642578, "logits/rejected": -1.9575154781341553, "logps/chosen": -356.150390625, "logps/rejected": -340.01007080078125, "loss": 0.6355, "rewards/accuracies": 0.625, "rewards/chosen": -1.763382911682129, "rewards/margins": 0.6889535188674927, "rewards/rejected": -2.452336549758911, "step": 2286 }, { "epoch": 0.27, "learning_rate": 2.235384433683713e-07, "logits/chosen": -2.110706329345703, "logits/rejected": -2.1168668270111084, "logps/chosen": -328.3638916015625, "logps/rejected": -320.38287353515625, "loss": 0.2239, "rewards/accuracies": 1.0, "rewards/chosen": -0.8781938552856445, "rewards/margins": 1.8796828985214233, "rewards/rejected": -2.7578768730163574, "step": 2287 }, { "epoch": 0.27, "learning_rate": 2.2350301169245303e-07, "logits/chosen": -2.1837973594665527, "logits/rejected": -2.6691465377807617, "logps/chosen": -518.7034301757812, "logps/rejected": -295.5169982910156, "loss": 0.4145, "rewards/accuracies": 0.75, "rewards/chosen": -1.3184672594070435, "rewards/margins": 1.7954461574554443, "rewards/rejected": -3.1139135360717773, "step": 2288 }, { "epoch": 0.27, "learning_rate": 2.2346758001653478e-07, "logits/chosen": -2.2882866859436035, "logits/rejected": -2.299179792404175, "logps/chosen": -205.40994262695312, "logps/rejected": -275.7272644042969, "loss": 0.3276, "rewards/accuracies": 0.875, "rewards/chosen": -0.1974257528781891, "rewards/margins": 2.0034236907958984, "rewards/rejected": -2.2008492946624756, "step": 2289 }, { "epoch": 0.27, "learning_rate": 2.234321483406165e-07, "logits/chosen": -2.1982667446136475, "logits/rejected": -1.9386587142944336, "logps/chosen": -253.54417419433594, "logps/rejected": -290.667236328125, "loss": 0.7659, "rewards/accuracies": 0.5, "rewards/chosen": -1.0781185626983643, "rewards/margins": 0.5555004477500916, "rewards/rejected": -1.6336190700531006, "step": 2290 }, { "epoch": 0.27, "learning_rate": 2.2339671666469823e-07, "logits/chosen": -1.9820151329040527, "logits/rejected": -2.114626884460449, "logps/chosen": -351.70703125, "logps/rejected": -321.0057373046875, "loss": 0.1089, "rewards/accuracies": 1.0, "rewards/chosen": -0.07788477838039398, "rewards/margins": 3.316633701324463, "rewards/rejected": -3.3945186138153076, "step": 2291 }, { "epoch": 0.27, "learning_rate": 2.2336128498877995e-07, "logits/chosen": -2.333949089050293, "logits/rejected": -2.6794896125793457, "logps/chosen": -278.5251770019531, "logps/rejected": -151.92031860351562, "loss": 0.3445, "rewards/accuracies": 0.875, "rewards/chosen": -0.407683789730072, "rewards/margins": 1.7040380239486694, "rewards/rejected": -2.1117217540740967, "step": 2292 }, { "epoch": 0.27, "learning_rate": 2.2332585331286167e-07, "logits/chosen": -2.2470345497131348, "logits/rejected": -2.0355703830718994, "logps/chosen": -288.5887451171875, "logps/rejected": -349.6793212890625, "loss": 0.196, "rewards/accuracies": 1.0, "rewards/chosen": -0.113924540579319, "rewards/margins": 2.4374308586120605, "rewards/rejected": -2.5513553619384766, "step": 2293 }, { "epoch": 0.27, "learning_rate": 2.232904216369434e-07, "logits/chosen": -2.0353424549102783, "logits/rejected": -2.3459396362304688, "logps/chosen": -246.17379760742188, "logps/rejected": -159.01966857910156, "loss": 0.6313, "rewards/accuracies": 0.625, "rewards/chosen": -0.5082567930221558, "rewards/margins": 0.3887743353843689, "rewards/rejected": -0.8970310688018799, "step": 2294 }, { "epoch": 0.27, "learning_rate": 2.2325498996102514e-07, "logits/chosen": -2.2025644779205322, "logits/rejected": -2.180711269378662, "logps/chosen": -232.543212890625, "logps/rejected": -294.3747253417969, "loss": 0.1346, "rewards/accuracies": 1.0, "rewards/chosen": -0.7048602104187012, "rewards/margins": 2.755133867263794, "rewards/rejected": -3.459993839263916, "step": 2295 }, { "epoch": 0.27, "learning_rate": 2.232195582851069e-07, "logits/chosen": -2.6725873947143555, "logits/rejected": -2.5025510787963867, "logps/chosen": -247.4805908203125, "logps/rejected": -295.7008361816406, "loss": 0.1523, "rewards/accuracies": 1.0, "rewards/chosen": -0.31408771872520447, "rewards/margins": 2.938622236251831, "rewards/rejected": -3.2527101039886475, "step": 2296 }, { "epoch": 0.27, "learning_rate": 2.231841266091886e-07, "logits/chosen": -2.188382625579834, "logits/rejected": -1.93808114528656, "logps/chosen": -147.0696563720703, "logps/rejected": -318.56365966796875, "loss": 0.2794, "rewards/accuracies": 0.875, "rewards/chosen": -0.16401195526123047, "rewards/margins": 1.6380008459091187, "rewards/rejected": -1.8020129203796387, "step": 2297 }, { "epoch": 0.27, "learning_rate": 2.2314869493327033e-07, "logits/chosen": -2.8099894523620605, "logits/rejected": -2.8206005096435547, "logps/chosen": -238.7169189453125, "logps/rejected": -204.77975463867188, "loss": 0.5088, "rewards/accuracies": 0.75, "rewards/chosen": -0.7920150756835938, "rewards/margins": 1.0420554876327515, "rewards/rejected": -1.8340705633163452, "step": 2298 }, { "epoch": 0.27, "learning_rate": 2.2311326325735206e-07, "logits/chosen": -2.043713092803955, "logits/rejected": -2.1732397079467773, "logps/chosen": -360.9541015625, "logps/rejected": -370.5504150390625, "loss": 0.347, "rewards/accuracies": 0.875, "rewards/chosen": -0.4997970759868622, "rewards/margins": 1.2628229856491089, "rewards/rejected": -1.762619972229004, "step": 2299 }, { "epoch": 0.27, "learning_rate": 2.230778315814338e-07, "logits/chosen": -2.302518606185913, "logits/rejected": -2.5703089237213135, "logps/chosen": -314.6614990234375, "logps/rejected": -192.18283081054688, "loss": 0.3133, "rewards/accuracies": 0.875, "rewards/chosen": -0.416350781917572, "rewards/margins": 1.4463359117507935, "rewards/rejected": -1.8626867532730103, "step": 2300 }, { "epoch": 0.27, "learning_rate": 2.2304239990551553e-07, "logits/chosen": -2.2703709602355957, "logits/rejected": -2.6228830814361572, "logps/chosen": -381.0523681640625, "logps/rejected": -279.66351318359375, "loss": 0.4767, "rewards/accuracies": 0.75, "rewards/chosen": -0.7822766304016113, "rewards/margins": 2.0465314388275146, "rewards/rejected": -2.828808069229126, "step": 2301 }, { "epoch": 0.27, "learning_rate": 2.2300696822959725e-07, "logits/chosen": -2.1241133213043213, "logits/rejected": -1.9224019050598145, "logps/chosen": -114.99288940429688, "logps/rejected": -125.38633728027344, "loss": 1.0262, "rewards/accuracies": 0.375, "rewards/chosen": -1.3740273714065552, "rewards/margins": 0.45909762382507324, "rewards/rejected": -1.8331249952316284, "step": 2302 }, { "epoch": 0.27, "learning_rate": 2.2297153655367897e-07, "logits/chosen": -2.4934065341949463, "logits/rejected": -2.421907901763916, "logps/chosen": -102.96295166015625, "logps/rejected": -123.11402130126953, "loss": 0.3169, "rewards/accuracies": 0.875, "rewards/chosen": 0.37276020646095276, "rewards/margins": 1.272170066833496, "rewards/rejected": -0.899409830570221, "step": 2303 }, { "epoch": 0.27, "learning_rate": 2.229361048777607e-07, "logits/chosen": -2.5545639991760254, "logits/rejected": -2.7831151485443115, "logps/chosen": -254.76004028320312, "logps/rejected": -179.77658081054688, "loss": 0.4214, "rewards/accuracies": 0.75, "rewards/chosen": -0.7192851901054382, "rewards/margins": 1.2144399881362915, "rewards/rejected": -1.9337252378463745, "step": 2304 }, { "epoch": 0.27, "learning_rate": 2.2290067320184242e-07, "logits/chosen": -2.4477362632751465, "logits/rejected": -2.425642490386963, "logps/chosen": -195.9151153564453, "logps/rejected": -190.32839965820312, "loss": 0.2373, "rewards/accuracies": 1.0, "rewards/chosen": -0.9561547040939331, "rewards/margins": 1.8870506286621094, "rewards/rejected": -2.843205213546753, "step": 2305 }, { "epoch": 0.27, "learning_rate": 2.2286524152592416e-07, "logits/chosen": -2.405057191848755, "logits/rejected": -2.4684462547302246, "logps/chosen": -245.86767578125, "logps/rejected": -297.3666687011719, "loss": 0.2393, "rewards/accuracies": 1.0, "rewards/chosen": 0.3053625822067261, "rewards/margins": 2.4061999320983887, "rewards/rejected": -2.100837469100952, "step": 2306 }, { "epoch": 0.27, "learning_rate": 2.228298098500059e-07, "logits/chosen": -2.016921043395996, "logits/rejected": -1.8138549327850342, "logps/chosen": -230.95169067382812, "logps/rejected": -330.4517822265625, "loss": 0.7317, "rewards/accuracies": 0.5, "rewards/chosen": -0.9699602723121643, "rewards/margins": 1.362838625907898, "rewards/rejected": -2.332798957824707, "step": 2307 }, { "epoch": 0.27, "learning_rate": 2.2279437817408764e-07, "logits/chosen": -2.5597708225250244, "logits/rejected": -2.5255303382873535, "logps/chosen": -140.4930419921875, "logps/rejected": -200.0936279296875, "loss": 0.4475, "rewards/accuracies": 0.875, "rewards/chosen": -0.2884659767150879, "rewards/margins": 0.9685728549957275, "rewards/rejected": -1.2570388317108154, "step": 2308 }, { "epoch": 0.27, "learning_rate": 2.2275894649816936e-07, "logits/chosen": -2.2639126777648926, "logits/rejected": -2.4815425872802734, "logps/chosen": -336.80126953125, "logps/rejected": -454.7950439453125, "loss": 0.5802, "rewards/accuracies": 0.75, "rewards/chosen": -1.48478364944458, "rewards/margins": 1.5791040658950806, "rewards/rejected": -3.06388783454895, "step": 2309 }, { "epoch": 0.27, "learning_rate": 2.2272351482225108e-07, "logits/chosen": -2.6225337982177734, "logits/rejected": -2.893322467803955, "logps/chosen": -371.2069091796875, "logps/rejected": -313.2666931152344, "loss": 0.1322, "rewards/accuracies": 1.0, "rewards/chosen": -0.6648715734481812, "rewards/margins": 2.5355825424194336, "rewards/rejected": -3.2004542350769043, "step": 2310 }, { "epoch": 0.27, "learning_rate": 2.2268808314633283e-07, "logits/chosen": -2.0928092002868652, "logits/rejected": -2.4475271701812744, "logps/chosen": -313.5830078125, "logps/rejected": -239.19961547851562, "loss": 0.486, "rewards/accuracies": 0.5, "rewards/chosen": -0.6801308393478394, "rewards/margins": 1.5475234985351562, "rewards/rejected": -2.227654457092285, "step": 2311 }, { "epoch": 0.27, "learning_rate": 2.2265265147041455e-07, "logits/chosen": -2.45333194732666, "logits/rejected": -2.30489444732666, "logps/chosen": -253.33868408203125, "logps/rejected": -345.7149353027344, "loss": 0.3041, "rewards/accuracies": 0.75, "rewards/chosen": -0.06258188188076019, "rewards/margins": 1.9271495342254639, "rewards/rejected": -1.9897315502166748, "step": 2312 }, { "epoch": 0.27, "learning_rate": 2.2261721979449627e-07, "logits/chosen": -2.0333449840545654, "logits/rejected": -2.1790084838867188, "logps/chosen": -188.17816162109375, "logps/rejected": -148.2199249267578, "loss": 1.1117, "rewards/accuracies": 0.375, "rewards/chosen": -1.4381242990493774, "rewards/margins": -0.08943580090999603, "rewards/rejected": -1.3486886024475098, "step": 2313 }, { "epoch": 0.27, "learning_rate": 2.22581788118578e-07, "logits/chosen": -2.2416234016418457, "logits/rejected": -2.49039363861084, "logps/chosen": -256.3288879394531, "logps/rejected": -188.55014038085938, "loss": 0.2808, "rewards/accuracies": 0.875, "rewards/chosen": -0.9297614097595215, "rewards/margins": 1.8489290475845337, "rewards/rejected": -2.7786903381347656, "step": 2314 }, { "epoch": 0.27, "learning_rate": 2.2254635644265972e-07, "logits/chosen": -2.2613332271575928, "logits/rejected": -2.568969249725342, "logps/chosen": -525.091796875, "logps/rejected": -268.9901123046875, "loss": 0.3686, "rewards/accuracies": 0.875, "rewards/chosen": -0.6298060417175293, "rewards/margins": 2.542104721069336, "rewards/rejected": -3.171910524368286, "step": 2315 }, { "epoch": 0.27, "learning_rate": 2.2251092476674144e-07, "logits/chosen": -2.0121235847473145, "logits/rejected": -1.785934567451477, "logps/chosen": -231.7498321533203, "logps/rejected": -305.99676513671875, "loss": 0.5045, "rewards/accuracies": 0.75, "rewards/chosen": -1.2733073234558105, "rewards/margins": 1.1576037406921387, "rewards/rejected": -2.430911064147949, "step": 2316 }, { "epoch": 0.27, "learning_rate": 2.2247549309082316e-07, "logits/chosen": -2.380159378051758, "logits/rejected": -2.231494903564453, "logps/chosen": -361.9001770019531, "logps/rejected": -217.9887237548828, "loss": 0.292, "rewards/accuracies": 0.875, "rewards/chosen": -0.8085237145423889, "rewards/margins": 3.277121067047119, "rewards/rejected": -4.085644721984863, "step": 2317 }, { "epoch": 0.27, "learning_rate": 2.224400614149049e-07, "logits/chosen": -2.2861576080322266, "logits/rejected": -2.219399929046631, "logps/chosen": -216.46932983398438, "logps/rejected": -239.6901397705078, "loss": 0.9705, "rewards/accuracies": 0.375, "rewards/chosen": -1.3336951732635498, "rewards/margins": -0.29036611318588257, "rewards/rejected": -1.0433290004730225, "step": 2318 }, { "epoch": 0.27, "learning_rate": 2.2240462973898666e-07, "logits/chosen": -2.5508031845092773, "logits/rejected": -2.1986300945281982, "logps/chosen": -201.8201141357422, "logps/rejected": -246.69459533691406, "loss": 0.3183, "rewards/accuracies": 0.875, "rewards/chosen": -0.6003473997116089, "rewards/margins": 2.306387424468994, "rewards/rejected": -2.9067347049713135, "step": 2319 }, { "epoch": 0.27, "learning_rate": 2.2236919806306838e-07, "logits/chosen": -2.575049877166748, "logits/rejected": -2.343888282775879, "logps/chosen": -295.1542053222656, "logps/rejected": -309.12286376953125, "loss": 0.4632, "rewards/accuracies": 0.75, "rewards/chosen": -1.225027322769165, "rewards/margins": 1.6371146440505981, "rewards/rejected": -2.8621420860290527, "step": 2320 }, { "epoch": 0.27, "learning_rate": 2.223337663871501e-07, "logits/chosen": -2.4680352210998535, "logits/rejected": -2.668046236038208, "logps/chosen": -324.6842346191406, "logps/rejected": -161.828857421875, "loss": 0.3288, "rewards/accuracies": 0.875, "rewards/chosen": -0.6518596410751343, "rewards/margins": 1.8565640449523926, "rewards/rejected": -2.5084238052368164, "step": 2321 }, { "epoch": 0.27, "learning_rate": 2.2229833471123185e-07, "logits/chosen": -2.314807176589966, "logits/rejected": -2.6978659629821777, "logps/chosen": -163.9160919189453, "logps/rejected": -154.4246368408203, "loss": 0.2758, "rewards/accuracies": 1.0, "rewards/chosen": -0.8830258846282959, "rewards/margins": 2.120959520339966, "rewards/rejected": -3.0039854049682617, "step": 2322 }, { "epoch": 0.27, "learning_rate": 2.2226290303531357e-07, "logits/chosen": -1.9165089130401611, "logits/rejected": -2.296776056289673, "logps/chosen": -435.97540283203125, "logps/rejected": -287.1107482910156, "loss": 0.2322, "rewards/accuracies": 0.875, "rewards/chosen": 0.00701478123664856, "rewards/margins": 2.663872480392456, "rewards/rejected": -2.65685772895813, "step": 2323 }, { "epoch": 0.27, "learning_rate": 2.222274713593953e-07, "logits/chosen": -2.2731106281280518, "logits/rejected": -2.1114699840545654, "logps/chosen": -350.9409484863281, "logps/rejected": -369.7868347167969, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -0.1844133585691452, "rewards/margins": 4.561666488647461, "rewards/rejected": -4.746079444885254, "step": 2324 }, { "epoch": 0.27, "learning_rate": 2.2219203968347702e-07, "logits/chosen": -2.5088741779327393, "logits/rejected": -2.3814172744750977, "logps/chosen": -359.91192626953125, "logps/rejected": -224.7606964111328, "loss": 0.59, "rewards/accuracies": 0.75, "rewards/chosen": -1.1308521032333374, "rewards/margins": 1.452314019203186, "rewards/rejected": -2.5831661224365234, "step": 2325 }, { "epoch": 0.27, "learning_rate": 2.2215660800755874e-07, "logits/chosen": -2.120164155960083, "logits/rejected": -2.250788927078247, "logps/chosen": -340.92584228515625, "logps/rejected": -314.28118896484375, "loss": 0.2736, "rewards/accuracies": 0.75, "rewards/chosen": -0.7423826456069946, "rewards/margins": 2.693681001663208, "rewards/rejected": -3.436063528060913, "step": 2326 }, { "epoch": 0.27, "learning_rate": 2.2212117633164046e-07, "logits/chosen": -2.0943052768707275, "logits/rejected": -2.1481404304504395, "logps/chosen": -244.93551635742188, "logps/rejected": -202.8932342529297, "loss": 0.6297, "rewards/accuracies": 0.375, "rewards/chosen": -0.7181135416030884, "rewards/margins": 0.5158251523971558, "rewards/rejected": -1.2339386940002441, "step": 2327 }, { "epoch": 0.27, "learning_rate": 2.2208574465572218e-07, "logits/chosen": -2.0263278484344482, "logits/rejected": -2.355471611022949, "logps/chosen": -289.506103515625, "logps/rejected": -172.9910888671875, "loss": 0.666, "rewards/accuracies": 0.625, "rewards/chosen": -0.9220860004425049, "rewards/margins": 0.9235358238220215, "rewards/rejected": -1.8456218242645264, "step": 2328 }, { "epoch": 0.27, "learning_rate": 2.2205031297980393e-07, "logits/chosen": -2.146834135055542, "logits/rejected": -2.298522710800171, "logps/chosen": -428.6302795410156, "logps/rejected": -246.95297241210938, "loss": 0.7919, "rewards/accuracies": 0.625, "rewards/chosen": -1.0847526788711548, "rewards/margins": 0.7268729209899902, "rewards/rejected": -1.8116254806518555, "step": 2329 }, { "epoch": 0.27, "learning_rate": 2.2201488130388565e-07, "logits/chosen": -1.863234043121338, "logits/rejected": -1.9400355815887451, "logps/chosen": -381.6712646484375, "logps/rejected": -275.09674072265625, "loss": 0.4671, "rewards/accuracies": 0.75, "rewards/chosen": -0.41852056980133057, "rewards/margins": 1.6593189239501953, "rewards/rejected": -2.0778396129608154, "step": 2330 }, { "epoch": 0.27, "learning_rate": 2.219794496279674e-07, "logits/chosen": -2.5288686752319336, "logits/rejected": -2.535917043685913, "logps/chosen": -251.35084533691406, "logps/rejected": -236.79800415039062, "loss": 0.3348, "rewards/accuracies": 0.875, "rewards/chosen": -0.18949905037879944, "rewards/margins": 1.2721318006515503, "rewards/rejected": -1.4616308212280273, "step": 2331 }, { "epoch": 0.27, "learning_rate": 2.2194401795204913e-07, "logits/chosen": -2.2104930877685547, "logits/rejected": -2.3038458824157715, "logps/chosen": -351.9674377441406, "logps/rejected": -284.28363037109375, "loss": 0.3822, "rewards/accuracies": 0.875, "rewards/chosen": -1.1780307292938232, "rewards/margins": 1.0954787731170654, "rewards/rejected": -2.2735097408294678, "step": 2332 }, { "epoch": 0.27, "learning_rate": 2.2190858627613085e-07, "logits/chosen": -1.7868220806121826, "logits/rejected": -2.118410587310791, "logps/chosen": -478.75323486328125, "logps/rejected": -293.51214599609375, "loss": 0.3007, "rewards/accuracies": 0.875, "rewards/chosen": -1.0436301231384277, "rewards/margins": 2.0944817066192627, "rewards/rejected": -3.1381120681762695, "step": 2333 }, { "epoch": 0.27, "learning_rate": 2.218731546002126e-07, "logits/chosen": -2.1012794971466064, "logits/rejected": -2.005875825881958, "logps/chosen": -353.8478698730469, "logps/rejected": -201.50689697265625, "loss": 0.8106, "rewards/accuracies": 0.375, "rewards/chosen": -2.014651298522949, "rewards/margins": 0.16474232077598572, "rewards/rejected": -2.1793932914733887, "step": 2334 }, { "epoch": 0.27, "learning_rate": 2.2183772292429432e-07, "logits/chosen": -2.51424241065979, "logits/rejected": -2.5801382064819336, "logps/chosen": -346.51513671875, "logps/rejected": -296.2989807128906, "loss": 0.1794, "rewards/accuracies": 1.0, "rewards/chosen": -1.116164207458496, "rewards/margins": 2.0953683853149414, "rewards/rejected": -3.2115323543548584, "step": 2335 }, { "epoch": 0.27, "learning_rate": 2.2180229124837604e-07, "logits/chosen": -2.6300530433654785, "logits/rejected": -2.7171499729156494, "logps/chosen": -303.7069396972656, "logps/rejected": -283.9895324707031, "loss": 0.294, "rewards/accuracies": 0.875, "rewards/chosen": -0.5895646810531616, "rewards/margins": 1.8297762870788574, "rewards/rejected": -2.4193408489227295, "step": 2336 }, { "epoch": 0.27, "learning_rate": 2.2176685957245776e-07, "logits/chosen": -2.509228467941284, "logits/rejected": -2.6573312282562256, "logps/chosen": -203.83352661132812, "logps/rejected": -229.6935272216797, "loss": 0.3059, "rewards/accuracies": 0.875, "rewards/chosen": -0.336578369140625, "rewards/margins": 2.0018186569213867, "rewards/rejected": -2.3383970260620117, "step": 2337 }, { "epoch": 0.27, "learning_rate": 2.2173142789653948e-07, "logits/chosen": -2.3799374103546143, "logits/rejected": -2.5987653732299805, "logps/chosen": -414.4248352050781, "logps/rejected": -276.3588562011719, "loss": 0.8842, "rewards/accuracies": 0.75, "rewards/chosen": -1.9455180168151855, "rewards/margins": 0.35441192984580994, "rewards/rejected": -2.2999300956726074, "step": 2338 }, { "epoch": 0.27, "learning_rate": 2.216959962206212e-07, "logits/chosen": -2.3306522369384766, "logits/rejected": -2.086343288421631, "logps/chosen": -285.0040588378906, "logps/rejected": -334.10589599609375, "loss": 0.1882, "rewards/accuracies": 0.875, "rewards/chosen": -0.6397737264633179, "rewards/margins": 2.5017120838165283, "rewards/rejected": -3.1414859294891357, "step": 2339 }, { "epoch": 0.27, "learning_rate": 2.2166056454470296e-07, "logits/chosen": -1.756518840789795, "logits/rejected": -1.9333630800247192, "logps/chosen": -219.66305541992188, "logps/rejected": -216.55975341796875, "loss": 0.2766, "rewards/accuracies": 1.0, "rewards/chosen": -0.7006471753120422, "rewards/margins": 1.4319958686828613, "rewards/rejected": -2.132642984390259, "step": 2340 }, { "epoch": 0.27, "learning_rate": 2.2162513286878468e-07, "logits/chosen": -2.58113169670105, "logits/rejected": -2.59798526763916, "logps/chosen": -340.30902099609375, "logps/rejected": -216.14337158203125, "loss": 4.0131, "rewards/accuracies": 0.875, "rewards/chosen": -4.506505012512207, "rewards/margins": -1.5689713954925537, "rewards/rejected": -2.9375336170196533, "step": 2341 }, { "epoch": 0.27, "learning_rate": 2.215897011928664e-07, "logits/chosen": -2.0126798152923584, "logits/rejected": -2.52693772315979, "logps/chosen": -388.7501220703125, "logps/rejected": -222.02639770507812, "loss": 0.8632, "rewards/accuracies": 0.75, "rewards/chosen": -1.2303935289382935, "rewards/margins": 0.05533331632614136, "rewards/rejected": -1.28572678565979, "step": 2342 }, { "epoch": 0.27, "learning_rate": 2.2155426951694815e-07, "logits/chosen": -2.1979267597198486, "logits/rejected": -2.404275894165039, "logps/chosen": -461.43670654296875, "logps/rejected": -308.5429992675781, "loss": 0.5917, "rewards/accuracies": 0.75, "rewards/chosen": -0.5367857217788696, "rewards/margins": 1.3757575750350952, "rewards/rejected": -1.9125432968139648, "step": 2343 }, { "epoch": 0.27, "learning_rate": 2.2151883784102987e-07, "logits/chosen": -2.2873904705047607, "logits/rejected": -2.533367395401001, "logps/chosen": -501.81390380859375, "logps/rejected": -293.88690185546875, "loss": 0.2673, "rewards/accuracies": 1.0, "rewards/chosen": -0.3701082170009613, "rewards/margins": 1.9717859029769897, "rewards/rejected": -2.3418939113616943, "step": 2344 }, { "epoch": 0.27, "learning_rate": 2.2148340616511162e-07, "logits/chosen": -1.9484496116638184, "logits/rejected": -2.5285189151763916, "logps/chosen": -539.4857788085938, "logps/rejected": -249.08535766601562, "loss": 0.2609, "rewards/accuracies": 0.875, "rewards/chosen": 0.45619314908981323, "rewards/margins": 1.8701670169830322, "rewards/rejected": -1.4139738082885742, "step": 2345 }, { "epoch": 0.27, "learning_rate": 2.2144797448919334e-07, "logits/chosen": -2.2651586532592773, "logits/rejected": -1.8783873319625854, "logps/chosen": -135.37623596191406, "logps/rejected": -343.5937194824219, "loss": 0.1053, "rewards/accuracies": 1.0, "rewards/chosen": -0.24080611765384674, "rewards/margins": 3.577331304550171, "rewards/rejected": -3.8181374073028564, "step": 2346 }, { "epoch": 0.27, "learning_rate": 2.2141254281327506e-07, "logits/chosen": -2.309349298477173, "logits/rejected": -2.1380529403686523, "logps/chosen": -304.93145751953125, "logps/rejected": -270.6235656738281, "loss": 0.8768, "rewards/accuracies": 0.625, "rewards/chosen": -1.6129963397979736, "rewards/margins": 1.0161304473876953, "rewards/rejected": -2.62912654876709, "step": 2347 }, { "epoch": 0.27, "learning_rate": 2.2137711113735679e-07, "logits/chosen": -1.7736966609954834, "logits/rejected": -2.343919038772583, "logps/chosen": -294.3101501464844, "logps/rejected": -209.73196411132812, "loss": 0.2748, "rewards/accuracies": 0.875, "rewards/chosen": -0.4887973666191101, "rewards/margins": 2.028449058532715, "rewards/rejected": -2.5172462463378906, "step": 2348 }, { "epoch": 0.27, "learning_rate": 2.213416794614385e-07, "logits/chosen": -1.7552624940872192, "logits/rejected": -1.8386038541793823, "logps/chosen": -238.33078002929688, "logps/rejected": -265.6388854980469, "loss": 0.482, "rewards/accuracies": 0.75, "rewards/chosen": -0.25377997756004333, "rewards/margins": 1.4614430665969849, "rewards/rejected": -1.7152231931686401, "step": 2349 }, { "epoch": 0.27, "learning_rate": 2.2130624778552023e-07, "logits/chosen": -2.4830029010772705, "logits/rejected": -2.403174877166748, "logps/chosen": -267.6897888183594, "logps/rejected": -218.0553741455078, "loss": 1.1432, "rewards/accuracies": 0.375, "rewards/chosen": -1.9087505340576172, "rewards/margins": 0.1288871467113495, "rewards/rejected": -2.03763747215271, "step": 2350 }, { "epoch": 0.27, "learning_rate": 2.2127081610960198e-07, "logits/chosen": -2.089923143386841, "logits/rejected": -2.131441593170166, "logps/chosen": -349.5634460449219, "logps/rejected": -408.6036071777344, "loss": 0.9031, "rewards/accuracies": 0.625, "rewards/chosen": -1.50191330909729, "rewards/margins": 0.676645040512085, "rewards/rejected": -2.178558349609375, "step": 2351 }, { "epoch": 0.27, "learning_rate": 2.212353844336837e-07, "logits/chosen": -1.6187704801559448, "logits/rejected": -2.0322279930114746, "logps/chosen": -638.4826049804688, "logps/rejected": -365.4383239746094, "loss": 0.2936, "rewards/accuracies": 0.875, "rewards/chosen": -0.6537527441978455, "rewards/margins": 2.2139840126037598, "rewards/rejected": -2.86773681640625, "step": 2352 }, { "epoch": 0.27, "learning_rate": 2.2119995275776542e-07, "logits/chosen": -2.3770034313201904, "logits/rejected": -2.2488796710968018, "logps/chosen": -197.54150390625, "logps/rejected": -279.10321044921875, "loss": 0.3072, "rewards/accuracies": 0.75, "rewards/chosen": -1.4318734407424927, "rewards/margins": 2.443850040435791, "rewards/rejected": -3.8757236003875732, "step": 2353 }, { "epoch": 0.27, "learning_rate": 2.2116452108184717e-07, "logits/chosen": -2.7970478534698486, "logits/rejected": -2.759305238723755, "logps/chosen": -244.17694091796875, "logps/rejected": -193.90567016601562, "loss": 0.3423, "rewards/accuracies": 0.75, "rewards/chosen": -1.149188756942749, "rewards/margins": 1.456141710281372, "rewards/rejected": -2.605330467224121, "step": 2354 }, { "epoch": 0.27, "learning_rate": 2.211290894059289e-07, "logits/chosen": -2.320312261581421, "logits/rejected": -2.180847406387329, "logps/chosen": -213.18814086914062, "logps/rejected": -255.67953491210938, "loss": 0.3538, "rewards/accuracies": 0.75, "rewards/chosen": -0.7515394687652588, "rewards/margins": 1.5918781757354736, "rewards/rejected": -2.3434176445007324, "step": 2355 }, { "epoch": 0.27, "learning_rate": 2.2109365773001064e-07, "logits/chosen": -2.280881643295288, "logits/rejected": -2.292393922805786, "logps/chosen": -159.19058227539062, "logps/rejected": -260.7165832519531, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": -0.06148223578929901, "rewards/margins": 3.5465002059936523, "rewards/rejected": -3.6079823970794678, "step": 2356 }, { "epoch": 0.27, "learning_rate": 2.2105822605409236e-07, "logits/chosen": -2.5721921920776367, "logits/rejected": -2.495333671569824, "logps/chosen": -316.4535827636719, "logps/rejected": -347.248291015625, "loss": 0.4416, "rewards/accuracies": 0.625, "rewards/chosen": -0.9478765726089478, "rewards/margins": 2.292473793029785, "rewards/rejected": -3.2403502464294434, "step": 2357 }, { "epoch": 0.27, "learning_rate": 2.2102279437817409e-07, "logits/chosen": -2.196408987045288, "logits/rejected": -2.2882773876190186, "logps/chosen": -261.9180603027344, "logps/rejected": -188.11013793945312, "loss": 0.465, "rewards/accuracies": 0.875, "rewards/chosen": -1.1776363849639893, "rewards/margins": 1.186735987663269, "rewards/rejected": -2.3643722534179688, "step": 2358 }, { "epoch": 0.27, "learning_rate": 2.209873627022558e-07, "logits/chosen": -2.1748299598693848, "logits/rejected": -2.372422218322754, "logps/chosen": -279.1041259765625, "logps/rejected": -361.0118408203125, "loss": 0.2135, "rewards/accuracies": 1.0, "rewards/chosen": -0.683375895023346, "rewards/margins": 2.211364507675171, "rewards/rejected": -2.894740343093872, "step": 2359 }, { "epoch": 0.27, "learning_rate": 2.2095193102633753e-07, "logits/chosen": -2.4749372005462646, "logits/rejected": -2.365464210510254, "logps/chosen": -361.41766357421875, "logps/rejected": -424.8029479980469, "loss": 0.5578, "rewards/accuracies": 0.75, "rewards/chosen": -0.9799612760543823, "rewards/margins": 2.590235710144043, "rewards/rejected": -3.570197105407715, "step": 2360 }, { "epoch": 0.27, "learning_rate": 2.2091649935041925e-07, "logits/chosen": -2.3847100734710693, "logits/rejected": -2.5419487953186035, "logps/chosen": -239.45211791992188, "logps/rejected": -195.13766479492188, "loss": 3.1153, "rewards/accuracies": 0.625, "rewards/chosen": -3.1817626953125, "rewards/margins": -2.178901433944702, "rewards/rejected": -1.0028612613677979, "step": 2361 }, { "epoch": 0.27, "learning_rate": 2.2088106767450097e-07, "logits/chosen": -1.8170108795166016, "logits/rejected": -1.419071912765503, "logps/chosen": -461.3736572265625, "logps/rejected": -471.66107177734375, "loss": 0.3664, "rewards/accuracies": 0.875, "rewards/chosen": -0.22786402702331543, "rewards/margins": 1.3093289136886597, "rewards/rejected": -1.537192940711975, "step": 2362 }, { "epoch": 0.27, "learning_rate": 2.2084563599858272e-07, "logits/chosen": -2.3640308380126953, "logits/rejected": -2.26580548286438, "logps/chosen": -276.943603515625, "logps/rejected": -315.0652770996094, "loss": 10.3895, "rewards/accuracies": 0.875, "rewards/chosen": -11.030027389526367, "rewards/margins": -7.436212539672852, "rewards/rejected": -3.5938143730163574, "step": 2363 }, { "epoch": 0.28, "learning_rate": 2.2081020432266444e-07, "logits/chosen": -1.7471156120300293, "logits/rejected": -1.7002453804016113, "logps/chosen": -232.1814727783203, "logps/rejected": -287.02362060546875, "loss": 0.5856, "rewards/accuracies": 0.875, "rewards/chosen": -0.297282338142395, "rewards/margins": 1.506813406944275, "rewards/rejected": -1.8040958642959595, "step": 2364 }, { "epoch": 0.28, "learning_rate": 2.2077477264674617e-07, "logits/chosen": -2.257291793823242, "logits/rejected": -2.2081761360168457, "logps/chosen": -190.15274047851562, "logps/rejected": -215.10765075683594, "loss": 0.2746, "rewards/accuracies": 0.875, "rewards/chosen": -0.7106113433837891, "rewards/margins": 2.6461777687072754, "rewards/rejected": -3.3567891120910645, "step": 2365 }, { "epoch": 0.28, "learning_rate": 2.2073934097082792e-07, "logits/chosen": -2.0045714378356934, "logits/rejected": -2.0284788608551025, "logps/chosen": -155.90640258789062, "logps/rejected": -326.88470458984375, "loss": 0.6921, "rewards/accuracies": 0.75, "rewards/chosen": -1.0469673871994019, "rewards/margins": 1.4492459297180176, "rewards/rejected": -2.496213436126709, "step": 2366 }, { "epoch": 0.28, "learning_rate": 2.2070390929490966e-07, "logits/chosen": -2.3875529766082764, "logits/rejected": -2.1453328132629395, "logps/chosen": -341.0448913574219, "logps/rejected": -323.91290283203125, "loss": 0.343, "rewards/accuracies": 0.75, "rewards/chosen": -0.2551132142543793, "rewards/margins": 2.9822468757629395, "rewards/rejected": -3.2373600006103516, "step": 2367 }, { "epoch": 0.28, "learning_rate": 2.2066847761899139e-07, "logits/chosen": -2.1235127449035645, "logits/rejected": -2.106849193572998, "logps/chosen": -242.9930419921875, "logps/rejected": -218.91946411132812, "loss": 0.5839, "rewards/accuracies": 0.625, "rewards/chosen": -0.625690221786499, "rewards/margins": 2.3765556812286377, "rewards/rejected": -3.002246141433716, "step": 2368 }, { "epoch": 0.28, "learning_rate": 2.206330459430731e-07, "logits/chosen": -2.2457399368286133, "logits/rejected": -2.063079357147217, "logps/chosen": -308.23699951171875, "logps/rejected": -342.2718200683594, "loss": 0.384, "rewards/accuracies": 0.875, "rewards/chosen": -0.5508841276168823, "rewards/margins": 1.452219009399414, "rewards/rejected": -2.003103256225586, "step": 2369 }, { "epoch": 0.28, "learning_rate": 2.2059761426715483e-07, "logits/chosen": -2.6948163509368896, "logits/rejected": -2.5903496742248535, "logps/chosen": -116.08805084228516, "logps/rejected": -467.1485595703125, "loss": 0.2809, "rewards/accuracies": 0.875, "rewards/chosen": -0.3716392517089844, "rewards/margins": 2.226377010345459, "rewards/rejected": -2.5980162620544434, "step": 2370 }, { "epoch": 0.28, "learning_rate": 2.2056218259123655e-07, "logits/chosen": -2.7818942070007324, "logits/rejected": -2.7699718475341797, "logps/chosen": -123.5443115234375, "logps/rejected": -178.60928344726562, "loss": 0.2879, "rewards/accuracies": 0.875, "rewards/chosen": -0.6243726015090942, "rewards/margins": 2.520885705947876, "rewards/rejected": -3.1452584266662598, "step": 2371 }, { "epoch": 0.28, "learning_rate": 2.2052675091531827e-07, "logits/chosen": -1.8245190382003784, "logits/rejected": -1.9161250591278076, "logps/chosen": -231.40274047851562, "logps/rejected": -218.56686401367188, "loss": 0.839, "rewards/accuracies": 0.5, "rewards/chosen": -1.2886322736740112, "rewards/margins": 1.046738862991333, "rewards/rejected": -2.3353710174560547, "step": 2372 }, { "epoch": 0.28, "learning_rate": 2.204913192394e-07, "logits/chosen": -2.285767078399658, "logits/rejected": -2.2660446166992188, "logps/chosen": -189.80606079101562, "logps/rejected": -190.5797882080078, "loss": 0.5488, "rewards/accuracies": 0.75, "rewards/chosen": -0.6608383655548096, "rewards/margins": 1.2405519485473633, "rewards/rejected": -1.9013901948928833, "step": 2373 }, { "epoch": 0.28, "learning_rate": 2.2045588756348175e-07, "logits/chosen": -2.4185690879821777, "logits/rejected": -2.5796079635620117, "logps/chosen": -412.8140563964844, "logps/rejected": -181.40756225585938, "loss": 0.5848, "rewards/accuracies": 0.875, "rewards/chosen": -0.3630870580673218, "rewards/margins": 0.6978697776794434, "rewards/rejected": -1.0609568357467651, "step": 2374 }, { "epoch": 0.28, "learning_rate": 2.2042045588756347e-07, "logits/chosen": -2.2554476261138916, "logits/rejected": -2.082094192504883, "logps/chosen": -209.9491424560547, "logps/rejected": -324.65032958984375, "loss": 0.7433, "rewards/accuracies": 0.625, "rewards/chosen": -1.5812917947769165, "rewards/margins": 0.6294167041778564, "rewards/rejected": -2.2107086181640625, "step": 2375 }, { "epoch": 0.28, "learning_rate": 2.203850242116452e-07, "logits/chosen": -2.269076347351074, "logits/rejected": -2.480708122253418, "logps/chosen": -359.80169677734375, "logps/rejected": -191.16073608398438, "loss": 0.4795, "rewards/accuracies": 0.75, "rewards/chosen": -1.3389315605163574, "rewards/margins": 1.5065524578094482, "rewards/rejected": -2.8454840183258057, "step": 2376 }, { "epoch": 0.28, "learning_rate": 2.203495925357269e-07, "logits/chosen": -2.2147653102874756, "logits/rejected": -2.2513058185577393, "logps/chosen": -312.72283935546875, "logps/rejected": -269.85943603515625, "loss": 0.281, "rewards/accuracies": 0.75, "rewards/chosen": -0.7431227564811707, "rewards/margins": 2.270219326019287, "rewards/rejected": -3.0133419036865234, "step": 2377 }, { "epoch": 0.28, "learning_rate": 2.2031416085980866e-07, "logits/chosen": -2.449279308319092, "logits/rejected": -2.4968695640563965, "logps/chosen": -327.5238037109375, "logps/rejected": -295.8417053222656, "loss": 0.2015, "rewards/accuracies": 0.875, "rewards/chosen": 0.38361847400665283, "rewards/margins": 2.9238224029541016, "rewards/rejected": -2.5402040481567383, "step": 2378 }, { "epoch": 0.28, "learning_rate": 2.202787291838904e-07, "logits/chosen": -2.1470117568969727, "logits/rejected": -2.165759563446045, "logps/chosen": -149.8660125732422, "logps/rejected": -246.30050659179688, "loss": 0.2654, "rewards/accuracies": 0.875, "rewards/chosen": -0.30056631565093994, "rewards/margins": 2.982776641845703, "rewards/rejected": -3.2833428382873535, "step": 2379 }, { "epoch": 0.28, "learning_rate": 2.2024329750797213e-07, "logits/chosen": -2.230952739715576, "logits/rejected": -2.0624520778656006, "logps/chosen": -278.165283203125, "logps/rejected": -283.8473815917969, "loss": 0.3396, "rewards/accuracies": 0.75, "rewards/chosen": -0.3423001766204834, "rewards/margins": 1.7550339698791504, "rewards/rejected": -2.097334146499634, "step": 2380 }, { "epoch": 0.28, "learning_rate": 2.2020786583205385e-07, "logits/chosen": -2.8514208793640137, "logits/rejected": -2.626286268234253, "logps/chosen": -272.5777282714844, "logps/rejected": -347.6559143066406, "loss": 0.1595, "rewards/accuracies": 0.875, "rewards/chosen": -0.4610629677772522, "rewards/margins": 2.96547794342041, "rewards/rejected": -3.4265408515930176, "step": 2381 }, { "epoch": 0.28, "learning_rate": 2.2017243415613558e-07, "logits/chosen": -2.578042984008789, "logits/rejected": -2.269885540008545, "logps/chosen": -233.7438201904297, "logps/rejected": -315.58843994140625, "loss": 0.2775, "rewards/accuracies": 0.875, "rewards/chosen": -1.0502796173095703, "rewards/margins": 2.48714542388916, "rewards/rejected": -3.5374250411987305, "step": 2382 }, { "epoch": 0.28, "learning_rate": 2.201370024802173e-07, "logits/chosen": -2.6171793937683105, "logits/rejected": -2.82112455368042, "logps/chosen": -263.806396484375, "logps/rejected": -194.5598907470703, "loss": 0.6574, "rewards/accuracies": 0.75, "rewards/chosen": -1.1325020790100098, "rewards/margins": 0.3997999429702759, "rewards/rejected": -1.5323021411895752, "step": 2383 }, { "epoch": 0.28, "learning_rate": 2.2010157080429902e-07, "logits/chosen": -2.57999587059021, "logits/rejected": -2.7716307640075684, "logps/chosen": -250.9347686767578, "logps/rejected": -237.021240234375, "loss": 0.4301, "rewards/accuracies": 0.875, "rewards/chosen": -0.6953539848327637, "rewards/margins": 2.4878132343292236, "rewards/rejected": -3.1831672191619873, "step": 2384 }, { "epoch": 0.28, "learning_rate": 2.2006613912838077e-07, "logits/chosen": -2.7224910259246826, "logits/rejected": -2.7879536151885986, "logps/chosen": -306.70684814453125, "logps/rejected": -228.5615692138672, "loss": 0.3768, "rewards/accuracies": 0.875, "rewards/chosen": -1.5941888093948364, "rewards/margins": 2.301236391067505, "rewards/rejected": -3.8954248428344727, "step": 2385 }, { "epoch": 0.28, "learning_rate": 2.200307074524625e-07, "logits/chosen": -2.0413150787353516, "logits/rejected": -1.8335734605789185, "logps/chosen": -253.45407104492188, "logps/rejected": -311.700927734375, "loss": 0.7677, "rewards/accuracies": 0.75, "rewards/chosen": -0.8027268648147583, "rewards/margins": 1.2913553714752197, "rewards/rejected": -2.0940823554992676, "step": 2386 }, { "epoch": 0.28, "learning_rate": 2.199952757765442e-07, "logits/chosen": -2.5803537368774414, "logits/rejected": -2.466660976409912, "logps/chosen": -281.01568603515625, "logps/rejected": -214.20652770996094, "loss": 0.4697, "rewards/accuracies": 0.75, "rewards/chosen": -0.8788562417030334, "rewards/margins": 1.4742250442504883, "rewards/rejected": -2.353081226348877, "step": 2387 }, { "epoch": 0.28, "learning_rate": 2.1995984410062593e-07, "logits/chosen": -2.6785192489624023, "logits/rejected": -2.4651811122894287, "logps/chosen": -242.13633728027344, "logps/rejected": -266.4056091308594, "loss": 0.3374, "rewards/accuracies": 0.75, "rewards/chosen": -0.21231329441070557, "rewards/margins": 1.938291311264038, "rewards/rejected": -2.150604724884033, "step": 2388 }, { "epoch": 0.28, "learning_rate": 2.1992441242470768e-07, "logits/chosen": -2.461221218109131, "logits/rejected": -2.525752305984497, "logps/chosen": -242.9658966064453, "logps/rejected": -209.76199340820312, "loss": 0.2382, "rewards/accuracies": 0.875, "rewards/chosen": -0.43106257915496826, "rewards/margins": 2.6564924716949463, "rewards/rejected": -3.087555408477783, "step": 2389 }, { "epoch": 0.28, "learning_rate": 2.1988898074878943e-07, "logits/chosen": -2.5665929317474365, "logits/rejected": -2.625302314758301, "logps/chosen": -474.7854919433594, "logps/rejected": -259.02313232421875, "loss": 0.6549, "rewards/accuracies": 0.75, "rewards/chosen": -1.2976677417755127, "rewards/margins": 0.7028109431266785, "rewards/rejected": -2.000478506088257, "step": 2390 }, { "epoch": 0.28, "learning_rate": 2.1985354907287115e-07, "logits/chosen": -2.4631879329681396, "logits/rejected": -2.540917158126831, "logps/chosen": -251.0959014892578, "logps/rejected": -281.427001953125, "loss": 0.2746, "rewards/accuracies": 0.875, "rewards/chosen": -0.5039739608764648, "rewards/margins": 2.23482346534729, "rewards/rejected": -2.738797426223755, "step": 2391 }, { "epoch": 0.28, "learning_rate": 2.1981811739695288e-07, "logits/chosen": -2.4236018657684326, "logits/rejected": -2.431947708129883, "logps/chosen": -357.7934265136719, "logps/rejected": -281.3615417480469, "loss": 0.3079, "rewards/accuracies": 0.875, "rewards/chosen": -0.9061816930770874, "rewards/margins": 1.9344557523727417, "rewards/rejected": -2.840637445449829, "step": 2392 }, { "epoch": 0.28, "learning_rate": 2.197826857210346e-07, "logits/chosen": -2.051589250564575, "logits/rejected": -2.0300133228302, "logps/chosen": -299.17828369140625, "logps/rejected": -335.50091552734375, "loss": 0.3633, "rewards/accuracies": 0.75, "rewards/chosen": -0.6886791586875916, "rewards/margins": 2.1329829692840576, "rewards/rejected": -2.821661949157715, "step": 2393 }, { "epoch": 0.28, "learning_rate": 2.1974725404511632e-07, "logits/chosen": -1.6360447406768799, "logits/rejected": -2.1216464042663574, "logps/chosen": -257.3417053222656, "logps/rejected": -188.1143035888672, "loss": 1.2082, "rewards/accuracies": 0.75, "rewards/chosen": -2.342543840408325, "rewards/margins": 0.494704008102417, "rewards/rejected": -2.837247848510742, "step": 2394 }, { "epoch": 0.28, "learning_rate": 2.1971182236919804e-07, "logits/chosen": -2.3122386932373047, "logits/rejected": -2.334972381591797, "logps/chosen": -315.799072265625, "logps/rejected": -329.4121398925781, "loss": 0.2584, "rewards/accuracies": 0.875, "rewards/chosen": -0.36763039231300354, "rewards/margins": 2.2250094413757324, "rewards/rejected": -2.592639923095703, "step": 2395 }, { "epoch": 0.28, "learning_rate": 2.196763906932798e-07, "logits/chosen": -1.7906460762023926, "logits/rejected": -1.7463388442993164, "logps/chosen": -499.3748779296875, "logps/rejected": -486.61767578125, "loss": 0.6052, "rewards/accuracies": 0.875, "rewards/chosen": -1.2680559158325195, "rewards/margins": 1.432629942893982, "rewards/rejected": -2.700685977935791, "step": 2396 }, { "epoch": 0.28, "learning_rate": 2.1964095901736151e-07, "logits/chosen": -2.1510205268859863, "logits/rejected": -2.5302445888519287, "logps/chosen": -237.18040466308594, "logps/rejected": -148.45404052734375, "loss": 1.0678, "rewards/accuracies": 0.875, "rewards/chosen": -1.429471492767334, "rewards/margins": 0.029442042112350464, "rewards/rejected": -1.4589135646820068, "step": 2397 }, { "epoch": 0.28, "learning_rate": 2.1960552734144324e-07, "logits/chosen": -2.531017780303955, "logits/rejected": -2.3079235553741455, "logps/chosen": -184.8892822265625, "logps/rejected": -338.419921875, "loss": 0.3147, "rewards/accuracies": 0.875, "rewards/chosen": -0.6930748224258423, "rewards/margins": 2.5709385871887207, "rewards/rejected": -3.2640132904052734, "step": 2398 }, { "epoch": 0.28, "learning_rate": 2.1957009566552496e-07, "logits/chosen": -2.1965672969818115, "logits/rejected": -2.2260260581970215, "logps/chosen": -244.67416381835938, "logps/rejected": -206.92391967773438, "loss": 2.0456, "rewards/accuracies": 0.75, "rewards/chosen": -3.6251540184020996, "rewards/margins": -0.9866768717765808, "rewards/rejected": -2.638477087020874, "step": 2399 }, { "epoch": 0.28, "learning_rate": 2.1953466398960668e-07, "logits/chosen": -2.5782532691955566, "logits/rejected": -2.642744779586792, "logps/chosen": -184.93133544921875, "logps/rejected": -288.57537841796875, "loss": 0.829, "rewards/accuracies": 0.625, "rewards/chosen": -0.7896428108215332, "rewards/margins": 1.7281076908111572, "rewards/rejected": -2.5177507400512695, "step": 2400 }, { "epoch": 0.28, "learning_rate": 2.1949923231368845e-07, "logits/chosen": -2.104445219039917, "logits/rejected": -2.114363193511963, "logps/chosen": -250.6646728515625, "logps/rejected": -288.03546142578125, "loss": 0.455, "rewards/accuracies": 0.75, "rewards/chosen": -0.8317999839782715, "rewards/margins": 1.4594800472259521, "rewards/rejected": -2.2912800312042236, "step": 2401 }, { "epoch": 0.28, "learning_rate": 2.1946380063777018e-07, "logits/chosen": -2.529813766479492, "logits/rejected": -2.702843427658081, "logps/chosen": -374.92523193359375, "logps/rejected": -218.46713256835938, "loss": 0.42, "rewards/accuracies": 0.75, "rewards/chosen": -0.9739036560058594, "rewards/margins": 1.1215391159057617, "rewards/rejected": -2.095442771911621, "step": 2402 }, { "epoch": 0.28, "learning_rate": 2.194283689618519e-07, "logits/chosen": -2.7392899990081787, "logits/rejected": -2.7655205726623535, "logps/chosen": -237.51446533203125, "logps/rejected": -239.36331176757812, "loss": 0.5113, "rewards/accuracies": 0.875, "rewards/chosen": -1.1517144441604614, "rewards/margins": 2.620999336242676, "rewards/rejected": -3.7727136611938477, "step": 2403 }, { "epoch": 0.28, "learning_rate": 2.1939293728593362e-07, "logits/chosen": -1.9733822345733643, "logits/rejected": -1.9166345596313477, "logps/chosen": -537.4116821289062, "logps/rejected": -406.8906555175781, "loss": 0.4769, "rewards/accuracies": 0.875, "rewards/chosen": -0.8158719539642334, "rewards/margins": 1.1274051666259766, "rewards/rejected": -1.94327712059021, "step": 2404 }, { "epoch": 0.28, "learning_rate": 2.1935750561001534e-07, "logits/chosen": -2.221069812774658, "logits/rejected": -2.236828565597534, "logps/chosen": -273.2884826660156, "logps/rejected": -276.7657470703125, "loss": 0.5332, "rewards/accuracies": 0.875, "rewards/chosen": -2.9262239933013916, "rewards/margins": 1.9250211715698242, "rewards/rejected": -4.851245403289795, "step": 2405 }, { "epoch": 0.28, "learning_rate": 2.1932207393409707e-07, "logits/chosen": -2.3161580562591553, "logits/rejected": -2.1704442501068115, "logps/chosen": -261.67535400390625, "logps/rejected": -207.7584228515625, "loss": 0.4286, "rewards/accuracies": 0.875, "rewards/chosen": -0.4458218812942505, "rewards/margins": 0.7641977667808533, "rewards/rejected": -1.210019826889038, "step": 2406 }, { "epoch": 0.28, "learning_rate": 2.192866422581788e-07, "logits/chosen": -2.5317957401275635, "logits/rejected": -2.573197364807129, "logps/chosen": -396.7981872558594, "logps/rejected": -340.38177490234375, "loss": 0.1522, "rewards/accuracies": 1.0, "rewards/chosen": 0.43957602977752686, "rewards/margins": 3.1361522674560547, "rewards/rejected": -2.6965763568878174, "step": 2407 }, { "epoch": 0.28, "learning_rate": 2.1925121058226054e-07, "logits/chosen": -2.3984756469726562, "logits/rejected": -2.5308644771575928, "logps/chosen": -319.2395935058594, "logps/rejected": -283.64202880859375, "loss": 0.5217, "rewards/accuracies": 0.75, "rewards/chosen": -0.22181940078735352, "rewards/margins": 1.814698576927185, "rewards/rejected": -2.036517858505249, "step": 2408 }, { "epoch": 0.28, "learning_rate": 2.1921577890634226e-07, "logits/chosen": -2.2337279319763184, "logits/rejected": -2.0121798515319824, "logps/chosen": -207.4919891357422, "logps/rejected": -289.39202880859375, "loss": 0.7214, "rewards/accuracies": 0.625, "rewards/chosen": -1.4415642023086548, "rewards/margins": 1.1816262006759644, "rewards/rejected": -2.623190402984619, "step": 2409 }, { "epoch": 0.28, "learning_rate": 2.1918034723042398e-07, "logits/chosen": -2.802725315093994, "logits/rejected": -2.9099221229553223, "logps/chosen": -175.3132781982422, "logps/rejected": -273.21148681640625, "loss": 0.1209, "rewards/accuracies": 0.875, "rewards/chosen": -0.4772554039955139, "rewards/margins": 4.014412879943848, "rewards/rejected": -4.491668701171875, "step": 2410 }, { "epoch": 0.28, "learning_rate": 2.191449155545057e-07, "logits/chosen": -2.5199801921844482, "logits/rejected": -2.580404043197632, "logps/chosen": -315.50579833984375, "logps/rejected": -261.89520263671875, "loss": 0.4488, "rewards/accuracies": 0.75, "rewards/chosen": -0.6685268878936768, "rewards/margins": 1.4561865329742432, "rewards/rejected": -2.12471342086792, "step": 2411 }, { "epoch": 0.28, "learning_rate": 2.1910948387858742e-07, "logits/chosen": -2.7414276599884033, "logits/rejected": -2.4749064445495605, "logps/chosen": -224.6689453125, "logps/rejected": -227.08041381835938, "loss": 0.5519, "rewards/accuracies": 0.625, "rewards/chosen": -0.6149680018424988, "rewards/margins": 1.4762042760849, "rewards/rejected": -2.091172218322754, "step": 2412 }, { "epoch": 0.28, "learning_rate": 2.190740522026692e-07, "logits/chosen": -2.2181923389434814, "logits/rejected": -1.867035150527954, "logps/chosen": -279.40887451171875, "logps/rejected": -302.70233154296875, "loss": 0.4825, "rewards/accuracies": 0.875, "rewards/chosen": -1.0305511951446533, "rewards/margins": 1.1736646890640259, "rewards/rejected": -2.2042160034179688, "step": 2413 }, { "epoch": 0.28, "learning_rate": 2.1903862052675092e-07, "logits/chosen": -1.7038986682891846, "logits/rejected": -1.6517492532730103, "logps/chosen": -280.27886962890625, "logps/rejected": -228.50279235839844, "loss": 0.3665, "rewards/accuracies": 0.75, "rewards/chosen": -0.5821016430854797, "rewards/margins": 1.2603877782821655, "rewards/rejected": -1.84248948097229, "step": 2414 }, { "epoch": 0.28, "learning_rate": 2.1900318885083264e-07, "logits/chosen": -2.334750175476074, "logits/rejected": -2.5466737747192383, "logps/chosen": -315.8623046875, "logps/rejected": -155.6282501220703, "loss": 0.2642, "rewards/accuracies": 0.875, "rewards/chosen": -0.9079563617706299, "rewards/margins": 1.986535668373108, "rewards/rejected": -2.8944919109344482, "step": 2415 }, { "epoch": 0.28, "learning_rate": 2.1896775717491437e-07, "logits/chosen": -2.305742025375366, "logits/rejected": -2.345782995223999, "logps/chosen": -159.86029052734375, "logps/rejected": -216.63575744628906, "loss": 0.2029, "rewards/accuracies": 1.0, "rewards/chosen": -1.7539782524108887, "rewards/margins": 2.9820733070373535, "rewards/rejected": -4.736051559448242, "step": 2416 }, { "epoch": 0.28, "learning_rate": 2.189323254989961e-07, "logits/chosen": -2.807830333709717, "logits/rejected": -2.591386556625366, "logps/chosen": -178.17306518554688, "logps/rejected": -254.59375, "loss": 0.2194, "rewards/accuracies": 0.875, "rewards/chosen": -0.8814403414726257, "rewards/margins": 3.3203353881835938, "rewards/rejected": -4.201775550842285, "step": 2417 }, { "epoch": 0.28, "learning_rate": 2.188968938230778e-07, "logits/chosen": -2.1598758697509766, "logits/rejected": -1.9119510650634766, "logps/chosen": -217.4944305419922, "logps/rejected": -222.904541015625, "loss": 0.3358, "rewards/accuracies": 0.875, "rewards/chosen": -0.28720033168792725, "rewards/margins": 1.7462971210479736, "rewards/rejected": -2.0334973335266113, "step": 2418 }, { "epoch": 0.28, "learning_rate": 2.1886146214715956e-07, "logits/chosen": -2.9726126194000244, "logits/rejected": -3.056459426879883, "logps/chosen": -216.31887817382812, "logps/rejected": -242.21856689453125, "loss": 0.3292, "rewards/accuracies": 0.75, "rewards/chosen": -0.806330680847168, "rewards/margins": 2.829763889312744, "rewards/rejected": -3.636094331741333, "step": 2419 }, { "epoch": 0.28, "learning_rate": 2.1882603047124128e-07, "logits/chosen": -2.5238051414489746, "logits/rejected": -2.6433095932006836, "logps/chosen": -289.5738830566406, "logps/rejected": -186.95626831054688, "loss": 1.3483, "rewards/accuracies": 0.75, "rewards/chosen": -2.0523738861083984, "rewards/margins": -0.24874144792556763, "rewards/rejected": -1.8036324977874756, "step": 2420 }, { "epoch": 0.28, "learning_rate": 2.18790598795323e-07, "logits/chosen": -1.9263896942138672, "logits/rejected": -1.8871774673461914, "logps/chosen": -304.6511535644531, "logps/rejected": -362.1975402832031, "loss": 0.3025, "rewards/accuracies": 0.875, "rewards/chosen": -1.3768489360809326, "rewards/margins": 2.2314929962158203, "rewards/rejected": -3.608342170715332, "step": 2421 }, { "epoch": 0.28, "learning_rate": 2.1875516711940473e-07, "logits/chosen": -2.5491456985473633, "logits/rejected": -2.365755081176758, "logps/chosen": -291.5038757324219, "logps/rejected": -242.99124145507812, "loss": 0.347, "rewards/accuracies": 0.75, "rewards/chosen": -0.5266618728637695, "rewards/margins": 2.365705728530884, "rewards/rejected": -2.8923676013946533, "step": 2422 }, { "epoch": 0.28, "learning_rate": 2.1871973544348645e-07, "logits/chosen": -1.9858808517456055, "logits/rejected": -2.0882153511047363, "logps/chosen": -172.44886779785156, "logps/rejected": -227.1353302001953, "loss": 0.9635, "rewards/accuracies": 0.5, "rewards/chosen": -1.679159164428711, "rewards/margins": 0.7912605404853821, "rewards/rejected": -2.4704198837280273, "step": 2423 }, { "epoch": 0.28, "learning_rate": 2.1868430376756822e-07, "logits/chosen": -2.4140841960906982, "logits/rejected": -2.4430532455444336, "logps/chosen": -247.08200073242188, "logps/rejected": -238.9581298828125, "loss": 0.4527, "rewards/accuracies": 0.625, "rewards/chosen": -1.3172849416732788, "rewards/margins": 1.7958452701568604, "rewards/rejected": -3.1131303310394287, "step": 2424 }, { "epoch": 0.28, "learning_rate": 2.1864887209164994e-07, "logits/chosen": -2.4403512477874756, "logits/rejected": -2.635002851486206, "logps/chosen": -295.29437255859375, "logps/rejected": -305.2942810058594, "loss": 0.5551, "rewards/accuracies": 0.75, "rewards/chosen": -0.5474229454994202, "rewards/margins": 2.4311656951904297, "rewards/rejected": -2.978588581085205, "step": 2425 }, { "epoch": 0.28, "learning_rate": 2.1861344041573167e-07, "logits/chosen": -2.9091224670410156, "logits/rejected": -2.7866063117980957, "logps/chosen": -261.61065673828125, "logps/rejected": -243.71136474609375, "loss": 0.8067, "rewards/accuracies": 0.625, "rewards/chosen": -1.3093852996826172, "rewards/margins": 0.6743326187133789, "rewards/rejected": -1.983717918395996, "step": 2426 }, { "epoch": 0.28, "learning_rate": 2.185780087398134e-07, "logits/chosen": -2.7849247455596924, "logits/rejected": -2.722393274307251, "logps/chosen": -170.15000915527344, "logps/rejected": -235.87515258789062, "loss": 0.5568, "rewards/accuracies": 0.75, "rewards/chosen": -1.0051884651184082, "rewards/margins": 3.331677198410034, "rewards/rejected": -4.3368659019470215, "step": 2427 }, { "epoch": 0.28, "learning_rate": 2.185425770638951e-07, "logits/chosen": -2.3067376613616943, "logits/rejected": -2.4732773303985596, "logps/chosen": -509.762939453125, "logps/rejected": -441.5346984863281, "loss": 0.3159, "rewards/accuracies": 1.0, "rewards/chosen": -0.4289066195487976, "rewards/margins": 1.9739067554473877, "rewards/rejected": -2.40281343460083, "step": 2428 }, { "epoch": 0.28, "learning_rate": 2.1850714538797683e-07, "logits/chosen": -2.3931708335876465, "logits/rejected": -2.4984872341156006, "logps/chosen": -341.8184509277344, "logps/rejected": -249.65969848632812, "loss": 0.5713, "rewards/accuracies": 0.625, "rewards/chosen": -0.7307844758033752, "rewards/margins": 2.2252461910247803, "rewards/rejected": -2.9560303688049316, "step": 2429 }, { "epoch": 0.28, "learning_rate": 2.1847171371205858e-07, "logits/chosen": -2.297938585281372, "logits/rejected": -1.8681225776672363, "logps/chosen": -270.20556640625, "logps/rejected": -484.920166015625, "loss": 0.5677, "rewards/accuracies": 0.875, "rewards/chosen": -0.7687557339668274, "rewards/margins": 0.6492458581924438, "rewards/rejected": -1.4180015325546265, "step": 2430 }, { "epoch": 0.28, "learning_rate": 2.184362820361403e-07, "logits/chosen": -2.5370614528656006, "logits/rejected": -2.7370262145996094, "logps/chosen": -277.70782470703125, "logps/rejected": -237.05938720703125, "loss": 0.3329, "rewards/accuracies": 0.75, "rewards/chosen": -1.1228300333023071, "rewards/margins": 1.885025978088379, "rewards/rejected": -3.0078558921813965, "step": 2431 }, { "epoch": 0.28, "learning_rate": 2.1840085036022203e-07, "logits/chosen": -2.383835792541504, "logits/rejected": -2.333524703979492, "logps/chosen": -371.65740966796875, "logps/rejected": -396.67791748046875, "loss": 0.4606, "rewards/accuracies": 0.75, "rewards/chosen": -0.09138723462820053, "rewards/margins": 0.7897689938545227, "rewards/rejected": -0.8811562061309814, "step": 2432 }, { "epoch": 0.28, "learning_rate": 2.1836541868430375e-07, "logits/chosen": -2.4804301261901855, "logits/rejected": -2.508697032928467, "logps/chosen": -203.05255126953125, "logps/rejected": -232.43853759765625, "loss": 0.2683, "rewards/accuracies": 0.875, "rewards/chosen": -0.6086359024047852, "rewards/margins": 3.016047716140747, "rewards/rejected": -3.6246838569641113, "step": 2433 }, { "epoch": 0.28, "learning_rate": 2.1832998700838547e-07, "logits/chosen": -2.3846709728240967, "logits/rejected": -2.4511735439300537, "logps/chosen": -240.42608642578125, "logps/rejected": -211.8994140625, "loss": 0.2795, "rewards/accuracies": 0.875, "rewards/chosen": -0.14571093022823334, "rewards/margins": 1.9337424039840698, "rewards/rejected": -2.079453229904175, "step": 2434 }, { "epoch": 0.28, "learning_rate": 2.182945553324672e-07, "logits/chosen": -2.2005419731140137, "logits/rejected": -2.347635507583618, "logps/chosen": -246.1650848388672, "logps/rejected": -159.45718383789062, "loss": 0.5205, "rewards/accuracies": 0.75, "rewards/chosen": -0.6720130443572998, "rewards/margins": 1.2505114078521729, "rewards/rejected": -1.9225245714187622, "step": 2435 }, { "epoch": 0.28, "learning_rate": 2.1825912365654897e-07, "logits/chosen": -2.744433879852295, "logits/rejected": -2.3822758197784424, "logps/chosen": -194.48477172851562, "logps/rejected": -225.1651153564453, "loss": 0.6012, "rewards/accuracies": 0.75, "rewards/chosen": -1.0521517992019653, "rewards/margins": 0.6601595878601074, "rewards/rejected": -1.7123115062713623, "step": 2436 }, { "epoch": 0.28, "learning_rate": 2.182236919806307e-07, "logits/chosen": -3.090705394744873, "logits/rejected": -3.0250086784362793, "logps/chosen": -374.5745849609375, "logps/rejected": -321.883056640625, "loss": 0.6359, "rewards/accuracies": 0.625, "rewards/chosen": -1.4832537174224854, "rewards/margins": 1.7162953615188599, "rewards/rejected": -3.199549436569214, "step": 2437 }, { "epoch": 0.28, "learning_rate": 2.181882603047124e-07, "logits/chosen": -1.6695842742919922, "logits/rejected": -1.5733023881912231, "logps/chosen": -350.4315185546875, "logps/rejected": -323.7000732421875, "loss": 0.3243, "rewards/accuracies": 0.875, "rewards/chosen": -0.9580378532409668, "rewards/margins": 1.8412737846374512, "rewards/rejected": -2.799311637878418, "step": 2438 }, { "epoch": 0.28, "learning_rate": 2.1815282862879413e-07, "logits/chosen": -2.780118942260742, "logits/rejected": -2.6690480709075928, "logps/chosen": -287.4557800292969, "logps/rejected": -227.68028259277344, "loss": 0.6141, "rewards/accuracies": 0.75, "rewards/chosen": -0.9995822906494141, "rewards/margins": 1.172029733657837, "rewards/rejected": -2.171612024307251, "step": 2439 }, { "epoch": 0.28, "learning_rate": 2.1811739695287586e-07, "logits/chosen": -2.0758941173553467, "logits/rejected": -2.0042366981506348, "logps/chosen": -245.48233032226562, "logps/rejected": -256.89459228515625, "loss": 0.2882, "rewards/accuracies": 0.875, "rewards/chosen": -0.5923284292221069, "rewards/margins": 2.0743322372436523, "rewards/rejected": -2.6666603088378906, "step": 2440 }, { "epoch": 0.28, "learning_rate": 2.180819652769576e-07, "logits/chosen": -2.3016905784606934, "logits/rejected": -2.6276965141296387, "logps/chosen": -284.684326171875, "logps/rejected": -199.1748809814453, "loss": 0.6392, "rewards/accuracies": 0.75, "rewards/chosen": -1.1366122961044312, "rewards/margins": 1.3160499334335327, "rewards/rejected": -2.452662229537964, "step": 2441 }, { "epoch": 0.28, "learning_rate": 2.1804653360103933e-07, "logits/chosen": -2.300863027572632, "logits/rejected": -2.23675274848938, "logps/chosen": -289.3321533203125, "logps/rejected": -247.20803833007812, "loss": 0.2806, "rewards/accuracies": 0.875, "rewards/chosen": -0.6414758563041687, "rewards/margins": 2.3768720626831055, "rewards/rejected": -3.018347978591919, "step": 2442 }, { "epoch": 0.28, "learning_rate": 2.1801110192512105e-07, "logits/chosen": -2.0366744995117188, "logits/rejected": -2.172194004058838, "logps/chosen": -188.126953125, "logps/rejected": -220.412353515625, "loss": 0.4973, "rewards/accuracies": 0.75, "rewards/chosen": -1.6541728973388672, "rewards/margins": 1.6980571746826172, "rewards/rejected": -3.3522300720214844, "step": 2443 }, { "epoch": 0.28, "learning_rate": 2.1797567024920277e-07, "logits/chosen": -2.7356224060058594, "logits/rejected": -2.7738306522369385, "logps/chosen": -213.9744110107422, "logps/rejected": -205.75306701660156, "loss": 0.1875, "rewards/accuracies": 1.0, "rewards/chosen": -0.09099578857421875, "rewards/margins": 2.4257750511169434, "rewards/rejected": -2.516770839691162, "step": 2444 }, { "epoch": 0.28, "learning_rate": 2.179402385732845e-07, "logits/chosen": -2.0045621395111084, "logits/rejected": -2.075671672821045, "logps/chosen": -198.0613250732422, "logps/rejected": -236.8986053466797, "loss": 1.1645, "rewards/accuracies": 0.375, "rewards/chosen": -2.5709025859832764, "rewards/margins": 0.6127287149429321, "rewards/rejected": -3.183631420135498, "step": 2445 }, { "epoch": 0.28, "learning_rate": 2.1790480689736621e-07, "logits/chosen": -1.7411086559295654, "logits/rejected": -1.7757494449615479, "logps/chosen": -210.12625122070312, "logps/rejected": -238.86343383789062, "loss": 0.4397, "rewards/accuracies": 0.875, "rewards/chosen": -0.44968381524086, "rewards/margins": 1.8220546245574951, "rewards/rejected": -2.2717385292053223, "step": 2446 }, { "epoch": 0.28, "learning_rate": 2.1786937522144794e-07, "logits/chosen": -2.5625712871551514, "logits/rejected": -2.319204330444336, "logps/chosen": -348.5757751464844, "logps/rejected": -310.1686096191406, "loss": 0.3593, "rewards/accuracies": 0.875, "rewards/chosen": -1.1385153532028198, "rewards/margins": 1.6224730014801025, "rewards/rejected": -2.760988235473633, "step": 2447 }, { "epoch": 0.28, "learning_rate": 2.178339435455297e-07, "logits/chosen": -2.5249807834625244, "logits/rejected": -2.6661505699157715, "logps/chosen": -223.2581329345703, "logps/rejected": -208.005615234375, "loss": 0.4646, "rewards/accuracies": 0.875, "rewards/chosen": -0.9694516658782959, "rewards/margins": 1.5931706428527832, "rewards/rejected": -2.5626220703125, "step": 2448 }, { "epoch": 0.28, "learning_rate": 2.1779851186961143e-07, "logits/chosen": -2.069368839263916, "logits/rejected": -2.3304343223571777, "logps/chosen": -337.6851806640625, "logps/rejected": -242.470703125, "loss": 0.6183, "rewards/accuracies": 0.75, "rewards/chosen": -1.1734493970870972, "rewards/margins": 0.8337082862854004, "rewards/rejected": -2.007157802581787, "step": 2449 }, { "epoch": 0.29, "learning_rate": 2.1776308019369316e-07, "logits/chosen": -2.1143674850463867, "logits/rejected": -2.4560136795043945, "logps/chosen": -358.7385559082031, "logps/rejected": -209.5023193359375, "loss": 0.4394, "rewards/accuracies": 0.625, "rewards/chosen": -0.7461340427398682, "rewards/margins": 1.8844082355499268, "rewards/rejected": -2.630542278289795, "step": 2450 }, { "epoch": 0.29, "learning_rate": 2.1772764851777488e-07, "logits/chosen": -2.0776731967926025, "logits/rejected": -2.16764235496521, "logps/chosen": -329.9303283691406, "logps/rejected": -298.8729553222656, "loss": 0.4509, "rewards/accuracies": 0.875, "rewards/chosen": -0.5396146774291992, "rewards/margins": 1.087437629699707, "rewards/rejected": -1.6270523071289062, "step": 2451 }, { "epoch": 0.29, "learning_rate": 2.176922168418566e-07, "logits/chosen": -1.3730180263519287, "logits/rejected": -1.8702495098114014, "logps/chosen": -399.27484130859375, "logps/rejected": -274.1457214355469, "loss": 0.4645, "rewards/accuracies": 0.75, "rewards/chosen": -0.10476456582546234, "rewards/margins": 1.2702255249023438, "rewards/rejected": -1.3749902248382568, "step": 2452 }, { "epoch": 0.29, "learning_rate": 2.1765678516593835e-07, "logits/chosen": -2.0756473541259766, "logits/rejected": -2.388780117034912, "logps/chosen": -239.2199249267578, "logps/rejected": -245.57345581054688, "loss": 0.4867, "rewards/accuracies": 0.875, "rewards/chosen": -0.44959378242492676, "rewards/margins": 1.2436915636062622, "rewards/rejected": -1.6932854652404785, "step": 2453 }, { "epoch": 0.29, "learning_rate": 2.1762135349002007e-07, "logits/chosen": -2.3898582458496094, "logits/rejected": -2.6965701580047607, "logps/chosen": -300.75341796875, "logps/rejected": -198.84890747070312, "loss": 0.3955, "rewards/accuracies": 0.75, "rewards/chosen": -0.3583180010318756, "rewards/margins": 2.208394765853882, "rewards/rejected": -2.5667128562927246, "step": 2454 }, { "epoch": 0.29, "learning_rate": 2.175859218141018e-07, "logits/chosen": -2.2460758686065674, "logits/rejected": -2.34220290184021, "logps/chosen": -275.5306396484375, "logps/rejected": -297.3830871582031, "loss": 0.3654, "rewards/accuracies": 0.875, "rewards/chosen": -0.40517866611480713, "rewards/margins": 2.767184019088745, "rewards/rejected": -3.172362804412842, "step": 2455 }, { "epoch": 0.29, "learning_rate": 2.1755049013818352e-07, "logits/chosen": -2.265929698944092, "logits/rejected": -2.5448100566864014, "logps/chosen": -231.16973876953125, "logps/rejected": -220.73806762695312, "loss": 0.4614, "rewards/accuracies": 0.75, "rewards/chosen": -0.9303874373435974, "rewards/margins": 1.3694424629211426, "rewards/rejected": -2.2998297214508057, "step": 2456 }, { "epoch": 0.29, "learning_rate": 2.1751505846226524e-07, "logits/chosen": -2.6133482456207275, "logits/rejected": -2.9053165912628174, "logps/chosen": -202.8801727294922, "logps/rejected": -163.5523681640625, "loss": 0.7751, "rewards/accuracies": 0.875, "rewards/chosen": -1.3435837030410767, "rewards/margins": 1.4021862745285034, "rewards/rejected": -2.74576997756958, "step": 2457 }, { "epoch": 0.29, "learning_rate": 2.1747962678634696e-07, "logits/chosen": -2.572540283203125, "logits/rejected": -2.4240710735321045, "logps/chosen": -125.78656768798828, "logps/rejected": -201.97930908203125, "loss": 0.3686, "rewards/accuracies": 0.75, "rewards/chosen": -0.2883782982826233, "rewards/margins": 2.0042266845703125, "rewards/rejected": -2.292604923248291, "step": 2458 }, { "epoch": 0.29, "learning_rate": 2.1744419511042873e-07, "logits/chosen": -2.3581531047821045, "logits/rejected": -2.2826406955718994, "logps/chosen": -263.19293212890625, "logps/rejected": -193.87155151367188, "loss": 0.6241, "rewards/accuracies": 0.75, "rewards/chosen": -0.8815513849258423, "rewards/margins": 2.085815668106079, "rewards/rejected": -2.967366933822632, "step": 2459 }, { "epoch": 0.29, "learning_rate": 2.1740876343451046e-07, "logits/chosen": -2.1588902473449707, "logits/rejected": -2.4780352115631104, "logps/chosen": -412.3316650390625, "logps/rejected": -343.46563720703125, "loss": 0.7801, "rewards/accuracies": 0.75, "rewards/chosen": -1.3211928606033325, "rewards/margins": 0.3646429777145386, "rewards/rejected": -1.685835838317871, "step": 2460 }, { "epoch": 0.29, "learning_rate": 2.1737333175859218e-07, "logits/chosen": -1.9822461605072021, "logits/rejected": -2.0525383949279785, "logps/chosen": -271.55340576171875, "logps/rejected": -235.89541625976562, "loss": 0.6805, "rewards/accuracies": 0.75, "rewards/chosen": -1.2293037176132202, "rewards/margins": 0.8565818071365356, "rewards/rejected": -2.085885524749756, "step": 2461 }, { "epoch": 0.29, "learning_rate": 2.173379000826739e-07, "logits/chosen": -2.732396364212036, "logits/rejected": -2.691169500350952, "logps/chosen": -317.80517578125, "logps/rejected": -358.82281494140625, "loss": 0.4811, "rewards/accuracies": 0.75, "rewards/chosen": -1.2289564609527588, "rewards/margins": 1.2388205528259277, "rewards/rejected": -2.4677772521972656, "step": 2462 }, { "epoch": 0.29, "learning_rate": 2.1730246840675562e-07, "logits/chosen": -2.7565176486968994, "logits/rejected": -2.8796520233154297, "logps/chosen": -182.7015380859375, "logps/rejected": -238.11822509765625, "loss": 0.2345, "rewards/accuracies": 1.0, "rewards/chosen": -0.6201177835464478, "rewards/margins": 1.957884430885315, "rewards/rejected": -2.5780019760131836, "step": 2463 }, { "epoch": 0.29, "learning_rate": 2.1726703673083737e-07, "logits/chosen": -2.0676345825195312, "logits/rejected": -2.018420696258545, "logps/chosen": -313.51361083984375, "logps/rejected": -237.87725830078125, "loss": 0.4599, "rewards/accuracies": 0.625, "rewards/chosen": -0.5916812419891357, "rewards/margins": 1.7116608619689941, "rewards/rejected": -2.30334210395813, "step": 2464 }, { "epoch": 0.29, "learning_rate": 2.172316050549191e-07, "logits/chosen": -2.1458559036254883, "logits/rejected": -2.1581954956054688, "logps/chosen": -374.22467041015625, "logps/rejected": -360.7703552246094, "loss": 0.254, "rewards/accuracies": 1.0, "rewards/chosen": -1.053375005722046, "rewards/margins": 2.296565055847168, "rewards/rejected": -3.349940061569214, "step": 2465 }, { "epoch": 0.29, "learning_rate": 2.1719617337900082e-07, "logits/chosen": -2.6527764797210693, "logits/rejected": -2.5191380977630615, "logps/chosen": -176.8474578857422, "logps/rejected": -186.81121826171875, "loss": 0.5223, "rewards/accuracies": 0.625, "rewards/chosen": -1.2767152786254883, "rewards/margins": 0.7830303907394409, "rewards/rejected": -2.0597455501556396, "step": 2466 }, { "epoch": 0.29, "learning_rate": 2.1716074170308254e-07, "logits/chosen": -2.6270358562469482, "logits/rejected": -2.847661018371582, "logps/chosen": -246.0839080810547, "logps/rejected": -214.92115783691406, "loss": 0.2277, "rewards/accuracies": 1.0, "rewards/chosen": -1.3325130939483643, "rewards/margins": 3.010699987411499, "rewards/rejected": -4.343213081359863, "step": 2467 }, { "epoch": 0.29, "learning_rate": 2.1712531002716426e-07, "logits/chosen": -2.0580027103424072, "logits/rejected": -2.246978759765625, "logps/chosen": -363.367919921875, "logps/rejected": -255.94229125976562, "loss": 0.5123, "rewards/accuracies": 0.625, "rewards/chosen": -1.3784147500991821, "rewards/margins": 1.2304003238677979, "rewards/rejected": -2.6088151931762695, "step": 2468 }, { "epoch": 0.29, "learning_rate": 2.1708987835124598e-07, "logits/chosen": -2.6273584365844727, "logits/rejected": -2.421530246734619, "logps/chosen": -163.42869567871094, "logps/rejected": -272.5882263183594, "loss": 0.557, "rewards/accuracies": 0.625, "rewards/chosen": -0.6079937815666199, "rewards/margins": 1.618372917175293, "rewards/rejected": -2.2263667583465576, "step": 2469 }, { "epoch": 0.29, "learning_rate": 2.1705444667532773e-07, "logits/chosen": -2.3506693840026855, "logits/rejected": -2.5059287548065186, "logps/chosen": -387.5794677734375, "logps/rejected": -245.97079467773438, "loss": 0.652, "rewards/accuracies": 0.75, "rewards/chosen": -1.0010559558868408, "rewards/margins": 1.6933295726776123, "rewards/rejected": -2.694385528564453, "step": 2470 }, { "epoch": 0.29, "learning_rate": 2.1701901499940948e-07, "logits/chosen": -2.1610360145568848, "logits/rejected": -2.7579829692840576, "logps/chosen": -515.5169067382812, "logps/rejected": -274.25726318359375, "loss": 0.487, "rewards/accuracies": 0.625, "rewards/chosen": -1.6520326137542725, "rewards/margins": 0.898408830165863, "rewards/rejected": -2.5504415035247803, "step": 2471 }, { "epoch": 0.29, "learning_rate": 2.169835833234912e-07, "logits/chosen": -1.7973394393920898, "logits/rejected": -2.4238595962524414, "logps/chosen": -265.7392272949219, "logps/rejected": -174.10629272460938, "loss": 0.5527, "rewards/accuracies": 0.625, "rewards/chosen": -0.5772750377655029, "rewards/margins": 1.0169744491577148, "rewards/rejected": -1.5942493677139282, "step": 2472 }, { "epoch": 0.29, "learning_rate": 2.1694815164757292e-07, "logits/chosen": -1.8389298915863037, "logits/rejected": -1.9321054220199585, "logps/chosen": -456.0957336425781, "logps/rejected": -377.3522033691406, "loss": 0.4853, "rewards/accuracies": 0.75, "rewards/chosen": -0.5206893682479858, "rewards/margins": 1.5758976936340332, "rewards/rejected": -2.0965871810913086, "step": 2473 }, { "epoch": 0.29, "learning_rate": 2.1691271997165465e-07, "logits/chosen": -2.7043190002441406, "logits/rejected": -2.602125644683838, "logps/chosen": -263.0576171875, "logps/rejected": -299.27313232421875, "loss": 0.1125, "rewards/accuracies": 1.0, "rewards/chosen": -0.5646453499794006, "rewards/margins": 3.171477794647217, "rewards/rejected": -3.7361230850219727, "step": 2474 }, { "epoch": 0.29, "learning_rate": 2.168772882957364e-07, "logits/chosen": -2.5873637199401855, "logits/rejected": -2.472294330596924, "logps/chosen": -337.7218322753906, "logps/rejected": -253.92446899414062, "loss": 0.2148, "rewards/accuracies": 1.0, "rewards/chosen": -1.1485633850097656, "rewards/margins": 1.8845783472061157, "rewards/rejected": -3.033141613006592, "step": 2475 }, { "epoch": 0.29, "learning_rate": 2.1684185661981812e-07, "logits/chosen": -2.471859931945801, "logits/rejected": -2.494781017303467, "logps/chosen": -452.29425048828125, "logps/rejected": -615.3822631835938, "loss": 0.3725, "rewards/accuracies": 0.875, "rewards/chosen": -1.2650425434112549, "rewards/margins": 2.8933329582214355, "rewards/rejected": -4.1583757400512695, "step": 2476 }, { "epoch": 0.29, "learning_rate": 2.1680642494389984e-07, "logits/chosen": -1.9317779541015625, "logits/rejected": -1.91277015209198, "logps/chosen": -344.041259765625, "logps/rejected": -370.9241943359375, "loss": 0.3656, "rewards/accuracies": 0.75, "rewards/chosen": -0.8470520377159119, "rewards/margins": 1.7892144918441772, "rewards/rejected": -2.6362664699554443, "step": 2477 }, { "epoch": 0.29, "learning_rate": 2.1677099326798156e-07, "logits/chosen": -2.6003546714782715, "logits/rejected": -2.4782469272613525, "logps/chosen": -155.3167724609375, "logps/rejected": -244.5566864013672, "loss": 0.6015, "rewards/accuracies": 0.625, "rewards/chosen": -1.0782259702682495, "rewards/margins": 1.5402922630310059, "rewards/rejected": -2.618518352508545, "step": 2478 }, { "epoch": 0.29, "learning_rate": 2.1673556159206328e-07, "logits/chosen": -2.764953851699829, "logits/rejected": -2.8564746379852295, "logps/chosen": -153.8671417236328, "logps/rejected": -182.783203125, "loss": 0.512, "rewards/accuracies": 0.875, "rewards/chosen": -0.924048125743866, "rewards/margins": 2.099832773208618, "rewards/rejected": -3.023880958557129, "step": 2479 }, { "epoch": 0.29, "learning_rate": 2.16700129916145e-07, "logits/chosen": -2.386533260345459, "logits/rejected": -2.425724744796753, "logps/chosen": -310.36151123046875, "logps/rejected": -195.2545166015625, "loss": 0.1532, "rewards/accuracies": 1.0, "rewards/chosen": -0.4305097758769989, "rewards/margins": 2.4000132083892822, "rewards/rejected": -2.8305230140686035, "step": 2480 }, { "epoch": 0.29, "learning_rate": 2.1666469824022673e-07, "logits/chosen": -1.8042891025543213, "logits/rejected": -2.167426586151123, "logps/chosen": -554.564208984375, "logps/rejected": -370.48065185546875, "loss": 0.5978, "rewards/accuracies": 0.875, "rewards/chosen": -0.6072208881378174, "rewards/margins": 2.131959915161133, "rewards/rejected": -2.7391810417175293, "step": 2481 }, { "epoch": 0.29, "learning_rate": 2.1662926656430848e-07, "logits/chosen": -2.7623744010925293, "logits/rejected": -2.661491632461548, "logps/chosen": -126.27397155761719, "logps/rejected": -233.30946350097656, "loss": 0.3436, "rewards/accuracies": 0.875, "rewards/chosen": -0.4357917606830597, "rewards/margins": 1.8856812715530396, "rewards/rejected": -2.3214731216430664, "step": 2482 }, { "epoch": 0.29, "learning_rate": 2.1659383488839022e-07, "logits/chosen": -2.355668067932129, "logits/rejected": -2.486140012741089, "logps/chosen": -304.5206604003906, "logps/rejected": -226.6120147705078, "loss": 0.5104, "rewards/accuracies": 0.75, "rewards/chosen": -1.3285385370254517, "rewards/margins": 1.1340433359146118, "rewards/rejected": -2.4625821113586426, "step": 2483 }, { "epoch": 0.29, "learning_rate": 2.1655840321247195e-07, "logits/chosen": -2.8706371784210205, "logits/rejected": -2.7139811515808105, "logps/chosen": -175.02418518066406, "logps/rejected": -207.94557189941406, "loss": 0.4015, "rewards/accuracies": 0.875, "rewards/chosen": -1.0142889022827148, "rewards/margins": 1.9969303607940674, "rewards/rejected": -3.0112192630767822, "step": 2484 }, { "epoch": 0.29, "learning_rate": 2.1652297153655367e-07, "logits/chosen": -2.4295926094055176, "logits/rejected": -2.457249402999878, "logps/chosen": -196.5600128173828, "logps/rejected": -344.6933898925781, "loss": 0.4395, "rewards/accuracies": 0.875, "rewards/chosen": -0.6401822566986084, "rewards/margins": 1.4477496147155762, "rewards/rejected": -2.0879318714141846, "step": 2485 }, { "epoch": 0.29, "learning_rate": 2.164875398606354e-07, "logits/chosen": -2.3014252185821533, "logits/rejected": -2.210580825805664, "logps/chosen": -96.48078155517578, "logps/rejected": -224.66656494140625, "loss": 0.2997, "rewards/accuracies": 0.875, "rewards/chosen": -0.6605328917503357, "rewards/margins": 1.7184669971466064, "rewards/rejected": -2.378999948501587, "step": 2486 }, { "epoch": 0.29, "learning_rate": 2.1645210818471714e-07, "logits/chosen": -2.3552684783935547, "logits/rejected": -2.480907440185547, "logps/chosen": -288.65447998046875, "logps/rejected": -217.87808227539062, "loss": 0.7457, "rewards/accuracies": 0.75, "rewards/chosen": -1.4416617155075073, "rewards/margins": 0.9657660722732544, "rewards/rejected": -2.4074275493621826, "step": 2487 }, { "epoch": 0.29, "learning_rate": 2.1641667650879886e-07, "logits/chosen": -1.9270848035812378, "logits/rejected": -2.0156235694885254, "logps/chosen": -677.5250244140625, "logps/rejected": -304.284423828125, "loss": 0.5192, "rewards/accuracies": 0.75, "rewards/chosen": -1.0662102699279785, "rewards/margins": 1.6201300621032715, "rewards/rejected": -2.68634033203125, "step": 2488 }, { "epoch": 0.29, "learning_rate": 2.1638124483288058e-07, "logits/chosen": -2.279860258102417, "logits/rejected": -2.399411201477051, "logps/chosen": -314.2524108886719, "logps/rejected": -306.31207275390625, "loss": 0.3357, "rewards/accuracies": 0.875, "rewards/chosen": -0.221620112657547, "rewards/margins": 1.6651103496551514, "rewards/rejected": -1.8867303133010864, "step": 2489 }, { "epoch": 0.29, "learning_rate": 2.163458131569623e-07, "logits/chosen": -2.265770673751831, "logits/rejected": -2.7091500759124756, "logps/chosen": -337.3496398925781, "logps/rejected": -216.61634826660156, "loss": 0.2304, "rewards/accuracies": 0.875, "rewards/chosen": -0.5104184746742249, "rewards/margins": 2.345304250717163, "rewards/rejected": -2.8557229042053223, "step": 2490 }, { "epoch": 0.29, "learning_rate": 2.1631038148104403e-07, "logits/chosen": -2.1074585914611816, "logits/rejected": -2.240861415863037, "logps/chosen": -140.7238311767578, "logps/rejected": -159.63108825683594, "loss": 0.5311, "rewards/accuracies": 0.75, "rewards/chosen": -1.02963387966156, "rewards/margins": 0.892888069152832, "rewards/rejected": -1.922521948814392, "step": 2491 }, { "epoch": 0.29, "learning_rate": 2.1627494980512575e-07, "logits/chosen": -2.4363150596618652, "logits/rejected": -2.4098405838012695, "logps/chosen": -240.70071411132812, "logps/rejected": -224.8988800048828, "loss": 0.4626, "rewards/accuracies": 0.875, "rewards/chosen": -1.2235594987869263, "rewards/margins": 0.8425126075744629, "rewards/rejected": -2.0660722255706787, "step": 2492 }, { "epoch": 0.29, "learning_rate": 2.162395181292075e-07, "logits/chosen": -1.9511940479278564, "logits/rejected": -1.9594807624816895, "logps/chosen": -296.89794921875, "logps/rejected": -339.48968505859375, "loss": 0.4048, "rewards/accuracies": 0.875, "rewards/chosen": -0.7253051400184631, "rewards/margins": 0.954999566078186, "rewards/rejected": -1.6803046464920044, "step": 2493 }, { "epoch": 0.29, "learning_rate": 2.1620408645328925e-07, "logits/chosen": -2.4320504665374756, "logits/rejected": -2.227735996246338, "logps/chosen": -194.30369567871094, "logps/rejected": -286.7164611816406, "loss": 0.4027, "rewards/accuracies": 0.75, "rewards/chosen": -0.5044612288475037, "rewards/margins": 1.7659118175506592, "rewards/rejected": -2.2703728675842285, "step": 2494 }, { "epoch": 0.29, "learning_rate": 2.1616865477737097e-07, "logits/chosen": -2.683600664138794, "logits/rejected": -2.6873273849487305, "logps/chosen": -200.62213134765625, "logps/rejected": -260.86944580078125, "loss": 0.4168, "rewards/accuracies": 0.875, "rewards/chosen": -1.0549192428588867, "rewards/margins": 1.512897253036499, "rewards/rejected": -2.5678164958953857, "step": 2495 }, { "epoch": 0.29, "learning_rate": 2.161332231014527e-07, "logits/chosen": -2.5147647857666016, "logits/rejected": -2.6125638484954834, "logps/chosen": -185.51768493652344, "logps/rejected": -264.8766784667969, "loss": 0.4466, "rewards/accuracies": 0.625, "rewards/chosen": -0.3316652476787567, "rewards/margins": 1.7436048984527588, "rewards/rejected": -2.075270175933838, "step": 2496 }, { "epoch": 0.29, "learning_rate": 2.1609779142553441e-07, "logits/chosen": -1.6731147766113281, "logits/rejected": -2.253133773803711, "logps/chosen": -611.4571533203125, "logps/rejected": -290.9269104003906, "loss": 0.5342, "rewards/accuracies": 0.75, "rewards/chosen": -1.4358718395233154, "rewards/margins": 0.5214658379554749, "rewards/rejected": -1.9573378562927246, "step": 2497 }, { "epoch": 0.29, "learning_rate": 2.1606235974961616e-07, "logits/chosen": -2.7119803428649902, "logits/rejected": -2.7804431915283203, "logps/chosen": -262.18011474609375, "logps/rejected": -197.81143188476562, "loss": 0.4504, "rewards/accuracies": 0.75, "rewards/chosen": -1.2761025428771973, "rewards/margins": 1.1145788431167603, "rewards/rejected": -2.390681266784668, "step": 2498 }, { "epoch": 0.29, "learning_rate": 2.1602692807369788e-07, "logits/chosen": -2.087944507598877, "logits/rejected": -2.3218302726745605, "logps/chosen": -421.219970703125, "logps/rejected": -236.00625610351562, "loss": 0.3775, "rewards/accuracies": 0.875, "rewards/chosen": -0.9269934892654419, "rewards/margins": 1.710361361503601, "rewards/rejected": -2.637354850769043, "step": 2499 }, { "epoch": 0.29, "learning_rate": 2.159914963977796e-07, "logits/chosen": -2.364070415496826, "logits/rejected": -2.37058162689209, "logps/chosen": -194.7274169921875, "logps/rejected": -223.50689697265625, "loss": 0.1926, "rewards/accuracies": 0.875, "rewards/chosen": -0.4434488117694855, "rewards/margins": 2.6976234912872314, "rewards/rejected": -3.1410725116729736, "step": 2500 }, { "epoch": 0.29, "learning_rate": 2.1595606472186133e-07, "logits/chosen": -2.66389799118042, "logits/rejected": -2.8655638694763184, "logps/chosen": -226.45138549804688, "logps/rejected": -163.21151733398438, "loss": 0.6713, "rewards/accuracies": 0.875, "rewards/chosen": -0.8911166191101074, "rewards/margins": 1.7618833780288696, "rewards/rejected": -2.6529998779296875, "step": 2501 }, { "epoch": 0.29, "learning_rate": 2.1592063304594305e-07, "logits/chosen": -2.665041923522949, "logits/rejected": -2.6689534187316895, "logps/chosen": -177.0982666015625, "logps/rejected": -261.6306457519531, "loss": 0.2848, "rewards/accuracies": 0.875, "rewards/chosen": -0.6877730488777161, "rewards/margins": 2.144991159439087, "rewards/rejected": -2.8327643871307373, "step": 2502 }, { "epoch": 0.29, "learning_rate": 2.1588520137002477e-07, "logits/chosen": -1.9570120573043823, "logits/rejected": -2.0342416763305664, "logps/chosen": -416.959228515625, "logps/rejected": -306.8493347167969, "loss": 0.2026, "rewards/accuracies": 0.875, "rewards/chosen": -0.36147573590278625, "rewards/margins": 2.408031940460205, "rewards/rejected": -2.769507646560669, "step": 2503 }, { "epoch": 0.29, "learning_rate": 2.1584976969410652e-07, "logits/chosen": -2.486175537109375, "logits/rejected": -2.26587176322937, "logps/chosen": -251.4013671875, "logps/rejected": -255.628173828125, "loss": 0.2594, "rewards/accuracies": 0.875, "rewards/chosen": -0.4538498520851135, "rewards/margins": 2.3155696392059326, "rewards/rejected": -2.7694191932678223, "step": 2504 }, { "epoch": 0.29, "learning_rate": 2.1581433801818824e-07, "logits/chosen": -2.3222920894622803, "logits/rejected": -2.5238311290740967, "logps/chosen": -318.052490234375, "logps/rejected": -163.18344116210938, "loss": 0.7829, "rewards/accuracies": 0.5, "rewards/chosen": -2.386183738708496, "rewards/margins": 0.9369373321533203, "rewards/rejected": -3.3231213092803955, "step": 2505 }, { "epoch": 0.29, "learning_rate": 2.1577890634227e-07, "logits/chosen": -2.333968162536621, "logits/rejected": -2.5672144889831543, "logps/chosen": -362.73565673828125, "logps/rejected": -255.62030029296875, "loss": 0.2109, "rewards/accuracies": 1.0, "rewards/chosen": -0.2562384605407715, "rewards/margins": 2.2481024265289307, "rewards/rejected": -2.504340887069702, "step": 2506 }, { "epoch": 0.29, "learning_rate": 2.1574347466635171e-07, "logits/chosen": -2.6349728107452393, "logits/rejected": -2.8988699913024902, "logps/chosen": -259.6604919433594, "logps/rejected": -227.67808532714844, "loss": 0.5244, "rewards/accuracies": 0.75, "rewards/chosen": -0.6094520092010498, "rewards/margins": 2.4679994583129883, "rewards/rejected": -3.077451705932617, "step": 2507 }, { "epoch": 0.29, "learning_rate": 2.1570804299043344e-07, "logits/chosen": -2.30950927734375, "logits/rejected": -2.539088726043701, "logps/chosen": -246.06527709960938, "logps/rejected": -311.15618896484375, "loss": 0.1114, "rewards/accuracies": 1.0, "rewards/chosen": -0.18298441171646118, "rewards/margins": 3.3109629154205322, "rewards/rejected": -3.4939475059509277, "step": 2508 }, { "epoch": 0.29, "learning_rate": 2.1567261131451519e-07, "logits/chosen": -2.332838535308838, "logits/rejected": -2.3300657272338867, "logps/chosen": -250.5757598876953, "logps/rejected": -286.112060546875, "loss": 0.76, "rewards/accuracies": 0.75, "rewards/chosen": -0.7139772176742554, "rewards/margins": 0.8995258212089539, "rewards/rejected": -1.613503098487854, "step": 2509 }, { "epoch": 0.29, "learning_rate": 2.156371796385969e-07, "logits/chosen": -1.726469874382019, "logits/rejected": -1.7293589115142822, "logps/chosen": -387.61981201171875, "logps/rejected": -441.3700256347656, "loss": 0.3401, "rewards/accuracies": 0.875, "rewards/chosen": -0.9473698139190674, "rewards/margins": 2.4158239364624023, "rewards/rejected": -3.3631937503814697, "step": 2510 }, { "epoch": 0.29, "learning_rate": 2.1560174796267863e-07, "logits/chosen": -2.4357662200927734, "logits/rejected": -2.3817741870880127, "logps/chosen": -292.2746276855469, "logps/rejected": -248.77415466308594, "loss": 0.5239, "rewards/accuracies": 0.875, "rewards/chosen": -0.35337433218955994, "rewards/margins": 1.4764848947525024, "rewards/rejected": -1.8298592567443848, "step": 2511 }, { "epoch": 0.29, "learning_rate": 2.1556631628676035e-07, "logits/chosen": -2.5331079959869385, "logits/rejected": -2.249842643737793, "logps/chosen": -268.59783935546875, "logps/rejected": -375.6451721191406, "loss": 0.3666, "rewards/accuracies": 0.875, "rewards/chosen": -0.42586958408355713, "rewards/margins": 1.9522696733474731, "rewards/rejected": -2.3781392574310303, "step": 2512 }, { "epoch": 0.29, "learning_rate": 2.1553088461084207e-07, "logits/chosen": -2.6065847873687744, "logits/rejected": -2.7714946269989014, "logps/chosen": -303.1990966796875, "logps/rejected": -311.88543701171875, "loss": 0.1076, "rewards/accuracies": 1.0, "rewards/chosen": -0.5122393369674683, "rewards/margins": 3.1735033988952637, "rewards/rejected": -3.6857428550720215, "step": 2513 }, { "epoch": 0.29, "learning_rate": 2.154954529349238e-07, "logits/chosen": -2.1436097621917725, "logits/rejected": -2.0793073177337646, "logps/chosen": -257.1372375488281, "logps/rejected": -320.68505859375, "loss": 0.7532, "rewards/accuracies": 0.75, "rewards/chosen": -0.5653723478317261, "rewards/margins": 0.30032941699028015, "rewards/rejected": -0.8657017946243286, "step": 2514 }, { "epoch": 0.29, "learning_rate": 2.1546002125900552e-07, "logits/chosen": -2.2719881534576416, "logits/rejected": -2.0516462326049805, "logps/chosen": -153.1759796142578, "logps/rejected": -162.70884704589844, "loss": 1.0015, "rewards/accuracies": 0.625, "rewards/chosen": -1.0997684001922607, "rewards/margins": 0.6520240902900696, "rewards/rejected": -1.7517926692962646, "step": 2515 }, { "epoch": 0.29, "learning_rate": 2.1542458958308727e-07, "logits/chosen": -2.474311590194702, "logits/rejected": -2.2612688541412354, "logps/chosen": -174.1598663330078, "logps/rejected": -204.6775665283203, "loss": 0.4369, "rewards/accuracies": 0.75, "rewards/chosen": -0.5761568546295166, "rewards/margins": 1.2398627996444702, "rewards/rejected": -1.8160197734832764, "step": 2516 }, { "epoch": 0.29, "learning_rate": 2.15389157907169e-07, "logits/chosen": -2.4809908866882324, "logits/rejected": -2.289766311645508, "logps/chosen": -191.56077575683594, "logps/rejected": -215.5283966064453, "loss": 0.3109, "rewards/accuracies": 0.875, "rewards/chosen": -0.8735411167144775, "rewards/margins": 2.218721866607666, "rewards/rejected": -3.0922629833221436, "step": 2517 }, { "epoch": 0.29, "learning_rate": 2.1535372623125074e-07, "logits/chosen": -2.823366641998291, "logits/rejected": -2.7455403804779053, "logps/chosen": -126.89311218261719, "logps/rejected": -183.48045349121094, "loss": 0.2034, "rewards/accuracies": 0.875, "rewards/chosen": -0.913655161857605, "rewards/margins": 1.9730548858642578, "rewards/rejected": -2.8867099285125732, "step": 2518 }, { "epoch": 0.29, "learning_rate": 2.1531829455533246e-07, "logits/chosen": -1.9960289001464844, "logits/rejected": -1.8561956882476807, "logps/chosen": -192.32167053222656, "logps/rejected": -235.8656005859375, "loss": 0.6836, "rewards/accuracies": 0.625, "rewards/chosen": -1.452646017074585, "rewards/margins": 1.8187305927276611, "rewards/rejected": -3.271376609802246, "step": 2519 }, { "epoch": 0.29, "learning_rate": 2.152828628794142e-07, "logits/chosen": -1.544995665550232, "logits/rejected": -1.4306167364120483, "logps/chosen": -439.45416259765625, "logps/rejected": -314.9580078125, "loss": 0.5009, "rewards/accuracies": 0.75, "rewards/chosen": -0.8839030265808105, "rewards/margins": 1.0862598419189453, "rewards/rejected": -1.9701627492904663, "step": 2520 }, { "epoch": 0.29, "learning_rate": 2.1524743120349593e-07, "logits/chosen": -2.0658252239227295, "logits/rejected": -2.366868019104004, "logps/chosen": -335.5693359375, "logps/rejected": -245.95172119140625, "loss": 0.4255, "rewards/accuracies": 0.75, "rewards/chosen": -0.9966591596603394, "rewards/margins": 1.659735083580017, "rewards/rejected": -2.6563942432403564, "step": 2521 }, { "epoch": 0.29, "learning_rate": 2.1521199952757765e-07, "logits/chosen": -2.5534114837646484, "logits/rejected": -2.722569465637207, "logps/chosen": -212.35562133789062, "logps/rejected": -186.20565795898438, "loss": 0.5153, "rewards/accuracies": 0.75, "rewards/chosen": -0.45241692662239075, "rewards/margins": 1.912498950958252, "rewards/rejected": -2.3649158477783203, "step": 2522 }, { "epoch": 0.29, "learning_rate": 2.1517656785165937e-07, "logits/chosen": -2.3214805126190186, "logits/rejected": -2.4388954639434814, "logps/chosen": -344.68121337890625, "logps/rejected": -243.51193237304688, "loss": 0.3391, "rewards/accuracies": 0.75, "rewards/chosen": -0.5915799736976624, "rewards/margins": 1.4331473112106323, "rewards/rejected": -2.0247273445129395, "step": 2523 }, { "epoch": 0.29, "learning_rate": 2.151411361757411e-07, "logits/chosen": -2.653506278991699, "logits/rejected": -2.872809886932373, "logps/chosen": -206.9505157470703, "logps/rejected": -147.49998474121094, "loss": 0.5155, "rewards/accuracies": 0.625, "rewards/chosen": -1.15928316116333, "rewards/margins": 0.840441882610321, "rewards/rejected": -1.999725103378296, "step": 2524 }, { "epoch": 0.29, "learning_rate": 2.1510570449982282e-07, "logits/chosen": -2.4350903034210205, "logits/rejected": -2.1825990676879883, "logps/chosen": -215.20053100585938, "logps/rejected": -268.059814453125, "loss": 0.2701, "rewards/accuracies": 1.0, "rewards/chosen": 0.02213093638420105, "rewards/margins": 1.729360818862915, "rewards/rejected": -1.7072298526763916, "step": 2525 }, { "epoch": 0.29, "learning_rate": 2.1507027282390454e-07, "logits/chosen": -2.6398324966430664, "logits/rejected": -2.4200289249420166, "logps/chosen": -258.4054870605469, "logps/rejected": -370.25726318359375, "loss": 0.6648, "rewards/accuracies": 0.75, "rewards/chosen": -0.8303380012512207, "rewards/margins": 5.022956848144531, "rewards/rejected": -5.85329532623291, "step": 2526 }, { "epoch": 0.29, "learning_rate": 2.150348411479863e-07, "logits/chosen": -2.467426061630249, "logits/rejected": -2.579580783843994, "logps/chosen": -403.6004943847656, "logps/rejected": -295.8081970214844, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": 0.35209962725639343, "rewards/margins": 3.722083330154419, "rewards/rejected": -3.369983434677124, "step": 2527 }, { "epoch": 0.29, "learning_rate": 2.14999409472068e-07, "logits/chosen": -2.321190118789673, "logits/rejected": -2.3155312538146973, "logps/chosen": -175.9431610107422, "logps/rejected": -226.6474609375, "loss": 0.4849, "rewards/accuracies": 0.625, "rewards/chosen": -0.9819499254226685, "rewards/margins": 2.3070194721221924, "rewards/rejected": -3.2889695167541504, "step": 2528 }, { "epoch": 0.29, "learning_rate": 2.1496397779614976e-07, "logits/chosen": -2.2119476795196533, "logits/rejected": -2.2493815422058105, "logps/chosen": -234.21539306640625, "logps/rejected": -300.6205749511719, "loss": 0.562, "rewards/accuracies": 0.75, "rewards/chosen": -0.33483225107192993, "rewards/margins": 1.8870439529418945, "rewards/rejected": -2.2218761444091797, "step": 2529 }, { "epoch": 0.29, "learning_rate": 2.1492854612023148e-07, "logits/chosen": -1.8799748420715332, "logits/rejected": -2.1158785820007324, "logps/chosen": -222.13890075683594, "logps/rejected": -223.2462158203125, "loss": 0.4094, "rewards/accuracies": 0.75, "rewards/chosen": -0.435774564743042, "rewards/margins": 2.016730546951294, "rewards/rejected": -2.452505350112915, "step": 2530 }, { "epoch": 0.29, "learning_rate": 2.148931144443132e-07, "logits/chosen": -1.658432126045227, "logits/rejected": -1.5726585388183594, "logps/chosen": -280.7524719238281, "logps/rejected": -298.454833984375, "loss": 0.7576, "rewards/accuracies": 0.625, "rewards/chosen": -0.5815993547439575, "rewards/margins": 2.131830930709839, "rewards/rejected": -2.713430166244507, "step": 2531 }, { "epoch": 0.29, "learning_rate": 2.1485768276839495e-07, "logits/chosen": -2.9714138507843018, "logits/rejected": -2.919175386428833, "logps/chosen": -178.443115234375, "logps/rejected": -133.97642517089844, "loss": 0.2917, "rewards/accuracies": 1.0, "rewards/chosen": -0.05928654968738556, "rewards/margins": 1.7179152965545654, "rewards/rejected": -1.7772016525268555, "step": 2532 }, { "epoch": 0.29, "learning_rate": 2.1482225109247668e-07, "logits/chosen": -2.0244479179382324, "logits/rejected": -2.351897954940796, "logps/chosen": -540.7979125976562, "logps/rejected": -338.91217041015625, "loss": 0.2739, "rewards/accuracies": 0.75, "rewards/chosen": -0.48705923557281494, "rewards/margins": 1.8355441093444824, "rewards/rejected": -2.322603225708008, "step": 2533 }, { "epoch": 0.29, "learning_rate": 2.147868194165584e-07, "logits/chosen": -2.5561256408691406, "logits/rejected": -2.6778125762939453, "logps/chosen": -229.99276733398438, "logps/rejected": -217.60507202148438, "loss": 0.5552, "rewards/accuracies": 0.75, "rewards/chosen": -1.0051968097686768, "rewards/margins": 1.162032127380371, "rewards/rejected": -2.167228937149048, "step": 2534 }, { "epoch": 0.29, "learning_rate": 2.1475138774064012e-07, "logits/chosen": -2.0245018005371094, "logits/rejected": -2.349809408187866, "logps/chosen": -464.1607971191406, "logps/rejected": -365.2149353027344, "loss": 0.5502, "rewards/accuracies": 0.75, "rewards/chosen": -0.44163796305656433, "rewards/margins": 1.8277925252914429, "rewards/rejected": -2.26943039894104, "step": 2535 }, { "epoch": 0.3, "learning_rate": 2.1471595606472184e-07, "logits/chosen": -1.7524765729904175, "logits/rejected": -2.0873425006866455, "logps/chosen": -285.0338439941406, "logps/rejected": -227.56915283203125, "loss": 0.1665, "rewards/accuracies": 1.0, "rewards/chosen": -0.5647150278091431, "rewards/margins": 2.530390977859497, "rewards/rejected": -3.0951058864593506, "step": 2536 }, { "epoch": 0.3, "learning_rate": 2.1468052438880356e-07, "logits/chosen": -2.296182155609131, "logits/rejected": -2.254962921142578, "logps/chosen": -180.7959747314453, "logps/rejected": -242.15760803222656, "loss": 0.2681, "rewards/accuracies": 0.875, "rewards/chosen": -0.45223239064216614, "rewards/margins": 2.9311883449554443, "rewards/rejected": -3.383420467376709, "step": 2537 }, { "epoch": 0.3, "learning_rate": 2.146450927128853e-07, "logits/chosen": -2.780153751373291, "logits/rejected": -2.7498185634613037, "logps/chosen": -212.97930908203125, "logps/rejected": -251.3601531982422, "loss": 0.4615, "rewards/accuracies": 0.625, "rewards/chosen": -0.6916762590408325, "rewards/margins": 1.893078327178955, "rewards/rejected": -2.584754705429077, "step": 2538 }, { "epoch": 0.3, "learning_rate": 2.1460966103696703e-07, "logits/chosen": -2.0870018005371094, "logits/rejected": -2.2417819499969482, "logps/chosen": -311.5880126953125, "logps/rejected": -256.0032958984375, "loss": 0.3598, "rewards/accuracies": 0.875, "rewards/chosen": -0.9865826368331909, "rewards/margins": 1.67692232131958, "rewards/rejected": -2.6635048389434814, "step": 2539 }, { "epoch": 0.3, "learning_rate": 2.1457422936104876e-07, "logits/chosen": -2.2921738624572754, "logits/rejected": -2.4124789237976074, "logps/chosen": -463.2216796875, "logps/rejected": -340.97308349609375, "loss": 0.6251, "rewards/accuracies": 0.75, "rewards/chosen": -0.7892606258392334, "rewards/margins": 0.5807223320007324, "rewards/rejected": -1.3699829578399658, "step": 2540 }, { "epoch": 0.3, "learning_rate": 2.145387976851305e-07, "logits/chosen": -2.0969619750976562, "logits/rejected": -1.9209352731704712, "logps/chosen": -240.2244110107422, "logps/rejected": -381.3532409667969, "loss": 0.2895, "rewards/accuracies": 0.875, "rewards/chosen": -0.9568729996681213, "rewards/margins": 3.6741714477539062, "rewards/rejected": -4.631044387817383, "step": 2541 }, { "epoch": 0.3, "learning_rate": 2.1450336600921223e-07, "logits/chosen": -2.419430732727051, "logits/rejected": -2.457300901412964, "logps/chosen": -368.83441162109375, "logps/rejected": -304.298095703125, "loss": 0.3284, "rewards/accuracies": 0.875, "rewards/chosen": -0.8944985866546631, "rewards/margins": 1.3708322048187256, "rewards/rejected": -2.2653307914733887, "step": 2542 }, { "epoch": 0.3, "learning_rate": 2.1446793433329398e-07, "logits/chosen": -2.091369390487671, "logits/rejected": -2.6405375003814697, "logps/chosen": -369.7547607421875, "logps/rejected": -279.75848388671875, "loss": 0.6114, "rewards/accuracies": 0.625, "rewards/chosen": -1.4008021354675293, "rewards/margins": 0.83387291431427, "rewards/rejected": -2.234675168991089, "step": 2543 }, { "epoch": 0.3, "learning_rate": 2.144325026573757e-07, "logits/chosen": -1.6898483037948608, "logits/rejected": -2.121739625930786, "logps/chosen": -256.3876037597656, "logps/rejected": -224.85952758789062, "loss": 0.1921, "rewards/accuracies": 1.0, "rewards/chosen": 0.3262261152267456, "rewards/margins": 2.2378976345062256, "rewards/rejected": -1.9116718769073486, "step": 2544 }, { "epoch": 0.3, "learning_rate": 2.1439707098145742e-07, "logits/chosen": -1.8569974899291992, "logits/rejected": -2.192429542541504, "logps/chosen": -338.24591064453125, "logps/rejected": -253.52133178710938, "loss": 1.13, "rewards/accuracies": 0.875, "rewards/chosen": -1.58491849899292, "rewards/margins": 1.2095301151275635, "rewards/rejected": -2.7944488525390625, "step": 2545 }, { "epoch": 0.3, "learning_rate": 2.1436163930553914e-07, "logits/chosen": -2.5827949047088623, "logits/rejected": -2.631491184234619, "logps/chosen": -389.8240051269531, "logps/rejected": -330.2645263671875, "loss": 0.2693, "rewards/accuracies": 1.0, "rewards/chosen": -0.7319276332855225, "rewards/margins": 1.927665114402771, "rewards/rejected": -2.659592628479004, "step": 2546 }, { "epoch": 0.3, "learning_rate": 2.1432620762962086e-07, "logits/chosen": -2.4921927452087402, "logits/rejected": -2.3757214546203613, "logps/chosen": -156.644775390625, "logps/rejected": -196.5872039794922, "loss": 0.1795, "rewards/accuracies": 1.0, "rewards/chosen": -0.22755427658557892, "rewards/margins": 2.889796018600464, "rewards/rejected": -3.1173503398895264, "step": 2547 }, { "epoch": 0.3, "learning_rate": 2.1429077595370259e-07, "logits/chosen": -2.2522928714752197, "logits/rejected": -2.046821355819702, "logps/chosen": -264.7816467285156, "logps/rejected": -368.62884521484375, "loss": 0.5165, "rewards/accuracies": 0.75, "rewards/chosen": -0.6784238815307617, "rewards/margins": 2.8429083824157715, "rewards/rejected": -3.521332263946533, "step": 2548 }, { "epoch": 0.3, "learning_rate": 2.1425534427778433e-07, "logits/chosen": -2.337648391723633, "logits/rejected": -2.6032400131225586, "logps/chosen": -299.40032958984375, "logps/rejected": -211.0352020263672, "loss": 0.4828, "rewards/accuracies": 0.875, "rewards/chosen": -0.5232434272766113, "rewards/margins": 0.9209034442901611, "rewards/rejected": -1.4441468715667725, "step": 2549 }, { "epoch": 0.3, "learning_rate": 2.1421991260186606e-07, "logits/chosen": -2.329113483428955, "logits/rejected": -2.244281530380249, "logps/chosen": -206.89669799804688, "logps/rejected": -184.121337890625, "loss": 0.5675, "rewards/accuracies": 0.625, "rewards/chosen": -0.7923282384872437, "rewards/margins": 0.7351560592651367, "rewards/rejected": -1.5274842977523804, "step": 2550 }, { "epoch": 0.3, "learning_rate": 2.1418448092594778e-07, "logits/chosen": -2.805490016937256, "logits/rejected": -2.6878297328948975, "logps/chosen": -328.20654296875, "logps/rejected": -184.1094207763672, "loss": 0.5501, "rewards/accuracies": 0.75, "rewards/chosen": -1.3256984949111938, "rewards/margins": 0.5649362802505493, "rewards/rejected": -1.8906347751617432, "step": 2551 }, { "epoch": 0.3, "learning_rate": 2.141490492500295e-07, "logits/chosen": -1.3871078491210938, "logits/rejected": -1.4252395629882812, "logps/chosen": -533.41845703125, "logps/rejected": -483.2685852050781, "loss": 0.4536, "rewards/accuracies": 0.75, "rewards/chosen": -0.8303223252296448, "rewards/margins": 1.6787357330322266, "rewards/rejected": -2.5090579986572266, "step": 2552 }, { "epoch": 0.3, "learning_rate": 2.1411361757411125e-07, "logits/chosen": -2.475013017654419, "logits/rejected": -2.5368683338165283, "logps/chosen": -255.7469024658203, "logps/rejected": -211.8311767578125, "loss": 0.3921, "rewards/accuracies": 0.875, "rewards/chosen": -1.1779918670654297, "rewards/margins": 1.5918350219726562, "rewards/rejected": -2.769826889038086, "step": 2553 }, { "epoch": 0.3, "learning_rate": 2.14078185898193e-07, "logits/chosen": -2.0530736446380615, "logits/rejected": -1.8021992444992065, "logps/chosen": -266.2743835449219, "logps/rejected": -434.33892822265625, "loss": 0.3758, "rewards/accuracies": 0.75, "rewards/chosen": -0.9634113907814026, "rewards/margins": 2.9322547912597656, "rewards/rejected": -3.8956663608551025, "step": 2554 }, { "epoch": 0.3, "learning_rate": 2.1404275422227472e-07, "logits/chosen": -2.6773080825805664, "logits/rejected": -2.6919801235198975, "logps/chosen": -330.18109130859375, "logps/rejected": -390.2763671875, "loss": 0.8071, "rewards/accuracies": 0.625, "rewards/chosen": -1.4889535903930664, "rewards/margins": 0.6570836305618286, "rewards/rejected": -2.1460371017456055, "step": 2555 }, { "epoch": 0.3, "learning_rate": 2.1400732254635644e-07, "logits/chosen": -2.375849723815918, "logits/rejected": -2.170619010925293, "logps/chosen": -193.95196533203125, "logps/rejected": -259.37646484375, "loss": 0.2699, "rewards/accuracies": 0.875, "rewards/chosen": -0.8537381291389465, "rewards/margins": 2.6344709396362305, "rewards/rejected": -3.4882090091705322, "step": 2556 }, { "epoch": 0.3, "learning_rate": 2.1397189087043816e-07, "logits/chosen": -2.4741876125335693, "logits/rejected": -2.3509905338287354, "logps/chosen": -300.0631103515625, "logps/rejected": -183.92515563964844, "loss": 0.8448, "rewards/accuracies": 0.375, "rewards/chosen": -1.315861701965332, "rewards/margins": 0.8787112832069397, "rewards/rejected": -2.194572925567627, "step": 2557 }, { "epoch": 0.3, "learning_rate": 2.139364591945199e-07, "logits/chosen": -2.6255922317504883, "logits/rejected": -2.8591160774230957, "logps/chosen": -206.8905487060547, "logps/rejected": -174.03048706054688, "loss": 0.3497, "rewards/accuracies": 0.75, "rewards/chosen": -0.6024445295333862, "rewards/margins": 2.7891364097595215, "rewards/rejected": -3.3915810585021973, "step": 2558 }, { "epoch": 0.3, "learning_rate": 2.139010275186016e-07, "logits/chosen": -2.8973851203918457, "logits/rejected": -2.902285575866699, "logps/chosen": -208.32025146484375, "logps/rejected": -223.06077575683594, "loss": 0.3292, "rewards/accuracies": 0.875, "rewards/chosen": -1.139334797859192, "rewards/margins": 1.4390631914138794, "rewards/rejected": -2.5783979892730713, "step": 2559 }, { "epoch": 0.3, "learning_rate": 2.1386559584268333e-07, "logits/chosen": -1.680161476135254, "logits/rejected": -1.5693259239196777, "logps/chosen": -493.3486022949219, "logps/rejected": -508.0200500488281, "loss": 0.7089, "rewards/accuracies": 0.5, "rewards/chosen": -1.347231388092041, "rewards/margins": 1.0592546463012695, "rewards/rejected": -2.4064860343933105, "step": 2560 }, { "epoch": 0.3, "learning_rate": 2.1383016416676508e-07, "logits/chosen": -2.554274797439575, "logits/rejected": -2.710440158843994, "logps/chosen": -290.7469482421875, "logps/rejected": -256.419677734375, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": 0.41870439052581787, "rewards/margins": 3.2795321941375732, "rewards/rejected": -2.860827922821045, "step": 2561 }, { "epoch": 0.3, "learning_rate": 2.137947324908468e-07, "logits/chosen": -2.5328361988067627, "logits/rejected": -2.5581235885620117, "logps/chosen": -307.789306640625, "logps/rejected": -327.694091796875, "loss": 0.2622, "rewards/accuracies": 0.875, "rewards/chosen": -1.216713547706604, "rewards/margins": 2.4931373596191406, "rewards/rejected": -3.7098512649536133, "step": 2562 }, { "epoch": 0.3, "learning_rate": 2.1375930081492852e-07, "logits/chosen": -1.9491183757781982, "logits/rejected": -2.1379191875457764, "logps/chosen": -283.219482421875, "logps/rejected": -313.8524169921875, "loss": 0.8224, "rewards/accuracies": 0.625, "rewards/chosen": -1.1927485466003418, "rewards/margins": 1.3765716552734375, "rewards/rejected": -2.5693204402923584, "step": 2563 }, { "epoch": 0.3, "learning_rate": 2.1372386913901027e-07, "logits/chosen": -2.441416025161743, "logits/rejected": -2.434452533721924, "logps/chosen": -174.22471618652344, "logps/rejected": -221.33143615722656, "loss": 0.3138, "rewards/accuracies": 1.0, "rewards/chosen": -1.0940719842910767, "rewards/margins": 1.596376895904541, "rewards/rejected": -2.6904489994049072, "step": 2564 }, { "epoch": 0.3, "learning_rate": 2.1368843746309202e-07, "logits/chosen": -3.0159478187561035, "logits/rejected": -3.010021686553955, "logps/chosen": -212.3440704345703, "logps/rejected": -238.00233459472656, "loss": 0.2541, "rewards/accuracies": 0.875, "rewards/chosen": -1.1313419342041016, "rewards/margins": 2.14462947845459, "rewards/rejected": -3.2759711742401123, "step": 2565 }, { "epoch": 0.3, "learning_rate": 2.1365300578717374e-07, "logits/chosen": -2.8007259368896484, "logits/rejected": -2.437293529510498, "logps/chosen": -197.3070068359375, "logps/rejected": -234.79383850097656, "loss": 0.6553, "rewards/accuracies": 0.75, "rewards/chosen": -0.9126080274581909, "rewards/margins": 2.3788390159606934, "rewards/rejected": -3.2914469242095947, "step": 2566 }, { "epoch": 0.3, "learning_rate": 2.1361757411125547e-07, "logits/chosen": -2.4267590045928955, "logits/rejected": -2.5445449352264404, "logps/chosen": -273.1348876953125, "logps/rejected": -282.7674560546875, "loss": 0.1588, "rewards/accuracies": 1.0, "rewards/chosen": -0.6609268188476562, "rewards/margins": 3.722398042678833, "rewards/rejected": -4.383325099945068, "step": 2567 }, { "epoch": 0.3, "learning_rate": 2.135821424353372e-07, "logits/chosen": -2.3227498531341553, "logits/rejected": -2.548868179321289, "logps/chosen": -305.441650390625, "logps/rejected": -246.95455932617188, "loss": 0.392, "rewards/accuracies": 0.875, "rewards/chosen": -0.39701545238494873, "rewards/margins": 1.585936427116394, "rewards/rejected": -1.9829518795013428, "step": 2568 }, { "epoch": 0.3, "learning_rate": 2.135467107594189e-07, "logits/chosen": -2.3718113899230957, "logits/rejected": -2.1741511821746826, "logps/chosen": -348.6911315917969, "logps/rejected": -266.3795471191406, "loss": 0.2698, "rewards/accuracies": 0.875, "rewards/chosen": 0.05877622216939926, "rewards/margins": 1.889040470123291, "rewards/rejected": -1.8302642107009888, "step": 2569 }, { "epoch": 0.3, "learning_rate": 2.1351127908350063e-07, "logits/chosen": -1.788395643234253, "logits/rejected": -2.0295250415802, "logps/chosen": -254.83863830566406, "logps/rejected": -252.07754516601562, "loss": 0.7222, "rewards/accuracies": 0.625, "rewards/chosen": -2.093961000442505, "rewards/margins": 1.495567798614502, "rewards/rejected": -3.589528799057007, "step": 2570 }, { "epoch": 0.3, "learning_rate": 2.1347584740758235e-07, "logits/chosen": -2.507138252258301, "logits/rejected": -2.6624112129211426, "logps/chosen": -167.6116943359375, "logps/rejected": -154.7735595703125, "loss": 0.4048, "rewards/accuracies": 0.875, "rewards/chosen": -0.8135145902633667, "rewards/margins": 0.9753490686416626, "rewards/rejected": -1.7888636589050293, "step": 2571 }, { "epoch": 0.3, "learning_rate": 2.134404157316641e-07, "logits/chosen": -2.4818649291992188, "logits/rejected": -2.5543031692504883, "logps/chosen": -192.1553192138672, "logps/rejected": -242.59072875976562, "loss": 0.3473, "rewards/accuracies": 0.75, "rewards/chosen": -0.3355586528778076, "rewards/margins": 2.013864040374756, "rewards/rejected": -2.3494229316711426, "step": 2572 }, { "epoch": 0.3, "learning_rate": 2.1340498405574582e-07, "logits/chosen": -2.338322401046753, "logits/rejected": -2.352806568145752, "logps/chosen": -233.18942260742188, "logps/rejected": -284.0077209472656, "loss": 0.1236, "rewards/accuracies": 1.0, "rewards/chosen": -0.9903063774108887, "rewards/margins": 3.2890801429748535, "rewards/rejected": -4.279386520385742, "step": 2573 }, { "epoch": 0.3, "learning_rate": 2.1336955237982755e-07, "logits/chosen": -2.3057117462158203, "logits/rejected": -2.434682846069336, "logps/chosen": -170.9749298095703, "logps/rejected": -154.5333709716797, "loss": 0.5823, "rewards/accuracies": 0.625, "rewards/chosen": -0.9230182766914368, "rewards/margins": 0.6151262521743774, "rewards/rejected": -1.5381447076797485, "step": 2574 }, { "epoch": 0.3, "learning_rate": 2.1333412070390927e-07, "logits/chosen": -2.5503361225128174, "logits/rejected": -2.627332925796509, "logps/chosen": -370.56854248046875, "logps/rejected": -287.74993896484375, "loss": 0.4156, "rewards/accuracies": 0.75, "rewards/chosen": -0.945339024066925, "rewards/margins": 1.933487892150879, "rewards/rejected": -2.8788270950317383, "step": 2575 }, { "epoch": 0.3, "learning_rate": 2.1329868902799102e-07, "logits/chosen": -1.8307360410690308, "logits/rejected": -2.1788668632507324, "logps/chosen": -420.8900146484375, "logps/rejected": -287.46881103515625, "loss": 0.3282, "rewards/accuracies": 0.875, "rewards/chosen": -0.46200910210609436, "rewards/margins": 1.8660531044006348, "rewards/rejected": -2.328062057495117, "step": 2576 }, { "epoch": 0.3, "learning_rate": 2.1326325735207277e-07, "logits/chosen": -2.641584873199463, "logits/rejected": -2.4264044761657715, "logps/chosen": -233.10919189453125, "logps/rejected": -300.5948181152344, "loss": 0.2567, "rewards/accuracies": 1.0, "rewards/chosen": -0.9543836116790771, "rewards/margins": 1.8592082262039185, "rewards/rejected": -2.813591718673706, "step": 2577 }, { "epoch": 0.3, "learning_rate": 2.132278256761545e-07, "logits/chosen": -2.2594223022460938, "logits/rejected": -1.9873428344726562, "logps/chosen": -161.418212890625, "logps/rejected": -249.96804809570312, "loss": 0.5935, "rewards/accuracies": 0.625, "rewards/chosen": -0.07782843708992004, "rewards/margins": 1.0898891687393188, "rewards/rejected": -1.1677175760269165, "step": 2578 }, { "epoch": 0.3, "learning_rate": 2.131923940002362e-07, "logits/chosen": -2.7860255241394043, "logits/rejected": -2.6397600173950195, "logps/chosen": -354.02996826171875, "logps/rejected": -356.10198974609375, "loss": 0.2757, "rewards/accuracies": 0.875, "rewards/chosen": -0.8101099729537964, "rewards/margins": 1.934465765953064, "rewards/rejected": -2.7445757389068604, "step": 2579 }, { "epoch": 0.3, "learning_rate": 2.1315696232431793e-07, "logits/chosen": -2.0987277030944824, "logits/rejected": -2.3405189514160156, "logps/chosen": -461.44110107421875, "logps/rejected": -289.266357421875, "loss": 0.2576, "rewards/accuracies": 1.0, "rewards/chosen": -0.5622744560241699, "rewards/margins": 1.505702257156372, "rewards/rejected": -2.067976713180542, "step": 2580 }, { "epoch": 0.3, "learning_rate": 2.1312153064839965e-07, "logits/chosen": -2.2263922691345215, "logits/rejected": -2.131243944168091, "logps/chosen": -363.37567138671875, "logps/rejected": -444.2206115722656, "loss": 0.5159, "rewards/accuracies": 0.75, "rewards/chosen": -1.322045087814331, "rewards/margins": 1.8033368587493896, "rewards/rejected": -3.1253819465637207, "step": 2581 }, { "epoch": 0.3, "learning_rate": 2.1308609897248138e-07, "logits/chosen": -1.8517756462097168, "logits/rejected": -2.052490472793579, "logps/chosen": -287.9496765136719, "logps/rejected": -332.45440673828125, "loss": 0.7047, "rewards/accuracies": 0.625, "rewards/chosen": -0.8589866161346436, "rewards/margins": 0.7502407431602478, "rewards/rejected": -1.609227180480957, "step": 2582 }, { "epoch": 0.3, "learning_rate": 2.1305066729656313e-07, "logits/chosen": -2.078005790710449, "logits/rejected": -2.3747832775115967, "logps/chosen": -313.2320861816406, "logps/rejected": -243.12908935546875, "loss": 0.4894, "rewards/accuracies": 0.625, "rewards/chosen": -0.7399553060531616, "rewards/margins": 0.8784913420677185, "rewards/rejected": -1.618446707725525, "step": 2583 }, { "epoch": 0.3, "learning_rate": 2.1301523562064485e-07, "logits/chosen": -1.7394474744796753, "logits/rejected": -1.9447870254516602, "logps/chosen": -236.136962890625, "logps/rejected": -213.13027954101562, "loss": 0.5938, "rewards/accuracies": 0.5, "rewards/chosen": -1.4992235898971558, "rewards/margins": 1.3519543409347534, "rewards/rejected": -2.851177930831909, "step": 2584 }, { "epoch": 0.3, "learning_rate": 2.1297980394472657e-07, "logits/chosen": -2.3467164039611816, "logits/rejected": -2.3690361976623535, "logps/chosen": -200.31387329101562, "logps/rejected": -558.9495239257812, "loss": 0.1757, "rewards/accuracies": 0.875, "rewards/chosen": -0.5098056793212891, "rewards/margins": 2.6347427368164062, "rewards/rejected": -3.1445486545562744, "step": 2585 }, { "epoch": 0.3, "learning_rate": 2.129443722688083e-07, "logits/chosen": -2.170457363128662, "logits/rejected": -2.2061541080474854, "logps/chosen": -348.7015075683594, "logps/rejected": -282.8475646972656, "loss": 0.6392, "rewards/accuracies": 0.375, "rewards/chosen": -0.8532479405403137, "rewards/margins": 1.0584313869476318, "rewards/rejected": -1.9116792678833008, "step": 2586 }, { "epoch": 0.3, "learning_rate": 2.1290894059289001e-07, "logits/chosen": -2.223428249359131, "logits/rejected": -2.410503387451172, "logps/chosen": -194.19691467285156, "logps/rejected": -187.31814575195312, "loss": 1.2347, "rewards/accuracies": 0.625, "rewards/chosen": -1.2361310720443726, "rewards/margins": 0.6413974761962891, "rewards/rejected": -1.8775286674499512, "step": 2587 }, { "epoch": 0.3, "learning_rate": 2.128735089169718e-07, "logits/chosen": -2.449988842010498, "logits/rejected": -2.3299474716186523, "logps/chosen": -365.9095153808594, "logps/rejected": -390.0362854003906, "loss": 0.5369, "rewards/accuracies": 0.625, "rewards/chosen": -1.9473435878753662, "rewards/margins": 2.1307365894317627, "rewards/rejected": -4.078080654144287, "step": 2588 }, { "epoch": 0.3, "learning_rate": 2.128380772410535e-07, "logits/chosen": -2.0173096656799316, "logits/rejected": -1.8287954330444336, "logps/chosen": -278.3974304199219, "logps/rejected": -304.689453125, "loss": 0.5405, "rewards/accuracies": 0.625, "rewards/chosen": -0.5452982187271118, "rewards/margins": 1.493009090423584, "rewards/rejected": -2.0383071899414062, "step": 2589 }, { "epoch": 0.3, "learning_rate": 2.1280264556513523e-07, "logits/chosen": -2.9732067584991455, "logits/rejected": -3.045532703399658, "logps/chosen": -260.14666748046875, "logps/rejected": -250.9874725341797, "loss": 0.3417, "rewards/accuracies": 0.875, "rewards/chosen": -0.8266110420227051, "rewards/margins": 2.058135986328125, "rewards/rejected": -2.88474702835083, "step": 2590 }, { "epoch": 0.3, "learning_rate": 2.1276721388921696e-07, "logits/chosen": -2.44769287109375, "logits/rejected": -2.013193368911743, "logps/chosen": -383.69525146484375, "logps/rejected": -332.4159851074219, "loss": 0.4063, "rewards/accuracies": 0.75, "rewards/chosen": -0.6982718110084534, "rewards/margins": 2.229570150375366, "rewards/rejected": -2.927841901779175, "step": 2591 }, { "epoch": 0.3, "learning_rate": 2.1273178221329868e-07, "logits/chosen": -2.5451931953430176, "logits/rejected": -2.4386518001556396, "logps/chosen": -311.6807861328125, "logps/rejected": -398.2003173828125, "loss": 0.2306, "rewards/accuracies": 1.0, "rewards/chosen": -1.3065221309661865, "rewards/margins": 2.2609922885894775, "rewards/rejected": -3.567514181137085, "step": 2592 }, { "epoch": 0.3, "learning_rate": 2.126963505373804e-07, "logits/chosen": -2.0170228481292725, "logits/rejected": -2.314340353012085, "logps/chosen": -659.23486328125, "logps/rejected": -409.014404296875, "loss": 0.1879, "rewards/accuracies": 1.0, "rewards/chosen": -0.17520523071289062, "rewards/margins": 2.439131736755371, "rewards/rejected": -2.6143369674682617, "step": 2593 }, { "epoch": 0.3, "learning_rate": 2.1266091886146215e-07, "logits/chosen": -2.5408551692962646, "logits/rejected": -2.7623209953308105, "logps/chosen": -419.5908203125, "logps/rejected": -262.07952880859375, "loss": 0.4076, "rewards/accuracies": 0.75, "rewards/chosen": -0.42643341422080994, "rewards/margins": 1.9627695083618164, "rewards/rejected": -2.3892030715942383, "step": 2594 }, { "epoch": 0.3, "learning_rate": 2.1262548718554387e-07, "logits/chosen": -2.130004405975342, "logits/rejected": -2.1696572303771973, "logps/chosen": -237.37547302246094, "logps/rejected": -297.6939392089844, "loss": 0.6062, "rewards/accuracies": 0.625, "rewards/chosen": -0.8029007911682129, "rewards/margins": 0.5593338012695312, "rewards/rejected": -1.3622345924377441, "step": 2595 }, { "epoch": 0.3, "learning_rate": 2.125900555096256e-07, "logits/chosen": -2.3736417293548584, "logits/rejected": -2.410658359527588, "logps/chosen": -264.06817626953125, "logps/rejected": -280.2198486328125, "loss": 0.4736, "rewards/accuracies": 0.875, "rewards/chosen": -1.343987226486206, "rewards/margins": 1.5078290700912476, "rewards/rejected": -2.851816177368164, "step": 2596 }, { "epoch": 0.3, "learning_rate": 2.1255462383370731e-07, "logits/chosen": -2.7688183784484863, "logits/rejected": -2.6200129985809326, "logps/chosen": -276.15789794921875, "logps/rejected": -315.2568359375, "loss": 0.2082, "rewards/accuracies": 0.875, "rewards/chosen": -0.957351565361023, "rewards/margins": 2.4698352813720703, "rewards/rejected": -3.427186965942383, "step": 2597 }, { "epoch": 0.3, "learning_rate": 2.1251919215778904e-07, "logits/chosen": -2.7673873901367188, "logits/rejected": -2.792937994003296, "logps/chosen": -253.6224365234375, "logps/rejected": -289.9352722167969, "loss": 0.4511, "rewards/accuracies": 0.75, "rewards/chosen": -0.6798582077026367, "rewards/margins": 0.9627512693405151, "rewards/rejected": -1.6426095962524414, "step": 2598 }, { "epoch": 0.3, "learning_rate": 2.1248376048187076e-07, "logits/chosen": -2.4799113273620605, "logits/rejected": -2.410592794418335, "logps/chosen": -215.07684326171875, "logps/rejected": -262.2229919433594, "loss": 0.3251, "rewards/accuracies": 0.75, "rewards/chosen": -0.20404258370399475, "rewards/margins": 1.8311035633087158, "rewards/rejected": -2.0351459980010986, "step": 2599 }, { "epoch": 0.3, "learning_rate": 2.1244832880595253e-07, "logits/chosen": -2.0321807861328125, "logits/rejected": -2.256634473800659, "logps/chosen": -370.8642883300781, "logps/rejected": -295.35736083984375, "loss": 0.1515, "rewards/accuracies": 1.0, "rewards/chosen": -0.708146333694458, "rewards/margins": 2.2296202182769775, "rewards/rejected": -2.9377665519714355, "step": 2600 }, { "epoch": 0.3, "learning_rate": 2.1241289713003426e-07, "logits/chosen": -1.658935308456421, "logits/rejected": -1.6585564613342285, "logps/chosen": -491.05328369140625, "logps/rejected": -448.1021728515625, "loss": 1.0592, "rewards/accuracies": 0.375, "rewards/chosen": -0.8309261798858643, "rewards/margins": -0.4665265679359436, "rewards/rejected": -0.36439961194992065, "step": 2601 }, { "epoch": 0.3, "learning_rate": 2.1237746545411598e-07, "logits/chosen": -2.0054657459259033, "logits/rejected": -1.9933688640594482, "logps/chosen": -201.70379638671875, "logps/rejected": -203.50924682617188, "loss": 0.4371, "rewards/accuracies": 0.625, "rewards/chosen": -0.14479874074459076, "rewards/margins": 1.36676025390625, "rewards/rejected": -1.511559009552002, "step": 2602 }, { "epoch": 0.3, "learning_rate": 2.123420337781977e-07, "logits/chosen": -2.478367328643799, "logits/rejected": -2.4522769451141357, "logps/chosen": -268.9043273925781, "logps/rejected": -254.5853271484375, "loss": 0.5811, "rewards/accuracies": 0.625, "rewards/chosen": -0.5560452938079834, "rewards/margins": 1.874540090560913, "rewards/rejected": -2.4305853843688965, "step": 2603 }, { "epoch": 0.3, "learning_rate": 2.1230660210227942e-07, "logits/chosen": -1.834705114364624, "logits/rejected": -2.2807557582855225, "logps/chosen": -406.5901794433594, "logps/rejected": -355.1419677734375, "loss": 0.5577, "rewards/accuracies": 0.75, "rewards/chosen": -1.0850335359573364, "rewards/margins": 1.8589515686035156, "rewards/rejected": -2.9439849853515625, "step": 2604 }, { "epoch": 0.3, "learning_rate": 2.1227117042636114e-07, "logits/chosen": -2.3453547954559326, "logits/rejected": -2.3312788009643555, "logps/chosen": -313.15283203125, "logps/rejected": -301.3948669433594, "loss": 0.4597, "rewards/accuracies": 0.875, "rewards/chosen": -0.9407507181167603, "rewards/margins": 1.9643663167953491, "rewards/rejected": -2.9051170349121094, "step": 2605 }, { "epoch": 0.3, "learning_rate": 2.122357387504429e-07, "logits/chosen": -2.81205415725708, "logits/rejected": -2.751915454864502, "logps/chosen": -242.0782470703125, "logps/rejected": -187.39796447753906, "loss": 0.307, "rewards/accuracies": 1.0, "rewards/chosen": -0.3091857433319092, "rewards/margins": 1.7634137868881226, "rewards/rejected": -2.072599411010742, "step": 2606 }, { "epoch": 0.3, "learning_rate": 2.1220030707452462e-07, "logits/chosen": -1.9987821578979492, "logits/rejected": -2.2674148082733154, "logps/chosen": -418.02392578125, "logps/rejected": -334.96258544921875, "loss": 0.5651, "rewards/accuracies": 0.625, "rewards/chosen": -1.7907872200012207, "rewards/margins": 0.9457281231880188, "rewards/rejected": -2.736515522003174, "step": 2607 }, { "epoch": 0.3, "learning_rate": 2.1216487539860634e-07, "logits/chosen": -2.056626319885254, "logits/rejected": -2.3265323638916016, "logps/chosen": -327.2454833984375, "logps/rejected": -355.4508361816406, "loss": 0.4877, "rewards/accuracies": 0.625, "rewards/chosen": -0.8456428050994873, "rewards/margins": 1.4092652797698975, "rewards/rejected": -2.2549080848693848, "step": 2608 }, { "epoch": 0.3, "learning_rate": 2.1212944372268806e-07, "logits/chosen": -2.259206533432007, "logits/rejected": -2.403496742248535, "logps/chosen": -307.18438720703125, "logps/rejected": -312.2645263671875, "loss": 0.2615, "rewards/accuracies": 0.875, "rewards/chosen": -1.0662508010864258, "rewards/margins": 2.3492517471313477, "rewards/rejected": -3.4155025482177734, "step": 2609 }, { "epoch": 0.3, "learning_rate": 2.1209401204676978e-07, "logits/chosen": -1.7066807746887207, "logits/rejected": -1.8605637550354004, "logps/chosen": -164.90879821777344, "logps/rejected": -251.44285583496094, "loss": 0.6079, "rewards/accuracies": 0.75, "rewards/chosen": -1.0301826000213623, "rewards/margins": 2.6616854667663574, "rewards/rejected": -3.691868305206299, "step": 2610 }, { "epoch": 0.3, "learning_rate": 2.1205858037085156e-07, "logits/chosen": -2.483349084854126, "logits/rejected": -2.168569326400757, "logps/chosen": -351.0950927734375, "logps/rejected": -362.4216613769531, "loss": 0.9923, "rewards/accuracies": 0.75, "rewards/chosen": -1.3393306732177734, "rewards/margins": 0.3561866283416748, "rewards/rejected": -1.6955173015594482, "step": 2611 }, { "epoch": 0.3, "learning_rate": 2.1202314869493328e-07, "logits/chosen": -2.4357848167419434, "logits/rejected": -2.5168371200561523, "logps/chosen": -315.2315673828125, "logps/rejected": -248.7649688720703, "loss": 0.1825, "rewards/accuracies": 1.0, "rewards/chosen": -0.6943668127059937, "rewards/margins": 2.862358331680298, "rewards/rejected": -3.556725025177002, "step": 2612 }, { "epoch": 0.3, "learning_rate": 2.11987717019015e-07, "logits/chosen": -2.2405080795288086, "logits/rejected": -2.593627691268921, "logps/chosen": -508.3279113769531, "logps/rejected": -391.7936706542969, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": -0.29705363512039185, "rewards/margins": 3.110368251800537, "rewards/rejected": -3.407421827316284, "step": 2613 }, { "epoch": 0.3, "learning_rate": 2.1195228534309672e-07, "logits/chosen": -2.561253786087036, "logits/rejected": -2.840947151184082, "logps/chosen": -419.335205078125, "logps/rejected": -248.33193969726562, "loss": 0.6347, "rewards/accuracies": 0.875, "rewards/chosen": -1.216674566268921, "rewards/margins": 0.8248121738433838, "rewards/rejected": -2.0414867401123047, "step": 2614 }, { "epoch": 0.3, "learning_rate": 2.1191685366717845e-07, "logits/chosen": -2.5178534984588623, "logits/rejected": -2.454564332962036, "logps/chosen": -452.730224609375, "logps/rejected": -263.9150390625, "loss": 0.4406, "rewards/accuracies": 0.625, "rewards/chosen": -0.9623321890830994, "rewards/margins": 1.2375288009643555, "rewards/rejected": -2.1998610496520996, "step": 2615 }, { "epoch": 0.3, "learning_rate": 2.1188142199126017e-07, "logits/chosen": -1.4523088932037354, "logits/rejected": -1.9552210569381714, "logps/chosen": -401.03717041015625, "logps/rejected": -356.2298278808594, "loss": 0.2712, "rewards/accuracies": 1.0, "rewards/chosen": -0.967836320400238, "rewards/margins": 1.5908634662628174, "rewards/rejected": -2.5586998462677, "step": 2616 }, { "epoch": 0.3, "learning_rate": 2.1184599031534192e-07, "logits/chosen": -2.603978395462036, "logits/rejected": -2.7571914196014404, "logps/chosen": -218.9181365966797, "logps/rejected": -296.4081115722656, "loss": 0.3591, "rewards/accuracies": 0.75, "rewards/chosen": -0.8356574177742004, "rewards/margins": 2.6117098331451416, "rewards/rejected": -3.4473671913146973, "step": 2617 }, { "epoch": 0.3, "learning_rate": 2.1181055863942364e-07, "logits/chosen": -2.5307421684265137, "logits/rejected": -2.3866899013519287, "logps/chosen": -213.90304565429688, "logps/rejected": -289.0001220703125, "loss": 0.3718, "rewards/accuracies": 0.75, "rewards/chosen": -1.394941806793213, "rewards/margins": 1.557161569595337, "rewards/rejected": -2.95210337638855, "step": 2618 }, { "epoch": 0.3, "learning_rate": 2.1177512696350536e-07, "logits/chosen": -2.2501115798950195, "logits/rejected": -2.0865461826324463, "logps/chosen": -253.7670440673828, "logps/rejected": -287.92144775390625, "loss": 0.2165, "rewards/accuracies": 1.0, "rewards/chosen": -0.3729434907436371, "rewards/margins": 2.7201356887817383, "rewards/rejected": -3.0930793285369873, "step": 2619 }, { "epoch": 0.3, "learning_rate": 2.1173969528758708e-07, "logits/chosen": -2.152005910873413, "logits/rejected": -2.4854536056518555, "logps/chosen": -419.6391906738281, "logps/rejected": -291.58441162109375, "loss": 0.4318, "rewards/accuracies": 0.875, "rewards/chosen": -0.8422344923019409, "rewards/margins": 0.9933696389198303, "rewards/rejected": -1.8356040716171265, "step": 2620 }, { "epoch": 0.3, "learning_rate": 2.117042636116688e-07, "logits/chosen": -2.2342865467071533, "logits/rejected": -1.9242534637451172, "logps/chosen": -320.97076416015625, "logps/rejected": -397.93792724609375, "loss": 0.6453, "rewards/accuracies": 0.875, "rewards/chosen": -0.5702366828918457, "rewards/margins": 1.847440242767334, "rewards/rejected": -2.4176766872406006, "step": 2621 }, { "epoch": 0.31, "learning_rate": 2.1166883193575053e-07, "logits/chosen": -2.6408190727233887, "logits/rejected": -2.531914710998535, "logps/chosen": -359.55718994140625, "logps/rejected": -267.2164306640625, "loss": 0.4981, "rewards/accuracies": 0.875, "rewards/chosen": -1.2366304397583008, "rewards/margins": 2.831247091293335, "rewards/rejected": -4.067877769470215, "step": 2622 }, { "epoch": 0.31, "learning_rate": 2.116334002598323e-07, "logits/chosen": -2.1648788452148438, "logits/rejected": -2.4661946296691895, "logps/chosen": -283.6842041015625, "logps/rejected": -239.6512908935547, "loss": 0.1733, "rewards/accuracies": 1.0, "rewards/chosen": -1.0294183492660522, "rewards/margins": 2.941720485687256, "rewards/rejected": -3.9711389541625977, "step": 2623 }, { "epoch": 0.31, "learning_rate": 2.1159796858391402e-07, "logits/chosen": -2.277092218399048, "logits/rejected": -2.169022560119629, "logps/chosen": -244.0896453857422, "logps/rejected": -260.3732604980469, "loss": 0.2086, "rewards/accuracies": 1.0, "rewards/chosen": -0.4678685963153839, "rewards/margins": 2.222628593444824, "rewards/rejected": -2.690497398376465, "step": 2624 }, { "epoch": 0.31, "learning_rate": 2.1156253690799575e-07, "logits/chosen": -2.20923113822937, "logits/rejected": -2.0786266326904297, "logps/chosen": -376.7230224609375, "logps/rejected": -361.3587341308594, "loss": 0.2817, "rewards/accuracies": 0.875, "rewards/chosen": -0.9471611976623535, "rewards/margins": 2.585561513900757, "rewards/rejected": -3.5327227115631104, "step": 2625 }, { "epoch": 0.31, "learning_rate": 2.1152710523207747e-07, "logits/chosen": -2.4404876232147217, "logits/rejected": -2.4123077392578125, "logps/chosen": -148.29144287109375, "logps/rejected": -177.520263671875, "loss": 0.2516, "rewards/accuracies": 0.875, "rewards/chosen": -0.7547823190689087, "rewards/margins": 2.3257863521575928, "rewards/rejected": -3.080568790435791, "step": 2626 }, { "epoch": 0.31, "learning_rate": 2.114916735561592e-07, "logits/chosen": -2.4032299518585205, "logits/rejected": -2.618673324584961, "logps/chosen": -286.8197021484375, "logps/rejected": -238.5795135498047, "loss": 0.2876, "rewards/accuracies": 0.875, "rewards/chosen": -0.1468677818775177, "rewards/margins": 1.5315043926239014, "rewards/rejected": -1.6783721446990967, "step": 2627 }, { "epoch": 0.31, "learning_rate": 2.1145624188024094e-07, "logits/chosen": -2.320826530456543, "logits/rejected": -2.5137197971343994, "logps/chosen": -221.326904296875, "logps/rejected": -145.04627990722656, "loss": 0.3531, "rewards/accuracies": 1.0, "rewards/chosen": -0.5586292147636414, "rewards/margins": 1.2979543209075928, "rewards/rejected": -1.856583595275879, "step": 2628 }, { "epoch": 0.31, "learning_rate": 2.1142081020432266e-07, "logits/chosen": -2.181326150894165, "logits/rejected": -1.8664754629135132, "logps/chosen": -261.5431213378906, "logps/rejected": -301.7285461425781, "loss": 0.265, "rewards/accuracies": 0.875, "rewards/chosen": 0.22348226606845856, "rewards/margins": 1.9520633220672607, "rewards/rejected": -1.7285809516906738, "step": 2629 }, { "epoch": 0.31, "learning_rate": 2.1138537852840438e-07, "logits/chosen": -2.8733761310577393, "logits/rejected": -2.8309526443481445, "logps/chosen": -167.49317932128906, "logps/rejected": -230.95228576660156, "loss": 0.2215, "rewards/accuracies": 1.0, "rewards/chosen": -0.7066681981086731, "rewards/margins": 2.5904173851013184, "rewards/rejected": -3.2970852851867676, "step": 2630 }, { "epoch": 0.31, "learning_rate": 2.113499468524861e-07, "logits/chosen": -2.6493706703186035, "logits/rejected": -2.5405726432800293, "logps/chosen": -265.2339172363281, "logps/rejected": -317.2952880859375, "loss": 0.3547, "rewards/accuracies": 0.75, "rewards/chosen": -0.37690725922584534, "rewards/margins": 1.6584751605987549, "rewards/rejected": -2.0353822708129883, "step": 2631 }, { "epoch": 0.31, "learning_rate": 2.1131451517656783e-07, "logits/chosen": -2.4469263553619385, "logits/rejected": -2.388017416000366, "logps/chosen": -284.89300537109375, "logps/rejected": -309.20916748046875, "loss": 0.2181, "rewards/accuracies": 0.875, "rewards/chosen": -0.35175448656082153, "rewards/margins": 2.2137906551361084, "rewards/rejected": -2.565545082092285, "step": 2632 }, { "epoch": 0.31, "learning_rate": 2.1127908350064955e-07, "logits/chosen": -1.7910642623901367, "logits/rejected": -1.699021577835083, "logps/chosen": -198.5264892578125, "logps/rejected": -294.5920104980469, "loss": 0.452, "rewards/accuracies": 0.75, "rewards/chosen": -0.37074053287506104, "rewards/margins": 1.4371693134307861, "rewards/rejected": -1.8079099655151367, "step": 2633 }, { "epoch": 0.31, "learning_rate": 2.1124365182473127e-07, "logits/chosen": -2.6893982887268066, "logits/rejected": -2.8123655319213867, "logps/chosen": -166.62506103515625, "logps/rejected": -201.0951690673828, "loss": 0.1793, "rewards/accuracies": 1.0, "rewards/chosen": -0.3479629158973694, "rewards/margins": 3.9292595386505127, "rewards/rejected": -4.277222156524658, "step": 2634 }, { "epoch": 0.31, "learning_rate": 2.1120822014881305e-07, "logits/chosen": -2.5012810230255127, "logits/rejected": -2.5813379287719727, "logps/chosen": -350.53759765625, "logps/rejected": -286.6799011230469, "loss": 0.1268, "rewards/accuracies": 1.0, "rewards/chosen": 0.13475248217582703, "rewards/margins": 2.1292128562927246, "rewards/rejected": -1.9944602251052856, "step": 2635 }, { "epoch": 0.31, "learning_rate": 2.1117278847289477e-07, "logits/chosen": -2.7884576320648193, "logits/rejected": -2.741788148880005, "logps/chosen": -288.32318115234375, "logps/rejected": -191.75872802734375, "loss": 0.4837, "rewards/accuracies": 0.625, "rewards/chosen": -1.4506542682647705, "rewards/margins": 1.6181352138519287, "rewards/rejected": -3.068789482116699, "step": 2636 }, { "epoch": 0.31, "learning_rate": 2.111373567969765e-07, "logits/chosen": -2.429070472717285, "logits/rejected": -2.6503121852874756, "logps/chosen": -242.72433471679688, "logps/rejected": -191.52037048339844, "loss": 0.2607, "rewards/accuracies": 0.875, "rewards/chosen": -0.1555165946483612, "rewards/margins": 2.1063499450683594, "rewards/rejected": -2.261866807937622, "step": 2637 }, { "epoch": 0.31, "learning_rate": 2.111019251210582e-07, "logits/chosen": -2.692335844039917, "logits/rejected": -2.626697540283203, "logps/chosen": -160.56411743164062, "logps/rejected": -208.75839233398438, "loss": 0.2978, "rewards/accuracies": 0.875, "rewards/chosen": -0.734602689743042, "rewards/margins": 1.7118511199951172, "rewards/rejected": -2.44645357131958, "step": 2638 }, { "epoch": 0.31, "learning_rate": 2.1106649344513996e-07, "logits/chosen": -2.485778570175171, "logits/rejected": -2.694890022277832, "logps/chosen": -298.801025390625, "logps/rejected": -253.38330078125, "loss": 0.6193, "rewards/accuracies": 0.75, "rewards/chosen": -0.5510451197624207, "rewards/margins": 0.774277925491333, "rewards/rejected": -1.3253231048583984, "step": 2639 }, { "epoch": 0.31, "learning_rate": 2.1103106176922168e-07, "logits/chosen": -3.069822311401367, "logits/rejected": -2.938406467437744, "logps/chosen": -349.7483825683594, "logps/rejected": -244.55052185058594, "loss": 0.1686, "rewards/accuracies": 1.0, "rewards/chosen": -0.9716681241989136, "rewards/margins": 2.650838613510132, "rewards/rejected": -3.622506618499756, "step": 2640 }, { "epoch": 0.31, "learning_rate": 2.109956300933034e-07, "logits/chosen": -2.6175661087036133, "logits/rejected": -2.672595500946045, "logps/chosen": -249.59304809570312, "logps/rejected": -345.5322265625, "loss": 0.4742, "rewards/accuracies": 0.875, "rewards/chosen": -0.7195810079574585, "rewards/margins": 2.117192029953003, "rewards/rejected": -2.836772918701172, "step": 2641 }, { "epoch": 0.31, "learning_rate": 2.1096019841738513e-07, "logits/chosen": -2.5615475177764893, "logits/rejected": -2.543086051940918, "logps/chosen": -142.9766082763672, "logps/rejected": -213.3986358642578, "loss": 0.1695, "rewards/accuracies": 1.0, "rewards/chosen": -0.7747042179107666, "rewards/margins": 1.8799471855163574, "rewards/rejected": -2.654651641845703, "step": 2642 }, { "epoch": 0.31, "learning_rate": 2.1092476674146685e-07, "logits/chosen": -2.264913558959961, "logits/rejected": -2.3459906578063965, "logps/chosen": -265.44952392578125, "logps/rejected": -232.56129455566406, "loss": 0.6207, "rewards/accuracies": 0.625, "rewards/chosen": -1.2776224613189697, "rewards/margins": 2.1406726837158203, "rewards/rejected": -3.418294906616211, "step": 2643 }, { "epoch": 0.31, "learning_rate": 2.1088933506554857e-07, "logits/chosen": -2.516901731491089, "logits/rejected": -2.360185146331787, "logps/chosen": -138.0449981689453, "logps/rejected": -264.96563720703125, "loss": 0.3667, "rewards/accuracies": 0.875, "rewards/chosen": -0.4896734356880188, "rewards/margins": 2.5722780227661133, "rewards/rejected": -3.0619516372680664, "step": 2644 }, { "epoch": 0.31, "learning_rate": 2.108539033896303e-07, "logits/chosen": -2.2688114643096924, "logits/rejected": -2.28415846824646, "logps/chosen": -288.4903259277344, "logps/rejected": -285.13177490234375, "loss": 0.2086, "rewards/accuracies": 1.0, "rewards/chosen": -1.3402937650680542, "rewards/margins": 2.1977367401123047, "rewards/rejected": -3.5380303859710693, "step": 2645 }, { "epoch": 0.31, "learning_rate": 2.1081847171371207e-07, "logits/chosen": -2.0191867351531982, "logits/rejected": -2.1728403568267822, "logps/chosen": -180.56874084472656, "logps/rejected": -200.29214477539062, "loss": 1.0942, "rewards/accuracies": 0.625, "rewards/chosen": -1.5283387899398804, "rewards/margins": 0.2724747657775879, "rewards/rejected": -1.8008134365081787, "step": 2646 }, { "epoch": 0.31, "learning_rate": 2.107830400377938e-07, "logits/chosen": -2.132140874862671, "logits/rejected": -2.0301506519317627, "logps/chosen": -305.08868408203125, "logps/rejected": -300.6380920410156, "loss": 0.515, "rewards/accuracies": 0.875, "rewards/chosen": -0.41881027817726135, "rewards/margins": 1.301442265510559, "rewards/rejected": -1.720252513885498, "step": 2647 }, { "epoch": 0.31, "learning_rate": 2.1074760836187551e-07, "logits/chosen": -2.510190010070801, "logits/rejected": -2.2151806354522705, "logps/chosen": -121.96829223632812, "logps/rejected": -272.3302001953125, "loss": 0.3341, "rewards/accuracies": 0.875, "rewards/chosen": -0.8673459887504578, "rewards/margins": 2.559882640838623, "rewards/rejected": -3.4272284507751465, "step": 2648 }, { "epoch": 0.31, "learning_rate": 2.1071217668595724e-07, "logits/chosen": -2.226126194000244, "logits/rejected": -2.487330913543701, "logps/chosen": -552.5927124023438, "logps/rejected": -419.29327392578125, "loss": 0.4172, "rewards/accuracies": 0.875, "rewards/chosen": -0.8817245364189148, "rewards/margins": 1.954500675201416, "rewards/rejected": -2.8362247943878174, "step": 2649 }, { "epoch": 0.31, "learning_rate": 2.1067674501003896e-07, "logits/chosen": -2.533182144165039, "logits/rejected": -2.476315975189209, "logps/chosen": -121.79317474365234, "logps/rejected": -167.78134155273438, "loss": 0.3491, "rewards/accuracies": 0.875, "rewards/chosen": -0.2469920814037323, "rewards/margins": 1.272566556930542, "rewards/rejected": -1.5195586681365967, "step": 2650 }, { "epoch": 0.31, "learning_rate": 2.106413133341207e-07, "logits/chosen": -1.6353230476379395, "logits/rejected": -2.031093120574951, "logps/chosen": -260.50286865234375, "logps/rejected": -161.6191864013672, "loss": 0.5465, "rewards/accuracies": 0.875, "rewards/chosen": -1.1875946521759033, "rewards/margins": 1.0488122701644897, "rewards/rejected": -2.2364070415496826, "step": 2651 }, { "epoch": 0.31, "learning_rate": 2.1060588165820243e-07, "logits/chosen": -2.794382095336914, "logits/rejected": -2.7597005367279053, "logps/chosen": -355.0013427734375, "logps/rejected": -300.3617858886719, "loss": 0.5312, "rewards/accuracies": 0.625, "rewards/chosen": -1.4043614864349365, "rewards/margins": 1.6262657642364502, "rewards/rejected": -3.0306272506713867, "step": 2652 }, { "epoch": 0.31, "learning_rate": 2.1057044998228415e-07, "logits/chosen": -1.9108713865280151, "logits/rejected": -1.9917467832565308, "logps/chosen": -326.210205078125, "logps/rejected": -346.9455871582031, "loss": 1.0825, "rewards/accuracies": 0.75, "rewards/chosen": -1.988628625869751, "rewards/margins": 0.284631609916687, "rewards/rejected": -2.2732601165771484, "step": 2653 }, { "epoch": 0.31, "learning_rate": 2.1053501830636587e-07, "logits/chosen": -2.1646206378936768, "logits/rejected": -2.3150386810302734, "logps/chosen": -393.072021484375, "logps/rejected": -283.27362060546875, "loss": 0.8263, "rewards/accuracies": 0.625, "rewards/chosen": -0.992642879486084, "rewards/margins": 0.6271450519561768, "rewards/rejected": -1.6197879314422607, "step": 2654 }, { "epoch": 0.31, "learning_rate": 2.104995866304476e-07, "logits/chosen": -2.418151617050171, "logits/rejected": -2.595637083053589, "logps/chosen": -255.8325958251953, "logps/rejected": -356.89703369140625, "loss": 0.2625, "rewards/accuracies": 0.875, "rewards/chosen": -0.7023605704307556, "rewards/margins": 2.6243202686309814, "rewards/rejected": -3.3266806602478027, "step": 2655 }, { "epoch": 0.31, "learning_rate": 2.1046415495452932e-07, "logits/chosen": -2.9892048835754395, "logits/rejected": -2.9474711418151855, "logps/chosen": -129.03915405273438, "logps/rejected": -187.4677734375, "loss": 0.3802, "rewards/accuracies": 0.75, "rewards/chosen": -0.7479711771011353, "rewards/margins": 2.073326826095581, "rewards/rejected": -2.821298122406006, "step": 2656 }, { "epoch": 0.31, "learning_rate": 2.1042872327861107e-07, "logits/chosen": -2.084246873855591, "logits/rejected": -2.46764874458313, "logps/chosen": -341.95489501953125, "logps/rejected": -180.57235717773438, "loss": 1.0312, "rewards/accuracies": 0.75, "rewards/chosen": -1.3535133600234985, "rewards/margins": 0.8197640180587769, "rewards/rejected": -2.1732773780822754, "step": 2657 }, { "epoch": 0.31, "learning_rate": 2.1039329160269281e-07, "logits/chosen": -2.0597596168518066, "logits/rejected": -2.4004149436950684, "logps/chosen": -312.1173095703125, "logps/rejected": -341.5392150878906, "loss": 0.374, "rewards/accuracies": 0.875, "rewards/chosen": -0.524691104888916, "rewards/margins": 1.22756028175354, "rewards/rejected": -1.7522512674331665, "step": 2658 }, { "epoch": 0.31, "learning_rate": 2.1035785992677454e-07, "logits/chosen": -2.1929712295532227, "logits/rejected": -2.287137508392334, "logps/chosen": -353.84649658203125, "logps/rejected": -291.94720458984375, "loss": 0.4838, "rewards/accuracies": 0.75, "rewards/chosen": -0.9188960194587708, "rewards/margins": 1.7596720457077026, "rewards/rejected": -2.678568124771118, "step": 2659 }, { "epoch": 0.31, "learning_rate": 2.1032242825085626e-07, "logits/chosen": -2.627694845199585, "logits/rejected": -2.267904281616211, "logps/chosen": -388.19171142578125, "logps/rejected": -336.8194580078125, "loss": 0.2208, "rewards/accuracies": 1.0, "rewards/chosen": -1.1722581386566162, "rewards/margins": 3.7775230407714844, "rewards/rejected": -4.94978141784668, "step": 2660 }, { "epoch": 0.31, "learning_rate": 2.1028699657493798e-07, "logits/chosen": -2.4802045822143555, "logits/rejected": -2.457271099090576, "logps/chosen": -237.4795379638672, "logps/rejected": -229.6590576171875, "loss": 0.3183, "rewards/accuracies": 0.875, "rewards/chosen": -0.7305752038955688, "rewards/margins": 2.229184627532959, "rewards/rejected": -2.959759473800659, "step": 2661 }, { "epoch": 0.31, "learning_rate": 2.1025156489901973e-07, "logits/chosen": -1.8240361213684082, "logits/rejected": -2.0376675128936768, "logps/chosen": -337.7503662109375, "logps/rejected": -283.7057800292969, "loss": 0.3674, "rewards/accuracies": 0.875, "rewards/chosen": -1.1554460525512695, "rewards/margins": 1.886474609375, "rewards/rejected": -3.0419209003448486, "step": 2662 }, { "epoch": 0.31, "learning_rate": 2.1021613322310145e-07, "logits/chosen": -1.9533578157424927, "logits/rejected": -2.1146039962768555, "logps/chosen": -329.7105712890625, "logps/rejected": -403.6001892089844, "loss": 0.4593, "rewards/accuracies": 0.75, "rewards/chosen": -1.4846744537353516, "rewards/margins": 2.088966131210327, "rewards/rejected": -3.5736405849456787, "step": 2663 }, { "epoch": 0.31, "learning_rate": 2.1018070154718317e-07, "logits/chosen": -2.3854057788848877, "logits/rejected": -2.1195895671844482, "logps/chosen": -131.17222595214844, "logps/rejected": -204.53121948242188, "loss": 0.4014, "rewards/accuracies": 0.875, "rewards/chosen": -0.6077719330787659, "rewards/margins": 1.0128673315048218, "rewards/rejected": -1.6206393241882324, "step": 2664 }, { "epoch": 0.31, "learning_rate": 2.101452698712649e-07, "logits/chosen": -2.4261093139648438, "logits/rejected": -2.5097408294677734, "logps/chosen": -544.8653564453125, "logps/rejected": -475.90863037109375, "loss": 0.2474, "rewards/accuracies": 0.875, "rewards/chosen": -1.0649033784866333, "rewards/margins": 3.06026029586792, "rewards/rejected": -4.125163555145264, "step": 2665 }, { "epoch": 0.31, "learning_rate": 2.1010983819534662e-07, "logits/chosen": -1.6681721210479736, "logits/rejected": -1.7541205883026123, "logps/chosen": -257.0186767578125, "logps/rejected": -267.51123046875, "loss": 0.5477, "rewards/accuracies": 0.625, "rewards/chosen": -0.934929370880127, "rewards/margins": 0.515464723110199, "rewards/rejected": -1.4503940343856812, "step": 2666 }, { "epoch": 0.31, "learning_rate": 2.1007440651942834e-07, "logits/chosen": -2.3892014026641846, "logits/rejected": -2.4401612281799316, "logps/chosen": -295.4156494140625, "logps/rejected": -244.27566528320312, "loss": 0.4327, "rewards/accuracies": 0.75, "rewards/chosen": -0.43629157543182373, "rewards/margins": 1.6572957038879395, "rewards/rejected": -2.0935871601104736, "step": 2667 }, { "epoch": 0.31, "learning_rate": 2.100389748435101e-07, "logits/chosen": -1.9924544095993042, "logits/rejected": -2.3507463932037354, "logps/chosen": -267.5882873535156, "logps/rejected": -141.91348266601562, "loss": 0.6673, "rewards/accuracies": 0.625, "rewards/chosen": -0.9390842914581299, "rewards/margins": 0.2679375112056732, "rewards/rejected": -1.207021713256836, "step": 2668 }, { "epoch": 0.31, "learning_rate": 2.100035431675918e-07, "logits/chosen": -1.8421306610107422, "logits/rejected": -2.098737955093384, "logps/chosen": -352.4469909667969, "logps/rejected": -266.88385009765625, "loss": 0.8283, "rewards/accuracies": 0.625, "rewards/chosen": -1.178695559501648, "rewards/margins": 0.7573772668838501, "rewards/rejected": -1.9360729455947876, "step": 2669 }, { "epoch": 0.31, "learning_rate": 2.0996811149167356e-07, "logits/chosen": -2.316805124282837, "logits/rejected": -2.484732151031494, "logps/chosen": -119.25689697265625, "logps/rejected": -137.76181030273438, "loss": 0.5212, "rewards/accuracies": 0.625, "rewards/chosen": -1.074318766593933, "rewards/margins": 1.1764211654663086, "rewards/rejected": -2.2507400512695312, "step": 2670 }, { "epoch": 0.31, "learning_rate": 2.0993267981575528e-07, "logits/chosen": -2.205911636352539, "logits/rejected": -2.101837158203125, "logps/chosen": -257.53692626953125, "logps/rejected": -252.5493621826172, "loss": 0.3506, "rewards/accuracies": 1.0, "rewards/chosen": -0.9378831386566162, "rewards/margins": 1.1028310060501099, "rewards/rejected": -2.0407142639160156, "step": 2671 }, { "epoch": 0.31, "learning_rate": 2.09897248139837e-07, "logits/chosen": -2.536623954772949, "logits/rejected": -2.7729992866516113, "logps/chosen": -268.724853515625, "logps/rejected": -196.9244384765625, "loss": 0.5163, "rewards/accuracies": 0.625, "rewards/chosen": -0.916095495223999, "rewards/margins": 1.095945119857788, "rewards/rejected": -2.012040615081787, "step": 2672 }, { "epoch": 0.31, "learning_rate": 2.0986181646391875e-07, "logits/chosen": -2.053227663040161, "logits/rejected": -2.5156712532043457, "logps/chosen": -478.115966796875, "logps/rejected": -146.27442932128906, "loss": 0.3886, "rewards/accuracies": 0.875, "rewards/chosen": -0.06146063655614853, "rewards/margins": 1.7046363353729248, "rewards/rejected": -1.7660969495773315, "step": 2673 }, { "epoch": 0.31, "learning_rate": 2.0982638478800047e-07, "logits/chosen": -2.0902929306030273, "logits/rejected": -2.1604156494140625, "logps/chosen": -273.1072082519531, "logps/rejected": -357.2471618652344, "loss": 0.1423, "rewards/accuracies": 0.875, "rewards/chosen": -0.8819839954376221, "rewards/margins": 3.795983076095581, "rewards/rejected": -4.677967071533203, "step": 2674 }, { "epoch": 0.31, "learning_rate": 2.097909531120822e-07, "logits/chosen": -2.2265102863311768, "logits/rejected": -2.407501697540283, "logps/chosen": -422.2948913574219, "logps/rejected": -307.6490478515625, "loss": 0.7164, "rewards/accuracies": 0.625, "rewards/chosen": -1.0199847221374512, "rewards/margins": 0.3709573447704315, "rewards/rejected": -1.390942096710205, "step": 2675 }, { "epoch": 0.31, "learning_rate": 2.0975552143616392e-07, "logits/chosen": -2.083221912384033, "logits/rejected": -2.174692392349243, "logps/chosen": -330.82879638671875, "logps/rejected": -361.12249755859375, "loss": 0.6136, "rewards/accuracies": 0.875, "rewards/chosen": -0.5711886882781982, "rewards/margins": 2.419903516769409, "rewards/rejected": -2.9910922050476074, "step": 2676 }, { "epoch": 0.31, "learning_rate": 2.0972008976024564e-07, "logits/chosen": -2.1168875694274902, "logits/rejected": -2.265673875808716, "logps/chosen": -430.12750244140625, "logps/rejected": -279.1639404296875, "loss": 0.3879, "rewards/accuracies": 0.75, "rewards/chosen": -0.16879981756210327, "rewards/margins": 2.085088014602661, "rewards/rejected": -2.253887891769409, "step": 2677 }, { "epoch": 0.31, "learning_rate": 2.0968465808432736e-07, "logits/chosen": -1.954486608505249, "logits/rejected": -2.3209309577941895, "logps/chosen": -299.5252380371094, "logps/rejected": -189.27455139160156, "loss": 0.4292, "rewards/accuracies": 0.75, "rewards/chosen": -1.3502020835876465, "rewards/margins": 1.8267571926116943, "rewards/rejected": -3.176959276199341, "step": 2678 }, { "epoch": 0.31, "learning_rate": 2.0964922640840908e-07, "logits/chosen": -1.9220571517944336, "logits/rejected": -1.844224214553833, "logps/chosen": -197.9112091064453, "logps/rejected": -230.99183654785156, "loss": 0.3335, "rewards/accuracies": 0.75, "rewards/chosen": -0.12954463064670563, "rewards/margins": 1.3945575952529907, "rewards/rejected": -1.5241022109985352, "step": 2679 }, { "epoch": 0.31, "learning_rate": 2.0961379473249083e-07, "logits/chosen": -2.385979175567627, "logits/rejected": -2.2437386512756348, "logps/chosen": -131.93894958496094, "logps/rejected": -265.19842529296875, "loss": 0.896, "rewards/accuracies": 0.625, "rewards/chosen": -1.920721411705017, "rewards/margins": 3.388429880142212, "rewards/rejected": -5.3091511726379395, "step": 2680 }, { "epoch": 0.31, "learning_rate": 2.0957836305657258e-07, "logits/chosen": -1.9039777517318726, "logits/rejected": -2.2027769088745117, "logps/chosen": -328.58544921875, "logps/rejected": -245.66094970703125, "loss": 0.4788, "rewards/accuracies": 0.625, "rewards/chosen": -1.1873950958251953, "rewards/margins": 1.8677208423614502, "rewards/rejected": -3.0551156997680664, "step": 2681 }, { "epoch": 0.31, "learning_rate": 2.095429313806543e-07, "logits/chosen": -2.410505771636963, "logits/rejected": -2.3592982292175293, "logps/chosen": -177.11376953125, "logps/rejected": -260.02154541015625, "loss": 0.3227, "rewards/accuracies": 0.875, "rewards/chosen": -0.8805637955665588, "rewards/margins": 1.5039234161376953, "rewards/rejected": -2.3844871520996094, "step": 2682 }, { "epoch": 0.31, "learning_rate": 2.0950749970473603e-07, "logits/chosen": -2.1531481742858887, "logits/rejected": -2.119595527648926, "logps/chosen": -197.77114868164062, "logps/rejected": -195.6953582763672, "loss": 0.4999, "rewards/accuracies": 0.625, "rewards/chosen": -0.339576780796051, "rewards/margins": 1.182603359222412, "rewards/rejected": -1.5221800804138184, "step": 2683 }, { "epoch": 0.31, "learning_rate": 2.0947206802881775e-07, "logits/chosen": -2.149996757507324, "logits/rejected": -2.546684503555298, "logps/chosen": -553.7098388671875, "logps/rejected": -258.75604248046875, "loss": 0.345, "rewards/accuracies": 0.875, "rewards/chosen": -0.43140238523483276, "rewards/margins": 1.6851290464401245, "rewards/rejected": -2.1165313720703125, "step": 2684 }, { "epoch": 0.31, "learning_rate": 2.094366363528995e-07, "logits/chosen": -2.21504545211792, "logits/rejected": -2.173283815383911, "logps/chosen": -350.2112731933594, "logps/rejected": -243.73985290527344, "loss": 0.4231, "rewards/accuracies": 0.875, "rewards/chosen": -2.1309080123901367, "rewards/margins": 1.1104423999786377, "rewards/rejected": -3.2413504123687744, "step": 2685 }, { "epoch": 0.31, "learning_rate": 2.0940120467698122e-07, "logits/chosen": -2.6654253005981445, "logits/rejected": -2.7541329860687256, "logps/chosen": -250.3563232421875, "logps/rejected": -333.6627197265625, "loss": 0.2484, "rewards/accuracies": 1.0, "rewards/chosen": -0.4679511487483978, "rewards/margins": 2.7770838737487793, "rewards/rejected": -3.24503493309021, "step": 2686 }, { "epoch": 0.31, "learning_rate": 2.0936577300106294e-07, "logits/chosen": -2.023287773132324, "logits/rejected": -2.2012529373168945, "logps/chosen": -243.79791259765625, "logps/rejected": -205.6609344482422, "loss": 0.4791, "rewards/accuracies": 0.875, "rewards/chosen": -1.0264650583267212, "rewards/margins": 1.7309491634368896, "rewards/rejected": -2.7574143409729004, "step": 2687 }, { "epoch": 0.31, "learning_rate": 2.0933034132514466e-07, "logits/chosen": -2.376368999481201, "logits/rejected": -2.343724250793457, "logps/chosen": -245.82496643066406, "logps/rejected": -144.9889373779297, "loss": 0.4882, "rewards/accuracies": 0.75, "rewards/chosen": -0.03749767690896988, "rewards/margins": 1.1064523458480835, "rewards/rejected": -1.1439499855041504, "step": 2688 }, { "epoch": 0.31, "learning_rate": 2.0929490964922639e-07, "logits/chosen": -2.7336738109588623, "logits/rejected": -2.6613545417785645, "logps/chosen": -164.3998565673828, "logps/rejected": -251.1111602783203, "loss": 0.5477, "rewards/accuracies": 0.75, "rewards/chosen": -0.567257285118103, "rewards/margins": 1.6258279085159302, "rewards/rejected": -2.193085193634033, "step": 2689 }, { "epoch": 0.31, "learning_rate": 2.092594779733081e-07, "logits/chosen": -2.2934212684631348, "logits/rejected": -2.358381509780884, "logps/chosen": -247.85089111328125, "logps/rejected": -300.5628356933594, "loss": 0.4263, "rewards/accuracies": 0.75, "rewards/chosen": -1.1860499382019043, "rewards/margins": 2.0033295154571533, "rewards/rejected": -3.1893796920776367, "step": 2690 }, { "epoch": 0.31, "learning_rate": 2.0922404629738986e-07, "logits/chosen": -2.809638023376465, "logits/rejected": -2.571241855621338, "logps/chosen": -334.3759460449219, "logps/rejected": -274.49468994140625, "loss": 0.8282, "rewards/accuracies": 0.5, "rewards/chosen": -1.1718063354492188, "rewards/margins": 1.063801646232605, "rewards/rejected": -2.235607862472534, "step": 2691 }, { "epoch": 0.31, "learning_rate": 2.0918861462147158e-07, "logits/chosen": -2.001131057739258, "logits/rejected": -1.92683744430542, "logps/chosen": -192.08355712890625, "logps/rejected": -182.40234375, "loss": 0.2652, "rewards/accuracies": 0.75, "rewards/chosen": -0.2393803745508194, "rewards/margins": 2.53415584564209, "rewards/rejected": -2.773536443710327, "step": 2692 }, { "epoch": 0.31, "learning_rate": 2.0915318294555333e-07, "logits/chosen": -2.032057523727417, "logits/rejected": -2.245615005493164, "logps/chosen": -290.7298583984375, "logps/rejected": -283.6589050292969, "loss": 0.4533, "rewards/accuracies": 0.75, "rewards/chosen": -1.117310643196106, "rewards/margins": 1.4772664308547974, "rewards/rejected": -2.5945773124694824, "step": 2693 }, { "epoch": 0.31, "learning_rate": 2.0911775126963505e-07, "logits/chosen": -2.108515977859497, "logits/rejected": -2.051095962524414, "logps/chosen": -155.84579467773438, "logps/rejected": -194.0198211669922, "loss": 0.6813, "rewards/accuracies": 0.75, "rewards/chosen": -0.2425987422466278, "rewards/margins": 0.7405811548233032, "rewards/rejected": -0.9831799268722534, "step": 2694 }, { "epoch": 0.31, "learning_rate": 2.0908231959371677e-07, "logits/chosen": -1.584932804107666, "logits/rejected": -1.5097304582595825, "logps/chosen": -445.1934814453125, "logps/rejected": -479.97479248046875, "loss": 0.7303, "rewards/accuracies": 0.75, "rewards/chosen": -1.3746416568756104, "rewards/margins": 0.7369695901870728, "rewards/rejected": -2.1116113662719727, "step": 2695 }, { "epoch": 0.31, "learning_rate": 2.0904688791779852e-07, "logits/chosen": -2.2612454891204834, "logits/rejected": -2.4997611045837402, "logps/chosen": -299.16070556640625, "logps/rejected": -266.1734313964844, "loss": 0.1038, "rewards/accuracies": 1.0, "rewards/chosen": 0.023641347885131836, "rewards/margins": 3.937661647796631, "rewards/rejected": -3.914020538330078, "step": 2696 }, { "epoch": 0.31, "learning_rate": 2.0901145624188024e-07, "logits/chosen": -2.5422918796539307, "logits/rejected": -2.656534194946289, "logps/chosen": -257.2203674316406, "logps/rejected": -330.8985595703125, "loss": 0.2619, "rewards/accuracies": 0.875, "rewards/chosen": -0.693057119846344, "rewards/margins": 2.447880983352661, "rewards/rejected": -3.1409378051757812, "step": 2697 }, { "epoch": 0.31, "learning_rate": 2.0897602456596196e-07, "logits/chosen": -2.4870247840881348, "logits/rejected": -2.641413688659668, "logps/chosen": -310.0843200683594, "logps/rejected": -296.08013916015625, "loss": 0.228, "rewards/accuracies": 0.875, "rewards/chosen": -0.7178309559822083, "rewards/margins": 2.392054557800293, "rewards/rejected": -3.1098852157592773, "step": 2698 }, { "epoch": 0.31, "learning_rate": 2.0894059289004369e-07, "logits/chosen": -2.3552918434143066, "logits/rejected": -2.371882677078247, "logps/chosen": -306.52587890625, "logps/rejected": -253.155029296875, "loss": 0.3252, "rewards/accuracies": 1.0, "rewards/chosen": -1.046555757522583, "rewards/margins": 1.6710288524627686, "rewards/rejected": -2.7175846099853516, "step": 2699 }, { "epoch": 0.31, "learning_rate": 2.089051612141254e-07, "logits/chosen": -2.275146722793579, "logits/rejected": -2.556574821472168, "logps/chosen": -197.9202880859375, "logps/rejected": -232.50062561035156, "loss": 0.4161, "rewards/accuracies": 0.625, "rewards/chosen": -0.47690507769584656, "rewards/margins": 1.6315109729766846, "rewards/rejected": -2.1084160804748535, "step": 2700 }, { "epoch": 0.31, "learning_rate": 2.0886972953820713e-07, "logits/chosen": -2.44958758354187, "logits/rejected": -2.1986613273620605, "logps/chosen": -172.91000366210938, "logps/rejected": -170.13551330566406, "loss": 0.3125, "rewards/accuracies": 0.75, "rewards/chosen": -0.7574644684791565, "rewards/margins": 1.9040602445602417, "rewards/rejected": -2.661524534225464, "step": 2701 }, { "epoch": 0.31, "learning_rate": 2.0883429786228888e-07, "logits/chosen": -2.2510390281677246, "logits/rejected": -2.4591660499572754, "logps/chosen": -324.08685302734375, "logps/rejected": -215.64913940429688, "loss": 1.0262, "rewards/accuracies": 0.75, "rewards/chosen": -4.139427661895752, "rewards/margins": 0.7493161559104919, "rewards/rejected": -4.8887434005737305, "step": 2702 }, { "epoch": 0.31, "learning_rate": 2.087988661863706e-07, "logits/chosen": -2.3729872703552246, "logits/rejected": -2.431762933731079, "logps/chosen": -332.2828674316406, "logps/rejected": -244.47764587402344, "loss": 1.055, "rewards/accuracies": 0.625, "rewards/chosen": -1.0413732528686523, "rewards/margins": 0.15132497251033783, "rewards/rejected": -1.1926981210708618, "step": 2703 }, { "epoch": 0.31, "learning_rate": 2.0876343451045232e-07, "logits/chosen": -2.578282356262207, "logits/rejected": -2.8098061084747314, "logps/chosen": -289.2070617675781, "logps/rejected": -318.4175109863281, "loss": 0.3243, "rewards/accuracies": 0.875, "rewards/chosen": -0.3002718985080719, "rewards/margins": 2.323092460632324, "rewards/rejected": -2.623364210128784, "step": 2704 }, { "epoch": 0.31, "learning_rate": 2.0872800283453407e-07, "logits/chosen": -2.8158648014068604, "logits/rejected": -2.7035233974456787, "logps/chosen": -261.217529296875, "logps/rejected": -415.8542785644531, "loss": 0.2243, "rewards/accuracies": 0.875, "rewards/chosen": -1.2730704545974731, "rewards/margins": 2.550398349761963, "rewards/rejected": -3.8234689235687256, "step": 2705 }, { "epoch": 0.31, "learning_rate": 2.086925711586158e-07, "logits/chosen": -2.313232421875, "logits/rejected": -2.3152084350585938, "logps/chosen": -245.49098205566406, "logps/rejected": -287.53326416015625, "loss": 0.2303, "rewards/accuracies": 0.875, "rewards/chosen": -0.11041027307510376, "rewards/margins": 3.080228090286255, "rewards/rejected": -3.1906380653381348, "step": 2706 }, { "epoch": 0.31, "learning_rate": 2.0865713948269754e-07, "logits/chosen": -2.3331079483032227, "logits/rejected": -1.9809876680374146, "logps/chosen": -251.12557983398438, "logps/rejected": -396.9845886230469, "loss": 0.3684, "rewards/accuracies": 0.75, "rewards/chosen": -1.1850337982177734, "rewards/margins": 1.868536353111267, "rewards/rejected": -3.053570032119751, "step": 2707 }, { "epoch": 0.32, "learning_rate": 2.0862170780677926e-07, "logits/chosen": -2.043034315109253, "logits/rejected": -2.257098436355591, "logps/chosen": -399.12982177734375, "logps/rejected": -302.3027038574219, "loss": 0.2456, "rewards/accuracies": 1.0, "rewards/chosen": -1.435766577720642, "rewards/margins": 1.4610176086425781, "rewards/rejected": -2.8967843055725098, "step": 2708 }, { "epoch": 0.32, "learning_rate": 2.0858627613086099e-07, "logits/chosen": -2.543705463409424, "logits/rejected": -2.572354793548584, "logps/chosen": -318.005859375, "logps/rejected": -205.0018310546875, "loss": 0.485, "rewards/accuracies": 0.625, "rewards/chosen": -1.0689947605133057, "rewards/margins": 1.9310626983642578, "rewards/rejected": -3.0000574588775635, "step": 2709 }, { "epoch": 0.32, "learning_rate": 2.085508444549427e-07, "logits/chosen": -2.081629753112793, "logits/rejected": -2.4560599327087402, "logps/chosen": -302.975341796875, "logps/rejected": -231.81283569335938, "loss": 0.617, "rewards/accuracies": 0.625, "rewards/chosen": -0.6423460245132446, "rewards/margins": 1.2739678621292114, "rewards/rejected": -1.9163137674331665, "step": 2710 }, { "epoch": 0.32, "learning_rate": 2.0851541277902443e-07, "logits/chosen": -2.493652105331421, "logits/rejected": -2.3145036697387695, "logps/chosen": -203.880126953125, "logps/rejected": -186.07920837402344, "loss": 0.3767, "rewards/accuracies": 0.875, "rewards/chosen": -0.28596559166908264, "rewards/margins": 1.7772178649902344, "rewards/rejected": -2.063183546066284, "step": 2711 }, { "epoch": 0.32, "learning_rate": 2.0847998110310615e-07, "logits/chosen": -2.4053444862365723, "logits/rejected": -2.243661403656006, "logps/chosen": -244.26467895507812, "logps/rejected": -341.4519348144531, "loss": 0.3238, "rewards/accuracies": 0.875, "rewards/chosen": -0.45404720306396484, "rewards/margins": 2.7348456382751465, "rewards/rejected": -3.1888930797576904, "step": 2712 }, { "epoch": 0.32, "learning_rate": 2.0844454942718787e-07, "logits/chosen": -2.0521459579467773, "logits/rejected": -1.850496768951416, "logps/chosen": -461.64825439453125, "logps/rejected": -508.4150695800781, "loss": 0.5861, "rewards/accuracies": 0.625, "rewards/chosen": -0.4270686209201813, "rewards/margins": 1.003695011138916, "rewards/rejected": -1.4307637214660645, "step": 2713 }, { "epoch": 0.32, "learning_rate": 2.0840911775126962e-07, "logits/chosen": -2.57513427734375, "logits/rejected": -2.2692623138427734, "logps/chosen": -336.09906005859375, "logps/rejected": -319.16558837890625, "loss": 0.4216, "rewards/accuracies": 0.75, "rewards/chosen": -0.892180323600769, "rewards/margins": 2.566850185394287, "rewards/rejected": -3.4590303897857666, "step": 2714 }, { "epoch": 0.32, "learning_rate": 2.0837368607535135e-07, "logits/chosen": -2.70721435546875, "logits/rejected": -2.7745985984802246, "logps/chosen": -172.1348114013672, "logps/rejected": -229.09019470214844, "loss": 0.3329, "rewards/accuracies": 0.875, "rewards/chosen": -0.4388311803340912, "rewards/margins": 2.2791738510131836, "rewards/rejected": -2.7180051803588867, "step": 2715 }, { "epoch": 0.32, "learning_rate": 2.083382543994331e-07, "logits/chosen": -2.6381447315216064, "logits/rejected": -2.6967344284057617, "logps/chosen": -243.62779235839844, "logps/rejected": -175.01564025878906, "loss": 0.503, "rewards/accuracies": 0.875, "rewards/chosen": -0.009652681648731232, "rewards/margins": 1.187166690826416, "rewards/rejected": -1.1968194246292114, "step": 2716 }, { "epoch": 0.32, "learning_rate": 2.0830282272351482e-07, "logits/chosen": -2.1260969638824463, "logits/rejected": -2.3022420406341553, "logps/chosen": -318.4859619140625, "logps/rejected": -182.4466094970703, "loss": 0.3821, "rewards/accuracies": 0.875, "rewards/chosen": -0.3226085305213928, "rewards/margins": 1.228325605392456, "rewards/rejected": -1.5509341955184937, "step": 2717 }, { "epoch": 0.32, "learning_rate": 2.0826739104759656e-07, "logits/chosen": -2.1855757236480713, "logits/rejected": -2.2827584743499756, "logps/chosen": -332.7135314941406, "logps/rejected": -479.521728515625, "loss": 0.1845, "rewards/accuracies": 1.0, "rewards/chosen": -1.54310941696167, "rewards/margins": 3.481614112854004, "rewards/rejected": -5.024723052978516, "step": 2718 }, { "epoch": 0.32, "learning_rate": 2.082319593716783e-07, "logits/chosen": -2.354111671447754, "logits/rejected": -2.3428194522857666, "logps/chosen": -177.3536834716797, "logps/rejected": -270.4571228027344, "loss": 0.1437, "rewards/accuracies": 0.875, "rewards/chosen": -1.0465937852859497, "rewards/margins": 4.1524434089660645, "rewards/rejected": -5.199037075042725, "step": 2719 }, { "epoch": 0.32, "learning_rate": 2.0819652769576e-07, "logits/chosen": -1.8852524757385254, "logits/rejected": -1.9846627712249756, "logps/chosen": -243.71365356445312, "logps/rejected": -282.68231201171875, "loss": 0.5994, "rewards/accuracies": 0.75, "rewards/chosen": -0.5771859884262085, "rewards/margins": 1.673588752746582, "rewards/rejected": -2.25077486038208, "step": 2720 }, { "epoch": 0.32, "learning_rate": 2.0816109601984173e-07, "logits/chosen": -2.1657636165618896, "logits/rejected": -2.3458809852600098, "logps/chosen": -267.2022705078125, "logps/rejected": -286.8110046386719, "loss": 0.394, "rewards/accuracies": 0.875, "rewards/chosen": -0.4018622636795044, "rewards/margins": 1.750438928604126, "rewards/rejected": -2.152301073074341, "step": 2721 }, { "epoch": 0.32, "learning_rate": 2.0812566434392345e-07, "logits/chosen": -1.8264944553375244, "logits/rejected": -1.5642446279525757, "logps/chosen": -263.8738098144531, "logps/rejected": -304.9481201171875, "loss": 0.4248, "rewards/accuracies": 0.75, "rewards/chosen": -0.3707170784473419, "rewards/margins": 1.8187414407730103, "rewards/rejected": -2.1894583702087402, "step": 2722 }, { "epoch": 0.32, "learning_rate": 2.0809023266800518e-07, "logits/chosen": -2.591073513031006, "logits/rejected": -2.356257438659668, "logps/chosen": -153.2386016845703, "logps/rejected": -352.4542236328125, "loss": 0.4566, "rewards/accuracies": 0.875, "rewards/chosen": -0.449241042137146, "rewards/margins": 3.144718647003174, "rewards/rejected": -3.5939598083496094, "step": 2723 }, { "epoch": 0.32, "learning_rate": 2.080548009920869e-07, "logits/chosen": -2.091245174407959, "logits/rejected": -1.8745932579040527, "logps/chosen": -257.42578125, "logps/rejected": -289.25762939453125, "loss": 0.3799, "rewards/accuracies": 0.875, "rewards/chosen": -1.4190809726715088, "rewards/margins": 1.846193790435791, "rewards/rejected": -3.265275001525879, "step": 2724 }, { "epoch": 0.32, "learning_rate": 2.0801936931616865e-07, "logits/chosen": -2.501713275909424, "logits/rejected": -2.440138101577759, "logps/chosen": -119.82244873046875, "logps/rejected": -169.54965209960938, "loss": 0.3586, "rewards/accuracies": 0.75, "rewards/chosen": -0.7004234194755554, "rewards/margins": 2.619542121887207, "rewards/rejected": -3.3199656009674072, "step": 2725 }, { "epoch": 0.32, "learning_rate": 2.0798393764025037e-07, "logits/chosen": -2.308194160461426, "logits/rejected": -2.2186479568481445, "logps/chosen": -251.18609619140625, "logps/rejected": -376.8612365722656, "loss": 0.1007, "rewards/accuracies": 1.0, "rewards/chosen": -0.36350494623184204, "rewards/margins": 3.757767677307129, "rewards/rejected": -4.121273040771484, "step": 2726 }, { "epoch": 0.32, "learning_rate": 2.079485059643321e-07, "logits/chosen": -2.215871810913086, "logits/rejected": -2.5223546028137207, "logps/chosen": -319.2067565917969, "logps/rejected": -202.1268310546875, "loss": 0.7024, "rewards/accuracies": 0.625, "rewards/chosen": -1.575373649597168, "rewards/margins": 0.24656054377555847, "rewards/rejected": -1.8219343423843384, "step": 2727 }, { "epoch": 0.32, "learning_rate": 2.0791307428841384e-07, "logits/chosen": -2.6945254802703857, "logits/rejected": -2.711095094680786, "logps/chosen": -224.7584991455078, "logps/rejected": -178.49807739257812, "loss": 0.2477, "rewards/accuracies": 1.0, "rewards/chosen": -0.5578495860099792, "rewards/margins": 1.756978988647461, "rewards/rejected": -2.314828634262085, "step": 2728 }, { "epoch": 0.32, "learning_rate": 2.0787764261249556e-07, "logits/chosen": -1.7340097427368164, "logits/rejected": -1.6804218292236328, "logps/chosen": -231.2611083984375, "logps/rejected": -205.69174194335938, "loss": 0.3476, "rewards/accuracies": 0.875, "rewards/chosen": -0.8761190176010132, "rewards/margins": 1.3855739831924438, "rewards/rejected": -2.261693000793457, "step": 2729 }, { "epoch": 0.32, "learning_rate": 2.078422109365773e-07, "logits/chosen": -2.211613655090332, "logits/rejected": -2.573498249053955, "logps/chosen": -315.66827392578125, "logps/rejected": -300.2453308105469, "loss": 0.4093, "rewards/accuracies": 0.75, "rewards/chosen": -0.6856505870819092, "rewards/margins": 1.6242314577102661, "rewards/rejected": -2.309882164001465, "step": 2730 }, { "epoch": 0.32, "learning_rate": 2.0780677926065903e-07, "logits/chosen": -2.530533790588379, "logits/rejected": -2.6519880294799805, "logps/chosen": -455.1575927734375, "logps/rejected": -481.04278564453125, "loss": 0.3253, "rewards/accuracies": 0.875, "rewards/chosen": -1.1108334064483643, "rewards/margins": 2.2547214031219482, "rewards/rejected": -3.3655548095703125, "step": 2731 }, { "epoch": 0.32, "learning_rate": 2.0777134758474075e-07, "logits/chosen": -2.007115364074707, "logits/rejected": -1.9796065092086792, "logps/chosen": -339.02227783203125, "logps/rejected": -325.2618103027344, "loss": 0.199, "rewards/accuracies": 1.0, "rewards/chosen": -0.5551353693008423, "rewards/margins": 2.371004819869995, "rewards/rejected": -2.926140069961548, "step": 2732 }, { "epoch": 0.32, "learning_rate": 2.0773591590882248e-07, "logits/chosen": -2.073641300201416, "logits/rejected": -2.2942140102386475, "logps/chosen": -322.10284423828125, "logps/rejected": -201.0215606689453, "loss": 0.8298, "rewards/accuracies": 0.75, "rewards/chosen": -0.557479202747345, "rewards/margins": 0.8951711058616638, "rewards/rejected": -1.4526503086090088, "step": 2733 }, { "epoch": 0.32, "learning_rate": 2.077004842329042e-07, "logits/chosen": -2.0518383979797363, "logits/rejected": -2.151930570602417, "logps/chosen": -202.91928100585938, "logps/rejected": -146.01695251464844, "loss": 0.5492, "rewards/accuracies": 0.625, "rewards/chosen": -0.34734249114990234, "rewards/margins": 1.4260258674621582, "rewards/rejected": -1.7733683586120605, "step": 2734 }, { "epoch": 0.32, "learning_rate": 2.0766505255698592e-07, "logits/chosen": -2.3419580459594727, "logits/rejected": -2.4661552906036377, "logps/chosen": -257.7551574707031, "logps/rejected": -190.0042724609375, "loss": 0.8978, "rewards/accuracies": 0.625, "rewards/chosen": -1.8303265571594238, "rewards/margins": 0.545998215675354, "rewards/rejected": -2.3763246536254883, "step": 2735 }, { "epoch": 0.32, "learning_rate": 2.0762962088106767e-07, "logits/chosen": -2.167544364929199, "logits/rejected": -2.2882955074310303, "logps/chosen": -207.08541870117188, "logps/rejected": -243.2821044921875, "loss": 0.2834, "rewards/accuracies": 0.875, "rewards/chosen": -0.2470426708459854, "rewards/margins": 2.240060329437256, "rewards/rejected": -2.48710298538208, "step": 2736 }, { "epoch": 0.32, "learning_rate": 2.075941892051494e-07, "logits/chosen": -2.061751127243042, "logits/rejected": -2.0869791507720947, "logps/chosen": -217.41952514648438, "logps/rejected": -295.2725524902344, "loss": 0.3514, "rewards/accuracies": 0.875, "rewards/chosen": -0.870985209941864, "rewards/margins": 2.4654653072357178, "rewards/rejected": -3.3364505767822266, "step": 2737 }, { "epoch": 0.32, "learning_rate": 2.0755875752923111e-07, "logits/chosen": -2.6105051040649414, "logits/rejected": -2.3733105659484863, "logps/chosen": -119.23661804199219, "logps/rejected": -206.68031311035156, "loss": 0.4588, "rewards/accuracies": 0.875, "rewards/chosen": -0.5620625019073486, "rewards/margins": 1.7952492237091064, "rewards/rejected": -2.357311725616455, "step": 2738 }, { "epoch": 0.32, "learning_rate": 2.0752332585331284e-07, "logits/chosen": -2.3295812606811523, "logits/rejected": -2.262988805770874, "logps/chosen": -258.0665588378906, "logps/rejected": -293.92681884765625, "loss": 0.3361, "rewards/accuracies": 0.875, "rewards/chosen": -0.41324377059936523, "rewards/margins": 2.177206039428711, "rewards/rejected": -2.590449571609497, "step": 2739 }, { "epoch": 0.32, "learning_rate": 2.0748789417739458e-07, "logits/chosen": -2.2079854011535645, "logits/rejected": -2.301755905151367, "logps/chosen": -282.29510498046875, "logps/rejected": -346.2251892089844, "loss": 0.0927, "rewards/accuracies": 1.0, "rewards/chosen": -0.5780449509620667, "rewards/margins": 4.109785079956055, "rewards/rejected": -4.687829971313477, "step": 2740 }, { "epoch": 0.32, "learning_rate": 2.0745246250147633e-07, "logits/chosen": -2.4354004859924316, "logits/rejected": -2.5989863872528076, "logps/chosen": -281.60809326171875, "logps/rejected": -188.22213745117188, "loss": 0.3364, "rewards/accuracies": 0.875, "rewards/chosen": -1.2605706453323364, "rewards/margins": 1.468400239944458, "rewards/rejected": -2.728970766067505, "step": 2741 }, { "epoch": 0.32, "learning_rate": 2.0741703082555805e-07, "logits/chosen": -2.324098587036133, "logits/rejected": -2.496450424194336, "logps/chosen": -266.6272888183594, "logps/rejected": -270.54107666015625, "loss": 1.1013, "rewards/accuracies": 0.375, "rewards/chosen": -1.8765517473220825, "rewards/margins": 0.3214974105358124, "rewards/rejected": -2.1980490684509277, "step": 2742 }, { "epoch": 0.32, "learning_rate": 2.0738159914963978e-07, "logits/chosen": -2.2760794162750244, "logits/rejected": -1.904067039489746, "logps/chosen": -197.61044311523438, "logps/rejected": -232.00567626953125, "loss": 1.3495, "rewards/accuracies": 0.625, "rewards/chosen": -3.0899405479431152, "rewards/margins": 0.24331754446029663, "rewards/rejected": -3.3332581520080566, "step": 2743 }, { "epoch": 0.32, "learning_rate": 2.073461674737215e-07, "logits/chosen": -2.0641071796417236, "logits/rejected": -2.1095101833343506, "logps/chosen": -158.85879516601562, "logps/rejected": -372.7569580078125, "loss": 0.7306, "rewards/accuracies": 0.5, "rewards/chosen": -1.2892751693725586, "rewards/margins": 0.744792640209198, "rewards/rejected": -2.0340676307678223, "step": 2744 }, { "epoch": 0.32, "learning_rate": 2.0731073579780322e-07, "logits/chosen": -2.890855312347412, "logits/rejected": -2.6605968475341797, "logps/chosen": -197.75306701660156, "logps/rejected": -188.14488220214844, "loss": 0.2276, "rewards/accuracies": 0.875, "rewards/chosen": -0.28597861528396606, "rewards/margins": 3.1753170490264893, "rewards/rejected": -3.4612953662872314, "step": 2745 }, { "epoch": 0.32, "learning_rate": 2.0727530412188494e-07, "logits/chosen": -2.375332832336426, "logits/rejected": -2.5051138401031494, "logps/chosen": -171.81069946289062, "logps/rejected": -258.024658203125, "loss": 0.5519, "rewards/accuracies": 0.375, "rewards/chosen": -0.6839407086372375, "rewards/margins": 1.333158016204834, "rewards/rejected": -2.017098903656006, "step": 2746 }, { "epoch": 0.32, "learning_rate": 2.072398724459667e-07, "logits/chosen": -1.9678460359573364, "logits/rejected": -2.075413942337036, "logps/chosen": -292.64447021484375, "logps/rejected": -345.1872253417969, "loss": 0.8388, "rewards/accuracies": 0.5, "rewards/chosen": -0.7227127552032471, "rewards/margins": 1.054764747619629, "rewards/rejected": -1.7774773836135864, "step": 2747 }, { "epoch": 0.32, "learning_rate": 2.0720444077004841e-07, "logits/chosen": -2.1775062084198, "logits/rejected": -2.182426929473877, "logps/chosen": -160.1633758544922, "logps/rejected": -211.48031616210938, "loss": 0.7152, "rewards/accuracies": 0.75, "rewards/chosen": -1.440686583518982, "rewards/margins": 1.395626187324524, "rewards/rejected": -2.836312770843506, "step": 2748 }, { "epoch": 0.32, "learning_rate": 2.0716900909413014e-07, "logits/chosen": -1.7111473083496094, "logits/rejected": -1.7333407402038574, "logps/chosen": -482.4383544921875, "logps/rejected": -424.7276916503906, "loss": 0.4943, "rewards/accuracies": 0.75, "rewards/chosen": -0.5618476867675781, "rewards/margins": 1.6746454238891602, "rewards/rejected": -2.2364931106567383, "step": 2749 }, { "epoch": 0.32, "learning_rate": 2.0713357741821186e-07, "logits/chosen": -2.2246415615081787, "logits/rejected": -2.442307949066162, "logps/chosen": -297.3554992675781, "logps/rejected": -177.75241088867188, "loss": 0.4014, "rewards/accuracies": 0.75, "rewards/chosen": -0.666824996471405, "rewards/margins": 1.2845443487167358, "rewards/rejected": -1.951369285583496, "step": 2750 }, { "epoch": 0.32, "learning_rate": 2.070981457422936e-07, "logits/chosen": -1.4929125308990479, "logits/rejected": -1.8814928531646729, "logps/chosen": -488.4658203125, "logps/rejected": -376.7021179199219, "loss": 0.7114, "rewards/accuracies": 0.5, "rewards/chosen": -1.417024850845337, "rewards/margins": 0.3771205544471741, "rewards/rejected": -1.7941455841064453, "step": 2751 }, { "epoch": 0.32, "learning_rate": 2.0706271406637536e-07, "logits/chosen": -1.990221619606018, "logits/rejected": -2.194573402404785, "logps/chosen": -323.70196533203125, "logps/rejected": -292.50250244140625, "loss": 0.3849, "rewards/accuracies": 0.75, "rewards/chosen": -0.6472268104553223, "rewards/margins": 2.5692431926727295, "rewards/rejected": -3.216470241546631, "step": 2752 }, { "epoch": 0.32, "learning_rate": 2.0702728239045708e-07, "logits/chosen": -2.5495829582214355, "logits/rejected": -2.5197689533233643, "logps/chosen": -282.1540222167969, "logps/rejected": -298.8853759765625, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -0.5023384690284729, "rewards/margins": 3.5250680446624756, "rewards/rejected": -4.027406692504883, "step": 2753 }, { "epoch": 0.32, "learning_rate": 2.069918507145388e-07, "logits/chosen": -2.1546311378479004, "logits/rejected": -2.3572661876678467, "logps/chosen": -345.59613037109375, "logps/rejected": -296.26458740234375, "loss": 0.2234, "rewards/accuracies": 0.875, "rewards/chosen": -0.8814011216163635, "rewards/margins": 2.0716118812561035, "rewards/rejected": -2.9530129432678223, "step": 2754 }, { "epoch": 0.32, "learning_rate": 2.0695641903862052e-07, "logits/chosen": -2.4767515659332275, "logits/rejected": -2.5641064643859863, "logps/chosen": -307.09490966796875, "logps/rejected": -207.67019653320312, "loss": 0.3301, "rewards/accuracies": 0.75, "rewards/chosen": -0.3820151388645172, "rewards/margins": 1.6839958429336548, "rewards/rejected": -2.0660109519958496, "step": 2755 }, { "epoch": 0.32, "learning_rate": 2.0692098736270224e-07, "logits/chosen": -1.636419653892517, "logits/rejected": -2.002798080444336, "logps/chosen": -384.4627990722656, "logps/rejected": -309.7705383300781, "loss": 0.8475, "rewards/accuracies": 0.625, "rewards/chosen": -2.529336452484131, "rewards/margins": 0.9278169274330139, "rewards/rejected": -3.4571533203125, "step": 2756 }, { "epoch": 0.32, "learning_rate": 2.0688555568678397e-07, "logits/chosen": -2.4896345138549805, "logits/rejected": -2.5010228157043457, "logps/chosen": -323.11871337890625, "logps/rejected": -345.5707092285156, "loss": 0.3174, "rewards/accuracies": 0.75, "rewards/chosen": -0.8689936995506287, "rewards/margins": 2.8992867469787598, "rewards/rejected": -3.768280506134033, "step": 2757 }, { "epoch": 0.32, "learning_rate": 2.068501240108657e-07, "logits/chosen": -2.0251681804656982, "logits/rejected": -2.2202882766723633, "logps/chosen": -533.9995727539062, "logps/rejected": -328.071044921875, "loss": 0.2991, "rewards/accuracies": 0.875, "rewards/chosen": -0.8631742000579834, "rewards/margins": 1.4737939834594727, "rewards/rejected": -2.336968183517456, "step": 2758 }, { "epoch": 0.32, "learning_rate": 2.0681469233494744e-07, "logits/chosen": -2.1772360801696777, "logits/rejected": -1.9749560356140137, "logps/chosen": -172.00509643554688, "logps/rejected": -267.8743896484375, "loss": 0.541, "rewards/accuracies": 0.75, "rewards/chosen": -0.8997894525527954, "rewards/margins": 2.082317352294922, "rewards/rejected": -2.9821066856384277, "step": 2759 }, { "epoch": 0.32, "learning_rate": 2.0677926065902916e-07, "logits/chosen": -2.3416895866394043, "logits/rejected": -2.430999994277954, "logps/chosen": -242.75958251953125, "logps/rejected": -223.500732421875, "loss": 0.4602, "rewards/accuracies": 0.75, "rewards/chosen": -1.1338908672332764, "rewards/margins": 1.7552207708358765, "rewards/rejected": -2.8891115188598633, "step": 2760 }, { "epoch": 0.32, "learning_rate": 2.0674382898311088e-07, "logits/chosen": -2.363949775695801, "logits/rejected": -2.4174118041992188, "logps/chosen": -227.4774932861328, "logps/rejected": -272.5215759277344, "loss": 0.0916, "rewards/accuracies": 1.0, "rewards/chosen": -0.8144614100456238, "rewards/margins": 3.817065954208374, "rewards/rejected": -4.631526947021484, "step": 2761 }, { "epoch": 0.32, "learning_rate": 2.067083973071926e-07, "logits/chosen": -2.1846981048583984, "logits/rejected": -2.5316524505615234, "logps/chosen": -268.4424743652344, "logps/rejected": -185.2261199951172, "loss": 1.1532, "rewards/accuracies": 0.25, "rewards/chosen": -1.9046132564544678, "rewards/margins": -0.2842203378677368, "rewards/rejected": -1.6203927993774414, "step": 2762 }, { "epoch": 0.32, "learning_rate": 2.0667296563127438e-07, "logits/chosen": -2.5707197189331055, "logits/rejected": -2.623444080352783, "logps/chosen": -248.19915771484375, "logps/rejected": -155.9779052734375, "loss": 0.3997, "rewards/accuracies": 0.75, "rewards/chosen": -0.7524929046630859, "rewards/margins": 1.272731900215149, "rewards/rejected": -2.0252246856689453, "step": 2763 }, { "epoch": 0.32, "learning_rate": 2.066375339553561e-07, "logits/chosen": -2.3865294456481934, "logits/rejected": -2.6887707710266113, "logps/chosen": -349.3714294433594, "logps/rejected": -177.7042694091797, "loss": 0.446, "rewards/accuracies": 0.75, "rewards/chosen": -0.781042218208313, "rewards/margins": 1.9484821557998657, "rewards/rejected": -2.7295243740081787, "step": 2764 }, { "epoch": 0.32, "learning_rate": 2.0660210227943782e-07, "logits/chosen": -2.0423269271850586, "logits/rejected": -1.8252992630004883, "logps/chosen": -337.62451171875, "logps/rejected": -408.57843017578125, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/chosen": -0.02934664487838745, "rewards/margins": 3.8525209426879883, "rewards/rejected": -3.8818678855895996, "step": 2765 }, { "epoch": 0.32, "learning_rate": 2.0656667060351954e-07, "logits/chosen": -2.1039938926696777, "logits/rejected": -2.0415587425231934, "logps/chosen": -387.4490661621094, "logps/rejected": -350.63916015625, "loss": 0.2306, "rewards/accuracies": 0.875, "rewards/chosen": -0.681992769241333, "rewards/margins": 2.5637311935424805, "rewards/rejected": -3.2457237243652344, "step": 2766 }, { "epoch": 0.32, "learning_rate": 2.0653123892760127e-07, "logits/chosen": -2.2397561073303223, "logits/rejected": -2.482811689376831, "logps/chosen": -271.80615234375, "logps/rejected": -229.54083251953125, "loss": 0.534, "rewards/accuracies": 0.75, "rewards/chosen": -1.632980465888977, "rewards/margins": 1.8278834819793701, "rewards/rejected": -3.4608640670776367, "step": 2767 }, { "epoch": 0.32, "learning_rate": 2.06495807251683e-07, "logits/chosen": -2.835266351699829, "logits/rejected": -2.8360862731933594, "logps/chosen": -269.29150390625, "logps/rejected": -247.71299743652344, "loss": 0.253, "rewards/accuracies": 0.875, "rewards/chosen": -1.2972280979156494, "rewards/margins": 4.1515116691589355, "rewards/rejected": -5.448740005493164, "step": 2768 }, { "epoch": 0.32, "learning_rate": 2.064603755757647e-07, "logits/chosen": -2.455646514892578, "logits/rejected": -2.509162187576294, "logps/chosen": -260.1095886230469, "logps/rejected": -268.59326171875, "loss": 0.3223, "rewards/accuracies": 0.875, "rewards/chosen": -1.0549540519714355, "rewards/margins": 2.536766767501831, "rewards/rejected": -3.5917210578918457, "step": 2769 }, { "epoch": 0.32, "learning_rate": 2.0642494389984646e-07, "logits/chosen": -1.972563624382019, "logits/rejected": -2.487394332885742, "logps/chosen": -384.8781433105469, "logps/rejected": -289.3961486816406, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": -0.1388077288866043, "rewards/margins": 2.5734758377075195, "rewards/rejected": -2.7122836112976074, "step": 2770 }, { "epoch": 0.32, "learning_rate": 2.0638951222392818e-07, "logits/chosen": -2.3750717639923096, "logits/rejected": -2.4007115364074707, "logps/chosen": -364.71380615234375, "logps/rejected": -486.5599670410156, "loss": 0.5689, "rewards/accuracies": 0.625, "rewards/chosen": -0.6389650702476501, "rewards/margins": 1.2544959783554077, "rewards/rejected": -1.8934611082077026, "step": 2771 }, { "epoch": 0.32, "learning_rate": 2.063540805480099e-07, "logits/chosen": -1.9437891244888306, "logits/rejected": -2.008641004562378, "logps/chosen": -425.13885498046875, "logps/rejected": -367.20001220703125, "loss": 0.4111, "rewards/accuracies": 0.75, "rewards/chosen": -0.9009430408477783, "rewards/margins": 1.0747218132019043, "rewards/rejected": -1.975664734840393, "step": 2772 }, { "epoch": 0.32, "learning_rate": 2.0631864887209163e-07, "logits/chosen": -1.707619309425354, "logits/rejected": -2.017993927001953, "logps/chosen": -443.898193359375, "logps/rejected": -235.83226013183594, "loss": 0.3726, "rewards/accuracies": 0.75, "rewards/chosen": -0.061083436012268066, "rewards/margins": 3.158172607421875, "rewards/rejected": -3.2192559242248535, "step": 2773 }, { "epoch": 0.32, "learning_rate": 2.0628321719617335e-07, "logits/chosen": -2.4310951232910156, "logits/rejected": -2.6068644523620605, "logps/chosen": -210.737548828125, "logps/rejected": -211.23036193847656, "loss": 0.3731, "rewards/accuracies": 0.75, "rewards/chosen": -0.43631404638290405, "rewards/margins": 3.100959300994873, "rewards/rejected": -3.5372731685638428, "step": 2774 }, { "epoch": 0.32, "learning_rate": 2.0624778552025512e-07, "logits/chosen": -1.755044937133789, "logits/rejected": -1.5662319660186768, "logps/chosen": -141.99420166015625, "logps/rejected": -269.23297119140625, "loss": 0.417, "rewards/accuracies": 0.75, "rewards/chosen": -0.7886494994163513, "rewards/margins": 1.9704394340515137, "rewards/rejected": -2.759089231491089, "step": 2775 }, { "epoch": 0.32, "learning_rate": 2.0621235384433685e-07, "logits/chosen": -2.406850814819336, "logits/rejected": -2.6088953018188477, "logps/chosen": -334.05059814453125, "logps/rejected": -249.40940856933594, "loss": 0.2336, "rewards/accuracies": 0.875, "rewards/chosen": 0.00031469762325286865, "rewards/margins": 2.3747613430023193, "rewards/rejected": -2.3744466304779053, "step": 2776 }, { "epoch": 0.32, "learning_rate": 2.0617692216841857e-07, "logits/chosen": -2.20857572555542, "logits/rejected": -2.3470346927642822, "logps/chosen": -298.4207763671875, "logps/rejected": -250.31565856933594, "loss": 0.4144, "rewards/accuracies": 0.875, "rewards/chosen": -0.9045414328575134, "rewards/margins": 0.8793571591377258, "rewards/rejected": -1.7838985919952393, "step": 2777 }, { "epoch": 0.32, "learning_rate": 2.061414904925003e-07, "logits/chosen": -1.9985195398330688, "logits/rejected": -2.314100980758667, "logps/chosen": -179.6425323486328, "logps/rejected": -180.563720703125, "loss": 0.3423, "rewards/accuracies": 0.75, "rewards/chosen": -0.8461536765098572, "rewards/margins": 1.5938369035720825, "rewards/rejected": -2.439990520477295, "step": 2778 }, { "epoch": 0.32, "learning_rate": 2.06106058816582e-07, "logits/chosen": -2.5613293647766113, "logits/rejected": -2.5643248558044434, "logps/chosen": -519.8988037109375, "logps/rejected": -323.7275085449219, "loss": 0.1304, "rewards/accuracies": 1.0, "rewards/chosen": 0.25258561968803406, "rewards/margins": 3.1322360038757324, "rewards/rejected": -2.879650354385376, "step": 2779 }, { "epoch": 0.32, "learning_rate": 2.0607062714066373e-07, "logits/chosen": -2.276991128921509, "logits/rejected": -2.284942150115967, "logps/chosen": -184.548095703125, "logps/rejected": -228.9742431640625, "loss": 0.1879, "rewards/accuracies": 1.0, "rewards/chosen": -0.6611593961715698, "rewards/margins": 2.3809316158294678, "rewards/rejected": -3.042090892791748, "step": 2780 }, { "epoch": 0.32, "learning_rate": 2.0603519546474548e-07, "logits/chosen": -1.5570497512817383, "logits/rejected": -1.7196389436721802, "logps/chosen": -420.92437744140625, "logps/rejected": -375.7477111816406, "loss": 0.2125, "rewards/accuracies": 1.0, "rewards/chosen": 0.031127259135246277, "rewards/margins": 1.7547438144683838, "rewards/rejected": -1.723616600036621, "step": 2781 }, { "epoch": 0.32, "learning_rate": 2.059997637888272e-07, "logits/chosen": -2.448502540588379, "logits/rejected": -2.5700931549072266, "logps/chosen": -304.7576904296875, "logps/rejected": -231.25494384765625, "loss": 0.5484, "rewards/accuracies": 0.625, "rewards/chosen": -0.9735724329948425, "rewards/margins": 1.3316367864608765, "rewards/rejected": -2.305209159851074, "step": 2782 }, { "epoch": 0.32, "learning_rate": 2.0596433211290893e-07, "logits/chosen": -2.560392379760742, "logits/rejected": -2.583927869796753, "logps/chosen": -177.2637939453125, "logps/rejected": -331.18511962890625, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -0.15544834733009338, "rewards/margins": 4.33696174621582, "rewards/rejected": -4.492409706115723, "step": 2783 }, { "epoch": 0.32, "learning_rate": 2.0592890043699065e-07, "logits/chosen": -2.3378896713256836, "logits/rejected": -2.2976109981536865, "logps/chosen": -318.4089050292969, "logps/rejected": -385.070556640625, "loss": 0.5601, "rewards/accuracies": 0.75, "rewards/chosen": -0.9627291560173035, "rewards/margins": 1.2610429525375366, "rewards/rejected": -2.2237720489501953, "step": 2784 }, { "epoch": 0.32, "learning_rate": 2.0589346876107237e-07, "logits/chosen": -2.465488910675049, "logits/rejected": -2.543954372406006, "logps/chosen": -218.9886932373047, "logps/rejected": -352.46820068359375, "loss": 0.2741, "rewards/accuracies": 0.875, "rewards/chosen": -0.8004039525985718, "rewards/margins": 2.812516927719116, "rewards/rejected": -3.6129207611083984, "step": 2785 }, { "epoch": 0.32, "learning_rate": 2.0585803708515415e-07, "logits/chosen": -2.5342752933502197, "logits/rejected": -2.416834592819214, "logps/chosen": -362.8609619140625, "logps/rejected": -349.4075622558594, "loss": 0.1443, "rewards/accuracies": 1.0, "rewards/chosen": -0.4146467447280884, "rewards/margins": 2.964639902114868, "rewards/rejected": -3.379286527633667, "step": 2786 }, { "epoch": 0.32, "learning_rate": 2.0582260540923587e-07, "logits/chosen": -2.4081315994262695, "logits/rejected": -2.337503433227539, "logps/chosen": -403.8183288574219, "logps/rejected": -514.3018798828125, "loss": 0.3147, "rewards/accuracies": 0.875, "rewards/chosen": -0.6671752333641052, "rewards/margins": 1.7555726766586304, "rewards/rejected": -2.422747850418091, "step": 2787 }, { "epoch": 0.32, "learning_rate": 2.057871737333176e-07, "logits/chosen": -2.1663217544555664, "logits/rejected": -2.2671895027160645, "logps/chosen": -195.31683349609375, "logps/rejected": -233.54225158691406, "loss": 0.5943, "rewards/accuracies": 0.75, "rewards/chosen": -1.3199719190597534, "rewards/margins": 0.9029352068901062, "rewards/rejected": -2.222907066345215, "step": 2788 }, { "epoch": 0.32, "learning_rate": 2.057517420573993e-07, "logits/chosen": -2.0933797359466553, "logits/rejected": -2.1743855476379395, "logps/chosen": -317.94073486328125, "logps/rejected": -377.95111083984375, "loss": 0.1873, "rewards/accuracies": 0.875, "rewards/chosen": -0.6250758767127991, "rewards/margins": 4.745172500610352, "rewards/rejected": -5.370248317718506, "step": 2789 }, { "epoch": 0.32, "learning_rate": 2.0571631038148103e-07, "logits/chosen": -2.0753180980682373, "logits/rejected": -2.053215742111206, "logps/chosen": -197.126220703125, "logps/rejected": -256.1136169433594, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": -1.0177252292633057, "rewards/margins": 3.7494266033172607, "rewards/rejected": -4.767152309417725, "step": 2790 }, { "epoch": 0.32, "learning_rate": 2.0568087870556276e-07, "logits/chosen": -2.7155985832214355, "logits/rejected": -2.6023130416870117, "logps/chosen": -126.25444030761719, "logps/rejected": -196.45538330078125, "loss": 0.2156, "rewards/accuracies": 0.875, "rewards/chosen": -0.6991429328918457, "rewards/margins": 2.856741189956665, "rewards/rejected": -3.5558838844299316, "step": 2791 }, { "epoch": 0.32, "learning_rate": 2.056454470296445e-07, "logits/chosen": -1.5817039012908936, "logits/rejected": -1.6766940355300903, "logps/chosen": -414.9866638183594, "logps/rejected": -340.78143310546875, "loss": 0.9663, "rewards/accuracies": 0.375, "rewards/chosen": -1.6030627489089966, "rewards/margins": 0.3543718457221985, "rewards/rejected": -1.9574344158172607, "step": 2792 }, { "epoch": 0.32, "learning_rate": 2.0561001535372623e-07, "logits/chosen": -2.886582136154175, "logits/rejected": -2.7930896282196045, "logps/chosen": -125.51559448242188, "logps/rejected": -134.61605834960938, "loss": 0.5199, "rewards/accuracies": 0.875, "rewards/chosen": -1.2961407899856567, "rewards/margins": 2.088855743408203, "rewards/rejected": -3.3849964141845703, "step": 2793 }, { "epoch": 0.33, "learning_rate": 2.0557458367780795e-07, "logits/chosen": -2.455160140991211, "logits/rejected": -2.613832473754883, "logps/chosen": -512.1253662109375, "logps/rejected": -237.97140502929688, "loss": 0.3089, "rewards/accuracies": 0.875, "rewards/chosen": -1.8840157985687256, "rewards/margins": 1.7954705953598022, "rewards/rejected": -3.6794862747192383, "step": 2794 }, { "epoch": 0.33, "learning_rate": 2.0553915200188967e-07, "logits/chosen": -2.0982892513275146, "logits/rejected": -2.1001107692718506, "logps/chosen": -274.06158447265625, "logps/rejected": -276.936279296875, "loss": 0.7712, "rewards/accuracies": 0.625, "rewards/chosen": -1.2053558826446533, "rewards/margins": 0.3067867159843445, "rewards/rejected": -1.512142539024353, "step": 2795 }, { "epoch": 0.33, "learning_rate": 2.055037203259714e-07, "logits/chosen": -1.6699740886688232, "logits/rejected": -1.6630700826644897, "logps/chosen": -292.50103759765625, "logps/rejected": -260.097412109375, "loss": 0.3754, "rewards/accuracies": 0.75, "rewards/chosen": -0.3645244538784027, "rewards/margins": 2.2247323989868164, "rewards/rejected": -2.589256763458252, "step": 2796 }, { "epoch": 0.33, "learning_rate": 2.0546828865005312e-07, "logits/chosen": -2.236677646636963, "logits/rejected": -2.4373927116394043, "logps/chosen": -346.0108642578125, "logps/rejected": -258.8670349121094, "loss": 0.2163, "rewards/accuracies": 1.0, "rewards/chosen": -0.5777016878128052, "rewards/margins": 2.119042158126831, "rewards/rejected": -2.6967437267303467, "step": 2797 }, { "epoch": 0.33, "learning_rate": 2.054328569741349e-07, "logits/chosen": -2.675575017929077, "logits/rejected": -2.6351897716522217, "logps/chosen": -400.10516357421875, "logps/rejected": -325.06365966796875, "loss": 0.9688, "rewards/accuracies": 0.5, "rewards/chosen": -2.2481026649475098, "rewards/margins": 0.4926161468029022, "rewards/rejected": -2.7407186031341553, "step": 2798 }, { "epoch": 0.33, "learning_rate": 2.053974252982166e-07, "logits/chosen": -2.7971432209014893, "logits/rejected": -2.633223533630371, "logps/chosen": -302.6767578125, "logps/rejected": -245.10604858398438, "loss": 0.2816, "rewards/accuracies": 0.875, "rewards/chosen": -0.9614919424057007, "rewards/margins": 3.4177117347717285, "rewards/rejected": -4.379203796386719, "step": 2799 }, { "epoch": 0.33, "learning_rate": 2.0536199362229834e-07, "logits/chosen": -2.0296876430511475, "logits/rejected": -1.885248064994812, "logps/chosen": -254.11224365234375, "logps/rejected": -303.008544921875, "loss": 0.3742, "rewards/accuracies": 0.875, "rewards/chosen": -1.0010836124420166, "rewards/margins": 1.263413429260254, "rewards/rejected": -2.2644970417022705, "step": 2800 }, { "epoch": 0.33, "learning_rate": 2.0532656194638006e-07, "logits/chosen": -1.991680383682251, "logits/rejected": -1.502970814704895, "logps/chosen": -155.11618041992188, "logps/rejected": -236.58041381835938, "loss": 0.4252, "rewards/accuracies": 0.875, "rewards/chosen": -0.8260632753372192, "rewards/margins": 1.3820829391479492, "rewards/rejected": -2.208146095275879, "step": 2801 }, { "epoch": 0.33, "learning_rate": 2.0529113027046178e-07, "logits/chosen": -1.6984056234359741, "logits/rejected": -1.3114551305770874, "logps/chosen": -86.82627868652344, "logps/rejected": -152.1896209716797, "loss": 0.4359, "rewards/accuracies": 0.75, "rewards/chosen": -0.04985235258936882, "rewards/margins": 1.2690937519073486, "rewards/rejected": -1.318946123123169, "step": 2802 }, { "epoch": 0.33, "learning_rate": 2.052556985945435e-07, "logits/chosen": -2.5094528198242188, "logits/rejected": -2.4720582962036133, "logps/chosen": -176.70758056640625, "logps/rejected": -283.5103759765625, "loss": 0.1812, "rewards/accuracies": 0.875, "rewards/chosen": -1.026608943939209, "rewards/margins": 3.0588245391845703, "rewards/rejected": -4.085433483123779, "step": 2803 }, { "epoch": 0.33, "learning_rate": 2.0522026691862525e-07, "logits/chosen": -1.6189991235733032, "logits/rejected": -2.006326198577881, "logps/chosen": -656.6763916015625, "logps/rejected": -392.430419921875, "loss": 0.6457, "rewards/accuracies": 0.75, "rewards/chosen": -1.0681871175765991, "rewards/margins": 0.5803056359291077, "rewards/rejected": -1.6484928131103516, "step": 2804 }, { "epoch": 0.33, "learning_rate": 2.0518483524270697e-07, "logits/chosen": -2.323451280593872, "logits/rejected": -2.3354034423828125, "logps/chosen": -469.4316711425781, "logps/rejected": -366.966064453125, "loss": 0.3055, "rewards/accuracies": 0.875, "rewards/chosen": -0.05852103233337402, "rewards/margins": 2.029176712036133, "rewards/rejected": -2.087697982788086, "step": 2805 }, { "epoch": 0.33, "learning_rate": 2.051494035667887e-07, "logits/chosen": -2.3163020610809326, "logits/rejected": -2.150282859802246, "logps/chosen": -344.0404052734375, "logps/rejected": -274.2154235839844, "loss": 0.3899, "rewards/accuracies": 0.875, "rewards/chosen": -1.0138919353485107, "rewards/margins": 1.303979516029358, "rewards/rejected": -2.317871570587158, "step": 2806 }, { "epoch": 0.33, "learning_rate": 2.0511397189087042e-07, "logits/chosen": -2.7944467067718506, "logits/rejected": -2.772958755493164, "logps/chosen": -312.7858581542969, "logps/rejected": -386.0821228027344, "loss": 0.1438, "rewards/accuracies": 1.0, "rewards/chosen": -0.231122225522995, "rewards/margins": 4.467634677886963, "rewards/rejected": -4.698757171630859, "step": 2807 }, { "epoch": 0.33, "learning_rate": 2.0507854021495214e-07, "logits/chosen": -2.3214612007141113, "logits/rejected": -1.9573845863342285, "logps/chosen": -60.63727951049805, "logps/rejected": -171.14071655273438, "loss": 0.2813, "rewards/accuracies": 0.75, "rewards/chosen": -0.3563816249370575, "rewards/margins": 1.8481109142303467, "rewards/rejected": -2.2044925689697266, "step": 2808 }, { "epoch": 0.33, "learning_rate": 2.0504310853903386e-07, "logits/chosen": -2.7508983612060547, "logits/rejected": -2.7914950847625732, "logps/chosen": -256.6297607421875, "logps/rejected": -307.9344177246094, "loss": 0.3206, "rewards/accuracies": 1.0, "rewards/chosen": -1.0767958164215088, "rewards/margins": 1.325225591659546, "rewards/rejected": -2.4020214080810547, "step": 2809 }, { "epoch": 0.33, "learning_rate": 2.0500767686311564e-07, "logits/chosen": -1.5481828451156616, "logits/rejected": -1.5910831689834595, "logps/chosen": -252.73226928710938, "logps/rejected": -312.66729736328125, "loss": 0.5177, "rewards/accuracies": 0.75, "rewards/chosen": -1.2445430755615234, "rewards/margins": 0.6077486872673035, "rewards/rejected": -1.8522920608520508, "step": 2810 }, { "epoch": 0.33, "learning_rate": 2.0497224518719736e-07, "logits/chosen": -1.960991382598877, "logits/rejected": -2.089397430419922, "logps/chosen": -152.9217071533203, "logps/rejected": -163.07533264160156, "loss": 0.6286, "rewards/accuracies": 0.75, "rewards/chosen": -1.081068515777588, "rewards/margins": 0.28588971495628357, "rewards/rejected": -1.3669581413269043, "step": 2811 }, { "epoch": 0.33, "learning_rate": 2.0493681351127908e-07, "logits/chosen": -2.4788684844970703, "logits/rejected": -2.6726245880126953, "logps/chosen": -402.6098327636719, "logps/rejected": -265.6576232910156, "loss": 0.6706, "rewards/accuracies": 0.625, "rewards/chosen": -1.5419917106628418, "rewards/margins": 1.1017179489135742, "rewards/rejected": -2.643709659576416, "step": 2812 }, { "epoch": 0.33, "learning_rate": 2.049013818353608e-07, "logits/chosen": -2.468499183654785, "logits/rejected": -2.6753218173980713, "logps/chosen": -219.60073852539062, "logps/rejected": -196.6517333984375, "loss": 0.2031, "rewards/accuracies": 1.0, "rewards/chosen": -0.676396906375885, "rewards/margins": 2.1995468139648438, "rewards/rejected": -2.875943660736084, "step": 2813 }, { "epoch": 0.33, "learning_rate": 2.0486595015944252e-07, "logits/chosen": -2.416999101638794, "logits/rejected": -2.546029567718506, "logps/chosen": -217.00042724609375, "logps/rejected": -193.41323852539062, "loss": 0.2883, "rewards/accuracies": 0.875, "rewards/chosen": -0.7034333944320679, "rewards/margins": 2.724259853363037, "rewards/rejected": -3.4276933670043945, "step": 2814 }, { "epoch": 0.33, "learning_rate": 2.0483051848352427e-07, "logits/chosen": -1.8129630088806152, "logits/rejected": -2.2402396202087402, "logps/chosen": -397.4700927734375, "logps/rejected": -268.7874450683594, "loss": 0.4886, "rewards/accuracies": 0.625, "rewards/chosen": -1.2502238750457764, "rewards/margins": 1.4893684387207031, "rewards/rejected": -2.7395923137664795, "step": 2815 }, { "epoch": 0.33, "learning_rate": 2.04795086807606e-07, "logits/chosen": -2.5891482830047607, "logits/rejected": -2.6199758052825928, "logps/chosen": -276.3951721191406, "logps/rejected": -261.1891174316406, "loss": 0.7248, "rewards/accuracies": 0.875, "rewards/chosen": -1.683912992477417, "rewards/margins": 1.5360163450241089, "rewards/rejected": -3.2199292182922363, "step": 2816 }, { "epoch": 0.33, "learning_rate": 2.0475965513168772e-07, "logits/chosen": -2.5540971755981445, "logits/rejected": -2.3981118202209473, "logps/chosen": -209.75234985351562, "logps/rejected": -212.50405883789062, "loss": 0.8078, "rewards/accuracies": 0.875, "rewards/chosen": -1.8089838027954102, "rewards/margins": 0.9001773595809937, "rewards/rejected": -2.7091610431671143, "step": 2817 }, { "epoch": 0.33, "learning_rate": 2.0472422345576944e-07, "logits/chosen": -1.6472505331039429, "logits/rejected": -2.2328805923461914, "logps/chosen": -484.7900695800781, "logps/rejected": -172.57437133789062, "loss": 0.5016, "rewards/accuracies": 0.625, "rewards/chosen": -0.7868074178695679, "rewards/margins": 1.4236958026885986, "rewards/rejected": -2.210503101348877, "step": 2818 }, { "epoch": 0.33, "learning_rate": 2.0468879177985116e-07, "logits/chosen": -2.3393406867980957, "logits/rejected": -2.351262092590332, "logps/chosen": -449.6119384765625, "logps/rejected": -239.43972778320312, "loss": 0.8984, "rewards/accuracies": 0.75, "rewards/chosen": -0.8348033428192139, "rewards/margins": 1.2704559564590454, "rewards/rejected": -2.105259418487549, "step": 2819 }, { "epoch": 0.33, "learning_rate": 2.0465336010393288e-07, "logits/chosen": -2.749523162841797, "logits/rejected": -2.5502002239227295, "logps/chosen": -119.17343139648438, "logps/rejected": -257.31591796875, "loss": 0.223, "rewards/accuracies": 1.0, "rewards/chosen": -0.45282799005508423, "rewards/margins": 2.287104606628418, "rewards/rejected": -2.7399325370788574, "step": 2820 }, { "epoch": 0.33, "learning_rate": 2.0461792842801466e-07, "logits/chosen": -2.3165204524993896, "logits/rejected": -2.5828311443328857, "logps/chosen": -272.6784973144531, "logps/rejected": -144.60081481933594, "loss": 0.3326, "rewards/accuracies": 0.75, "rewards/chosen": -0.14727984368801117, "rewards/margins": 1.8323566913604736, "rewards/rejected": -1.979636549949646, "step": 2821 }, { "epoch": 0.33, "learning_rate": 2.0458249675209638e-07, "logits/chosen": -2.792811155319214, "logits/rejected": -2.8511414527893066, "logps/chosen": -133.80726623535156, "logps/rejected": -203.33470153808594, "loss": 0.3303, "rewards/accuracies": 0.875, "rewards/chosen": -0.6973686218261719, "rewards/margins": 2.3489441871643066, "rewards/rejected": -3.0463125705718994, "step": 2822 }, { "epoch": 0.33, "learning_rate": 2.045470650761781e-07, "logits/chosen": -2.4337236881256104, "logits/rejected": -2.5470268726348877, "logps/chosen": -308.79425048828125, "logps/rejected": -519.998291015625, "loss": 0.0825, "rewards/accuracies": 1.0, "rewards/chosen": -0.42340192198753357, "rewards/margins": 5.131959915161133, "rewards/rejected": -5.555361747741699, "step": 2823 }, { "epoch": 0.33, "learning_rate": 2.0451163340025982e-07, "logits/chosen": -2.36165189743042, "logits/rejected": -2.2468252182006836, "logps/chosen": -166.033447265625, "logps/rejected": -243.00094604492188, "loss": 0.6086, "rewards/accuracies": 0.625, "rewards/chosen": -0.7419983148574829, "rewards/margins": 1.414143443107605, "rewards/rejected": -2.156141757965088, "step": 2824 }, { "epoch": 0.33, "learning_rate": 2.0447620172434155e-07, "logits/chosen": -2.1709892749786377, "logits/rejected": -2.123112678527832, "logps/chosen": -115.22529602050781, "logps/rejected": -203.98980712890625, "loss": 0.1664, "rewards/accuracies": 1.0, "rewards/chosen": -0.22637619078159332, "rewards/margins": 2.349916934967041, "rewards/rejected": -2.5762932300567627, "step": 2825 }, { "epoch": 0.33, "learning_rate": 2.044407700484233e-07, "logits/chosen": -1.9990406036376953, "logits/rejected": -2.1865406036376953, "logps/chosen": -360.7684020996094, "logps/rejected": -319.9784240722656, "loss": 0.5165, "rewards/accuracies": 0.75, "rewards/chosen": -0.6421569585800171, "rewards/margins": 0.8304758071899414, "rewards/rejected": -1.4726327657699585, "step": 2826 }, { "epoch": 0.33, "learning_rate": 2.0440533837250502e-07, "logits/chosen": -2.164041519165039, "logits/rejected": -2.4912188053131104, "logps/chosen": -379.3470458984375, "logps/rejected": -220.86074829101562, "loss": 0.3036, "rewards/accuracies": 1.0, "rewards/chosen": -1.2399139404296875, "rewards/margins": 1.163254737854004, "rewards/rejected": -2.4031686782836914, "step": 2827 }, { "epoch": 0.33, "learning_rate": 2.0436990669658674e-07, "logits/chosen": -2.855140209197998, "logits/rejected": -2.8357858657836914, "logps/chosen": -207.58799743652344, "logps/rejected": -247.04696655273438, "loss": 0.5711, "rewards/accuracies": 0.5, "rewards/chosen": -1.0696563720703125, "rewards/margins": 1.5588760375976562, "rewards/rejected": -2.6285324096679688, "step": 2828 }, { "epoch": 0.33, "learning_rate": 2.0433447502066846e-07, "logits/chosen": -2.6308095455169678, "logits/rejected": -2.6607749462127686, "logps/chosen": -109.74212646484375, "logps/rejected": -153.10687255859375, "loss": 0.6946, "rewards/accuracies": 0.625, "rewards/chosen": -0.5706846117973328, "rewards/margins": 1.284409761428833, "rewards/rejected": -1.855094313621521, "step": 2829 }, { "epoch": 0.33, "learning_rate": 2.0429904334475018e-07, "logits/chosen": -2.5287153720855713, "logits/rejected": -2.578961133956909, "logps/chosen": -386.8229064941406, "logps/rejected": -355.1982727050781, "loss": 0.2328, "rewards/accuracies": 1.0, "rewards/chosen": -0.38310733437538147, "rewards/margins": 2.1144466400146484, "rewards/rejected": -2.497554063796997, "step": 2830 }, { "epoch": 0.33, "learning_rate": 2.042636116688319e-07, "logits/chosen": -1.7102911472320557, "logits/rejected": -1.9979699850082397, "logps/chosen": -318.42352294921875, "logps/rejected": -279.9345703125, "loss": 0.4946, "rewards/accuracies": 0.75, "rewards/chosen": -0.7330102920532227, "rewards/margins": 0.9977298974990845, "rewards/rejected": -1.7307403087615967, "step": 2831 }, { "epoch": 0.33, "learning_rate": 2.0422817999291363e-07, "logits/chosen": -2.863314628601074, "logits/rejected": -2.8775007724761963, "logps/chosen": -186.6444091796875, "logps/rejected": -206.51780700683594, "loss": 0.7769, "rewards/accuracies": 0.5, "rewards/chosen": -1.4505507946014404, "rewards/margins": 0.6711548566818237, "rewards/rejected": -2.1217057704925537, "step": 2832 }, { "epoch": 0.33, "learning_rate": 2.041927483169954e-07, "logits/chosen": -2.596134662628174, "logits/rejected": -2.3948166370391846, "logps/chosen": -211.3447265625, "logps/rejected": -213.29898071289062, "loss": 0.1824, "rewards/accuracies": 1.0, "rewards/chosen": -0.14425039291381836, "rewards/margins": 2.322404384613037, "rewards/rejected": -2.4666545391082764, "step": 2833 }, { "epoch": 0.33, "learning_rate": 2.0415731664107713e-07, "logits/chosen": -2.1336591243743896, "logits/rejected": -1.9621479511260986, "logps/chosen": -105.7725830078125, "logps/rejected": -165.7503662109375, "loss": 0.8965, "rewards/accuracies": 0.5, "rewards/chosen": -1.604356288909912, "rewards/margins": 0.2732434868812561, "rewards/rejected": -1.8775997161865234, "step": 2834 }, { "epoch": 0.33, "learning_rate": 2.0412188496515885e-07, "logits/chosen": -2.74996018409729, "logits/rejected": -2.406066417694092, "logps/chosen": -226.12258911132812, "logps/rejected": -270.3167724609375, "loss": 0.5797, "rewards/accuracies": 0.75, "rewards/chosen": -0.6943837404251099, "rewards/margins": 0.9944208860397339, "rewards/rejected": -1.6888046264648438, "step": 2835 }, { "epoch": 0.33, "learning_rate": 2.0408645328924057e-07, "logits/chosen": -2.2756476402282715, "logits/rejected": -2.1289124488830566, "logps/chosen": -135.25123596191406, "logps/rejected": -201.6737823486328, "loss": 0.4717, "rewards/accuracies": 0.75, "rewards/chosen": -1.3982540369033813, "rewards/margins": 1.5935182571411133, "rewards/rejected": -2.991771936416626, "step": 2836 }, { "epoch": 0.33, "learning_rate": 2.040510216133223e-07, "logits/chosen": -2.5038020610809326, "logits/rejected": -1.7901339530944824, "logps/chosen": -111.13200378417969, "logps/rejected": -408.293212890625, "loss": 0.2288, "rewards/accuracies": 1.0, "rewards/chosen": 0.059613630175590515, "rewards/margins": 2.4257407188415527, "rewards/rejected": -2.3661270141601562, "step": 2837 }, { "epoch": 0.33, "learning_rate": 2.0401558993740404e-07, "logits/chosen": -2.749030590057373, "logits/rejected": -2.726857900619507, "logps/chosen": -176.64962768554688, "logps/rejected": -178.43772888183594, "loss": 0.7638, "rewards/accuracies": 0.5, "rewards/chosen": -1.6310076713562012, "rewards/margins": 1.1016085147857666, "rewards/rejected": -2.732616424560547, "step": 2838 }, { "epoch": 0.33, "learning_rate": 2.0398015826148576e-07, "logits/chosen": -2.220405101776123, "logits/rejected": -2.0455281734466553, "logps/chosen": -123.35054016113281, "logps/rejected": -140.5076904296875, "loss": 0.5718, "rewards/accuracies": 0.75, "rewards/chosen": -0.5483088493347168, "rewards/margins": 1.0419373512268066, "rewards/rejected": -1.5902462005615234, "step": 2839 }, { "epoch": 0.33, "learning_rate": 2.0394472658556748e-07, "logits/chosen": -2.772846221923828, "logits/rejected": -2.603695869445801, "logps/chosen": -219.9719696044922, "logps/rejected": -273.76690673828125, "loss": 0.406, "rewards/accuracies": 0.75, "rewards/chosen": -0.5464353561401367, "rewards/margins": 1.6724764108657837, "rewards/rejected": -2.218911647796631, "step": 2840 }, { "epoch": 0.33, "learning_rate": 2.039092949096492e-07, "logits/chosen": -2.092108726501465, "logits/rejected": -2.126579761505127, "logps/chosen": -429.8004150390625, "logps/rejected": -406.6106262207031, "loss": 0.2536, "rewards/accuracies": 0.875, "rewards/chosen": -0.9370561838150024, "rewards/margins": 1.758062481880188, "rewards/rejected": -2.6951186656951904, "step": 2841 }, { "epoch": 0.33, "learning_rate": 2.0387386323373093e-07, "logits/chosen": -2.3389503955841064, "logits/rejected": -2.499793529510498, "logps/chosen": -249.271728515625, "logps/rejected": -225.81951904296875, "loss": 0.2141, "rewards/accuracies": 0.875, "rewards/chosen": -1.0997467041015625, "rewards/margins": 2.8356688022613525, "rewards/rejected": -3.935415744781494, "step": 2842 }, { "epoch": 0.33, "learning_rate": 2.0383843155781265e-07, "logits/chosen": -2.9714183807373047, "logits/rejected": -2.9895739555358887, "logps/chosen": -265.3147888183594, "logps/rejected": -221.22161865234375, "loss": 0.285, "rewards/accuracies": 1.0, "rewards/chosen": -0.7265208959579468, "rewards/margins": 2.158388137817383, "rewards/rejected": -2.884908676147461, "step": 2843 }, { "epoch": 0.33, "learning_rate": 2.038029998818944e-07, "logits/chosen": -2.8175811767578125, "logits/rejected": -2.8435721397399902, "logps/chosen": -253.998046875, "logps/rejected": -244.614013671875, "loss": 0.2087, "rewards/accuracies": 0.875, "rewards/chosen": -0.14495407044887543, "rewards/margins": 2.406085729598999, "rewards/rejected": -2.551039695739746, "step": 2844 }, { "epoch": 0.33, "learning_rate": 2.0376756820597615e-07, "logits/chosen": -1.9524171352386475, "logits/rejected": -2.0545637607574463, "logps/chosen": -187.60845947265625, "logps/rejected": -196.5067596435547, "loss": 0.3161, "rewards/accuracies": 0.875, "rewards/chosen": -0.8776882290840149, "rewards/margins": 1.4847383499145508, "rewards/rejected": -2.362426519393921, "step": 2845 }, { "epoch": 0.33, "learning_rate": 2.0373213653005787e-07, "logits/chosen": -2.3409676551818848, "logits/rejected": -2.6742193698883057, "logps/chosen": -554.197509765625, "logps/rejected": -411.0762634277344, "loss": 0.4378, "rewards/accuracies": 0.75, "rewards/chosen": -1.222048044204712, "rewards/margins": 3.0703349113464355, "rewards/rejected": -4.292383193969727, "step": 2846 }, { "epoch": 0.33, "learning_rate": 2.036967048541396e-07, "logits/chosen": -2.534383535385132, "logits/rejected": -2.6092355251312256, "logps/chosen": -197.86746215820312, "logps/rejected": -141.77825927734375, "loss": 0.2369, "rewards/accuracies": 1.0, "rewards/chosen": -0.38461869955062866, "rewards/margins": 1.4713630676269531, "rewards/rejected": -1.8559815883636475, "step": 2847 }, { "epoch": 0.33, "learning_rate": 2.0366127317822131e-07, "logits/chosen": -2.293240547180176, "logits/rejected": -2.2658278942108154, "logps/chosen": -311.14373779296875, "logps/rejected": -248.43307495117188, "loss": 0.2915, "rewards/accuracies": 0.75, "rewards/chosen": -0.6427315473556519, "rewards/margins": 2.1426873207092285, "rewards/rejected": -2.78541898727417, "step": 2848 }, { "epoch": 0.33, "learning_rate": 2.0362584150230306e-07, "logits/chosen": -2.338634490966797, "logits/rejected": -2.0143356323242188, "logps/chosen": -107.28548431396484, "logps/rejected": -286.7553405761719, "loss": 0.2444, "rewards/accuracies": 1.0, "rewards/chosen": -0.08572103083133698, "rewards/margins": 2.2003350257873535, "rewards/rejected": -2.2860560417175293, "step": 2849 }, { "epoch": 0.33, "learning_rate": 2.0359040982638479e-07, "logits/chosen": -2.2955589294433594, "logits/rejected": -2.47743558883667, "logps/chosen": -239.28292846679688, "logps/rejected": -159.82424926757812, "loss": 0.7208, "rewards/accuracies": 0.625, "rewards/chosen": -1.6456551551818848, "rewards/margins": 0.5764782428741455, "rewards/rejected": -2.222133159637451, "step": 2850 }, { "epoch": 0.33, "learning_rate": 2.035549781504665e-07, "logits/chosen": -2.3325648307800293, "logits/rejected": -2.3824925422668457, "logps/chosen": -183.08778381347656, "logps/rejected": -194.589111328125, "loss": 0.4246, "rewards/accuracies": 0.75, "rewards/chosen": -1.2112860679626465, "rewards/margins": 1.254469394683838, "rewards/rejected": -2.4657554626464844, "step": 2851 }, { "epoch": 0.33, "learning_rate": 2.0351954647454823e-07, "logits/chosen": -2.3954105377197266, "logits/rejected": -2.534482717514038, "logps/chosen": -326.9853820800781, "logps/rejected": -411.5298767089844, "loss": 0.4292, "rewards/accuracies": 0.75, "rewards/chosen": -0.6472383737564087, "rewards/margins": 1.7075189352035522, "rewards/rejected": -2.35475754737854, "step": 2852 }, { "epoch": 0.33, "learning_rate": 2.0348411479862995e-07, "logits/chosen": -2.3276333808898926, "logits/rejected": -2.65321683883667, "logps/chosen": -382.74755859375, "logps/rejected": -254.12014770507812, "loss": 0.457, "rewards/accuracies": 0.625, "rewards/chosen": -0.8999982476234436, "rewards/margins": 0.901712954044342, "rewards/rejected": -1.8017112016677856, "step": 2853 }, { "epoch": 0.33, "learning_rate": 2.0344868312271167e-07, "logits/chosen": -2.7296299934387207, "logits/rejected": -2.7550432682037354, "logps/chosen": -528.6714477539062, "logps/rejected": -346.8750305175781, "loss": 0.5692, "rewards/accuracies": 0.625, "rewards/chosen": -1.018274188041687, "rewards/margins": 0.6785012483596802, "rewards/rejected": -1.6967754364013672, "step": 2854 }, { "epoch": 0.33, "learning_rate": 2.0341325144679342e-07, "logits/chosen": -2.86840558052063, "logits/rejected": -2.946042537689209, "logps/chosen": -202.4578094482422, "logps/rejected": -223.56314086914062, "loss": 0.2114, "rewards/accuracies": 1.0, "rewards/chosen": -0.6463537812232971, "rewards/margins": 2.3079893589019775, "rewards/rejected": -2.954343318939209, "step": 2855 }, { "epoch": 0.33, "learning_rate": 2.0337781977087517e-07, "logits/chosen": -2.5411508083343506, "logits/rejected": -2.691070079803467, "logps/chosen": -256.4216613769531, "logps/rejected": -236.79119873046875, "loss": 0.4356, "rewards/accuracies": 0.75, "rewards/chosen": -1.609972357749939, "rewards/margins": 1.3840147256851196, "rewards/rejected": -2.9939870834350586, "step": 2856 }, { "epoch": 0.33, "learning_rate": 2.033423880949569e-07, "logits/chosen": -2.832761287689209, "logits/rejected": -2.796764612197876, "logps/chosen": -190.47116088867188, "logps/rejected": -258.75421142578125, "loss": 0.177, "rewards/accuracies": 1.0, "rewards/chosen": -0.5369760394096375, "rewards/margins": 3.00662899017334, "rewards/rejected": -3.543605089187622, "step": 2857 }, { "epoch": 0.33, "learning_rate": 2.0330695641903862e-07, "logits/chosen": -2.257845640182495, "logits/rejected": -2.3125150203704834, "logps/chosen": -230.78269958496094, "logps/rejected": -216.63568115234375, "loss": 0.6072, "rewards/accuracies": 0.875, "rewards/chosen": -0.7062742710113525, "rewards/margins": 1.691977620124817, "rewards/rejected": -2.398252010345459, "step": 2858 }, { "epoch": 0.33, "learning_rate": 2.0327152474312034e-07, "logits/chosen": -2.4231739044189453, "logits/rejected": -2.214143753051758, "logps/chosen": -364.149169921875, "logps/rejected": -352.7958679199219, "loss": 0.4091, "rewards/accuracies": 0.75, "rewards/chosen": -1.2891604900360107, "rewards/margins": 1.647066593170166, "rewards/rejected": -2.9362268447875977, "step": 2859 }, { "epoch": 0.33, "learning_rate": 2.0323609306720209e-07, "logits/chosen": -2.1073765754699707, "logits/rejected": -2.4159884452819824, "logps/chosen": -441.6005859375, "logps/rejected": -346.96142578125, "loss": 0.485, "rewards/accuracies": 0.625, "rewards/chosen": -1.0235151052474976, "rewards/margins": 1.7792872190475464, "rewards/rejected": -2.802802324295044, "step": 2860 }, { "epoch": 0.33, "learning_rate": 2.032006613912838e-07, "logits/chosen": -2.1934328079223633, "logits/rejected": -2.5753846168518066, "logps/chosen": -289.9516906738281, "logps/rejected": -208.5700225830078, "loss": 0.5458, "rewards/accuracies": 0.625, "rewards/chosen": -0.8048868179321289, "rewards/margins": 1.155310034751892, "rewards/rejected": -1.9601967334747314, "step": 2861 }, { "epoch": 0.33, "learning_rate": 2.0316522971536553e-07, "logits/chosen": -2.6112372875213623, "logits/rejected": -2.5233471393585205, "logps/chosen": -353.9142150878906, "logps/rejected": -322.8084411621094, "loss": 0.7719, "rewards/accuracies": 0.625, "rewards/chosen": -1.3388844728469849, "rewards/margins": 0.8053247928619385, "rewards/rejected": -2.144209146499634, "step": 2862 }, { "epoch": 0.33, "learning_rate": 2.0312979803944725e-07, "logits/chosen": -2.28244686126709, "logits/rejected": -2.1591668128967285, "logps/chosen": -218.95614624023438, "logps/rejected": -375.42645263671875, "loss": 0.2096, "rewards/accuracies": 0.875, "rewards/chosen": -1.0893088579177856, "rewards/margins": 4.839071273803711, "rewards/rejected": -5.928380012512207, "step": 2863 }, { "epoch": 0.33, "learning_rate": 2.0309436636352897e-07, "logits/chosen": -2.4412362575531006, "logits/rejected": -2.451605796813965, "logps/chosen": -157.55386352539062, "logps/rejected": -170.01113891601562, "loss": 0.4649, "rewards/accuracies": 0.875, "rewards/chosen": -0.5210105180740356, "rewards/margins": 1.0127930641174316, "rewards/rejected": -1.5338035821914673, "step": 2864 }, { "epoch": 0.33, "learning_rate": 2.030589346876107e-07, "logits/chosen": -2.1625285148620605, "logits/rejected": -1.6983957290649414, "logps/chosen": -277.6005859375, "logps/rejected": -385.6598205566406, "loss": 0.656, "rewards/accuracies": 0.75, "rewards/chosen": -1.1222116947174072, "rewards/margins": 1.2989028692245483, "rewards/rejected": -2.421114683151245, "step": 2865 }, { "epoch": 0.33, "learning_rate": 2.0302350301169242e-07, "logits/chosen": -1.9634594917297363, "logits/rejected": -2.1003565788269043, "logps/chosen": -321.10919189453125, "logps/rejected": -198.8200225830078, "loss": 0.6641, "rewards/accuracies": 0.625, "rewards/chosen": -0.6732341647148132, "rewards/margins": 0.20739053189754486, "rewards/rejected": -0.8806246519088745, "step": 2866 }, { "epoch": 0.33, "learning_rate": 2.0298807133577417e-07, "logits/chosen": -2.4625420570373535, "logits/rejected": -2.706028461456299, "logps/chosen": -299.9356689453125, "logps/rejected": -322.8956298828125, "loss": 0.3407, "rewards/accuracies": 0.75, "rewards/chosen": -0.7983559966087341, "rewards/margins": 2.469330310821533, "rewards/rejected": -3.267686367034912, "step": 2867 }, { "epoch": 0.33, "learning_rate": 2.0295263965985592e-07, "logits/chosen": -2.326526641845703, "logits/rejected": -2.367274761199951, "logps/chosen": -111.6295166015625, "logps/rejected": -231.0255126953125, "loss": 0.3975, "rewards/accuracies": 0.875, "rewards/chosen": -1.2228307723999023, "rewards/margins": 1.820825219154358, "rewards/rejected": -3.0436558723449707, "step": 2868 }, { "epoch": 0.33, "learning_rate": 2.0291720798393764e-07, "logits/chosen": -2.430966377258301, "logits/rejected": -2.1374354362487793, "logps/chosen": -226.77301025390625, "logps/rejected": -285.22515869140625, "loss": 0.5139, "rewards/accuracies": 0.75, "rewards/chosen": -1.2108231782913208, "rewards/margins": 2.0325894355773926, "rewards/rejected": -3.243412971496582, "step": 2869 }, { "epoch": 0.33, "learning_rate": 2.0288177630801936e-07, "logits/chosen": -1.837465763092041, "logits/rejected": -2.1744298934936523, "logps/chosen": -383.08575439453125, "logps/rejected": -252.12258911132812, "loss": 0.3452, "rewards/accuracies": 0.75, "rewards/chosen": -0.5019859671592712, "rewards/margins": 1.609582781791687, "rewards/rejected": -2.1115686893463135, "step": 2870 }, { "epoch": 0.33, "learning_rate": 2.028463446321011e-07, "logits/chosen": -2.3352088928222656, "logits/rejected": -2.360283613204956, "logps/chosen": -201.75726318359375, "logps/rejected": -309.3847351074219, "loss": 0.4806, "rewards/accuracies": 0.625, "rewards/chosen": -1.734175205230713, "rewards/margins": 1.2834196090698242, "rewards/rejected": -3.017594814300537, "step": 2871 }, { "epoch": 0.33, "learning_rate": 2.0281091295618283e-07, "logits/chosen": -1.9165136814117432, "logits/rejected": -2.10412335395813, "logps/chosen": -372.65545654296875, "logps/rejected": -407.44317626953125, "loss": 0.2476, "rewards/accuracies": 0.875, "rewards/chosen": -0.9890196323394775, "rewards/margins": 2.9019620418548584, "rewards/rejected": -3.890981674194336, "step": 2872 }, { "epoch": 0.33, "learning_rate": 2.0277548128026455e-07, "logits/chosen": -2.246105194091797, "logits/rejected": -2.0619866847991943, "logps/chosen": -247.62705993652344, "logps/rejected": -339.67572021484375, "loss": 0.2168, "rewards/accuracies": 0.875, "rewards/chosen": -0.4305148124694824, "rewards/margins": 2.4015254974365234, "rewards/rejected": -2.832040309906006, "step": 2873 }, { "epoch": 0.33, "learning_rate": 2.0274004960434628e-07, "logits/chosen": -2.075566291809082, "logits/rejected": -2.5348029136657715, "logps/chosen": -299.35845947265625, "logps/rejected": -232.8660430908203, "loss": 0.3858, "rewards/accuracies": 0.875, "rewards/chosen": -0.4475215971469879, "rewards/margins": 2.4609344005584717, "rewards/rejected": -2.9084560871124268, "step": 2874 }, { "epoch": 0.33, "learning_rate": 2.02704617928428e-07, "logits/chosen": -2.7401082515716553, "logits/rejected": -2.295564651489258, "logps/chosen": -241.01943969726562, "logps/rejected": -370.2752380371094, "loss": 0.0822, "rewards/accuracies": 1.0, "rewards/chosen": -0.7928420901298523, "rewards/margins": 3.9431591033935547, "rewards/rejected": -4.736001014709473, "step": 2875 }, { "epoch": 0.33, "learning_rate": 2.0266918625250972e-07, "logits/chosen": -1.4570530652999878, "logits/rejected": -1.7534987926483154, "logps/chosen": -264.896240234375, "logps/rejected": -202.8114471435547, "loss": 0.8372, "rewards/accuracies": 0.5, "rewards/chosen": -1.6122134923934937, "rewards/margins": 1.0992285013198853, "rewards/rejected": -2.711441993713379, "step": 2876 }, { "epoch": 0.33, "learning_rate": 2.0263375457659144e-07, "logits/chosen": -2.8308329582214355, "logits/rejected": -2.811244010925293, "logps/chosen": -200.76812744140625, "logps/rejected": -228.46072387695312, "loss": 0.3615, "rewards/accuracies": 0.75, "rewards/chosen": -0.5914900302886963, "rewards/margins": 1.6404783725738525, "rewards/rejected": -2.231968402862549, "step": 2877 }, { "epoch": 0.33, "learning_rate": 2.025983229006732e-07, "logits/chosen": -2.3264904022216797, "logits/rejected": -2.418992519378662, "logps/chosen": -341.9829406738281, "logps/rejected": -294.7510681152344, "loss": 1.2145, "rewards/accuracies": 0.25, "rewards/chosen": -2.2792155742645264, "rewards/margins": -0.3553204834461212, "rewards/rejected": -1.923895001411438, "step": 2878 }, { "epoch": 0.33, "learning_rate": 2.025628912247549e-07, "logits/chosen": -2.5910556316375732, "logits/rejected": -2.6425607204437256, "logps/chosen": -427.37554931640625, "logps/rejected": -372.8592529296875, "loss": 0.1674, "rewards/accuracies": 0.875, "rewards/chosen": 0.1722436249256134, "rewards/margins": 2.4149796962738037, "rewards/rejected": -2.2427358627319336, "step": 2879 }, { "epoch": 0.34, "learning_rate": 2.0252745954883666e-07, "logits/chosen": -2.256223440170288, "logits/rejected": -1.9176206588745117, "logps/chosen": -281.580078125, "logps/rejected": -357.53619384765625, "loss": 0.3053, "rewards/accuracies": 0.875, "rewards/chosen": -0.24218136072158813, "rewards/margins": 2.945889472961426, "rewards/rejected": -3.188070774078369, "step": 2880 }, { "epoch": 0.34, "learning_rate": 2.0249202787291838e-07, "logits/chosen": -2.393981695175171, "logits/rejected": -2.6840949058532715, "logps/chosen": -617.1217041015625, "logps/rejected": -177.08956909179688, "loss": 0.7975, "rewards/accuracies": 0.875, "rewards/chosen": -1.5384984016418457, "rewards/margins": 1.6234798431396484, "rewards/rejected": -3.161978244781494, "step": 2881 }, { "epoch": 0.34, "learning_rate": 2.024565961970001e-07, "logits/chosen": -1.8459445238113403, "logits/rejected": -2.3582472801208496, "logps/chosen": -456.4781799316406, "logps/rejected": -187.4498291015625, "loss": 0.4169, "rewards/accuracies": 0.75, "rewards/chosen": -1.3145253658294678, "rewards/margins": 0.9809325337409973, "rewards/rejected": -2.2954578399658203, "step": 2882 }, { "epoch": 0.34, "learning_rate": 2.0242116452108185e-07, "logits/chosen": -2.0384066104888916, "logits/rejected": -2.3625717163085938, "logps/chosen": -217.86029052734375, "logps/rejected": -158.98114013671875, "loss": 1.1107, "rewards/accuracies": 0.375, "rewards/chosen": -1.6349494457244873, "rewards/margins": -0.5352143049240112, "rewards/rejected": -1.099735140800476, "step": 2883 }, { "epoch": 0.34, "learning_rate": 2.0238573284516358e-07, "logits/chosen": -2.0060477256774902, "logits/rejected": -2.1232669353485107, "logps/chosen": -206.22161865234375, "logps/rejected": -219.52394104003906, "loss": 0.2985, "rewards/accuracies": 0.875, "rewards/chosen": -1.124667763710022, "rewards/margins": 2.266490936279297, "rewards/rejected": -3.3911588191986084, "step": 2884 }, { "epoch": 0.34, "learning_rate": 2.023503011692453e-07, "logits/chosen": -1.9929039478302002, "logits/rejected": -2.0601019859313965, "logps/chosen": -403.5986328125, "logps/rejected": -498.83563232421875, "loss": 0.6154, "rewards/accuracies": 0.75, "rewards/chosen": -0.9665526151657104, "rewards/margins": 1.8448909521102905, "rewards/rejected": -2.811443328857422, "step": 2885 }, { "epoch": 0.34, "learning_rate": 2.0231486949332702e-07, "logits/chosen": -2.6422231197357178, "logits/rejected": -2.5143351554870605, "logps/chosen": -212.97222900390625, "logps/rejected": -285.6768493652344, "loss": 0.3461, "rewards/accuracies": 0.875, "rewards/chosen": -0.18182986974716187, "rewards/margins": 1.9619641304016113, "rewards/rejected": -2.143794059753418, "step": 2886 }, { "epoch": 0.34, "learning_rate": 2.0227943781740874e-07, "logits/chosen": -2.1844730377197266, "logits/rejected": -2.6466782093048096, "logps/chosen": -282.8886413574219, "logps/rejected": -215.51840209960938, "loss": 0.7105, "rewards/accuracies": 0.5, "rewards/chosen": -0.9629329442977905, "rewards/margins": 2.287843704223633, "rewards/rejected": -3.250776767730713, "step": 2887 }, { "epoch": 0.34, "learning_rate": 2.0224400614149046e-07, "logits/chosen": -2.2526354789733887, "logits/rejected": -2.3379318714141846, "logps/chosen": -263.161376953125, "logps/rejected": -217.009033203125, "loss": 0.7185, "rewards/accuracies": 0.625, "rewards/chosen": -1.2396337985992432, "rewards/margins": 0.9094007015228271, "rewards/rejected": -2.1490345001220703, "step": 2888 }, { "epoch": 0.34, "learning_rate": 2.022085744655722e-07, "logits/chosen": -2.1689679622650146, "logits/rejected": -2.242253065109253, "logps/chosen": -290.3797607421875, "logps/rejected": -266.25079345703125, "loss": 0.4546, "rewards/accuracies": 0.875, "rewards/chosen": -0.680088222026825, "rewards/margins": 1.7658886909484863, "rewards/rejected": -2.445976972579956, "step": 2889 }, { "epoch": 0.34, "learning_rate": 2.0217314278965394e-07, "logits/chosen": -1.8021471500396729, "logits/rejected": -1.776775598526001, "logps/chosen": -194.62965393066406, "logps/rejected": -318.5040588378906, "loss": 0.6408, "rewards/accuracies": 0.75, "rewards/chosen": -1.107653021812439, "rewards/margins": 1.904958963394165, "rewards/rejected": -3.0126121044158936, "step": 2890 }, { "epoch": 0.34, "learning_rate": 2.0213771111373568e-07, "logits/chosen": -1.9805021286010742, "logits/rejected": -1.8746891021728516, "logps/chosen": -252.8421630859375, "logps/rejected": -274.3184814453125, "loss": 0.1827, "rewards/accuracies": 1.0, "rewards/chosen": -0.8497920036315918, "rewards/margins": 1.9227381944656372, "rewards/rejected": -2.7725300788879395, "step": 2891 }, { "epoch": 0.34, "learning_rate": 2.021022794378174e-07, "logits/chosen": -2.0213210582733154, "logits/rejected": -1.8614336252212524, "logps/chosen": -296.9938659667969, "logps/rejected": -351.732421875, "loss": 0.7181, "rewards/accuracies": 0.625, "rewards/chosen": -1.0767945051193237, "rewards/margins": 0.8482465744018555, "rewards/rejected": -1.9250409603118896, "step": 2892 }, { "epoch": 0.34, "learning_rate": 2.0206684776189913e-07, "logits/chosen": -2.6949777603149414, "logits/rejected": -2.6188080310821533, "logps/chosen": -188.3943634033203, "logps/rejected": -182.68670654296875, "loss": 0.2588, "rewards/accuracies": 1.0, "rewards/chosen": -0.23387223482131958, "rewards/margins": 1.9993082284927368, "rewards/rejected": -2.233180522918701, "step": 2893 }, { "epoch": 0.34, "learning_rate": 2.0203141608598088e-07, "logits/chosen": -2.3466105461120605, "logits/rejected": -2.556361675262451, "logps/chosen": -115.06883239746094, "logps/rejected": -175.20767211914062, "loss": 0.2328, "rewards/accuracies": 0.875, "rewards/chosen": -0.49210256338119507, "rewards/margins": 2.1394600868225098, "rewards/rejected": -2.6315627098083496, "step": 2894 }, { "epoch": 0.34, "learning_rate": 2.019959844100626e-07, "logits/chosen": -2.686100959777832, "logits/rejected": -2.531803846359253, "logps/chosen": -104.9197998046875, "logps/rejected": -143.34988403320312, "loss": 0.5166, "rewards/accuracies": 0.75, "rewards/chosen": -1.0272703170776367, "rewards/margins": 0.6143026947975159, "rewards/rejected": -1.6415729522705078, "step": 2895 }, { "epoch": 0.34, "learning_rate": 2.0196055273414432e-07, "logits/chosen": -2.579378843307495, "logits/rejected": -2.589851140975952, "logps/chosen": -288.3555908203125, "logps/rejected": -211.2042694091797, "loss": 0.3076, "rewards/accuracies": 1.0, "rewards/chosen": -0.5928442478179932, "rewards/margins": 1.6573816537857056, "rewards/rejected": -2.2502260208129883, "step": 2896 }, { "epoch": 0.34, "learning_rate": 2.0192512105822604e-07, "logits/chosen": -2.2085208892822266, "logits/rejected": -2.3226630687713623, "logps/chosen": -173.33340454101562, "logps/rejected": -156.54083251953125, "loss": 0.446, "rewards/accuracies": 0.875, "rewards/chosen": -0.801984429359436, "rewards/margins": 1.124724268913269, "rewards/rejected": -1.926708698272705, "step": 2897 }, { "epoch": 0.34, "learning_rate": 2.0188968938230776e-07, "logits/chosen": -1.993521809577942, "logits/rejected": -1.9770526885986328, "logps/chosen": -437.001708984375, "logps/rejected": -418.07904052734375, "loss": 0.2164, "rewards/accuracies": 1.0, "rewards/chosen": -0.704768180847168, "rewards/margins": 2.854259967803955, "rewards/rejected": -3.559028148651123, "step": 2898 }, { "epoch": 0.34, "learning_rate": 2.018542577063895e-07, "logits/chosen": -2.6230273246765137, "logits/rejected": -2.598602294921875, "logps/chosen": -258.31561279296875, "logps/rejected": -341.5511779785156, "loss": 0.2956, "rewards/accuracies": 0.75, "rewards/chosen": -0.4791499674320221, "rewards/margins": 2.113140344619751, "rewards/rejected": -2.5922904014587402, "step": 2899 }, { "epoch": 0.34, "learning_rate": 2.0181882603047124e-07, "logits/chosen": -2.226626396179199, "logits/rejected": -2.5421509742736816, "logps/chosen": -349.17041015625, "logps/rejected": -221.76226806640625, "loss": 0.3253, "rewards/accuracies": 0.875, "rewards/chosen": -0.514342188835144, "rewards/margins": 1.2088603973388672, "rewards/rejected": -1.7232025861740112, "step": 2900 }, { "epoch": 0.34, "learning_rate": 2.0178339435455296e-07, "logits/chosen": -2.86385440826416, "logits/rejected": -2.7421200275421143, "logps/chosen": -247.175537109375, "logps/rejected": -198.66641235351562, "loss": 0.2221, "rewards/accuracies": 0.875, "rewards/chosen": -1.3531253337860107, "rewards/margins": 2.4901657104492188, "rewards/rejected": -3.8432910442352295, "step": 2901 }, { "epoch": 0.34, "learning_rate": 2.0174796267863468e-07, "logits/chosen": -2.6131203174591064, "logits/rejected": -2.6613430976867676, "logps/chosen": -119.65361785888672, "logps/rejected": -235.96148681640625, "loss": 0.1199, "rewards/accuracies": 1.0, "rewards/chosen": -0.23277662694454193, "rewards/margins": 3.518507480621338, "rewards/rejected": -3.751284122467041, "step": 2902 }, { "epoch": 0.34, "learning_rate": 2.0171253100271643e-07, "logits/chosen": -1.866573691368103, "logits/rejected": -2.312160015106201, "logps/chosen": -359.2745361328125, "logps/rejected": -280.340576171875, "loss": 0.488, "rewards/accuracies": 0.625, "rewards/chosen": -0.5938076376914978, "rewards/margins": 0.7951335310935974, "rewards/rejected": -1.3889411687850952, "step": 2903 }, { "epoch": 0.34, "learning_rate": 2.0167709932679815e-07, "logits/chosen": -2.5010504722595215, "logits/rejected": -2.3570709228515625, "logps/chosen": -308.05621337890625, "logps/rejected": -354.69866943359375, "loss": 0.2587, "rewards/accuracies": 0.875, "rewards/chosen": -0.6857733726501465, "rewards/margins": 3.434004306793213, "rewards/rejected": -4.119777679443359, "step": 2904 }, { "epoch": 0.34, "learning_rate": 2.016416676508799e-07, "logits/chosen": -2.238051652908325, "logits/rejected": -2.1889498233795166, "logps/chosen": -442.6329040527344, "logps/rejected": -322.31597900390625, "loss": 0.3949, "rewards/accuracies": 0.75, "rewards/chosen": -0.7156566977500916, "rewards/margins": 1.6037371158599854, "rewards/rejected": -2.3193938732147217, "step": 2905 }, { "epoch": 0.34, "learning_rate": 2.0160623597496162e-07, "logits/chosen": -1.8123810291290283, "logits/rejected": -1.8424222469329834, "logps/chosen": -346.43878173828125, "logps/rejected": -297.72308349609375, "loss": 0.0695, "rewards/accuracies": 1.0, "rewards/chosen": -0.5600947737693787, "rewards/margins": 3.2305641174316406, "rewards/rejected": -3.790658712387085, "step": 2906 }, { "epoch": 0.34, "learning_rate": 2.0157080429904334e-07, "logits/chosen": -2.413424015045166, "logits/rejected": -2.764202833175659, "logps/chosen": -294.4854736328125, "logps/rejected": -249.68624877929688, "loss": 0.1375, "rewards/accuracies": 1.0, "rewards/chosen": -0.2591441571712494, "rewards/margins": 3.248051166534424, "rewards/rejected": -3.507195472717285, "step": 2907 }, { "epoch": 0.34, "learning_rate": 2.0153537262312507e-07, "logits/chosen": -2.763993978500366, "logits/rejected": -2.573305368423462, "logps/chosen": -98.67894744873047, "logps/rejected": -141.6231231689453, "loss": 0.3705, "rewards/accuracies": 0.875, "rewards/chosen": -1.029970407485962, "rewards/margins": 2.1616175174713135, "rewards/rejected": -3.1915879249572754, "step": 2908 }, { "epoch": 0.34, "learning_rate": 2.014999409472068e-07, "logits/chosen": -1.715468168258667, "logits/rejected": -1.9786796569824219, "logps/chosen": -553.8857421875, "logps/rejected": -351.9848327636719, "loss": 0.2811, "rewards/accuracies": 0.875, "rewards/chosen": -0.21551834046840668, "rewards/margins": 2.5704092979431152, "rewards/rejected": -2.7859277725219727, "step": 2909 }, { "epoch": 0.34, "learning_rate": 2.014645092712885e-07, "logits/chosen": -2.671719551086426, "logits/rejected": -2.6838431358337402, "logps/chosen": -339.44317626953125, "logps/rejected": -447.43310546875, "loss": 0.7095, "rewards/accuracies": 0.625, "rewards/chosen": -1.504263162612915, "rewards/margins": 2.1330599784851074, "rewards/rejected": -3.6373231410980225, "step": 2910 }, { "epoch": 0.34, "learning_rate": 2.0142907759537023e-07, "logits/chosen": -2.1355679035186768, "logits/rejected": -2.1122989654541016, "logps/chosen": -379.2438049316406, "logps/rejected": -341.314208984375, "loss": 0.5223, "rewards/accuracies": 0.75, "rewards/chosen": -1.8497755527496338, "rewards/margins": 1.79927396774292, "rewards/rejected": -3.6490495204925537, "step": 2911 }, { "epoch": 0.34, "learning_rate": 2.0139364591945198e-07, "logits/chosen": -2.6578526496887207, "logits/rejected": -2.619096040725708, "logps/chosen": -170.84283447265625, "logps/rejected": -255.70046997070312, "loss": 0.685, "rewards/accuracies": 0.75, "rewards/chosen": -1.555381178855896, "rewards/margins": 3.028592109680176, "rewards/rejected": -4.583973407745361, "step": 2912 }, { "epoch": 0.34, "learning_rate": 2.013582142435337e-07, "logits/chosen": -2.1168463230133057, "logits/rejected": -2.243605613708496, "logps/chosen": -367.1270751953125, "logps/rejected": -220.95384216308594, "loss": 0.596, "rewards/accuracies": 0.5, "rewards/chosen": -1.028884768486023, "rewards/margins": 1.0163989067077637, "rewards/rejected": -2.045283794403076, "step": 2913 }, { "epoch": 0.34, "learning_rate": 2.0132278256761542e-07, "logits/chosen": -2.322432041168213, "logits/rejected": -2.6980199813842773, "logps/chosen": -235.40992736816406, "logps/rejected": -144.21420288085938, "loss": 0.7743, "rewards/accuracies": 0.75, "rewards/chosen": -1.5636762380599976, "rewards/margins": 0.9845834970474243, "rewards/rejected": -2.548259973526001, "step": 2914 }, { "epoch": 0.34, "learning_rate": 2.0128735089169717e-07, "logits/chosen": -2.9559946060180664, "logits/rejected": -2.9896111488342285, "logps/chosen": -194.0387725830078, "logps/rejected": -194.22564697265625, "loss": 0.3681, "rewards/accuracies": 0.75, "rewards/chosen": -0.4741092920303345, "rewards/margins": 2.248908519744873, "rewards/rejected": -2.723017930984497, "step": 2915 }, { "epoch": 0.34, "learning_rate": 2.0125191921577892e-07, "logits/chosen": -2.7808947563171387, "logits/rejected": -2.914289951324463, "logps/chosen": -108.57732391357422, "logps/rejected": -297.12030029296875, "loss": 0.4967, "rewards/accuracies": 0.625, "rewards/chosen": -0.6685969829559326, "rewards/margins": 1.3955144882202148, "rewards/rejected": -2.0641114711761475, "step": 2916 }, { "epoch": 0.34, "learning_rate": 2.0121648753986064e-07, "logits/chosen": -2.30912184715271, "logits/rejected": -2.3399477005004883, "logps/chosen": -272.7632141113281, "logps/rejected": -271.8263854980469, "loss": 0.5932, "rewards/accuracies": 0.625, "rewards/chosen": -0.5316908955574036, "rewards/margins": 1.1266906261444092, "rewards/rejected": -1.658381462097168, "step": 2917 }, { "epoch": 0.34, "learning_rate": 2.0118105586394237e-07, "logits/chosen": -2.4315483570098877, "logits/rejected": -2.2851662635803223, "logps/chosen": -182.47215270996094, "logps/rejected": -257.54913330078125, "loss": 0.336, "rewards/accuracies": 0.875, "rewards/chosen": -0.5161490440368652, "rewards/margins": 1.561649203300476, "rewards/rejected": -2.077798366546631, "step": 2918 }, { "epoch": 0.34, "learning_rate": 2.011456241880241e-07, "logits/chosen": -2.2006661891937256, "logits/rejected": -2.1646039485931396, "logps/chosen": -434.67877197265625, "logps/rejected": -341.927978515625, "loss": 0.9661, "rewards/accuracies": 0.75, "rewards/chosen": -1.740044116973877, "rewards/margins": 0.9853518605232239, "rewards/rejected": -2.725396156311035, "step": 2919 }, { "epoch": 0.34, "learning_rate": 2.011101925121058e-07, "logits/chosen": -1.6322543621063232, "logits/rejected": -1.8881782293319702, "logps/chosen": -292.7653503417969, "logps/rejected": -219.0809326171875, "loss": 0.6202, "rewards/accuracies": 0.875, "rewards/chosen": -0.6112018823623657, "rewards/margins": 0.7212937474250793, "rewards/rejected": -1.3324956893920898, "step": 2920 }, { "epoch": 0.34, "learning_rate": 2.0107476083618753e-07, "logits/chosen": -2.7338504791259766, "logits/rejected": -2.4025659561157227, "logps/chosen": -177.26974487304688, "logps/rejected": -221.90487670898438, "loss": 0.272, "rewards/accuracies": 1.0, "rewards/chosen": -0.1996528059244156, "rewards/margins": 1.690220832824707, "rewards/rejected": -1.8898735046386719, "step": 2921 }, { "epoch": 0.34, "learning_rate": 2.0103932916026925e-07, "logits/chosen": -2.3635764122009277, "logits/rejected": -2.472235918045044, "logps/chosen": -213.44801330566406, "logps/rejected": -218.5472869873047, "loss": 0.5757, "rewards/accuracies": 0.625, "rewards/chosen": -1.4223453998565674, "rewards/margins": 0.8540315628051758, "rewards/rejected": -2.2763772010803223, "step": 2922 }, { "epoch": 0.34, "learning_rate": 2.01003897484351e-07, "logits/chosen": -2.14583158493042, "logits/rejected": -2.2695555686950684, "logps/chosen": -135.1510467529297, "logps/rejected": -154.6786651611328, "loss": 0.1339, "rewards/accuracies": 1.0, "rewards/chosen": -0.7779644131660461, "rewards/margins": 2.2833235263824463, "rewards/rejected": -3.0612878799438477, "step": 2923 }, { "epoch": 0.34, "learning_rate": 2.0096846580843273e-07, "logits/chosen": -2.3814899921417236, "logits/rejected": -2.5100526809692383, "logps/chosen": -195.1119384765625, "logps/rejected": -177.22975158691406, "loss": 0.2506, "rewards/accuracies": 0.875, "rewards/chosen": -0.22154103219509125, "rewards/margins": 2.2221519947052, "rewards/rejected": -2.443693161010742, "step": 2924 }, { "epoch": 0.34, "learning_rate": 2.0093303413251445e-07, "logits/chosen": -2.037078857421875, "logits/rejected": -2.2519540786743164, "logps/chosen": -361.8112487792969, "logps/rejected": -324.4891357421875, "loss": 0.3512, "rewards/accuracies": 0.875, "rewards/chosen": -0.2261950969696045, "rewards/margins": 1.9505302906036377, "rewards/rejected": -2.176725387573242, "step": 2925 }, { "epoch": 0.34, "learning_rate": 2.0089760245659617e-07, "logits/chosen": -2.271433115005493, "logits/rejected": -2.143216133117676, "logps/chosen": -140.87429809570312, "logps/rejected": -243.0975799560547, "loss": 0.9611, "rewards/accuracies": 0.75, "rewards/chosen": -1.7680010795593262, "rewards/margins": 0.9985706210136414, "rewards/rejected": -2.766571521759033, "step": 2926 }, { "epoch": 0.34, "learning_rate": 2.0086217078067792e-07, "logits/chosen": -2.230926275253296, "logits/rejected": -2.198648452758789, "logps/chosen": -176.80764770507812, "logps/rejected": -225.64303588867188, "loss": 0.3351, "rewards/accuracies": 0.75, "rewards/chosen": -1.251363754272461, "rewards/margins": 2.4190735816955566, "rewards/rejected": -3.6704373359680176, "step": 2927 }, { "epoch": 0.34, "learning_rate": 2.0082673910475967e-07, "logits/chosen": -2.1258020401000977, "logits/rejected": -1.8229312896728516, "logps/chosen": -202.33834838867188, "logps/rejected": -347.025146484375, "loss": 0.5939, "rewards/accuracies": 0.625, "rewards/chosen": -1.6505422592163086, "rewards/margins": 2.076451063156128, "rewards/rejected": -3.7269930839538574, "step": 2928 }, { "epoch": 0.34, "learning_rate": 2.007913074288414e-07, "logits/chosen": -2.3995542526245117, "logits/rejected": -2.0580923557281494, "logps/chosen": -135.10195922851562, "logps/rejected": -277.869873046875, "loss": 0.3794, "rewards/accuracies": 0.875, "rewards/chosen": -0.5674827098846436, "rewards/margins": 2.2271230220794678, "rewards/rejected": -2.7946054935455322, "step": 2929 }, { "epoch": 0.34, "learning_rate": 2.007558757529231e-07, "logits/chosen": -2.7765636444091797, "logits/rejected": -2.593801975250244, "logps/chosen": -235.21005249023438, "logps/rejected": -215.4985809326172, "loss": 0.2078, "rewards/accuracies": 0.875, "rewards/chosen": -0.1597038209438324, "rewards/margins": 2.3615593910217285, "rewards/rejected": -2.521263360977173, "step": 2930 }, { "epoch": 0.34, "learning_rate": 2.0072044407700483e-07, "logits/chosen": -2.7641549110412598, "logits/rejected": -2.7341067790985107, "logps/chosen": -358.2259521484375, "logps/rejected": -305.59783935546875, "loss": 0.6213, "rewards/accuracies": 0.75, "rewards/chosen": -0.9237785339355469, "rewards/margins": 1.4694404602050781, "rewards/rejected": -2.393218994140625, "step": 2931 }, { "epoch": 0.34, "learning_rate": 2.0068501240108656e-07, "logits/chosen": -2.1385064125061035, "logits/rejected": -2.5001134872436523, "logps/chosen": -262.65655517578125, "logps/rejected": -193.37716674804688, "loss": 0.5703, "rewards/accuracies": 0.5, "rewards/chosen": -0.5864198207855225, "rewards/margins": 0.9017243385314941, "rewards/rejected": -1.4881441593170166, "step": 2932 }, { "epoch": 0.34, "learning_rate": 2.0064958072516828e-07, "logits/chosen": -1.7771201133728027, "logits/rejected": -1.6606181859970093, "logps/chosen": -401.1876525878906, "logps/rejected": -445.3562927246094, "loss": 0.4001, "rewards/accuracies": 0.75, "rewards/chosen": -0.4578157663345337, "rewards/margins": 2.6788392066955566, "rewards/rejected": -3.136654853820801, "step": 2933 }, { "epoch": 0.34, "learning_rate": 2.0061414904925003e-07, "logits/chosen": -1.9131063222885132, "logits/rejected": -1.6544075012207031, "logps/chosen": -344.4603271484375, "logps/rejected": -343.1708984375, "loss": 0.155, "rewards/accuracies": 1.0, "rewards/chosen": 0.08929747343063354, "rewards/margins": 2.488907814025879, "rewards/rejected": -2.3996105194091797, "step": 2934 }, { "epoch": 0.34, "learning_rate": 2.0057871737333175e-07, "logits/chosen": -2.7798643112182617, "logits/rejected": -2.7114267349243164, "logps/chosen": -185.43434143066406, "logps/rejected": -160.83795166015625, "loss": 0.0914, "rewards/accuracies": 1.0, "rewards/chosen": -0.5058056116104126, "rewards/margins": 2.873469352722168, "rewards/rejected": -3.379274845123291, "step": 2935 }, { "epoch": 0.34, "learning_rate": 2.0054328569741347e-07, "logits/chosen": -1.8365588188171387, "logits/rejected": -1.977575421333313, "logps/chosen": -504.61358642578125, "logps/rejected": -423.5311279296875, "loss": 0.4108, "rewards/accuracies": 0.75, "rewards/chosen": -0.5996456742286682, "rewards/margins": 1.2731050252914429, "rewards/rejected": -1.8727507591247559, "step": 2936 }, { "epoch": 0.34, "learning_rate": 2.005078540214952e-07, "logits/chosen": -2.2681212425231934, "logits/rejected": -2.564570426940918, "logps/chosen": -298.9365234375, "logps/rejected": -216.32440185546875, "loss": 0.4535, "rewards/accuracies": 0.75, "rewards/chosen": -0.857894241809845, "rewards/margins": 0.7612103223800659, "rewards/rejected": -1.6191045045852661, "step": 2937 }, { "epoch": 0.34, "learning_rate": 2.0047242234557694e-07, "logits/chosen": -2.101719379425049, "logits/rejected": -2.5418734550476074, "logps/chosen": -510.6044921875, "logps/rejected": -247.27154541015625, "loss": 0.224, "rewards/accuracies": 0.875, "rewards/chosen": -0.6818119287490845, "rewards/margins": 2.496598243713379, "rewards/rejected": -3.178410530090332, "step": 2938 }, { "epoch": 0.34, "learning_rate": 2.004369906696587e-07, "logits/chosen": -2.2151851654052734, "logits/rejected": -2.372112989425659, "logps/chosen": -379.4731750488281, "logps/rejected": -304.09332275390625, "loss": 0.2016, "rewards/accuracies": 1.0, "rewards/chosen": -0.45288822054862976, "rewards/margins": 2.0562357902526855, "rewards/rejected": -2.5091238021850586, "step": 2939 }, { "epoch": 0.34, "learning_rate": 2.004015589937404e-07, "logits/chosen": -2.6408915519714355, "logits/rejected": -2.4832851886749268, "logps/chosen": -209.3400421142578, "logps/rejected": -239.52781677246094, "loss": 0.3537, "rewards/accuracies": 0.875, "rewards/chosen": -0.10983119904994965, "rewards/margins": 2.682030439376831, "rewards/rejected": -2.7918617725372314, "step": 2940 }, { "epoch": 0.34, "learning_rate": 2.0036612731782213e-07, "logits/chosen": -2.320194721221924, "logits/rejected": -2.442559242248535, "logps/chosen": -301.04241943359375, "logps/rejected": -242.84500122070312, "loss": 0.2474, "rewards/accuracies": 0.875, "rewards/chosen": -0.4987078309059143, "rewards/margins": 1.8824102878570557, "rewards/rejected": -2.381118059158325, "step": 2941 }, { "epoch": 0.34, "learning_rate": 2.0033069564190386e-07, "logits/chosen": -2.288820743560791, "logits/rejected": -2.0482044219970703, "logps/chosen": -232.35494995117188, "logps/rejected": -275.17254638671875, "loss": 0.1896, "rewards/accuracies": 1.0, "rewards/chosen": -0.962920606136322, "rewards/margins": 3.1037437915802, "rewards/rejected": -4.066664218902588, "step": 2942 }, { "epoch": 0.34, "learning_rate": 2.0029526396598558e-07, "logits/chosen": -1.9667783975601196, "logits/rejected": -1.9268335103988647, "logps/chosen": -437.59912109375, "logps/rejected": -294.917236328125, "loss": 0.5138, "rewards/accuracies": 0.75, "rewards/chosen": -0.7919634580612183, "rewards/margins": 1.086709976196289, "rewards/rejected": -1.8786734342575073, "step": 2943 }, { "epoch": 0.34, "learning_rate": 2.002598322900673e-07, "logits/chosen": -2.3284685611724854, "logits/rejected": -2.206052780151367, "logps/chosen": -277.4950866699219, "logps/rejected": -312.2777099609375, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": -0.789352297782898, "rewards/margins": 2.4773261547088623, "rewards/rejected": -3.26667857170105, "step": 2944 }, { "epoch": 0.34, "learning_rate": 2.0022440061414905e-07, "logits/chosen": -2.353238582611084, "logits/rejected": -2.2482492923736572, "logps/chosen": -333.7976989746094, "logps/rejected": -321.90814208984375, "loss": 0.4478, "rewards/accuracies": 0.75, "rewards/chosen": -1.0109210014343262, "rewards/margins": 0.8346867561340332, "rewards/rejected": -1.8456077575683594, "step": 2945 }, { "epoch": 0.34, "learning_rate": 2.0018896893823077e-07, "logits/chosen": -2.375850200653076, "logits/rejected": -2.362276554107666, "logps/chosen": -344.5463562011719, "logps/rejected": -316.58587646484375, "loss": 0.3865, "rewards/accuracies": 0.875, "rewards/chosen": -1.4915316104888916, "rewards/margins": 1.0389384031295776, "rewards/rejected": -2.530470132827759, "step": 2946 }, { "epoch": 0.34, "learning_rate": 2.001535372623125e-07, "logits/chosen": -2.4104995727539062, "logits/rejected": -2.292480945587158, "logps/chosen": -226.97662353515625, "logps/rejected": -293.36663818359375, "loss": 0.8654, "rewards/accuracies": 0.5, "rewards/chosen": -1.2003618478775024, "rewards/margins": 0.32490360736846924, "rewards/rejected": -1.5252654552459717, "step": 2947 }, { "epoch": 0.34, "learning_rate": 2.0011810558639422e-07, "logits/chosen": -2.283973217010498, "logits/rejected": -2.1675024032592773, "logps/chosen": -236.4239501953125, "logps/rejected": -301.725341796875, "loss": 0.5205, "rewards/accuracies": 0.75, "rewards/chosen": -0.6969221830368042, "rewards/margins": 1.0932577848434448, "rewards/rejected": -1.790179967880249, "step": 2948 }, { "epoch": 0.34, "learning_rate": 2.0008267391047594e-07, "logits/chosen": -2.4770517349243164, "logits/rejected": -2.0775909423828125, "logps/chosen": -217.0772247314453, "logps/rejected": -337.2941589355469, "loss": 0.6725, "rewards/accuracies": 0.625, "rewards/chosen": -1.282008171081543, "rewards/margins": 0.7198567390441895, "rewards/rejected": -2.0018649101257324, "step": 2949 }, { "epoch": 0.34, "learning_rate": 2.000472422345577e-07, "logits/chosen": -2.556140422821045, "logits/rejected": -2.5009443759918213, "logps/chosen": -173.9896697998047, "logps/rejected": -282.7094421386719, "loss": 0.3951, "rewards/accuracies": 0.875, "rewards/chosen": -1.0036544799804688, "rewards/margins": 1.163169503211975, "rewards/rejected": -2.1668238639831543, "step": 2950 }, { "epoch": 0.34, "learning_rate": 2.0001181055863943e-07, "logits/chosen": -2.332852363586426, "logits/rejected": -2.428933620452881, "logps/chosen": -424.38568115234375, "logps/rejected": -472.03759765625, "loss": 0.2362, "rewards/accuracies": 0.875, "rewards/chosen": -0.6782013177871704, "rewards/margins": 3.6055824756622314, "rewards/rejected": -4.283783435821533, "step": 2951 }, { "epoch": 0.34, "learning_rate": 1.9997637888272116e-07, "logits/chosen": -2.506739616394043, "logits/rejected": -2.450920820236206, "logps/chosen": -264.32586669921875, "logps/rejected": -227.83897399902344, "loss": 0.3261, "rewards/accuracies": 0.75, "rewards/chosen": -0.7826262712478638, "rewards/margins": 2.484111785888672, "rewards/rejected": -3.266737937927246, "step": 2952 }, { "epoch": 0.34, "learning_rate": 1.9994094720680288e-07, "logits/chosen": -2.051487922668457, "logits/rejected": -2.0303730964660645, "logps/chosen": -291.3682861328125, "logps/rejected": -314.61407470703125, "loss": 0.4538, "rewards/accuracies": 0.625, "rewards/chosen": -0.7440359592437744, "rewards/margins": 2.156759262084961, "rewards/rejected": -2.9007952213287354, "step": 2953 }, { "epoch": 0.34, "learning_rate": 1.999055155308846e-07, "logits/chosen": -2.425264596939087, "logits/rejected": -2.3210527896881104, "logps/chosen": -269.73736572265625, "logps/rejected": -356.1817626953125, "loss": 0.3489, "rewards/accuracies": 0.875, "rewards/chosen": -0.511172354221344, "rewards/margins": 1.8551009893417358, "rewards/rejected": -2.3662734031677246, "step": 2954 }, { "epoch": 0.34, "learning_rate": 1.9987008385496632e-07, "logits/chosen": -2.1973328590393066, "logits/rejected": -2.434110164642334, "logps/chosen": -263.4280700683594, "logps/rejected": -203.09645080566406, "loss": 0.4527, "rewards/accuracies": 0.875, "rewards/chosen": -0.8146154880523682, "rewards/margins": 0.8889662027359009, "rewards/rejected": -1.703581690788269, "step": 2955 }, { "epoch": 0.34, "learning_rate": 1.9983465217904805e-07, "logits/chosen": -2.2124061584472656, "logits/rejected": -2.2912731170654297, "logps/chosen": -259.53277587890625, "logps/rejected": -230.1304473876953, "loss": 0.2291, "rewards/accuracies": 1.0, "rewards/chosen": -0.5238584280014038, "rewards/margins": 2.341963768005371, "rewards/rejected": -2.8658220767974854, "step": 2956 }, { "epoch": 0.34, "learning_rate": 1.997992205031298e-07, "logits/chosen": -2.4036221504211426, "logits/rejected": -2.5151474475860596, "logps/chosen": -422.882568359375, "logps/rejected": -258.9435729980469, "loss": 0.9013, "rewards/accuracies": 0.375, "rewards/chosen": -1.4087369441986084, "rewards/margins": 1.2083147764205933, "rewards/rejected": -2.617051839828491, "step": 2957 }, { "epoch": 0.34, "learning_rate": 1.9976378882721152e-07, "logits/chosen": -2.221648693084717, "logits/rejected": -2.2349069118499756, "logps/chosen": -322.3822021484375, "logps/rejected": -365.86627197265625, "loss": 0.1644, "rewards/accuracies": 1.0, "rewards/chosen": -0.25471287965774536, "rewards/margins": 4.34344482421875, "rewards/rejected": -4.59815788269043, "step": 2958 }, { "epoch": 0.34, "learning_rate": 1.9972835715129324e-07, "logits/chosen": -1.7634639739990234, "logits/rejected": -2.3269524574279785, "logps/chosen": -409.5461730957031, "logps/rejected": -182.36782836914062, "loss": 1.1843, "rewards/accuracies": 0.375, "rewards/chosen": -1.8923242092132568, "rewards/margins": -0.4478663504123688, "rewards/rejected": -1.4444578886032104, "step": 2959 }, { "epoch": 0.34, "learning_rate": 1.9969292547537496e-07, "logits/chosen": -1.9270144701004028, "logits/rejected": -1.9363837242126465, "logps/chosen": -251.80111694335938, "logps/rejected": -229.285888671875, "loss": 0.3353, "rewards/accuracies": 0.875, "rewards/chosen": -0.795459508895874, "rewards/margins": 2.083198070526123, "rewards/rejected": -2.878657341003418, "step": 2960 }, { "epoch": 0.34, "learning_rate": 1.9965749379945668e-07, "logits/chosen": -3.1551668643951416, "logits/rejected": -2.959796190261841, "logps/chosen": -365.0365905761719, "logps/rejected": -304.3221435546875, "loss": 0.6154, "rewards/accuracies": 0.75, "rewards/chosen": -0.9896816611289978, "rewards/margins": 0.9685075283050537, "rewards/rejected": -1.9581892490386963, "step": 2961 }, { "epoch": 0.34, "learning_rate": 1.9962206212353846e-07, "logits/chosen": -1.998806118965149, "logits/rejected": -1.9266431331634521, "logps/chosen": -198.1507110595703, "logps/rejected": -261.8472900390625, "loss": 0.5192, "rewards/accuracies": 0.75, "rewards/chosen": -0.4290041923522949, "rewards/margins": 1.997981071472168, "rewards/rejected": -2.426985502243042, "step": 2962 }, { "epoch": 0.34, "learning_rate": 1.9958663044762018e-07, "logits/chosen": -1.930488109588623, "logits/rejected": -2.0079345703125, "logps/chosen": -333.8968505859375, "logps/rejected": -328.39569091796875, "loss": 0.9569, "rewards/accuracies": 0.375, "rewards/chosen": -1.3199591636657715, "rewards/margins": 0.017900601029396057, "rewards/rejected": -1.3378597497940063, "step": 2963 }, { "epoch": 0.34, "learning_rate": 1.995511987717019e-07, "logits/chosen": -1.8392246961593628, "logits/rejected": -2.2603702545166016, "logps/chosen": -282.7964172363281, "logps/rejected": -164.8245086669922, "loss": 0.2697, "rewards/accuracies": 0.875, "rewards/chosen": -0.16705051064491272, "rewards/margins": 2.112429618835449, "rewards/rejected": -2.279480457305908, "step": 2964 }, { "epoch": 0.34, "learning_rate": 1.9951576709578362e-07, "logits/chosen": -1.9577205181121826, "logits/rejected": -1.6914575099945068, "logps/chosen": -113.56126403808594, "logps/rejected": -139.3248748779297, "loss": 0.5022, "rewards/accuracies": 0.625, "rewards/chosen": -0.21151790022850037, "rewards/margins": 0.7962382435798645, "rewards/rejected": -1.007756233215332, "step": 2965 }, { "epoch": 0.35, "learning_rate": 1.9948033541986535e-07, "logits/chosen": -2.312349319458008, "logits/rejected": -2.349256992340088, "logps/chosen": -247.89352416992188, "logps/rejected": -332.27764892578125, "loss": 0.4856, "rewards/accuracies": 0.5, "rewards/chosen": -0.57518470287323, "rewards/margins": 1.1079691648483276, "rewards/rejected": -1.6831538677215576, "step": 2966 }, { "epoch": 0.35, "learning_rate": 1.9944490374394707e-07, "logits/chosen": -2.3599276542663574, "logits/rejected": -2.5052852630615234, "logps/chosen": -306.83685302734375, "logps/rejected": -230.5961151123047, "loss": 0.3585, "rewards/accuracies": 0.75, "rewards/chosen": -0.8077543377876282, "rewards/margins": 1.8203766345977783, "rewards/rejected": -2.6281309127807617, "step": 2967 }, { "epoch": 0.35, "learning_rate": 1.9940947206802882e-07, "logits/chosen": -2.6003191471099854, "logits/rejected": -2.269643783569336, "logps/chosen": -183.5027313232422, "logps/rejected": -316.7831726074219, "loss": 0.2899, "rewards/accuracies": 0.875, "rewards/chosen": -1.4048264026641846, "rewards/margins": 2.2455625534057617, "rewards/rejected": -3.6503889560699463, "step": 2968 }, { "epoch": 0.35, "learning_rate": 1.9937404039211054e-07, "logits/chosen": -2.4002928733825684, "logits/rejected": -2.388449192047119, "logps/chosen": -149.33644104003906, "logps/rejected": -318.544677734375, "loss": 0.4518, "rewards/accuracies": 0.875, "rewards/chosen": -0.7640703320503235, "rewards/margins": 2.808077573776245, "rewards/rejected": -3.572147846221924, "step": 2969 }, { "epoch": 0.35, "learning_rate": 1.9933860871619226e-07, "logits/chosen": -2.581054210662842, "logits/rejected": -2.480968475341797, "logps/chosen": -262.0176696777344, "logps/rejected": -350.72088623046875, "loss": 0.4046, "rewards/accuracies": 0.75, "rewards/chosen": -0.5113088488578796, "rewards/margins": 1.4705942869186401, "rewards/rejected": -1.981903314590454, "step": 2970 }, { "epoch": 0.35, "learning_rate": 1.9930317704027398e-07, "logits/chosen": -2.44429349899292, "logits/rejected": -2.1890335083007812, "logps/chosen": -192.13336181640625, "logps/rejected": -292.06060791015625, "loss": 0.3554, "rewards/accuracies": 0.75, "rewards/chosen": -1.1395573616027832, "rewards/margins": 2.0346007347106934, "rewards/rejected": -3.1741578578948975, "step": 2971 }, { "epoch": 0.35, "learning_rate": 1.992677453643557e-07, "logits/chosen": -2.1442832946777344, "logits/rejected": -2.402881383895874, "logps/chosen": -211.551025390625, "logps/rejected": -234.384033203125, "loss": 0.5705, "rewards/accuracies": 0.75, "rewards/chosen": -0.8243160247802734, "rewards/margins": 3.032723903656006, "rewards/rejected": -3.8570399284362793, "step": 2972 }, { "epoch": 0.35, "learning_rate": 1.9923231368843748e-07, "logits/chosen": -2.887645721435547, "logits/rejected": -2.9233834743499756, "logps/chosen": -242.0677490234375, "logps/rejected": -214.32501220703125, "loss": 0.3434, "rewards/accuracies": 0.875, "rewards/chosen": -0.8676060438156128, "rewards/margins": 2.1376421451568604, "rewards/rejected": -3.0052480697631836, "step": 2973 }, { "epoch": 0.35, "learning_rate": 1.991968820125192e-07, "logits/chosen": -2.344966411590576, "logits/rejected": -2.4604201316833496, "logps/chosen": -235.11480712890625, "logps/rejected": -355.4427795410156, "loss": 0.2285, "rewards/accuracies": 0.875, "rewards/chosen": -1.1107521057128906, "rewards/margins": 4.45008659362793, "rewards/rejected": -5.56083869934082, "step": 2974 }, { "epoch": 0.35, "learning_rate": 1.9916145033660092e-07, "logits/chosen": -2.7818405628204346, "logits/rejected": -2.8401758670806885, "logps/chosen": -139.1961212158203, "logps/rejected": -249.43685913085938, "loss": 0.3002, "rewards/accuracies": 0.875, "rewards/chosen": -0.6239289045333862, "rewards/margins": 3.0030181407928467, "rewards/rejected": -3.6269471645355225, "step": 2975 }, { "epoch": 0.35, "learning_rate": 1.9912601866068265e-07, "logits/chosen": -2.375119686126709, "logits/rejected": -2.4892711639404297, "logps/chosen": -237.21255493164062, "logps/rejected": -246.2691650390625, "loss": 0.179, "rewards/accuracies": 1.0, "rewards/chosen": -0.83076012134552, "rewards/margins": 2.461059331893921, "rewards/rejected": -3.2918195724487305, "step": 2976 }, { "epoch": 0.35, "learning_rate": 1.9909058698476437e-07, "logits/chosen": -1.9953749179840088, "logits/rejected": -1.4665166139602661, "logps/chosen": -271.1525573730469, "logps/rejected": -423.68304443359375, "loss": 0.2447, "rewards/accuracies": 0.875, "rewards/chosen": -0.7756668925285339, "rewards/margins": 3.527169942855835, "rewards/rejected": -4.302836894989014, "step": 2977 }, { "epoch": 0.35, "learning_rate": 1.990551553088461e-07, "logits/chosen": -2.419403076171875, "logits/rejected": -2.393122911453247, "logps/chosen": -389.3238525390625, "logps/rejected": -435.58233642578125, "loss": 0.4571, "rewards/accuracies": 0.625, "rewards/chosen": -0.9680401682853699, "rewards/margins": 1.2539844512939453, "rewards/rejected": -2.22202467918396, "step": 2978 }, { "epoch": 0.35, "learning_rate": 1.9901972363292784e-07, "logits/chosen": -2.7056260108947754, "logits/rejected": -2.8224802017211914, "logps/chosen": -189.83680725097656, "logps/rejected": -271.62164306640625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 0.09028299152851105, "rewards/margins": 5.211880683898926, "rewards/rejected": -5.121597766876221, "step": 2979 }, { "epoch": 0.35, "learning_rate": 1.9898429195700956e-07, "logits/chosen": -1.7996091842651367, "logits/rejected": -1.9412338733673096, "logps/chosen": -325.79541015625, "logps/rejected": -261.46051025390625, "loss": 0.341, "rewards/accuracies": 1.0, "rewards/chosen": 0.17956455051898956, "rewards/margins": 1.185703992843628, "rewards/rejected": -1.0061395168304443, "step": 2980 }, { "epoch": 0.35, "learning_rate": 1.9894886028109128e-07, "logits/chosen": -2.3512072563171387, "logits/rejected": -2.5131142139434814, "logps/chosen": -207.00772094726562, "logps/rejected": -189.58924865722656, "loss": 0.3772, "rewards/accuracies": 0.875, "rewards/chosen": -0.39496469497680664, "rewards/margins": 1.1836988925933838, "rewards/rejected": -1.5786635875701904, "step": 2981 }, { "epoch": 0.35, "learning_rate": 1.98913428605173e-07, "logits/chosen": -2.139326333999634, "logits/rejected": -2.3618686199188232, "logps/chosen": -270.9585266113281, "logps/rejected": -274.06103515625, "loss": 0.4571, "rewards/accuracies": 0.75, "rewards/chosen": -0.9525195360183716, "rewards/margins": 2.381098747253418, "rewards/rejected": -3.333617925643921, "step": 2982 }, { "epoch": 0.35, "learning_rate": 1.9887799692925473e-07, "logits/chosen": -2.7527027130126953, "logits/rejected": -2.67350697517395, "logps/chosen": -127.47023010253906, "logps/rejected": -275.6497802734375, "loss": 0.7784, "rewards/accuracies": 0.75, "rewards/chosen": -0.3970443606376648, "rewards/margins": 1.5528340339660645, "rewards/rejected": -1.949878454208374, "step": 2983 }, { "epoch": 0.35, "learning_rate": 1.9884256525333645e-07, "logits/chosen": -2.7597310543060303, "logits/rejected": -2.3768808841705322, "logps/chosen": -372.4945068359375, "logps/rejected": -386.5562438964844, "loss": 0.0799, "rewards/accuracies": 1.0, "rewards/chosen": -0.7409670948982239, "rewards/margins": 2.8956496715545654, "rewards/rejected": -3.6366167068481445, "step": 2984 }, { "epoch": 0.35, "learning_rate": 1.9880713357741822e-07, "logits/chosen": -2.453596591949463, "logits/rejected": -2.5824103355407715, "logps/chosen": -281.9296875, "logps/rejected": -169.03256225585938, "loss": 0.7276, "rewards/accuracies": 0.625, "rewards/chosen": -0.3892822265625, "rewards/margins": 1.9185659885406494, "rewards/rejected": -2.3078482151031494, "step": 2985 }, { "epoch": 0.35, "learning_rate": 1.9877170190149995e-07, "logits/chosen": -1.853387475013733, "logits/rejected": -2.404853343963623, "logps/chosen": -329.145263671875, "logps/rejected": -164.2345733642578, "loss": 1.0488, "rewards/accuracies": 0.5, "rewards/chosen": -1.0530712604522705, "rewards/margins": 0.32960045337677, "rewards/rejected": -1.382671594619751, "step": 2986 }, { "epoch": 0.35, "learning_rate": 1.9873627022558167e-07, "logits/chosen": -2.7191615104675293, "logits/rejected": -2.728963613510132, "logps/chosen": -184.5883026123047, "logps/rejected": -182.47152709960938, "loss": 0.2529, "rewards/accuracies": 0.875, "rewards/chosen": -0.13931691646575928, "rewards/margins": 2.1777701377868652, "rewards/rejected": -2.317087173461914, "step": 2987 }, { "epoch": 0.35, "learning_rate": 1.987008385496634e-07, "logits/chosen": -1.8460450172424316, "logits/rejected": -2.049100399017334, "logps/chosen": -280.8143310546875, "logps/rejected": -231.23255920410156, "loss": 0.5839, "rewards/accuracies": 0.625, "rewards/chosen": -0.7711240649223328, "rewards/margins": 1.2123451232910156, "rewards/rejected": -1.9834691286087036, "step": 2988 }, { "epoch": 0.35, "learning_rate": 1.9866540687374511e-07, "logits/chosen": -2.3131420612335205, "logits/rejected": -2.54781436920166, "logps/chosen": -345.4716796875, "logps/rejected": -226.30291748046875, "loss": 0.4623, "rewards/accuracies": 0.875, "rewards/chosen": -0.7816973328590393, "rewards/margins": 2.7152488231658936, "rewards/rejected": -3.496945858001709, "step": 2989 }, { "epoch": 0.35, "learning_rate": 1.9862997519782686e-07, "logits/chosen": -2.094048500061035, "logits/rejected": -2.2418534755706787, "logps/chosen": -181.994140625, "logps/rejected": -133.42947387695312, "loss": 0.5018, "rewards/accuracies": 0.625, "rewards/chosen": -0.7052596807479858, "rewards/margins": 1.3199505805969238, "rewards/rejected": -2.025210380554199, "step": 2990 }, { "epoch": 0.35, "learning_rate": 1.9859454352190858e-07, "logits/chosen": -2.3440046310424805, "logits/rejected": -2.161285161972046, "logps/chosen": -358.63232421875, "logps/rejected": -209.40484619140625, "loss": 0.4257, "rewards/accuracies": 0.875, "rewards/chosen": -0.6296483874320984, "rewards/margins": 1.4024100303649902, "rewards/rejected": -2.0320584774017334, "step": 2991 }, { "epoch": 0.35, "learning_rate": 1.985591118459903e-07, "logits/chosen": -2.4367330074310303, "logits/rejected": -2.5656981468200684, "logps/chosen": -302.6728515625, "logps/rejected": -251.53436279296875, "loss": 0.5902, "rewards/accuracies": 0.5, "rewards/chosen": -0.8423860669136047, "rewards/margins": 1.7068713903427124, "rewards/rejected": -2.549257516860962, "step": 2992 }, { "epoch": 0.35, "learning_rate": 1.9852368017007203e-07, "logits/chosen": -2.4103927612304688, "logits/rejected": -2.3722410202026367, "logps/chosen": -163.59613037109375, "logps/rejected": -187.11683654785156, "loss": 0.2976, "rewards/accuracies": 0.875, "rewards/chosen": -1.244042992591858, "rewards/margins": 1.6250025033950806, "rewards/rejected": -2.8690457344055176, "step": 2993 }, { "epoch": 0.35, "learning_rate": 1.9848824849415375e-07, "logits/chosen": -2.537788152694702, "logits/rejected": -2.557664632797241, "logps/chosen": -285.9046630859375, "logps/rejected": -212.3981170654297, "loss": 0.2143, "rewards/accuracies": 0.875, "rewards/chosen": -0.3959222733974457, "rewards/margins": 2.684755802154541, "rewards/rejected": -3.0806779861450195, "step": 2994 }, { "epoch": 0.35, "learning_rate": 1.9845281681823547e-07, "logits/chosen": -2.4008352756500244, "logits/rejected": -2.4128642082214355, "logps/chosen": -225.70220947265625, "logps/rejected": -173.80136108398438, "loss": 0.2537, "rewards/accuracies": 1.0, "rewards/chosen": -0.7729071378707886, "rewards/margins": 1.7236690521240234, "rewards/rejected": -2.4965763092041016, "step": 2995 }, { "epoch": 0.35, "learning_rate": 1.984173851423172e-07, "logits/chosen": -2.0448684692382812, "logits/rejected": -2.454012870788574, "logps/chosen": -336.7002258300781, "logps/rejected": -271.1465148925781, "loss": 0.1749, "rewards/accuracies": 1.0, "rewards/chosen": -0.2043069750070572, "rewards/margins": 2.5484235286712646, "rewards/rejected": -2.752730369567871, "step": 2996 }, { "epoch": 0.35, "learning_rate": 1.9838195346639897e-07, "logits/chosen": -2.50107741355896, "logits/rejected": -2.298346519470215, "logps/chosen": -121.91665649414062, "logps/rejected": -178.97055053710938, "loss": 0.5876, "rewards/accuracies": 0.625, "rewards/chosen": -0.6283455491065979, "rewards/margins": 1.3679494857788086, "rewards/rejected": -1.9962950944900513, "step": 2997 }, { "epoch": 0.35, "learning_rate": 1.983465217904807e-07, "logits/chosen": -2.1958560943603516, "logits/rejected": -1.8425116539001465, "logps/chosen": -205.7991485595703, "logps/rejected": -294.8202209472656, "loss": 0.4541, "rewards/accuracies": 0.875, "rewards/chosen": -0.9130134582519531, "rewards/margins": 1.8140803575515747, "rewards/rejected": -2.7270936965942383, "step": 2998 }, { "epoch": 0.35, "learning_rate": 1.9831109011456241e-07, "logits/chosen": -2.493767499923706, "logits/rejected": -2.607168674468994, "logps/chosen": -221.44847106933594, "logps/rejected": -389.8901062011719, "loss": 0.3622, "rewards/accuracies": 0.875, "rewards/chosen": -0.3364153206348419, "rewards/margins": 2.517232894897461, "rewards/rejected": -2.8536481857299805, "step": 2999 }, { "epoch": 0.35, "learning_rate": 1.9827565843864414e-07, "logits/chosen": -2.1696486473083496, "logits/rejected": -2.1916065216064453, "logps/chosen": -202.68438720703125, "logps/rejected": -253.47877502441406, "loss": 0.5182, "rewards/accuracies": 0.75, "rewards/chosen": -0.9760220050811768, "rewards/margins": 1.5308241844177246, "rewards/rejected": -2.5068461894989014, "step": 3000 }, { "epoch": 0.35, "eval_logits/chosen": -1.7466588020324707, "eval_logits/rejected": -1.746543049812317, "eval_logps/chosen": -277.8332824707031, "eval_logps/rejected": -275.6523132324219, "eval_loss": 0.3812580704689026, "eval_rewards/accuracies": 0.8405172228813171, "eval_rewards/chosen": -0.5667834281921387, "eval_rewards/margins": 1.897790551185608, "eval_rewards/rejected": -2.464573621749878, "eval_runtime": 237.7682, "eval_samples_per_second": 2.923, "eval_steps_per_second": 1.464, "step": 3000 }, { "epoch": 0.35, "learning_rate": 1.9824022676272586e-07, "logits/chosen": -2.4594738483428955, "logits/rejected": -2.363607406616211, "logps/chosen": -146.41091918945312, "logps/rejected": -274.72247314453125, "loss": 0.1678, "rewards/accuracies": 1.0, "rewards/chosen": -0.9387666583061218, "rewards/margins": 2.9524550437927246, "rewards/rejected": -3.891221523284912, "step": 3001 }, { "epoch": 0.35, "learning_rate": 1.982047950868076e-07, "logits/chosen": -2.6245057582855225, "logits/rejected": -2.642518997192383, "logps/chosen": -190.01617431640625, "logps/rejected": -242.55023193359375, "loss": 0.3491, "rewards/accuracies": 0.875, "rewards/chosen": -0.6915631294250488, "rewards/margins": 1.4898996353149414, "rewards/rejected": -2.1814630031585693, "step": 3002 }, { "epoch": 0.35, "learning_rate": 1.9816936341088933e-07, "logits/chosen": -2.780846118927002, "logits/rejected": -2.767791986465454, "logps/chosen": -173.71292114257812, "logps/rejected": -163.12527465820312, "loss": 0.3204, "rewards/accuracies": 1.0, "rewards/chosen": -0.555416464805603, "rewards/margins": 1.2818272113800049, "rewards/rejected": -1.8372435569763184, "step": 3003 }, { "epoch": 0.35, "learning_rate": 1.9813393173497105e-07, "logits/chosen": -2.277650833129883, "logits/rejected": -2.189649820327759, "logps/chosen": -346.3839111328125, "logps/rejected": -313.9497985839844, "loss": 0.3721, "rewards/accuracies": 0.875, "rewards/chosen": -0.8681928515434265, "rewards/margins": 2.0290160179138184, "rewards/rejected": -2.8972091674804688, "step": 3004 }, { "epoch": 0.35, "learning_rate": 1.9809850005905277e-07, "logits/chosen": -2.9929909706115723, "logits/rejected": -3.0167157649993896, "logps/chosen": -138.9799346923828, "logps/rejected": -199.08450317382812, "loss": 0.1956, "rewards/accuracies": 1.0, "rewards/chosen": -1.1523414850234985, "rewards/margins": 2.8133692741394043, "rewards/rejected": -3.9657106399536133, "step": 3005 }, { "epoch": 0.35, "learning_rate": 1.980630683831345e-07, "logits/chosen": -2.255199909210205, "logits/rejected": -2.5363755226135254, "logps/chosen": -293.1972961425781, "logps/rejected": -168.693115234375, "loss": 0.6522, "rewards/accuracies": 0.625, "rewards/chosen": -0.8815634250640869, "rewards/margins": 1.6672542095184326, "rewards/rejected": -2.5488173961639404, "step": 3006 }, { "epoch": 0.35, "learning_rate": 1.9802763670721622e-07, "logits/chosen": -1.6118335723876953, "logits/rejected": -1.9646233320236206, "logps/chosen": -553.3547973632812, "logps/rejected": -306.34893798828125, "loss": 0.5595, "rewards/accuracies": 0.875, "rewards/chosen": -1.7926628589630127, "rewards/margins": 2.412933826446533, "rewards/rejected": -4.205596446990967, "step": 3007 }, { "epoch": 0.35, "learning_rate": 1.97992205031298e-07, "logits/chosen": -1.5852832794189453, "logits/rejected": -2.020991802215576, "logps/chosen": -670.7092895507812, "logps/rejected": -305.908935546875, "loss": 0.2212, "rewards/accuracies": 0.875, "rewards/chosen": -0.4429447054862976, "rewards/margins": 2.6377742290496826, "rewards/rejected": -3.080718994140625, "step": 3008 }, { "epoch": 0.35, "learning_rate": 1.9795677335537971e-07, "logits/chosen": -1.9796062707901, "logits/rejected": -2.377922773361206, "logps/chosen": -340.1313171386719, "logps/rejected": -198.8621826171875, "loss": 0.2767, "rewards/accuracies": 0.875, "rewards/chosen": -0.20771107077598572, "rewards/margins": 2.2245826721191406, "rewards/rejected": -2.4322938919067383, "step": 3009 }, { "epoch": 0.35, "learning_rate": 1.9792134167946144e-07, "logits/chosen": -2.693833589553833, "logits/rejected": -2.7357425689697266, "logps/chosen": -191.77951049804688, "logps/rejected": -179.29135131835938, "loss": 0.8104, "rewards/accuracies": 0.625, "rewards/chosen": -1.1371018886566162, "rewards/margins": 0.0901307538151741, "rewards/rejected": -1.227232575416565, "step": 3010 }, { "epoch": 0.35, "learning_rate": 1.9788591000354316e-07, "logits/chosen": -2.0227575302124023, "logits/rejected": -1.629905343055725, "logps/chosen": -232.49853515625, "logps/rejected": -318.7529296875, "loss": 0.6374, "rewards/accuracies": 0.625, "rewards/chosen": -0.5750538110733032, "rewards/margins": 0.24198950827121735, "rewards/rejected": -0.8170433044433594, "step": 3011 }, { "epoch": 0.35, "learning_rate": 1.9785047832762488e-07, "logits/chosen": -1.9970715045928955, "logits/rejected": -1.8324120044708252, "logps/chosen": -277.4881591796875, "logps/rejected": -350.7144775390625, "loss": 0.4483, "rewards/accuracies": 0.75, "rewards/chosen": -0.8784058094024658, "rewards/margins": 1.7513728141784668, "rewards/rejected": -2.6297788619995117, "step": 3012 }, { "epoch": 0.35, "learning_rate": 1.9781504665170663e-07, "logits/chosen": -2.326165199279785, "logits/rejected": -2.4515037536621094, "logps/chosen": -378.49078369140625, "logps/rejected": -348.53515625, "loss": 0.4571, "rewards/accuracies": 0.875, "rewards/chosen": -0.7918878793716431, "rewards/margins": 1.3227030038833618, "rewards/rejected": -2.114590883255005, "step": 3013 }, { "epoch": 0.35, "learning_rate": 1.9777961497578835e-07, "logits/chosen": -1.9172874689102173, "logits/rejected": -2.1663930416107178, "logps/chosen": -510.93011474609375, "logps/rejected": -243.97113037109375, "loss": 0.2106, "rewards/accuracies": 0.875, "rewards/chosen": -0.03988611698150635, "rewards/margins": 3.142962694168091, "rewards/rejected": -3.1828489303588867, "step": 3014 }, { "epoch": 0.35, "learning_rate": 1.9774418329987007e-07, "logits/chosen": -2.1951584815979004, "logits/rejected": -1.9230486154556274, "logps/chosen": -256.08148193359375, "logps/rejected": -294.5352783203125, "loss": 0.3055, "rewards/accuracies": 0.875, "rewards/chosen": -0.9217814803123474, "rewards/margins": 3.449796676635742, "rewards/rejected": -4.371578216552734, "step": 3015 }, { "epoch": 0.35, "learning_rate": 1.977087516239518e-07, "logits/chosen": -2.2998878955841064, "logits/rejected": -2.2646679878234863, "logps/chosen": -303.7473449707031, "logps/rejected": -297.41607666015625, "loss": 0.3862, "rewards/accuracies": 0.875, "rewards/chosen": -1.0641454458236694, "rewards/margins": 1.3899402618408203, "rewards/rejected": -2.4540855884552, "step": 3016 }, { "epoch": 0.35, "learning_rate": 1.9767331994803352e-07, "logits/chosen": -2.2941417694091797, "logits/rejected": -2.199272632598877, "logps/chosen": -265.89361572265625, "logps/rejected": -349.03594970703125, "loss": 0.377, "rewards/accuracies": 0.875, "rewards/chosen": -0.644376277923584, "rewards/margins": 2.3195011615753174, "rewards/rejected": -2.9638774394989014, "step": 3017 }, { "epoch": 0.35, "learning_rate": 1.9763788827211524e-07, "logits/chosen": -2.039112091064453, "logits/rejected": -1.9755923748016357, "logps/chosen": -292.5597839355469, "logps/rejected": -273.22222900390625, "loss": 0.1079, "rewards/accuracies": 1.0, "rewards/chosen": -1.065636157989502, "rewards/margins": 3.8276336193084717, "rewards/rejected": -4.8932695388793945, "step": 3018 }, { "epoch": 0.35, "learning_rate": 1.97602456596197e-07, "logits/chosen": -2.5239152908325195, "logits/rejected": -2.5838866233825684, "logps/chosen": -139.35659790039062, "logps/rejected": -242.7225341796875, "loss": 0.5887, "rewards/accuracies": 0.75, "rewards/chosen": -1.1285662651062012, "rewards/margins": 2.7950539588928223, "rewards/rejected": -3.9236199855804443, "step": 3019 }, { "epoch": 0.35, "learning_rate": 1.9756702492027874e-07, "logits/chosen": -2.066732883453369, "logits/rejected": -2.1423020362854004, "logps/chosen": -169.10150146484375, "logps/rejected": -290.64788818359375, "loss": 0.737, "rewards/accuracies": 0.5, "rewards/chosen": -1.1237233877182007, "rewards/margins": 1.1853047609329224, "rewards/rejected": -2.309027910232544, "step": 3020 }, { "epoch": 0.35, "learning_rate": 1.9753159324436046e-07, "logits/chosen": -2.687124729156494, "logits/rejected": -2.672086477279663, "logps/chosen": -116.39154052734375, "logps/rejected": -194.3573760986328, "loss": 0.4016, "rewards/accuracies": 0.875, "rewards/chosen": -1.3717831373214722, "rewards/margins": 1.6063649654388428, "rewards/rejected": -2.9781484603881836, "step": 3021 }, { "epoch": 0.35, "learning_rate": 1.9749616156844218e-07, "logits/chosen": -2.2653307914733887, "logits/rejected": -1.9556455612182617, "logps/chosen": -264.12860107421875, "logps/rejected": -309.27215576171875, "loss": 0.5846, "rewards/accuracies": 0.625, "rewards/chosen": -0.030281245708465576, "rewards/margins": 1.4140753746032715, "rewards/rejected": -1.4443565607070923, "step": 3022 }, { "epoch": 0.35, "learning_rate": 1.974607298925239e-07, "logits/chosen": -2.6611971855163574, "logits/rejected": -2.529115915298462, "logps/chosen": -301.7157897949219, "logps/rejected": -355.5975341796875, "loss": 0.2084, "rewards/accuracies": 1.0, "rewards/chosen": -0.6033135056495667, "rewards/margins": 2.247802257537842, "rewards/rejected": -2.8511159420013428, "step": 3023 }, { "epoch": 0.35, "learning_rate": 1.9742529821660565e-07, "logits/chosen": -2.738468647003174, "logits/rejected": -2.705026865005493, "logps/chosen": -280.864990234375, "logps/rejected": -298.3419494628906, "loss": 0.2549, "rewards/accuracies": 1.0, "rewards/chosen": -0.5234990119934082, "rewards/margins": 2.169316291809082, "rewards/rejected": -2.6928153038024902, "step": 3024 }, { "epoch": 0.35, "learning_rate": 1.9738986654068737e-07, "logits/chosen": -2.534022092819214, "logits/rejected": -2.8444674015045166, "logps/chosen": -244.8070831298828, "logps/rejected": -249.51451110839844, "loss": 0.398, "rewards/accuracies": 0.875, "rewards/chosen": -1.063251256942749, "rewards/margins": 1.882554292678833, "rewards/rejected": -2.945805549621582, "step": 3025 }, { "epoch": 0.35, "learning_rate": 1.973544348647691e-07, "logits/chosen": -2.283919095993042, "logits/rejected": -2.44333553314209, "logps/chosen": -178.06307983398438, "logps/rejected": -216.8458251953125, "loss": 0.2114, "rewards/accuracies": 1.0, "rewards/chosen": -0.7997996807098389, "rewards/margins": 3.042936325073242, "rewards/rejected": -3.842735767364502, "step": 3026 }, { "epoch": 0.35, "learning_rate": 1.9731900318885082e-07, "logits/chosen": -2.255034923553467, "logits/rejected": -2.3941080570220947, "logps/chosen": -414.7364196777344, "logps/rejected": -465.2244873046875, "loss": 0.2052, "rewards/accuracies": 0.875, "rewards/chosen": -0.063772052526474, "rewards/margins": 2.7206168174743652, "rewards/rejected": -2.784389019012451, "step": 3027 }, { "epoch": 0.35, "learning_rate": 1.9728357151293254e-07, "logits/chosen": -2.4632627964019775, "logits/rejected": -2.5817079544067383, "logps/chosen": -128.80618286132812, "logps/rejected": -214.5970458984375, "loss": 0.2904, "rewards/accuracies": 0.875, "rewards/chosen": -0.7817670106887817, "rewards/margins": 2.3830456733703613, "rewards/rejected": -3.1648128032684326, "step": 3028 }, { "epoch": 0.35, "learning_rate": 1.9724813983701426e-07, "logits/chosen": -2.1246497631073, "logits/rejected": -2.068174362182617, "logps/chosen": -421.6078186035156, "logps/rejected": -337.85955810546875, "loss": 0.9095, "rewards/accuracies": 0.625, "rewards/chosen": -1.6912754774093628, "rewards/margins": 0.7982547879219055, "rewards/rejected": -2.489530324935913, "step": 3029 }, { "epoch": 0.35, "learning_rate": 1.9721270816109599e-07, "logits/chosen": -2.2448923587799072, "logits/rejected": -2.1223881244659424, "logps/chosen": -301.55438232421875, "logps/rejected": -315.1142578125, "loss": 0.7824, "rewards/accuracies": 0.75, "rewards/chosen": -1.768424153327942, "rewards/margins": 1.113840103149414, "rewards/rejected": -2.8822643756866455, "step": 3030 }, { "epoch": 0.35, "learning_rate": 1.9717727648517773e-07, "logits/chosen": -2.2851743698120117, "logits/rejected": -2.413346767425537, "logps/chosen": -368.11212158203125, "logps/rejected": -255.84933471679688, "loss": 0.3918, "rewards/accuracies": 0.625, "rewards/chosen": -1.0641793012619019, "rewards/margins": 1.5954036712646484, "rewards/rejected": -2.6595828533172607, "step": 3031 }, { "epoch": 0.35, "learning_rate": 1.9714184480925948e-07, "logits/chosen": -1.6360836029052734, "logits/rejected": -1.790245771408081, "logps/chosen": -335.57537841796875, "logps/rejected": -321.9815368652344, "loss": 0.6121, "rewards/accuracies": 0.625, "rewards/chosen": -0.6329741477966309, "rewards/margins": 0.8294532299041748, "rewards/rejected": -1.4624273777008057, "step": 3032 }, { "epoch": 0.35, "learning_rate": 1.971064131333412e-07, "logits/chosen": -2.2159547805786133, "logits/rejected": -2.468369483947754, "logps/chosen": -426.2157897949219, "logps/rejected": -252.64630126953125, "loss": 0.5882, "rewards/accuracies": 0.75, "rewards/chosen": -0.47872400283813477, "rewards/margins": 1.574688196182251, "rewards/rejected": -2.0534119606018066, "step": 3033 }, { "epoch": 0.35, "learning_rate": 1.9707098145742293e-07, "logits/chosen": -2.431584596633911, "logits/rejected": -2.1954903602600098, "logps/chosen": -227.67306518554688, "logps/rejected": -393.4449768066406, "loss": 0.6734, "rewards/accuracies": 0.75, "rewards/chosen": -0.8990190029144287, "rewards/margins": 0.8050597906112671, "rewards/rejected": -1.7040787935256958, "step": 3034 }, { "epoch": 0.35, "learning_rate": 1.9703554978150465e-07, "logits/chosen": -2.5853936672210693, "logits/rejected": -2.7979090213775635, "logps/chosen": -228.4013671875, "logps/rejected": -157.54718017578125, "loss": 0.3534, "rewards/accuracies": 0.875, "rewards/chosen": -0.823639988899231, "rewards/margins": 1.9678676128387451, "rewards/rejected": -2.7915077209472656, "step": 3035 }, { "epoch": 0.35, "learning_rate": 1.970001181055864e-07, "logits/chosen": -2.774946689605713, "logits/rejected": -2.5650579929351807, "logps/chosen": -182.18905639648438, "logps/rejected": -185.5701446533203, "loss": 0.4161, "rewards/accuracies": 0.875, "rewards/chosen": -1.1382135152816772, "rewards/margins": 1.5018057823181152, "rewards/rejected": -2.640019416809082, "step": 3036 }, { "epoch": 0.35, "learning_rate": 1.9696468642966812e-07, "logits/chosen": -2.361912965774536, "logits/rejected": -2.1005945205688477, "logps/chosen": -125.73505401611328, "logps/rejected": -326.3161926269531, "loss": 0.1828, "rewards/accuracies": 1.0, "rewards/chosen": -0.3146044909954071, "rewards/margins": 3.5657927989959717, "rewards/rejected": -3.880397319793701, "step": 3037 }, { "epoch": 0.35, "learning_rate": 1.9692925475374984e-07, "logits/chosen": -2.1438868045806885, "logits/rejected": -2.056556463241577, "logps/chosen": -339.1431884765625, "logps/rejected": -337.5471496582031, "loss": 0.811, "rewards/accuracies": 0.75, "rewards/chosen": -1.1610056161880493, "rewards/margins": 0.3940655589103699, "rewards/rejected": -1.555071234703064, "step": 3038 }, { "epoch": 0.35, "learning_rate": 1.9689382307783156e-07, "logits/chosen": -2.820479393005371, "logits/rejected": -2.6825873851776123, "logps/chosen": -171.345458984375, "logps/rejected": -213.25445556640625, "loss": 0.3305, "rewards/accuracies": 0.75, "rewards/chosen": -0.8060693144798279, "rewards/margins": 2.465203046798706, "rewards/rejected": -3.2712721824645996, "step": 3039 }, { "epoch": 0.35, "learning_rate": 1.9685839140191329e-07, "logits/chosen": -2.2133054733276367, "logits/rejected": -1.7539443969726562, "logps/chosen": -261.4666748046875, "logps/rejected": -445.5120849609375, "loss": 0.2601, "rewards/accuracies": 0.875, "rewards/chosen": -0.6042224764823914, "rewards/margins": 3.9756383895874023, "rewards/rejected": -4.579861164093018, "step": 3040 }, { "epoch": 0.35, "learning_rate": 1.96822959725995e-07, "logits/chosen": -1.61419677734375, "logits/rejected": -1.696324110031128, "logps/chosen": -388.1860046386719, "logps/rejected": -428.0269775390625, "loss": 0.5707, "rewards/accuracies": 0.75, "rewards/chosen": -1.4056317806243896, "rewards/margins": 1.5454752445220947, "rewards/rejected": -2.9511070251464844, "step": 3041 }, { "epoch": 0.35, "learning_rate": 1.9678752805007676e-07, "logits/chosen": -2.667227268218994, "logits/rejected": -2.4604671001434326, "logps/chosen": -243.31875610351562, "logps/rejected": -245.55548095703125, "loss": 0.2512, "rewards/accuracies": 1.0, "rewards/chosen": -0.9633252620697021, "rewards/margins": 2.358229637145996, "rewards/rejected": -3.3215551376342773, "step": 3042 }, { "epoch": 0.35, "learning_rate": 1.967520963741585e-07, "logits/chosen": -2.3172240257263184, "logits/rejected": -2.187418222427368, "logps/chosen": -275.0868835449219, "logps/rejected": -253.36917114257812, "loss": 0.3858, "rewards/accuracies": 0.875, "rewards/chosen": -1.0002658367156982, "rewards/margins": 2.307159900665283, "rewards/rejected": -3.3074259757995605, "step": 3043 }, { "epoch": 0.35, "learning_rate": 1.9671666469824023e-07, "logits/chosen": -2.287874221801758, "logits/rejected": -2.1239354610443115, "logps/chosen": -236.24758911132812, "logps/rejected": -270.7436218261719, "loss": 0.354, "rewards/accuracies": 0.875, "rewards/chosen": -1.0777268409729004, "rewards/margins": 1.7625505924224854, "rewards/rejected": -2.8402771949768066, "step": 3044 }, { "epoch": 0.35, "learning_rate": 1.9668123302232195e-07, "logits/chosen": -2.6026806831359863, "logits/rejected": -2.573774814605713, "logps/chosen": -258.40960693359375, "logps/rejected": -279.1078796386719, "loss": 0.6847, "rewards/accuracies": 0.75, "rewards/chosen": -1.7767550945281982, "rewards/margins": 0.6069813966751099, "rewards/rejected": -2.3837363719940186, "step": 3045 }, { "epoch": 0.35, "learning_rate": 1.9664580134640367e-07, "logits/chosen": -3.045938491821289, "logits/rejected": -2.9854049682617188, "logps/chosen": -265.9449768066406, "logps/rejected": -171.61712646484375, "loss": 0.3814, "rewards/accuracies": 0.75, "rewards/chosen": -0.7812365293502808, "rewards/margins": 1.5574370622634888, "rewards/rejected": -2.3386735916137695, "step": 3046 }, { "epoch": 0.35, "learning_rate": 1.9661036967048542e-07, "logits/chosen": -1.602947473526001, "logits/rejected": -1.9173753261566162, "logps/chosen": -445.0979309082031, "logps/rejected": -367.5957946777344, "loss": 0.6145, "rewards/accuracies": 0.625, "rewards/chosen": -0.136256605386734, "rewards/margins": 1.3297594785690308, "rewards/rejected": -1.4660160541534424, "step": 3047 }, { "epoch": 0.35, "learning_rate": 1.9657493799456714e-07, "logits/chosen": -2.202681303024292, "logits/rejected": -2.1134557723999023, "logps/chosen": -356.2322998046875, "logps/rejected": -447.5849914550781, "loss": 0.3043, "rewards/accuracies": 0.875, "rewards/chosen": -0.7635381817817688, "rewards/margins": 2.8975396156311035, "rewards/rejected": -3.6610779762268066, "step": 3048 }, { "epoch": 0.35, "learning_rate": 1.9653950631864886e-07, "logits/chosen": -2.311034679412842, "logits/rejected": -2.165663003921509, "logps/chosen": -312.968505859375, "logps/rejected": -493.8941650390625, "loss": 0.1718, "rewards/accuracies": 1.0, "rewards/chosen": -0.760169267654419, "rewards/margins": 2.8355534076690674, "rewards/rejected": -3.5957226753234863, "step": 3049 }, { "epoch": 0.35, "learning_rate": 1.9650407464273059e-07, "logits/chosen": -2.7979226112365723, "logits/rejected": -2.599684476852417, "logps/chosen": -107.00436401367188, "logps/rejected": -132.50714111328125, "loss": 0.3813, "rewards/accuracies": 0.75, "rewards/chosen": -0.8473031520843506, "rewards/margins": 1.4625849723815918, "rewards/rejected": -2.3098881244659424, "step": 3050 }, { "epoch": 0.35, "learning_rate": 1.964686429668123e-07, "logits/chosen": -2.096634864807129, "logits/rejected": -2.1523337364196777, "logps/chosen": -328.902587890625, "logps/rejected": -343.0173034667969, "loss": 0.4095, "rewards/accuracies": 0.75, "rewards/chosen": -0.6052758693695068, "rewards/margins": 1.3955286741256714, "rewards/rejected": -2.0008046627044678, "step": 3051 }, { "epoch": 0.36, "learning_rate": 1.9643321129089403e-07, "logits/chosen": -2.555698871612549, "logits/rejected": -2.5644805431365967, "logps/chosen": -345.969970703125, "logps/rejected": -239.98837280273438, "loss": 0.1566, "rewards/accuracies": 1.0, "rewards/chosen": -0.9914661645889282, "rewards/margins": 2.1565024852752686, "rewards/rejected": -3.1479687690734863, "step": 3052 }, { "epoch": 0.36, "learning_rate": 1.9639777961497578e-07, "logits/chosen": -2.8092093467712402, "logits/rejected": -2.7629144191741943, "logps/chosen": -196.1188507080078, "logps/rejected": -183.78057861328125, "loss": 0.3435, "rewards/accuracies": 0.875, "rewards/chosen": -0.4851170778274536, "rewards/margins": 1.7952828407287598, "rewards/rejected": -2.280399799346924, "step": 3053 }, { "epoch": 0.36, "learning_rate": 1.963623479390575e-07, "logits/chosen": -2.3625946044921875, "logits/rejected": -2.498396158218384, "logps/chosen": -100.30036926269531, "logps/rejected": -171.9199981689453, "loss": 0.5455, "rewards/accuracies": 0.75, "rewards/chosen": -0.5099483728408813, "rewards/margins": 1.9526798725128174, "rewards/rejected": -2.462628126144409, "step": 3054 }, { "epoch": 0.36, "learning_rate": 1.9632691626313925e-07, "logits/chosen": -2.6012723445892334, "logits/rejected": -2.2539377212524414, "logps/chosen": -234.70997619628906, "logps/rejected": -305.70782470703125, "loss": 0.3875, "rewards/accuracies": 0.875, "rewards/chosen": -0.5423040986061096, "rewards/margins": 1.9892863035202026, "rewards/rejected": -2.531590223312378, "step": 3055 }, { "epoch": 0.36, "learning_rate": 1.9629148458722097e-07, "logits/chosen": -2.3057796955108643, "logits/rejected": -2.6744885444641113, "logps/chosen": -265.74713134765625, "logps/rejected": -150.86221313476562, "loss": 0.3644, "rewards/accuracies": 0.875, "rewards/chosen": -0.6600843667984009, "rewards/margins": 1.7885726690292358, "rewards/rejected": -2.448657274246216, "step": 3056 }, { "epoch": 0.36, "learning_rate": 1.962560529113027e-07, "logits/chosen": -1.8908133506774902, "logits/rejected": -1.6980204582214355, "logps/chosen": -484.2646179199219, "logps/rejected": -501.28961181640625, "loss": 0.429, "rewards/accuracies": 0.875, "rewards/chosen": -0.662290096282959, "rewards/margins": 1.6878539323806763, "rewards/rejected": -2.3501439094543457, "step": 3057 }, { "epoch": 0.36, "learning_rate": 1.9622062123538444e-07, "logits/chosen": -2.3585987091064453, "logits/rejected": -2.1797397136688232, "logps/chosen": -189.91006469726562, "logps/rejected": -297.7913818359375, "loss": 0.5103, "rewards/accuracies": 0.5, "rewards/chosen": -0.766045868396759, "rewards/margins": 1.836298942565918, "rewards/rejected": -2.6023447513580322, "step": 3058 }, { "epoch": 0.36, "learning_rate": 1.9618518955946617e-07, "logits/chosen": -2.548710823059082, "logits/rejected": -2.476182699203491, "logps/chosen": -207.40208435058594, "logps/rejected": -222.58419799804688, "loss": 0.3233, "rewards/accuracies": 0.875, "rewards/chosen": -0.594593346118927, "rewards/margins": 1.672452449798584, "rewards/rejected": -2.2670459747314453, "step": 3059 }, { "epoch": 0.36, "learning_rate": 1.961497578835479e-07, "logits/chosen": -2.3417482376098633, "logits/rejected": -2.456052541732788, "logps/chosen": -261.672607421875, "logps/rejected": -289.4530029296875, "loss": 0.2638, "rewards/accuracies": 1.0, "rewards/chosen": -0.4028152525424957, "rewards/margins": 1.9479931592941284, "rewards/rejected": -2.3508083820343018, "step": 3060 }, { "epoch": 0.36, "learning_rate": 1.961143262076296e-07, "logits/chosen": -2.9097137451171875, "logits/rejected": -2.752378225326538, "logps/chosen": -241.68771362304688, "logps/rejected": -350.2965087890625, "loss": 0.5437, "rewards/accuracies": 0.75, "rewards/chosen": -0.7554417848587036, "rewards/margins": 1.824181318283081, "rewards/rejected": -2.579623222351074, "step": 3061 }, { "epoch": 0.36, "learning_rate": 1.9607889453171133e-07, "logits/chosen": -1.8009636402130127, "logits/rejected": -1.760741949081421, "logps/chosen": -198.28707885742188, "logps/rejected": -276.1143493652344, "loss": 0.3623, "rewards/accuracies": 0.875, "rewards/chosen": -1.0645872354507446, "rewards/margins": 1.5824391841888428, "rewards/rejected": -2.647026300430298, "step": 3062 }, { "epoch": 0.36, "learning_rate": 1.9604346285579305e-07, "logits/chosen": -2.268942356109619, "logits/rejected": -2.323070764541626, "logps/chosen": -362.32147216796875, "logps/rejected": -338.8183288574219, "loss": 0.4187, "rewards/accuracies": 0.75, "rewards/chosen": -0.4711971879005432, "rewards/margins": 1.6433734893798828, "rewards/rejected": -2.1145706176757812, "step": 3063 }, { "epoch": 0.36, "learning_rate": 1.9600803117987478e-07, "logits/chosen": -2.310106039047241, "logits/rejected": -2.692974805831909, "logps/chosen": -214.76718139648438, "logps/rejected": -164.80487060546875, "loss": 0.4218, "rewards/accuracies": 0.875, "rewards/chosen": -0.7426090240478516, "rewards/margins": 0.9774537086486816, "rewards/rejected": -1.7200628519058228, "step": 3064 }, { "epoch": 0.36, "learning_rate": 1.9597259950395652e-07, "logits/chosen": -2.5107953548431396, "logits/rejected": -2.5724096298217773, "logps/chosen": -212.32220458984375, "logps/rejected": -177.16461181640625, "loss": 0.478, "rewards/accuracies": 0.625, "rewards/chosen": -0.750869631767273, "rewards/margins": 1.648637294769287, "rewards/rejected": -2.3995070457458496, "step": 3065 }, { "epoch": 0.36, "learning_rate": 1.9593716782803825e-07, "logits/chosen": -1.6984198093414307, "logits/rejected": -1.6876379251480103, "logps/chosen": -310.6319885253906, "logps/rejected": -316.47149658203125, "loss": 0.986, "rewards/accuracies": 0.625, "rewards/chosen": -2.828258514404297, "rewards/margins": 0.3625556230545044, "rewards/rejected": -3.1908140182495117, "step": 3066 }, { "epoch": 0.36, "learning_rate": 1.9590173615212e-07, "logits/chosen": -2.5617804527282715, "logits/rejected": -2.786628246307373, "logps/chosen": -186.99172973632812, "logps/rejected": -202.25515747070312, "loss": 0.2187, "rewards/accuracies": 1.0, "rewards/chosen": -0.3378148376941681, "rewards/margins": 2.022547721862793, "rewards/rejected": -2.3603627681732178, "step": 3067 }, { "epoch": 0.36, "learning_rate": 1.9586630447620172e-07, "logits/chosen": -2.2520787715911865, "logits/rejected": -2.1152021884918213, "logps/chosen": -303.16363525390625, "logps/rejected": -241.32870483398438, "loss": 0.3544, "rewards/accuracies": 0.875, "rewards/chosen": -0.7017867565155029, "rewards/margins": 2.899522304534912, "rewards/rejected": -3.601309061050415, "step": 3068 }, { "epoch": 0.36, "learning_rate": 1.9583087280028347e-07, "logits/chosen": -1.8861809968948364, "logits/rejected": -1.9619195461273193, "logps/chosen": -229.7706756591797, "logps/rejected": -218.89755249023438, "loss": 0.3756, "rewards/accuracies": 0.875, "rewards/chosen": -0.5653438568115234, "rewards/margins": 2.2450454235076904, "rewards/rejected": -2.810389518737793, "step": 3069 }, { "epoch": 0.36, "learning_rate": 1.957954411243652e-07, "logits/chosen": -2.3462936878204346, "logits/rejected": -2.492610454559326, "logps/chosen": -176.53970336914062, "logps/rejected": -275.4410400390625, "loss": 0.4486, "rewards/accuracies": 0.75, "rewards/chosen": -0.462695837020874, "rewards/margins": 2.3066556453704834, "rewards/rejected": -2.7693514823913574, "step": 3070 }, { "epoch": 0.36, "learning_rate": 1.957600094484469e-07, "logits/chosen": -2.7490100860595703, "logits/rejected": -2.8418877124786377, "logps/chosen": -137.25155639648438, "logps/rejected": -158.52777099609375, "loss": 0.404, "rewards/accuracies": 0.625, "rewards/chosen": -0.26092809438705444, "rewards/margins": 2.4463939666748047, "rewards/rejected": -2.707322120666504, "step": 3071 }, { "epoch": 0.36, "learning_rate": 1.9572457777252863e-07, "logits/chosen": -2.5266342163085938, "logits/rejected": -2.472921371459961, "logps/chosen": -422.720458984375, "logps/rejected": -464.9222412109375, "loss": 0.2407, "rewards/accuracies": 0.875, "rewards/chosen": -0.7467414736747742, "rewards/margins": 1.8848586082458496, "rewards/rejected": -2.6315999031066895, "step": 3072 }, { "epoch": 0.36, "learning_rate": 1.9568914609661035e-07, "logits/chosen": -2.4075984954833984, "logits/rejected": -2.510087728500366, "logps/chosen": -281.11444091796875, "logps/rejected": -300.56378173828125, "loss": 0.3065, "rewards/accuracies": 0.875, "rewards/chosen": -0.1748780608177185, "rewards/margins": 1.968912124633789, "rewards/rejected": -2.1437902450561523, "step": 3073 }, { "epoch": 0.36, "learning_rate": 1.9565371442069208e-07, "logits/chosen": -2.2236740589141846, "logits/rejected": -2.3185322284698486, "logps/chosen": -365.0279235839844, "logps/rejected": -310.16162109375, "loss": 0.394, "rewards/accuracies": 0.75, "rewards/chosen": -1.0041489601135254, "rewards/margins": 1.437568187713623, "rewards/rejected": -2.4417169094085693, "step": 3074 }, { "epoch": 0.36, "learning_rate": 1.956182827447738e-07, "logits/chosen": -2.2502241134643555, "logits/rejected": -2.277646541595459, "logps/chosen": -318.6170959472656, "logps/rejected": -357.46002197265625, "loss": 0.4725, "rewards/accuracies": 0.625, "rewards/chosen": -0.25281912088394165, "rewards/margins": 1.5596362352371216, "rewards/rejected": -1.812455415725708, "step": 3075 }, { "epoch": 0.36, "learning_rate": 1.9558285106885555e-07, "logits/chosen": -2.744269609451294, "logits/rejected": -2.8026366233825684, "logps/chosen": -251.04922485351562, "logps/rejected": -244.90658569335938, "loss": 1.0798, "rewards/accuracies": 0.5, "rewards/chosen": -1.3697755336761475, "rewards/margins": 0.5562127828598022, "rewards/rejected": -1.9259884357452393, "step": 3076 }, { "epoch": 0.36, "learning_rate": 1.9554741939293727e-07, "logits/chosen": -2.1796066761016846, "logits/rejected": -2.51393985748291, "logps/chosen": -493.0710144042969, "logps/rejected": -361.9283447265625, "loss": 0.2556, "rewards/accuracies": 0.875, "rewards/chosen": -0.7756271362304688, "rewards/margins": 1.6182520389556885, "rewards/rejected": -2.393878936767578, "step": 3077 }, { "epoch": 0.36, "learning_rate": 1.9551198771701902e-07, "logits/chosen": -2.298384666442871, "logits/rejected": -2.4795377254486084, "logps/chosen": -156.8967742919922, "logps/rejected": -161.52896118164062, "loss": 0.4335, "rewards/accuracies": 0.75, "rewards/chosen": -1.0661638975143433, "rewards/margins": 1.6871165037155151, "rewards/rejected": -2.7532804012298584, "step": 3078 }, { "epoch": 0.36, "learning_rate": 1.9547655604110074e-07, "logits/chosen": -2.139148473739624, "logits/rejected": -2.3867995738983154, "logps/chosen": -225.09780883789062, "logps/rejected": -276.9072570800781, "loss": 0.3089, "rewards/accuracies": 0.875, "rewards/chosen": -0.31729376316070557, "rewards/margins": 2.4318580627441406, "rewards/rejected": -2.7491519451141357, "step": 3079 }, { "epoch": 0.36, "learning_rate": 1.9544112436518246e-07, "logits/chosen": -2.761261463165283, "logits/rejected": -2.780332326889038, "logps/chosen": -171.3284912109375, "logps/rejected": -206.00689697265625, "loss": 0.3987, "rewards/accuracies": 0.875, "rewards/chosen": -0.61780846118927, "rewards/margins": 1.9389897584915161, "rewards/rejected": -2.556798219680786, "step": 3080 }, { "epoch": 0.36, "learning_rate": 1.954056926892642e-07, "logits/chosen": -1.691419243812561, "logits/rejected": -2.125296115875244, "logps/chosen": -365.7619323730469, "logps/rejected": -267.72296142578125, "loss": 0.4409, "rewards/accuracies": 0.75, "rewards/chosen": -0.7751481533050537, "rewards/margins": 1.0066146850585938, "rewards/rejected": -1.781762957572937, "step": 3081 }, { "epoch": 0.36, "learning_rate": 1.9537026101334593e-07, "logits/chosen": -1.7622246742248535, "logits/rejected": -2.2715725898742676, "logps/chosen": -583.3938598632812, "logps/rejected": -357.3657531738281, "loss": 0.6876, "rewards/accuracies": 0.625, "rewards/chosen": -1.8566279411315918, "rewards/margins": 0.6388593912124634, "rewards/rejected": -2.4954874515533447, "step": 3082 }, { "epoch": 0.36, "learning_rate": 1.9533482933742765e-07, "logits/chosen": -2.4055278301239014, "logits/rejected": -2.5267140865325928, "logps/chosen": -201.18251037597656, "logps/rejected": -233.51934814453125, "loss": 0.1476, "rewards/accuracies": 1.0, "rewards/chosen": -0.5946815013885498, "rewards/margins": 2.7114150524139404, "rewards/rejected": -3.3060965538024902, "step": 3083 }, { "epoch": 0.36, "learning_rate": 1.9529939766150938e-07, "logits/chosen": -2.5423502922058105, "logits/rejected": -2.625427722930908, "logps/chosen": -386.2813415527344, "logps/rejected": -258.6645812988281, "loss": 0.8179, "rewards/accuracies": 0.875, "rewards/chosen": -2.235407829284668, "rewards/margins": 2.2106876373291016, "rewards/rejected": -4.4460954666137695, "step": 3084 }, { "epoch": 0.36, "learning_rate": 1.952639659855911e-07, "logits/chosen": -1.863872766494751, "logits/rejected": -1.8901612758636475, "logps/chosen": -275.6691589355469, "logps/rejected": -235.2613067626953, "loss": 0.2872, "rewards/accuracies": 0.875, "rewards/chosen": -0.33936962485313416, "rewards/margins": 1.726625919342041, "rewards/rejected": -2.065995454788208, "step": 3085 }, { "epoch": 0.36, "learning_rate": 1.9522853430967282e-07, "logits/chosen": -2.193173408508301, "logits/rejected": -2.205641746520996, "logps/chosen": -222.06204223632812, "logps/rejected": -275.3194274902344, "loss": 0.2699, "rewards/accuracies": 0.875, "rewards/chosen": -0.13196103274822235, "rewards/margins": 2.625652551651001, "rewards/rejected": -2.7576136589050293, "step": 3086 }, { "epoch": 0.36, "learning_rate": 1.9519310263375457e-07, "logits/chosen": -2.003970146179199, "logits/rejected": -2.077258825302124, "logps/chosen": -316.2494201660156, "logps/rejected": -367.93499755859375, "loss": 0.2564, "rewards/accuracies": 1.0, "rewards/chosen": -0.4094979166984558, "rewards/margins": 2.5925018787384033, "rewards/rejected": -3.002000093460083, "step": 3087 }, { "epoch": 0.36, "learning_rate": 1.951576709578363e-07, "logits/chosen": -2.466526508331299, "logits/rejected": -2.3165440559387207, "logps/chosen": -148.6517333984375, "logps/rejected": -225.70298767089844, "loss": 0.2519, "rewards/accuracies": 0.875, "rewards/chosen": -0.8260928392410278, "rewards/margins": 2.692384719848633, "rewards/rejected": -3.51847767829895, "step": 3088 }, { "epoch": 0.36, "learning_rate": 1.9512223928191801e-07, "logits/chosen": -1.9633926153182983, "logits/rejected": -1.7110555171966553, "logps/chosen": -400.64117431640625, "logps/rejected": -626.545654296875, "loss": 0.2578, "rewards/accuracies": 0.875, "rewards/chosen": -0.9234839677810669, "rewards/margins": 3.7648444175720215, "rewards/rejected": -4.688328266143799, "step": 3089 }, { "epoch": 0.36, "learning_rate": 1.9508680760599976e-07, "logits/chosen": -2.1981005668640137, "logits/rejected": -2.250837802886963, "logps/chosen": -226.04299926757812, "logps/rejected": -357.83270263671875, "loss": 0.5753, "rewards/accuracies": 0.75, "rewards/chosen": -0.8571372032165527, "rewards/margins": 1.8876385688781738, "rewards/rejected": -2.7447757720947266, "step": 3090 }, { "epoch": 0.36, "learning_rate": 1.9505137593008148e-07, "logits/chosen": -2.577970504760742, "logits/rejected": -2.7372524738311768, "logps/chosen": -286.6099853515625, "logps/rejected": -237.63589477539062, "loss": 0.7865, "rewards/accuracies": 0.5, "rewards/chosen": -2.2667179107666016, "rewards/margins": 0.23975539207458496, "rewards/rejected": -2.5064735412597656, "step": 3091 }, { "epoch": 0.36, "learning_rate": 1.9501594425416323e-07, "logits/chosen": -2.3509678840637207, "logits/rejected": -2.6302804946899414, "logps/chosen": -319.7651062011719, "logps/rejected": -203.45895385742188, "loss": 0.6763, "rewards/accuracies": 0.75, "rewards/chosen": -1.3953826427459717, "rewards/margins": 1.3449151515960693, "rewards/rejected": -2.740297794342041, "step": 3092 }, { "epoch": 0.36, "learning_rate": 1.9498051257824496e-07, "logits/chosen": -1.8278895616531372, "logits/rejected": -1.8975833654403687, "logps/chosen": -500.2528381347656, "logps/rejected": -508.5038146972656, "loss": 0.102, "rewards/accuracies": 1.0, "rewards/chosen": -0.30173200368881226, "rewards/margins": 3.3749606609344482, "rewards/rejected": -3.676692485809326, "step": 3093 }, { "epoch": 0.36, "learning_rate": 1.9494508090232668e-07, "logits/chosen": -1.5039117336273193, "logits/rejected": -1.9227688312530518, "logps/chosen": -463.9488220214844, "logps/rejected": -399.75213623046875, "loss": 0.0679, "rewards/accuracies": 1.0, "rewards/chosen": -0.04540317505598068, "rewards/margins": 3.228289842605591, "rewards/rejected": -3.273693084716797, "step": 3094 }, { "epoch": 0.36, "learning_rate": 1.949096492264084e-07, "logits/chosen": -2.2632195949554443, "logits/rejected": -2.4194135665893555, "logps/chosen": -394.73480224609375, "logps/rejected": -223.79910278320312, "loss": 0.5495, "rewards/accuracies": 0.625, "rewards/chosen": -1.1289913654327393, "rewards/margins": 0.8319392204284668, "rewards/rejected": -1.960930585861206, "step": 3095 }, { "epoch": 0.36, "learning_rate": 1.9487421755049012e-07, "logits/chosen": -2.183795690536499, "logits/rejected": -2.347364664077759, "logps/chosen": -403.4247131347656, "logps/rejected": -249.07310485839844, "loss": 0.6306, "rewards/accuracies": 0.75, "rewards/chosen": -1.4496487379074097, "rewards/margins": 1.746886968612671, "rewards/rejected": -3.196535587310791, "step": 3096 }, { "epoch": 0.36, "learning_rate": 1.9483878587457184e-07, "logits/chosen": -1.8170714378356934, "logits/rejected": -2.2644705772399902, "logps/chosen": -383.1539306640625, "logps/rejected": -222.98590087890625, "loss": 0.6317, "rewards/accuracies": 0.625, "rewards/chosen": -0.6372826099395752, "rewards/margins": 2.0703272819519043, "rewards/rejected": -2.7076096534729004, "step": 3097 }, { "epoch": 0.36, "learning_rate": 1.948033541986536e-07, "logits/chosen": -2.527939558029175, "logits/rejected": -2.755084276199341, "logps/chosen": -246.33460998535156, "logps/rejected": -275.1799621582031, "loss": 0.4103, "rewards/accuracies": 0.75, "rewards/chosen": -0.9714311957359314, "rewards/margins": 3.269047737121582, "rewards/rejected": -4.240478992462158, "step": 3098 }, { "epoch": 0.36, "learning_rate": 1.9476792252273531e-07, "logits/chosen": -2.2928264141082764, "logits/rejected": -2.0483834743499756, "logps/chosen": -186.25051879882812, "logps/rejected": -290.2942199707031, "loss": 0.2781, "rewards/accuracies": 0.875, "rewards/chosen": -0.8996459245681763, "rewards/margins": 1.9352480173110962, "rewards/rejected": -2.8348939418792725, "step": 3099 }, { "epoch": 0.36, "learning_rate": 1.9473249084681704e-07, "logits/chosen": -2.523756265640259, "logits/rejected": -2.684831142425537, "logps/chosen": -341.0999755859375, "logps/rejected": -172.13128662109375, "loss": 0.7798, "rewards/accuracies": 0.75, "rewards/chosen": -1.0476539134979248, "rewards/margins": 0.6210567355155945, "rewards/rejected": -1.6687105894088745, "step": 3100 }, { "epoch": 0.36, "learning_rate": 1.9469705917089876e-07, "logits/chosen": -2.1315619945526123, "logits/rejected": -1.8821651935577393, "logps/chosen": -157.85919189453125, "logps/rejected": -200.81057739257812, "loss": 0.458, "rewards/accuracies": 0.75, "rewards/chosen": -1.8662946224212646, "rewards/margins": 1.5491145849227905, "rewards/rejected": -3.4154093265533447, "step": 3101 }, { "epoch": 0.36, "learning_rate": 1.946616274949805e-07, "logits/chosen": -2.6267032623291016, "logits/rejected": -2.5020735263824463, "logps/chosen": -217.08343505859375, "logps/rejected": -344.1680908203125, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": -0.15955981612205505, "rewards/margins": 2.61568546295166, "rewards/rejected": -2.775245428085327, "step": 3102 }, { "epoch": 0.36, "learning_rate": 1.9462619581906226e-07, "logits/chosen": -1.9715205430984497, "logits/rejected": -2.3447420597076416, "logps/chosen": -512.128173828125, "logps/rejected": -314.21331787109375, "loss": 0.3411, "rewards/accuracies": 0.75, "rewards/chosen": 0.272097110748291, "rewards/margins": 2.964650869369507, "rewards/rejected": -2.6925535202026367, "step": 3103 }, { "epoch": 0.36, "learning_rate": 1.9459076414314398e-07, "logits/chosen": -2.1395809650421143, "logits/rejected": -2.515711545944214, "logps/chosen": -174.10145568847656, "logps/rejected": -156.0001220703125, "loss": 0.3387, "rewards/accuracies": 1.0, "rewards/chosen": -0.29176539182662964, "rewards/margins": 1.6570239067077637, "rewards/rejected": -1.948789358139038, "step": 3104 }, { "epoch": 0.36, "learning_rate": 1.945553324672257e-07, "logits/chosen": -2.255340099334717, "logits/rejected": -2.1561477184295654, "logps/chosen": -428.8955078125, "logps/rejected": -367.0755615234375, "loss": 0.6539, "rewards/accuracies": 0.625, "rewards/chosen": -1.2377064228057861, "rewards/margins": 1.6460140943527222, "rewards/rejected": -2.8837203979492188, "step": 3105 }, { "epoch": 0.36, "learning_rate": 1.9451990079130742e-07, "logits/chosen": -1.3099322319030762, "logits/rejected": -1.6241211891174316, "logps/chosen": -401.2363586425781, "logps/rejected": -285.8721618652344, "loss": 0.7805, "rewards/accuracies": 0.5, "rewards/chosen": -1.53331458568573, "rewards/margins": 0.006419472396373749, "rewards/rejected": -1.539734125137329, "step": 3106 }, { "epoch": 0.36, "learning_rate": 1.9448446911538914e-07, "logits/chosen": -1.6691772937774658, "logits/rejected": -1.7718544006347656, "logps/chosen": -439.49578857421875, "logps/rejected": -377.2452392578125, "loss": 0.8446, "rewards/accuracies": 0.625, "rewards/chosen": -0.8918315172195435, "rewards/margins": 2.0388946533203125, "rewards/rejected": -2.9307260513305664, "step": 3107 }, { "epoch": 0.36, "learning_rate": 1.9444903743947087e-07, "logits/chosen": -2.6120729446411133, "logits/rejected": -2.7166614532470703, "logps/chosen": -380.427490234375, "logps/rejected": -381.2755126953125, "loss": 0.4833, "rewards/accuracies": 0.75, "rewards/chosen": -0.5260157585144043, "rewards/margins": 1.6505365371704102, "rewards/rejected": -2.1765522956848145, "step": 3108 }, { "epoch": 0.36, "learning_rate": 1.944136057635526e-07, "logits/chosen": -2.2808775901794434, "logits/rejected": -2.4095587730407715, "logps/chosen": -507.74395751953125, "logps/rejected": -337.41571044921875, "loss": 0.1603, "rewards/accuracies": 1.0, "rewards/chosen": -1.2569407224655151, "rewards/margins": 2.731412887573242, "rewards/rejected": -3.988353729248047, "step": 3109 }, { "epoch": 0.36, "learning_rate": 1.9437817408763434e-07, "logits/chosen": -2.2663347721099854, "logits/rejected": -2.319885492324829, "logps/chosen": -213.2877197265625, "logps/rejected": -244.46157836914062, "loss": 0.4708, "rewards/accuracies": 0.875, "rewards/chosen": -0.5261576175689697, "rewards/margins": 1.0919712781906128, "rewards/rejected": -1.6181288957595825, "step": 3110 }, { "epoch": 0.36, "learning_rate": 1.9434274241171606e-07, "logits/chosen": -2.2333898544311523, "logits/rejected": -2.3654563426971436, "logps/chosen": -407.58697509765625, "logps/rejected": -380.197509765625, "loss": 0.4992, "rewards/accuracies": 0.75, "rewards/chosen": -0.6908873319625854, "rewards/margins": 1.0724596977233887, "rewards/rejected": -1.7633470296859741, "step": 3111 }, { "epoch": 0.36, "learning_rate": 1.9430731073579778e-07, "logits/chosen": -1.9532456398010254, "logits/rejected": -2.08958101272583, "logps/chosen": -262.1343078613281, "logps/rejected": -237.88241577148438, "loss": 0.1801, "rewards/accuracies": 1.0, "rewards/chosen": -0.5578480362892151, "rewards/margins": 2.770270347595215, "rewards/rejected": -3.328118324279785, "step": 3112 }, { "epoch": 0.36, "learning_rate": 1.9427187905987953e-07, "logits/chosen": -2.6000728607177734, "logits/rejected": -2.4458532333374023, "logps/chosen": -382.8772888183594, "logps/rejected": -310.2657470703125, "loss": 0.6905, "rewards/accuracies": 0.625, "rewards/chosen": -1.4247158765792847, "rewards/margins": 0.8124868273735046, "rewards/rejected": -2.2372026443481445, "step": 3113 }, { "epoch": 0.36, "learning_rate": 1.9423644738396128e-07, "logits/chosen": -1.7292454242706299, "logits/rejected": -1.625171422958374, "logps/chosen": -132.97607421875, "logps/rejected": -171.45257568359375, "loss": 1.2613, "rewards/accuracies": 0.5, "rewards/chosen": -2.566391944885254, "rewards/margins": 0.7467284798622131, "rewards/rejected": -3.3131203651428223, "step": 3114 }, { "epoch": 0.36, "learning_rate": 1.94201015708043e-07, "logits/chosen": -2.256432294845581, "logits/rejected": -2.1059908866882324, "logps/chosen": -199.9241180419922, "logps/rejected": -217.54931640625, "loss": 0.169, "rewards/accuracies": 1.0, "rewards/chosen": -0.7063655853271484, "rewards/margins": 1.918487310409546, "rewards/rejected": -2.6248531341552734, "step": 3115 }, { "epoch": 0.36, "learning_rate": 1.9416558403212472e-07, "logits/chosen": -2.3934261798858643, "logits/rejected": -2.551150321960449, "logps/chosen": -192.21408081054688, "logps/rejected": -264.39056396484375, "loss": 0.1731, "rewards/accuracies": 1.0, "rewards/chosen": -0.5673214197158813, "rewards/margins": 3.1848888397216797, "rewards/rejected": -3.7522099018096924, "step": 3116 }, { "epoch": 0.36, "learning_rate": 1.9413015235620645e-07, "logits/chosen": -2.4616799354553223, "logits/rejected": -2.571305751800537, "logps/chosen": -108.89421844482422, "logps/rejected": -232.10964965820312, "loss": 0.3327, "rewards/accuracies": 0.75, "rewards/chosen": -0.5497379899024963, "rewards/margins": 2.1916420459747314, "rewards/rejected": -2.741380214691162, "step": 3117 }, { "epoch": 0.36, "learning_rate": 1.9409472068028817e-07, "logits/chosen": -2.4977517127990723, "logits/rejected": -2.0010085105895996, "logps/chosen": -83.21744537353516, "logps/rejected": -276.8382568359375, "loss": 0.4153, "rewards/accuracies": 0.75, "rewards/chosen": -0.9481070637702942, "rewards/margins": 2.5234591960906982, "rewards/rejected": -3.4715659618377686, "step": 3118 }, { "epoch": 0.36, "learning_rate": 1.940592890043699e-07, "logits/chosen": -2.4068498611450195, "logits/rejected": -2.175837516784668, "logps/chosen": -235.85528564453125, "logps/rejected": -398.30291748046875, "loss": 0.1797, "rewards/accuracies": 1.0, "rewards/chosen": -0.3214884400367737, "rewards/margins": 2.6764488220214844, "rewards/rejected": -2.9979374408721924, "step": 3119 }, { "epoch": 0.36, "learning_rate": 1.940238573284516e-07, "logits/chosen": -2.5489022731781006, "logits/rejected": -2.4398512840270996, "logps/chosen": -324.2091979980469, "logps/rejected": -352.88848876953125, "loss": 0.2667, "rewards/accuracies": 0.875, "rewards/chosen": -0.11648707091808319, "rewards/margins": 2.299185276031494, "rewards/rejected": -2.415672540664673, "step": 3120 }, { "epoch": 0.36, "learning_rate": 1.9398842565253336e-07, "logits/chosen": -2.684638023376465, "logits/rejected": -2.5656890869140625, "logps/chosen": -182.70233154296875, "logps/rejected": -240.39285278320312, "loss": 0.2162, "rewards/accuracies": 1.0, "rewards/chosen": -0.8190944194793701, "rewards/margins": 2.705848455429077, "rewards/rejected": -3.5249428749084473, "step": 3121 }, { "epoch": 0.36, "learning_rate": 1.9395299397661508e-07, "logits/chosen": -1.9103986024856567, "logits/rejected": -2.164799928665161, "logps/chosen": -393.04437255859375, "logps/rejected": -335.9977722167969, "loss": 0.4371, "rewards/accuracies": 0.625, "rewards/chosen": -1.1821215152740479, "rewards/margins": 1.507992148399353, "rewards/rejected": -2.6901135444641113, "step": 3122 }, { "epoch": 0.36, "learning_rate": 1.939175623006968e-07, "logits/chosen": -2.592072010040283, "logits/rejected": -2.6159229278564453, "logps/chosen": -116.55342102050781, "logps/rejected": -210.6845245361328, "loss": 0.2051, "rewards/accuracies": 1.0, "rewards/chosen": -0.46971070766448975, "rewards/margins": 2.2071855068206787, "rewards/rejected": -2.676896095275879, "step": 3123 }, { "epoch": 0.36, "learning_rate": 1.9388213062477853e-07, "logits/chosen": -2.10007905960083, "logits/rejected": -2.185770034790039, "logps/chosen": -343.1531982421875, "logps/rejected": -320.8360595703125, "loss": 0.2492, "rewards/accuracies": 1.0, "rewards/chosen": -0.20104816555976868, "rewards/margins": 1.8057745695114136, "rewards/rejected": -2.0068225860595703, "step": 3124 }, { "epoch": 0.36, "learning_rate": 1.9384669894886028e-07, "logits/chosen": -1.4998385906219482, "logits/rejected": -2.3586649894714355, "logps/chosen": -636.711181640625, "logps/rejected": -382.38177490234375, "loss": 0.5859, "rewards/accuracies": 0.625, "rewards/chosen": -1.1133610010147095, "rewards/margins": 1.0986320972442627, "rewards/rejected": -2.2119932174682617, "step": 3125 }, { "epoch": 0.36, "learning_rate": 1.9381126727294202e-07, "logits/chosen": -2.4803004264831543, "logits/rejected": -2.161444902420044, "logps/chosen": -172.16583251953125, "logps/rejected": -286.1757507324219, "loss": 0.374, "rewards/accuracies": 0.875, "rewards/chosen": -0.0674363225698471, "rewards/margins": 2.683539390563965, "rewards/rejected": -2.7509756088256836, "step": 3126 }, { "epoch": 0.36, "learning_rate": 1.9377583559702375e-07, "logits/chosen": -2.656484603881836, "logits/rejected": -2.694640874862671, "logps/chosen": -80.12544250488281, "logps/rejected": -138.1623992919922, "loss": 0.2575, "rewards/accuracies": 0.875, "rewards/chosen": 0.07886181771755219, "rewards/margins": 2.4925155639648438, "rewards/rejected": -2.41365385055542, "step": 3127 }, { "epoch": 0.36, "learning_rate": 1.9374040392110547e-07, "logits/chosen": -2.350421905517578, "logits/rejected": -2.515573263168335, "logps/chosen": -277.6192932128906, "logps/rejected": -240.78457641601562, "loss": 0.2561, "rewards/accuracies": 0.875, "rewards/chosen": -0.5066509246826172, "rewards/margins": 2.982818365097046, "rewards/rejected": -3.489469051361084, "step": 3128 }, { "epoch": 0.36, "learning_rate": 1.937049722451872e-07, "logits/chosen": -2.965205669403076, "logits/rejected": -3.0474088191986084, "logps/chosen": -329.3662414550781, "logps/rejected": -249.338134765625, "loss": 0.8615, "rewards/accuracies": 0.75, "rewards/chosen": -1.2708778381347656, "rewards/margins": 1.2647546529769897, "rewards/rejected": -2.535632610321045, "step": 3129 }, { "epoch": 0.36, "learning_rate": 1.936695405692689e-07, "logits/chosen": -2.4371161460876465, "logits/rejected": -2.088001251220703, "logps/chosen": -170.34738159179688, "logps/rejected": -288.82403564453125, "loss": 0.3816, "rewards/accuracies": 0.875, "rewards/chosen": -1.2606197595596313, "rewards/margins": 2.0035548210144043, "rewards/rejected": -3.2641749382019043, "step": 3130 }, { "epoch": 0.36, "learning_rate": 1.9363410889335063e-07, "logits/chosen": -2.080554485321045, "logits/rejected": -2.3817286491394043, "logps/chosen": -264.6495361328125, "logps/rejected": -149.86404418945312, "loss": 0.5214, "rewards/accuracies": 0.75, "rewards/chosen": -0.7187601327896118, "rewards/margins": 1.0175445079803467, "rewards/rejected": -1.736304521560669, "step": 3131 }, { "epoch": 0.36, "learning_rate": 1.9359867721743238e-07, "logits/chosen": -2.275902271270752, "logits/rejected": -2.3267109394073486, "logps/chosen": -207.8766632080078, "logps/rejected": -230.2354278564453, "loss": 0.2692, "rewards/accuracies": 1.0, "rewards/chosen": -0.6790817975997925, "rewards/margins": 1.982696771621704, "rewards/rejected": -2.661778450012207, "step": 3132 }, { "epoch": 0.36, "learning_rate": 1.935632455415141e-07, "logits/chosen": -2.3230996131896973, "logits/rejected": -2.574122428894043, "logps/chosen": -382.58636474609375, "logps/rejected": -217.22059631347656, "loss": 0.2448, "rewards/accuracies": 1.0, "rewards/chosen": 0.12732012569904327, "rewards/margins": 1.843235969543457, "rewards/rejected": -1.7159159183502197, "step": 3133 }, { "epoch": 0.36, "learning_rate": 1.9352781386559583e-07, "logits/chosen": -2.2557919025421143, "logits/rejected": -2.490957498550415, "logps/chosen": -587.0347900390625, "logps/rejected": -331.9078369140625, "loss": 0.3415, "rewards/accuracies": 0.875, "rewards/chosen": -0.62187659740448, "rewards/margins": 2.351989984512329, "rewards/rejected": -2.9738669395446777, "step": 3134 }, { "epoch": 0.36, "learning_rate": 1.9349238218967755e-07, "logits/chosen": -2.4150912761688232, "logits/rejected": -2.300534248352051, "logps/chosen": -217.19692993164062, "logps/rejected": -440.0042724609375, "loss": 0.4936, "rewards/accuracies": 0.875, "rewards/chosen": -1.277635097503662, "rewards/margins": 2.6255717277526855, "rewards/rejected": -3.9032068252563477, "step": 3135 }, { "epoch": 0.36, "learning_rate": 1.9345695051375927e-07, "logits/chosen": -2.434382438659668, "logits/rejected": -2.4949252605438232, "logps/chosen": -252.11721801757812, "logps/rejected": -251.63311767578125, "loss": 0.3064, "rewards/accuracies": 0.75, "rewards/chosen": -0.801544725894928, "rewards/margins": 2.8959438800811768, "rewards/rejected": -3.697488784790039, "step": 3136 }, { "epoch": 0.36, "learning_rate": 1.9342151883784105e-07, "logits/chosen": -2.308393955230713, "logits/rejected": -2.4564592838287354, "logps/chosen": -444.9427490234375, "logps/rejected": -336.61981201171875, "loss": 0.312, "rewards/accuracies": 0.875, "rewards/chosen": -1.0134586095809937, "rewards/margins": 1.9564151763916016, "rewards/rejected": -2.9698739051818848, "step": 3137 }, { "epoch": 0.37, "learning_rate": 1.9338608716192277e-07, "logits/chosen": -2.2691216468811035, "logits/rejected": -2.1121933460235596, "logps/chosen": -582.8369140625, "logps/rejected": -380.7342529296875, "loss": 0.8403, "rewards/accuracies": 0.75, "rewards/chosen": -1.307189702987671, "rewards/margins": 0.9894444942474365, "rewards/rejected": -2.2966341972351074, "step": 3138 }, { "epoch": 0.37, "learning_rate": 1.933506554860045e-07, "logits/chosen": -2.5083045959472656, "logits/rejected": -2.6909873485565186, "logps/chosen": -303.33453369140625, "logps/rejected": -339.1681823730469, "loss": 0.4014, "rewards/accuracies": 0.75, "rewards/chosen": -0.9819588661193848, "rewards/margins": 1.6858787536621094, "rewards/rejected": -2.667837619781494, "step": 3139 }, { "epoch": 0.37, "learning_rate": 1.933152238100862e-07, "logits/chosen": -2.160820722579956, "logits/rejected": -1.9663872718811035, "logps/chosen": -325.99127197265625, "logps/rejected": -417.8299560546875, "loss": 0.2755, "rewards/accuracies": 0.875, "rewards/chosen": -0.45534124970436096, "rewards/margins": 2.8656198978424072, "rewards/rejected": -3.320960760116577, "step": 3140 }, { "epoch": 0.37, "learning_rate": 1.9327979213416794e-07, "logits/chosen": -2.3190388679504395, "logits/rejected": -2.156068801879883, "logps/chosen": -191.9080810546875, "logps/rejected": -203.80789184570312, "loss": 0.5179, "rewards/accuracies": 0.75, "rewards/chosen": -0.8741163015365601, "rewards/margins": 1.057347297668457, "rewards/rejected": -1.9314637184143066, "step": 3141 }, { "epoch": 0.37, "learning_rate": 1.9324436045824966e-07, "logits/chosen": -2.2184793949127197, "logits/rejected": -2.405529022216797, "logps/chosen": -276.3310852050781, "logps/rejected": -269.9331359863281, "loss": 1.3409, "rewards/accuracies": 0.625, "rewards/chosen": -1.8734275102615356, "rewards/margins": 1.4713757038116455, "rewards/rejected": -3.3448030948638916, "step": 3142 }, { "epoch": 0.37, "learning_rate": 1.932089287823314e-07, "logits/chosen": -2.256789207458496, "logits/rejected": -2.385599374771118, "logps/chosen": -395.928466796875, "logps/rejected": -247.03604125976562, "loss": 0.4809, "rewards/accuracies": 0.875, "rewards/chosen": -0.973597526550293, "rewards/margins": 1.6667382717132568, "rewards/rejected": -2.640336036682129, "step": 3143 }, { "epoch": 0.37, "learning_rate": 1.9317349710641313e-07, "logits/chosen": -2.290410041809082, "logits/rejected": -2.6533286571502686, "logps/chosen": -298.1980285644531, "logps/rejected": -231.24752807617188, "loss": 0.5155, "rewards/accuracies": 0.75, "rewards/chosen": -1.2942817211151123, "rewards/margins": 0.6732892990112305, "rewards/rejected": -1.9675710201263428, "step": 3144 }, { "epoch": 0.37, "learning_rate": 1.9313806543049485e-07, "logits/chosen": -2.188337564468384, "logits/rejected": -2.416543960571289, "logps/chosen": -318.5128479003906, "logps/rejected": -234.77174377441406, "loss": 0.3441, "rewards/accuracies": 0.875, "rewards/chosen": -0.8966893553733826, "rewards/margins": 1.403678297996521, "rewards/rejected": -2.300367832183838, "step": 3145 }, { "epoch": 0.37, "learning_rate": 1.9310263375457657e-07, "logits/chosen": -2.6692333221435547, "logits/rejected": -2.755354881286621, "logps/chosen": -169.9678955078125, "logps/rejected": -182.23582458496094, "loss": 0.1318, "rewards/accuracies": 1.0, "rewards/chosen": 0.14124518632888794, "rewards/margins": 2.9847562313079834, "rewards/rejected": -2.8435111045837402, "step": 3146 }, { "epoch": 0.37, "learning_rate": 1.930672020786583e-07, "logits/chosen": -1.7486519813537598, "logits/rejected": -1.798386573791504, "logps/chosen": -477.0087585449219, "logps/rejected": -409.324462890625, "loss": 0.6296, "rewards/accuracies": 0.5, "rewards/chosen": -0.899261474609375, "rewards/margins": 1.913628339767456, "rewards/rejected": -2.812889814376831, "step": 3147 }, { "epoch": 0.37, "learning_rate": 1.9303177040274007e-07, "logits/chosen": -2.1254711151123047, "logits/rejected": -2.646773338317871, "logps/chosen": -298.5115966796875, "logps/rejected": -299.6465148925781, "loss": 0.2747, "rewards/accuracies": 0.875, "rewards/chosen": -1.0887181758880615, "rewards/margins": 2.2744734287261963, "rewards/rejected": -3.363191604614258, "step": 3148 }, { "epoch": 0.37, "learning_rate": 1.929963387268218e-07, "logits/chosen": -2.6941685676574707, "logits/rejected": -2.668522357940674, "logps/chosen": -186.5926513671875, "logps/rejected": -417.25933837890625, "loss": 0.1505, "rewards/accuracies": 1.0, "rewards/chosen": -0.8929291367530823, "rewards/margins": 3.580596446990967, "rewards/rejected": -4.473525524139404, "step": 3149 }, { "epoch": 0.37, "learning_rate": 1.9296090705090351e-07, "logits/chosen": -2.7407705783843994, "logits/rejected": -2.7407851219177246, "logps/chosen": -226.93617248535156, "logps/rejected": -251.0614471435547, "loss": 0.2178, "rewards/accuracies": 0.875, "rewards/chosen": -1.4730029106140137, "rewards/margins": 3.7094790935516357, "rewards/rejected": -5.18248176574707, "step": 3150 }, { "epoch": 0.37, "learning_rate": 1.9292547537498524e-07, "logits/chosen": -2.237391948699951, "logits/rejected": -2.5306034088134766, "logps/chosen": -343.9668273925781, "logps/rejected": -294.36614990234375, "loss": 0.4401, "rewards/accuracies": 0.625, "rewards/chosen": -0.7083986401557922, "rewards/margins": 2.134974956512451, "rewards/rejected": -2.8433735370635986, "step": 3151 }, { "epoch": 0.37, "learning_rate": 1.9289004369906696e-07, "logits/chosen": -2.045839548110962, "logits/rejected": -1.9975348711013794, "logps/chosen": -159.90927124023438, "logps/rejected": -158.82867431640625, "loss": 0.4595, "rewards/accuracies": 0.75, "rewards/chosen": -1.0179097652435303, "rewards/margins": 0.8473888635635376, "rewards/rejected": -1.8652986288070679, "step": 3152 }, { "epoch": 0.37, "learning_rate": 1.9285461202314868e-07, "logits/chosen": -2.5274221897125244, "logits/rejected": -2.0660452842712402, "logps/chosen": -126.86803436279297, "logps/rejected": -264.5057678222656, "loss": 0.2647, "rewards/accuracies": 1.0, "rewards/chosen": -0.14727041125297546, "rewards/margins": 2.82851243019104, "rewards/rejected": -2.975782871246338, "step": 3153 }, { "epoch": 0.37, "learning_rate": 1.928191803472304e-07, "logits/chosen": -2.249345302581787, "logits/rejected": -2.2968902587890625, "logps/chosen": -79.97516632080078, "logps/rejected": -149.7945098876953, "loss": 0.2891, "rewards/accuracies": 0.875, "rewards/chosen": -0.019741669297218323, "rewards/margins": 2.123882293701172, "rewards/rejected": -2.1436238288879395, "step": 3154 }, { "epoch": 0.37, "learning_rate": 1.9278374867131215e-07, "logits/chosen": -2.892702579498291, "logits/rejected": -2.7949483394622803, "logps/chosen": -123.44725799560547, "logps/rejected": -174.04078674316406, "loss": 0.2852, "rewards/accuracies": 0.875, "rewards/chosen": -1.2171828746795654, "rewards/margins": 2.4107046127319336, "rewards/rejected": -3.627887487411499, "step": 3155 }, { "epoch": 0.37, "learning_rate": 1.9274831699539387e-07, "logits/chosen": -2.4653091430664062, "logits/rejected": -2.7025585174560547, "logps/chosen": -258.10858154296875, "logps/rejected": -185.49456787109375, "loss": 0.2058, "rewards/accuracies": 1.0, "rewards/chosen": -0.931549072265625, "rewards/margins": 1.700240135192871, "rewards/rejected": -2.631789207458496, "step": 3156 }, { "epoch": 0.37, "learning_rate": 1.927128853194756e-07, "logits/chosen": -2.163545846939087, "logits/rejected": -2.6042652130126953, "logps/chosen": -401.03607177734375, "logps/rejected": -233.97898864746094, "loss": 0.1997, "rewards/accuracies": 1.0, "rewards/chosen": -0.5937491655349731, "rewards/margins": 2.5653302669525146, "rewards/rejected": -3.1590797901153564, "step": 3157 }, { "epoch": 0.37, "learning_rate": 1.9267745364355732e-07, "logits/chosen": -2.768998622894287, "logits/rejected": -2.6972715854644775, "logps/chosen": -180.02444458007812, "logps/rejected": -222.41339111328125, "loss": 1.3841, "rewards/accuracies": 0.625, "rewards/chosen": -2.2711691856384277, "rewards/margins": 0.2155253291130066, "rewards/rejected": -2.4866943359375, "step": 3158 }, { "epoch": 0.37, "learning_rate": 1.9264202196763904e-07, "logits/chosen": -1.850663661956787, "logits/rejected": -1.9897730350494385, "logps/chosen": -541.7745971679688, "logps/rejected": -489.3761291503906, "loss": 0.0812, "rewards/accuracies": 1.0, "rewards/chosen": 0.13470707833766937, "rewards/margins": 3.1683897972106934, "rewards/rejected": -3.0336828231811523, "step": 3159 }, { "epoch": 0.37, "learning_rate": 1.9260659029172081e-07, "logits/chosen": -2.499582529067993, "logits/rejected": -2.44433331489563, "logps/chosen": -245.42471313476562, "logps/rejected": -302.79852294921875, "loss": 0.1432, "rewards/accuracies": 1.0, "rewards/chosen": -0.6744850277900696, "rewards/margins": 3.5585427284240723, "rewards/rejected": -4.233027935028076, "step": 3160 }, { "epoch": 0.37, "learning_rate": 1.9257115861580254e-07, "logits/chosen": -2.900247097015381, "logits/rejected": -2.930180072784424, "logps/chosen": -389.54107666015625, "logps/rejected": -310.0992431640625, "loss": 0.1728, "rewards/accuracies": 1.0, "rewards/chosen": -0.9990042448043823, "rewards/margins": 2.1150031089782715, "rewards/rejected": -3.1140072345733643, "step": 3161 }, { "epoch": 0.37, "learning_rate": 1.9253572693988426e-07, "logits/chosen": -2.3940377235412598, "logits/rejected": -2.246613025665283, "logps/chosen": -301.7984313964844, "logps/rejected": -341.15692138671875, "loss": 0.2776, "rewards/accuracies": 0.875, "rewards/chosen": -0.946528434753418, "rewards/margins": 2.131150245666504, "rewards/rejected": -3.077678680419922, "step": 3162 }, { "epoch": 0.37, "learning_rate": 1.9250029526396598e-07, "logits/chosen": -2.44759202003479, "logits/rejected": -2.4825141429901123, "logps/chosen": -316.13604736328125, "logps/rejected": -286.5953369140625, "loss": 0.316, "rewards/accuracies": 0.75, "rewards/chosen": -0.8119725584983826, "rewards/margins": 2.597079038619995, "rewards/rejected": -3.4090514183044434, "step": 3163 }, { "epoch": 0.37, "learning_rate": 1.924648635880477e-07, "logits/chosen": -2.7020111083984375, "logits/rejected": -2.660953998565674, "logps/chosen": -148.32684326171875, "logps/rejected": -237.59823608398438, "loss": 0.2714, "rewards/accuracies": 0.875, "rewards/chosen": -0.7312800288200378, "rewards/margins": 2.763472080230713, "rewards/rejected": -3.4947519302368164, "step": 3164 }, { "epoch": 0.37, "learning_rate": 1.9242943191212942e-07, "logits/chosen": -2.0983476638793945, "logits/rejected": -2.643665075302124, "logps/chosen": -421.32574462890625, "logps/rejected": -178.2677001953125, "loss": 0.3181, "rewards/accuracies": 0.875, "rewards/chosen": -0.8557639718055725, "rewards/margins": 1.6461113691329956, "rewards/rejected": -2.501875162124634, "step": 3165 }, { "epoch": 0.37, "learning_rate": 1.9239400023621117e-07, "logits/chosen": -1.6538810729980469, "logits/rejected": -2.04146671295166, "logps/chosen": -437.01287841796875, "logps/rejected": -357.8394775390625, "loss": 0.1927, "rewards/accuracies": 1.0, "rewards/chosen": -0.8159582018852234, "rewards/margins": 2.586364984512329, "rewards/rejected": -3.402323007583618, "step": 3166 }, { "epoch": 0.37, "learning_rate": 1.923585685602929e-07, "logits/chosen": -2.6980769634246826, "logits/rejected": -2.9216294288635254, "logps/chosen": -384.0958251953125, "logps/rejected": -358.8319396972656, "loss": 0.265, "rewards/accuracies": 0.875, "rewards/chosen": -0.9607188701629639, "rewards/margins": 2.52705454826355, "rewards/rejected": -3.4877734184265137, "step": 3167 }, { "epoch": 0.37, "learning_rate": 1.9232313688437462e-07, "logits/chosen": -2.5672430992126465, "logits/rejected": -2.5246002674102783, "logps/chosen": -348.85107421875, "logps/rejected": -260.0709228515625, "loss": 0.2044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9269664287567139, "rewards/margins": 2.1458261013031006, "rewards/rejected": -3.0727925300598145, "step": 3168 }, { "epoch": 0.37, "learning_rate": 1.9228770520845634e-07, "logits/chosen": -1.9530051946640015, "logits/rejected": -2.399528741836548, "logps/chosen": -406.9411926269531, "logps/rejected": -260.3360595703125, "loss": 0.5938, "rewards/accuracies": 0.75, "rewards/chosen": -0.7623782753944397, "rewards/margins": 0.5734649896621704, "rewards/rejected": -1.3358433246612549, "step": 3169 }, { "epoch": 0.37, "learning_rate": 1.9225227353253806e-07, "logits/chosen": -2.3042895793914795, "logits/rejected": -2.6620395183563232, "logps/chosen": -272.5081787109375, "logps/rejected": -210.39694213867188, "loss": 0.2503, "rewards/accuracies": 1.0, "rewards/chosen": 0.06712014973163605, "rewards/margins": 2.498213768005371, "rewards/rejected": -2.431093692779541, "step": 3170 }, { "epoch": 0.37, "learning_rate": 1.9221684185661978e-07, "logits/chosen": -2.5497233867645264, "logits/rejected": -2.7116408348083496, "logps/chosen": -202.58700561523438, "logps/rejected": -200.17637634277344, "loss": 0.2296, "rewards/accuracies": 1.0, "rewards/chosen": -0.09004423022270203, "rewards/margins": 1.9502902030944824, "rewards/rejected": -2.0403342247009277, "step": 3171 }, { "epoch": 0.37, "learning_rate": 1.9218141018070156e-07, "logits/chosen": -2.507938861846924, "logits/rejected": -2.4972078800201416, "logps/chosen": -296.51654052734375, "logps/rejected": -281.16644287109375, "loss": 0.3491, "rewards/accuracies": 0.75, "rewards/chosen": -1.139602780342102, "rewards/margins": 1.7851760387420654, "rewards/rejected": -2.924778938293457, "step": 3172 }, { "epoch": 0.37, "learning_rate": 1.9214597850478328e-07, "logits/chosen": -2.0462560653686523, "logits/rejected": -1.9870109558105469, "logps/chosen": -281.8150634765625, "logps/rejected": -310.6997375488281, "loss": 0.377, "rewards/accuracies": 0.875, "rewards/chosen": -1.4008017778396606, "rewards/margins": 2.5081920623779297, "rewards/rejected": -3.9089934825897217, "step": 3173 }, { "epoch": 0.37, "learning_rate": 1.92110546828865e-07, "logits/chosen": -1.8173969984054565, "logits/rejected": -2.0294840335845947, "logps/chosen": -319.11834716796875, "logps/rejected": -298.73309326171875, "loss": 0.6093, "rewards/accuracies": 0.5, "rewards/chosen": -0.8669200539588928, "rewards/margins": 0.6802937984466553, "rewards/rejected": -1.5472137928009033, "step": 3174 }, { "epoch": 0.37, "learning_rate": 1.9207511515294673e-07, "logits/chosen": -1.8864620923995972, "logits/rejected": -2.190478801727295, "logps/chosen": -474.81103515625, "logps/rejected": -387.6461486816406, "loss": 0.3311, "rewards/accuracies": 0.875, "rewards/chosen": -0.8746293187141418, "rewards/margins": 2.1003055572509766, "rewards/rejected": -2.9749348163604736, "step": 3175 }, { "epoch": 0.37, "learning_rate": 1.9203968347702845e-07, "logits/chosen": -2.182969093322754, "logits/rejected": -2.1879749298095703, "logps/chosen": -150.87913513183594, "logps/rejected": -309.70782470703125, "loss": 0.4148, "rewards/accuracies": 0.75, "rewards/chosen": -0.3453094959259033, "rewards/margins": 1.3505222797393799, "rewards/rejected": -1.6958316564559937, "step": 3176 }, { "epoch": 0.37, "learning_rate": 1.920042518011102e-07, "logits/chosen": -1.974729061126709, "logits/rejected": -2.245504379272461, "logps/chosen": -306.3042297363281, "logps/rejected": -292.2158203125, "loss": 0.2734, "rewards/accuracies": 0.875, "rewards/chosen": -0.09486120939254761, "rewards/margins": 2.164905309677124, "rewards/rejected": -2.2597665786743164, "step": 3177 }, { "epoch": 0.37, "learning_rate": 1.9196882012519192e-07, "logits/chosen": -2.327970504760742, "logits/rejected": -2.292393684387207, "logps/chosen": -330.66845703125, "logps/rejected": -250.999755859375, "loss": 0.4247, "rewards/accuracies": 0.875, "rewards/chosen": -0.4500001072883606, "rewards/margins": 0.8633212447166443, "rewards/rejected": -1.3133213520050049, "step": 3178 }, { "epoch": 0.37, "learning_rate": 1.9193338844927364e-07, "logits/chosen": -2.018627643585205, "logits/rejected": -2.206972599029541, "logps/chosen": -199.66067504882812, "logps/rejected": -248.08346557617188, "loss": 0.6225, "rewards/accuracies": 0.375, "rewards/chosen": -0.3957357406616211, "rewards/margins": 0.9450592398643494, "rewards/rejected": -1.3407950401306152, "step": 3179 }, { "epoch": 0.37, "learning_rate": 1.9189795677335536e-07, "logits/chosen": -1.895951509475708, "logits/rejected": -2.53849196434021, "logps/chosen": -433.8330993652344, "logps/rejected": -242.42787170410156, "loss": 0.2672, "rewards/accuracies": 0.875, "rewards/chosen": -0.41876792907714844, "rewards/margins": 1.9294724464416504, "rewards/rejected": -2.348240375518799, "step": 3180 }, { "epoch": 0.37, "learning_rate": 1.9186252509743708e-07, "logits/chosen": -2.1511030197143555, "logits/rejected": -2.1521201133728027, "logps/chosen": -132.5138702392578, "logps/rejected": -133.8966064453125, "loss": 0.6024, "rewards/accuracies": 0.5, "rewards/chosen": -0.7199131846427917, "rewards/margins": 1.7673122882843018, "rewards/rejected": -2.4872255325317383, "step": 3181 }, { "epoch": 0.37, "learning_rate": 1.918270934215188e-07, "logits/chosen": -1.8453233242034912, "logits/rejected": -2.110959768295288, "logps/chosen": -309.5833435058594, "logps/rejected": -255.5559539794922, "loss": 0.2443, "rewards/accuracies": 0.875, "rewards/chosen": -0.36813098192214966, "rewards/margins": 2.3243069648742676, "rewards/rejected": -2.6924376487731934, "step": 3182 }, { "epoch": 0.37, "learning_rate": 1.9179166174560058e-07, "logits/chosen": -2.0539145469665527, "logits/rejected": -1.9143383502960205, "logps/chosen": -339.960693359375, "logps/rejected": -368.6610107421875, "loss": 0.5098, "rewards/accuracies": 0.625, "rewards/chosen": -0.8677932620048523, "rewards/margins": 0.8465698957443237, "rewards/rejected": -1.7143632173538208, "step": 3183 }, { "epoch": 0.37, "learning_rate": 1.917562300696823e-07, "logits/chosen": -2.1474761962890625, "logits/rejected": -2.2780191898345947, "logps/chosen": -164.1271514892578, "logps/rejected": -212.37091064453125, "loss": 0.4573, "rewards/accuracies": 0.875, "rewards/chosen": -0.38927680253982544, "rewards/margins": 1.5299155712127686, "rewards/rejected": -1.9191921949386597, "step": 3184 }, { "epoch": 0.37, "learning_rate": 1.9172079839376403e-07, "logits/chosen": -2.792558193206787, "logits/rejected": -2.643643379211426, "logps/chosen": -193.36093139648438, "logps/rejected": -273.80584716796875, "loss": 0.4996, "rewards/accuracies": 0.75, "rewards/chosen": -1.536325454711914, "rewards/margins": 0.8020482659339905, "rewards/rejected": -2.338373899459839, "step": 3185 }, { "epoch": 0.37, "learning_rate": 1.9168536671784575e-07, "logits/chosen": -2.2104530334472656, "logits/rejected": -1.7676036357879639, "logps/chosen": -175.49789428710938, "logps/rejected": -302.2394714355469, "loss": 0.2512, "rewards/accuracies": 0.75, "rewards/chosen": -0.9535512924194336, "rewards/margins": 2.583712577819824, "rewards/rejected": -3.5372636318206787, "step": 3186 }, { "epoch": 0.37, "learning_rate": 1.9164993504192747e-07, "logits/chosen": -1.6194193363189697, "logits/rejected": -1.925923466682434, "logps/chosen": -225.3070068359375, "logps/rejected": -163.70974731445312, "loss": 0.6318, "rewards/accuracies": 0.625, "rewards/chosen": -0.9462329149246216, "rewards/margins": 0.6181483864784241, "rewards/rejected": -1.5643812417984009, "step": 3187 }, { "epoch": 0.37, "learning_rate": 1.9161450336600922e-07, "logits/chosen": -2.3876099586486816, "logits/rejected": -2.440568208694458, "logps/chosen": -333.3180847167969, "logps/rejected": -325.70404052734375, "loss": 0.2648, "rewards/accuracies": 0.875, "rewards/chosen": -0.7548568844795227, "rewards/margins": 3.3911831378936768, "rewards/rejected": -4.146039962768555, "step": 3188 }, { "epoch": 0.37, "learning_rate": 1.9157907169009094e-07, "logits/chosen": -2.2270219326019287, "logits/rejected": -2.1326770782470703, "logps/chosen": -322.4561767578125, "logps/rejected": -348.9518737792969, "loss": 0.5374, "rewards/accuracies": 0.75, "rewards/chosen": -0.6574346423149109, "rewards/margins": 0.8381863832473755, "rewards/rejected": -1.4956210851669312, "step": 3189 }, { "epoch": 0.37, "learning_rate": 1.9154364001417266e-07, "logits/chosen": -2.464693069458008, "logits/rejected": -2.4156620502471924, "logps/chosen": -185.22540283203125, "logps/rejected": -257.880615234375, "loss": 0.5815, "rewards/accuracies": 0.75, "rewards/chosen": -0.5320767760276794, "rewards/margins": 1.382102608680725, "rewards/rejected": -1.9141794443130493, "step": 3190 }, { "epoch": 0.37, "learning_rate": 1.9150820833825439e-07, "logits/chosen": -2.4733452796936035, "logits/rejected": -2.435786485671997, "logps/chosen": -241.92214965820312, "logps/rejected": -242.48731994628906, "loss": 0.7388, "rewards/accuracies": 0.75, "rewards/chosen": -0.8836735486984253, "rewards/margins": 1.1027183532714844, "rewards/rejected": -1.9863920211791992, "step": 3191 }, { "epoch": 0.37, "learning_rate": 1.914727766623361e-07, "logits/chosen": -2.5483412742614746, "logits/rejected": -2.20320725440979, "logps/chosen": -242.97195434570312, "logps/rejected": -254.82826232910156, "loss": 0.3482, "rewards/accuracies": 0.75, "rewards/chosen": -0.7763811945915222, "rewards/margins": 1.9565354585647583, "rewards/rejected": -2.732916831970215, "step": 3192 }, { "epoch": 0.37, "learning_rate": 1.9143734498641783e-07, "logits/chosen": -1.8268365859985352, "logits/rejected": -2.417933940887451, "logps/chosen": -357.09478759765625, "logps/rejected": -255.75735473632812, "loss": 0.3739, "rewards/accuracies": 0.875, "rewards/chosen": -0.7232149839401245, "rewards/margins": 1.1114208698272705, "rewards/rejected": -1.8346359729766846, "step": 3193 }, { "epoch": 0.37, "learning_rate": 1.9140191331049955e-07, "logits/chosen": -2.2324602603912354, "logits/rejected": -1.8773897886276245, "logps/chosen": -118.87921905517578, "logps/rejected": -299.29290771484375, "loss": 0.2625, "rewards/accuracies": 1.0, "rewards/chosen": -0.2545661926269531, "rewards/margins": 2.1697733402252197, "rewards/rejected": -2.424339532852173, "step": 3194 }, { "epoch": 0.37, "learning_rate": 1.9136648163458133e-07, "logits/chosen": -2.559506416320801, "logits/rejected": -2.5846002101898193, "logps/chosen": -282.648681640625, "logps/rejected": -256.5959777832031, "loss": 0.3366, "rewards/accuracies": 0.875, "rewards/chosen": -0.5970479249954224, "rewards/margins": 1.9208812713623047, "rewards/rejected": -2.5179290771484375, "step": 3195 }, { "epoch": 0.37, "learning_rate": 1.9133104995866305e-07, "logits/chosen": -2.651726484298706, "logits/rejected": -2.7787532806396484, "logps/chosen": -364.31707763671875, "logps/rejected": -193.00221252441406, "loss": 0.4738, "rewards/accuracies": 0.625, "rewards/chosen": -0.6019917130470276, "rewards/margins": 1.286452054977417, "rewards/rejected": -1.8884437084197998, "step": 3196 }, { "epoch": 0.37, "learning_rate": 1.9129561828274477e-07, "logits/chosen": -2.733668565750122, "logits/rejected": -2.750742197036743, "logps/chosen": -136.57806396484375, "logps/rejected": -129.05055236816406, "loss": 0.3988, "rewards/accuracies": 0.75, "rewards/chosen": -1.0261073112487793, "rewards/margins": 1.187927007675171, "rewards/rejected": -2.21403431892395, "step": 3197 }, { "epoch": 0.37, "learning_rate": 1.912601866068265e-07, "logits/chosen": -2.331082820892334, "logits/rejected": -2.3602089881896973, "logps/chosen": -348.1024169921875, "logps/rejected": -240.73379516601562, "loss": 0.712, "rewards/accuracies": 0.75, "rewards/chosen": -1.0845441818237305, "rewards/margins": 0.7099411487579346, "rewards/rejected": -1.794485330581665, "step": 3198 }, { "epoch": 0.37, "learning_rate": 1.9122475493090822e-07, "logits/chosen": -2.169079303741455, "logits/rejected": -2.362210512161255, "logps/chosen": -91.36421203613281, "logps/rejected": -93.60712432861328, "loss": 1.3947, "rewards/accuracies": 0.75, "rewards/chosen": -1.6875498294830322, "rewards/margins": 0.17984771728515625, "rewards/rejected": -1.867397665977478, "step": 3199 }, { "epoch": 0.37, "learning_rate": 1.9118932325498996e-07, "logits/chosen": -2.5542519092559814, "logits/rejected": -2.444429874420166, "logps/chosen": -184.71420288085938, "logps/rejected": -225.59666442871094, "loss": 0.1815, "rewards/accuracies": 1.0, "rewards/chosen": -0.667992353439331, "rewards/margins": 2.259141445159912, "rewards/rejected": -2.927133560180664, "step": 3200 }, { "epoch": 0.37, "learning_rate": 1.9115389157907169e-07, "logits/chosen": -2.5047459602355957, "logits/rejected": -2.534607410430908, "logps/chosen": -506.90509033203125, "logps/rejected": -404.16741943359375, "loss": 0.1579, "rewards/accuracies": 1.0, "rewards/chosen": -0.10554055869579315, "rewards/margins": 3.207212448120117, "rewards/rejected": -3.312753200531006, "step": 3201 }, { "epoch": 0.37, "learning_rate": 1.911184599031534e-07, "logits/chosen": -1.6969332695007324, "logits/rejected": -1.32732355594635, "logps/chosen": -366.0934143066406, "logps/rejected": -515.6815185546875, "loss": 0.2045, "rewards/accuracies": 1.0, "rewards/chosen": -0.1854037195444107, "rewards/margins": 2.7835452556610107, "rewards/rejected": -2.9689488410949707, "step": 3202 }, { "epoch": 0.37, "learning_rate": 1.9108302822723513e-07, "logits/chosen": -2.7645230293273926, "logits/rejected": -2.7652127742767334, "logps/chosen": -273.9284362792969, "logps/rejected": -238.3545684814453, "loss": 0.2577, "rewards/accuracies": 0.875, "rewards/chosen": -0.47424209117889404, "rewards/margins": 3.1590867042541504, "rewards/rejected": -3.633328676223755, "step": 3203 }, { "epoch": 0.37, "learning_rate": 1.9104759655131685e-07, "logits/chosen": -2.684561252593994, "logits/rejected": -2.6680617332458496, "logps/chosen": -172.08779907226562, "logps/rejected": -259.9454345703125, "loss": 0.1237, "rewards/accuracies": 1.0, "rewards/chosen": -0.4344140887260437, "rewards/margins": 3.1675710678100586, "rewards/rejected": -3.601985454559326, "step": 3204 }, { "epoch": 0.37, "learning_rate": 1.9101216487539857e-07, "logits/chosen": -2.579371929168701, "logits/rejected": -2.4444425106048584, "logps/chosen": -247.86573791503906, "logps/rejected": -755.3213500976562, "loss": 0.0983, "rewards/accuracies": 1.0, "rewards/chosen": -1.3130439519882202, "rewards/margins": 4.38079309463501, "rewards/rejected": -5.6938371658325195, "step": 3205 }, { "epoch": 0.37, "learning_rate": 1.9097673319948032e-07, "logits/chosen": -2.394746780395508, "logits/rejected": -2.4037437438964844, "logps/chosen": -390.1556091308594, "logps/rejected": -254.78860473632812, "loss": 0.5292, "rewards/accuracies": 0.75, "rewards/chosen": -0.46477609872817993, "rewards/margins": 1.7637434005737305, "rewards/rejected": -2.2285194396972656, "step": 3206 }, { "epoch": 0.37, "learning_rate": 1.9094130152356207e-07, "logits/chosen": -2.0833282470703125, "logits/rejected": -1.9953733682632446, "logps/chosen": -252.40838623046875, "logps/rejected": -304.6759338378906, "loss": 1.0719, "rewards/accuracies": 0.875, "rewards/chosen": -1.583401083946228, "rewards/margins": 2.3307087421417236, "rewards/rejected": -3.9141101837158203, "step": 3207 }, { "epoch": 0.37, "learning_rate": 1.909058698476438e-07, "logits/chosen": -2.0746684074401855, "logits/rejected": -2.3198964595794678, "logps/chosen": -402.735595703125, "logps/rejected": -260.41046142578125, "loss": 0.3226, "rewards/accuracies": 1.0, "rewards/chosen": -0.5031176209449768, "rewards/margins": 1.4772083759307861, "rewards/rejected": -1.9803260564804077, "step": 3208 }, { "epoch": 0.37, "learning_rate": 1.9087043817172552e-07, "logits/chosen": -2.644043445587158, "logits/rejected": -2.176420211791992, "logps/chosen": -143.150146484375, "logps/rejected": -329.0374755859375, "loss": 0.6718, "rewards/accuracies": 0.75, "rewards/chosen": -1.4195330142974854, "rewards/margins": 1.9932042360305786, "rewards/rejected": -3.4127373695373535, "step": 3209 }, { "epoch": 0.37, "learning_rate": 1.9083500649580724e-07, "logits/chosen": -1.914677619934082, "logits/rejected": -2.0118980407714844, "logps/chosen": -233.90213012695312, "logps/rejected": -245.00588989257812, "loss": 0.6469, "rewards/accuracies": 0.75, "rewards/chosen": -1.3744927644729614, "rewards/margins": 1.8965227603912354, "rewards/rejected": -3.2710154056549072, "step": 3210 }, { "epoch": 0.37, "learning_rate": 1.9079957481988899e-07, "logits/chosen": -2.3674724102020264, "logits/rejected": -2.420836925506592, "logps/chosen": -389.5140380859375, "logps/rejected": -313.31268310546875, "loss": 0.2617, "rewards/accuracies": 1.0, "rewards/chosen": -0.637345552444458, "rewards/margins": 1.5520628690719604, "rewards/rejected": -2.189408302307129, "step": 3211 }, { "epoch": 0.37, "learning_rate": 1.907641431439707e-07, "logits/chosen": -2.3276522159576416, "logits/rejected": -2.312445640563965, "logps/chosen": -204.3212890625, "logps/rejected": -280.382568359375, "loss": 0.4871, "rewards/accuracies": 0.625, "rewards/chosen": -0.8925344944000244, "rewards/margins": 1.6479724645614624, "rewards/rejected": -2.5405070781707764, "step": 3212 }, { "epoch": 0.37, "learning_rate": 1.9072871146805243e-07, "logits/chosen": -2.2591464519500732, "logits/rejected": -2.060910224914551, "logps/chosen": -264.9720458984375, "logps/rejected": -304.6864929199219, "loss": 0.3779, "rewards/accuracies": 0.75, "rewards/chosen": -2.0994749069213867, "rewards/margins": 2.774629831314087, "rewards/rejected": -4.8741044998168945, "step": 3213 }, { "epoch": 0.37, "learning_rate": 1.9069327979213415e-07, "logits/chosen": -2.688176155090332, "logits/rejected": -2.602663278579712, "logps/chosen": -214.14093017578125, "logps/rejected": -239.98794555664062, "loss": 0.2724, "rewards/accuracies": 0.875, "rewards/chosen": -1.2367576360702515, "rewards/margins": 2.65828800201416, "rewards/rejected": -3.895045518875122, "step": 3214 }, { "epoch": 0.37, "learning_rate": 1.9065784811621588e-07, "logits/chosen": -2.7085845470428467, "logits/rejected": -2.707627296447754, "logps/chosen": -359.3642578125, "logps/rejected": -258.1665344238281, "loss": 0.0798, "rewards/accuracies": 1.0, "rewards/chosen": -0.21059304475784302, "rewards/margins": 3.703244209289551, "rewards/rejected": -3.91383695602417, "step": 3215 }, { "epoch": 0.37, "learning_rate": 1.906224164402976e-07, "logits/chosen": -2.687804937362671, "logits/rejected": -2.605192184448242, "logps/chosen": -221.1123046875, "logps/rejected": -235.78416442871094, "loss": 0.5507, "rewards/accuracies": 0.75, "rewards/chosen": -0.4222317636013031, "rewards/margins": 1.3833495378494263, "rewards/rejected": -1.8055810928344727, "step": 3216 }, { "epoch": 0.37, "learning_rate": 1.9058698476437932e-07, "logits/chosen": -2.241328477859497, "logits/rejected": -2.6773998737335205, "logps/chosen": -314.7541809082031, "logps/rejected": -300.5948791503906, "loss": 0.5495, "rewards/accuracies": 0.75, "rewards/chosen": -0.47973719239234924, "rewards/margins": 1.2005685567855835, "rewards/rejected": -1.6803057193756104, "step": 3217 }, { "epoch": 0.37, "learning_rate": 1.905515530884611e-07, "logits/chosen": -2.600045919418335, "logits/rejected": -2.5837719440460205, "logps/chosen": -174.25775146484375, "logps/rejected": -255.02529907226562, "loss": 0.755, "rewards/accuracies": 0.625, "rewards/chosen": -1.2335269451141357, "rewards/margins": 0.5501248836517334, "rewards/rejected": -1.7836518287658691, "step": 3218 }, { "epoch": 0.37, "learning_rate": 1.9051612141254282e-07, "logits/chosen": -2.5625696182250977, "logits/rejected": -2.2703685760498047, "logps/chosen": -163.1625518798828, "logps/rejected": -264.955322265625, "loss": 0.175, "rewards/accuracies": 1.0, "rewards/chosen": -0.49817001819610596, "rewards/margins": 3.069423198699951, "rewards/rejected": -3.5675930976867676, "step": 3219 }, { "epoch": 0.37, "learning_rate": 1.9048068973662454e-07, "logits/chosen": -1.7626054286956787, "logits/rejected": -1.8993251323699951, "logps/chosen": -294.11614990234375, "logps/rejected": -316.83099365234375, "loss": 0.3901, "rewards/accuracies": 0.875, "rewards/chosen": -0.6766389012336731, "rewards/margins": 0.9292320013046265, "rewards/rejected": -1.6058709621429443, "step": 3220 }, { "epoch": 0.37, "learning_rate": 1.9044525806070626e-07, "logits/chosen": -2.450798273086548, "logits/rejected": -2.672447443008423, "logps/chosen": -289.308349609375, "logps/rejected": -204.59765625, "loss": 0.3008, "rewards/accuracies": 0.875, "rewards/chosen": -0.5295712351799011, "rewards/margins": 1.7788383960723877, "rewards/rejected": -2.3084096908569336, "step": 3221 }, { "epoch": 0.37, "learning_rate": 1.90409826384788e-07, "logits/chosen": -2.841648817062378, "logits/rejected": -2.8426687717437744, "logps/chosen": -499.3968200683594, "logps/rejected": -519.694580078125, "loss": 0.2382, "rewards/accuracies": 0.875, "rewards/chosen": -1.220902681350708, "rewards/margins": 2.7731587886810303, "rewards/rejected": -3.9940614700317383, "step": 3222 }, { "epoch": 0.37, "learning_rate": 1.9037439470886973e-07, "logits/chosen": -2.506439208984375, "logits/rejected": -2.4853339195251465, "logps/chosen": -231.01251220703125, "logps/rejected": -169.3791961669922, "loss": 0.3347, "rewards/accuracies": 0.875, "rewards/chosen": -0.6539261341094971, "rewards/margins": 1.743590235710144, "rewards/rejected": -2.3975162506103516, "step": 3223 }, { "epoch": 0.38, "learning_rate": 1.9033896303295145e-07, "logits/chosen": -2.6239452362060547, "logits/rejected": -2.7804253101348877, "logps/chosen": -273.1101989746094, "logps/rejected": -157.99765014648438, "loss": 0.4335, "rewards/accuracies": 0.875, "rewards/chosen": -0.2694106698036194, "rewards/margins": 2.116637706756592, "rewards/rejected": -2.3860483169555664, "step": 3224 }, { "epoch": 0.38, "learning_rate": 1.9030353135703318e-07, "logits/chosen": -2.3311619758605957, "logits/rejected": -2.4225945472717285, "logps/chosen": -78.176025390625, "logps/rejected": -115.0626220703125, "loss": 0.3385, "rewards/accuracies": 0.875, "rewards/chosen": -0.29209184646606445, "rewards/margins": 1.7206566333770752, "rewards/rejected": -2.0127484798431396, "step": 3225 }, { "epoch": 0.38, "learning_rate": 1.902680996811149e-07, "logits/chosen": -2.4940054416656494, "logits/rejected": -2.4528303146362305, "logps/chosen": -265.793212890625, "logps/rejected": -231.36289978027344, "loss": 0.3222, "rewards/accuracies": 0.75, "rewards/chosen": -0.062372103333473206, "rewards/margins": 2.670358419418335, "rewards/rejected": -2.7327306270599365, "step": 3226 }, { "epoch": 0.38, "learning_rate": 1.9023266800519662e-07, "logits/chosen": -1.8420449495315552, "logits/rejected": -2.151578903198242, "logps/chosen": -529.20361328125, "logps/rejected": -408.520751953125, "loss": 0.2086, "rewards/accuracies": 1.0, "rewards/chosen": -1.2404475212097168, "rewards/margins": 2.4566221237182617, "rewards/rejected": -3.6970691680908203, "step": 3227 }, { "epoch": 0.38, "learning_rate": 1.9019723632927834e-07, "logits/chosen": -2.2002663612365723, "logits/rejected": -2.0867955684661865, "logps/chosen": -154.4943084716797, "logps/rejected": -182.96224975585938, "loss": 0.1634, "rewards/accuracies": 1.0, "rewards/chosen": -0.9413011074066162, "rewards/margins": 2.0985305309295654, "rewards/rejected": -3.0398316383361816, "step": 3228 }, { "epoch": 0.38, "learning_rate": 1.901618046533601e-07, "logits/chosen": -1.637128233909607, "logits/rejected": -1.6729846000671387, "logps/chosen": -326.263427734375, "logps/rejected": -385.0272216796875, "loss": 0.3685, "rewards/accuracies": 0.75, "rewards/chosen": -0.7870007157325745, "rewards/margins": 1.9854624271392822, "rewards/rejected": -2.772463083267212, "step": 3229 }, { "epoch": 0.38, "learning_rate": 1.9012637297744184e-07, "logits/chosen": -2.356210231781006, "logits/rejected": -2.3210883140563965, "logps/chosen": -136.46890258789062, "logps/rejected": -216.78048706054688, "loss": 0.4094, "rewards/accuracies": 0.75, "rewards/chosen": -0.4427909255027771, "rewards/margins": 2.1402082443237305, "rewards/rejected": -2.5829989910125732, "step": 3230 }, { "epoch": 0.38, "learning_rate": 1.9009094130152356e-07, "logits/chosen": -2.2593088150024414, "logits/rejected": -1.9057867527008057, "logps/chosen": -258.94329833984375, "logps/rejected": -406.21307373046875, "loss": 0.1049, "rewards/accuracies": 1.0, "rewards/chosen": -0.540289044380188, "rewards/margins": 4.383500576019287, "rewards/rejected": -4.9237895011901855, "step": 3231 }, { "epoch": 0.38, "learning_rate": 1.9005550962560528e-07, "logits/chosen": -1.9971282482147217, "logits/rejected": -1.6898767948150635, "logps/chosen": -306.839599609375, "logps/rejected": -442.9520568847656, "loss": 0.5708, "rewards/accuracies": 0.875, "rewards/chosen": -1.1713738441467285, "rewards/margins": 1.1274758577346802, "rewards/rejected": -2.2988498210906982, "step": 3232 }, { "epoch": 0.38, "learning_rate": 1.90020077949687e-07, "logits/chosen": -2.4572126865386963, "logits/rejected": -2.5742881298065186, "logps/chosen": -206.15101623535156, "logps/rejected": -256.31146240234375, "loss": 0.3748, "rewards/accuracies": 0.875, "rewards/chosen": -0.4443996846675873, "rewards/margins": 2.25850248336792, "rewards/rejected": -2.70290207862854, "step": 3233 }, { "epoch": 0.38, "learning_rate": 1.8998464627376875e-07, "logits/chosen": -2.2815403938293457, "logits/rejected": -2.492597818374634, "logps/chosen": -245.51341247558594, "logps/rejected": -259.1561279296875, "loss": 0.5709, "rewards/accuracies": 0.875, "rewards/chosen": -1.1496930122375488, "rewards/margins": 1.920180082321167, "rewards/rejected": -3.069873332977295, "step": 3234 }, { "epoch": 0.38, "learning_rate": 1.8994921459785048e-07, "logits/chosen": -2.298266887664795, "logits/rejected": -2.4823861122131348, "logps/chosen": -277.4986877441406, "logps/rejected": -293.2684020996094, "loss": 0.2074, "rewards/accuracies": 1.0, "rewards/chosen": -0.3420056402683258, "rewards/margins": 1.8934776782989502, "rewards/rejected": -2.235483407974243, "step": 3235 }, { "epoch": 0.38, "learning_rate": 1.899137829219322e-07, "logits/chosen": -2.55987811088562, "logits/rejected": -2.3394289016723633, "logps/chosen": -212.00074768066406, "logps/rejected": -288.7040100097656, "loss": 0.1006, "rewards/accuracies": 1.0, "rewards/chosen": -0.3443097174167633, "rewards/margins": 3.84413480758667, "rewards/rejected": -4.1884446144104, "step": 3236 }, { "epoch": 0.38, "learning_rate": 1.8987835124601392e-07, "logits/chosen": -2.2606234550476074, "logits/rejected": -2.6480982303619385, "logps/chosen": -210.07150268554688, "logps/rejected": -129.44456481933594, "loss": 0.576, "rewards/accuracies": 0.75, "rewards/chosen": -0.3498830199241638, "rewards/margins": 2.073061943054199, "rewards/rejected": -2.422945261001587, "step": 3237 }, { "epoch": 0.38, "learning_rate": 1.8984291957009564e-07, "logits/chosen": -2.804220199584961, "logits/rejected": -2.641544818878174, "logps/chosen": -287.5760498046875, "logps/rejected": -255.9549560546875, "loss": 0.1771, "rewards/accuracies": 1.0, "rewards/chosen": -0.42140915989875793, "rewards/margins": 2.1233999729156494, "rewards/rejected": -2.544809341430664, "step": 3238 }, { "epoch": 0.38, "learning_rate": 1.8980748789417737e-07, "logits/chosen": -2.1498894691467285, "logits/rejected": -2.084552049636841, "logps/chosen": -245.97970581054688, "logps/rejected": -197.21173095703125, "loss": 0.7039, "rewards/accuracies": 0.5, "rewards/chosen": -0.9468976259231567, "rewards/margins": 0.6123813390731812, "rewards/rejected": -1.5592788457870483, "step": 3239 }, { "epoch": 0.38, "learning_rate": 1.8977205621825911e-07, "logits/chosen": -2.7211756706237793, "logits/rejected": -2.6205661296844482, "logps/chosen": -148.29661560058594, "logps/rejected": -164.67198181152344, "loss": 0.4059, "rewards/accuracies": 0.875, "rewards/chosen": -0.8614287972450256, "rewards/margins": 1.9615739583969116, "rewards/rejected": -2.823002815246582, "step": 3240 }, { "epoch": 0.38, "learning_rate": 1.8973662454234084e-07, "logits/chosen": -2.555750846862793, "logits/rejected": -2.4533040523529053, "logps/chosen": -278.0552978515625, "logps/rejected": -296.5425720214844, "loss": 0.835, "rewards/accuracies": 0.875, "rewards/chosen": -1.4452159404754639, "rewards/margins": 1.44575035572052, "rewards/rejected": -2.8909664154052734, "step": 3241 }, { "epoch": 0.38, "learning_rate": 1.8970119286642258e-07, "logits/chosen": -2.185974597930908, "logits/rejected": -2.089214324951172, "logps/chosen": -359.0887756347656, "logps/rejected": -382.88623046875, "loss": 0.5292, "rewards/accuracies": 0.75, "rewards/chosen": -0.7457459568977356, "rewards/margins": 1.3702107667922974, "rewards/rejected": -2.1159567832946777, "step": 3242 }, { "epoch": 0.38, "learning_rate": 1.896657611905043e-07, "logits/chosen": -2.3580374717712402, "logits/rejected": -2.51841139793396, "logps/chosen": -391.99407958984375, "logps/rejected": -149.8769989013672, "loss": 0.9725, "rewards/accuracies": 0.625, "rewards/chosen": -1.3286888599395752, "rewards/margins": 0.42823508381843567, "rewards/rejected": -1.756924033164978, "step": 3243 }, { "epoch": 0.38, "learning_rate": 1.8963032951458603e-07, "logits/chosen": -2.07189679145813, "logits/rejected": -2.239478826522827, "logps/chosen": -278.524658203125, "logps/rejected": -297.58721923828125, "loss": 0.2304, "rewards/accuracies": 1.0, "rewards/chosen": -1.0030953884124756, "rewards/margins": 2.5819036960601807, "rewards/rejected": -3.5849990844726562, "step": 3244 }, { "epoch": 0.38, "learning_rate": 1.8959489783866778e-07, "logits/chosen": -2.0882723331451416, "logits/rejected": -2.2723264694213867, "logps/chosen": -509.1351318359375, "logps/rejected": -338.858642578125, "loss": 0.1637, "rewards/accuracies": 0.875, "rewards/chosen": -0.43399888277053833, "rewards/margins": 2.8785440921783447, "rewards/rejected": -3.3125431537628174, "step": 3245 }, { "epoch": 0.38, "learning_rate": 1.895594661627495e-07, "logits/chosen": -1.9129339456558228, "logits/rejected": -2.246053695678711, "logps/chosen": -290.80548095703125, "logps/rejected": -286.33477783203125, "loss": 0.3913, "rewards/accuracies": 0.875, "rewards/chosen": -1.0789754390716553, "rewards/margins": 2.2127249240875244, "rewards/rejected": -3.2917003631591797, "step": 3246 }, { "epoch": 0.38, "learning_rate": 1.8952403448683122e-07, "logits/chosen": -2.2596805095672607, "logits/rejected": -2.291738748550415, "logps/chosen": -343.4935302734375, "logps/rejected": -524.1539916992188, "loss": 0.6708, "rewards/accuracies": 0.75, "rewards/chosen": -0.792493999004364, "rewards/margins": 1.9889382123947144, "rewards/rejected": -2.7814323902130127, "step": 3247 }, { "epoch": 0.38, "learning_rate": 1.8948860281091294e-07, "logits/chosen": -2.271575927734375, "logits/rejected": -2.4065840244293213, "logps/chosen": -327.3310546875, "logps/rejected": -208.06649780273438, "loss": 0.631, "rewards/accuracies": 0.75, "rewards/chosen": -0.8533061146736145, "rewards/margins": 0.3383706212043762, "rewards/rejected": -1.1916767358779907, "step": 3248 }, { "epoch": 0.38, "learning_rate": 1.8945317113499467e-07, "logits/chosen": -2.5871002674102783, "logits/rejected": -2.4718213081359863, "logps/chosen": -264.62835693359375, "logps/rejected": -275.6920471191406, "loss": 0.591, "rewards/accuracies": 0.75, "rewards/chosen": -1.6940630674362183, "rewards/margins": 0.9341267347335815, "rewards/rejected": -2.6281898021698, "step": 3249 }, { "epoch": 0.38, "learning_rate": 1.894177394590764e-07, "logits/chosen": -2.6770834922790527, "logits/rejected": -2.6993303298950195, "logps/chosen": -231.24710083007812, "logps/rejected": -329.3340759277344, "loss": 0.3595, "rewards/accuracies": 0.75, "rewards/chosen": -0.2437734454870224, "rewards/margins": 1.5285239219665527, "rewards/rejected": -1.7722972631454468, "step": 3250 }, { "epoch": 0.38, "learning_rate": 1.8938230778315814e-07, "logits/chosen": -2.0170881748199463, "logits/rejected": -2.368675947189331, "logps/chosen": -218.9362030029297, "logps/rejected": -138.83453369140625, "loss": 0.3859, "rewards/accuracies": 0.875, "rewards/chosen": -0.6683643460273743, "rewards/margins": 1.1873081922531128, "rewards/rejected": -1.8556725978851318, "step": 3251 }, { "epoch": 0.38, "learning_rate": 1.8934687610723986e-07, "logits/chosen": -1.7311886548995972, "logits/rejected": -1.9576821327209473, "logps/chosen": -360.599609375, "logps/rejected": -247.40048217773438, "loss": 1.2568, "rewards/accuracies": 0.625, "rewards/chosen": -2.505693197250366, "rewards/margins": -0.039053529500961304, "rewards/rejected": -2.466639518737793, "step": 3252 }, { "epoch": 0.38, "learning_rate": 1.8931144443132158e-07, "logits/chosen": -2.3881936073303223, "logits/rejected": -2.609907627105713, "logps/chosen": -402.769775390625, "logps/rejected": -373.51251220703125, "loss": 0.5153, "rewards/accuracies": 0.625, "rewards/chosen": -1.1083673238754272, "rewards/margins": 1.5676758289337158, "rewards/rejected": -2.6760432720184326, "step": 3253 }, { "epoch": 0.38, "learning_rate": 1.8927601275540333e-07, "logits/chosen": -2.4687955379486084, "logits/rejected": -2.5008156299591064, "logps/chosen": -162.7052459716797, "logps/rejected": -206.4859619140625, "loss": 0.2254, "rewards/accuracies": 0.875, "rewards/chosen": -0.6865992546081543, "rewards/margins": 1.9023888111114502, "rewards/rejected": -2.5889883041381836, "step": 3254 }, { "epoch": 0.38, "learning_rate": 1.8924058107948505e-07, "logits/chosen": -2.119002342224121, "logits/rejected": -2.344454050064087, "logps/chosen": -370.7475280761719, "logps/rejected": -223.86312866210938, "loss": 0.3289, "rewards/accuracies": 0.875, "rewards/chosen": -0.6575223803520203, "rewards/margins": 1.6050114631652832, "rewards/rejected": -2.2625339031219482, "step": 3255 }, { "epoch": 0.38, "learning_rate": 1.892051494035668e-07, "logits/chosen": -2.538059711456299, "logits/rejected": -2.309659004211426, "logps/chosen": -534.3564453125, "logps/rejected": -439.9421081542969, "loss": 0.4138, "rewards/accuracies": 0.875, "rewards/chosen": -0.6898093223571777, "rewards/margins": 2.3398847579956055, "rewards/rejected": -3.029693841934204, "step": 3256 }, { "epoch": 0.38, "learning_rate": 1.8916971772764852e-07, "logits/chosen": -2.649397611618042, "logits/rejected": -2.3945024013519287, "logps/chosen": -276.1242370605469, "logps/rejected": -340.9031066894531, "loss": 0.5471, "rewards/accuracies": 0.875, "rewards/chosen": -1.0764834880828857, "rewards/margins": 1.5946435928344727, "rewards/rejected": -2.6711270809173584, "step": 3257 }, { "epoch": 0.38, "learning_rate": 1.8913428605173024e-07, "logits/chosen": -2.056574583053589, "logits/rejected": -2.2691707611083984, "logps/chosen": -459.6457214355469, "logps/rejected": -466.3200988769531, "loss": 0.1805, "rewards/accuracies": 1.0, "rewards/chosen": -1.4385409355163574, "rewards/margins": 2.2731130123138428, "rewards/rejected": -3.711653709411621, "step": 3258 }, { "epoch": 0.38, "learning_rate": 1.8909885437581197e-07, "logits/chosen": -2.0088343620300293, "logits/rejected": -2.321969985961914, "logps/chosen": -545.6524047851562, "logps/rejected": -334.8851318359375, "loss": 0.431, "rewards/accuracies": 0.75, "rewards/chosen": -0.30487310886383057, "rewards/margins": 1.9125112295150757, "rewards/rejected": -2.2173843383789062, "step": 3259 }, { "epoch": 0.38, "learning_rate": 1.890634226998937e-07, "logits/chosen": -1.913032054901123, "logits/rejected": -1.9833568334579468, "logps/chosen": -358.41131591796875, "logps/rejected": -286.3663024902344, "loss": 0.5419, "rewards/accuracies": 0.75, "rewards/chosen": -1.520216464996338, "rewards/margins": 1.5905377864837646, "rewards/rejected": -3.1107544898986816, "step": 3260 }, { "epoch": 0.38, "learning_rate": 1.890279910239754e-07, "logits/chosen": -2.4498484134674072, "logits/rejected": -2.6825144290924072, "logps/chosen": -471.0520324707031, "logps/rejected": -364.9278564453125, "loss": 0.2471, "rewards/accuracies": 0.875, "rewards/chosen": -1.4404642581939697, "rewards/margins": 1.9710488319396973, "rewards/rejected": -3.411513090133667, "step": 3261 }, { "epoch": 0.38, "learning_rate": 1.8899255934805713e-07, "logits/chosen": -2.439056396484375, "logits/rejected": -2.253410816192627, "logps/chosen": -384.49566650390625, "logps/rejected": -277.30072021484375, "loss": 0.2824, "rewards/accuracies": 1.0, "rewards/chosen": -1.1500232219696045, "rewards/margins": 1.5387393236160278, "rewards/rejected": -2.688762664794922, "step": 3262 }, { "epoch": 0.38, "learning_rate": 1.8895712767213888e-07, "logits/chosen": -2.501188278198242, "logits/rejected": -2.6026458740234375, "logps/chosen": -256.1303405761719, "logps/rejected": -280.6025390625, "loss": 0.215, "rewards/accuracies": 0.875, "rewards/chosen": -1.015978217124939, "rewards/margins": 3.0172438621520996, "rewards/rejected": -4.033222198486328, "step": 3263 }, { "epoch": 0.38, "learning_rate": 1.889216959962206e-07, "logits/chosen": -2.377229690551758, "logits/rejected": -2.1464321613311768, "logps/chosen": -296.6788330078125, "logps/rejected": -265.0169372558594, "loss": 0.9112, "rewards/accuracies": 0.625, "rewards/chosen": -1.1680712699890137, "rewards/margins": 1.4681212902069092, "rewards/rejected": -2.636192798614502, "step": 3264 }, { "epoch": 0.38, "learning_rate": 1.8888626432030235e-07, "logits/chosen": -2.1957216262817383, "logits/rejected": -2.1763265132904053, "logps/chosen": -209.71295166015625, "logps/rejected": -273.5195007324219, "loss": 0.462, "rewards/accuracies": 0.875, "rewards/chosen": -0.8503550291061401, "rewards/margins": 1.5570666790008545, "rewards/rejected": -2.407421588897705, "step": 3265 }, { "epoch": 0.38, "learning_rate": 1.8885083264438407e-07, "logits/chosen": -1.683905005455017, "logits/rejected": -2.0291144847869873, "logps/chosen": -347.59271240234375, "logps/rejected": -278.66033935546875, "loss": 0.6531, "rewards/accuracies": 0.625, "rewards/chosen": -0.7877626419067383, "rewards/margins": 0.8475530743598938, "rewards/rejected": -1.6353156566619873, "step": 3266 }, { "epoch": 0.38, "learning_rate": 1.8881540096846582e-07, "logits/chosen": -1.8197193145751953, "logits/rejected": -2.1332387924194336, "logps/chosen": -265.1026916503906, "logps/rejected": -189.19644165039062, "loss": 1.1772, "rewards/accuracies": 0.625, "rewards/chosen": -1.0541492700576782, "rewards/margins": -0.2400013506412506, "rewards/rejected": -0.81414794921875, "step": 3267 }, { "epoch": 0.38, "learning_rate": 1.8877996929254754e-07, "logits/chosen": -2.113678455352783, "logits/rejected": -2.051194667816162, "logps/chosen": -318.1328125, "logps/rejected": -292.32940673828125, "loss": 0.4659, "rewards/accuracies": 0.875, "rewards/chosen": -1.2011120319366455, "rewards/margins": 0.7260014414787292, "rewards/rejected": -1.9271135330200195, "step": 3268 }, { "epoch": 0.38, "learning_rate": 1.8874453761662927e-07, "logits/chosen": -2.189897298812866, "logits/rejected": -2.47031569480896, "logps/chosen": -364.5947570800781, "logps/rejected": -344.44622802734375, "loss": 0.4095, "rewards/accuracies": 0.75, "rewards/chosen": -0.9895294904708862, "rewards/margins": 1.9544157981872559, "rewards/rejected": -2.9439454078674316, "step": 3269 }, { "epoch": 0.38, "learning_rate": 1.88709105940711e-07, "logits/chosen": -2.3104217052459717, "logits/rejected": -2.359304904937744, "logps/chosen": -331.6579284667969, "logps/rejected": -277.7344665527344, "loss": 0.5275, "rewards/accuracies": 0.625, "rewards/chosen": -0.7964348793029785, "rewards/margins": 1.7797231674194336, "rewards/rejected": -2.576158285140991, "step": 3270 }, { "epoch": 0.38, "learning_rate": 1.886736742647927e-07, "logits/chosen": -2.1030499935150146, "logits/rejected": -2.3495609760284424, "logps/chosen": -482.6959228515625, "logps/rejected": -287.880126953125, "loss": 0.8062, "rewards/accuracies": 0.75, "rewards/chosen": -2.0059123039245605, "rewards/margins": 2.1069846153259277, "rewards/rejected": -4.112896919250488, "step": 3271 }, { "epoch": 0.38, "learning_rate": 1.8863824258887443e-07, "logits/chosen": -2.1760880947113037, "logits/rejected": -2.185364007949829, "logps/chosen": -374.39385986328125, "logps/rejected": -390.8062438964844, "loss": 0.3791, "rewards/accuracies": 0.75, "rewards/chosen": -0.2656015455722809, "rewards/margins": 2.6022567749023438, "rewards/rejected": -2.867858409881592, "step": 3272 }, { "epoch": 0.38, "learning_rate": 1.8860281091295616e-07, "logits/chosen": -2.666940450668335, "logits/rejected": -2.614044666290283, "logps/chosen": -228.04689025878906, "logps/rejected": -327.348876953125, "loss": 0.1242, "rewards/accuracies": 1.0, "rewards/chosen": -0.7845311164855957, "rewards/margins": 3.8655014038085938, "rewards/rejected": -4.6500325202941895, "step": 3273 }, { "epoch": 0.38, "learning_rate": 1.885673792370379e-07, "logits/chosen": -2.316056966781616, "logits/rejected": -2.647771120071411, "logps/chosen": -466.0149230957031, "logps/rejected": -297.01220703125, "loss": 0.3068, "rewards/accuracies": 0.875, "rewards/chosen": -0.7311259508132935, "rewards/margins": 2.488952159881592, "rewards/rejected": -3.2200779914855957, "step": 3274 }, { "epoch": 0.38, "learning_rate": 1.8853194756111963e-07, "logits/chosen": -1.5578582286834717, "logits/rejected": -1.7535136938095093, "logps/chosen": -298.4570617675781, "logps/rejected": -274.2190856933594, "loss": 0.6043, "rewards/accuracies": 0.5, "rewards/chosen": -0.8602393865585327, "rewards/margins": 1.0028367042541504, "rewards/rejected": -1.863076090812683, "step": 3275 }, { "epoch": 0.38, "learning_rate": 1.8849651588520135e-07, "logits/chosen": -2.7714154720306396, "logits/rejected": -2.6975979804992676, "logps/chosen": -99.29049682617188, "logps/rejected": -199.0389404296875, "loss": 0.5736, "rewards/accuracies": 0.875, "rewards/chosen": -0.8301012516021729, "rewards/margins": 1.476645588874817, "rewards/rejected": -2.3067469596862793, "step": 3276 }, { "epoch": 0.38, "learning_rate": 1.884610842092831e-07, "logits/chosen": -2.0784099102020264, "logits/rejected": -2.207335948944092, "logps/chosen": -341.85150146484375, "logps/rejected": -304.4019775390625, "loss": 0.5567, "rewards/accuracies": 0.5, "rewards/chosen": -1.1250044107437134, "rewards/margins": 1.435920238494873, "rewards/rejected": -2.560924530029297, "step": 3277 }, { "epoch": 0.38, "learning_rate": 1.8842565253336482e-07, "logits/chosen": -2.7889060974121094, "logits/rejected": -2.8827128410339355, "logps/chosen": -222.6475067138672, "logps/rejected": -238.67617797851562, "loss": 0.0812, "rewards/accuracies": 1.0, "rewards/chosen": -0.7017053365707397, "rewards/margins": 3.163712978363037, "rewards/rejected": -3.8654184341430664, "step": 3278 }, { "epoch": 0.38, "learning_rate": 1.8839022085744657e-07, "logits/chosen": -2.3735227584838867, "logits/rejected": -2.4404592514038086, "logps/chosen": -151.88734436035156, "logps/rejected": -173.66217041015625, "loss": 0.1397, "rewards/accuracies": 1.0, "rewards/chosen": -0.38649022579193115, "rewards/margins": 2.8479325771331787, "rewards/rejected": -3.2344229221343994, "step": 3279 }, { "epoch": 0.38, "learning_rate": 1.883547891815283e-07, "logits/chosen": -2.619227170944214, "logits/rejected": -2.6384117603302, "logps/chosen": -196.28121948242188, "logps/rejected": -295.298095703125, "loss": 0.3877, "rewards/accuracies": 0.75, "rewards/chosen": -1.427058219909668, "rewards/margins": 2.388202667236328, "rewards/rejected": -3.815260887145996, "step": 3280 }, { "epoch": 0.38, "learning_rate": 1.8831935750561e-07, "logits/chosen": -2.4417858123779297, "logits/rejected": -2.1641342639923096, "logps/chosen": -320.58404541015625, "logps/rejected": -301.60162353515625, "loss": 0.3924, "rewards/accuracies": 0.875, "rewards/chosen": -1.0585428476333618, "rewards/margins": 1.3287712335586548, "rewards/rejected": -2.3873140811920166, "step": 3281 }, { "epoch": 0.38, "learning_rate": 1.8828392582969173e-07, "logits/chosen": -2.4424550533294678, "logits/rejected": -2.5853452682495117, "logps/chosen": -364.84979248046875, "logps/rejected": -251.2431640625, "loss": 0.2793, "rewards/accuracies": 1.0, "rewards/chosen": -0.5532305240631104, "rewards/margins": 1.5544167757034302, "rewards/rejected": -2.107647180557251, "step": 3282 }, { "epoch": 0.38, "learning_rate": 1.8824849415377346e-07, "logits/chosen": -2.5711097717285156, "logits/rejected": -2.64656400680542, "logps/chosen": -258.8346862792969, "logps/rejected": -215.57003784179688, "loss": 0.3759, "rewards/accuracies": 0.875, "rewards/chosen": -0.32285669445991516, "rewards/margins": 1.3606055974960327, "rewards/rejected": -1.6834622621536255, "step": 3283 }, { "epoch": 0.38, "learning_rate": 1.8821306247785518e-07, "logits/chosen": -2.6931533813476562, "logits/rejected": -2.821028709411621, "logps/chosen": -201.779541015625, "logps/rejected": -174.5984649658203, "loss": 0.33, "rewards/accuracies": 0.875, "rewards/chosen": -1.0129868984222412, "rewards/margins": 1.2468655109405518, "rewards/rejected": -2.259852409362793, "step": 3284 }, { "epoch": 0.38, "learning_rate": 1.8817763080193693e-07, "logits/chosen": -2.5199334621429443, "logits/rejected": -2.24521803855896, "logps/chosen": -221.0295867919922, "logps/rejected": -266.2992858886719, "loss": 0.8316, "rewards/accuracies": 0.75, "rewards/chosen": -2.0914595127105713, "rewards/margins": 1.4784820079803467, "rewards/rejected": -3.569941759109497, "step": 3285 }, { "epoch": 0.38, "learning_rate": 1.8814219912601865e-07, "logits/chosen": -2.4212422370910645, "logits/rejected": -2.3263494968414307, "logps/chosen": -317.54193115234375, "logps/rejected": -367.0589599609375, "loss": 0.3953, "rewards/accuracies": 0.875, "rewards/chosen": -0.20900674164295197, "rewards/margins": 2.5605926513671875, "rewards/rejected": -2.769599437713623, "step": 3286 }, { "epoch": 0.38, "learning_rate": 1.8810676745010037e-07, "logits/chosen": -2.948509931564331, "logits/rejected": -2.9453582763671875, "logps/chosen": -162.92503356933594, "logps/rejected": -175.3579559326172, "loss": 0.2717, "rewards/accuracies": 0.875, "rewards/chosen": -0.030333571135997772, "rewards/margins": 1.5389050245285034, "rewards/rejected": -1.569238543510437, "step": 3287 }, { "epoch": 0.38, "learning_rate": 1.880713357741821e-07, "logits/chosen": -2.790242910385132, "logits/rejected": -2.395946979522705, "logps/chosen": -258.07012939453125, "logps/rejected": -408.6767578125, "loss": 0.4176, "rewards/accuracies": 0.75, "rewards/chosen": -0.7302748560905457, "rewards/margins": 2.8038430213928223, "rewards/rejected": -3.534118175506592, "step": 3288 }, { "epoch": 0.38, "learning_rate": 1.8803590409826384e-07, "logits/chosen": -2.131655693054199, "logits/rejected": -2.0914385318756104, "logps/chosen": -338.2203369140625, "logps/rejected": -334.618408203125, "loss": 0.2537, "rewards/accuracies": 0.875, "rewards/chosen": -0.8387415409088135, "rewards/margins": 2.3420779705047607, "rewards/rejected": -3.180819511413574, "step": 3289 }, { "epoch": 0.38, "learning_rate": 1.880004724223456e-07, "logits/chosen": -1.38021981716156, "logits/rejected": -1.765668272972107, "logps/chosen": -527.9132080078125, "logps/rejected": -331.218994140625, "loss": 0.7581, "rewards/accuracies": 0.75, "rewards/chosen": -1.265608310699463, "rewards/margins": 0.8357164859771729, "rewards/rejected": -2.1013247966766357, "step": 3290 }, { "epoch": 0.38, "learning_rate": 1.879650407464273e-07, "logits/chosen": -1.8684849739074707, "logits/rejected": -1.780892252922058, "logps/chosen": -277.1417236328125, "logps/rejected": -269.4858093261719, "loss": 0.7831, "rewards/accuracies": 0.75, "rewards/chosen": -1.2440983057022095, "rewards/margins": 1.630279541015625, "rewards/rejected": -2.874377965927124, "step": 3291 }, { "epoch": 0.38, "learning_rate": 1.8792960907050903e-07, "logits/chosen": -2.1279478073120117, "logits/rejected": -2.0628511905670166, "logps/chosen": -239.98599243164062, "logps/rejected": -387.88665771484375, "loss": 1.0234, "rewards/accuracies": 0.5, "rewards/chosen": -1.5104138851165771, "rewards/margins": 0.2929292917251587, "rewards/rejected": -1.8033432960510254, "step": 3292 }, { "epoch": 0.38, "learning_rate": 1.8789417739459076e-07, "logits/chosen": -2.607560634613037, "logits/rejected": -2.4234440326690674, "logps/chosen": -194.35614013671875, "logps/rejected": -328.53851318359375, "loss": 0.9115, "rewards/accuracies": 0.75, "rewards/chosen": -1.8143310546875, "rewards/margins": 1.6550230979919434, "rewards/rejected": -3.4693541526794434, "step": 3293 }, { "epoch": 0.38, "learning_rate": 1.8785874571867248e-07, "logits/chosen": -2.297886848449707, "logits/rejected": -2.2505717277526855, "logps/chosen": -256.78399658203125, "logps/rejected": -271.07916259765625, "loss": 0.4724, "rewards/accuracies": 0.75, "rewards/chosen": -0.13164746761322021, "rewards/margins": 1.5230202674865723, "rewards/rejected": -1.654667615890503, "step": 3294 }, { "epoch": 0.38, "learning_rate": 1.878233140427542e-07, "logits/chosen": -2.346222400665283, "logits/rejected": -2.397308111190796, "logps/chosen": -332.24151611328125, "logps/rejected": -271.36566162109375, "loss": 0.6777, "rewards/accuracies": 0.625, "rewards/chosen": -1.8540563583374023, "rewards/margins": 1.443103551864624, "rewards/rejected": -3.2971601486206055, "step": 3295 }, { "epoch": 0.38, "learning_rate": 1.8778788236683595e-07, "logits/chosen": -1.509308934211731, "logits/rejected": -1.9815223217010498, "logps/chosen": -465.8705749511719, "logps/rejected": -283.3115234375, "loss": 0.3616, "rewards/accuracies": 0.875, "rewards/chosen": -0.9027110934257507, "rewards/margins": 1.0970818996429443, "rewards/rejected": -1.9997931718826294, "step": 3296 }, { "epoch": 0.38, "learning_rate": 1.8775245069091767e-07, "logits/chosen": -2.637381076812744, "logits/rejected": -2.630558490753174, "logps/chosen": -233.5251922607422, "logps/rejected": -222.3778076171875, "loss": 0.3755, "rewards/accuracies": 0.875, "rewards/chosen": -1.1114683151245117, "rewards/margins": 2.5344152450561523, "rewards/rejected": -3.645883560180664, "step": 3297 }, { "epoch": 0.38, "learning_rate": 1.877170190149994e-07, "logits/chosen": -2.675461530685425, "logits/rejected": -2.8047633171081543, "logps/chosen": -366.94696044921875, "logps/rejected": -371.2447509765625, "loss": 0.3709, "rewards/accuracies": 0.875, "rewards/chosen": -0.653447151184082, "rewards/margins": 2.008774518966675, "rewards/rejected": -2.662221670150757, "step": 3298 }, { "epoch": 0.38, "learning_rate": 1.8768158733908112e-07, "logits/chosen": -2.0736334323883057, "logits/rejected": -2.4159953594207764, "logps/chosen": -346.5773620605469, "logps/rejected": -199.7591552734375, "loss": 0.4795, "rewards/accuracies": 0.75, "rewards/chosen": -0.7982228398323059, "rewards/margins": 1.3728286027908325, "rewards/rejected": -2.171051502227783, "step": 3299 }, { "epoch": 0.38, "learning_rate": 1.8764615566316286e-07, "logits/chosen": -2.1672611236572266, "logits/rejected": -2.326845169067383, "logps/chosen": -254.47702026367188, "logps/rejected": -196.19400024414062, "loss": 0.5652, "rewards/accuracies": 0.625, "rewards/chosen": -1.0665678977966309, "rewards/margins": 1.664721965789795, "rewards/rejected": -2.731289863586426, "step": 3300 }, { "epoch": 0.38, "learning_rate": 1.876107239872446e-07, "logits/chosen": -2.2654995918273926, "logits/rejected": -2.5289788246154785, "logps/chosen": -446.5240478515625, "logps/rejected": -277.4591369628906, "loss": 0.3351, "rewards/accuracies": 0.875, "rewards/chosen": -0.6383839249610901, "rewards/margins": 1.9719722270965576, "rewards/rejected": -2.610355854034424, "step": 3301 }, { "epoch": 0.38, "learning_rate": 1.8757529231132634e-07, "logits/chosen": -2.534900188446045, "logits/rejected": -2.5250163078308105, "logps/chosen": -384.9508972167969, "logps/rejected": -206.67236328125, "loss": 0.4618, "rewards/accuracies": 0.75, "rewards/chosen": -1.322751760482788, "rewards/margins": 1.9341816902160645, "rewards/rejected": -3.2569332122802734, "step": 3302 }, { "epoch": 0.38, "learning_rate": 1.8753986063540806e-07, "logits/chosen": -2.2728404998779297, "logits/rejected": -2.2574386596679688, "logps/chosen": -228.38821411132812, "logps/rejected": -289.6147155761719, "loss": 0.3669, "rewards/accuracies": 0.75, "rewards/chosen": -1.0045157670974731, "rewards/margins": 2.9450840950012207, "rewards/rejected": -3.949599504470825, "step": 3303 }, { "epoch": 0.38, "learning_rate": 1.8750442895948978e-07, "logits/chosen": -2.7738311290740967, "logits/rejected": -2.824504852294922, "logps/chosen": -341.946533203125, "logps/rejected": -231.15078735351562, "loss": 0.4011, "rewards/accuracies": 0.875, "rewards/chosen": -0.4048522710800171, "rewards/margins": 2.0366220474243164, "rewards/rejected": -2.441474199295044, "step": 3304 }, { "epoch": 0.38, "learning_rate": 1.874689972835715e-07, "logits/chosen": -2.6926231384277344, "logits/rejected": -2.7381725311279297, "logps/chosen": -390.7256774902344, "logps/rejected": -281.3282470703125, "loss": 0.2631, "rewards/accuracies": 0.875, "rewards/chosen": -0.5844161510467529, "rewards/margins": 2.045635223388672, "rewards/rejected": -2.630051374435425, "step": 3305 }, { "epoch": 0.38, "learning_rate": 1.8743356560765322e-07, "logits/chosen": -2.7192962169647217, "logits/rejected": -2.581277370452881, "logps/chosen": -201.28512573242188, "logps/rejected": -250.05494689941406, "loss": 0.2676, "rewards/accuracies": 0.875, "rewards/chosen": -1.2981903553009033, "rewards/margins": 2.9865493774414062, "rewards/rejected": -4.284739971160889, "step": 3306 }, { "epoch": 0.38, "learning_rate": 1.8739813393173495e-07, "logits/chosen": -2.1047887802124023, "logits/rejected": -2.3058788776397705, "logps/chosen": -305.2047119140625, "logps/rejected": -299.7403564453125, "loss": 0.131, "rewards/accuracies": 1.0, "rewards/chosen": -0.5046166181564331, "rewards/margins": 3.402752161026001, "rewards/rejected": -3.9073686599731445, "step": 3307 }, { "epoch": 0.38, "learning_rate": 1.873627022558167e-07, "logits/chosen": -2.3415985107421875, "logits/rejected": -1.9895336627960205, "logps/chosen": -161.0898895263672, "logps/rejected": -352.61944580078125, "loss": 0.7431, "rewards/accuracies": 0.75, "rewards/chosen": -1.3483014106750488, "rewards/margins": 1.316733479499817, "rewards/rejected": -2.665034770965576, "step": 3308 }, { "epoch": 0.38, "learning_rate": 1.8732727057989842e-07, "logits/chosen": -1.78541100025177, "logits/rejected": -1.8967807292938232, "logps/chosen": -250.46212768554688, "logps/rejected": -154.62986755371094, "loss": 0.1574, "rewards/accuracies": 1.0, "rewards/chosen": -0.5219288468360901, "rewards/margins": 2.0804286003112793, "rewards/rejected": -2.6023576259613037, "step": 3309 }, { "epoch": 0.39, "learning_rate": 1.8729183890398014e-07, "logits/chosen": -2.682833194732666, "logits/rejected": -2.683387041091919, "logps/chosen": -292.8410339355469, "logps/rejected": -383.4903259277344, "loss": 0.4671, "rewards/accuracies": 0.75, "rewards/chosen": -1.433631181716919, "rewards/margins": 4.542508125305176, "rewards/rejected": -5.976139545440674, "step": 3310 }, { "epoch": 0.39, "learning_rate": 1.8725640722806186e-07, "logits/chosen": -1.8614394664764404, "logits/rejected": -2.1195318698883057, "logps/chosen": -159.45297241210938, "logps/rejected": -161.68798828125, "loss": 0.6485, "rewards/accuracies": 0.875, "rewards/chosen": -0.5986157059669495, "rewards/margins": 0.7332737445831299, "rewards/rejected": -1.3318895101547241, "step": 3311 }, { "epoch": 0.39, "learning_rate": 1.8722097555214364e-07, "logits/chosen": -2.314818859100342, "logits/rejected": -2.3607988357543945, "logps/chosen": -303.1656494140625, "logps/rejected": -217.95291137695312, "loss": 0.389, "rewards/accuracies": 0.75, "rewards/chosen": -1.169060468673706, "rewards/margins": 1.576852798461914, "rewards/rejected": -2.745913028717041, "step": 3312 }, { "epoch": 0.39, "learning_rate": 1.8718554387622536e-07, "logits/chosen": -2.5871691703796387, "logits/rejected": -2.842733383178711, "logps/chosen": -356.845458984375, "logps/rejected": -220.0530242919922, "loss": 0.2723, "rewards/accuracies": 0.875, "rewards/chosen": -0.04655265063047409, "rewards/margins": 1.8413044214248657, "rewards/rejected": -1.8878570795059204, "step": 3313 }, { "epoch": 0.39, "learning_rate": 1.8715011220030708e-07, "logits/chosen": -2.221605062484741, "logits/rejected": -2.3359503746032715, "logps/chosen": -200.07846069335938, "logps/rejected": -165.93438720703125, "loss": 0.32, "rewards/accuracies": 1.0, "rewards/chosen": -0.7202491760253906, "rewards/margins": 1.2252986431121826, "rewards/rejected": -1.9455479383468628, "step": 3314 }, { "epoch": 0.39, "learning_rate": 1.871146805243888e-07, "logits/chosen": -2.6385338306427, "logits/rejected": -2.315721035003662, "logps/chosen": -104.0951919555664, "logps/rejected": -194.08631896972656, "loss": 0.4855, "rewards/accuracies": 0.75, "rewards/chosen": -0.4786313772201538, "rewards/margins": 1.1688787937164307, "rewards/rejected": -1.6475101709365845, "step": 3315 }, { "epoch": 0.39, "learning_rate": 1.8707924884847052e-07, "logits/chosen": -2.3582026958465576, "logits/rejected": -1.9953635931015015, "logps/chosen": -276.3548583984375, "logps/rejected": -320.47955322265625, "loss": 0.2439, "rewards/accuracies": 1.0, "rewards/chosen": -0.21279442310333252, "rewards/margins": 1.643676519393921, "rewards/rejected": -1.8564708232879639, "step": 3316 }, { "epoch": 0.39, "learning_rate": 1.8704381717255225e-07, "logits/chosen": -2.866072177886963, "logits/rejected": -2.8295044898986816, "logps/chosen": -291.04632568359375, "logps/rejected": -254.11065673828125, "loss": 0.191, "rewards/accuracies": 1.0, "rewards/chosen": -0.6383817791938782, "rewards/margins": 2.701434373855591, "rewards/rejected": -3.339816093444824, "step": 3317 }, { "epoch": 0.39, "learning_rate": 1.8700838549663397e-07, "logits/chosen": -1.829728364944458, "logits/rejected": -2.4846200942993164, "logps/chosen": -409.5906982421875, "logps/rejected": -211.8067626953125, "loss": 0.3431, "rewards/accuracies": 0.875, "rewards/chosen": -0.7719281911849976, "rewards/margins": 2.0962283611297607, "rewards/rejected": -2.868156671524048, "step": 3318 }, { "epoch": 0.39, "learning_rate": 1.8697295382071572e-07, "logits/chosen": -2.8726956844329834, "logits/rejected": -2.3012900352478027, "logps/chosen": -184.71572875976562, "logps/rejected": -337.9961853027344, "loss": 0.1151, "rewards/accuracies": 1.0, "rewards/chosen": -0.8557183146476746, "rewards/margins": 4.99931526184082, "rewards/rejected": -5.855033874511719, "step": 3319 }, { "epoch": 0.39, "learning_rate": 1.8693752214479744e-07, "logits/chosen": -1.928252935409546, "logits/rejected": -2.077303886413574, "logps/chosen": -354.3191223144531, "logps/rejected": -289.010986328125, "loss": 0.3326, "rewards/accuracies": 0.875, "rewards/chosen": -1.1839746236801147, "rewards/margins": 2.1675992012023926, "rewards/rejected": -3.3515734672546387, "step": 3320 }, { "epoch": 0.39, "learning_rate": 1.8690209046887916e-07, "logits/chosen": -2.381877899169922, "logits/rejected": -2.5532679557800293, "logps/chosen": -356.2655944824219, "logps/rejected": -237.33538818359375, "loss": 0.2109, "rewards/accuracies": 1.0, "rewards/chosen": -1.0129700899124146, "rewards/margins": 1.920851469039917, "rewards/rejected": -2.933821439743042, "step": 3321 }, { "epoch": 0.39, "learning_rate": 1.8686665879296088e-07, "logits/chosen": -2.1813547611236572, "logits/rejected": -2.044093608856201, "logps/chosen": -213.29417419433594, "logps/rejected": -284.45184326171875, "loss": 0.2663, "rewards/accuracies": 0.75, "rewards/chosen": -0.775046706199646, "rewards/margins": 4.860607147216797, "rewards/rejected": -5.635654449462891, "step": 3322 }, { "epoch": 0.39, "learning_rate": 1.868312271170426e-07, "logits/chosen": -2.2203164100646973, "logits/rejected": -2.448683977127075, "logps/chosen": -390.7969665527344, "logps/rejected": -443.055419921875, "loss": 0.1291, "rewards/accuracies": 1.0, "rewards/chosen": -1.0547288656234741, "rewards/margins": 2.9086837768554688, "rewards/rejected": -3.9634127616882324, "step": 3323 }, { "epoch": 0.39, "learning_rate": 1.8679579544112438e-07, "logits/chosen": -2.5076441764831543, "logits/rejected": -2.720520496368408, "logps/chosen": -439.2734680175781, "logps/rejected": -361.6002197265625, "loss": 0.452, "rewards/accuracies": 0.75, "rewards/chosen": -0.31858623027801514, "rewards/margins": 1.3467223644256592, "rewards/rejected": -1.6653087139129639, "step": 3324 }, { "epoch": 0.39, "learning_rate": 1.867603637652061e-07, "logits/chosen": -1.9937670230865479, "logits/rejected": -1.9443154335021973, "logps/chosen": -344.8470458984375, "logps/rejected": -230.26991271972656, "loss": 0.3954, "rewards/accuracies": 0.875, "rewards/chosen": -0.81369549036026, "rewards/margins": 1.363511085510254, "rewards/rejected": -2.177206516265869, "step": 3325 }, { "epoch": 0.39, "learning_rate": 1.8672493208928783e-07, "logits/chosen": -2.5026955604553223, "logits/rejected": -2.5758228302001953, "logps/chosen": -258.6585693359375, "logps/rejected": -217.68820190429688, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": -1.6799204349517822, "rewards/margins": 0.4473345875740051, "rewards/rejected": -2.1272552013397217, "step": 3326 }, { "epoch": 0.39, "learning_rate": 1.8668950041336955e-07, "logits/chosen": -2.3101446628570557, "logits/rejected": -2.4943928718566895, "logps/chosen": -245.93479919433594, "logps/rejected": -253.57289123535156, "loss": 0.5011, "rewards/accuracies": 0.625, "rewards/chosen": -1.1684565544128418, "rewards/margins": 1.916216492652893, "rewards/rejected": -3.0846729278564453, "step": 3327 }, { "epoch": 0.39, "learning_rate": 1.8665406873745127e-07, "logits/chosen": -2.1146552562713623, "logits/rejected": -1.8615665435791016, "logps/chosen": -343.9508056640625, "logps/rejected": -263.4842834472656, "loss": 0.2565, "rewards/accuracies": 0.875, "rewards/chosen": -0.9381371140480042, "rewards/margins": 2.7295093536376953, "rewards/rejected": -3.6676464080810547, "step": 3328 }, { "epoch": 0.39, "learning_rate": 1.86618637061533e-07, "logits/chosen": -2.3748559951782227, "logits/rejected": -2.1856462955474854, "logps/chosen": -392.9393310546875, "logps/rejected": -284.84002685546875, "loss": 0.2259, "rewards/accuracies": 1.0, "rewards/chosen": -0.1904180645942688, "rewards/margins": 1.7698698043823242, "rewards/rejected": -1.9602878093719482, "step": 3329 }, { "epoch": 0.39, "learning_rate": 1.8658320538561474e-07, "logits/chosen": -2.0648443698883057, "logits/rejected": -2.3510148525238037, "logps/chosen": -437.35052490234375, "logps/rejected": -214.12652587890625, "loss": 0.2125, "rewards/accuracies": 1.0, "rewards/chosen": -0.6996749639511108, "rewards/margins": 2.5047664642333984, "rewards/rejected": -3.2044413089752197, "step": 3330 }, { "epoch": 0.39, "learning_rate": 1.8654777370969646e-07, "logits/chosen": -2.55344820022583, "logits/rejected": -2.7280197143554688, "logps/chosen": -246.74685668945312, "logps/rejected": -162.6654510498047, "loss": 0.279, "rewards/accuracies": 1.0, "rewards/chosen": -0.968451976776123, "rewards/margins": 1.7407293319702148, "rewards/rejected": -2.709181308746338, "step": 3331 }, { "epoch": 0.39, "learning_rate": 1.8651234203377818e-07, "logits/chosen": -2.50717830657959, "logits/rejected": -2.4567785263061523, "logps/chosen": -153.53237915039062, "logps/rejected": -120.8471908569336, "loss": 0.2286, "rewards/accuracies": 1.0, "rewards/chosen": 0.0896887332201004, "rewards/margins": 1.7906875610351562, "rewards/rejected": -1.7009987831115723, "step": 3332 }, { "epoch": 0.39, "learning_rate": 1.864769103578599e-07, "logits/chosen": -2.581838607788086, "logits/rejected": -2.47988224029541, "logps/chosen": -183.25698852539062, "logps/rejected": -304.2426452636719, "loss": 0.2916, "rewards/accuracies": 0.875, "rewards/chosen": -0.9398155212402344, "rewards/margins": 2.5162930488586426, "rewards/rejected": -3.456108808517456, "step": 3333 }, { "epoch": 0.39, "learning_rate": 1.8644147868194163e-07, "logits/chosen": -2.358576774597168, "logits/rejected": -2.3485751152038574, "logps/chosen": -123.43535614013672, "logps/rejected": -158.7224884033203, "loss": 0.4572, "rewards/accuracies": 0.875, "rewards/chosen": -0.5003291368484497, "rewards/margins": 1.1466755867004395, "rewards/rejected": -1.6470046043395996, "step": 3334 }, { "epoch": 0.39, "learning_rate": 1.864060470060234e-07, "logits/chosen": -1.9941812753677368, "logits/rejected": -2.103116035461426, "logps/chosen": -261.99432373046875, "logps/rejected": -258.77264404296875, "loss": 0.3595, "rewards/accuracies": 0.875, "rewards/chosen": -0.5648093223571777, "rewards/margins": 1.7207016944885254, "rewards/rejected": -2.285511016845703, "step": 3335 }, { "epoch": 0.39, "learning_rate": 1.8637061533010513e-07, "logits/chosen": -2.4754061698913574, "logits/rejected": -2.5634381771087646, "logps/chosen": -177.4255828857422, "logps/rejected": -200.84011840820312, "loss": 0.5608, "rewards/accuracies": 0.75, "rewards/chosen": -0.8623149394989014, "rewards/margins": 1.6991163492202759, "rewards/rejected": -2.5614311695098877, "step": 3336 }, { "epoch": 0.39, "learning_rate": 1.8633518365418685e-07, "logits/chosen": -1.8519277572631836, "logits/rejected": -2.1480677127838135, "logps/chosen": -413.74554443359375, "logps/rejected": -283.88446044921875, "loss": 0.3933, "rewards/accuracies": 0.75, "rewards/chosen": -1.4207637310028076, "rewards/margins": 1.6770353317260742, "rewards/rejected": -3.097799062728882, "step": 3337 }, { "epoch": 0.39, "learning_rate": 1.8629975197826857e-07, "logits/chosen": -2.4018783569335938, "logits/rejected": -2.290470838546753, "logps/chosen": -95.01655578613281, "logps/rejected": -212.64405822753906, "loss": 0.0998, "rewards/accuracies": 1.0, "rewards/chosen": -0.5135877728462219, "rewards/margins": 3.3934874534606934, "rewards/rejected": -3.9070754051208496, "step": 3338 }, { "epoch": 0.39, "learning_rate": 1.862643203023503e-07, "logits/chosen": -2.568800210952759, "logits/rejected": -2.854388952255249, "logps/chosen": -224.6456756591797, "logps/rejected": -269.7056579589844, "loss": 0.5071, "rewards/accuracies": 0.75, "rewards/chosen": -1.078813076019287, "rewards/margins": 1.965907096862793, "rewards/rejected": -3.04472017288208, "step": 3339 }, { "epoch": 0.39, "learning_rate": 1.8622888862643201e-07, "logits/chosen": -2.9694817066192627, "logits/rejected": -2.994757652282715, "logps/chosen": -344.67596435546875, "logps/rejected": -305.9635314941406, "loss": 0.67, "rewards/accuracies": 0.75, "rewards/chosen": -1.1518006324768066, "rewards/margins": 1.0053941011428833, "rewards/rejected": -2.1571946144104004, "step": 3340 }, { "epoch": 0.39, "learning_rate": 1.8619345695051376e-07, "logits/chosen": -2.6398398876190186, "logits/rejected": -2.655496120452881, "logps/chosen": -353.6522521972656, "logps/rejected": -320.16552734375, "loss": 0.1471, "rewards/accuracies": 1.0, "rewards/chosen": -0.7402046918869019, "rewards/margins": 3.9241464138031006, "rewards/rejected": -4.664350986480713, "step": 3341 }, { "epoch": 0.39, "learning_rate": 1.8615802527459548e-07, "logits/chosen": -2.510622501373291, "logits/rejected": -2.5672836303710938, "logps/chosen": -251.099609375, "logps/rejected": -227.36441040039062, "loss": 0.2572, "rewards/accuracies": 0.875, "rewards/chosen": -0.9761390686035156, "rewards/margins": 2.103545665740967, "rewards/rejected": -3.0796849727630615, "step": 3342 }, { "epoch": 0.39, "learning_rate": 1.861225935986772e-07, "logits/chosen": -2.2911269664764404, "logits/rejected": -2.1103334426879883, "logps/chosen": -291.43133544921875, "logps/rejected": -416.4708557128906, "loss": 0.3121, "rewards/accuracies": 0.875, "rewards/chosen": -0.5300992727279663, "rewards/margins": 2.452591896057129, "rewards/rejected": -2.9826908111572266, "step": 3343 }, { "epoch": 0.39, "learning_rate": 1.8608716192275893e-07, "logits/chosen": -2.241710662841797, "logits/rejected": -2.292165517807007, "logps/chosen": -233.99810791015625, "logps/rejected": -321.11309814453125, "loss": 0.1604, "rewards/accuracies": 1.0, "rewards/chosen": -1.0217769145965576, "rewards/margins": 2.6047046184539795, "rewards/rejected": -3.626481533050537, "step": 3344 }, { "epoch": 0.39, "learning_rate": 1.8605173024684065e-07, "logits/chosen": -2.350900650024414, "logits/rejected": -2.3255608081817627, "logps/chosen": -221.67034912109375, "logps/rejected": -238.7288360595703, "loss": 0.2186, "rewards/accuracies": 0.875, "rewards/chosen": -0.5745895504951477, "rewards/margins": 1.7933828830718994, "rewards/rejected": -2.3679726123809814, "step": 3345 }, { "epoch": 0.39, "learning_rate": 1.8601629857092237e-07, "logits/chosen": -2.5243687629699707, "logits/rejected": -2.4382429122924805, "logps/chosen": -283.927490234375, "logps/rejected": -219.6595001220703, "loss": 0.3144, "rewards/accuracies": 1.0, "rewards/chosen": -0.6615235805511475, "rewards/margins": 1.6599574089050293, "rewards/rejected": -2.321481227874756, "step": 3346 }, { "epoch": 0.39, "learning_rate": 1.8598086689500415e-07, "logits/chosen": -1.4668315649032593, "logits/rejected": -1.9588223695755005, "logps/chosen": -418.39691162109375, "logps/rejected": -253.52468872070312, "loss": 0.1594, "rewards/accuracies": 1.0, "rewards/chosen": -0.04646792262792587, "rewards/margins": 2.7656056880950928, "rewards/rejected": -2.8120737075805664, "step": 3347 }, { "epoch": 0.39, "learning_rate": 1.8594543521908587e-07, "logits/chosen": -2.0182158946990967, "logits/rejected": -2.154639720916748, "logps/chosen": -308.4435729980469, "logps/rejected": -269.21868896484375, "loss": 0.3398, "rewards/accuracies": 1.0, "rewards/chosen": -0.5667620301246643, "rewards/margins": 1.583616852760315, "rewards/rejected": -2.150378704071045, "step": 3348 }, { "epoch": 0.39, "learning_rate": 1.859100035431676e-07, "logits/chosen": -2.190103769302368, "logits/rejected": -2.234595775604248, "logps/chosen": -274.7421875, "logps/rejected": -274.5881042480469, "loss": 0.1005, "rewards/accuracies": 1.0, "rewards/chosen": -0.4870738983154297, "rewards/margins": 3.19370698928833, "rewards/rejected": -3.6807806491851807, "step": 3349 }, { "epoch": 0.39, "learning_rate": 1.8587457186724931e-07, "logits/chosen": -2.8689074516296387, "logits/rejected": -2.886915445327759, "logps/chosen": -180.06417846679688, "logps/rejected": -134.15542602539062, "loss": 0.41, "rewards/accuracies": 0.875, "rewards/chosen": -0.3957795798778534, "rewards/margins": 0.9580878019332886, "rewards/rejected": -1.3538674116134644, "step": 3350 }, { "epoch": 0.39, "learning_rate": 1.8583914019133104e-07, "logits/chosen": -2.2750868797302246, "logits/rejected": -2.3264975547790527, "logps/chosen": -108.59992980957031, "logps/rejected": -254.7178955078125, "loss": 0.3528, "rewards/accuracies": 0.875, "rewards/chosen": -0.6613905429840088, "rewards/margins": 4.389074802398682, "rewards/rejected": -5.0504655838012695, "step": 3351 }, { "epoch": 0.39, "learning_rate": 1.8580370851541276e-07, "logits/chosen": -2.9245758056640625, "logits/rejected": -2.8773388862609863, "logps/chosen": -208.82894897460938, "logps/rejected": -121.12397766113281, "loss": 0.3857, "rewards/accuracies": 0.75, "rewards/chosen": -0.3245733976364136, "rewards/margins": 2.0999860763549805, "rewards/rejected": -2.4245593547821045, "step": 3352 }, { "epoch": 0.39, "learning_rate": 1.857682768394945e-07, "logits/chosen": -2.0385944843292236, "logits/rejected": -2.108271837234497, "logps/chosen": -360.73211669921875, "logps/rejected": -335.3489685058594, "loss": 0.325, "rewards/accuracies": 0.875, "rewards/chosen": -0.5335421562194824, "rewards/margins": 2.035780429840088, "rewards/rejected": -2.5693225860595703, "step": 3353 }, { "epoch": 0.39, "learning_rate": 1.8573284516357623e-07, "logits/chosen": -2.2143685817718506, "logits/rejected": -2.388627529144287, "logps/chosen": -331.9945068359375, "logps/rejected": -288.0626220703125, "loss": 0.3354, "rewards/accuracies": 0.75, "rewards/chosen": -1.1735994815826416, "rewards/margins": 2.3883297443389893, "rewards/rejected": -3.561929225921631, "step": 3354 }, { "epoch": 0.39, "learning_rate": 1.8569741348765795e-07, "logits/chosen": -2.5897045135498047, "logits/rejected": -2.4716248512268066, "logps/chosen": -211.24725341796875, "logps/rejected": -258.7600402832031, "loss": 0.5871, "rewards/accuracies": 0.625, "rewards/chosen": -1.1549172401428223, "rewards/margins": 1.1625418663024902, "rewards/rejected": -2.3174591064453125, "step": 3355 }, { "epoch": 0.39, "learning_rate": 1.8566198181173967e-07, "logits/chosen": -1.9563812017440796, "logits/rejected": -2.3838822841644287, "logps/chosen": -279.91558837890625, "logps/rejected": -194.49490356445312, "loss": 0.5686, "rewards/accuracies": 0.625, "rewards/chosen": -1.37327241897583, "rewards/margins": 0.907346785068512, "rewards/rejected": -2.2806191444396973, "step": 3356 }, { "epoch": 0.39, "learning_rate": 1.856265501358214e-07, "logits/chosen": -2.3209033012390137, "logits/rejected": -2.299435615539551, "logps/chosen": -277.2779846191406, "logps/rejected": -425.1461486816406, "loss": 0.3113, "rewards/accuracies": 0.75, "rewards/chosen": -0.6206858158111572, "rewards/margins": 2.0329325199127197, "rewards/rejected": -2.653618335723877, "step": 3357 }, { "epoch": 0.39, "learning_rate": 1.8559111845990312e-07, "logits/chosen": -2.6779251098632812, "logits/rejected": -2.851982355117798, "logps/chosen": -298.42529296875, "logps/rejected": -236.91897583007812, "loss": 0.1444, "rewards/accuracies": 1.0, "rewards/chosen": -0.49911510944366455, "rewards/margins": 3.199662923812866, "rewards/rejected": -3.698777914047241, "step": 3358 }, { "epoch": 0.39, "learning_rate": 1.855556867839849e-07, "logits/chosen": -2.393836498260498, "logits/rejected": -2.3785572052001953, "logps/chosen": -197.07711791992188, "logps/rejected": -330.8213195800781, "loss": 0.3793, "rewards/accuracies": 0.75, "rewards/chosen": -0.5398982167243958, "rewards/margins": 2.9000461101531982, "rewards/rejected": -3.439944267272949, "step": 3359 }, { "epoch": 0.39, "learning_rate": 1.8552025510806662e-07, "logits/chosen": -2.209611415863037, "logits/rejected": -1.9604334831237793, "logps/chosen": -401.68878173828125, "logps/rejected": -423.21710205078125, "loss": 0.7055, "rewards/accuracies": 0.875, "rewards/chosen": -0.6965991854667664, "rewards/margins": 1.432546854019165, "rewards/rejected": -2.129146099090576, "step": 3360 }, { "epoch": 0.39, "learning_rate": 1.8548482343214834e-07, "logits/chosen": -2.1547179222106934, "logits/rejected": -1.8922131061553955, "logps/chosen": -266.1447448730469, "logps/rejected": -443.15411376953125, "loss": 0.2395, "rewards/accuracies": 1.0, "rewards/chosen": -0.760726809501648, "rewards/margins": 2.46378231048584, "rewards/rejected": -3.2245092391967773, "step": 3361 }, { "epoch": 0.39, "learning_rate": 1.8544939175623006e-07, "logits/chosen": -2.4448699951171875, "logits/rejected": -2.257011890411377, "logps/chosen": -414.1623840332031, "logps/rejected": -479.10333251953125, "loss": 0.4236, "rewards/accuracies": 0.875, "rewards/chosen": -1.068786859512329, "rewards/margins": 2.298985242843628, "rewards/rejected": -3.367772102355957, "step": 3362 }, { "epoch": 0.39, "learning_rate": 1.8541396008031178e-07, "logits/chosen": -2.2770187854766846, "logits/rejected": -2.4864161014556885, "logps/chosen": -297.4179992675781, "logps/rejected": -198.1750946044922, "loss": 0.2368, "rewards/accuracies": 0.875, "rewards/chosen": -0.6367000937461853, "rewards/margins": 2.038423538208008, "rewards/rejected": -2.675123453140259, "step": 3363 }, { "epoch": 0.39, "learning_rate": 1.8537852840439353e-07, "logits/chosen": -2.6885786056518555, "logits/rejected": -2.8835196495056152, "logps/chosen": -392.40936279296875, "logps/rejected": -195.53433227539062, "loss": 0.4347, "rewards/accuracies": 0.625, "rewards/chosen": -1.3198297023773193, "rewards/margins": 1.8627830743789673, "rewards/rejected": -3.182612895965576, "step": 3364 }, { "epoch": 0.39, "learning_rate": 1.8534309672847525e-07, "logits/chosen": -2.5977823734283447, "logits/rejected": -2.455334186553955, "logps/chosen": -196.82725524902344, "logps/rejected": -311.94415283203125, "loss": 0.1176, "rewards/accuracies": 1.0, "rewards/chosen": -0.7752014398574829, "rewards/margins": 3.6826090812683105, "rewards/rejected": -4.457810401916504, "step": 3365 }, { "epoch": 0.39, "learning_rate": 1.8530766505255697e-07, "logits/chosen": -2.8230714797973633, "logits/rejected": -2.935361385345459, "logps/chosen": -192.60580444335938, "logps/rejected": -184.5399169921875, "loss": 0.3158, "rewards/accuracies": 0.875, "rewards/chosen": -0.5311586856842041, "rewards/margins": 1.745026707649231, "rewards/rejected": -2.2761855125427246, "step": 3366 }, { "epoch": 0.39, "learning_rate": 1.852722333766387e-07, "logits/chosen": -3.038447618484497, "logits/rejected": -2.913233757019043, "logps/chosen": -260.3134765625, "logps/rejected": -241.8472442626953, "loss": 0.5531, "rewards/accuracies": 0.75, "rewards/chosen": -1.3188987970352173, "rewards/margins": 1.4595441818237305, "rewards/rejected": -2.778442859649658, "step": 3367 }, { "epoch": 0.39, "learning_rate": 1.8523680170072042e-07, "logits/chosen": -1.9022458791732788, "logits/rejected": -1.920288324356079, "logps/chosen": -379.50567626953125, "logps/rejected": -335.2005920410156, "loss": 0.4192, "rewards/accuracies": 0.75, "rewards/chosen": -0.7813849449157715, "rewards/margins": 1.1830203533172607, "rewards/rejected": -1.9644052982330322, "step": 3368 }, { "epoch": 0.39, "learning_rate": 1.8520137002480214e-07, "logits/chosen": -2.3535709381103516, "logits/rejected": -2.518467426300049, "logps/chosen": -278.2225646972656, "logps/rejected": -237.33392333984375, "loss": 0.4634, "rewards/accuracies": 0.625, "rewards/chosen": -1.3926496505737305, "rewards/margins": 2.7786455154418945, "rewards/rejected": -4.171295166015625, "step": 3369 }, { "epoch": 0.39, "learning_rate": 1.8516593834888392e-07, "logits/chosen": -2.7618722915649414, "logits/rejected": -2.703028917312622, "logps/chosen": -260.31829833984375, "logps/rejected": -308.255615234375, "loss": 0.1919, "rewards/accuracies": 0.875, "rewards/chosen": -1.1674389839172363, "rewards/margins": 3.200131416320801, "rewards/rejected": -4.367570400238037, "step": 3370 }, { "epoch": 0.39, "learning_rate": 1.8513050667296564e-07, "logits/chosen": -2.4295482635498047, "logits/rejected": -2.465958833694458, "logps/chosen": -212.90882873535156, "logps/rejected": -267.43682861328125, "loss": 0.3377, "rewards/accuracies": 0.875, "rewards/chosen": -0.6721929311752319, "rewards/margins": 2.1515798568725586, "rewards/rejected": -2.82377290725708, "step": 3371 }, { "epoch": 0.39, "learning_rate": 1.8509507499704736e-07, "logits/chosen": -2.2857778072357178, "logits/rejected": -2.4301228523254395, "logps/chosen": -289.69012451171875, "logps/rejected": -287.66156005859375, "loss": 0.5664, "rewards/accuracies": 0.625, "rewards/chosen": -0.4596249461174011, "rewards/margins": 1.9070607423782349, "rewards/rejected": -2.366685390472412, "step": 3372 }, { "epoch": 0.39, "learning_rate": 1.8505964332112908e-07, "logits/chosen": -2.5876855850219727, "logits/rejected": -2.7474260330200195, "logps/chosen": -246.04519653320312, "logps/rejected": -299.189453125, "loss": 0.4011, "rewards/accuracies": 0.875, "rewards/chosen": -1.074571132659912, "rewards/margins": 2.1021828651428223, "rewards/rejected": -3.176753520965576, "step": 3373 }, { "epoch": 0.39, "learning_rate": 1.850242116452108e-07, "logits/chosen": -2.898502826690674, "logits/rejected": -2.7643239498138428, "logps/chosen": -229.05201721191406, "logps/rejected": -252.4426727294922, "loss": 0.9962, "rewards/accuracies": 0.625, "rewards/chosen": -1.1958707571029663, "rewards/margins": 0.9954532384872437, "rewards/rejected": -2.191323757171631, "step": 3374 }, { "epoch": 0.39, "learning_rate": 1.8498877996929255e-07, "logits/chosen": -2.798494815826416, "logits/rejected": -2.7742629051208496, "logps/chosen": -323.30474853515625, "logps/rejected": -243.24293518066406, "loss": 0.284, "rewards/accuracies": 0.875, "rewards/chosen": -0.5761392712593079, "rewards/margins": 2.4230294227600098, "rewards/rejected": -2.999168872833252, "step": 3375 }, { "epoch": 0.39, "learning_rate": 1.8495334829337428e-07, "logits/chosen": -2.1511106491088867, "logits/rejected": -2.350090503692627, "logps/chosen": -401.6890563964844, "logps/rejected": -266.52703857421875, "loss": 0.5206, "rewards/accuracies": 0.75, "rewards/chosen": -0.7185509204864502, "rewards/margins": 1.0081268548965454, "rewards/rejected": -1.726677656173706, "step": 3376 }, { "epoch": 0.39, "learning_rate": 1.84917916617456e-07, "logits/chosen": -2.380931854248047, "logits/rejected": -2.5055646896362305, "logps/chosen": -353.9587707519531, "logps/rejected": -282.37164306640625, "loss": 0.637, "rewards/accuracies": 0.5, "rewards/chosen": -0.744989275932312, "rewards/margins": 1.0861908197402954, "rewards/rejected": -1.8311800956726074, "step": 3377 }, { "epoch": 0.39, "learning_rate": 1.8488248494153772e-07, "logits/chosen": -2.0063726902008057, "logits/rejected": -2.1342761516571045, "logps/chosen": -243.80661010742188, "logps/rejected": -296.5588073730469, "loss": 0.6547, "rewards/accuracies": 0.625, "rewards/chosen": -1.0722347497940063, "rewards/margins": 0.9052100777626038, "rewards/rejected": -1.9774448871612549, "step": 3378 }, { "epoch": 0.39, "learning_rate": 1.8484705326561944e-07, "logits/chosen": -2.2630808353424072, "logits/rejected": -1.9485901594161987, "logps/chosen": -250.26153564453125, "logps/rejected": -317.00982666015625, "loss": 0.4519, "rewards/accuracies": 0.75, "rewards/chosen": -1.9268052577972412, "rewards/margins": 1.1187419891357422, "rewards/rejected": -3.0455470085144043, "step": 3379 }, { "epoch": 0.39, "learning_rate": 1.8481162158970116e-07, "logits/chosen": -2.35150146484375, "logits/rejected": -2.490654468536377, "logps/chosen": -191.84776306152344, "logps/rejected": -213.4203643798828, "loss": 0.1698, "rewards/accuracies": 1.0, "rewards/chosen": -0.9626486301422119, "rewards/margins": 2.2764291763305664, "rewards/rejected": -3.2390778064727783, "step": 3380 }, { "epoch": 0.39, "learning_rate": 1.8477618991378289e-07, "logits/chosen": -2.8254480361938477, "logits/rejected": -2.891606092453003, "logps/chosen": -239.35528564453125, "logps/rejected": -481.6474609375, "loss": 0.305, "rewards/accuracies": 0.875, "rewards/chosen": -0.9276472926139832, "rewards/margins": 2.4272212982177734, "rewards/rejected": -3.3548684120178223, "step": 3381 }, { "epoch": 0.39, "learning_rate": 1.8474075823786466e-07, "logits/chosen": -1.466334342956543, "logits/rejected": -1.8428864479064941, "logps/chosen": -464.8097839355469, "logps/rejected": -301.1081237792969, "loss": 0.5536, "rewards/accuracies": 0.625, "rewards/chosen": -0.9402018785476685, "rewards/margins": 0.8904042840003967, "rewards/rejected": -1.83060622215271, "step": 3382 }, { "epoch": 0.39, "learning_rate": 1.8470532656194638e-07, "logits/chosen": -2.291431188583374, "logits/rejected": -2.4347662925720215, "logps/chosen": -232.44873046875, "logps/rejected": -221.07321166992188, "loss": 0.152, "rewards/accuracies": 1.0, "rewards/chosen": -0.09062960743904114, "rewards/margins": 2.6399500370025635, "rewards/rejected": -2.7305796146392822, "step": 3383 }, { "epoch": 0.39, "learning_rate": 1.846698948860281e-07, "logits/chosen": -1.9974241256713867, "logits/rejected": -2.2461326122283936, "logps/chosen": -377.51641845703125, "logps/rejected": -318.126220703125, "loss": 0.2322, "rewards/accuracies": 1.0, "rewards/chosen": -0.5250531435012817, "rewards/margins": 2.056821346282959, "rewards/rejected": -2.581874370574951, "step": 3384 }, { "epoch": 0.39, "learning_rate": 1.8463446321010983e-07, "logits/chosen": -1.4070454835891724, "logits/rejected": -1.860363483428955, "logps/chosen": -354.82623291015625, "logps/rejected": -215.9857940673828, "loss": 0.5349, "rewards/accuracies": 0.875, "rewards/chosen": -1.0824726819992065, "rewards/margins": 1.2527027130126953, "rewards/rejected": -2.3351755142211914, "step": 3385 }, { "epoch": 0.39, "learning_rate": 1.8459903153419155e-07, "logits/chosen": -2.058957815170288, "logits/rejected": -2.2481486797332764, "logps/chosen": -336.38543701171875, "logps/rejected": -279.0199890136719, "loss": 0.4237, "rewards/accuracies": 0.875, "rewards/chosen": -0.6062447428703308, "rewards/margins": 1.8290809392929077, "rewards/rejected": -2.4353256225585938, "step": 3386 }, { "epoch": 0.39, "learning_rate": 1.845635998582733e-07, "logits/chosen": -2.434199333190918, "logits/rejected": -2.3138539791107178, "logps/chosen": -119.56906127929688, "logps/rejected": -196.87399291992188, "loss": 0.2916, "rewards/accuracies": 0.875, "rewards/chosen": -1.3057142496109009, "rewards/margins": 3.055112361907959, "rewards/rejected": -4.36082649230957, "step": 3387 }, { "epoch": 0.39, "learning_rate": 1.8452816818235502e-07, "logits/chosen": -2.657477378845215, "logits/rejected": -2.8132357597351074, "logps/chosen": -223.03981018066406, "logps/rejected": -177.70660400390625, "loss": 0.1743, "rewards/accuracies": 1.0, "rewards/chosen": -0.9501574635505676, "rewards/margins": 1.9658327102661133, "rewards/rejected": -2.915990114212036, "step": 3388 }, { "epoch": 0.39, "learning_rate": 1.8449273650643674e-07, "logits/chosen": -2.0795319080352783, "logits/rejected": -2.0661044120788574, "logps/chosen": -232.7060546875, "logps/rejected": -232.88661193847656, "loss": 0.3979, "rewards/accuracies": 0.75, "rewards/chosen": -2.5535342693328857, "rewards/margins": 1.6858792304992676, "rewards/rejected": -4.239413261413574, "step": 3389 }, { "epoch": 0.39, "learning_rate": 1.8445730483051846e-07, "logits/chosen": -2.5190484523773193, "logits/rejected": -2.4008631706237793, "logps/chosen": -250.22972106933594, "logps/rejected": -362.1461181640625, "loss": 0.3935, "rewards/accuracies": 0.75, "rewards/chosen": -0.47159525752067566, "rewards/margins": 2.203805685043335, "rewards/rejected": -2.675400733947754, "step": 3390 }, { "epoch": 0.39, "learning_rate": 1.8442187315460019e-07, "logits/chosen": -2.583657741546631, "logits/rejected": -2.720963478088379, "logps/chosen": -319.7049255371094, "logps/rejected": -337.41748046875, "loss": 0.2782, "rewards/accuracies": 0.75, "rewards/chosen": -0.33775949478149414, "rewards/margins": 2.493699073791504, "rewards/rejected": -2.831458568572998, "step": 3391 }, { "epoch": 0.39, "learning_rate": 1.843864414786819e-07, "logits/chosen": -1.9466845989227295, "logits/rejected": -1.7100924253463745, "logps/chosen": -440.768310546875, "logps/rejected": -363.82562255859375, "loss": 0.1139, "rewards/accuracies": 1.0, "rewards/chosen": -0.025063902139663696, "rewards/margins": 3.02437686920166, "rewards/rejected": -3.049440860748291, "step": 3392 }, { "epoch": 0.39, "learning_rate": 1.8435100980276366e-07, "logits/chosen": -2.856184959411621, "logits/rejected": -2.6893389225006104, "logps/chosen": -341.54571533203125, "logps/rejected": -271.4803466796875, "loss": 0.7048, "rewards/accuracies": 0.75, "rewards/chosen": -1.7417503595352173, "rewards/margins": 1.1092075109481812, "rewards/rejected": -2.8509578704833984, "step": 3393 }, { "epoch": 0.39, "learning_rate": 1.843155781268454e-07, "logits/chosen": -2.6863150596618652, "logits/rejected": -2.6962392330169678, "logps/chosen": -153.38583374023438, "logps/rejected": -241.77810668945312, "loss": 0.2749, "rewards/accuracies": 1.0, "rewards/chosen": -1.200852870941162, "rewards/margins": 2.3777523040771484, "rewards/rejected": -3.5786051750183105, "step": 3394 }, { "epoch": 0.39, "learning_rate": 1.8428014645092713e-07, "logits/chosen": -2.6275739669799805, "logits/rejected": -2.694567918777466, "logps/chosen": -150.1112060546875, "logps/rejected": -224.17312622070312, "loss": 0.97, "rewards/accuracies": 0.5, "rewards/chosen": -1.0985922813415527, "rewards/margins": 1.3035974502563477, "rewards/rejected": -2.4021897315979004, "step": 3395 }, { "epoch": 0.4, "learning_rate": 1.8424471477500885e-07, "logits/chosen": -2.5806496143341064, "logits/rejected": -2.53804087638855, "logps/chosen": -244.8392791748047, "logps/rejected": -339.36016845703125, "loss": 0.528, "rewards/accuracies": 0.75, "rewards/chosen": -1.7698785066604614, "rewards/margins": 2.193786144256592, "rewards/rejected": -3.9636647701263428, "step": 3396 }, { "epoch": 0.4, "learning_rate": 1.8420928309909057e-07, "logits/chosen": -2.183762311935425, "logits/rejected": -2.3645591735839844, "logps/chosen": -436.77374267578125, "logps/rejected": -274.9459228515625, "loss": 0.2079, "rewards/accuracies": 0.875, "rewards/chosen": -0.17809192836284637, "rewards/margins": 2.2717766761779785, "rewards/rejected": -2.449868679046631, "step": 3397 }, { "epoch": 0.4, "learning_rate": 1.8417385142317232e-07, "logits/chosen": -2.506166458129883, "logits/rejected": -2.466632604598999, "logps/chosen": -244.1412353515625, "logps/rejected": -421.9756774902344, "loss": 0.2104, "rewards/accuracies": 0.875, "rewards/chosen": -0.5013792514801025, "rewards/margins": 2.5061001777648926, "rewards/rejected": -3.007479429244995, "step": 3398 }, { "epoch": 0.4, "learning_rate": 1.8413841974725404e-07, "logits/chosen": -2.421644687652588, "logits/rejected": -2.436990261077881, "logps/chosen": -109.25733947753906, "logps/rejected": -184.52667236328125, "loss": 0.2479, "rewards/accuracies": 0.75, "rewards/chosen": -0.5189033150672913, "rewards/margins": 2.108459711074829, "rewards/rejected": -2.6273629665374756, "step": 3399 }, { "epoch": 0.4, "learning_rate": 1.8410298807133577e-07, "logits/chosen": -2.79703688621521, "logits/rejected": -2.5895659923553467, "logps/chosen": -217.38339233398438, "logps/rejected": -234.58453369140625, "loss": 0.3201, "rewards/accuracies": 0.75, "rewards/chosen": -1.4397207498550415, "rewards/margins": 2.3359646797180176, "rewards/rejected": -3.7756855487823486, "step": 3400 }, { "epoch": 0.4, "learning_rate": 1.840675563954175e-07, "logits/chosen": -2.2023959159851074, "logits/rejected": -2.325378656387329, "logps/chosen": -531.6226806640625, "logps/rejected": -475.36907958984375, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": -0.48696762323379517, "rewards/margins": 4.032780647277832, "rewards/rejected": -4.519747734069824, "step": 3401 }, { "epoch": 0.4, "learning_rate": 1.840321247194992e-07, "logits/chosen": -2.315439224243164, "logits/rejected": -2.7421669960021973, "logps/chosen": -287.54510498046875, "logps/rejected": -193.55133056640625, "loss": 0.1192, "rewards/accuracies": 1.0, "rewards/chosen": -0.4484342336654663, "rewards/margins": 4.028387069702148, "rewards/rejected": -4.4768218994140625, "step": 3402 }, { "epoch": 0.4, "learning_rate": 1.8399669304358093e-07, "logits/chosen": -2.279280424118042, "logits/rejected": -2.2649879455566406, "logps/chosen": -265.00482177734375, "logps/rejected": -320.2498474121094, "loss": 0.2106, "rewards/accuracies": 0.875, "rewards/chosen": -0.13400880992412567, "rewards/margins": 2.6128296852111816, "rewards/rejected": -2.746838331222534, "step": 3403 }, { "epoch": 0.4, "learning_rate": 1.8396126136766268e-07, "logits/chosen": -1.9287952184677124, "logits/rejected": -1.9925240278244019, "logps/chosen": -310.74298095703125, "logps/rejected": -380.138427734375, "loss": 0.4418, "rewards/accuracies": 0.625, "rewards/chosen": -1.1386207342147827, "rewards/margins": 2.426340341567993, "rewards/rejected": -3.5649609565734863, "step": 3404 }, { "epoch": 0.4, "learning_rate": 1.8392582969174443e-07, "logits/chosen": -1.8607568740844727, "logits/rejected": -1.7253414392471313, "logps/chosen": -274.81195068359375, "logps/rejected": -341.8802185058594, "loss": 0.4892, "rewards/accuracies": 0.75, "rewards/chosen": -1.2398412227630615, "rewards/margins": 0.9031581878662109, "rewards/rejected": -2.1429994106292725, "step": 3405 }, { "epoch": 0.4, "learning_rate": 1.8389039801582615e-07, "logits/chosen": -2.2108569145202637, "logits/rejected": -2.370239734649658, "logps/chosen": -122.34679412841797, "logps/rejected": -191.93954467773438, "loss": 0.3282, "rewards/accuracies": 0.875, "rewards/chosen": -0.17209647595882416, "rewards/margins": 2.146580457687378, "rewards/rejected": -2.3186769485473633, "step": 3406 }, { "epoch": 0.4, "learning_rate": 1.8385496633990787e-07, "logits/chosen": -2.393409490585327, "logits/rejected": -1.99038565158844, "logps/chosen": -225.8734588623047, "logps/rejected": -338.2386779785156, "loss": 0.1444, "rewards/accuracies": 1.0, "rewards/chosen": -0.4594680964946747, "rewards/margins": 3.004650592803955, "rewards/rejected": -3.464118480682373, "step": 3407 }, { "epoch": 0.4, "learning_rate": 1.838195346639896e-07, "logits/chosen": -1.5098017454147339, "logits/rejected": -1.5717741250991821, "logps/chosen": -453.8023376464844, "logps/rejected": -414.75274658203125, "loss": 0.5073, "rewards/accuracies": 0.5, "rewards/chosen": -1.2314974069595337, "rewards/margins": 1.3030622005462646, "rewards/rejected": -2.534559726715088, "step": 3408 }, { "epoch": 0.4, "learning_rate": 1.8378410298807134e-07, "logits/chosen": -2.160737991333008, "logits/rejected": -1.99154794216156, "logps/chosen": -235.7310333251953, "logps/rejected": -359.54864501953125, "loss": 0.7449, "rewards/accuracies": 0.5, "rewards/chosen": -1.389125943183899, "rewards/margins": 0.7373011112213135, "rewards/rejected": -2.126427173614502, "step": 3409 }, { "epoch": 0.4, "learning_rate": 1.8374867131215307e-07, "logits/chosen": -2.0576906204223633, "logits/rejected": -2.491405487060547, "logps/chosen": -336.63446044921875, "logps/rejected": -346.96923828125, "loss": 0.5451, "rewards/accuracies": 0.875, "rewards/chosen": -1.0263760089874268, "rewards/margins": 1.245700716972351, "rewards/rejected": -2.2720768451690674, "step": 3410 }, { "epoch": 0.4, "learning_rate": 1.837132396362348e-07, "logits/chosen": -1.917681097984314, "logits/rejected": -1.8454885482788086, "logps/chosen": -148.79788208007812, "logps/rejected": -215.19613647460938, "loss": 0.3919, "rewards/accuracies": 0.75, "rewards/chosen": -1.3400630950927734, "rewards/margins": 2.0094645023345947, "rewards/rejected": -3.349527597427368, "step": 3411 }, { "epoch": 0.4, "learning_rate": 1.836778079603165e-07, "logits/chosen": -2.0476274490356445, "logits/rejected": -2.1638777256011963, "logps/chosen": -367.7312927246094, "logps/rejected": -271.1231689453125, "loss": 0.737, "rewards/accuracies": 0.625, "rewards/chosen": -2.064182758331299, "rewards/margins": 1.6046191453933716, "rewards/rejected": -3.668801784515381, "step": 3412 }, { "epoch": 0.4, "learning_rate": 1.8364237628439823e-07, "logits/chosen": -2.203263998031616, "logits/rejected": -2.3298511505126953, "logps/chosen": -425.442626953125, "logps/rejected": -334.72650146484375, "loss": 0.3528, "rewards/accuracies": 0.875, "rewards/chosen": -0.96681147813797, "rewards/margins": 1.7388677597045898, "rewards/rejected": -2.705679416656494, "step": 3413 }, { "epoch": 0.4, "learning_rate": 1.8360694460847995e-07, "logits/chosen": -2.0349578857421875, "logits/rejected": -1.8259761333465576, "logps/chosen": -425.437744140625, "logps/rejected": -288.0302734375, "loss": 0.347, "rewards/accuracies": 0.75, "rewards/chosen": -0.34897899627685547, "rewards/margins": 1.5794318914413452, "rewards/rejected": -1.9284110069274902, "step": 3414 }, { "epoch": 0.4, "learning_rate": 1.8357151293256168e-07, "logits/chosen": -1.8451001644134521, "logits/rejected": -2.286156415939331, "logps/chosen": -465.2957458496094, "logps/rejected": -228.60989379882812, "loss": 0.5056, "rewards/accuracies": 0.75, "rewards/chosen": -0.9549244046211243, "rewards/margins": 1.3354610204696655, "rewards/rejected": -2.2903852462768555, "step": 3415 }, { "epoch": 0.4, "learning_rate": 1.8353608125664343e-07, "logits/chosen": -2.524022102355957, "logits/rejected": -2.590106964111328, "logps/chosen": -217.944580078125, "logps/rejected": -173.87588500976562, "loss": 0.6161, "rewards/accuracies": 0.75, "rewards/chosen": -0.7616043090820312, "rewards/margins": 1.2919037342071533, "rewards/rejected": -2.0535080432891846, "step": 3416 }, { "epoch": 0.4, "learning_rate": 1.8350064958072517e-07, "logits/chosen": -1.838868260383606, "logits/rejected": -2.116912603378296, "logps/chosen": -363.2449951171875, "logps/rejected": -340.6737365722656, "loss": 0.1579, "rewards/accuracies": 1.0, "rewards/chosen": -0.15411730110645294, "rewards/margins": 2.877066135406494, "rewards/rejected": -3.0311832427978516, "step": 3417 }, { "epoch": 0.4, "learning_rate": 1.834652179048069e-07, "logits/chosen": -2.3090085983276367, "logits/rejected": -2.4306185245513916, "logps/chosen": -248.18258666992188, "logps/rejected": -208.00596618652344, "loss": 0.4291, "rewards/accuracies": 0.875, "rewards/chosen": -0.4839983582496643, "rewards/margins": 1.2085407972335815, "rewards/rejected": -1.692539095878601, "step": 3418 }, { "epoch": 0.4, "learning_rate": 1.8342978622888862e-07, "logits/chosen": -2.5586283206939697, "logits/rejected": -2.6537680625915527, "logps/chosen": -309.36273193359375, "logps/rejected": -265.89404296875, "loss": 0.2422, "rewards/accuracies": 0.875, "rewards/chosen": -0.695320188999176, "rewards/margins": 1.7607567310333252, "rewards/rejected": -2.4560768604278564, "step": 3419 }, { "epoch": 0.4, "learning_rate": 1.8339435455297037e-07, "logits/chosen": -2.0043845176696777, "logits/rejected": -1.89007568359375, "logps/chosen": -342.0821533203125, "logps/rejected": -367.994873046875, "loss": 0.3976, "rewards/accuracies": 0.75, "rewards/chosen": -0.033517688512802124, "rewards/margins": 1.744642972946167, "rewards/rejected": -1.7781606912612915, "step": 3420 }, { "epoch": 0.4, "learning_rate": 1.833589228770521e-07, "logits/chosen": -2.3497707843780518, "logits/rejected": -2.291553020477295, "logps/chosen": -181.2816162109375, "logps/rejected": -284.2139587402344, "loss": 2.2077, "rewards/accuracies": 0.5, "rewards/chosen": -3.2609784603118896, "rewards/margins": -0.9471589922904968, "rewards/rejected": -2.313819408416748, "step": 3421 }, { "epoch": 0.4, "learning_rate": 1.833234912011338e-07, "logits/chosen": -1.9613993167877197, "logits/rejected": -1.8648691177368164, "logps/chosen": -191.7077178955078, "logps/rejected": -179.7085723876953, "loss": 0.4297, "rewards/accuracies": 0.875, "rewards/chosen": -1.1766455173492432, "rewards/margins": 1.1588020324707031, "rewards/rejected": -2.3354475498199463, "step": 3422 }, { "epoch": 0.4, "learning_rate": 1.8328805952521553e-07, "logits/chosen": -1.9801139831542969, "logits/rejected": -2.2710070610046387, "logps/chosen": -436.3531799316406, "logps/rejected": -286.6208801269531, "loss": 0.3139, "rewards/accuracies": 0.875, "rewards/chosen": -1.5968255996704102, "rewards/margins": 1.8534002304077148, "rewards/rejected": -3.450226068496704, "step": 3423 }, { "epoch": 0.4, "learning_rate": 1.8325262784929726e-07, "logits/chosen": -2.6476526260375977, "logits/rejected": -2.8960063457489014, "logps/chosen": -261.958984375, "logps/rejected": -184.93478393554688, "loss": 0.6177, "rewards/accuracies": 0.625, "rewards/chosen": -0.7138870358467102, "rewards/margins": 1.0215260982513428, "rewards/rejected": -1.7354131937026978, "step": 3424 }, { "epoch": 0.4, "learning_rate": 1.8321719617337898e-07, "logits/chosen": -1.9933302402496338, "logits/rejected": -2.072636604309082, "logps/chosen": -294.7523193359375, "logps/rejected": -340.1819763183594, "loss": 0.4764, "rewards/accuracies": 0.875, "rewards/chosen": -0.9526187181472778, "rewards/margins": 2.197521209716797, "rewards/rejected": -3.1501400470733643, "step": 3425 }, { "epoch": 0.4, "learning_rate": 1.831817644974607e-07, "logits/chosen": -2.3068716526031494, "logits/rejected": -2.191986560821533, "logps/chosen": -282.1511535644531, "logps/rejected": -385.1468811035156, "loss": 0.4565, "rewards/accuracies": 0.625, "rewards/chosen": -1.1387066841125488, "rewards/margins": 1.0770667791366577, "rewards/rejected": -2.215773582458496, "step": 3426 }, { "epoch": 0.4, "learning_rate": 1.8314633282154245e-07, "logits/chosen": -1.8716621398925781, "logits/rejected": -1.7429122924804688, "logps/chosen": -135.0226593017578, "logps/rejected": -155.77450561523438, "loss": 0.7122, "rewards/accuracies": 0.625, "rewards/chosen": -0.9409748315811157, "rewards/margins": 0.2603878974914551, "rewards/rejected": -1.2013627290725708, "step": 3427 }, { "epoch": 0.4, "learning_rate": 1.8311090114562417e-07, "logits/chosen": -2.2367982864379883, "logits/rejected": -2.2505195140838623, "logps/chosen": -320.6040344238281, "logps/rejected": -292.02801513671875, "loss": 0.3037, "rewards/accuracies": 0.875, "rewards/chosen": -0.5819922685623169, "rewards/margins": 1.9463709592819214, "rewards/rejected": -2.5283632278442383, "step": 3428 }, { "epoch": 0.4, "learning_rate": 1.8307546946970592e-07, "logits/chosen": -2.063913345336914, "logits/rejected": -2.2774219512939453, "logps/chosen": -333.12786865234375, "logps/rejected": -265.0030517578125, "loss": 0.3549, "rewards/accuracies": 1.0, "rewards/chosen": -0.33602482080459595, "rewards/margins": 1.420503854751587, "rewards/rejected": -1.756528615951538, "step": 3429 }, { "epoch": 0.4, "learning_rate": 1.8304003779378764e-07, "logits/chosen": -1.988664150238037, "logits/rejected": -2.303290367126465, "logps/chosen": -389.89019775390625, "logps/rejected": -414.17626953125, "loss": 0.2615, "rewards/accuracies": 0.875, "rewards/chosen": -0.5854094624519348, "rewards/margins": 2.9702248573303223, "rewards/rejected": -3.5556344985961914, "step": 3430 }, { "epoch": 0.4, "learning_rate": 1.8300460611786936e-07, "logits/chosen": -2.9957938194274902, "logits/rejected": -2.9585652351379395, "logps/chosen": -230.4889373779297, "logps/rejected": -334.8667297363281, "loss": 0.502, "rewards/accuracies": 0.875, "rewards/chosen": -1.8904938697814941, "rewards/margins": 3.502958059310913, "rewards/rejected": -5.3934526443481445, "step": 3431 }, { "epoch": 0.4, "learning_rate": 1.829691744419511e-07, "logits/chosen": -1.8003120422363281, "logits/rejected": -1.9285058975219727, "logps/chosen": -459.41912841796875, "logps/rejected": -311.0648498535156, "loss": 0.7339, "rewards/accuracies": 0.75, "rewards/chosen": -1.9928489923477173, "rewards/margins": 0.5760178565979004, "rewards/rejected": -2.568866729736328, "step": 3432 }, { "epoch": 0.4, "learning_rate": 1.8293374276603283e-07, "logits/chosen": -2.255795478820801, "logits/rejected": -2.1986513137817383, "logps/chosen": -223.6953887939453, "logps/rejected": -255.81591796875, "loss": 0.7323, "rewards/accuracies": 0.625, "rewards/chosen": -1.2311562299728394, "rewards/margins": 0.9870451092720032, "rewards/rejected": -2.2182013988494873, "step": 3433 }, { "epoch": 0.4, "learning_rate": 1.8289831109011456e-07, "logits/chosen": -2.3228206634521484, "logits/rejected": -2.08485746383667, "logps/chosen": -87.28106689453125, "logps/rejected": -177.53013610839844, "loss": 0.1988, "rewards/accuracies": 1.0, "rewards/chosen": -0.07531140744686127, "rewards/margins": 2.7301158905029297, "rewards/rejected": -2.805427312850952, "step": 3434 }, { "epoch": 0.4, "learning_rate": 1.8286287941419628e-07, "logits/chosen": -2.026492118835449, "logits/rejected": -2.1857399940490723, "logps/chosen": -321.1061706542969, "logps/rejected": -274.46502685546875, "loss": 0.4373, "rewards/accuracies": 0.875, "rewards/chosen": -0.5961174964904785, "rewards/margins": 1.5987675189971924, "rewards/rejected": -2.19488525390625, "step": 3435 }, { "epoch": 0.4, "learning_rate": 1.82827447738278e-07, "logits/chosen": -2.499723434448242, "logits/rejected": -2.3671600818634033, "logps/chosen": -135.6142578125, "logps/rejected": -153.54759216308594, "loss": 0.2885, "rewards/accuracies": 0.875, "rewards/chosen": -0.8509641885757446, "rewards/margins": 2.261913776397705, "rewards/rejected": -3.11287784576416, "step": 3436 }, { "epoch": 0.4, "learning_rate": 1.8279201606235972e-07, "logits/chosen": -2.418226718902588, "logits/rejected": -2.295963764190674, "logps/chosen": -350.7650451660156, "logps/rejected": -445.7560729980469, "loss": 0.7336, "rewards/accuracies": 0.75, "rewards/chosen": -1.4148454666137695, "rewards/margins": 1.1387925148010254, "rewards/rejected": -2.553637742996216, "step": 3437 }, { "epoch": 0.4, "learning_rate": 1.8275658438644147e-07, "logits/chosen": -2.3481552600860596, "logits/rejected": -2.3030970096588135, "logps/chosen": -284.3382568359375, "logps/rejected": -284.246826171875, "loss": 0.2341, "rewards/accuracies": 1.0, "rewards/chosen": -0.24014732241630554, "rewards/margins": 1.6432760953903198, "rewards/rejected": -1.8834234476089478, "step": 3438 }, { "epoch": 0.4, "learning_rate": 1.827211527105232e-07, "logits/chosen": -2.6225171089172363, "logits/rejected": -2.5971035957336426, "logps/chosen": -244.91941833496094, "logps/rejected": -251.77198791503906, "loss": 0.1185, "rewards/accuracies": 1.0, "rewards/chosen": -0.6536723375320435, "rewards/margins": 2.9695374965667725, "rewards/rejected": -3.6232099533081055, "step": 3439 }, { "epoch": 0.4, "learning_rate": 1.8268572103460494e-07, "logits/chosen": -2.3729755878448486, "logits/rejected": -2.5604405403137207, "logps/chosen": -186.47593688964844, "logps/rejected": -178.91043090820312, "loss": 0.579, "rewards/accuracies": 0.75, "rewards/chosen": -0.7151098847389221, "rewards/margins": 1.7111750841140747, "rewards/rejected": -2.4262850284576416, "step": 3440 }, { "epoch": 0.4, "learning_rate": 1.8265028935868666e-07, "logits/chosen": -2.422905445098877, "logits/rejected": -2.6406314373016357, "logps/chosen": -317.6475830078125, "logps/rejected": -228.85494995117188, "loss": 0.4481, "rewards/accuracies": 0.625, "rewards/chosen": -0.7034437656402588, "rewards/margins": 2.0382862091064453, "rewards/rejected": -2.741729974746704, "step": 3441 }, { "epoch": 0.4, "learning_rate": 1.8261485768276839e-07, "logits/chosen": -2.862699270248413, "logits/rejected": -2.724250078201294, "logps/chosen": -204.6074981689453, "logps/rejected": -257.3564453125, "loss": 0.1655, "rewards/accuracies": 1.0, "rewards/chosen": -0.8959665298461914, "rewards/margins": 3.011035442352295, "rewards/rejected": -3.9070017337799072, "step": 3442 }, { "epoch": 0.4, "learning_rate": 1.8257942600685013e-07, "logits/chosen": -2.3302226066589355, "logits/rejected": -2.0286476612091064, "logps/chosen": -257.14666748046875, "logps/rejected": -306.5703125, "loss": 0.3527, "rewards/accuracies": 0.875, "rewards/chosen": -0.779371976852417, "rewards/margins": 1.4865121841430664, "rewards/rejected": -2.2658839225769043, "step": 3443 }, { "epoch": 0.4, "learning_rate": 1.8254399433093186e-07, "logits/chosen": -1.9845811128616333, "logits/rejected": -2.1462204456329346, "logps/chosen": -474.25921630859375, "logps/rejected": -438.884765625, "loss": 0.1911, "rewards/accuracies": 1.0, "rewards/chosen": -0.42834004759788513, "rewards/margins": 2.771885871887207, "rewards/rejected": -3.200226068496704, "step": 3444 }, { "epoch": 0.4, "learning_rate": 1.8250856265501358e-07, "logits/chosen": -2.149412155151367, "logits/rejected": -2.2782368659973145, "logps/chosen": -260.5132751464844, "logps/rejected": -336.49951171875, "loss": 0.4143, "rewards/accuracies": 0.875, "rewards/chosen": -0.7634851932525635, "rewards/margins": 1.7064323425292969, "rewards/rejected": -2.4699175357818604, "step": 3445 }, { "epoch": 0.4, "learning_rate": 1.824731309790953e-07, "logits/chosen": -1.6671053171157837, "logits/rejected": -1.535812258720398, "logps/chosen": -506.6976013183594, "logps/rejected": -442.39404296875, "loss": 0.2963, "rewards/accuracies": 0.75, "rewards/chosen": 0.20182761549949646, "rewards/margins": 2.8442916870117188, "rewards/rejected": -2.6424641609191895, "step": 3446 }, { "epoch": 0.4, "learning_rate": 1.8243769930317702e-07, "logits/chosen": -1.7697808742523193, "logits/rejected": -1.8003170490264893, "logps/chosen": -234.4547119140625, "logps/rejected": -236.1967315673828, "loss": 0.6206, "rewards/accuracies": 0.875, "rewards/chosen": -1.318357229232788, "rewards/margins": 1.2679312229156494, "rewards/rejected": -2.5862884521484375, "step": 3447 }, { "epoch": 0.4, "learning_rate": 1.8240226762725874e-07, "logits/chosen": -1.6967637538909912, "logits/rejected": -1.7848191261291504, "logps/chosen": -140.1956787109375, "logps/rejected": -134.94064331054688, "loss": 0.2248, "rewards/accuracies": 0.875, "rewards/chosen": -0.9871538281440735, "rewards/margins": 1.8120685815811157, "rewards/rejected": -2.799222469329834, "step": 3448 }, { "epoch": 0.4, "learning_rate": 1.823668359513405e-07, "logits/chosen": -2.6802191734313965, "logits/rejected": -2.652160167694092, "logps/chosen": -142.31756591796875, "logps/rejected": -176.07008361816406, "loss": 0.2669, "rewards/accuracies": 0.875, "rewards/chosen": -0.6251325607299805, "rewards/margins": 2.531029224395752, "rewards/rejected": -3.1561617851257324, "step": 3449 }, { "epoch": 0.4, "learning_rate": 1.8233140427542222e-07, "logits/chosen": -2.280028820037842, "logits/rejected": -2.7876639366149902, "logps/chosen": -387.5239562988281, "logps/rejected": -139.0717315673828, "loss": 0.1644, "rewards/accuracies": 1.0, "rewards/chosen": -0.22632421553134918, "rewards/margins": 2.5755815505981445, "rewards/rejected": -2.801905870437622, "step": 3450 }, { "epoch": 0.4, "learning_rate": 1.8229597259950394e-07, "logits/chosen": -2.146669864654541, "logits/rejected": -2.1526191234588623, "logps/chosen": -177.8876953125, "logps/rejected": -180.3375244140625, "loss": 0.4018, "rewards/accuracies": 0.75, "rewards/chosen": -1.0363225936889648, "rewards/margins": 1.62081778049469, "rewards/rejected": -2.6571404933929443, "step": 3451 }, { "epoch": 0.4, "learning_rate": 1.8226054092358569e-07, "logits/chosen": -1.8966842889785767, "logits/rejected": -2.008312463760376, "logps/chosen": -269.3478088378906, "logps/rejected": -235.667236328125, "loss": 0.2421, "rewards/accuracies": 0.875, "rewards/chosen": -0.623069703578949, "rewards/margins": 2.0242176055908203, "rewards/rejected": -2.647287368774414, "step": 3452 }, { "epoch": 0.4, "learning_rate": 1.822251092476674e-07, "logits/chosen": -2.3339154720306396, "logits/rejected": -2.684643507003784, "logps/chosen": -418.045166015625, "logps/rejected": -324.20648193359375, "loss": 0.2287, "rewards/accuracies": 1.0, "rewards/chosen": -0.8053046464920044, "rewards/margins": 1.9975948333740234, "rewards/rejected": -2.8028995990753174, "step": 3453 }, { "epoch": 0.4, "learning_rate": 1.8218967757174916e-07, "logits/chosen": -2.811805486679077, "logits/rejected": -2.9165151119232178, "logps/chosen": -187.67620849609375, "logps/rejected": -164.572265625, "loss": 0.6514, "rewards/accuracies": 0.5, "rewards/chosen": -0.7946363687515259, "rewards/margins": 1.0353386402130127, "rewards/rejected": -1.8299751281738281, "step": 3454 }, { "epoch": 0.4, "learning_rate": 1.8215424589583088e-07, "logits/chosen": -1.6600738763809204, "logits/rejected": -1.6296623945236206, "logps/chosen": -454.31903076171875, "logps/rejected": -482.933837890625, "loss": 0.252, "rewards/accuracies": 0.875, "rewards/chosen": -0.30861684679985046, "rewards/margins": 4.083954811096191, "rewards/rejected": -4.392571449279785, "step": 3455 }, { "epoch": 0.4, "learning_rate": 1.821188142199126e-07, "logits/chosen": -2.2531771659851074, "logits/rejected": -2.2589941024780273, "logps/chosen": -224.0650177001953, "logps/rejected": -161.78729248046875, "loss": 0.3826, "rewards/accuracies": 0.75, "rewards/chosen": -0.6734862923622131, "rewards/margins": 1.4671533107757568, "rewards/rejected": -2.140639543533325, "step": 3456 }, { "epoch": 0.4, "learning_rate": 1.8208338254399432e-07, "logits/chosen": -2.6245877742767334, "logits/rejected": -2.4835093021392822, "logps/chosen": -185.7711181640625, "logps/rejected": -348.203857421875, "loss": 0.2139, "rewards/accuracies": 0.875, "rewards/chosen": -0.6677801609039307, "rewards/margins": 3.8298797607421875, "rewards/rejected": -4.497659683227539, "step": 3457 }, { "epoch": 0.4, "learning_rate": 1.8204795086807605e-07, "logits/chosen": -2.172866106033325, "logits/rejected": -2.2109076976776123, "logps/chosen": -343.8246154785156, "logps/rejected": -275.88568115234375, "loss": 0.347, "rewards/accuracies": 1.0, "rewards/chosen": -0.9842840433120728, "rewards/margins": 1.6339099407196045, "rewards/rejected": -2.618194103240967, "step": 3458 }, { "epoch": 0.4, "learning_rate": 1.8201251919215777e-07, "logits/chosen": -2.7038345336914062, "logits/rejected": -2.5690157413482666, "logps/chosen": -208.51568603515625, "logps/rejected": -256.2114562988281, "loss": 0.1692, "rewards/accuracies": 1.0, "rewards/chosen": -0.43005993962287903, "rewards/margins": 3.176769256591797, "rewards/rejected": -3.6068289279937744, "step": 3459 }, { "epoch": 0.4, "learning_rate": 1.819770875162395e-07, "logits/chosen": -2.684129238128662, "logits/rejected": -2.689105749130249, "logps/chosen": -261.56329345703125, "logps/rejected": -306.5220947265625, "loss": 0.6021, "rewards/accuracies": 0.75, "rewards/chosen": -0.6093653440475464, "rewards/margins": 1.2726601362228394, "rewards/rejected": -1.8820254802703857, "step": 3460 }, { "epoch": 0.4, "learning_rate": 1.8194165584032124e-07, "logits/chosen": -2.184813976287842, "logits/rejected": -2.0836241245269775, "logps/chosen": -183.69354248046875, "logps/rejected": -216.99349975585938, "loss": 0.3274, "rewards/accuracies": 0.875, "rewards/chosen": -1.1437641382217407, "rewards/margins": 1.7714369297027588, "rewards/rejected": -2.915201187133789, "step": 3461 }, { "epoch": 0.4, "learning_rate": 1.8190622416440296e-07, "logits/chosen": -2.167269706726074, "logits/rejected": -2.369142770767212, "logps/chosen": -197.41116333007812, "logps/rejected": -217.00262451171875, "loss": 0.2199, "rewards/accuracies": 1.0, "rewards/chosen": -0.5151286125183105, "rewards/margins": 1.6658915281295776, "rewards/rejected": -2.1810202598571777, "step": 3462 }, { "epoch": 0.4, "learning_rate": 1.8187079248848468e-07, "logits/chosen": -1.7284345626831055, "logits/rejected": -1.8576667308807373, "logps/chosen": -281.6728515625, "logps/rejected": -262.4928894042969, "loss": 0.3819, "rewards/accuracies": 0.75, "rewards/chosen": -0.8051906228065491, "rewards/margins": 2.4754250049591064, "rewards/rejected": -3.28061580657959, "step": 3463 }, { "epoch": 0.4, "learning_rate": 1.8183536081256643e-07, "logits/chosen": -2.0820486545562744, "logits/rejected": -1.8396461009979248, "logps/chosen": -257.5462341308594, "logps/rejected": -349.35980224609375, "loss": 0.5407, "rewards/accuracies": 0.75, "rewards/chosen": -1.8029916286468506, "rewards/margins": 2.4207277297973633, "rewards/rejected": -4.223719120025635, "step": 3464 }, { "epoch": 0.4, "learning_rate": 1.8179992913664818e-07, "logits/chosen": -2.2973759174346924, "logits/rejected": -2.3458855152130127, "logps/chosen": -267.74725341796875, "logps/rejected": -211.85549926757812, "loss": 0.2421, "rewards/accuracies": 0.875, "rewards/chosen": -0.9000104665756226, "rewards/margins": 1.9934169054031372, "rewards/rejected": -2.8934273719787598, "step": 3465 }, { "epoch": 0.4, "learning_rate": 1.817644974607299e-07, "logits/chosen": -2.381632089614868, "logits/rejected": -2.385927677154541, "logps/chosen": -350.2432556152344, "logps/rejected": -376.9503173828125, "loss": 0.5431, "rewards/accuracies": 0.75, "rewards/chosen": -0.8776895999908447, "rewards/margins": 1.1777267456054688, "rewards/rejected": -2.0554161071777344, "step": 3466 }, { "epoch": 0.4, "learning_rate": 1.8172906578481162e-07, "logits/chosen": -1.6446380615234375, "logits/rejected": -1.8471620082855225, "logps/chosen": -323.9928283691406, "logps/rejected": -194.76776123046875, "loss": 0.5308, "rewards/accuracies": 0.75, "rewards/chosen": -1.0138981342315674, "rewards/margins": 1.8179337978363037, "rewards/rejected": -2.831831932067871, "step": 3467 }, { "epoch": 0.4, "learning_rate": 1.8169363410889335e-07, "logits/chosen": -2.436187505722046, "logits/rejected": -2.117535352706909, "logps/chosen": -215.30230712890625, "logps/rejected": -349.6695251464844, "loss": 0.5181, "rewards/accuracies": 0.75, "rewards/chosen": -1.7542338371276855, "rewards/margins": 1.3095452785491943, "rewards/rejected": -3.063779354095459, "step": 3468 }, { "epoch": 0.4, "learning_rate": 1.8165820243297507e-07, "logits/chosen": -2.23575496673584, "logits/rejected": -2.237022638320923, "logps/chosen": -209.94631958007812, "logps/rejected": -187.55474853515625, "loss": 0.5578, "rewards/accuracies": 0.875, "rewards/chosen": -1.0012305974960327, "rewards/margins": 1.7419180870056152, "rewards/rejected": -2.7431485652923584, "step": 3469 }, { "epoch": 0.4, "learning_rate": 1.816227707570568e-07, "logits/chosen": -2.0229835510253906, "logits/rejected": -2.3649144172668457, "logps/chosen": -256.272216796875, "logps/rejected": -245.07492065429688, "loss": 0.4463, "rewards/accuracies": 0.875, "rewards/chosen": -0.6389966011047363, "rewards/margins": 1.7969733476638794, "rewards/rejected": -2.435969829559326, "step": 3470 }, { "epoch": 0.4, "learning_rate": 1.815873390811385e-07, "logits/chosen": -2.314714193344116, "logits/rejected": -2.226654529571533, "logps/chosen": -143.49826049804688, "logps/rejected": -255.7918243408203, "loss": 0.3637, "rewards/accuracies": 0.875, "rewards/chosen": -0.21251842379570007, "rewards/margins": 1.3284201622009277, "rewards/rejected": -1.5409386157989502, "step": 3471 }, { "epoch": 0.4, "learning_rate": 1.8155190740522026e-07, "logits/chosen": -2.4801931381225586, "logits/rejected": -2.494591236114502, "logps/chosen": -287.0316162109375, "logps/rejected": -273.40869140625, "loss": 0.2804, "rewards/accuracies": 0.875, "rewards/chosen": -0.858190655708313, "rewards/margins": 1.8553781509399414, "rewards/rejected": -2.713568687438965, "step": 3472 }, { "epoch": 0.4, "learning_rate": 1.8151647572930198e-07, "logits/chosen": -2.9624462127685547, "logits/rejected": -2.8155722618103027, "logps/chosen": -196.74366760253906, "logps/rejected": -230.84494018554688, "loss": 0.4332, "rewards/accuracies": 0.875, "rewards/chosen": -1.003164291381836, "rewards/margins": 1.9147225618362427, "rewards/rejected": -2.917886734008789, "step": 3473 }, { "epoch": 0.4, "learning_rate": 1.814810440533837e-07, "logits/chosen": -2.722914695739746, "logits/rejected": -2.4370079040527344, "logps/chosen": -241.6461181640625, "logps/rejected": -175.7159423828125, "loss": 0.1466, "rewards/accuracies": 1.0, "rewards/chosen": -0.03708430379629135, "rewards/margins": 2.8979039192199707, "rewards/rejected": -2.934988498687744, "step": 3474 }, { "epoch": 0.4, "learning_rate": 1.8144561237746545e-07, "logits/chosen": -1.7048097848892212, "logits/rejected": -1.9716781377792358, "logps/chosen": -178.8767852783203, "logps/rejected": -158.35025024414062, "loss": 0.3327, "rewards/accuracies": 0.875, "rewards/chosen": -0.5342893600463867, "rewards/margins": 1.9375873804092407, "rewards/rejected": -2.471876859664917, "step": 3475 }, { "epoch": 0.4, "learning_rate": 1.8141018070154718e-07, "logits/chosen": -2.469611167907715, "logits/rejected": -2.6071360111236572, "logps/chosen": -411.48944091796875, "logps/rejected": -358.63494873046875, "loss": 0.408, "rewards/accuracies": 0.875, "rewards/chosen": -0.9457463026046753, "rewards/margins": 0.9883435368537903, "rewards/rejected": -1.9340897798538208, "step": 3476 }, { "epoch": 0.4, "learning_rate": 1.8137474902562892e-07, "logits/chosen": -2.190236806869507, "logits/rejected": -2.340275764465332, "logps/chosen": -371.406982421875, "logps/rejected": -312.7951965332031, "loss": 1.0041, "rewards/accuracies": 0.5, "rewards/chosen": -1.7384518384933472, "rewards/margins": 1.2432079315185547, "rewards/rejected": -2.9816598892211914, "step": 3477 }, { "epoch": 0.4, "learning_rate": 1.8133931734971065e-07, "logits/chosen": -2.0100231170654297, "logits/rejected": -2.238492727279663, "logps/chosen": -540.544677734375, "logps/rejected": -260.81964111328125, "loss": 0.1303, "rewards/accuracies": 1.0, "rewards/chosen": -1.0537341833114624, "rewards/margins": 2.8624868392944336, "rewards/rejected": -3.9162209033966064, "step": 3478 }, { "epoch": 0.4, "learning_rate": 1.8130388567379237e-07, "logits/chosen": -2.8566157817840576, "logits/rejected": -2.6121459007263184, "logps/chosen": -286.8302917480469, "logps/rejected": -219.6306915283203, "loss": 0.5166, "rewards/accuracies": 0.875, "rewards/chosen": -1.4558669328689575, "rewards/margins": 3.2675795555114746, "rewards/rejected": -4.723446369171143, "step": 3479 }, { "epoch": 0.4, "learning_rate": 1.812684539978741e-07, "logits/chosen": -2.5391106605529785, "logits/rejected": -2.3765158653259277, "logps/chosen": -248.482177734375, "logps/rejected": -293.71917724609375, "loss": 0.1909, "rewards/accuracies": 1.0, "rewards/chosen": -0.5479005575180054, "rewards/margins": 2.992936372756958, "rewards/rejected": -3.540836811065674, "step": 3480 }, { "epoch": 0.4, "learning_rate": 1.812330223219558e-07, "logits/chosen": -2.289834499359131, "logits/rejected": -2.6613874435424805, "logps/chosen": -453.4770202636719, "logps/rejected": -229.69097900390625, "loss": 0.4775, "rewards/accuracies": 0.75, "rewards/chosen": -0.9666922092437744, "rewards/margins": 1.94059419631958, "rewards/rejected": -2.9072864055633545, "step": 3481 }, { "epoch": 0.41, "learning_rate": 1.8119759064603754e-07, "logits/chosen": -1.8712096214294434, "logits/rejected": -1.8772907257080078, "logps/chosen": -374.19696044921875, "logps/rejected": -354.5101013183594, "loss": 0.4168, "rewards/accuracies": 0.875, "rewards/chosen": -0.7251015305519104, "rewards/margins": 2.4606475830078125, "rewards/rejected": -3.185749053955078, "step": 3482 }, { "epoch": 0.41, "learning_rate": 1.8116215897011928e-07, "logits/chosen": -2.292731285095215, "logits/rejected": -2.1961114406585693, "logps/chosen": -315.6981506347656, "logps/rejected": -263.0450134277344, "loss": 0.3967, "rewards/accuracies": 0.625, "rewards/chosen": -1.0341318845748901, "rewards/margins": 1.7900776863098145, "rewards/rejected": -2.824209690093994, "step": 3483 }, { "epoch": 0.41, "learning_rate": 1.81126727294201e-07, "logits/chosen": -2.0701613426208496, "logits/rejected": -2.54555344581604, "logps/chosen": -369.0523681640625, "logps/rejected": -306.4394836425781, "loss": 0.5889, "rewards/accuracies": 0.625, "rewards/chosen": -0.5256201028823853, "rewards/margins": 1.6960835456848145, "rewards/rejected": -2.2217037677764893, "step": 3484 }, { "epoch": 0.41, "learning_rate": 1.8109129561828273e-07, "logits/chosen": -1.6034080982208252, "logits/rejected": -1.8846237659454346, "logps/chosen": -285.5194396972656, "logps/rejected": -279.722412109375, "loss": 0.6802, "rewards/accuracies": 0.75, "rewards/chosen": -2.2692179679870605, "rewards/margins": 1.1913917064666748, "rewards/rejected": -3.4606096744537354, "step": 3485 }, { "epoch": 0.41, "learning_rate": 1.8105586394236445e-07, "logits/chosen": -1.8610413074493408, "logits/rejected": -1.8935245275497437, "logps/chosen": -180.952392578125, "logps/rejected": -164.76702880859375, "loss": 0.3643, "rewards/accuracies": 0.75, "rewards/chosen": -0.8129696249961853, "rewards/margins": 1.7661027908325195, "rewards/rejected": -2.5790724754333496, "step": 3486 }, { "epoch": 0.41, "learning_rate": 1.810204322664462e-07, "logits/chosen": -2.23494291305542, "logits/rejected": -2.0021286010742188, "logps/chosen": -220.959716796875, "logps/rejected": -366.25482177734375, "loss": 0.1153, "rewards/accuracies": 1.0, "rewards/chosen": -0.27513551712036133, "rewards/margins": 2.357337474822998, "rewards/rejected": -2.6324732303619385, "step": 3487 }, { "epoch": 0.41, "learning_rate": 1.8098500059052795e-07, "logits/chosen": -2.4572815895080566, "logits/rejected": -2.74783992767334, "logps/chosen": -450.77935791015625, "logps/rejected": -287.99066162109375, "loss": 0.932, "rewards/accuracies": 0.75, "rewards/chosen": -2.7419416904449463, "rewards/margins": 1.1591975688934326, "rewards/rejected": -3.9011390209198, "step": 3488 }, { "epoch": 0.41, "learning_rate": 1.8094956891460967e-07, "logits/chosen": -2.1882410049438477, "logits/rejected": -2.083984851837158, "logps/chosen": -210.57313537597656, "logps/rejected": -311.7477722167969, "loss": 0.6179, "rewards/accuracies": 0.625, "rewards/chosen": -1.1949985027313232, "rewards/margins": 1.1025619506835938, "rewards/rejected": -2.297560691833496, "step": 3489 }, { "epoch": 0.41, "learning_rate": 1.809141372386914e-07, "logits/chosen": -2.760089874267578, "logits/rejected": -2.685616970062256, "logps/chosen": -164.60475158691406, "logps/rejected": -216.81430053710938, "loss": 0.2386, "rewards/accuracies": 1.0, "rewards/chosen": -0.5426988005638123, "rewards/margins": 2.8257641792297363, "rewards/rejected": -3.3684630393981934, "step": 3490 }, { "epoch": 0.41, "learning_rate": 1.8087870556277311e-07, "logits/chosen": -2.046090841293335, "logits/rejected": -1.7054047584533691, "logps/chosen": -238.82742309570312, "logps/rejected": -287.22332763671875, "loss": 0.862, "rewards/accuracies": 0.625, "rewards/chosen": -1.7993741035461426, "rewards/margins": -0.12083463370800018, "rewards/rejected": -1.678539514541626, "step": 3491 }, { "epoch": 0.41, "learning_rate": 1.8084327388685484e-07, "logits/chosen": -2.0527124404907227, "logits/rejected": -1.7574806213378906, "logps/chosen": -231.71376037597656, "logps/rejected": -313.06182861328125, "loss": 1.0016, "rewards/accuracies": 0.5, "rewards/chosen": -1.0811411142349243, "rewards/margins": 1.249955415725708, "rewards/rejected": -2.3310964107513428, "step": 3492 }, { "epoch": 0.41, "learning_rate": 1.8080784221093656e-07, "logits/chosen": -2.3059990406036377, "logits/rejected": -2.4513185024261475, "logps/chosen": -288.9188232421875, "logps/rejected": -256.84271240234375, "loss": 0.4665, "rewards/accuracies": 0.75, "rewards/chosen": -0.2827642858028412, "rewards/margins": 2.0554280281066895, "rewards/rejected": -2.3381922245025635, "step": 3493 }, { "epoch": 0.41, "learning_rate": 1.807724105350183e-07, "logits/chosen": -2.375497817993164, "logits/rejected": -2.4758927822113037, "logps/chosen": -301.5941467285156, "logps/rejected": -253.87278747558594, "loss": 0.2251, "rewards/accuracies": 0.875, "rewards/chosen": -0.983431339263916, "rewards/margins": 2.6547458171844482, "rewards/rejected": -3.6381771564483643, "step": 3494 }, { "epoch": 0.41, "learning_rate": 1.8073697885910003e-07, "logits/chosen": -2.234487533569336, "logits/rejected": -2.447134256362915, "logps/chosen": -375.3169250488281, "logps/rejected": -286.3021240234375, "loss": 0.5735, "rewards/accuracies": 0.625, "rewards/chosen": -0.8082699775695801, "rewards/margins": 1.4392603635787964, "rewards/rejected": -2.247530460357666, "step": 3495 }, { "epoch": 0.41, "learning_rate": 1.8070154718318175e-07, "logits/chosen": -2.3936147689819336, "logits/rejected": -2.562300205230713, "logps/chosen": -483.7294006347656, "logps/rejected": -306.5520324707031, "loss": 0.5889, "rewards/accuracies": 0.625, "rewards/chosen": -1.3517714738845825, "rewards/margins": 1.3163055181503296, "rewards/rejected": -2.668076992034912, "step": 3496 }, { "epoch": 0.41, "learning_rate": 1.8066611550726347e-07, "logits/chosen": -2.871753454208374, "logits/rejected": -2.822007179260254, "logps/chosen": -96.70232391357422, "logps/rejected": -265.482666015625, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/chosen": -0.031190797686576843, "rewards/margins": 4.913817405700684, "rewards/rejected": -4.945008277893066, "step": 3497 }, { "epoch": 0.41, "learning_rate": 1.806306838313452e-07, "logits/chosen": -1.9708285331726074, "logits/rejected": -2.004542589187622, "logps/chosen": -295.3489685058594, "logps/rejected": -360.9647521972656, "loss": 0.4208, "rewards/accuracies": 0.625, "rewards/chosen": -0.8711240887641907, "rewards/margins": 1.293111801147461, "rewards/rejected": -2.164236068725586, "step": 3498 }, { "epoch": 0.41, "learning_rate": 1.8059525215542697e-07, "logits/chosen": -1.8350584506988525, "logits/rejected": -1.9046859741210938, "logps/chosen": -368.670654296875, "logps/rejected": -369.3427734375, "loss": 0.5639, "rewards/accuracies": 0.75, "rewards/chosen": -1.269379734992981, "rewards/margins": 1.5329030752182007, "rewards/rejected": -2.8022828102111816, "step": 3499 }, { "epoch": 0.41, "learning_rate": 1.805598204795087e-07, "logits/chosen": -2.3209214210510254, "logits/rejected": -2.073077440261841, "logps/chosen": -311.97979736328125, "logps/rejected": -241.4569091796875, "loss": 0.1676, "rewards/accuracies": 1.0, "rewards/chosen": -1.1335914134979248, "rewards/margins": 2.1335089206695557, "rewards/rejected": -3.2671003341674805, "step": 3500 }, { "epoch": 0.41, "learning_rate": 1.8052438880359041e-07, "logits/chosen": -2.7670509815216064, "logits/rejected": -2.384779453277588, "logps/chosen": -229.99908447265625, "logps/rejected": -215.5937957763672, "loss": 0.4046, "rewards/accuracies": 0.875, "rewards/chosen": -0.6581175327301025, "rewards/margins": 1.1227374076843262, "rewards/rejected": -1.7808549404144287, "step": 3501 }, { "epoch": 0.41, "learning_rate": 1.8048895712767214e-07, "logits/chosen": -1.5841097831726074, "logits/rejected": -1.7556402683258057, "logps/chosen": -474.21783447265625, "logps/rejected": -300.3135986328125, "loss": 0.8974, "rewards/accuracies": 0.625, "rewards/chosen": -0.8037099242210388, "rewards/margins": 1.026113748550415, "rewards/rejected": -1.829823613166809, "step": 3502 }, { "epoch": 0.41, "learning_rate": 1.8045352545175386e-07, "logits/chosen": -1.4763684272766113, "logits/rejected": -1.6163674592971802, "logps/chosen": -275.1263122558594, "logps/rejected": -289.859375, "loss": 0.29, "rewards/accuracies": 0.875, "rewards/chosen": -0.3430905342102051, "rewards/margins": 1.8014004230499268, "rewards/rejected": -2.144490957260132, "step": 3503 }, { "epoch": 0.41, "learning_rate": 1.8041809377583558e-07, "logits/chosen": -2.443110942840576, "logits/rejected": -2.2346606254577637, "logps/chosen": -175.27273559570312, "logps/rejected": -355.02685546875, "loss": 0.1441, "rewards/accuracies": 1.0, "rewards/chosen": -0.27173158526420593, "rewards/margins": 3.894028663635254, "rewards/rejected": -4.165760040283203, "step": 3504 }, { "epoch": 0.41, "learning_rate": 1.803826620999173e-07, "logits/chosen": -1.6112265586853027, "logits/rejected": -1.9491150379180908, "logps/chosen": -521.761962890625, "logps/rejected": -339.87762451171875, "loss": 0.3651, "rewards/accuracies": 0.75, "rewards/chosen": -0.6493743658065796, "rewards/margins": 1.5721012353897095, "rewards/rejected": -2.221475601196289, "step": 3505 }, { "epoch": 0.41, "learning_rate": 1.8034723042399905e-07, "logits/chosen": -2.650249481201172, "logits/rejected": -2.5606021881103516, "logps/chosen": -93.33280944824219, "logps/rejected": -166.89125061035156, "loss": 0.3666, "rewards/accuracies": 0.875, "rewards/chosen": -1.5659737586975098, "rewards/margins": 1.8510222434997559, "rewards/rejected": -3.4169960021972656, "step": 3506 }, { "epoch": 0.41, "learning_rate": 1.8031179874808077e-07, "logits/chosen": -2.2135753631591797, "logits/rejected": -2.378681182861328, "logps/chosen": -398.87066650390625, "logps/rejected": -267.5250244140625, "loss": 0.3053, "rewards/accuracies": 0.875, "rewards/chosen": -1.0656906366348267, "rewards/margins": 2.171907901763916, "rewards/rejected": -3.237598180770874, "step": 3507 }, { "epoch": 0.41, "learning_rate": 1.802763670721625e-07, "logits/chosen": -2.9413599967956543, "logits/rejected": -2.9726080894470215, "logps/chosen": -162.19247436523438, "logps/rejected": -218.4949188232422, "loss": 0.5164, "rewards/accuracies": 0.875, "rewards/chosen": -0.9628802537918091, "rewards/margins": 1.8480364084243774, "rewards/rejected": -2.8109166622161865, "step": 3508 }, { "epoch": 0.41, "learning_rate": 1.8024093539624422e-07, "logits/chosen": -2.6523141860961914, "logits/rejected": -2.5905001163482666, "logps/chosen": -301.1302490234375, "logps/rejected": -346.9701232910156, "loss": 0.4735, "rewards/accuracies": 0.625, "rewards/chosen": -0.9804761409759521, "rewards/margins": 1.849817156791687, "rewards/rejected": -2.8302931785583496, "step": 3509 }, { "epoch": 0.41, "learning_rate": 1.80205503720326e-07, "logits/chosen": -2.1097044944763184, "logits/rejected": -2.0307507514953613, "logps/chosen": -324.13385009765625, "logps/rejected": -342.94622802734375, "loss": 0.3942, "rewards/accuracies": 0.75, "rewards/chosen": -0.5603328347206116, "rewards/margins": 1.3314919471740723, "rewards/rejected": -1.8918248414993286, "step": 3510 }, { "epoch": 0.41, "learning_rate": 1.8017007204440772e-07, "logits/chosen": -3.062049150466919, "logits/rejected": -3.036087989807129, "logps/chosen": -294.62030029296875, "logps/rejected": -280.7332763671875, "loss": 0.3501, "rewards/accuracies": 0.75, "rewards/chosen": -0.458784282207489, "rewards/margins": 2.485551357269287, "rewards/rejected": -2.944335460662842, "step": 3511 }, { "epoch": 0.41, "learning_rate": 1.8013464036848944e-07, "logits/chosen": -2.5273399353027344, "logits/rejected": -2.7085394859313965, "logps/chosen": -256.06280517578125, "logps/rejected": -320.10601806640625, "loss": 0.3613, "rewards/accuracies": 0.75, "rewards/chosen": -0.516601026058197, "rewards/margins": 2.4955244064331055, "rewards/rejected": -3.0121254920959473, "step": 3512 }, { "epoch": 0.41, "learning_rate": 1.8009920869257116e-07, "logits/chosen": -2.1126956939697266, "logits/rejected": -1.93386709690094, "logps/chosen": -398.8091735839844, "logps/rejected": -679.89794921875, "loss": 0.1578, "rewards/accuracies": 1.0, "rewards/chosen": -0.3739142417907715, "rewards/margins": 2.4208693504333496, "rewards/rejected": -2.794783592224121, "step": 3513 }, { "epoch": 0.41, "learning_rate": 1.8006377701665288e-07, "logits/chosen": -1.780928373336792, "logits/rejected": -1.9208498001098633, "logps/chosen": -295.7181091308594, "logps/rejected": -297.3642883300781, "loss": 0.3326, "rewards/accuracies": 0.875, "rewards/chosen": -0.5296118855476379, "rewards/margins": 2.3239622116088867, "rewards/rejected": -2.853574275970459, "step": 3514 }, { "epoch": 0.41, "learning_rate": 1.800283453407346e-07, "logits/chosen": -2.2927370071411133, "logits/rejected": -2.2423202991485596, "logps/chosen": -244.54112243652344, "logps/rejected": -206.78167724609375, "loss": 0.5312, "rewards/accuracies": 0.625, "rewards/chosen": -0.6663179397583008, "rewards/margins": 0.9385527968406677, "rewards/rejected": -1.6048707962036133, "step": 3515 }, { "epoch": 0.41, "learning_rate": 1.7999291366481633e-07, "logits/chosen": -2.1340131759643555, "logits/rejected": -1.9963161945343018, "logps/chosen": -292.4080810546875, "logps/rejected": -326.043212890625, "loss": 0.24, "rewards/accuracies": 0.875, "rewards/chosen": -0.6062695980072021, "rewards/margins": 2.322446346282959, "rewards/rejected": -2.928715705871582, "step": 3516 }, { "epoch": 0.41, "learning_rate": 1.7995748198889807e-07, "logits/chosen": -2.1126465797424316, "logits/rejected": -2.2372891902923584, "logps/chosen": -256.61053466796875, "logps/rejected": -286.8475646972656, "loss": 0.3407, "rewards/accuracies": 0.75, "rewards/chosen": -0.41370606422424316, "rewards/margins": 2.21718168258667, "rewards/rejected": -2.630887746810913, "step": 3517 }, { "epoch": 0.41, "learning_rate": 1.799220503129798e-07, "logits/chosen": -1.7795729637145996, "logits/rejected": -1.4197008609771729, "logps/chosen": -255.68893432617188, "logps/rejected": -491.74407958984375, "loss": 0.4916, "rewards/accuracies": 0.75, "rewards/chosen": -1.4601662158966064, "rewards/margins": 1.8375284671783447, "rewards/rejected": -3.297694683074951, "step": 3518 }, { "epoch": 0.41, "learning_rate": 1.7988661863706152e-07, "logits/chosen": -2.5678749084472656, "logits/rejected": -2.712343215942383, "logps/chosen": -325.0626220703125, "logps/rejected": -282.9248046875, "loss": 0.0933, "rewards/accuracies": 1.0, "rewards/chosen": -0.5546925067901611, "rewards/margins": 3.773632526397705, "rewards/rejected": -4.328324794769287, "step": 3519 }, { "epoch": 0.41, "learning_rate": 1.7985118696114324e-07, "logits/chosen": -2.684727907180786, "logits/rejected": -2.7776882648468018, "logps/chosen": -306.3103332519531, "logps/rejected": -250.71902465820312, "loss": 0.1226, "rewards/accuracies": 1.0, "rewards/chosen": -0.16544845700263977, "rewards/margins": 3.3635013103485107, "rewards/rejected": -3.5289502143859863, "step": 3520 }, { "epoch": 0.41, "learning_rate": 1.7981575528522496e-07, "logits/chosen": -2.343658924102783, "logits/rejected": -2.5683722496032715, "logps/chosen": -337.3077392578125, "logps/rejected": -239.2969512939453, "loss": 0.3855, "rewards/accuracies": 0.875, "rewards/chosen": -1.0015336275100708, "rewards/margins": 1.9261853694915771, "rewards/rejected": -2.9277191162109375, "step": 3521 }, { "epoch": 0.41, "learning_rate": 1.7978032360930674e-07, "logits/chosen": -2.182919502258301, "logits/rejected": -2.2048633098602295, "logps/chosen": -460.90325927734375, "logps/rejected": -355.2412109375, "loss": 0.2833, "rewards/accuracies": 0.875, "rewards/chosen": -1.2295035123825073, "rewards/margins": 2.1774675846099854, "rewards/rejected": -3.406970977783203, "step": 3522 }, { "epoch": 0.41, "learning_rate": 1.7974489193338846e-07, "logits/chosen": -1.9187732934951782, "logits/rejected": -1.9722661972045898, "logps/chosen": -400.7926330566406, "logps/rejected": -451.9626770019531, "loss": 0.1581, "rewards/accuracies": 1.0, "rewards/chosen": -0.5209985971450806, "rewards/margins": 2.953122615814209, "rewards/rejected": -3.47412109375, "step": 3523 }, { "epoch": 0.41, "learning_rate": 1.7970946025747018e-07, "logits/chosen": -2.0372047424316406, "logits/rejected": -1.7956178188323975, "logps/chosen": -256.33807373046875, "logps/rejected": -227.98388671875, "loss": 0.841, "rewards/accuracies": 0.75, "rewards/chosen": -0.6912475824356079, "rewards/margins": 1.1883018016815186, "rewards/rejected": -1.8795496225357056, "step": 3524 }, { "epoch": 0.41, "learning_rate": 1.796740285815519e-07, "logits/chosen": -2.0953521728515625, "logits/rejected": -2.1761341094970703, "logps/chosen": -265.03973388671875, "logps/rejected": -220.97825622558594, "loss": 0.6842, "rewards/accuracies": 0.625, "rewards/chosen": -1.3149622678756714, "rewards/margins": 0.9173247218132019, "rewards/rejected": -2.2322869300842285, "step": 3525 }, { "epoch": 0.41, "learning_rate": 1.7963859690563363e-07, "logits/chosen": -1.6164772510528564, "logits/rejected": -1.7761706113815308, "logps/chosen": -340.1525573730469, "logps/rejected": -401.46380615234375, "loss": 0.6148, "rewards/accuracies": 0.625, "rewards/chosen": -1.1540478467941284, "rewards/margins": 1.143075942993164, "rewards/rejected": -2.297123908996582, "step": 3526 }, { "epoch": 0.41, "learning_rate": 1.7960316522971535e-07, "logits/chosen": -1.5704371929168701, "logits/rejected": -2.412461757659912, "logps/chosen": -430.100830078125, "logps/rejected": -238.9053192138672, "loss": 0.8264, "rewards/accuracies": 0.625, "rewards/chosen": -1.6638660430908203, "rewards/margins": 1.5185625553131104, "rewards/rejected": -3.1824283599853516, "step": 3527 }, { "epoch": 0.41, "learning_rate": 1.795677335537971e-07, "logits/chosen": -2.345571994781494, "logits/rejected": -2.446469783782959, "logps/chosen": -253.28292846679688, "logps/rejected": -382.1732177734375, "loss": 0.8381, "rewards/accuracies": 0.625, "rewards/chosen": -1.7856773138046265, "rewards/margins": 2.4898681640625, "rewards/rejected": -4.275545597076416, "step": 3528 }, { "epoch": 0.41, "learning_rate": 1.7953230187787882e-07, "logits/chosen": -1.757242202758789, "logits/rejected": -1.9060916900634766, "logps/chosen": -436.24365234375, "logps/rejected": -448.3809814453125, "loss": 0.3436, "rewards/accuracies": 0.875, "rewards/chosen": -0.5778489112854004, "rewards/margins": 1.7005529403686523, "rewards/rejected": -2.2784018516540527, "step": 3529 }, { "epoch": 0.41, "learning_rate": 1.7949687020196054e-07, "logits/chosen": -2.4470512866973877, "logits/rejected": -2.5518784523010254, "logps/chosen": -501.38458251953125, "logps/rejected": -380.09503173828125, "loss": 0.2604, "rewards/accuracies": 0.875, "rewards/chosen": -1.362999439239502, "rewards/margins": 3.1923904418945312, "rewards/rejected": -4.555389881134033, "step": 3530 }, { "epoch": 0.41, "learning_rate": 1.7946143852604226e-07, "logits/chosen": -2.3042213916778564, "logits/rejected": -2.732530355453491, "logps/chosen": -238.48646545410156, "logps/rejected": -145.545166015625, "loss": 0.3691, "rewards/accuracies": 0.875, "rewards/chosen": -0.6837102174758911, "rewards/margins": 1.9494374990463257, "rewards/rejected": -2.633147716522217, "step": 3531 }, { "epoch": 0.41, "learning_rate": 1.7942600685012399e-07, "logits/chosen": -2.0871894359588623, "logits/rejected": -1.7665197849273682, "logps/chosen": -252.0256805419922, "logps/rejected": -283.9460144042969, "loss": 0.9837, "rewards/accuracies": 0.625, "rewards/chosen": -1.7659492492675781, "rewards/margins": 0.19532887637615204, "rewards/rejected": -1.961277961730957, "step": 3532 }, { "epoch": 0.41, "learning_rate": 1.793905751742057e-07, "logits/chosen": -2.557657241821289, "logits/rejected": -2.61757755279541, "logps/chosen": -335.87109375, "logps/rejected": -241.59959411621094, "loss": 1.2604, "rewards/accuracies": 0.375, "rewards/chosen": -2.322140693664551, "rewards/margins": 0.5192098617553711, "rewards/rejected": -2.8413500785827637, "step": 3533 }, { "epoch": 0.41, "learning_rate": 1.7935514349828748e-07, "logits/chosen": -1.8706696033477783, "logits/rejected": -1.7128409147262573, "logps/chosen": -422.0047912597656, "logps/rejected": -315.1707458496094, "loss": 0.4431, "rewards/accuracies": 0.75, "rewards/chosen": -1.0879558324813843, "rewards/margins": 0.8794743418693542, "rewards/rejected": -1.9674302339553833, "step": 3534 }, { "epoch": 0.41, "learning_rate": 1.793197118223692e-07, "logits/chosen": -2.341827154159546, "logits/rejected": -2.3963005542755127, "logps/chosen": -353.41265869140625, "logps/rejected": -372.6905517578125, "loss": 0.5782, "rewards/accuracies": 0.75, "rewards/chosen": -1.9014909267425537, "rewards/margins": 1.74837327003479, "rewards/rejected": -3.6498639583587646, "step": 3535 }, { "epoch": 0.41, "learning_rate": 1.7928428014645093e-07, "logits/chosen": -2.5787148475646973, "logits/rejected": -2.742330312728882, "logps/chosen": -245.3394012451172, "logps/rejected": -267.8628234863281, "loss": 0.3825, "rewards/accuracies": 0.875, "rewards/chosen": -1.1837018728256226, "rewards/margins": 2.1545286178588867, "rewards/rejected": -3.3382303714752197, "step": 3536 }, { "epoch": 0.41, "learning_rate": 1.7924884847053265e-07, "logits/chosen": -2.4647176265716553, "logits/rejected": -2.455451488494873, "logps/chosen": -295.1525573730469, "logps/rejected": -391.12188720703125, "loss": 0.4218, "rewards/accuracies": 0.875, "rewards/chosen": -1.8545994758605957, "rewards/margins": 0.995447039604187, "rewards/rejected": -2.850046157836914, "step": 3537 }, { "epoch": 0.41, "learning_rate": 1.7921341679461437e-07, "logits/chosen": -1.7004281282424927, "logits/rejected": -1.9507696628570557, "logps/chosen": -494.37646484375, "logps/rejected": -227.14193725585938, "loss": 0.4808, "rewards/accuracies": 0.625, "rewards/chosen": -1.1681797504425049, "rewards/margins": 0.7754212617874146, "rewards/rejected": -1.9436010122299194, "step": 3538 }, { "epoch": 0.41, "learning_rate": 1.7917798511869612e-07, "logits/chosen": -1.7739512920379639, "logits/rejected": -1.9718095064163208, "logps/chosen": -437.89849853515625, "logps/rejected": -357.6981201171875, "loss": 0.5267, "rewards/accuracies": 0.75, "rewards/chosen": -1.0080795288085938, "rewards/margins": 0.7793274521827698, "rewards/rejected": -1.7874071598052979, "step": 3539 }, { "epoch": 0.41, "learning_rate": 1.7914255344277784e-07, "logits/chosen": -2.911754608154297, "logits/rejected": -2.804948568344116, "logps/chosen": -157.03355407714844, "logps/rejected": -169.161865234375, "loss": 0.2739, "rewards/accuracies": 0.875, "rewards/chosen": -0.6699576377868652, "rewards/margins": 2.372821807861328, "rewards/rejected": -3.0427799224853516, "step": 3540 }, { "epoch": 0.41, "learning_rate": 1.7910712176685956e-07, "logits/chosen": -2.305417537689209, "logits/rejected": -2.5289762020111084, "logps/chosen": -529.061279296875, "logps/rejected": -326.00439453125, "loss": 0.2355, "rewards/accuracies": 0.875, "rewards/chosen": -0.999625563621521, "rewards/margins": 2.4277429580688477, "rewards/rejected": -3.427368640899658, "step": 3541 }, { "epoch": 0.41, "learning_rate": 1.7907169009094129e-07, "logits/chosen": -2.047551393508911, "logits/rejected": -2.28385329246521, "logps/chosen": -385.5389709472656, "logps/rejected": -283.9618225097656, "loss": 0.3993, "rewards/accuracies": 0.625, "rewards/chosen": -0.022163957357406616, "rewards/margins": 1.9295943975448608, "rewards/rejected": -1.9517585039138794, "step": 3542 }, { "epoch": 0.41, "learning_rate": 1.79036258415023e-07, "logits/chosen": -2.390101671218872, "logits/rejected": -2.5970568656921387, "logps/chosen": -252.03076171875, "logps/rejected": -175.04025268554688, "loss": 0.644, "rewards/accuracies": 0.625, "rewards/chosen": -0.897097110748291, "rewards/margins": 0.5689926147460938, "rewards/rejected": -1.4660897254943848, "step": 3543 }, { "epoch": 0.41, "learning_rate": 1.7900082673910473e-07, "logits/chosen": -2.293724536895752, "logits/rejected": -2.166513442993164, "logps/chosen": -289.19854736328125, "logps/rejected": -250.8894805908203, "loss": 0.4317, "rewards/accuracies": 0.75, "rewards/chosen": -0.5054531097412109, "rewards/margins": 1.270387887954712, "rewards/rejected": -1.7758409976959229, "step": 3544 }, { "epoch": 0.41, "learning_rate": 1.789653950631865e-07, "logits/chosen": -2.3611490726470947, "logits/rejected": -2.4121742248535156, "logps/chosen": -354.8812255859375, "logps/rejected": -219.28604125976562, "loss": 0.1796, "rewards/accuracies": 0.875, "rewards/chosen": -0.8508141040802002, "rewards/margins": 2.677259683609009, "rewards/rejected": -3.528073787689209, "step": 3545 }, { "epoch": 0.41, "learning_rate": 1.7892996338726823e-07, "logits/chosen": -2.5104284286499023, "logits/rejected": -2.669137954711914, "logps/chosen": -206.39553833007812, "logps/rejected": -268.5229797363281, "loss": 0.303, "rewards/accuracies": 0.875, "rewards/chosen": -1.100898265838623, "rewards/margins": 2.9684388637542725, "rewards/rejected": -4.069336891174316, "step": 3546 }, { "epoch": 0.41, "learning_rate": 1.7889453171134995e-07, "logits/chosen": -2.480896234512329, "logits/rejected": -2.676133155822754, "logps/chosen": -329.6253356933594, "logps/rejected": -279.0306091308594, "loss": 0.1207, "rewards/accuracies": 1.0, "rewards/chosen": -0.6654515266418457, "rewards/margins": 3.365018367767334, "rewards/rejected": -4.03046989440918, "step": 3547 }, { "epoch": 0.41, "learning_rate": 1.7885910003543167e-07, "logits/chosen": -2.8447577953338623, "logits/rejected": -2.5669243335723877, "logps/chosen": -155.79615783691406, "logps/rejected": -218.80670166015625, "loss": 0.3051, "rewards/accuracies": 0.875, "rewards/chosen": -1.4439754486083984, "rewards/margins": 2.430471897125244, "rewards/rejected": -3.8744473457336426, "step": 3548 }, { "epoch": 0.41, "learning_rate": 1.788236683595134e-07, "logits/chosen": -2.5551083087921143, "logits/rejected": -2.35359525680542, "logps/chosen": -274.81097412109375, "logps/rejected": -388.0503845214844, "loss": 0.286, "rewards/accuracies": 0.875, "rewards/chosen": -0.6405341625213623, "rewards/margins": 2.932678699493408, "rewards/rejected": -3.5732128620147705, "step": 3549 }, { "epoch": 0.41, "learning_rate": 1.7878823668359512e-07, "logits/chosen": -1.6210076808929443, "logits/rejected": -1.4251939058303833, "logps/chosen": -289.1561279296875, "logps/rejected": -381.55828857421875, "loss": 0.682, "rewards/accuracies": 0.625, "rewards/chosen": -1.0639681816101074, "rewards/margins": 1.4740254878997803, "rewards/rejected": -2.537993907928467, "step": 3550 }, { "epoch": 0.41, "learning_rate": 1.7875280500767686e-07, "logits/chosen": -2.367678165435791, "logits/rejected": -2.5169663429260254, "logps/chosen": -225.14309692382812, "logps/rejected": -190.63180541992188, "loss": 0.4571, "rewards/accuracies": 0.75, "rewards/chosen": -0.6098225116729736, "rewards/margins": 1.59074068069458, "rewards/rejected": -2.2005631923675537, "step": 3551 }, { "epoch": 0.41, "learning_rate": 1.787173733317586e-07, "logits/chosen": -2.5995543003082275, "logits/rejected": -2.5345382690429688, "logps/chosen": -90.4053955078125, "logps/rejected": -151.53567504882812, "loss": 1.2431, "rewards/accuracies": 0.75, "rewards/chosen": -2.2667312622070312, "rewards/margins": -0.011912524700164795, "rewards/rejected": -2.2548186779022217, "step": 3552 }, { "epoch": 0.41, "learning_rate": 1.786819416558403e-07, "logits/chosen": -2.086848258972168, "logits/rejected": -2.436957597732544, "logps/chosen": -279.67852783203125, "logps/rejected": -192.96359252929688, "loss": 0.1879, "rewards/accuracies": 1.0, "rewards/chosen": -0.26876595616340637, "rewards/margins": 3.0440266132354736, "rewards/rejected": -3.3127923011779785, "step": 3553 }, { "epoch": 0.41, "learning_rate": 1.7864650997992203e-07, "logits/chosen": -2.781536340713501, "logits/rejected": -2.6414291858673096, "logps/chosen": -183.00436401367188, "logps/rejected": -178.260009765625, "loss": 0.3951, "rewards/accuracies": 0.75, "rewards/chosen": -1.7767140865325928, "rewards/margins": 1.3530120849609375, "rewards/rejected": -3.1297261714935303, "step": 3554 }, { "epoch": 0.41, "learning_rate": 1.7861107830400375e-07, "logits/chosen": -2.7459869384765625, "logits/rejected": -2.7292261123657227, "logps/chosen": -306.73883056640625, "logps/rejected": -355.31353759765625, "loss": 0.4011, "rewards/accuracies": 0.75, "rewards/chosen": -0.7559719085693359, "rewards/margins": 3.492588996887207, "rewards/rejected": -4.248560905456543, "step": 3555 }, { "epoch": 0.41, "learning_rate": 1.7857564662808548e-07, "logits/chosen": -2.2535552978515625, "logits/rejected": -2.2723939418792725, "logps/chosen": -247.45884704589844, "logps/rejected": -276.532958984375, "loss": 0.4472, "rewards/accuracies": 0.75, "rewards/chosen": -1.026872158050537, "rewards/margins": 2.3950953483581543, "rewards/rejected": -3.4219675064086914, "step": 3556 }, { "epoch": 0.41, "learning_rate": 1.7854021495216725e-07, "logits/chosen": -2.839066982269287, "logits/rejected": -2.7557475566864014, "logps/chosen": -110.80368041992188, "logps/rejected": -173.53857421875, "loss": 0.3137, "rewards/accuracies": 0.875, "rewards/chosen": -0.4238817095756531, "rewards/margins": 2.6966474056243896, "rewards/rejected": -3.1205291748046875, "step": 3557 }, { "epoch": 0.41, "learning_rate": 1.7850478327624897e-07, "logits/chosen": -2.57080078125, "logits/rejected": -2.656045913696289, "logps/chosen": -177.0863494873047, "logps/rejected": -321.3726501464844, "loss": 0.3506, "rewards/accuracies": 0.75, "rewards/chosen": -1.4827063083648682, "rewards/margins": 2.9566259384155273, "rewards/rejected": -4.439332008361816, "step": 3558 }, { "epoch": 0.41, "learning_rate": 1.784693516003307e-07, "logits/chosen": -1.5809412002563477, "logits/rejected": -2.154513359069824, "logps/chosen": -367.22540283203125, "logps/rejected": -354.9560546875, "loss": 0.2447, "rewards/accuracies": 0.875, "rewards/chosen": -0.758314311504364, "rewards/margins": 3.441124200820923, "rewards/rejected": -4.19943904876709, "step": 3559 }, { "epoch": 0.41, "learning_rate": 1.7843391992441242e-07, "logits/chosen": -2.4254488945007324, "logits/rejected": -2.488272190093994, "logps/chosen": -233.5460205078125, "logps/rejected": -254.53948974609375, "loss": 0.5147, "rewards/accuracies": 0.625, "rewards/chosen": -0.7740948796272278, "rewards/margins": 1.2447701692581177, "rewards/rejected": -2.0188651084899902, "step": 3560 }, { "epoch": 0.41, "learning_rate": 1.7839848824849414e-07, "logits/chosen": -1.762966275215149, "logits/rejected": -1.8379013538360596, "logps/chosen": -369.89849853515625, "logps/rejected": -246.66749572753906, "loss": 0.6498, "rewards/accuracies": 0.625, "rewards/chosen": -0.5052419900894165, "rewards/margins": 0.4532448947429657, "rewards/rejected": -0.9584868550300598, "step": 3561 }, { "epoch": 0.41, "learning_rate": 1.783630565725759e-07, "logits/chosen": -2.361143112182617, "logits/rejected": -2.192044496536255, "logps/chosen": -293.4867858886719, "logps/rejected": -277.02056884765625, "loss": 0.6629, "rewards/accuracies": 0.875, "rewards/chosen": -1.1997603178024292, "rewards/margins": 3.244347095489502, "rewards/rejected": -4.4441070556640625, "step": 3562 }, { "epoch": 0.41, "learning_rate": 1.783276248966576e-07, "logits/chosen": -2.2934041023254395, "logits/rejected": -2.162379026412964, "logps/chosen": -462.61248779296875, "logps/rejected": -396.71942138671875, "loss": 0.1872, "rewards/accuracies": 0.875, "rewards/chosen": -1.041495442390442, "rewards/margins": 3.1131086349487305, "rewards/rejected": -4.154603958129883, "step": 3563 }, { "epoch": 0.41, "learning_rate": 1.7829219322073933e-07, "logits/chosen": -2.6462767124176025, "logits/rejected": -2.7737021446228027, "logps/chosen": -317.071533203125, "logps/rejected": -165.41400146484375, "loss": 0.2775, "rewards/accuracies": 0.875, "rewards/chosen": -0.1857931911945343, "rewards/margins": 2.4301185607910156, "rewards/rejected": -2.6159117221832275, "step": 3564 }, { "epoch": 0.41, "learning_rate": 1.7825676154482105e-07, "logits/chosen": -2.790363073348999, "logits/rejected": -2.722480297088623, "logps/chosen": -193.44439697265625, "logps/rejected": -508.78594970703125, "loss": 0.7189, "rewards/accuracies": 0.75, "rewards/chosen": -1.300142526626587, "rewards/margins": 1.875503420829773, "rewards/rejected": -3.1756460666656494, "step": 3565 }, { "epoch": 0.41, "learning_rate": 1.7822132986890278e-07, "logits/chosen": -2.493271827697754, "logits/rejected": -2.254833221435547, "logps/chosen": -310.09539794921875, "logps/rejected": -253.7480926513672, "loss": 0.2471, "rewards/accuracies": 0.875, "rewards/chosen": -1.2590174674987793, "rewards/margins": 1.9139039516448975, "rewards/rejected": -3.172921657562256, "step": 3566 }, { "epoch": 0.41, "learning_rate": 1.781858981929845e-07, "logits/chosen": -1.979998230934143, "logits/rejected": -1.9005886316299438, "logps/chosen": -170.67770385742188, "logps/rejected": -251.8497314453125, "loss": 0.3279, "rewards/accuracies": 0.75, "rewards/chosen": -0.5751266479492188, "rewards/margins": 3.0666005611419678, "rewards/rejected": -3.6417269706726074, "step": 3567 }, { "epoch": 0.42, "learning_rate": 1.7815046651706625e-07, "logits/chosen": -2.5845284461975098, "logits/rejected": -2.626157760620117, "logps/chosen": -179.29840087890625, "logps/rejected": -229.39495849609375, "loss": 0.2864, "rewards/accuracies": 0.75, "rewards/chosen": -1.083378553390503, "rewards/margins": 4.380307197570801, "rewards/rejected": -5.463685512542725, "step": 3568 }, { "epoch": 0.42, "learning_rate": 1.78115034841148e-07, "logits/chosen": -2.1874494552612305, "logits/rejected": -2.142787456512451, "logps/chosen": -426.765625, "logps/rejected": -378.71051025390625, "loss": 0.1005, "rewards/accuracies": 1.0, "rewards/chosen": -0.21815919876098633, "rewards/margins": 3.244081497192383, "rewards/rejected": -3.462240695953369, "step": 3569 }, { "epoch": 0.42, "learning_rate": 1.7807960316522972e-07, "logits/chosen": -2.0221614837646484, "logits/rejected": -1.90340256690979, "logps/chosen": -316.963623046875, "logps/rejected": -304.0686950683594, "loss": 0.4104, "rewards/accuracies": 0.875, "rewards/chosen": -0.34536200761795044, "rewards/margins": 1.8000059127807617, "rewards/rejected": -2.1453678607940674, "step": 3570 }, { "epoch": 0.42, "learning_rate": 1.7804417148931144e-07, "logits/chosen": -2.0624890327453613, "logits/rejected": -2.423828125, "logps/chosen": -468.12786865234375, "logps/rejected": -317.5612487792969, "loss": 0.4791, "rewards/accuracies": 0.75, "rewards/chosen": -0.40505450963974, "rewards/margins": 2.180227041244507, "rewards/rejected": -2.5852818489074707, "step": 3571 }, { "epoch": 0.42, "learning_rate": 1.7800873981339316e-07, "logits/chosen": -2.6220409870147705, "logits/rejected": -2.660689115524292, "logps/chosen": -244.15060424804688, "logps/rejected": -222.98370361328125, "loss": 0.5038, "rewards/accuracies": 0.75, "rewards/chosen": -0.73484867811203, "rewards/margins": 1.7305066585540771, "rewards/rejected": -2.465355157852173, "step": 3572 }, { "epoch": 0.42, "learning_rate": 1.779733081374749e-07, "logits/chosen": -2.7767302989959717, "logits/rejected": -2.654404401779175, "logps/chosen": -159.46878051757812, "logps/rejected": -207.09323120117188, "loss": 0.4145, "rewards/accuracies": 0.875, "rewards/chosen": -0.4634143114089966, "rewards/margins": 3.207425594329834, "rewards/rejected": -3.670839786529541, "step": 3573 }, { "epoch": 0.42, "learning_rate": 1.7793787646155663e-07, "logits/chosen": -2.261578321456909, "logits/rejected": -2.408339262008667, "logps/chosen": -311.24273681640625, "logps/rejected": -213.4822540283203, "loss": 0.4469, "rewards/accuracies": 0.75, "rewards/chosen": -1.9702577590942383, "rewards/margins": 1.3133666515350342, "rewards/rejected": -3.2836246490478516, "step": 3574 }, { "epoch": 0.42, "learning_rate": 1.7790244478563835e-07, "logits/chosen": -2.3036224842071533, "logits/rejected": -2.3730084896087646, "logps/chosen": -290.65936279296875, "logps/rejected": -193.30018615722656, "loss": 0.3107, "rewards/accuracies": 0.75, "rewards/chosen": -0.6184695959091187, "rewards/margins": 2.126649856567383, "rewards/rejected": -2.745119333267212, "step": 3575 }, { "epoch": 0.42, "learning_rate": 1.7786701310972008e-07, "logits/chosen": -2.6591644287109375, "logits/rejected": -2.596353530883789, "logps/chosen": -357.39239501953125, "logps/rejected": -297.5425720214844, "loss": 1.1002, "rewards/accuracies": 0.75, "rewards/chosen": -1.900606632232666, "rewards/margins": 1.0658588409423828, "rewards/rejected": -2.966465473175049, "step": 3576 }, { "epoch": 0.42, "learning_rate": 1.778315814338018e-07, "logits/chosen": -2.6177947521209717, "logits/rejected": -2.5440125465393066, "logps/chosen": -627.4856567382812, "logps/rejected": -329.6288757324219, "loss": 0.1689, "rewards/accuracies": 0.875, "rewards/chosen": -0.8175778388977051, "rewards/margins": 3.001054048538208, "rewards/rejected": -3.818632125854492, "step": 3577 }, { "epoch": 0.42, "learning_rate": 1.7779614975788352e-07, "logits/chosen": -2.025712490081787, "logits/rejected": -2.3920509815216064, "logps/chosen": -350.3843994140625, "logps/rejected": -237.90760803222656, "loss": 0.2389, "rewards/accuracies": 1.0, "rewards/chosen": -0.4798944294452667, "rewards/margins": 2.5500729084014893, "rewards/rejected": -3.0299673080444336, "step": 3578 }, { "epoch": 0.42, "learning_rate": 1.7776071808196524e-07, "logits/chosen": -2.768082857131958, "logits/rejected": -2.6145167350769043, "logps/chosen": -87.90728759765625, "logps/rejected": -210.63143920898438, "loss": 0.6045, "rewards/accuracies": 0.625, "rewards/chosen": -1.2232578992843628, "rewards/margins": 2.6498827934265137, "rewards/rejected": -3.873140335083008, "step": 3579 }, { "epoch": 0.42, "learning_rate": 1.77725286406047e-07, "logits/chosen": -1.7401509284973145, "logits/rejected": -1.832111120223999, "logps/chosen": -368.4840393066406, "logps/rejected": -313.94329833984375, "loss": 1.2187, "rewards/accuracies": 0.5, "rewards/chosen": -3.2665634155273438, "rewards/margins": -0.09929028153419495, "rewards/rejected": -3.1672730445861816, "step": 3580 }, { "epoch": 0.42, "learning_rate": 1.7768985473012874e-07, "logits/chosen": -2.432283401489258, "logits/rejected": -2.452744245529175, "logps/chosen": -257.9860534667969, "logps/rejected": -193.98097229003906, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": 0.2247353196144104, "rewards/margins": 2.963430166244507, "rewards/rejected": -2.738694667816162, "step": 3581 }, { "epoch": 0.42, "learning_rate": 1.7765442305421046e-07, "logits/chosen": -1.7374093532562256, "logits/rejected": -1.4071621894836426, "logps/chosen": -240.60130310058594, "logps/rejected": -265.11376953125, "loss": 0.4674, "rewards/accuracies": 0.625, "rewards/chosen": -0.6125986576080322, "rewards/margins": 1.462754249572754, "rewards/rejected": -2.075352907180786, "step": 3582 }, { "epoch": 0.42, "learning_rate": 1.7761899137829218e-07, "logits/chosen": -2.8444929122924805, "logits/rejected": -2.783632278442383, "logps/chosen": -343.1923828125, "logps/rejected": -237.08937072753906, "loss": 0.4051, "rewards/accuracies": 0.625, "rewards/chosen": -1.251845121383667, "rewards/margins": 2.149898052215576, "rewards/rejected": -3.401743173599243, "step": 3583 }, { "epoch": 0.42, "learning_rate": 1.775835597023739e-07, "logits/chosen": -2.0380043983459473, "logits/rejected": -2.1664633750915527, "logps/chosen": -218.8868408203125, "logps/rejected": -207.6944122314453, "loss": 0.7806, "rewards/accuracies": 0.5, "rewards/chosen": -1.3493804931640625, "rewards/margins": 0.6057875156402588, "rewards/rejected": -1.9551681280136108, "step": 3584 }, { "epoch": 0.42, "learning_rate": 1.7754812802645566e-07, "logits/chosen": -1.9674994945526123, "logits/rejected": -2.103658437728882, "logps/chosen": -514.6777954101562, "logps/rejected": -317.1293640136719, "loss": 0.4516, "rewards/accuracies": 0.625, "rewards/chosen": -0.8153533935546875, "rewards/margins": 2.0653188228607178, "rewards/rejected": -2.880671977996826, "step": 3585 }, { "epoch": 0.42, "learning_rate": 1.7751269635053738e-07, "logits/chosen": -2.6532673835754395, "logits/rejected": -2.511963367462158, "logps/chosen": -189.17442321777344, "logps/rejected": -206.99172973632812, "loss": 0.2706, "rewards/accuracies": 0.875, "rewards/chosen": -0.9026861190795898, "rewards/margins": 1.764678955078125, "rewards/rejected": -2.667365074157715, "step": 3586 }, { "epoch": 0.42, "learning_rate": 1.774772646746191e-07, "logits/chosen": -2.826728105545044, "logits/rejected": -2.6105151176452637, "logps/chosen": -142.25827026367188, "logps/rejected": -294.17462158203125, "loss": 0.2584, "rewards/accuracies": 1.0, "rewards/chosen": -0.6224987506866455, "rewards/margins": 2.319270133972168, "rewards/rejected": -2.9417686462402344, "step": 3587 }, { "epoch": 0.42, "learning_rate": 1.7744183299870082e-07, "logits/chosen": -2.3481814861297607, "logits/rejected": -2.044620990753174, "logps/chosen": -205.29986572265625, "logps/rejected": -316.3489685058594, "loss": 1.0331, "rewards/accuracies": 0.75, "rewards/chosen": -1.477330207824707, "rewards/margins": 1.7766166925430298, "rewards/rejected": -3.2539470195770264, "step": 3588 }, { "epoch": 0.42, "learning_rate": 1.7740640132278254e-07, "logits/chosen": -2.2341949939727783, "logits/rejected": -2.1889734268188477, "logps/chosen": -226.51527404785156, "logps/rejected": -238.739501953125, "loss": 0.1091, "rewards/accuracies": 1.0, "rewards/chosen": -0.7186123132705688, "rewards/margins": 3.299710273742676, "rewards/rejected": -4.018322467803955, "step": 3589 }, { "epoch": 0.42, "learning_rate": 1.7737096964686427e-07, "logits/chosen": -2.0105953216552734, "logits/rejected": -1.8021154403686523, "logps/chosen": -151.1517333984375, "logps/rejected": -382.5098876953125, "loss": 0.4521, "rewards/accuracies": 0.625, "rewards/chosen": -0.39452704787254333, "rewards/margins": 1.8812806606292725, "rewards/rejected": -2.2758076190948486, "step": 3590 }, { "epoch": 0.42, "learning_rate": 1.7733553797094601e-07, "logits/chosen": -2.51668119430542, "logits/rejected": -2.247572898864746, "logps/chosen": -171.36346435546875, "logps/rejected": -397.53155517578125, "loss": 0.4308, "rewards/accuracies": 0.75, "rewards/chosen": -0.9683553576469421, "rewards/margins": 1.9785417318344116, "rewards/rejected": -2.946897029876709, "step": 3591 }, { "epoch": 0.42, "learning_rate": 1.7730010629502776e-07, "logits/chosen": -2.838071584701538, "logits/rejected": -2.5414888858795166, "logps/chosen": -153.71627807617188, "logps/rejected": -215.041748046875, "loss": 0.2599, "rewards/accuracies": 0.875, "rewards/chosen": -0.3468116521835327, "rewards/margins": 2.122192144393921, "rewards/rejected": -2.469003915786743, "step": 3592 }, { "epoch": 0.42, "learning_rate": 1.7726467461910949e-07, "logits/chosen": -2.336592435836792, "logits/rejected": -2.4484665393829346, "logps/chosen": -267.7991027832031, "logps/rejected": -228.49066162109375, "loss": 0.1501, "rewards/accuracies": 1.0, "rewards/chosen": -0.13436336815357208, "rewards/margins": 2.288339853286743, "rewards/rejected": -2.422703266143799, "step": 3593 }, { "epoch": 0.42, "learning_rate": 1.772292429431912e-07, "logits/chosen": -2.1479883193969727, "logits/rejected": -2.079428195953369, "logps/chosen": -200.84573364257812, "logps/rejected": -323.9367980957031, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": -0.07172293961048126, "rewards/margins": 3.0589191913604736, "rewards/rejected": -3.1306421756744385, "step": 3594 }, { "epoch": 0.42, "learning_rate": 1.7719381126727293e-07, "logits/chosen": -2.1414804458618164, "logits/rejected": -2.124013662338257, "logps/chosen": -233.09112548828125, "logps/rejected": -219.2594757080078, "loss": 0.4309, "rewards/accuracies": 0.75, "rewards/chosen": -0.9435594081878662, "rewards/margins": 1.9618418216705322, "rewards/rejected": -2.9054014682769775, "step": 3595 }, { "epoch": 0.42, "learning_rate": 1.7715837959135468e-07, "logits/chosen": -2.3214995861053467, "logits/rejected": -2.4274303913116455, "logps/chosen": -323.5358581542969, "logps/rejected": -292.90985107421875, "loss": 0.5032, "rewards/accuracies": 0.75, "rewards/chosen": -1.2833102941513062, "rewards/margins": 2.7156105041503906, "rewards/rejected": -3.9989206790924072, "step": 3596 }, { "epoch": 0.42, "learning_rate": 1.771229479154364e-07, "logits/chosen": -2.029998302459717, "logits/rejected": -2.204063653945923, "logps/chosen": -362.75396728515625, "logps/rejected": -280.8029479980469, "loss": 0.3929, "rewards/accuracies": 0.75, "rewards/chosen": -1.520453691482544, "rewards/margins": 1.7143495082855225, "rewards/rejected": -3.2348031997680664, "step": 3597 }, { "epoch": 0.42, "learning_rate": 1.7708751623951812e-07, "logits/chosen": -2.3789286613464355, "logits/rejected": -2.2715697288513184, "logps/chosen": -193.6046600341797, "logps/rejected": -333.04931640625, "loss": 0.8181, "rewards/accuracies": 0.5, "rewards/chosen": -0.7458158135414124, "rewards/margins": 0.49195536971092224, "rewards/rejected": -1.2377711534500122, "step": 3598 }, { "epoch": 0.42, "learning_rate": 1.7705208456359984e-07, "logits/chosen": -2.5348358154296875, "logits/rejected": -2.5544633865356445, "logps/chosen": -243.3099365234375, "logps/rejected": -220.73489379882812, "loss": 0.2718, "rewards/accuracies": 0.875, "rewards/chosen": -0.08000697195529938, "rewards/margins": 2.8680825233459473, "rewards/rejected": -2.948089122772217, "step": 3599 }, { "epoch": 0.42, "learning_rate": 1.7701665288768157e-07, "logits/chosen": -2.3766307830810547, "logits/rejected": -2.594350576400757, "logps/chosen": -362.82098388671875, "logps/rejected": -447.5337829589844, "loss": 0.052, "rewards/accuracies": 1.0, "rewards/chosen": -1.0997234582901, "rewards/margins": 4.818094730377197, "rewards/rejected": -5.917818069458008, "step": 3600 }, { "epoch": 0.42, "learning_rate": 1.769812212117633e-07, "logits/chosen": -2.704664945602417, "logits/rejected": -2.80230712890625, "logps/chosen": -238.21820068359375, "logps/rejected": -227.79002380371094, "loss": 0.4534, "rewards/accuracies": 0.75, "rewards/chosen": -0.3818296790122986, "rewards/margins": 3.0973236560821533, "rewards/rejected": -3.4791531562805176, "step": 3601 }, { "epoch": 0.42, "learning_rate": 1.7694578953584504e-07, "logits/chosen": -2.4974918365478516, "logits/rejected": -2.3183693885803223, "logps/chosen": -202.5987548828125, "logps/rejected": -327.6900329589844, "loss": 0.1487, "rewards/accuracies": 1.0, "rewards/chosen": -1.104593276977539, "rewards/margins": 3.6806461811065674, "rewards/rejected": -4.7852396965026855, "step": 3602 }, { "epoch": 0.42, "learning_rate": 1.7691035785992676e-07, "logits/chosen": -2.3356196880340576, "logits/rejected": -2.3622946739196777, "logps/chosen": -164.20257568359375, "logps/rejected": -286.4875793457031, "loss": 0.552, "rewards/accuracies": 0.75, "rewards/chosen": -0.9016519784927368, "rewards/margins": 3.297826051712036, "rewards/rejected": -4.1994781494140625, "step": 3603 }, { "epoch": 0.42, "learning_rate": 1.768749261840085e-07, "logits/chosen": -2.0031211376190186, "logits/rejected": -1.7946248054504395, "logps/chosen": -196.7084197998047, "logps/rejected": -336.53106689453125, "loss": 0.4314, "rewards/accuracies": 0.75, "rewards/chosen": -0.8308393955230713, "rewards/margins": 1.4122384786605835, "rewards/rejected": -2.2430777549743652, "step": 3604 }, { "epoch": 0.42, "learning_rate": 1.7683949450809023e-07, "logits/chosen": -2.0044994354248047, "logits/rejected": -2.441993236541748, "logps/chosen": -317.69305419921875, "logps/rejected": -299.50958251953125, "loss": 0.2989, "rewards/accuracies": 0.875, "rewards/chosen": -0.8257529735565186, "rewards/margins": 3.0481374263763428, "rewards/rejected": -3.8738903999328613, "step": 3605 }, { "epoch": 0.42, "learning_rate": 1.7680406283217195e-07, "logits/chosen": -2.4023056030273438, "logits/rejected": -2.4707415103912354, "logps/chosen": -249.3460693359375, "logps/rejected": -272.2886047363281, "loss": 0.6729, "rewards/accuracies": 0.625, "rewards/chosen": -1.8009041547775269, "rewards/margins": 1.5948766469955444, "rewards/rejected": -3.395780563354492, "step": 3606 }, { "epoch": 0.42, "learning_rate": 1.767686311562537e-07, "logits/chosen": -2.607919692993164, "logits/rejected": -2.7359237670898438, "logps/chosen": -207.57656860351562, "logps/rejected": -148.43223571777344, "loss": 0.1703, "rewards/accuracies": 1.0, "rewards/chosen": -0.9396916031837463, "rewards/margins": 2.1709530353546143, "rewards/rejected": -3.1106443405151367, "step": 3607 }, { "epoch": 0.42, "learning_rate": 1.7673319948033542e-07, "logits/chosen": -2.688372850418091, "logits/rejected": -2.842137098312378, "logps/chosen": -299.37530517578125, "logps/rejected": -236.3158721923828, "loss": 0.2016, "rewards/accuracies": 0.875, "rewards/chosen": -0.48713594675064087, "rewards/margins": 2.918518543243408, "rewards/rejected": -3.405653953552246, "step": 3608 }, { "epoch": 0.42, "learning_rate": 1.7669776780441714e-07, "logits/chosen": -1.997929334640503, "logits/rejected": -2.156121253967285, "logps/chosen": -471.1583557128906, "logps/rejected": -377.3288879394531, "loss": 0.1142, "rewards/accuracies": 1.0, "rewards/chosen": -0.7391446232795715, "rewards/margins": 5.186575889587402, "rewards/rejected": -5.92572021484375, "step": 3609 }, { "epoch": 0.42, "learning_rate": 1.7666233612849887e-07, "logits/chosen": -2.4074432849884033, "logits/rejected": -2.3415870666503906, "logps/chosen": -397.9464416503906, "logps/rejected": -233.77035522460938, "loss": 0.3947, "rewards/accuracies": 0.75, "rewards/chosen": -0.9254862666130066, "rewards/margins": 1.8310589790344238, "rewards/rejected": -2.756545305252075, "step": 3610 }, { "epoch": 0.42, "learning_rate": 1.766269044525806e-07, "logits/chosen": -1.8646204471588135, "logits/rejected": -2.292757272720337, "logps/chosen": -481.5156555175781, "logps/rejected": -306.01080322265625, "loss": 0.4113, "rewards/accuracies": 0.75, "rewards/chosen": -0.6206088662147522, "rewards/margins": 1.5286095142364502, "rewards/rejected": -2.1492183208465576, "step": 3611 }, { "epoch": 0.42, "learning_rate": 1.765914727766623e-07, "logits/chosen": -1.4575624465942383, "logits/rejected": -1.3540453910827637, "logps/chosen": -148.73287963867188, "logps/rejected": -260.62713623046875, "loss": 0.442, "rewards/accuracies": 0.625, "rewards/chosen": -0.3345039188861847, "rewards/margins": 2.223738431930542, "rewards/rejected": -2.5582425594329834, "step": 3612 }, { "epoch": 0.42, "learning_rate": 1.7655604110074403e-07, "logits/chosen": -2.8486075401306152, "logits/rejected": -2.6044223308563232, "logps/chosen": -255.39425659179688, "logps/rejected": -167.30819702148438, "loss": 0.6194, "rewards/accuracies": 0.875, "rewards/chosen": -1.9307503700256348, "rewards/margins": 1.1964340209960938, "rewards/rejected": -3.1271843910217285, "step": 3613 }, { "epoch": 0.42, "learning_rate": 1.7652060942482578e-07, "logits/chosen": -2.1573729515075684, "logits/rejected": -2.346149206161499, "logps/chosen": -295.8320007324219, "logps/rejected": -279.00494384765625, "loss": 0.6039, "rewards/accuracies": 0.625, "rewards/chosen": -0.9237630367279053, "rewards/margins": 1.9030187129974365, "rewards/rejected": -2.826781749725342, "step": 3614 }, { "epoch": 0.42, "learning_rate": 1.764851777489075e-07, "logits/chosen": -2.639853000640869, "logits/rejected": -2.5196051597595215, "logps/chosen": -185.4152069091797, "logps/rejected": -212.96261596679688, "loss": 0.2606, "rewards/accuracies": 1.0, "rewards/chosen": -0.6488139629364014, "rewards/margins": 1.8429820537567139, "rewards/rejected": -2.4917960166931152, "step": 3615 }, { "epoch": 0.42, "learning_rate": 1.7644974607298925e-07, "logits/chosen": -2.2430191040039062, "logits/rejected": -2.585036277770996, "logps/chosen": -310.9378967285156, "logps/rejected": -220.43612670898438, "loss": 0.3892, "rewards/accuracies": 0.75, "rewards/chosen": -1.0127099752426147, "rewards/margins": 1.9801961183547974, "rewards/rejected": -2.992906332015991, "step": 3616 }, { "epoch": 0.42, "learning_rate": 1.7641431439707097e-07, "logits/chosen": -2.369838237762451, "logits/rejected": -2.4044392108917236, "logps/chosen": -320.7086181640625, "logps/rejected": -406.14361572265625, "loss": 0.194, "rewards/accuracies": 1.0, "rewards/chosen": -0.8556973338127136, "rewards/margins": 2.885791063308716, "rewards/rejected": -3.741488456726074, "step": 3617 }, { "epoch": 0.42, "learning_rate": 1.7637888272115272e-07, "logits/chosen": -2.0076959133148193, "logits/rejected": -2.0877718925476074, "logps/chosen": -154.7200927734375, "logps/rejected": -253.339111328125, "loss": 0.4229, "rewards/accuracies": 0.75, "rewards/chosen": -0.2616881728172302, "rewards/margins": 1.908022403717041, "rewards/rejected": -2.169710636138916, "step": 3618 }, { "epoch": 0.42, "learning_rate": 1.7634345104523445e-07, "logits/chosen": -2.677542209625244, "logits/rejected": -2.627998113632202, "logps/chosen": -192.49197387695312, "logps/rejected": -240.58126831054688, "loss": 0.2186, "rewards/accuracies": 1.0, "rewards/chosen": -0.06599956750869751, "rewards/margins": 2.7855265140533447, "rewards/rejected": -2.8515260219573975, "step": 3619 }, { "epoch": 0.42, "learning_rate": 1.7630801936931617e-07, "logits/chosen": -2.1320834159851074, "logits/rejected": -2.513894557952881, "logps/chosen": -550.2503662109375, "logps/rejected": -219.17367553710938, "loss": 0.3941, "rewards/accuracies": 0.75, "rewards/chosen": -0.6197540760040283, "rewards/margins": 2.113739013671875, "rewards/rejected": -2.7334930896759033, "step": 3620 }, { "epoch": 0.42, "learning_rate": 1.762725876933979e-07, "logits/chosen": -2.001929998397827, "logits/rejected": -2.2676005363464355, "logps/chosen": -362.23370361328125, "logps/rejected": -468.70123291015625, "loss": 0.1128, "rewards/accuracies": 1.0, "rewards/chosen": -0.1526196300983429, "rewards/margins": 4.143484592437744, "rewards/rejected": -4.296104431152344, "step": 3621 }, { "epoch": 0.42, "learning_rate": 1.762371560174796e-07, "logits/chosen": -2.197519063949585, "logits/rejected": -2.4499287605285645, "logps/chosen": -274.94390869140625, "logps/rejected": -260.0486145019531, "loss": 0.6271, "rewards/accuracies": 0.625, "rewards/chosen": -2.656251907348633, "rewards/margins": 0.9504786133766174, "rewards/rejected": -3.6067304611206055, "step": 3622 }, { "epoch": 0.42, "learning_rate": 1.7620172434156133e-07, "logits/chosen": -1.92734956741333, "logits/rejected": -1.7617801427841187, "logps/chosen": -218.84906005859375, "logps/rejected": -155.29173278808594, "loss": 0.6482, "rewards/accuracies": 0.625, "rewards/chosen": -0.2556428909301758, "rewards/margins": 0.8617305159568787, "rewards/rejected": -1.1173733472824097, "step": 3623 }, { "epoch": 0.42, "learning_rate": 1.7616629266564306e-07, "logits/chosen": -2.460552930831909, "logits/rejected": -2.45141863822937, "logps/chosen": -275.479736328125, "logps/rejected": -338.16973876953125, "loss": 0.254, "rewards/accuracies": 1.0, "rewards/chosen": -0.38652104139328003, "rewards/margins": 1.5926625728607178, "rewards/rejected": -1.9791836738586426, "step": 3624 }, { "epoch": 0.42, "learning_rate": 1.761308609897248e-07, "logits/chosen": -2.2082760334014893, "logits/rejected": -2.326277494430542, "logps/chosen": -494.88446044921875, "logps/rejected": -403.92779541015625, "loss": 0.2491, "rewards/accuracies": 0.875, "rewards/chosen": 0.027856767177581787, "rewards/margins": 1.723174810409546, "rewards/rejected": -1.6953179836273193, "step": 3625 }, { "epoch": 0.42, "learning_rate": 1.7609542931380653e-07, "logits/chosen": -2.5149309635162354, "logits/rejected": -2.5833613872528076, "logps/chosen": -487.967529296875, "logps/rejected": -341.82525634765625, "loss": 0.2632, "rewards/accuracies": 0.875, "rewards/chosen": -0.7141487002372742, "rewards/margins": 2.247788429260254, "rewards/rejected": -2.9619369506835938, "step": 3626 }, { "epoch": 0.42, "learning_rate": 1.7605999763788828e-07, "logits/chosen": -2.285888195037842, "logits/rejected": -2.4170308113098145, "logps/chosen": -368.3277587890625, "logps/rejected": -204.67825317382812, "loss": 0.8157, "rewards/accuracies": 0.5, "rewards/chosen": -1.4751067161560059, "rewards/margins": 1.2618650197982788, "rewards/rejected": -2.736971616744995, "step": 3627 }, { "epoch": 0.42, "learning_rate": 1.7602456596197e-07, "logits/chosen": -2.5381674766540527, "logits/rejected": -2.5913352966308594, "logps/chosen": -122.52070617675781, "logps/rejected": -194.04750061035156, "loss": 0.334, "rewards/accuracies": 0.875, "rewards/chosen": -0.49838101863861084, "rewards/margins": 1.9317799806594849, "rewards/rejected": -2.4301609992980957, "step": 3628 }, { "epoch": 0.42, "learning_rate": 1.7598913428605172e-07, "logits/chosen": -2.540062665939331, "logits/rejected": -2.578360080718994, "logps/chosen": -324.86517333984375, "logps/rejected": -314.5015563964844, "loss": 0.4111, "rewards/accuracies": 0.75, "rewards/chosen": -0.8811057209968567, "rewards/margins": 2.2924718856811523, "rewards/rejected": -3.1735775470733643, "step": 3629 }, { "epoch": 0.42, "learning_rate": 1.7595370261013347e-07, "logits/chosen": -2.856123208999634, "logits/rejected": -2.9191625118255615, "logps/chosen": -173.734619140625, "logps/rejected": -193.050537109375, "loss": 0.9113, "rewards/accuracies": 0.625, "rewards/chosen": -1.9070520401000977, "rewards/margins": 0.49310481548309326, "rewards/rejected": -2.4001567363739014, "step": 3630 }, { "epoch": 0.42, "learning_rate": 1.759182709342152e-07, "logits/chosen": -2.822242259979248, "logits/rejected": -2.734461545944214, "logps/chosen": -270.19036865234375, "logps/rejected": -331.5507507324219, "loss": 0.322, "rewards/accuracies": 0.875, "rewards/chosen": -0.7706082463264465, "rewards/margins": 2.7023744583129883, "rewards/rejected": -3.47298264503479, "step": 3631 }, { "epoch": 0.42, "learning_rate": 1.758828392582969e-07, "logits/chosen": -2.628826856613159, "logits/rejected": -2.6068978309631348, "logps/chosen": -303.2485046386719, "logps/rejected": -346.0715637207031, "loss": 0.2163, "rewards/accuracies": 1.0, "rewards/chosen": -0.9181984066963196, "rewards/margins": 2.5616564750671387, "rewards/rejected": -3.4798545837402344, "step": 3632 }, { "epoch": 0.42, "learning_rate": 1.7584740758237863e-07, "logits/chosen": -2.658538341522217, "logits/rejected": -2.7123095989227295, "logps/chosen": -240.0086669921875, "logps/rejected": -241.41644287109375, "loss": 0.1482, "rewards/accuracies": 1.0, "rewards/chosen": -0.4360697567462921, "rewards/margins": 3.5168418884277344, "rewards/rejected": -3.952911853790283, "step": 3633 }, { "epoch": 0.42, "learning_rate": 1.7581197590646036e-07, "logits/chosen": -2.311310291290283, "logits/rejected": -2.070061683654785, "logps/chosen": -455.2848205566406, "logps/rejected": -367.4862060546875, "loss": 0.6661, "rewards/accuracies": 0.75, "rewards/chosen": -0.7782317996025085, "rewards/margins": 1.1486155986785889, "rewards/rejected": -1.9268474578857422, "step": 3634 }, { "epoch": 0.42, "learning_rate": 1.7577654423054208e-07, "logits/chosen": -2.2321739196777344, "logits/rejected": -2.442387819290161, "logps/chosen": -183.42288208007812, "logps/rejected": -225.12030029296875, "loss": 0.5709, "rewards/accuracies": 0.75, "rewards/chosen": -0.8367000222206116, "rewards/margins": 1.8083546161651611, "rewards/rejected": -2.645054817199707, "step": 3635 }, { "epoch": 0.42, "learning_rate": 1.7574111255462383e-07, "logits/chosen": -2.8326711654663086, "logits/rejected": -2.5293850898742676, "logps/chosen": -166.69435119628906, "logps/rejected": -209.31329345703125, "loss": 0.219, "rewards/accuracies": 1.0, "rewards/chosen": -0.25717100501060486, "rewards/margins": 2.0742545127868652, "rewards/rejected": -2.331425428390503, "step": 3636 }, { "epoch": 0.42, "learning_rate": 1.7570568087870555e-07, "logits/chosen": -2.351154088973999, "logits/rejected": -2.0335121154785156, "logps/chosen": -205.51963806152344, "logps/rejected": -263.6110534667969, "loss": 0.3445, "rewards/accuracies": 1.0, "rewards/chosen": -0.8186399936676025, "rewards/margins": 0.9557206630706787, "rewards/rejected": -1.7743606567382812, "step": 3637 }, { "epoch": 0.42, "learning_rate": 1.7567024920278727e-07, "logits/chosen": -1.7017322778701782, "logits/rejected": -1.820804238319397, "logps/chosen": -571.2379150390625, "logps/rejected": -458.09307861328125, "loss": 0.1829, "rewards/accuracies": 0.875, "rewards/chosen": -0.46206966042518616, "rewards/margins": 3.1792893409729004, "rewards/rejected": -3.6413590908050537, "step": 3638 }, { "epoch": 0.42, "learning_rate": 1.7563481752686902e-07, "logits/chosen": -2.2000551223754883, "logits/rejected": -2.4209840297698975, "logps/chosen": -322.6048583984375, "logps/rejected": -389.43841552734375, "loss": 0.2781, "rewards/accuracies": 0.875, "rewards/chosen": -0.8882119655609131, "rewards/margins": 2.910886287689209, "rewards/rejected": -3.799098253250122, "step": 3639 }, { "epoch": 0.42, "learning_rate": 1.7559938585095074e-07, "logits/chosen": -2.1074576377868652, "logits/rejected": -2.356843948364258, "logps/chosen": -326.22528076171875, "logps/rejected": -241.0065460205078, "loss": 0.3365, "rewards/accuracies": 0.875, "rewards/chosen": -0.6816136837005615, "rewards/margins": 1.553244709968567, "rewards/rejected": -2.234858512878418, "step": 3640 }, { "epoch": 0.42, "learning_rate": 1.755639541750325e-07, "logits/chosen": -2.316636562347412, "logits/rejected": -2.364795207977295, "logps/chosen": -220.51646423339844, "logps/rejected": -208.14425659179688, "loss": 0.2753, "rewards/accuracies": 0.875, "rewards/chosen": -0.9230340123176575, "rewards/margins": 1.4553242921829224, "rewards/rejected": -2.3783583641052246, "step": 3641 }, { "epoch": 0.42, "learning_rate": 1.7552852249911421e-07, "logits/chosen": -2.191530466079712, "logits/rejected": -2.504563808441162, "logps/chosen": -304.494873046875, "logps/rejected": -238.13687133789062, "loss": 0.3144, "rewards/accuracies": 0.875, "rewards/chosen": -0.708904504776001, "rewards/margins": 1.8391530513763428, "rewards/rejected": -2.548057794570923, "step": 3642 }, { "epoch": 0.42, "learning_rate": 1.7549309082319594e-07, "logits/chosen": -2.3753397464752197, "logits/rejected": -2.7118935585021973, "logps/chosen": -337.87139892578125, "logps/rejected": -161.96112060546875, "loss": 0.3199, "rewards/accuracies": 1.0, "rewards/chosen": -0.417232871055603, "rewards/margins": 1.6350390911102295, "rewards/rejected": -2.052271842956543, "step": 3643 }, { "epoch": 0.42, "learning_rate": 1.7545765914727766e-07, "logits/chosen": -2.751394510269165, "logits/rejected": -2.6526002883911133, "logps/chosen": -264.1773376464844, "logps/rejected": -324.6272888183594, "loss": 0.0865, "rewards/accuracies": 1.0, "rewards/chosen": -0.2512415945529938, "rewards/margins": 4.298307418823242, "rewards/rejected": -4.549549102783203, "step": 3644 }, { "epoch": 0.42, "learning_rate": 1.7542222747135938e-07, "logits/chosen": -1.7065523862838745, "logits/rejected": -1.848132848739624, "logps/chosen": -270.46124267578125, "logps/rejected": -361.68511962890625, "loss": 0.1943, "rewards/accuracies": 1.0, "rewards/chosen": -0.6576786637306213, "rewards/margins": 2.6710641384124756, "rewards/rejected": -3.328742504119873, "step": 3645 }, { "epoch": 0.42, "learning_rate": 1.753867957954411e-07, "logits/chosen": -2.247823476791382, "logits/rejected": -2.5076019763946533, "logps/chosen": -503.8516845703125, "logps/rejected": -327.9425354003906, "loss": 0.8168, "rewards/accuracies": 0.5, "rewards/chosen": -1.851790428161621, "rewards/margins": 0.9290279746055603, "rewards/rejected": -2.780818462371826, "step": 3646 }, { "epoch": 0.42, "learning_rate": 1.7535136411952285e-07, "logits/chosen": -2.3846371173858643, "logits/rejected": -2.264427661895752, "logps/chosen": -353.2913818359375, "logps/rejected": -334.5704345703125, "loss": 0.4607, "rewards/accuracies": 0.625, "rewards/chosen": -0.6028943061828613, "rewards/margins": 0.9870700240135193, "rewards/rejected": -1.5899642705917358, "step": 3647 }, { "epoch": 0.42, "learning_rate": 1.7531593244360457e-07, "logits/chosen": -2.6449031829833984, "logits/rejected": -2.7855143547058105, "logps/chosen": -415.25384521484375, "logps/rejected": -283.5061340332031, "loss": 0.0944, "rewards/accuracies": 1.0, "rewards/chosen": -0.947851300239563, "rewards/margins": 3.3880558013916016, "rewards/rejected": -4.335906982421875, "step": 3648 }, { "epoch": 0.42, "learning_rate": 1.752805007676863e-07, "logits/chosen": -2.6904072761535645, "logits/rejected": -2.6580357551574707, "logps/chosen": -371.0420227050781, "logps/rejected": -260.58209228515625, "loss": 0.3959, "rewards/accuracies": 0.875, "rewards/chosen": -0.8946399092674255, "rewards/margins": 1.2156535387039185, "rewards/rejected": -2.110293388366699, "step": 3649 }, { "epoch": 0.42, "learning_rate": 1.7524506909176802e-07, "logits/chosen": -2.022993564605713, "logits/rejected": -2.1597447395324707, "logps/chosen": -156.243896484375, "logps/rejected": -219.60995483398438, "loss": 0.1791, "rewards/accuracies": 1.0, "rewards/chosen": -0.03595474362373352, "rewards/margins": 3.1561880111694336, "rewards/rejected": -3.192142963409424, "step": 3650 }, { "epoch": 0.42, "learning_rate": 1.7520963741584977e-07, "logits/chosen": -2.7479283809661865, "logits/rejected": -2.676039695739746, "logps/chosen": -270.45074462890625, "logps/rejected": -174.1519012451172, "loss": 0.5636, "rewards/accuracies": 0.75, "rewards/chosen": -2.0217928886413574, "rewards/margins": 2.4436194896698, "rewards/rejected": -4.465412139892578, "step": 3651 }, { "epoch": 0.42, "learning_rate": 1.7517420573993151e-07, "logits/chosen": -1.9016468524932861, "logits/rejected": -2.3999900817871094, "logps/chosen": -354.0035095214844, "logps/rejected": -319.2190246582031, "loss": 0.528, "rewards/accuracies": 0.75, "rewards/chosen": -1.6545166969299316, "rewards/margins": 1.8044581413269043, "rewards/rejected": -3.458974838256836, "step": 3652 }, { "epoch": 0.42, "learning_rate": 1.7513877406401324e-07, "logits/chosen": -2.8612794876098633, "logits/rejected": -2.923551082611084, "logps/chosen": -105.82888793945312, "logps/rejected": -172.19403076171875, "loss": 0.2134, "rewards/accuracies": 0.875, "rewards/chosen": -0.21372301876544952, "rewards/margins": 3.086059093475342, "rewards/rejected": -3.2997817993164062, "step": 3653 }, { "epoch": 0.43, "learning_rate": 1.7510334238809496e-07, "logits/chosen": -2.3122005462646484, "logits/rejected": -2.1965112686157227, "logps/chosen": -298.6417236328125, "logps/rejected": -319.1836242675781, "loss": 0.2091, "rewards/accuracies": 1.0, "rewards/chosen": -0.3625435531139374, "rewards/margins": 2.2177846431732178, "rewards/rejected": -2.5803279876708984, "step": 3654 }, { "epoch": 0.43, "learning_rate": 1.7506791071217668e-07, "logits/chosen": -1.774406909942627, "logits/rejected": -1.708235740661621, "logps/chosen": -230.31338500976562, "logps/rejected": -284.8048095703125, "loss": 0.3547, "rewards/accuracies": 0.875, "rewards/chosen": -0.5063499808311462, "rewards/margins": 1.306469202041626, "rewards/rejected": -1.8128191232681274, "step": 3655 }, { "epoch": 0.43, "learning_rate": 1.750324790362584e-07, "logits/chosen": -2.140639543533325, "logits/rejected": -2.2157797813415527, "logps/chosen": -247.4978485107422, "logps/rejected": -223.87081909179688, "loss": 0.6543, "rewards/accuracies": 0.625, "rewards/chosen": -1.812157392501831, "rewards/margins": 0.8908335566520691, "rewards/rejected": -2.702991008758545, "step": 3656 }, { "epoch": 0.43, "learning_rate": 1.7499704736034012e-07, "logits/chosen": -2.9164481163024902, "logits/rejected": -2.8174870014190674, "logps/chosen": -197.41128540039062, "logps/rejected": -215.27635192871094, "loss": 0.22, "rewards/accuracies": 1.0, "rewards/chosen": -0.6035594344139099, "rewards/margins": 2.151754140853882, "rewards/rejected": -2.7553138732910156, "step": 3657 }, { "epoch": 0.43, "learning_rate": 1.7496161568442185e-07, "logits/chosen": -1.7119629383087158, "logits/rejected": -1.4527220726013184, "logps/chosen": -294.8026428222656, "logps/rejected": -387.5682067871094, "loss": 0.2694, "rewards/accuracies": 0.875, "rewards/chosen": -0.853924036026001, "rewards/margins": 2.255444288253784, "rewards/rejected": -3.109368324279785, "step": 3658 }, { "epoch": 0.43, "learning_rate": 1.749261840085036e-07, "logits/chosen": -1.9631750583648682, "logits/rejected": -2.1188595294952393, "logps/chosen": -158.3531494140625, "logps/rejected": -218.49269104003906, "loss": 1.4792, "rewards/accuracies": 0.625, "rewards/chosen": -2.1300792694091797, "rewards/margins": 0.18798059225082397, "rewards/rejected": -2.3180601596832275, "step": 3659 }, { "epoch": 0.43, "learning_rate": 1.7489075233258532e-07, "logits/chosen": -2.6155920028686523, "logits/rejected": -2.1889071464538574, "logps/chosen": -132.99642944335938, "logps/rejected": -309.3650817871094, "loss": 0.2064, "rewards/accuracies": 0.875, "rewards/chosen": -1.4085540771484375, "rewards/margins": 3.946611166000366, "rewards/rejected": -5.355165481567383, "step": 3660 }, { "epoch": 0.43, "learning_rate": 1.7485532065666704e-07, "logits/chosen": -1.891045331954956, "logits/rejected": -1.6461834907531738, "logps/chosen": -380.045166015625, "logps/rejected": -405.4801940917969, "loss": 0.3355, "rewards/accuracies": 0.75, "rewards/chosen": -0.4075565040111542, "rewards/margins": 1.602735996246338, "rewards/rejected": -2.0102925300598145, "step": 3661 }, { "epoch": 0.43, "learning_rate": 1.748198889807488e-07, "logits/chosen": -2.235243320465088, "logits/rejected": -2.374389410018921, "logps/chosen": -203.2404022216797, "logps/rejected": -251.3274688720703, "loss": 0.3808, "rewards/accuracies": 0.875, "rewards/chosen": -1.5026143789291382, "rewards/margins": 1.8153672218322754, "rewards/rejected": -3.317981719970703, "step": 3662 }, { "epoch": 0.43, "learning_rate": 1.7478445730483054e-07, "logits/chosen": -2.907538890838623, "logits/rejected": -2.6081912517547607, "logps/chosen": -296.9075927734375, "logps/rejected": -287.37152099609375, "loss": 0.1463, "rewards/accuracies": 1.0, "rewards/chosen": -0.08826004713773727, "rewards/margins": 2.672557830810547, "rewards/rejected": -2.7608180046081543, "step": 3663 }, { "epoch": 0.43, "learning_rate": 1.7474902562891226e-07, "logits/chosen": -2.971066951751709, "logits/rejected": -2.9624288082122803, "logps/chosen": -211.5835418701172, "logps/rejected": -316.7471923828125, "loss": 0.1042, "rewards/accuracies": 1.0, "rewards/chosen": -0.7265113592147827, "rewards/margins": 3.3798723220825195, "rewards/rejected": -4.106383323669434, "step": 3664 }, { "epoch": 0.43, "learning_rate": 1.7471359395299398e-07, "logits/chosen": -2.715458393096924, "logits/rejected": -2.5708374977111816, "logps/chosen": -177.61712646484375, "logps/rejected": -219.39047241210938, "loss": 0.3029, "rewards/accuracies": 0.75, "rewards/chosen": -1.4614828824996948, "rewards/margins": 2.4243717193603516, "rewards/rejected": -3.8858542442321777, "step": 3665 }, { "epoch": 0.43, "learning_rate": 1.746781622770757e-07, "logits/chosen": -1.7088422775268555, "logits/rejected": -2.1317272186279297, "logps/chosen": -391.8970031738281, "logps/rejected": -357.0691833496094, "loss": 0.5218, "rewards/accuracies": 0.75, "rewards/chosen": -0.8623418807983398, "rewards/margins": 2.220327138900757, "rewards/rejected": -3.082669258117676, "step": 3666 }, { "epoch": 0.43, "learning_rate": 1.7464273060115743e-07, "logits/chosen": -2.436995506286621, "logits/rejected": -2.580872058868408, "logps/chosen": -369.73388671875, "logps/rejected": -330.9620056152344, "loss": 0.1144, "rewards/accuracies": 1.0, "rewards/chosen": -0.7926731705665588, "rewards/margins": 3.267302989959717, "rewards/rejected": -4.059976100921631, "step": 3667 }, { "epoch": 0.43, "learning_rate": 1.7460729892523915e-07, "logits/chosen": -2.164095401763916, "logits/rejected": -2.4507744312286377, "logps/chosen": -248.36773681640625, "logps/rejected": -167.18174743652344, "loss": 0.4431, "rewards/accuracies": 0.75, "rewards/chosen": -0.8717927932739258, "rewards/margins": 1.6182336807250977, "rewards/rejected": -2.4900267124176025, "step": 3668 }, { "epoch": 0.43, "learning_rate": 1.7457186724932087e-07, "logits/chosen": -2.2899506092071533, "logits/rejected": -2.407367706298828, "logps/chosen": -276.9530944824219, "logps/rejected": -217.07403564453125, "loss": 0.811, "rewards/accuracies": 0.75, "rewards/chosen": -1.265005111694336, "rewards/margins": 0.9955097436904907, "rewards/rejected": -2.260514974594116, "step": 3669 }, { "epoch": 0.43, "learning_rate": 1.7453643557340262e-07, "logits/chosen": -2.0404410362243652, "logits/rejected": -1.8280999660491943, "logps/chosen": -165.88226318359375, "logps/rejected": -218.3700408935547, "loss": 0.645, "rewards/accuracies": 0.625, "rewards/chosen": -0.9352635741233826, "rewards/margins": 1.0920538902282715, "rewards/rejected": -2.0273172855377197, "step": 3670 }, { "epoch": 0.43, "learning_rate": 1.7450100389748434e-07, "logits/chosen": -2.474993944168091, "logits/rejected": -2.780876636505127, "logps/chosen": -311.75677490234375, "logps/rejected": -267.763916015625, "loss": 0.1587, "rewards/accuracies": 0.875, "rewards/chosen": -0.04807084798812866, "rewards/margins": 3.2806639671325684, "rewards/rejected": -3.3287346363067627, "step": 3671 }, { "epoch": 0.43, "learning_rate": 1.7446557222156606e-07, "logits/chosen": -2.5547847747802734, "logits/rejected": -2.2353501319885254, "logps/chosen": -157.34701538085938, "logps/rejected": -216.4887237548828, "loss": 0.2215, "rewards/accuracies": 1.0, "rewards/chosen": -0.7408726215362549, "rewards/margins": 2.433030843734741, "rewards/rejected": -3.173903465270996, "step": 3672 }, { "epoch": 0.43, "learning_rate": 1.7443014054564778e-07, "logits/chosen": -2.6178956031799316, "logits/rejected": -2.79403018951416, "logps/chosen": -140.9967803955078, "logps/rejected": -293.86444091796875, "loss": 0.1593, "rewards/accuracies": 1.0, "rewards/chosen": -1.0327720642089844, "rewards/margins": 3.035249710083008, "rewards/rejected": -4.06802225112915, "step": 3673 }, { "epoch": 0.43, "learning_rate": 1.7439470886972953e-07, "logits/chosen": -1.8702095746994019, "logits/rejected": -1.9370931386947632, "logps/chosen": -490.37139892578125, "logps/rejected": -314.7297058105469, "loss": 0.3381, "rewards/accuracies": 0.875, "rewards/chosen": -0.792751669883728, "rewards/margins": 1.9245471954345703, "rewards/rejected": -2.717298746109009, "step": 3674 }, { "epoch": 0.43, "learning_rate": 1.7435927719381128e-07, "logits/chosen": -2.0421814918518066, "logits/rejected": -2.472233772277832, "logps/chosen": -399.0526123046875, "logps/rejected": -266.0621643066406, "loss": 0.2427, "rewards/accuracies": 0.875, "rewards/chosen": -0.73717200756073, "rewards/margins": 4.044706344604492, "rewards/rejected": -4.78187894821167, "step": 3675 }, { "epoch": 0.43, "learning_rate": 1.74323845517893e-07, "logits/chosen": -2.6309547424316406, "logits/rejected": -2.5544681549072266, "logps/chosen": -371.57122802734375, "logps/rejected": -391.730224609375, "loss": 0.4933, "rewards/accuracies": 0.75, "rewards/chosen": -0.9490096569061279, "rewards/margins": 1.0312548875808716, "rewards/rejected": -1.9802645444869995, "step": 3676 }, { "epoch": 0.43, "learning_rate": 1.7428841384197473e-07, "logits/chosen": -2.866204261779785, "logits/rejected": -2.8542251586914062, "logps/chosen": -124.91893768310547, "logps/rejected": -223.81712341308594, "loss": 0.3337, "rewards/accuracies": 0.75, "rewards/chosen": -0.537009596824646, "rewards/margins": 1.9068275690078735, "rewards/rejected": -2.4438371658325195, "step": 3677 }, { "epoch": 0.43, "learning_rate": 1.7425298216605645e-07, "logits/chosen": -2.377229690551758, "logits/rejected": -2.197810173034668, "logps/chosen": -272.00543212890625, "logps/rejected": -245.77224731445312, "loss": 0.2691, "rewards/accuracies": 0.875, "rewards/chosen": -0.824256956577301, "rewards/margins": 1.9091439247131348, "rewards/rejected": -2.733401298522949, "step": 3678 }, { "epoch": 0.43, "learning_rate": 1.7421755049013817e-07, "logits/chosen": -2.0848677158355713, "logits/rejected": -1.936612606048584, "logps/chosen": -198.53013610839844, "logps/rejected": -290.92657470703125, "loss": 0.4032, "rewards/accuracies": 0.875, "rewards/chosen": -1.0603196620941162, "rewards/margins": 1.729354739189148, "rewards/rejected": -2.7896745204925537, "step": 3679 }, { "epoch": 0.43, "learning_rate": 1.741821188142199e-07, "logits/chosen": -2.268199920654297, "logits/rejected": -2.526427745819092, "logps/chosen": -257.79144287109375, "logps/rejected": -227.1619415283203, "loss": 0.602, "rewards/accuracies": 0.75, "rewards/chosen": -0.3463366627693176, "rewards/margins": 1.14854097366333, "rewards/rejected": -1.4948776960372925, "step": 3680 }, { "epoch": 0.43, "learning_rate": 1.7414668713830164e-07, "logits/chosen": -1.9737370014190674, "logits/rejected": -1.9244928359985352, "logps/chosen": -366.2268981933594, "logps/rejected": -405.58013916015625, "loss": 0.6481, "rewards/accuracies": 0.75, "rewards/chosen": -0.5612242817878723, "rewards/margins": 1.4461132287979126, "rewards/rejected": -2.0073373317718506, "step": 3681 }, { "epoch": 0.43, "learning_rate": 1.7411125546238336e-07, "logits/chosen": -2.7329518795013428, "logits/rejected": -2.6488242149353027, "logps/chosen": -187.4397430419922, "logps/rejected": -331.52520751953125, "loss": 0.1102, "rewards/accuracies": 1.0, "rewards/chosen": -0.8117837905883789, "rewards/margins": 3.612819194793701, "rewards/rejected": -4.424602508544922, "step": 3682 }, { "epoch": 0.43, "learning_rate": 1.7407582378646509e-07, "logits/chosen": -2.788693904876709, "logits/rejected": -2.7505176067352295, "logps/chosen": -296.1656799316406, "logps/rejected": -266.333984375, "loss": 0.285, "rewards/accuracies": 0.75, "rewards/chosen": -0.13962876796722412, "rewards/margins": 2.231947422027588, "rewards/rejected": -2.3715763092041016, "step": 3683 }, { "epoch": 0.43, "learning_rate": 1.740403921105468e-07, "logits/chosen": -2.2580204010009766, "logits/rejected": -2.4298202991485596, "logps/chosen": -325.7272033691406, "logps/rejected": -216.823486328125, "loss": 0.5955, "rewards/accuracies": 0.625, "rewards/chosen": -1.025530457496643, "rewards/margins": 1.2183220386505127, "rewards/rejected": -2.2438526153564453, "step": 3684 }, { "epoch": 0.43, "learning_rate": 1.7400496043462853e-07, "logits/chosen": -2.445176362991333, "logits/rejected": -2.136286973953247, "logps/chosen": -283.36590576171875, "logps/rejected": -303.21734619140625, "loss": 0.3033, "rewards/accuracies": 0.875, "rewards/chosen": -0.47659796476364136, "rewards/margins": 2.1696689128875732, "rewards/rejected": -2.6462669372558594, "step": 3685 }, { "epoch": 0.43, "learning_rate": 1.739695287587103e-07, "logits/chosen": -2.0911810398101807, "logits/rejected": -1.9011523723602295, "logps/chosen": -130.83779907226562, "logps/rejected": -190.7473907470703, "loss": 0.415, "rewards/accuracies": 0.75, "rewards/chosen": -0.14735761284828186, "rewards/margins": 1.249127984046936, "rewards/rejected": -1.3964858055114746, "step": 3686 }, { "epoch": 0.43, "learning_rate": 1.7393409708279203e-07, "logits/chosen": -2.894599199295044, "logits/rejected": -3.033484935760498, "logps/chosen": -239.06558227539062, "logps/rejected": -229.97348022460938, "loss": 1.3614, "rewards/accuracies": 0.625, "rewards/chosen": -3.611859083175659, "rewards/margins": 0.9862688779830933, "rewards/rejected": -4.598127841949463, "step": 3687 }, { "epoch": 0.43, "learning_rate": 1.7389866540687375e-07, "logits/chosen": -2.7142202854156494, "logits/rejected": -2.3902058601379395, "logps/chosen": -198.78036499023438, "logps/rejected": -221.470947265625, "loss": 0.4643, "rewards/accuracies": 0.875, "rewards/chosen": -0.5688228011131287, "rewards/margins": 2.6083273887634277, "rewards/rejected": -3.177150249481201, "step": 3688 }, { "epoch": 0.43, "learning_rate": 1.7386323373095547e-07, "logits/chosen": -2.2120471000671387, "logits/rejected": -1.9061753749847412, "logps/chosen": -238.64190673828125, "logps/rejected": -388.8365478515625, "loss": 0.1696, "rewards/accuracies": 0.875, "rewards/chosen": -0.06947195529937744, "rewards/margins": 4.040794372558594, "rewards/rejected": -4.110266208648682, "step": 3689 }, { "epoch": 0.43, "learning_rate": 1.738278020550372e-07, "logits/chosen": -1.9914309978485107, "logits/rejected": -2.0724055767059326, "logps/chosen": -282.9148254394531, "logps/rejected": -324.8716735839844, "loss": 0.22, "rewards/accuracies": 1.0, "rewards/chosen": -1.267249345779419, "rewards/margins": 3.686800003051758, "rewards/rejected": -4.954049587249756, "step": 3690 }, { "epoch": 0.43, "learning_rate": 1.7379237037911892e-07, "logits/chosen": -2.6573290824890137, "logits/rejected": -2.6227431297302246, "logps/chosen": -510.3601989746094, "logps/rejected": -385.32421875, "loss": 0.2296, "rewards/accuracies": 0.875, "rewards/chosen": -0.8787400722503662, "rewards/margins": 1.974020004272461, "rewards/rejected": -2.8527603149414062, "step": 3691 }, { "epoch": 0.43, "learning_rate": 1.7375693870320066e-07, "logits/chosen": -2.5903353691101074, "logits/rejected": -2.598322629928589, "logps/chosen": -188.16555786132812, "logps/rejected": -201.2687225341797, "loss": 0.2726, "rewards/accuracies": 0.875, "rewards/chosen": -0.15021520853042603, "rewards/margins": 2.7779810428619385, "rewards/rejected": -2.928196430206299, "step": 3692 }, { "epoch": 0.43, "learning_rate": 1.7372150702728239e-07, "logits/chosen": -2.039206027984619, "logits/rejected": -1.9984009265899658, "logps/chosen": -318.2369689941406, "logps/rejected": -484.5920715332031, "loss": 0.3577, "rewards/accuracies": 0.75, "rewards/chosen": -1.2695448398590088, "rewards/margins": 2.803101062774658, "rewards/rejected": -4.072646141052246, "step": 3693 }, { "epoch": 0.43, "learning_rate": 1.736860753513641e-07, "logits/chosen": -1.696550726890564, "logits/rejected": -2.2406504154205322, "logps/chosen": -448.8092346191406, "logps/rejected": -266.0439758300781, "loss": 0.365, "rewards/accuracies": 0.75, "rewards/chosen": -0.5180296301841736, "rewards/margins": 2.553469657897949, "rewards/rejected": -3.0714993476867676, "step": 3694 }, { "epoch": 0.43, "learning_rate": 1.7365064367544583e-07, "logits/chosen": -2.1854357719421387, "logits/rejected": -2.035473585128784, "logps/chosen": -309.9385986328125, "logps/rejected": -435.86273193359375, "loss": 0.4115, "rewards/accuracies": 0.875, "rewards/chosen": -0.6499674320220947, "rewards/margins": 1.6143262386322021, "rewards/rejected": -2.2642934322357178, "step": 3695 }, { "epoch": 0.43, "learning_rate": 1.7361521199952755e-07, "logits/chosen": -2.4479734897613525, "logits/rejected": -2.3185224533081055, "logps/chosen": -216.40597534179688, "logps/rejected": -235.10678100585938, "loss": 0.4099, "rewards/accuracies": 0.75, "rewards/chosen": -2.094369411468506, "rewards/margins": 2.2627477645874023, "rewards/rejected": -4.357117176055908, "step": 3696 }, { "epoch": 0.43, "learning_rate": 1.7357978032360933e-07, "logits/chosen": -2.672917366027832, "logits/rejected": -2.635641098022461, "logps/chosen": -216.56942749023438, "logps/rejected": -186.6765594482422, "loss": 0.7269, "rewards/accuracies": 0.625, "rewards/chosen": -1.3228914737701416, "rewards/margins": 1.340186357498169, "rewards/rejected": -2.6630780696868896, "step": 3697 }, { "epoch": 0.43, "learning_rate": 1.7354434864769105e-07, "logits/chosen": -2.0059165954589844, "logits/rejected": -2.123657703399658, "logps/chosen": -353.39892578125, "logps/rejected": -302.18060302734375, "loss": 0.2747, "rewards/accuracies": 0.875, "rewards/chosen": -0.860396146774292, "rewards/margins": 2.3353824615478516, "rewards/rejected": -3.1957786083221436, "step": 3698 }, { "epoch": 0.43, "learning_rate": 1.7350891697177277e-07, "logits/chosen": -1.9109951257705688, "logits/rejected": -2.07395601272583, "logps/chosen": -238.9143829345703, "logps/rejected": -249.48208618164062, "loss": 0.8497, "rewards/accuracies": 0.75, "rewards/chosen": -1.8662872314453125, "rewards/margins": 2.381871461868286, "rewards/rejected": -4.2481584548950195, "step": 3699 }, { "epoch": 0.43, "learning_rate": 1.734734852958545e-07, "logits/chosen": -2.145282745361328, "logits/rejected": -2.50301456451416, "logps/chosen": -399.3047790527344, "logps/rejected": -314.114501953125, "loss": 0.3085, "rewards/accuracies": 0.875, "rewards/chosen": -0.7943548560142517, "rewards/margins": 2.3854169845581055, "rewards/rejected": -3.179771661758423, "step": 3700 }, { "epoch": 0.43, "learning_rate": 1.7343805361993622e-07, "logits/chosen": -2.04487681388855, "logits/rejected": -1.7817836999893188, "logps/chosen": -381.40496826171875, "logps/rejected": -392.351318359375, "loss": 0.436, "rewards/accuracies": 0.5, "rewards/chosen": -0.8191475868225098, "rewards/margins": 1.4170055389404297, "rewards/rejected": -2.2361531257629395, "step": 3701 }, { "epoch": 0.43, "learning_rate": 1.7340262194401794e-07, "logits/chosen": -1.5543417930603027, "logits/rejected": -1.7486350536346436, "logps/chosen": -330.330322265625, "logps/rejected": -363.55889892578125, "loss": 0.4802, "rewards/accuracies": 0.875, "rewards/chosen": -1.3951191902160645, "rewards/margins": 3.1329269409179688, "rewards/rejected": -4.528045654296875, "step": 3702 }, { "epoch": 0.43, "learning_rate": 1.7336719026809966e-07, "logits/chosen": -2.1352009773254395, "logits/rejected": -2.0588603019714355, "logps/chosen": -368.5689697265625, "logps/rejected": -307.3861389160156, "loss": 0.2925, "rewards/accuracies": 0.875, "rewards/chosen": -0.5209349393844604, "rewards/margins": 1.9946428537368774, "rewards/rejected": -2.515577793121338, "step": 3703 }, { "epoch": 0.43, "learning_rate": 1.733317585921814e-07, "logits/chosen": -1.746111273765564, "logits/rejected": -2.0118613243103027, "logps/chosen": -553.3530883789062, "logps/rejected": -291.9944152832031, "loss": 0.5178, "rewards/accuracies": 0.625, "rewards/chosen": -1.4164044857025146, "rewards/margins": 1.3938798904418945, "rewards/rejected": -2.81028413772583, "step": 3704 }, { "epoch": 0.43, "learning_rate": 1.7329632691626313e-07, "logits/chosen": -2.065673589706421, "logits/rejected": -1.9595096111297607, "logps/chosen": -243.19338989257812, "logps/rejected": -327.6018371582031, "loss": 0.1798, "rewards/accuracies": 1.0, "rewards/chosen": -1.2481385469436646, "rewards/margins": 3.85477352142334, "rewards/rejected": -5.102911949157715, "step": 3705 }, { "epoch": 0.43, "learning_rate": 1.7326089524034485e-07, "logits/chosen": -2.8990285396575928, "logits/rejected": -2.6984751224517822, "logps/chosen": -354.3673400878906, "logps/rejected": -206.9667510986328, "loss": 0.2671, "rewards/accuracies": 0.875, "rewards/chosen": -1.5646005868911743, "rewards/margins": 2.0903940200805664, "rewards/rejected": -3.6549947261810303, "step": 3706 }, { "epoch": 0.43, "learning_rate": 1.7322546356442657e-07, "logits/chosen": -2.411559581756592, "logits/rejected": -2.423661708831787, "logps/chosen": -258.35968017578125, "logps/rejected": -278.9551696777344, "loss": 0.273, "rewards/accuracies": 0.875, "rewards/chosen": -0.444812536239624, "rewards/margins": 1.675734043121338, "rewards/rejected": -2.120546579360962, "step": 3707 }, { "epoch": 0.43, "learning_rate": 1.731900318885083e-07, "logits/chosen": -2.3971192836761475, "logits/rejected": -2.1488118171691895, "logps/chosen": -191.97755432128906, "logps/rejected": -395.44842529296875, "loss": 0.4075, "rewards/accuracies": 0.75, "rewards/chosen": -0.7783681154251099, "rewards/margins": 1.7208970785140991, "rewards/rejected": -2.499265193939209, "step": 3708 }, { "epoch": 0.43, "learning_rate": 1.7315460021259007e-07, "logits/chosen": -2.4006502628326416, "logits/rejected": -2.5030553340911865, "logps/chosen": -243.72093200683594, "logps/rejected": -182.96682739257812, "loss": 0.6075, "rewards/accuracies": 0.625, "rewards/chosen": -1.0155597925186157, "rewards/margins": 0.6701515316963196, "rewards/rejected": -1.68571138381958, "step": 3709 }, { "epoch": 0.43, "learning_rate": 1.731191685366718e-07, "logits/chosen": -2.6574642658233643, "logits/rejected": -2.7064478397369385, "logps/chosen": -223.228515625, "logps/rejected": -237.41146850585938, "loss": 0.6371, "rewards/accuracies": 0.625, "rewards/chosen": -0.9970271587371826, "rewards/margins": 1.9122612476348877, "rewards/rejected": -2.9092884063720703, "step": 3710 }, { "epoch": 0.43, "learning_rate": 1.7308373686075352e-07, "logits/chosen": -2.391976833343506, "logits/rejected": -2.497063159942627, "logps/chosen": -295.07232666015625, "logps/rejected": -241.7061767578125, "loss": 0.3301, "rewards/accuracies": 0.75, "rewards/chosen": -0.8970012068748474, "rewards/margins": 2.3239829540252686, "rewards/rejected": -3.22098445892334, "step": 3711 }, { "epoch": 0.43, "learning_rate": 1.7304830518483524e-07, "logits/chosen": -2.6321370601654053, "logits/rejected": -2.775364637374878, "logps/chosen": -217.9580535888672, "logps/rejected": -236.94781494140625, "loss": 0.149, "rewards/accuracies": 0.875, "rewards/chosen": -0.3888705372810364, "rewards/margins": 4.132527828216553, "rewards/rejected": -4.521398544311523, "step": 3712 }, { "epoch": 0.43, "learning_rate": 1.7301287350891696e-07, "logits/chosen": -2.6394784450531006, "logits/rejected": -2.5765581130981445, "logps/chosen": -188.92958068847656, "logps/rejected": -187.55145263671875, "loss": 0.739, "rewards/accuracies": 0.75, "rewards/chosen": -1.3894834518432617, "rewards/margins": 1.9422335624694824, "rewards/rejected": -3.331716775894165, "step": 3713 }, { "epoch": 0.43, "learning_rate": 1.7297744183299868e-07, "logits/chosen": -1.965806484222412, "logits/rejected": -1.7914789915084839, "logps/chosen": -319.6033935546875, "logps/rejected": -335.74072265625, "loss": 0.3654, "rewards/accuracies": 0.75, "rewards/chosen": -0.9713230133056641, "rewards/margins": 1.536490797996521, "rewards/rejected": -2.5078139305114746, "step": 3714 }, { "epoch": 0.43, "learning_rate": 1.7294201015708043e-07, "logits/chosen": -2.0414977073669434, "logits/rejected": -1.8441251516342163, "logps/chosen": -302.99169921875, "logps/rejected": -284.26605224609375, "loss": 0.506, "rewards/accuracies": 0.625, "rewards/chosen": -0.7070732116699219, "rewards/margins": 0.9306473731994629, "rewards/rejected": -1.6377205848693848, "step": 3715 }, { "epoch": 0.43, "learning_rate": 1.7290657848116215e-07, "logits/chosen": -2.5921173095703125, "logits/rejected": -2.277160167694092, "logps/chosen": -298.1441345214844, "logps/rejected": -377.3199768066406, "loss": 0.1564, "rewards/accuracies": 1.0, "rewards/chosen": -0.6873561143875122, "rewards/margins": 3.1565096378326416, "rewards/rejected": -3.843865394592285, "step": 3716 }, { "epoch": 0.43, "learning_rate": 1.7287114680524388e-07, "logits/chosen": -2.6138858795166016, "logits/rejected": -2.3548648357391357, "logps/chosen": -308.98193359375, "logps/rejected": -352.6666564941406, "loss": 0.2856, "rewards/accuracies": 0.875, "rewards/chosen": -1.3538254499435425, "rewards/margins": 2.7473530769348145, "rewards/rejected": -4.1011786460876465, "step": 3717 }, { "epoch": 0.43, "learning_rate": 1.728357151293256e-07, "logits/chosen": -2.5532941818237305, "logits/rejected": -2.467545509338379, "logps/chosen": -221.00914001464844, "logps/rejected": -182.41798400878906, "loss": 0.339, "rewards/accuracies": 0.75, "rewards/chosen": -1.2797306776046753, "rewards/margins": 1.8898282051086426, "rewards/rejected": -3.1695590019226074, "step": 3718 }, { "epoch": 0.43, "learning_rate": 1.7280028345340732e-07, "logits/chosen": -1.7028050422668457, "logits/rejected": -2.285731077194214, "logps/chosen": -667.54052734375, "logps/rejected": -236.13845825195312, "loss": 0.2136, "rewards/accuracies": 1.0, "rewards/chosen": -0.5811326503753662, "rewards/margins": 2.1210849285125732, "rewards/rejected": -2.7022175788879395, "step": 3719 }, { "epoch": 0.43, "learning_rate": 1.7276485177748904e-07, "logits/chosen": -2.338312864303589, "logits/rejected": -2.547163963317871, "logps/chosen": -224.9838409423828, "logps/rejected": -209.48057556152344, "loss": 0.1432, "rewards/accuracies": 1.0, "rewards/chosen": -0.4627833068370819, "rewards/margins": 2.9405112266540527, "rewards/rejected": -3.403294086456299, "step": 3720 }, { "epoch": 0.43, "learning_rate": 1.7272942010157082e-07, "logits/chosen": -2.065648078918457, "logits/rejected": -2.5420784950256348, "logps/chosen": -657.2072143554688, "logps/rejected": -327.7154541015625, "loss": 0.0936, "rewards/accuracies": 1.0, "rewards/chosen": 0.005409598350524902, "rewards/margins": 3.625261068344116, "rewards/rejected": -3.619851589202881, "step": 3721 }, { "epoch": 0.43, "learning_rate": 1.7269398842565254e-07, "logits/chosen": -2.4185280799865723, "logits/rejected": -2.457672357559204, "logps/chosen": -246.00563049316406, "logps/rejected": -282.01727294921875, "loss": 0.2141, "rewards/accuracies": 0.875, "rewards/chosen": -0.5099341869354248, "rewards/margins": 3.4903016090393066, "rewards/rejected": -4.000235557556152, "step": 3722 }, { "epoch": 0.43, "learning_rate": 1.7265855674973426e-07, "logits/chosen": -2.240135669708252, "logits/rejected": -2.002568006515503, "logps/chosen": -176.00177001953125, "logps/rejected": -154.2832489013672, "loss": 0.3663, "rewards/accuracies": 0.875, "rewards/chosen": -0.14821940660476685, "rewards/margins": 1.4750512838363647, "rewards/rejected": -1.6232706308364868, "step": 3723 }, { "epoch": 0.43, "learning_rate": 1.7262312507381598e-07, "logits/chosen": -2.6076619625091553, "logits/rejected": -2.796152353286743, "logps/chosen": -211.88922119140625, "logps/rejected": -223.60079956054688, "loss": 0.5081, "rewards/accuracies": 0.75, "rewards/chosen": -1.2487589120864868, "rewards/margins": 1.393790364265442, "rewards/rejected": -2.6425490379333496, "step": 3724 }, { "epoch": 0.43, "learning_rate": 1.725876933978977e-07, "logits/chosen": -3.027440309524536, "logits/rejected": -2.908684730529785, "logps/chosen": -201.9524688720703, "logps/rejected": -231.9432373046875, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": -0.9422208666801453, "rewards/margins": 3.074080228805542, "rewards/rejected": -4.016301155090332, "step": 3725 }, { "epoch": 0.43, "learning_rate": 1.7255226172197945e-07, "logits/chosen": -2.1773812770843506, "logits/rejected": -2.0532565116882324, "logps/chosen": -306.9954833984375, "logps/rejected": -360.8705749511719, "loss": 0.1467, "rewards/accuracies": 1.0, "rewards/chosen": -1.1133252382278442, "rewards/margins": 3.4586222171783447, "rewards/rejected": -4.57194709777832, "step": 3726 }, { "epoch": 0.43, "learning_rate": 1.7251683004606118e-07, "logits/chosen": -2.944957733154297, "logits/rejected": -2.915398359298706, "logps/chosen": -220.62588500976562, "logps/rejected": -157.22938537597656, "loss": 0.7815, "rewards/accuracies": 0.5, "rewards/chosen": -1.2194383144378662, "rewards/margins": 0.9688323736190796, "rewards/rejected": -2.1882705688476562, "step": 3727 }, { "epoch": 0.43, "learning_rate": 1.724813983701429e-07, "logits/chosen": -2.7415571212768555, "logits/rejected": -2.64961576461792, "logps/chosen": -272.9709777832031, "logps/rejected": -193.2978973388672, "loss": 0.179, "rewards/accuracies": 1.0, "rewards/chosen": -1.505323052406311, "rewards/margins": 1.9852540493011475, "rewards/rejected": -3.490577220916748, "step": 3728 }, { "epoch": 0.43, "learning_rate": 1.7244596669422462e-07, "logits/chosen": -2.6818881034851074, "logits/rejected": -2.7878992557525635, "logps/chosen": -153.30621337890625, "logps/rejected": -172.41943359375, "loss": 0.5326, "rewards/accuracies": 0.625, "rewards/chosen": -0.9614912271499634, "rewards/margins": 1.2398717403411865, "rewards/rejected": -2.2013628482818604, "step": 3729 }, { "epoch": 0.43, "learning_rate": 1.7241053501830634e-07, "logits/chosen": -1.9672021865844727, "logits/rejected": -2.069403886795044, "logps/chosen": -390.5616455078125, "logps/rejected": -296.745849609375, "loss": 1.4922, "rewards/accuracies": 0.375, "rewards/chosen": -3.451507568359375, "rewards/margins": 0.4091653525829315, "rewards/rejected": -3.860672950744629, "step": 3730 }, { "epoch": 0.43, "learning_rate": 1.7237510334238806e-07, "logits/chosen": -2.519448757171631, "logits/rejected": -2.7333261966705322, "logps/chosen": -343.583984375, "logps/rejected": -231.3396759033203, "loss": 0.7934, "rewards/accuracies": 0.5, "rewards/chosen": -1.8787013292312622, "rewards/margins": 0.494794636964798, "rewards/rejected": -2.3734960556030273, "step": 3731 }, { "epoch": 0.43, "learning_rate": 1.7233967166646984e-07, "logits/chosen": -2.2560088634490967, "logits/rejected": -2.2314107418060303, "logps/chosen": -377.1141357421875, "logps/rejected": -310.6207580566406, "loss": 0.249, "rewards/accuracies": 0.875, "rewards/chosen": -0.7289540767669678, "rewards/margins": 2.2992656230926514, "rewards/rejected": -3.028219699859619, "step": 3732 }, { "epoch": 0.43, "learning_rate": 1.7230423999055156e-07, "logits/chosen": -2.3759357929229736, "logits/rejected": -2.716285228729248, "logps/chosen": -277.15863037109375, "logps/rejected": -303.8381042480469, "loss": 0.1405, "rewards/accuracies": 1.0, "rewards/chosen": -0.9000710248947144, "rewards/margins": 3.6192898750305176, "rewards/rejected": -4.5193610191345215, "step": 3733 }, { "epoch": 0.43, "learning_rate": 1.7226880831463328e-07, "logits/chosen": -1.8162533044815063, "logits/rejected": -1.8569942712783813, "logps/chosen": -388.8763427734375, "logps/rejected": -274.67974853515625, "loss": 0.4228, "rewards/accuracies": 0.75, "rewards/chosen": -0.9277607202529907, "rewards/margins": 1.353257417678833, "rewards/rejected": -2.281018018722534, "step": 3734 }, { "epoch": 0.43, "learning_rate": 1.72233376638715e-07, "logits/chosen": -2.659852981567383, "logits/rejected": -2.4328322410583496, "logps/chosen": -399.3940124511719, "logps/rejected": -344.71380615234375, "loss": 0.2261, "rewards/accuracies": 0.875, "rewards/chosen": -0.7002333998680115, "rewards/margins": 2.2400121688842773, "rewards/rejected": -2.9402458667755127, "step": 3735 }, { "epoch": 0.43, "learning_rate": 1.7219794496279673e-07, "logits/chosen": -2.7664642333984375, "logits/rejected": -2.6604623794555664, "logps/chosen": -124.04094696044922, "logps/rejected": -158.7914276123047, "loss": 0.3977, "rewards/accuracies": 0.875, "rewards/chosen": -0.6143667101860046, "rewards/margins": 1.2038018703460693, "rewards/rejected": -1.8181684017181396, "step": 3736 }, { "epoch": 0.43, "learning_rate": 1.7216251328687845e-07, "logits/chosen": -2.6661806106567383, "logits/rejected": -2.5586349964141846, "logps/chosen": -238.3379364013672, "logps/rejected": -231.2430877685547, "loss": 0.4086, "rewards/accuracies": 0.875, "rewards/chosen": -0.8307764530181885, "rewards/margins": 2.457336664199829, "rewards/rejected": -3.2881131172180176, "step": 3737 }, { "epoch": 0.43, "learning_rate": 1.721270816109602e-07, "logits/chosen": -2.105785608291626, "logits/rejected": -1.8706681728363037, "logps/chosen": -300.86761474609375, "logps/rejected": -347.4740295410156, "loss": 0.5658, "rewards/accuracies": 0.625, "rewards/chosen": -1.0396195650100708, "rewards/margins": 1.0815443992614746, "rewards/rejected": -2.121164083480835, "step": 3738 }, { "epoch": 0.43, "learning_rate": 1.7209164993504192e-07, "logits/chosen": -1.652765154838562, "logits/rejected": -2.1318516731262207, "logps/chosen": -379.6932373046875, "logps/rejected": -274.25689697265625, "loss": 0.2882, "rewards/accuracies": 0.75, "rewards/chosen": -0.770958662033081, "rewards/margins": 2.4208149909973145, "rewards/rejected": -3.1917738914489746, "step": 3739 }, { "epoch": 0.44, "learning_rate": 1.7205621825912364e-07, "logits/chosen": -1.6141008138656616, "logits/rejected": -2.035013198852539, "logps/chosen": -353.3890686035156, "logps/rejected": -267.54058837890625, "loss": 0.3227, "rewards/accuracies": 1.0, "rewards/chosen": -1.131401538848877, "rewards/margins": 2.1745893955230713, "rewards/rejected": -3.305990695953369, "step": 3740 }, { "epoch": 0.44, "learning_rate": 1.7202078658320537e-07, "logits/chosen": -2.8174548149108887, "logits/rejected": -2.592378854751587, "logps/chosen": -168.69192504882812, "logps/rejected": -236.2540740966797, "loss": 0.2657, "rewards/accuracies": 0.875, "rewards/chosen": -0.6635015606880188, "rewards/margins": 2.921603202819824, "rewards/rejected": -3.5851047039031982, "step": 3741 }, { "epoch": 0.44, "learning_rate": 1.719853549072871e-07, "logits/chosen": -2.064040422439575, "logits/rejected": -2.240743637084961, "logps/chosen": -369.3329162597656, "logps/rejected": -341.30145263671875, "loss": 0.5814, "rewards/accuracies": 0.75, "rewards/chosen": -0.5661029815673828, "rewards/margins": 1.2274757623672485, "rewards/rejected": -1.7935787439346313, "step": 3742 }, { "epoch": 0.44, "learning_rate": 1.719499232313688e-07, "logits/chosen": -1.7912862300872803, "logits/rejected": -1.890390157699585, "logps/chosen": -239.19192504882812, "logps/rejected": -240.91177368164062, "loss": 0.2584, "rewards/accuracies": 1.0, "rewards/chosen": -0.9479097723960876, "rewards/margins": 1.497235655784607, "rewards/rejected": -2.445145606994629, "step": 3743 }, { "epoch": 0.44, "learning_rate": 1.7191449155545058e-07, "logits/chosen": -2.304654598236084, "logits/rejected": -2.2825629711151123, "logps/chosen": -233.4609375, "logps/rejected": -323.0733642578125, "loss": 0.0813, "rewards/accuracies": 1.0, "rewards/chosen": -0.6426647901535034, "rewards/margins": 3.874387264251709, "rewards/rejected": -4.517051696777344, "step": 3744 }, { "epoch": 0.44, "learning_rate": 1.718790598795323e-07, "logits/chosen": -1.6577391624450684, "logits/rejected": -1.8928120136260986, "logps/chosen": -375.6813659667969, "logps/rejected": -297.20831298828125, "loss": 0.2241, "rewards/accuracies": 1.0, "rewards/chosen": -1.1766550540924072, "rewards/margins": 2.2279229164123535, "rewards/rejected": -3.4045779705047607, "step": 3745 }, { "epoch": 0.44, "learning_rate": 1.7184362820361403e-07, "logits/chosen": -2.5379116535186768, "logits/rejected": -2.7222719192504883, "logps/chosen": -316.79718017578125, "logps/rejected": -189.24615478515625, "loss": 0.8262, "rewards/accuracies": 0.875, "rewards/chosen": -1.4928467273712158, "rewards/margins": 0.5650280117988586, "rewards/rejected": -2.0578746795654297, "step": 3746 }, { "epoch": 0.44, "learning_rate": 1.7180819652769575e-07, "logits/chosen": -2.3402161598205566, "logits/rejected": -2.507727861404419, "logps/chosen": -370.5191650390625, "logps/rejected": -365.3287353515625, "loss": 0.7351, "rewards/accuracies": 0.625, "rewards/chosen": -1.5296844244003296, "rewards/margins": 1.0636063814163208, "rewards/rejected": -2.5932905673980713, "step": 3747 }, { "epoch": 0.44, "learning_rate": 1.7177276485177747e-07, "logits/chosen": -2.437608003616333, "logits/rejected": -2.531290054321289, "logps/chosen": -374.33868408203125, "logps/rejected": -344.0832214355469, "loss": 0.2847, "rewards/accuracies": 0.75, "rewards/chosen": -1.463425636291504, "rewards/margins": 2.3477416038513184, "rewards/rejected": -3.811166763305664, "step": 3748 }, { "epoch": 0.44, "learning_rate": 1.7173733317585922e-07, "logits/chosen": -2.2733047008514404, "logits/rejected": -2.3666274547576904, "logps/chosen": -168.85377502441406, "logps/rejected": -153.43067932128906, "loss": 0.7193, "rewards/accuracies": 0.5, "rewards/chosen": -1.449173927307129, "rewards/margins": 1.6674580574035645, "rewards/rejected": -3.1166317462921143, "step": 3749 }, { "epoch": 0.44, "learning_rate": 1.7170190149994094e-07, "logits/chosen": -1.8260974884033203, "logits/rejected": -2.141425371170044, "logps/chosen": -307.3590087890625, "logps/rejected": -267.62139892578125, "loss": 0.3974, "rewards/accuracies": 0.875, "rewards/chosen": -0.7419998049736023, "rewards/margins": 1.3440253734588623, "rewards/rejected": -2.0860252380371094, "step": 3750 }, { "epoch": 0.44, "learning_rate": 1.7166646982402267e-07, "logits/chosen": -2.31095290184021, "logits/rejected": -2.496816396713257, "logps/chosen": -266.34405517578125, "logps/rejected": -218.66644287109375, "loss": 0.7084, "rewards/accuracies": 0.625, "rewards/chosen": -0.7933177947998047, "rewards/margins": 1.474044680595398, "rewards/rejected": -2.267362594604492, "step": 3751 }, { "epoch": 0.44, "learning_rate": 1.716310381481044e-07, "logits/chosen": -1.9681038856506348, "logits/rejected": -1.9804086685180664, "logps/chosen": -153.40902709960938, "logps/rejected": -185.9090576171875, "loss": 0.5293, "rewards/accuracies": 0.75, "rewards/chosen": -0.7002599835395813, "rewards/margins": 1.4783798456192017, "rewards/rejected": -2.1786398887634277, "step": 3752 }, { "epoch": 0.44, "learning_rate": 1.715956064721861e-07, "logits/chosen": -2.234421730041504, "logits/rejected": -2.400183916091919, "logps/chosen": -280.2693786621094, "logps/rejected": -307.4417419433594, "loss": 0.1735, "rewards/accuracies": 1.0, "rewards/chosen": -0.5054105520248413, "rewards/margins": 3.0307154655456543, "rewards/rejected": -3.536125898361206, "step": 3753 }, { "epoch": 0.44, "learning_rate": 1.7156017479626783e-07, "logits/chosen": -2.7145230770111084, "logits/rejected": -2.8044052124023438, "logps/chosen": -188.31272888183594, "logps/rejected": -157.29623413085938, "loss": 0.3696, "rewards/accuracies": 0.75, "rewards/chosen": -0.8319178223609924, "rewards/margins": 2.4807350635528564, "rewards/rejected": -3.312653064727783, "step": 3754 }, { "epoch": 0.44, "learning_rate": 1.7152474312034958e-07, "logits/chosen": -2.4802496433258057, "logits/rejected": -2.7808759212493896, "logps/chosen": -285.4593200683594, "logps/rejected": -299.30792236328125, "loss": 0.3339, "rewards/accuracies": 0.875, "rewards/chosen": -0.5036831498146057, "rewards/margins": 1.7123324871063232, "rewards/rejected": -2.2160158157348633, "step": 3755 }, { "epoch": 0.44, "learning_rate": 1.7148931144443133e-07, "logits/chosen": -2.574021100997925, "logits/rejected": -2.441335916519165, "logps/chosen": -346.53619384765625, "logps/rejected": -338.0230712890625, "loss": 0.6001, "rewards/accuracies": 0.625, "rewards/chosen": -1.119659423828125, "rewards/margins": 1.4179264307022095, "rewards/rejected": -2.537585735321045, "step": 3756 }, { "epoch": 0.44, "learning_rate": 1.7145387976851305e-07, "logits/chosen": -2.50264835357666, "logits/rejected": -2.5166878700256348, "logps/chosen": -142.37432861328125, "logps/rejected": -176.509033203125, "loss": 0.4811, "rewards/accuracies": 0.875, "rewards/chosen": -0.7571601867675781, "rewards/margins": 2.1136577129364014, "rewards/rejected": -2.8708176612854004, "step": 3757 }, { "epoch": 0.44, "learning_rate": 1.7141844809259477e-07, "logits/chosen": -2.1976494789123535, "logits/rejected": -2.239157199859619, "logps/chosen": -168.4442138671875, "logps/rejected": -196.59994506835938, "loss": 0.4041, "rewards/accuracies": 0.75, "rewards/chosen": -0.17015379667282104, "rewards/margins": 0.9558542966842651, "rewards/rejected": -1.1260080337524414, "step": 3758 }, { "epoch": 0.44, "learning_rate": 1.713830164166765e-07, "logits/chosen": -3.0385544300079346, "logits/rejected": -2.985635280609131, "logps/chosen": -325.5106201171875, "logps/rejected": -247.2646026611328, "loss": 0.2774, "rewards/accuracies": 0.75, "rewards/chosen": -0.7209484577178955, "rewards/margins": 2.325742721557617, "rewards/rejected": -3.0466909408569336, "step": 3759 }, { "epoch": 0.44, "learning_rate": 1.7134758474075824e-07, "logits/chosen": -2.216569185256958, "logits/rejected": -2.5557949542999268, "logps/chosen": -473.6844177246094, "logps/rejected": -337.4504089355469, "loss": 0.3678, "rewards/accuracies": 0.75, "rewards/chosen": -1.43375563621521, "rewards/margins": 2.153794050216675, "rewards/rejected": -3.5875496864318848, "step": 3760 }, { "epoch": 0.44, "learning_rate": 1.7131215306483997e-07, "logits/chosen": -2.2817442417144775, "logits/rejected": -2.2967941761016846, "logps/chosen": -178.56314086914062, "logps/rejected": -238.62457275390625, "loss": 0.2831, "rewards/accuracies": 0.875, "rewards/chosen": -0.2943980097770691, "rewards/margins": 2.636892557144165, "rewards/rejected": -2.9312903881073, "step": 3761 }, { "epoch": 0.44, "learning_rate": 1.712767213889217e-07, "logits/chosen": -1.9332555532455444, "logits/rejected": -2.3321938514709473, "logps/chosen": -599.8321533203125, "logps/rejected": -328.58221435546875, "loss": 0.2275, "rewards/accuracies": 0.875, "rewards/chosen": -0.8049410581588745, "rewards/margins": 2.013509750366211, "rewards/rejected": -2.818450927734375, "step": 3762 }, { "epoch": 0.44, "learning_rate": 1.712412897130034e-07, "logits/chosen": -2.0838022232055664, "logits/rejected": -2.046811580657959, "logps/chosen": -236.88043212890625, "logps/rejected": -331.28875732421875, "loss": 0.3506, "rewards/accuracies": 0.75, "rewards/chosen": -0.6956567168235779, "rewards/margins": 1.7534615993499756, "rewards/rejected": -2.4491183757781982, "step": 3763 }, { "epoch": 0.44, "learning_rate": 1.7120585803708513e-07, "logits/chosen": -2.398810863494873, "logits/rejected": -2.5273096561431885, "logps/chosen": -380.80364990234375, "logps/rejected": -240.86607360839844, "loss": 0.4475, "rewards/accuracies": 0.625, "rewards/chosen": -0.8784935474395752, "rewards/margins": 1.4033005237579346, "rewards/rejected": -2.2817940711975098, "step": 3764 }, { "epoch": 0.44, "learning_rate": 1.7117042636116686e-07, "logits/chosen": -2.118960380554199, "logits/rejected": -2.149174213409424, "logps/chosen": -259.4793701171875, "logps/rejected": -194.9567108154297, "loss": 0.3017, "rewards/accuracies": 0.875, "rewards/chosen": -0.5692083835601807, "rewards/margins": 2.2336740493774414, "rewards/rejected": -2.802882432937622, "step": 3765 }, { "epoch": 0.44, "learning_rate": 1.7113499468524858e-07, "logits/chosen": -2.107173204421997, "logits/rejected": -1.9755395650863647, "logps/chosen": -374.44677734375, "logps/rejected": -382.8333435058594, "loss": 0.1542, "rewards/accuracies": 1.0, "rewards/chosen": -0.695237398147583, "rewards/margins": 2.859785795211792, "rewards/rejected": -3.555023193359375, "step": 3766 }, { "epoch": 0.44, "learning_rate": 1.7109956300933035e-07, "logits/chosen": -2.025876045227051, "logits/rejected": -1.855359435081482, "logps/chosen": -384.7864990234375, "logps/rejected": -324.9702453613281, "loss": 0.4329, "rewards/accuracies": 0.625, "rewards/chosen": -0.4192999005317688, "rewards/margins": 1.4119718074798584, "rewards/rejected": -1.8312718868255615, "step": 3767 }, { "epoch": 0.44, "learning_rate": 1.7106413133341207e-07, "logits/chosen": -2.419177770614624, "logits/rejected": -2.533783435821533, "logps/chosen": -229.5596923828125, "logps/rejected": -118.90814971923828, "loss": 1.7951, "rewards/accuracies": 0.625, "rewards/chosen": -2.095093011856079, "rewards/margins": -0.29753679037094116, "rewards/rejected": -1.7975562810897827, "step": 3768 }, { "epoch": 0.44, "learning_rate": 1.710286996574938e-07, "logits/chosen": -1.7026597261428833, "logits/rejected": -1.985854983329773, "logps/chosen": -381.9714050292969, "logps/rejected": -221.5395965576172, "loss": 0.6559, "rewards/accuracies": 0.625, "rewards/chosen": -1.4126040935516357, "rewards/margins": 1.198707938194275, "rewards/rejected": -2.611311912536621, "step": 3769 }, { "epoch": 0.44, "learning_rate": 1.7099326798157552e-07, "logits/chosen": -2.6705987453460693, "logits/rejected": -2.822188138961792, "logps/chosen": -308.56658935546875, "logps/rejected": -303.24285888671875, "loss": 0.1802, "rewards/accuracies": 1.0, "rewards/chosen": -0.9306091666221619, "rewards/margins": 3.848820447921753, "rewards/rejected": -4.7794294357299805, "step": 3770 }, { "epoch": 0.44, "learning_rate": 1.7095783630565727e-07, "logits/chosen": -2.691370964050293, "logits/rejected": -2.7330918312072754, "logps/chosen": -393.5550537109375, "logps/rejected": -242.232666015625, "loss": 0.4034, "rewards/accuracies": 0.75, "rewards/chosen": -0.3191079795360565, "rewards/margins": 1.3799216747283936, "rewards/rejected": -1.6990296840667725, "step": 3771 }, { "epoch": 0.44, "learning_rate": 1.70922404629739e-07, "logits/chosen": -2.1604669094085693, "logits/rejected": -2.2781217098236084, "logps/chosen": -243.04519653320312, "logps/rejected": -311.46893310546875, "loss": 0.5547, "rewards/accuracies": 0.625, "rewards/chosen": -1.6873865127563477, "rewards/margins": 1.1615521907806396, "rewards/rejected": -2.8489387035369873, "step": 3772 }, { "epoch": 0.44, "learning_rate": 1.708869729538207e-07, "logits/chosen": -2.8215832710266113, "logits/rejected": -2.7337865829467773, "logps/chosen": -291.673583984375, "logps/rejected": -281.651123046875, "loss": 0.4424, "rewards/accuracies": 0.875, "rewards/chosen": -0.9925156831741333, "rewards/margins": 3.1228511333465576, "rewards/rejected": -4.1153669357299805, "step": 3773 }, { "epoch": 0.44, "learning_rate": 1.7085154127790243e-07, "logits/chosen": -2.391026020050049, "logits/rejected": -2.370880603790283, "logps/chosen": -417.5521240234375, "logps/rejected": -322.7779235839844, "loss": 0.4944, "rewards/accuracies": 0.75, "rewards/chosen": -0.3103489875793457, "rewards/margins": 0.9004037976264954, "rewards/rejected": -1.2107528448104858, "step": 3774 }, { "epoch": 0.44, "learning_rate": 1.7081610960198416e-07, "logits/chosen": -2.6042723655700684, "logits/rejected": -2.582612991333008, "logps/chosen": -101.27119445800781, "logps/rejected": -201.7843017578125, "loss": 0.2097, "rewards/accuracies": 0.875, "rewards/chosen": -0.5204496383666992, "rewards/margins": 3.3009033203125, "rewards/rejected": -3.821352958679199, "step": 3775 }, { "epoch": 0.44, "learning_rate": 1.7078067792606588e-07, "logits/chosen": -1.8413634300231934, "logits/rejected": -2.11630916595459, "logps/chosen": -274.393798828125, "logps/rejected": -170.50503540039062, "loss": 0.6766, "rewards/accuracies": 0.625, "rewards/chosen": -0.4597012996673584, "rewards/margins": 0.5728222131729126, "rewards/rejected": -1.032523512840271, "step": 3776 }, { "epoch": 0.44, "learning_rate": 1.707452462501476e-07, "logits/chosen": -2.4838674068450928, "logits/rejected": -2.206958532333374, "logps/chosen": -171.67654418945312, "logps/rejected": -316.51708984375, "loss": 0.3382, "rewards/accuracies": 0.875, "rewards/chosen": -1.1162574291229248, "rewards/margins": 2.1751821041107178, "rewards/rejected": -3.2914395332336426, "step": 3777 }, { "epoch": 0.44, "learning_rate": 1.7070981457422935e-07, "logits/chosen": -2.316068649291992, "logits/rejected": -2.5531888008117676, "logps/chosen": -239.25057983398438, "logps/rejected": -154.2504119873047, "loss": 0.305, "rewards/accuracies": 0.875, "rewards/chosen": -0.7143497467041016, "rewards/margins": 1.6597325801849365, "rewards/rejected": -2.374082088470459, "step": 3778 }, { "epoch": 0.44, "learning_rate": 1.706743828983111e-07, "logits/chosen": -1.8977967500686646, "logits/rejected": -2.0684292316436768, "logps/chosen": -289.68499755859375, "logps/rejected": -273.0801696777344, "loss": 0.4918, "rewards/accuracies": 0.875, "rewards/chosen": -0.7247843742370605, "rewards/margins": 0.9484382271766663, "rewards/rejected": -1.673222541809082, "step": 3779 }, { "epoch": 0.44, "learning_rate": 1.7063895122239282e-07, "logits/chosen": -2.3799855709075928, "logits/rejected": -2.4277968406677246, "logps/chosen": -211.29977416992188, "logps/rejected": -344.5084533691406, "loss": 0.5896, "rewards/accuracies": 0.75, "rewards/chosen": -0.5603629946708679, "rewards/margins": 2.235182285308838, "rewards/rejected": -2.7955453395843506, "step": 3780 }, { "epoch": 0.44, "learning_rate": 1.7060351954647454e-07, "logits/chosen": -2.541721820831299, "logits/rejected": -2.463068723678589, "logps/chosen": -389.0673828125, "logps/rejected": -282.7577819824219, "loss": 0.2377, "rewards/accuracies": 0.875, "rewards/chosen": -0.8875350952148438, "rewards/margins": 1.9013620615005493, "rewards/rejected": -2.7888970375061035, "step": 3781 }, { "epoch": 0.44, "learning_rate": 1.7056808787055626e-07, "logits/chosen": -2.552694320678711, "logits/rejected": -2.7252066135406494, "logps/chosen": -256.26812744140625, "logps/rejected": -221.91213989257812, "loss": 0.0508, "rewards/accuracies": 1.0, "rewards/chosen": -0.2729310691356659, "rewards/margins": 3.9842653274536133, "rewards/rejected": -4.257196426391602, "step": 3782 }, { "epoch": 0.44, "learning_rate": 1.70532656194638e-07, "logits/chosen": -2.44295597076416, "logits/rejected": -2.4547760486602783, "logps/chosen": -196.97120666503906, "logps/rejected": -257.48553466796875, "loss": 0.4387, "rewards/accuracies": 0.75, "rewards/chosen": -1.2553638219833374, "rewards/margins": 2.1991448402404785, "rewards/rejected": -3.4545087814331055, "step": 3783 }, { "epoch": 0.44, "learning_rate": 1.7049722451871973e-07, "logits/chosen": -1.771080493927002, "logits/rejected": -2.0155606269836426, "logps/chosen": -573.7744140625, "logps/rejected": -309.09027099609375, "loss": 0.3691, "rewards/accuracies": 0.75, "rewards/chosen": -1.2098865509033203, "rewards/margins": 1.635573148727417, "rewards/rejected": -2.8454599380493164, "step": 3784 }, { "epoch": 0.44, "learning_rate": 1.7046179284280146e-07, "logits/chosen": -2.750241279602051, "logits/rejected": -2.6381092071533203, "logps/chosen": -181.6207275390625, "logps/rejected": -310.04962158203125, "loss": 0.4858, "rewards/accuracies": 0.75, "rewards/chosen": -0.5318410396575928, "rewards/margins": 1.692322015762329, "rewards/rejected": -2.224163055419922, "step": 3785 }, { "epoch": 0.44, "learning_rate": 1.7042636116688318e-07, "logits/chosen": -2.147456645965576, "logits/rejected": -2.265768527984619, "logps/chosen": -326.4219970703125, "logps/rejected": -337.4965515136719, "loss": 0.8649, "rewards/accuracies": 0.375, "rewards/chosen": -1.0378087759017944, "rewards/margins": 0.4177129864692688, "rewards/rejected": -1.455521821975708, "step": 3786 }, { "epoch": 0.44, "learning_rate": 1.703909294909649e-07, "logits/chosen": -2.4669437408447266, "logits/rejected": -2.426112651824951, "logps/chosen": -123.30867767333984, "logps/rejected": -209.21609497070312, "loss": 0.3848, "rewards/accuracies": 0.875, "rewards/chosen": -0.6679010987281799, "rewards/margins": 2.6773533821105957, "rewards/rejected": -3.345254421234131, "step": 3787 }, { "epoch": 0.44, "learning_rate": 1.7035549781504662e-07, "logits/chosen": -1.6903389692306519, "logits/rejected": -1.818345069885254, "logps/chosen": -372.83050537109375, "logps/rejected": -348.4471130371094, "loss": 0.5429, "rewards/accuracies": 0.75, "rewards/chosen": -1.5003893375396729, "rewards/margins": 1.194337248802185, "rewards/rejected": -2.6947264671325684, "step": 3788 }, { "epoch": 0.44, "learning_rate": 1.7032006613912837e-07, "logits/chosen": -2.6125450134277344, "logits/rejected": -2.5782251358032227, "logps/chosen": -352.9016418457031, "logps/rejected": -274.00128173828125, "loss": 0.3472, "rewards/accuracies": 0.875, "rewards/chosen": -0.86961430311203, "rewards/margins": 1.9341182708740234, "rewards/rejected": -2.8037328720092773, "step": 3789 }, { "epoch": 0.44, "learning_rate": 1.702846344632101e-07, "logits/chosen": -1.9954301118850708, "logits/rejected": -1.6997146606445312, "logps/chosen": -207.79861450195312, "logps/rejected": -411.94195556640625, "loss": 0.4243, "rewards/accuracies": 0.75, "rewards/chosen": -0.90308678150177, "rewards/margins": 2.4553451538085938, "rewards/rejected": -3.358431816101074, "step": 3790 }, { "epoch": 0.44, "learning_rate": 1.7024920278729184e-07, "logits/chosen": -2.3168773651123047, "logits/rejected": -2.3801403045654297, "logps/chosen": -286.3769226074219, "logps/rejected": -259.5494384765625, "loss": 0.2446, "rewards/accuracies": 0.875, "rewards/chosen": -2.1294069290161133, "rewards/margins": 2.508059501647949, "rewards/rejected": -4.6374664306640625, "step": 3791 }, { "epoch": 0.44, "learning_rate": 1.7021377111137356e-07, "logits/chosen": -2.6780409812927246, "logits/rejected": -2.3034110069274902, "logps/chosen": -192.69677734375, "logps/rejected": -338.19683837890625, "loss": 0.3384, "rewards/accuracies": 0.75, "rewards/chosen": -0.4733058214187622, "rewards/margins": 1.9423298835754395, "rewards/rejected": -2.415635585784912, "step": 3792 }, { "epoch": 0.44, "learning_rate": 1.7017833943545529e-07, "logits/chosen": -1.7744717597961426, "logits/rejected": -1.7573881149291992, "logps/chosen": -442.5962829589844, "logps/rejected": -363.1497497558594, "loss": 0.7051, "rewards/accuracies": 0.75, "rewards/chosen": -2.302548408508301, "rewards/margins": 1.5919055938720703, "rewards/rejected": -3.894454002380371, "step": 3793 }, { "epoch": 0.44, "learning_rate": 1.7014290775953703e-07, "logits/chosen": -2.6654059886932373, "logits/rejected": -2.80353045463562, "logps/chosen": -242.20758056640625, "logps/rejected": -288.70123291015625, "loss": 0.3068, "rewards/accuracies": 0.875, "rewards/chosen": -0.49731239676475525, "rewards/margins": 2.529067277908325, "rewards/rejected": -3.0263798236846924, "step": 3794 }, { "epoch": 0.44, "learning_rate": 1.7010747608361876e-07, "logits/chosen": -1.3455537557601929, "logits/rejected": -1.25901460647583, "logps/chosen": -433.4952697753906, "logps/rejected": -465.3505859375, "loss": 0.2013, "rewards/accuracies": 0.875, "rewards/chosen": -0.47454094886779785, "rewards/margins": 2.747326612472534, "rewards/rejected": -3.221867561340332, "step": 3795 }, { "epoch": 0.44, "learning_rate": 1.7007204440770048e-07, "logits/chosen": -2.9750380516052246, "logits/rejected": -3.0171055793762207, "logps/chosen": -194.07867431640625, "logps/rejected": -265.92193603515625, "loss": 0.2705, "rewards/accuracies": 0.75, "rewards/chosen": -1.0400372743606567, "rewards/margins": 2.950512170791626, "rewards/rejected": -3.990549325942993, "step": 3796 }, { "epoch": 0.44, "learning_rate": 1.700366127317822e-07, "logits/chosen": -2.4030394554138184, "logits/rejected": -2.34592866897583, "logps/chosen": -280.1178283691406, "logps/rejected": -336.9311218261719, "loss": 0.1286, "rewards/accuracies": 1.0, "rewards/chosen": -0.8290759325027466, "rewards/margins": 2.9297900199890137, "rewards/rejected": -3.7588655948638916, "step": 3797 }, { "epoch": 0.44, "learning_rate": 1.7000118105586392e-07, "logits/chosen": -2.0979678630828857, "logits/rejected": -2.384960651397705, "logps/chosen": -264.5905456542969, "logps/rejected": -187.75953674316406, "loss": 0.3361, "rewards/accuracies": 0.875, "rewards/chosen": -0.739276647567749, "rewards/margins": 1.5950044393539429, "rewards/rejected": -2.3342809677124023, "step": 3798 }, { "epoch": 0.44, "learning_rate": 1.6996574937994565e-07, "logits/chosen": -2.6050305366516113, "logits/rejected": -2.4710755348205566, "logps/chosen": -251.95994567871094, "logps/rejected": -289.75982666015625, "loss": 0.1, "rewards/accuracies": 1.0, "rewards/chosen": -0.5248978137969971, "rewards/margins": 4.193580150604248, "rewards/rejected": -4.718477725982666, "step": 3799 }, { "epoch": 0.44, "learning_rate": 1.699303177040274e-07, "logits/chosen": -1.9977912902832031, "logits/rejected": -2.385557174682617, "logps/chosen": -376.3791198730469, "logps/rejected": -360.1363525390625, "loss": 0.1617, "rewards/accuracies": 0.875, "rewards/chosen": -0.7329797148704529, "rewards/margins": 3.9863181114196777, "rewards/rejected": -4.719297885894775, "step": 3800 }, { "epoch": 0.44, "learning_rate": 1.6989488602810912e-07, "logits/chosen": -1.998733639717102, "logits/rejected": -2.0676770210266113, "logps/chosen": -229.2128448486328, "logps/rejected": -248.56346130371094, "loss": 0.4845, "rewards/accuracies": 0.75, "rewards/chosen": -0.8291890621185303, "rewards/margins": 1.2897917032241821, "rewards/rejected": -2.118980884552002, "step": 3801 }, { "epoch": 0.44, "learning_rate": 1.6985945435219086e-07, "logits/chosen": -2.1137795448303223, "logits/rejected": -2.646155834197998, "logps/chosen": -452.292724609375, "logps/rejected": -245.48651123046875, "loss": 0.3524, "rewards/accuracies": 0.875, "rewards/chosen": -0.49639061093330383, "rewards/margins": 2.143827199935913, "rewards/rejected": -2.6402180194854736, "step": 3802 }, { "epoch": 0.44, "learning_rate": 1.698240226762726e-07, "logits/chosen": -2.749824047088623, "logits/rejected": -2.762599468231201, "logps/chosen": -105.44390106201172, "logps/rejected": -176.61407470703125, "loss": 0.8021, "rewards/accuracies": 0.5, "rewards/chosen": -1.0743046998977661, "rewards/margins": 1.1487983465194702, "rewards/rejected": -2.2231030464172363, "step": 3803 }, { "epoch": 0.44, "learning_rate": 1.697885910003543e-07, "logits/chosen": -2.345707416534424, "logits/rejected": -2.4009432792663574, "logps/chosen": -182.53590393066406, "logps/rejected": -226.47857666015625, "loss": 0.3841, "rewards/accuracies": 0.75, "rewards/chosen": -1.119269847869873, "rewards/margins": 2.193181037902832, "rewards/rejected": -3.312450885772705, "step": 3804 }, { "epoch": 0.44, "learning_rate": 1.6975315932443606e-07, "logits/chosen": -1.9573720693588257, "logits/rejected": -1.7582001686096191, "logps/chosen": -105.86338806152344, "logps/rejected": -271.5712585449219, "loss": 0.3033, "rewards/accuracies": 0.875, "rewards/chosen": -0.16308534145355225, "rewards/margins": 2.8431951999664307, "rewards/rejected": -3.0062804222106934, "step": 3805 }, { "epoch": 0.44, "learning_rate": 1.6971772764851778e-07, "logits/chosen": -1.6990699768066406, "logits/rejected": -1.642216682434082, "logps/chosen": -270.33465576171875, "logps/rejected": -355.02642822265625, "loss": 0.5785, "rewards/accuracies": 0.625, "rewards/chosen": -0.5129654407501221, "rewards/margins": 1.2147226333618164, "rewards/rejected": -1.7276880741119385, "step": 3806 }, { "epoch": 0.44, "learning_rate": 1.696822959725995e-07, "logits/chosen": -1.8806803226470947, "logits/rejected": -1.9546822309494019, "logps/chosen": -272.9195251464844, "logps/rejected": -348.9990234375, "loss": 0.3218, "rewards/accuracies": 0.75, "rewards/chosen": -1.192997932434082, "rewards/margins": 2.1266751289367676, "rewards/rejected": -3.3196730613708496, "step": 3807 }, { "epoch": 0.44, "learning_rate": 1.6964686429668122e-07, "logits/chosen": -2.0564887523651123, "logits/rejected": -2.413443088531494, "logps/chosen": -301.5754089355469, "logps/rejected": -246.79257202148438, "loss": 0.5525, "rewards/accuracies": 0.625, "rewards/chosen": -0.9160251617431641, "rewards/margins": 2.058910369873047, "rewards/rejected": -2.974936008453369, "step": 3808 }, { "epoch": 0.44, "learning_rate": 1.6961143262076295e-07, "logits/chosen": -2.3290672302246094, "logits/rejected": -2.4375576972961426, "logps/chosen": -253.19122314453125, "logps/rejected": -282.9997253417969, "loss": 0.2847, "rewards/accuracies": 0.75, "rewards/chosen": -0.5727555751800537, "rewards/margins": 3.100311040878296, "rewards/rejected": -3.6730666160583496, "step": 3809 }, { "epoch": 0.44, "learning_rate": 1.6957600094484467e-07, "logits/chosen": -2.46608567237854, "logits/rejected": -2.53562331199646, "logps/chosen": -306.95404052734375, "logps/rejected": -348.24993896484375, "loss": 0.5591, "rewards/accuracies": 0.75, "rewards/chosen": -0.8656816482543945, "rewards/margins": 0.485037624835968, "rewards/rejected": -1.3507193326950073, "step": 3810 }, { "epoch": 0.44, "learning_rate": 1.695405692689264e-07, "logits/chosen": -2.3709793090820312, "logits/rejected": -2.5072054862976074, "logps/chosen": -229.4619140625, "logps/rejected": -313.1302795410156, "loss": 0.3482, "rewards/accuracies": 0.75, "rewards/chosen": -0.699439287185669, "rewards/margins": 2.894807815551758, "rewards/rejected": -3.5942466259002686, "step": 3811 }, { "epoch": 0.44, "learning_rate": 1.6950513759300814e-07, "logits/chosen": -2.248223304748535, "logits/rejected": -2.088867425918579, "logps/chosen": -184.47830200195312, "logps/rejected": -277.65435791015625, "loss": 0.6972, "rewards/accuracies": 0.75, "rewards/chosen": -0.6227772831916809, "rewards/margins": 0.8355299830436707, "rewards/rejected": -1.4583073854446411, "step": 3812 }, { "epoch": 0.44, "learning_rate": 1.6946970591708986e-07, "logits/chosen": -2.9243359565734863, "logits/rejected": -2.6137940883636475, "logps/chosen": -648.00732421875, "logps/rejected": -369.20367431640625, "loss": 0.2215, "rewards/accuracies": 0.875, "rewards/chosen": -0.8166504502296448, "rewards/margins": 3.634876012802124, "rewards/rejected": -4.451526641845703, "step": 3813 }, { "epoch": 0.44, "learning_rate": 1.694342742411716e-07, "logits/chosen": -1.915997862815857, "logits/rejected": -1.8202357292175293, "logps/chosen": -331.0302734375, "logps/rejected": -322.8487548828125, "loss": 0.4651, "rewards/accuracies": 0.75, "rewards/chosen": -1.5964080095291138, "rewards/margins": 1.3518257141113281, "rewards/rejected": -2.9482338428497314, "step": 3814 }, { "epoch": 0.44, "learning_rate": 1.6939884256525333e-07, "logits/chosen": -2.5051538944244385, "logits/rejected": -2.427781343460083, "logps/chosen": -407.1469421386719, "logps/rejected": -401.8533630371094, "loss": 0.5027, "rewards/accuracies": 0.75, "rewards/chosen": -1.0337773561477661, "rewards/margins": 2.84604549407959, "rewards/rejected": -3.8798229694366455, "step": 3815 }, { "epoch": 0.44, "learning_rate": 1.6936341088933508e-07, "logits/chosen": -2.479362726211548, "logits/rejected": -2.6405086517333984, "logps/chosen": -266.91314697265625, "logps/rejected": -330.23004150390625, "loss": 0.2021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8573217391967773, "rewards/margins": 2.789606809616089, "rewards/rejected": -3.646928310394287, "step": 3816 }, { "epoch": 0.44, "learning_rate": 1.693279792134168e-07, "logits/chosen": -2.3473703861236572, "logits/rejected": -2.3644118309020996, "logps/chosen": -192.52603149414062, "logps/rejected": -246.68521118164062, "loss": 0.5147, "rewards/accuracies": 0.75, "rewards/chosen": -0.7844061851501465, "rewards/margins": 3.8912973403930664, "rewards/rejected": -4.675703525543213, "step": 3817 }, { "epoch": 0.44, "learning_rate": 1.6929254753749852e-07, "logits/chosen": -2.5933048725128174, "logits/rejected": -2.5572397708892822, "logps/chosen": -149.1268310546875, "logps/rejected": -188.1115264892578, "loss": 1.0319, "rewards/accuracies": 0.75, "rewards/chosen": -2.248836040496826, "rewards/margins": 1.2450098991394043, "rewards/rejected": -3.4938457012176514, "step": 3818 }, { "epoch": 0.44, "learning_rate": 1.6925711586158025e-07, "logits/chosen": -2.6353275775909424, "logits/rejected": -2.4235434532165527, "logps/chosen": -141.26608276367188, "logps/rejected": -223.60427856445312, "loss": 0.5521, "rewards/accuracies": 0.75, "rewards/chosen": -1.7224225997924805, "rewards/margins": 1.6562871932983398, "rewards/rejected": -3.3787097930908203, "step": 3819 }, { "epoch": 0.44, "learning_rate": 1.6922168418566197e-07, "logits/chosen": -1.861281156539917, "logits/rejected": -2.05714750289917, "logps/chosen": -523.281005859375, "logps/rejected": -352.99615478515625, "loss": 0.3111, "rewards/accuracies": 0.875, "rewards/chosen": -0.658854603767395, "rewards/margins": 2.544095754623413, "rewards/rejected": -3.2029502391815186, "step": 3820 }, { "epoch": 0.44, "learning_rate": 1.691862525097437e-07, "logits/chosen": -1.50089430809021, "logits/rejected": -1.8316129446029663, "logps/chosen": -678.7535400390625, "logps/rejected": -551.0390625, "loss": 0.3514, "rewards/accuracies": 0.875, "rewards/chosen": -0.5951563119888306, "rewards/margins": 2.3509392738342285, "rewards/rejected": -2.9460954666137695, "step": 3821 }, { "epoch": 0.44, "learning_rate": 1.691508208338254e-07, "logits/chosen": -2.340848445892334, "logits/rejected": -2.574352502822876, "logps/chosen": -292.50750732421875, "logps/rejected": -245.32638549804688, "loss": 0.9604, "rewards/accuracies": 0.625, "rewards/chosen": -2.042235851287842, "rewards/margins": 0.6551424264907837, "rewards/rejected": -2.697378158569336, "step": 3822 }, { "epoch": 0.44, "learning_rate": 1.6911538915790716e-07, "logits/chosen": -2.955321788787842, "logits/rejected": -2.7621734142303467, "logps/chosen": -266.4412841796875, "logps/rejected": -286.2760009765625, "loss": 0.3938, "rewards/accuracies": 0.75, "rewards/chosen": -1.0673372745513916, "rewards/margins": 2.9986519813537598, "rewards/rejected": -4.065989017486572, "step": 3823 }, { "epoch": 0.44, "learning_rate": 1.6907995748198888e-07, "logits/chosen": -2.2558627128601074, "logits/rejected": -2.4350106716156006, "logps/chosen": -321.59381103515625, "logps/rejected": -243.19546508789062, "loss": 0.3324, "rewards/accuracies": 0.875, "rewards/chosen": -0.6221703886985779, "rewards/margins": 1.205824375152588, "rewards/rejected": -1.8279948234558105, "step": 3824 }, { "epoch": 0.44, "learning_rate": 1.690445258060706e-07, "logits/chosen": -2.3505730628967285, "logits/rejected": -2.0519866943359375, "logps/chosen": -215.49693298339844, "logps/rejected": -320.3085021972656, "loss": 0.2716, "rewards/accuracies": 0.875, "rewards/chosen": -1.8347928524017334, "rewards/margins": 2.889139413833618, "rewards/rejected": -4.723931789398193, "step": 3825 }, { "epoch": 0.45, "learning_rate": 1.6900909413015235e-07, "logits/chosen": -2.0997064113616943, "logits/rejected": -1.8105250597000122, "logps/chosen": -330.0223388671875, "logps/rejected": -371.4913024902344, "loss": 0.381, "rewards/accuracies": 0.875, "rewards/chosen": -0.36151543259620667, "rewards/margins": 1.5176966190338135, "rewards/rejected": -1.8792121410369873, "step": 3826 }, { "epoch": 0.45, "learning_rate": 1.6897366245423408e-07, "logits/chosen": -2.4775309562683105, "logits/rejected": -2.5103628635406494, "logps/chosen": -246.88796997070312, "logps/rejected": -341.9145812988281, "loss": 0.4795, "rewards/accuracies": 0.75, "rewards/chosen": -1.9078621864318848, "rewards/margins": 0.7733854055404663, "rewards/rejected": -2.6812474727630615, "step": 3827 }, { "epoch": 0.45, "learning_rate": 1.6893823077831583e-07, "logits/chosen": -2.201251268386841, "logits/rejected": -2.367485523223877, "logps/chosen": -369.099365234375, "logps/rejected": -232.81051635742188, "loss": 0.3126, "rewards/accuracies": 0.875, "rewards/chosen": -1.2392033338546753, "rewards/margins": 1.4680966138839722, "rewards/rejected": -2.7072999477386475, "step": 3828 }, { "epoch": 0.45, "learning_rate": 1.6890279910239755e-07, "logits/chosen": -1.9296603202819824, "logits/rejected": -2.0190649032592773, "logps/chosen": -336.2305908203125, "logps/rejected": -327.4036865234375, "loss": 0.6317, "rewards/accuracies": 0.875, "rewards/chosen": -1.0076663494110107, "rewards/margins": 2.1324057579040527, "rewards/rejected": -3.1400718688964844, "step": 3829 }, { "epoch": 0.45, "learning_rate": 1.6886736742647927e-07, "logits/chosen": -2.5764341354370117, "logits/rejected": -2.672109842300415, "logps/chosen": -294.19342041015625, "logps/rejected": -276.70465087890625, "loss": 0.2054, "rewards/accuracies": 1.0, "rewards/chosen": -1.339808702468872, "rewards/margins": 3.521073818206787, "rewards/rejected": -4.860882759094238, "step": 3830 }, { "epoch": 0.45, "learning_rate": 1.68831935750561e-07, "logits/chosen": -2.8378944396972656, "logits/rejected": -2.6434946060180664, "logps/chosen": -240.7099151611328, "logps/rejected": -347.62567138671875, "loss": 0.5889, "rewards/accuracies": 0.75, "rewards/chosen": -1.6218593120574951, "rewards/margins": 2.683563709259033, "rewards/rejected": -4.305422782897949, "step": 3831 }, { "epoch": 0.45, "learning_rate": 1.6879650407464271e-07, "logits/chosen": -1.9786877632141113, "logits/rejected": -2.3769423961639404, "logps/chosen": -503.4774169921875, "logps/rejected": -282.1924133300781, "loss": 0.3259, "rewards/accuracies": 0.875, "rewards/chosen": -1.2291380167007446, "rewards/margins": 1.903969407081604, "rewards/rejected": -3.1331076622009277, "step": 3832 }, { "epoch": 0.45, "learning_rate": 1.6876107239872444e-07, "logits/chosen": -2.591981887817383, "logits/rejected": -2.639894485473633, "logps/chosen": -208.23513793945312, "logps/rejected": -365.4489440917969, "loss": 0.1689, "rewards/accuracies": 1.0, "rewards/chosen": -0.8175657391548157, "rewards/margins": 3.630483627319336, "rewards/rejected": -4.448049545288086, "step": 3833 }, { "epoch": 0.45, "learning_rate": 1.6872564072280618e-07, "logits/chosen": -2.569997787475586, "logits/rejected": -2.4607348442077637, "logps/chosen": -136.52171325683594, "logps/rejected": -193.04742431640625, "loss": 0.519, "rewards/accuracies": 0.625, "rewards/chosen": -0.4237495958805084, "rewards/margins": 1.0032405853271484, "rewards/rejected": -1.4269901514053345, "step": 3834 }, { "epoch": 0.45, "learning_rate": 1.686902090468879e-07, "logits/chosen": -2.391303539276123, "logits/rejected": -2.215677499771118, "logps/chosen": -136.81101989746094, "logps/rejected": -212.63095092773438, "loss": 0.5272, "rewards/accuracies": 0.625, "rewards/chosen": -1.5388848781585693, "rewards/margins": 1.495161533355713, "rewards/rejected": -3.034046173095703, "step": 3835 }, { "epoch": 0.45, "learning_rate": 1.6865477737096963e-07, "logits/chosen": -1.9732255935668945, "logits/rejected": -2.1740615367889404, "logps/chosen": -223.4727783203125, "logps/rejected": -286.63189697265625, "loss": 0.2425, "rewards/accuracies": 0.875, "rewards/chosen": -0.8594789505004883, "rewards/margins": 3.4460387229919434, "rewards/rejected": -4.305517196655273, "step": 3836 }, { "epoch": 0.45, "learning_rate": 1.6861934569505138e-07, "logits/chosen": -2.314375162124634, "logits/rejected": -2.3977975845336914, "logps/chosen": -323.109619140625, "logps/rejected": -206.76963806152344, "loss": 0.4224, "rewards/accuracies": 0.875, "rewards/chosen": -1.316775918006897, "rewards/margins": 2.080357551574707, "rewards/rejected": -3.3971333503723145, "step": 3837 }, { "epoch": 0.45, "learning_rate": 1.685839140191331e-07, "logits/chosen": -2.1541738510131836, "logits/rejected": -1.967624306678772, "logps/chosen": -203.4284210205078, "logps/rejected": -197.76893615722656, "loss": 0.2615, "rewards/accuracies": 1.0, "rewards/chosen": -0.9353717565536499, "rewards/margins": 1.8561707735061646, "rewards/rejected": -2.7915425300598145, "step": 3838 }, { "epoch": 0.45, "learning_rate": 1.6854848234321485e-07, "logits/chosen": -2.922396183013916, "logits/rejected": -2.760819911956787, "logps/chosen": -206.8907470703125, "logps/rejected": -392.7366943359375, "loss": 0.2492, "rewards/accuracies": 0.875, "rewards/chosen": -1.02013099193573, "rewards/margins": 3.468158006668091, "rewards/rejected": -4.4882893562316895, "step": 3839 }, { "epoch": 0.45, "learning_rate": 1.6851305066729657e-07, "logits/chosen": -2.0662384033203125, "logits/rejected": -2.0066256523132324, "logps/chosen": -332.0126647949219, "logps/rejected": -433.36529541015625, "loss": 0.3628, "rewards/accuracies": 0.75, "rewards/chosen": -0.7707081437110901, "rewards/margins": 3.1651487350463867, "rewards/rejected": -3.935857057571411, "step": 3840 }, { "epoch": 0.45, "learning_rate": 1.684776189913783e-07, "logits/chosen": -2.822439193725586, "logits/rejected": -2.8959145545959473, "logps/chosen": -159.08189392089844, "logps/rejected": -275.6336364746094, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": -1.8464915752410889, "rewards/margins": 4.825191497802734, "rewards/rejected": -6.671683311462402, "step": 3841 }, { "epoch": 0.45, "learning_rate": 1.6844218731546001e-07, "logits/chosen": -2.0508203506469727, "logits/rejected": -2.3440301418304443, "logps/chosen": -314.48138427734375, "logps/rejected": -242.37493896484375, "loss": 0.2581, "rewards/accuracies": 0.875, "rewards/chosen": -0.8466165661811829, "rewards/margins": 3.564997911453247, "rewards/rejected": -4.411614418029785, "step": 3842 }, { "epoch": 0.45, "learning_rate": 1.6840675563954174e-07, "logits/chosen": -2.015721559524536, "logits/rejected": -2.233762264251709, "logps/chosen": -320.73297119140625, "logps/rejected": -308.6557922363281, "loss": 0.2471, "rewards/accuracies": 0.875, "rewards/chosen": 0.1274835616350174, "rewards/margins": 1.909595251083374, "rewards/rejected": -1.782111644744873, "step": 3843 }, { "epoch": 0.45, "learning_rate": 1.6837132396362346e-07, "logits/chosen": -2.3871235847473145, "logits/rejected": -2.041787624359131, "logps/chosen": -315.1697998046875, "logps/rejected": -264.20599365234375, "loss": 0.2156, "rewards/accuracies": 0.875, "rewards/chosen": -0.6867462992668152, "rewards/margins": 2.8909568786621094, "rewards/rejected": -3.577702760696411, "step": 3844 }, { "epoch": 0.45, "learning_rate": 1.683358922877052e-07, "logits/chosen": -1.468582272529602, "logits/rejected": -1.8102762699127197, "logps/chosen": -378.8179931640625, "logps/rejected": -308.75286865234375, "loss": 0.8428, "rewards/accuracies": 0.375, "rewards/chosen": -0.6144113540649414, "rewards/margins": 0.9861509799957275, "rewards/rejected": -1.600562334060669, "step": 3845 }, { "epoch": 0.45, "learning_rate": 1.6830046061178693e-07, "logits/chosen": -2.0488529205322266, "logits/rejected": -2.255938768386841, "logps/chosen": -267.12115478515625, "logps/rejected": -361.5212707519531, "loss": 0.6584, "rewards/accuracies": 0.625, "rewards/chosen": -1.3445175886154175, "rewards/margins": 2.225180149078369, "rewards/rejected": -3.569697856903076, "step": 3846 }, { "epoch": 0.45, "learning_rate": 1.6826502893586865e-07, "logits/chosen": -2.8471899032592773, "logits/rejected": -2.8838720321655273, "logps/chosen": -159.9838409423828, "logps/rejected": -159.1259002685547, "loss": 0.6447, "rewards/accuracies": 0.625, "rewards/chosen": -1.4881021976470947, "rewards/margins": 0.7805740237236023, "rewards/rejected": -2.268676280975342, "step": 3847 }, { "epoch": 0.45, "learning_rate": 1.6822959725995037e-07, "logits/chosen": -2.1884493827819824, "logits/rejected": -2.104937791824341, "logps/chosen": -262.7991943359375, "logps/rejected": -340.73785400390625, "loss": 0.3151, "rewards/accuracies": 0.875, "rewards/chosen": -0.5290105938911438, "rewards/margins": 1.8442312479019165, "rewards/rejected": -2.373241901397705, "step": 3848 }, { "epoch": 0.45, "learning_rate": 1.6819416558403212e-07, "logits/chosen": -2.0645217895507812, "logits/rejected": -2.066375732421875, "logps/chosen": -303.4767761230469, "logps/rejected": -285.02069091796875, "loss": 0.5682, "rewards/accuracies": 0.5, "rewards/chosen": -0.48978739976882935, "rewards/margins": 0.8603252172470093, "rewards/rejected": -1.3501126766204834, "step": 3849 }, { "epoch": 0.45, "learning_rate": 1.6815873390811387e-07, "logits/chosen": -2.140878200531006, "logits/rejected": -2.4445528984069824, "logps/chosen": -255.47531127929688, "logps/rejected": -204.00942993164062, "loss": 0.2296, "rewards/accuracies": 0.875, "rewards/chosen": -0.862006664276123, "rewards/margins": 2.726315975189209, "rewards/rejected": -3.588322639465332, "step": 3850 }, { "epoch": 0.45, "learning_rate": 1.681233022321956e-07, "logits/chosen": -1.8923450708389282, "logits/rejected": -2.1914265155792236, "logps/chosen": -481.4262390136719, "logps/rejected": -272.91558837890625, "loss": 0.4112, "rewards/accuracies": 0.75, "rewards/chosen": -1.2347122430801392, "rewards/margins": 2.110973834991455, "rewards/rejected": -3.3456859588623047, "step": 3851 }, { "epoch": 0.45, "learning_rate": 1.6808787055627732e-07, "logits/chosen": -1.7963777780532837, "logits/rejected": -2.1551105976104736, "logps/chosen": -422.10430908203125, "logps/rejected": -324.40447998046875, "loss": 1.0366, "rewards/accuracies": 0.75, "rewards/chosen": -1.9448168277740479, "rewards/margins": 1.8680858612060547, "rewards/rejected": -3.8129026889801025, "step": 3852 }, { "epoch": 0.45, "learning_rate": 1.6805243888035904e-07, "logits/chosen": -2.0492641925811768, "logits/rejected": -1.9815880060195923, "logps/chosen": -250.93099975585938, "logps/rejected": -252.45323181152344, "loss": 0.4375, "rewards/accuracies": 0.75, "rewards/chosen": -0.7611191868782043, "rewards/margins": 1.6259925365447998, "rewards/rejected": -2.3871116638183594, "step": 3853 }, { "epoch": 0.45, "learning_rate": 1.6801700720444076e-07, "logits/chosen": -2.856076240539551, "logits/rejected": -2.8754382133483887, "logps/chosen": -370.9661560058594, "logps/rejected": -450.0996398925781, "loss": 0.1712, "rewards/accuracies": 1.0, "rewards/chosen": -0.6117712259292603, "rewards/margins": 2.463886260986328, "rewards/rejected": -3.075657606124878, "step": 3854 }, { "epoch": 0.45, "learning_rate": 1.6798157552852248e-07, "logits/chosen": -2.7387659549713135, "logits/rejected": -2.8454809188842773, "logps/chosen": -362.1965637207031, "logps/rejected": -215.19100952148438, "loss": 0.1249, "rewards/accuracies": 1.0, "rewards/chosen": -0.7841247320175171, "rewards/margins": 2.7710533142089844, "rewards/rejected": -3.555178165435791, "step": 3855 }, { "epoch": 0.45, "learning_rate": 1.679461438526042e-07, "logits/chosen": -2.61087703704834, "logits/rejected": -2.5356714725494385, "logps/chosen": -201.07167053222656, "logps/rejected": -259.5919494628906, "loss": 0.3922, "rewards/accuracies": 0.875, "rewards/chosen": -1.2156479358673096, "rewards/margins": 1.6319248676300049, "rewards/rejected": -2.8475728034973145, "step": 3856 }, { "epoch": 0.45, "learning_rate": 1.6791071217668595e-07, "logits/chosen": -2.593956708908081, "logits/rejected": -2.64593243598938, "logps/chosen": -324.75665283203125, "logps/rejected": -215.24440002441406, "loss": 0.5698, "rewards/accuracies": 0.75, "rewards/chosen": -1.1936200857162476, "rewards/margins": 1.683941125869751, "rewards/rejected": -2.877561330795288, "step": 3857 }, { "epoch": 0.45, "learning_rate": 1.6787528050076767e-07, "logits/chosen": -2.3305928707122803, "logits/rejected": -2.65920352935791, "logps/chosen": -256.4825134277344, "logps/rejected": -154.06976318359375, "loss": 1.012, "rewards/accuracies": 0.625, "rewards/chosen": -2.4522407054901123, "rewards/margins": 0.4977017343044281, "rewards/rejected": -2.9499423503875732, "step": 3858 }, { "epoch": 0.45, "learning_rate": 1.678398488248494e-07, "logits/chosen": -2.0241971015930176, "logits/rejected": -1.8860857486724854, "logps/chosen": -265.261474609375, "logps/rejected": -366.9164733886719, "loss": 0.3578, "rewards/accuracies": 0.75, "rewards/chosen": -1.0568616390228271, "rewards/margins": 1.6688852310180664, "rewards/rejected": -2.7257471084594727, "step": 3859 }, { "epoch": 0.45, "learning_rate": 1.6780441714893112e-07, "logits/chosen": -2.1653449535369873, "logits/rejected": -2.280505895614624, "logps/chosen": -249.0057373046875, "logps/rejected": -308.88543701171875, "loss": 0.3885, "rewards/accuracies": 0.875, "rewards/chosen": -1.5435121059417725, "rewards/margins": 2.4796223640441895, "rewards/rejected": -4.023134708404541, "step": 3860 }, { "epoch": 0.45, "learning_rate": 1.677689854730129e-07, "logits/chosen": -2.31093430519104, "logits/rejected": -2.286336660385132, "logps/chosen": -326.8172302246094, "logps/rejected": -298.0384826660156, "loss": 0.2513, "rewards/accuracies": 1.0, "rewards/chosen": -0.251858115196228, "rewards/margins": 1.6906893253326416, "rewards/rejected": -1.94254732131958, "step": 3861 }, { "epoch": 0.45, "learning_rate": 1.6773355379709462e-07, "logits/chosen": -2.35324764251709, "logits/rejected": -2.5623562335968018, "logps/chosen": -389.74468994140625, "logps/rejected": -304.6593933105469, "loss": 0.1591, "rewards/accuracies": 1.0, "rewards/chosen": -0.5386452674865723, "rewards/margins": 3.6157727241516113, "rewards/rejected": -4.154417514801025, "step": 3862 }, { "epoch": 0.45, "learning_rate": 1.6769812212117634e-07, "logits/chosen": -2.6122653484344482, "logits/rejected": -2.692624807357788, "logps/chosen": -212.1173095703125, "logps/rejected": -232.46310424804688, "loss": 0.3578, "rewards/accuracies": 1.0, "rewards/chosen": -0.8686950206756592, "rewards/margins": 1.85826575756073, "rewards/rejected": -2.7269606590270996, "step": 3863 }, { "epoch": 0.45, "learning_rate": 1.6766269044525806e-07, "logits/chosen": -2.4600796699523926, "logits/rejected": -2.5046467781066895, "logps/chosen": -156.1494140625, "logps/rejected": -199.78964233398438, "loss": 0.3185, "rewards/accuracies": 0.875, "rewards/chosen": -0.43602097034454346, "rewards/margins": 2.016326427459717, "rewards/rejected": -2.45234751701355, "step": 3864 }, { "epoch": 0.45, "learning_rate": 1.6762725876933978e-07, "logits/chosen": -2.858031988143921, "logits/rejected": -2.513591766357422, "logps/chosen": -272.6485290527344, "logps/rejected": -313.1524353027344, "loss": 0.4944, "rewards/accuracies": 0.625, "rewards/chosen": -0.9666910767555237, "rewards/margins": 0.8768966197967529, "rewards/rejected": -1.8435877561569214, "step": 3865 }, { "epoch": 0.45, "learning_rate": 1.675918270934215e-07, "logits/chosen": -2.5844719409942627, "logits/rejected": -2.799694538116455, "logps/chosen": -172.38026428222656, "logps/rejected": -208.25148010253906, "loss": 0.1677, "rewards/accuracies": 0.875, "rewards/chosen": 0.05365535244345665, "rewards/margins": 3.254483461380005, "rewards/rejected": -3.2008278369903564, "step": 3866 }, { "epoch": 0.45, "learning_rate": 1.6755639541750323e-07, "logits/chosen": -2.787827968597412, "logits/rejected": -2.8657631874084473, "logps/chosen": -155.89907836914062, "logps/rejected": -306.1426086425781, "loss": 0.2308, "rewards/accuracies": 0.875, "rewards/chosen": -0.4150611460208893, "rewards/margins": 3.615582227706909, "rewards/rejected": -4.030643463134766, "step": 3867 }, { "epoch": 0.45, "learning_rate": 1.6752096374158498e-07, "logits/chosen": -1.7727798223495483, "logits/rejected": -1.77206289768219, "logps/chosen": -345.71612548828125, "logps/rejected": -337.3167724609375, "loss": 0.397, "rewards/accuracies": 0.875, "rewards/chosen": -0.8792455196380615, "rewards/margins": 2.4160561561584473, "rewards/rejected": -3.295301675796509, "step": 3868 }, { "epoch": 0.45, "learning_rate": 1.674855320656667e-07, "logits/chosen": -2.1119894981384277, "logits/rejected": -1.9880409240722656, "logps/chosen": -338.15277099609375, "logps/rejected": -363.08428955078125, "loss": 0.2095, "rewards/accuracies": 0.875, "rewards/chosen": -0.29326075315475464, "rewards/margins": 2.445756435394287, "rewards/rejected": -2.7390172481536865, "step": 3869 }, { "epoch": 0.45, "learning_rate": 1.6745010038974842e-07, "logits/chosen": -2.531792163848877, "logits/rejected": -2.36822509765625, "logps/chosen": -256.2059631347656, "logps/rejected": -242.43502807617188, "loss": 0.2168, "rewards/accuracies": 0.875, "rewards/chosen": -0.29968786239624023, "rewards/margins": 2.91050124168396, "rewards/rejected": -3.210188865661621, "step": 3870 }, { "epoch": 0.45, "learning_rate": 1.6741466871383014e-07, "logits/chosen": -2.436530351638794, "logits/rejected": -2.273883819580078, "logps/chosen": -171.18447875976562, "logps/rejected": -256.0296325683594, "loss": 0.2564, "rewards/accuracies": 0.875, "rewards/chosen": -1.0797022581100464, "rewards/margins": 1.7788752317428589, "rewards/rejected": -2.8585774898529053, "step": 3871 }, { "epoch": 0.45, "learning_rate": 1.673792370379119e-07, "logits/chosen": -2.208768367767334, "logits/rejected": -2.3265886306762695, "logps/chosen": -287.88824462890625, "logps/rejected": -327.5821533203125, "loss": 0.382, "rewards/accuracies": 0.875, "rewards/chosen": -0.4417760968208313, "rewards/margins": 1.3021957874298096, "rewards/rejected": -1.7439717054367065, "step": 3872 }, { "epoch": 0.45, "learning_rate": 1.6734380536199364e-07, "logits/chosen": -2.046501636505127, "logits/rejected": -1.876584768295288, "logps/chosen": -315.60260009765625, "logps/rejected": -404.8988952636719, "loss": 0.3111, "rewards/accuracies": 0.75, "rewards/chosen": -1.3199584484100342, "rewards/margins": 2.9584078788757324, "rewards/rejected": -4.2783660888671875, "step": 3873 }, { "epoch": 0.45, "learning_rate": 1.6730837368607536e-07, "logits/chosen": -2.037048101425171, "logits/rejected": -1.9812512397766113, "logps/chosen": -246.26678466796875, "logps/rejected": -346.2870788574219, "loss": 0.1375, "rewards/accuracies": 1.0, "rewards/chosen": -0.4406641721725464, "rewards/margins": 3.086928606033325, "rewards/rejected": -3.527592658996582, "step": 3874 }, { "epoch": 0.45, "learning_rate": 1.6727294201015708e-07, "logits/chosen": -2.1197123527526855, "logits/rejected": -2.023097276687622, "logps/chosen": -180.98997497558594, "logps/rejected": -271.0020446777344, "loss": 0.4355, "rewards/accuracies": 1.0, "rewards/chosen": -1.0543116331100464, "rewards/margins": 0.8606459498405457, "rewards/rejected": -1.9149576425552368, "step": 3875 }, { "epoch": 0.45, "learning_rate": 1.672375103342388e-07, "logits/chosen": -2.550811529159546, "logits/rejected": -2.604170322418213, "logps/chosen": -180.4134521484375, "logps/rejected": -244.74346923828125, "loss": 0.6118, "rewards/accuracies": 0.875, "rewards/chosen": -0.9743334650993347, "rewards/margins": 2.4420697689056396, "rewards/rejected": -3.416403293609619, "step": 3876 }, { "epoch": 0.45, "learning_rate": 1.6720207865832053e-07, "logits/chosen": -2.106332778930664, "logits/rejected": -2.263204336166382, "logps/chosen": -258.62677001953125, "logps/rejected": -159.3791961669922, "loss": 0.6645, "rewards/accuracies": 0.625, "rewards/chosen": -1.5052158832550049, "rewards/margins": 0.6435899138450623, "rewards/rejected": -2.148805618286133, "step": 3877 }, { "epoch": 0.45, "learning_rate": 1.6716664698240225e-07, "logits/chosen": -2.3312036991119385, "logits/rejected": -2.4568278789520264, "logps/chosen": -300.5892639160156, "logps/rejected": -273.6029052734375, "loss": 0.3891, "rewards/accuracies": 0.75, "rewards/chosen": -1.0013070106506348, "rewards/margins": 2.0554420948028564, "rewards/rejected": -3.056748867034912, "step": 3878 }, { "epoch": 0.45, "learning_rate": 1.67131215306484e-07, "logits/chosen": -2.5587446689605713, "logits/rejected": -2.565516710281372, "logps/chosen": -285.08624267578125, "logps/rejected": -277.631103515625, "loss": 0.3141, "rewards/accuracies": 0.75, "rewards/chosen": -1.1133155822753906, "rewards/margins": 3.5506796836853027, "rewards/rejected": -4.663995265960693, "step": 3879 }, { "epoch": 0.45, "learning_rate": 1.6709578363056572e-07, "logits/chosen": -2.55995512008667, "logits/rejected": -2.4365720748901367, "logps/chosen": -256.30352783203125, "logps/rejected": -301.9946594238281, "loss": 0.2077, "rewards/accuracies": 0.875, "rewards/chosen": -1.0809688568115234, "rewards/margins": 4.236901760101318, "rewards/rejected": -5.317870140075684, "step": 3880 }, { "epoch": 0.45, "learning_rate": 1.6706035195464744e-07, "logits/chosen": -1.593240737915039, "logits/rejected": -1.9584397077560425, "logps/chosen": -447.4654235839844, "logps/rejected": -366.2733459472656, "loss": 0.6007, "rewards/accuracies": 0.5, "rewards/chosen": -1.4272202253341675, "rewards/margins": 2.0359816551208496, "rewards/rejected": -3.4632019996643066, "step": 3881 }, { "epoch": 0.45, "learning_rate": 1.6702492027872916e-07, "logits/chosen": -2.248110294342041, "logits/rejected": -1.958954095840454, "logps/chosen": -290.76617431640625, "logps/rejected": -368.9569091796875, "loss": 0.3876, "rewards/accuracies": 0.75, "rewards/chosen": -1.7283235788345337, "rewards/margins": 1.5859572887420654, "rewards/rejected": -3.3142809867858887, "step": 3882 }, { "epoch": 0.45, "learning_rate": 1.6698948860281089e-07, "logits/chosen": -2.4536895751953125, "logits/rejected": -2.537228584289551, "logps/chosen": -285.68365478515625, "logps/rejected": -248.40589904785156, "loss": 0.5856, "rewards/accuracies": 0.625, "rewards/chosen": -0.3895840644836426, "rewards/margins": 0.8382235765457153, "rewards/rejected": -1.2278075218200684, "step": 3883 }, { "epoch": 0.45, "learning_rate": 1.6695405692689266e-07, "logits/chosen": -2.2874979972839355, "logits/rejected": -2.6131410598754883, "logps/chosen": -196.2591552734375, "logps/rejected": -177.0309600830078, "loss": 0.3898, "rewards/accuracies": 0.875, "rewards/chosen": -1.3315287828445435, "rewards/margins": 1.544589877128601, "rewards/rejected": -2.8761186599731445, "step": 3884 }, { "epoch": 0.45, "learning_rate": 1.6691862525097438e-07, "logits/chosen": -2.1648576259613037, "logits/rejected": -2.1737892627716064, "logps/chosen": -226.75765991210938, "logps/rejected": -304.38934326171875, "loss": 0.3737, "rewards/accuracies": 0.875, "rewards/chosen": -0.38477036356925964, "rewards/margins": 2.0864386558532715, "rewards/rejected": -2.4712088108062744, "step": 3885 }, { "epoch": 0.45, "learning_rate": 1.668831935750561e-07, "logits/chosen": -2.267943859100342, "logits/rejected": -2.165076971054077, "logps/chosen": -237.361083984375, "logps/rejected": -319.022705078125, "loss": 0.4182, "rewards/accuracies": 0.75, "rewards/chosen": -0.7557692527770996, "rewards/margins": 1.766971468925476, "rewards/rejected": -2.522740602493286, "step": 3886 }, { "epoch": 0.45, "learning_rate": 1.6684776189913783e-07, "logits/chosen": -1.9549814462661743, "logits/rejected": -1.7990045547485352, "logps/chosen": -273.0657958984375, "logps/rejected": -321.4599609375, "loss": 0.2819, "rewards/accuracies": 0.875, "rewards/chosen": -0.5449931025505066, "rewards/margins": 2.982187271118164, "rewards/rejected": -3.5271804332733154, "step": 3887 }, { "epoch": 0.45, "learning_rate": 1.6681233022321955e-07, "logits/chosen": -2.3014609813690186, "logits/rejected": -2.1888084411621094, "logps/chosen": -239.59417724609375, "logps/rejected": -294.0715026855469, "loss": 0.646, "rewards/accuracies": 0.75, "rewards/chosen": -0.36529475450515747, "rewards/margins": 1.1496926546096802, "rewards/rejected": -1.5149874687194824, "step": 3888 }, { "epoch": 0.45, "learning_rate": 1.6677689854730127e-07, "logits/chosen": -2.2841453552246094, "logits/rejected": -2.2381153106689453, "logps/chosen": -220.14822387695312, "logps/rejected": -186.7149658203125, "loss": 0.6736, "rewards/accuracies": 0.75, "rewards/chosen": -1.0027287006378174, "rewards/margins": 0.6269006729125977, "rewards/rejected": -1.6296294927597046, "step": 3889 }, { "epoch": 0.45, "learning_rate": 1.6674146687138302e-07, "logits/chosen": -2.5120835304260254, "logits/rejected": -2.591970682144165, "logps/chosen": -210.82000732421875, "logps/rejected": -192.7767791748047, "loss": 0.3871, "rewards/accuracies": 0.75, "rewards/chosen": -0.7750200033187866, "rewards/margins": 1.6773831844329834, "rewards/rejected": -2.4524030685424805, "step": 3890 }, { "epoch": 0.45, "learning_rate": 1.6670603519546474e-07, "logits/chosen": -2.1459460258483887, "logits/rejected": -2.1027731895446777, "logps/chosen": -131.5851593017578, "logps/rejected": -144.72669982910156, "loss": 0.5706, "rewards/accuracies": 0.75, "rewards/chosen": -0.914670467376709, "rewards/margins": 1.7257053852081299, "rewards/rejected": -2.640375852584839, "step": 3891 }, { "epoch": 0.45, "learning_rate": 1.6667060351954646e-07, "logits/chosen": -1.952289342880249, "logits/rejected": -2.000966787338257, "logps/chosen": -344.2003173828125, "logps/rejected": -394.0994873046875, "loss": 0.2317, "rewards/accuracies": 1.0, "rewards/chosen": -1.0872116088867188, "rewards/margins": 1.9442073106765747, "rewards/rejected": -3.031418800354004, "step": 3892 }, { "epoch": 0.45, "learning_rate": 1.666351718436282e-07, "logits/chosen": -2.2933108806610107, "logits/rejected": -2.5552964210510254, "logps/chosen": -449.40155029296875, "logps/rejected": -297.04925537109375, "loss": 0.9894, "rewards/accuracies": 0.5, "rewards/chosen": -2.088310718536377, "rewards/margins": 0.07103085517883301, "rewards/rejected": -2.15934157371521, "step": 3893 }, { "epoch": 0.45, "learning_rate": 1.665997401677099e-07, "logits/chosen": -2.512629508972168, "logits/rejected": -2.503746509552002, "logps/chosen": -308.7872314453125, "logps/rejected": -310.9063415527344, "loss": 0.1923, "rewards/accuracies": 0.875, "rewards/chosen": -0.296944260597229, "rewards/margins": 3.005025625228882, "rewards/rejected": -3.301969528198242, "step": 3894 }, { "epoch": 0.45, "learning_rate": 1.6656430849179163e-07, "logits/chosen": -2.131422996520996, "logits/rejected": -2.4820127487182617, "logps/chosen": -563.6387329101562, "logps/rejected": -351.0616760253906, "loss": 0.4032, "rewards/accuracies": 0.75, "rewards/chosen": -0.9796396493911743, "rewards/margins": 2.0491464138031006, "rewards/rejected": -3.0287861824035645, "step": 3895 }, { "epoch": 0.45, "learning_rate": 1.665288768158734e-07, "logits/chosen": -1.9110342264175415, "logits/rejected": -1.6736944913864136, "logps/chosen": -324.4891357421875, "logps/rejected": -363.16326904296875, "loss": 0.6152, "rewards/accuracies": 0.75, "rewards/chosen": -1.0614359378814697, "rewards/margins": 0.576869547367096, "rewards/rejected": -1.638305425643921, "step": 3896 }, { "epoch": 0.45, "learning_rate": 1.6649344513995513e-07, "logits/chosen": -1.7349880933761597, "logits/rejected": -1.6993216276168823, "logps/chosen": -296.19573974609375, "logps/rejected": -304.4298095703125, "loss": 0.4281, "rewards/accuracies": 0.625, "rewards/chosen": -0.9680901765823364, "rewards/margins": 2.222815990447998, "rewards/rejected": -3.190906047821045, "step": 3897 }, { "epoch": 0.45, "learning_rate": 1.6645801346403685e-07, "logits/chosen": -2.2893307209014893, "logits/rejected": -2.2873010635375977, "logps/chosen": -481.09088134765625, "logps/rejected": -465.574462890625, "loss": 0.3964, "rewards/accuracies": 0.75, "rewards/chosen": -0.6989883184432983, "rewards/margins": 2.219311475753784, "rewards/rejected": -2.918299674987793, "step": 3898 }, { "epoch": 0.45, "learning_rate": 1.6642258178811857e-07, "logits/chosen": -1.9570164680480957, "logits/rejected": -2.068056106567383, "logps/chosen": -265.257568359375, "logps/rejected": -257.0851135253906, "loss": 0.2853, "rewards/accuracies": 0.875, "rewards/chosen": -0.7775898575782776, "rewards/margins": 1.5611759424209595, "rewards/rejected": -2.338765859603882, "step": 3899 }, { "epoch": 0.45, "learning_rate": 1.663871501122003e-07, "logits/chosen": -2.5773377418518066, "logits/rejected": -2.7421507835388184, "logps/chosen": -348.7246398925781, "logps/rejected": -271.55548095703125, "loss": 0.4629, "rewards/accuracies": 0.875, "rewards/chosen": -1.9303429126739502, "rewards/margins": 1.960776448249817, "rewards/rejected": -3.8911194801330566, "step": 3900 }, { "epoch": 0.45, "learning_rate": 1.6635171843628202e-07, "logits/chosen": -2.056032657623291, "logits/rejected": -2.264566421508789, "logps/chosen": -359.62225341796875, "logps/rejected": -383.87994384765625, "loss": 0.3998, "rewards/accuracies": 0.875, "rewards/chosen": -1.47682785987854, "rewards/margins": 3.0207619667053223, "rewards/rejected": -4.497590065002441, "step": 3901 }, { "epoch": 0.45, "learning_rate": 1.6631628676036377e-07, "logits/chosen": -2.2757906913757324, "logits/rejected": -2.371882915496826, "logps/chosen": -210.89828491210938, "logps/rejected": -195.07049560546875, "loss": 0.5804, "rewards/accuracies": 0.75, "rewards/chosen": -1.295375108718872, "rewards/margins": 1.797385334968567, "rewards/rejected": -3.0927603244781494, "step": 3902 }, { "epoch": 0.45, "learning_rate": 1.662808550844455e-07, "logits/chosen": -1.6172164678573608, "logits/rejected": -1.426568865776062, "logps/chosen": -528.005126953125, "logps/rejected": -598.367431640625, "loss": 0.5379, "rewards/accuracies": 0.75, "rewards/chosen": -0.8151198029518127, "rewards/margins": 1.8337925672531128, "rewards/rejected": -2.6489124298095703, "step": 3903 }, { "epoch": 0.45, "learning_rate": 1.662454234085272e-07, "logits/chosen": -2.273738384246826, "logits/rejected": -2.321366548538208, "logps/chosen": -123.86614990234375, "logps/rejected": -181.62725830078125, "loss": 0.2689, "rewards/accuracies": 0.875, "rewards/chosen": -0.6715587377548218, "rewards/margins": 1.542258381843567, "rewards/rejected": -2.2138171195983887, "step": 3904 }, { "epoch": 0.45, "learning_rate": 1.6620999173260893e-07, "logits/chosen": -2.00551176071167, "logits/rejected": -2.0700478553771973, "logps/chosen": -298.8359069824219, "logps/rejected": -277.1605224609375, "loss": 0.5549, "rewards/accuracies": 0.75, "rewards/chosen": -0.8068256974220276, "rewards/margins": 1.3865652084350586, "rewards/rejected": -2.1933910846710205, "step": 3905 }, { "epoch": 0.45, "learning_rate": 1.6617456005669065e-07, "logits/chosen": -1.9215893745422363, "logits/rejected": -2.0448343753814697, "logps/chosen": -447.53546142578125, "logps/rejected": -300.39447021484375, "loss": 0.4892, "rewards/accuracies": 0.875, "rewards/chosen": -0.9539096355438232, "rewards/margins": 1.0335050821304321, "rewards/rejected": -1.987414836883545, "step": 3906 }, { "epoch": 0.45, "learning_rate": 1.6613912838077238e-07, "logits/chosen": -2.6586968898773193, "logits/rejected": -2.383171558380127, "logps/chosen": -198.85208129882812, "logps/rejected": -280.6541748046875, "loss": 0.5391, "rewards/accuracies": 0.75, "rewards/chosen": -1.0294334888458252, "rewards/margins": 1.330021858215332, "rewards/rejected": -2.3594553470611572, "step": 3907 }, { "epoch": 0.45, "learning_rate": 1.6610369670485415e-07, "logits/chosen": -2.564728021621704, "logits/rejected": -2.7271430492401123, "logps/chosen": -207.4271240234375, "logps/rejected": -125.0422592163086, "loss": 0.4145, "rewards/accuracies": 0.625, "rewards/chosen": -0.6323509216308594, "rewards/margins": 1.0975420475006104, "rewards/rejected": -1.7298928499221802, "step": 3908 }, { "epoch": 0.45, "learning_rate": 1.6606826502893587e-07, "logits/chosen": -2.1196799278259277, "logits/rejected": -1.9212218523025513, "logps/chosen": -221.9855194091797, "logps/rejected": -424.0548095703125, "loss": 0.335, "rewards/accuracies": 0.875, "rewards/chosen": -1.4790626764297485, "rewards/margins": 2.4551355838775635, "rewards/rejected": -3.9341983795166016, "step": 3909 }, { "epoch": 0.45, "learning_rate": 1.660328333530176e-07, "logits/chosen": -2.0002923011779785, "logits/rejected": -2.4046037197113037, "logps/chosen": -406.3184814453125, "logps/rejected": -297.5899963378906, "loss": 0.4728, "rewards/accuracies": 0.625, "rewards/chosen": -1.39532470703125, "rewards/margins": 1.999868631362915, "rewards/rejected": -3.395193338394165, "step": 3910 }, { "epoch": 0.45, "learning_rate": 1.6599740167709932e-07, "logits/chosen": -2.2699995040893555, "logits/rejected": -2.2734501361846924, "logps/chosen": -187.1719512939453, "logps/rejected": -163.6668243408203, "loss": 0.279, "rewards/accuracies": 0.875, "rewards/chosen": -0.8037168979644775, "rewards/margins": 2.3717522621154785, "rewards/rejected": -3.175469398498535, "step": 3911 }, { "epoch": 0.46, "learning_rate": 1.6596197000118104e-07, "logits/chosen": -2.3722338676452637, "logits/rejected": -2.7493209838867188, "logps/chosen": -456.46722412109375, "logps/rejected": -342.0536804199219, "loss": 0.6746, "rewards/accuracies": 0.875, "rewards/chosen": -1.0042991638183594, "rewards/margins": 1.1311392784118652, "rewards/rejected": -2.1354384422302246, "step": 3912 }, { "epoch": 0.46, "learning_rate": 1.659265383252628e-07, "logits/chosen": -2.9287166595458984, "logits/rejected": -2.9435372352600098, "logps/chosen": -218.3606719970703, "logps/rejected": -139.0884246826172, "loss": 0.2999, "rewards/accuracies": 0.75, "rewards/chosen": -0.5440170168876648, "rewards/margins": 2.1228206157684326, "rewards/rejected": -2.666837692260742, "step": 3913 }, { "epoch": 0.46, "learning_rate": 1.658911066493445e-07, "logits/chosen": -2.3693840503692627, "logits/rejected": -2.4994516372680664, "logps/chosen": -192.30520629882812, "logps/rejected": -200.2140655517578, "loss": 0.3718, "rewards/accuracies": 0.75, "rewards/chosen": -0.04872839152812958, "rewards/margins": 1.548474907875061, "rewards/rejected": -1.597203254699707, "step": 3914 }, { "epoch": 0.46, "learning_rate": 1.6585567497342623e-07, "logits/chosen": -2.785851001739502, "logits/rejected": -2.702601909637451, "logps/chosen": -204.90921020507812, "logps/rejected": -217.53948974609375, "loss": 0.4224, "rewards/accuracies": 0.875, "rewards/chosen": -1.6771920919418335, "rewards/margins": 1.876772165298462, "rewards/rejected": -3.553964376449585, "step": 3915 }, { "epoch": 0.46, "learning_rate": 1.6582024329750795e-07, "logits/chosen": -2.8932981491088867, "logits/rejected": -2.6543326377868652, "logps/chosen": -363.5979309082031, "logps/rejected": -272.1506652832031, "loss": 0.6161, "rewards/accuracies": 0.625, "rewards/chosen": -1.7143642902374268, "rewards/margins": 1.3277819156646729, "rewards/rejected": -3.0421462059020996, "step": 3916 }, { "epoch": 0.46, "learning_rate": 1.6578481162158968e-07, "logits/chosen": -1.404897689819336, "logits/rejected": -2.0405707359313965, "logps/chosen": -430.088134765625, "logps/rejected": -245.97152709960938, "loss": 0.52, "rewards/accuracies": 0.625, "rewards/chosen": -0.24080932140350342, "rewards/margins": 1.2592658996582031, "rewards/rejected": -1.500075101852417, "step": 3917 }, { "epoch": 0.46, "learning_rate": 1.657493799456714e-07, "logits/chosen": -2.1882576942443848, "logits/rejected": -2.244640827178955, "logps/chosen": -230.83399963378906, "logps/rejected": -331.74176025390625, "loss": 0.9269, "rewards/accuracies": 0.75, "rewards/chosen": -1.364326000213623, "rewards/margins": 1.4826722145080566, "rewards/rejected": -2.8469979763031006, "step": 3918 }, { "epoch": 0.46, "learning_rate": 1.6571394826975317e-07, "logits/chosen": -2.0727832317352295, "logits/rejected": -2.070251226425171, "logps/chosen": -284.667724609375, "logps/rejected": -286.181396484375, "loss": 0.3498, "rewards/accuracies": 0.75, "rewards/chosen": -0.3417971134185791, "rewards/margins": 1.585906982421875, "rewards/rejected": -1.927704095840454, "step": 3919 }, { "epoch": 0.46, "learning_rate": 1.656785165938349e-07, "logits/chosen": -1.7428734302520752, "logits/rejected": -2.0255985260009766, "logps/chosen": -461.7159729003906, "logps/rejected": -316.0364685058594, "loss": 0.9836, "rewards/accuracies": 0.75, "rewards/chosen": -1.2697467803955078, "rewards/margins": 1.329374074935913, "rewards/rejected": -2.599120855331421, "step": 3920 }, { "epoch": 0.46, "learning_rate": 1.6564308491791662e-07, "logits/chosen": -1.9331209659576416, "logits/rejected": -2.0482234954833984, "logps/chosen": -369.12921142578125, "logps/rejected": -340.3963317871094, "loss": 0.5502, "rewards/accuracies": 0.75, "rewards/chosen": -0.8258023858070374, "rewards/margins": 0.858640730381012, "rewards/rejected": -1.6844431161880493, "step": 3921 }, { "epoch": 0.46, "learning_rate": 1.6560765324199834e-07, "logits/chosen": -2.6940712928771973, "logits/rejected": -2.6113667488098145, "logps/chosen": -52.231117248535156, "logps/rejected": -173.03965759277344, "loss": 0.199, "rewards/accuracies": 0.875, "rewards/chosen": 0.1006915420293808, "rewards/margins": 2.283607006072998, "rewards/rejected": -2.182915687561035, "step": 3922 }, { "epoch": 0.46, "learning_rate": 1.6557222156608006e-07, "logits/chosen": -2.481154441833496, "logits/rejected": -2.4103400707244873, "logps/chosen": -280.0674133300781, "logps/rejected": -273.2349548339844, "loss": 0.187, "rewards/accuracies": 0.875, "rewards/chosen": -0.648833155632019, "rewards/margins": 2.793947219848633, "rewards/rejected": -3.4427809715270996, "step": 3923 }, { "epoch": 0.46, "learning_rate": 1.655367898901618e-07, "logits/chosen": -2.5242955684661865, "logits/rejected": -2.7565605640411377, "logps/chosen": -165.9093017578125, "logps/rejected": -230.6689453125, "loss": 0.5166, "rewards/accuracies": 0.875, "rewards/chosen": -1.2453938722610474, "rewards/margins": 2.4396090507507324, "rewards/rejected": -3.6850030422210693, "step": 3924 }, { "epoch": 0.46, "learning_rate": 1.6550135821424353e-07, "logits/chosen": -2.7933897972106934, "logits/rejected": -2.8297200202941895, "logps/chosen": -333.6321105957031, "logps/rejected": -299.3306884765625, "loss": 0.1996, "rewards/accuracies": 1.0, "rewards/chosen": -0.5977796912193298, "rewards/margins": 2.359827756881714, "rewards/rejected": -2.9576077461242676, "step": 3925 }, { "epoch": 0.46, "learning_rate": 1.6546592653832526e-07, "logits/chosen": -2.687225818634033, "logits/rejected": -2.8150887489318848, "logps/chosen": -293.63409423828125, "logps/rejected": -228.01708984375, "loss": 0.2861, "rewards/accuracies": 0.875, "rewards/chosen": -0.5138037204742432, "rewards/margins": 2.7215728759765625, "rewards/rejected": -3.2353765964508057, "step": 3926 }, { "epoch": 0.46, "learning_rate": 1.6543049486240698e-07, "logits/chosen": -2.2444589138031006, "logits/rejected": -2.273538827896118, "logps/chosen": -230.5950927734375, "logps/rejected": -262.35333251953125, "loss": 0.2783, "rewards/accuracies": 0.875, "rewards/chosen": -0.6988614201545715, "rewards/margins": 2.431675910949707, "rewards/rejected": -3.130537509918213, "step": 3927 }, { "epoch": 0.46, "learning_rate": 1.653950631864887e-07, "logits/chosen": -2.3953919410705566, "logits/rejected": -2.6913251876831055, "logps/chosen": -241.2333984375, "logps/rejected": -264.0606384277344, "loss": 0.7335, "rewards/accuracies": 0.625, "rewards/chosen": -1.76862633228302, "rewards/margins": 2.348984718322754, "rewards/rejected": -4.117610931396484, "step": 3928 }, { "epoch": 0.46, "learning_rate": 1.6535963151057042e-07, "logits/chosen": -2.156292676925659, "logits/rejected": -2.2925949096679688, "logps/chosen": -550.068359375, "logps/rejected": -436.76025390625, "loss": 0.3793, "rewards/accuracies": 0.75, "rewards/chosen": -0.7540846467018127, "rewards/margins": 1.5945425033569336, "rewards/rejected": -2.3486270904541016, "step": 3929 }, { "epoch": 0.46, "learning_rate": 1.6532419983465214e-07, "logits/chosen": -2.116757392883301, "logits/rejected": -2.0873019695281982, "logps/chosen": -205.5311279296875, "logps/rejected": -306.070068359375, "loss": 0.4203, "rewards/accuracies": 0.75, "rewards/chosen": -1.20888090133667, "rewards/margins": 2.12562894821167, "rewards/rejected": -3.3345096111297607, "step": 3930 }, { "epoch": 0.46, "learning_rate": 1.6528876815873392e-07, "logits/chosen": -2.4310860633850098, "logits/rejected": -2.392638683319092, "logps/chosen": -337.08514404296875, "logps/rejected": -342.1105651855469, "loss": 0.4623, "rewards/accuracies": 0.875, "rewards/chosen": -1.2214616537094116, "rewards/margins": 2.3843374252319336, "rewards/rejected": -3.6057987213134766, "step": 3931 }, { "epoch": 0.46, "learning_rate": 1.6525333648281564e-07, "logits/chosen": -2.5516531467437744, "logits/rejected": -2.4415719509124756, "logps/chosen": -325.0041809082031, "logps/rejected": -347.8976135253906, "loss": 0.2106, "rewards/accuracies": 1.0, "rewards/chosen": -0.6062057614326477, "rewards/margins": 2.931074857711792, "rewards/rejected": -3.537280559539795, "step": 3932 }, { "epoch": 0.46, "learning_rate": 1.6521790480689736e-07, "logits/chosen": -2.5044026374816895, "logits/rejected": -2.5501670837402344, "logps/chosen": -230.84014892578125, "logps/rejected": -294.1318359375, "loss": 0.7257, "rewards/accuracies": 0.75, "rewards/chosen": -1.749709963798523, "rewards/margins": 0.653721809387207, "rewards/rejected": -2.4034318923950195, "step": 3933 }, { "epoch": 0.46, "learning_rate": 1.6518247313097909e-07, "logits/chosen": -1.5549858808517456, "logits/rejected": -1.9962610006332397, "logps/chosen": -305.12139892578125, "logps/rejected": -221.55844116210938, "loss": 0.2369, "rewards/accuracies": 0.875, "rewards/chosen": -1.8440015316009521, "rewards/margins": 2.5838680267333984, "rewards/rejected": -4.42786979675293, "step": 3934 }, { "epoch": 0.46, "learning_rate": 1.651470414550608e-07, "logits/chosen": -2.3633902072906494, "logits/rejected": -2.5667688846588135, "logps/chosen": -168.75755310058594, "logps/rejected": -290.710205078125, "loss": 0.3304, "rewards/accuracies": 0.75, "rewards/chosen": -1.0187458992004395, "rewards/margins": 2.9705708026885986, "rewards/rejected": -3.989316463470459, "step": 3935 }, { "epoch": 0.46, "learning_rate": 1.6511160977914256e-07, "logits/chosen": -2.6231470108032227, "logits/rejected": -2.577360153198242, "logps/chosen": -256.7175598144531, "logps/rejected": -316.27459716796875, "loss": 0.3439, "rewards/accuracies": 0.75, "rewards/chosen": -0.5895966291427612, "rewards/margins": 1.558298110961914, "rewards/rejected": -2.147894859313965, "step": 3936 }, { "epoch": 0.46, "learning_rate": 1.6507617810322428e-07, "logits/chosen": -2.5242719650268555, "logits/rejected": -2.485776662826538, "logps/chosen": -287.8277282714844, "logps/rejected": -299.5181579589844, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/chosen": -0.5263023376464844, "rewards/margins": 3.133920431137085, "rewards/rejected": -3.6602225303649902, "step": 3937 }, { "epoch": 0.46, "learning_rate": 1.65040746427306e-07, "logits/chosen": -1.920130729675293, "logits/rejected": -2.0677363872528076, "logps/chosen": -396.0291442871094, "logps/rejected": -397.230712890625, "loss": 0.4815, "rewards/accuracies": 0.875, "rewards/chosen": -0.8796119689941406, "rewards/margins": 1.2572662830352783, "rewards/rejected": -2.136878252029419, "step": 3938 }, { "epoch": 0.46, "learning_rate": 1.6500531475138772e-07, "logits/chosen": -2.0578975677490234, "logits/rejected": -2.1588358879089355, "logps/chosen": -353.5740661621094, "logps/rejected": -243.0382080078125, "loss": 0.5404, "rewards/accuracies": 0.875, "rewards/chosen": -1.434918761253357, "rewards/margins": 1.977284550666809, "rewards/rejected": -3.412203311920166, "step": 3939 }, { "epoch": 0.46, "learning_rate": 1.6496988307546944e-07, "logits/chosen": -1.8925567865371704, "logits/rejected": -1.7885117530822754, "logps/chosen": -215.15457153320312, "logps/rejected": -206.30328369140625, "loss": 0.6131, "rewards/accuracies": 0.75, "rewards/chosen": -1.354048490524292, "rewards/margins": 1.4506523609161377, "rewards/rejected": -2.8047006130218506, "step": 3940 }, { "epoch": 0.46, "learning_rate": 1.6493445139955117e-07, "logits/chosen": -2.027043342590332, "logits/rejected": -2.024496555328369, "logps/chosen": -307.4137878417969, "logps/rejected": -424.326416015625, "loss": 0.2339, "rewards/accuracies": 0.875, "rewards/chosen": -0.6002962589263916, "rewards/margins": 3.2508492469787598, "rewards/rejected": -3.8511455059051514, "step": 3941 }, { "epoch": 0.46, "learning_rate": 1.6489901972363292e-07, "logits/chosen": -2.6939916610717773, "logits/rejected": -2.5931665897369385, "logps/chosen": -173.3814239501953, "logps/rejected": -162.88107299804688, "loss": 0.3034, "rewards/accuracies": 1.0, "rewards/chosen": -0.44174665212631226, "rewards/margins": 1.7786290645599365, "rewards/rejected": -2.2203755378723145, "step": 3942 }, { "epoch": 0.46, "learning_rate": 1.6486358804771466e-07, "logits/chosen": -2.2949297428131104, "logits/rejected": -2.504296064376831, "logps/chosen": -298.02667236328125, "logps/rejected": -163.6577911376953, "loss": 0.6218, "rewards/accuracies": 0.75, "rewards/chosen": -0.044179074466228485, "rewards/margins": 1.8085417747497559, "rewards/rejected": -1.852720856666565, "step": 3943 }, { "epoch": 0.46, "learning_rate": 1.6482815637179639e-07, "logits/chosen": -2.456106662750244, "logits/rejected": -2.376206398010254, "logps/chosen": -178.7095947265625, "logps/rejected": -251.34060668945312, "loss": 0.334, "rewards/accuracies": 0.875, "rewards/chosen": -0.9643751978874207, "rewards/margins": 1.6535300016403198, "rewards/rejected": -2.6179051399230957, "step": 3944 }, { "epoch": 0.46, "learning_rate": 1.647927246958781e-07, "logits/chosen": -2.0813639163970947, "logits/rejected": -2.301832675933838, "logps/chosen": -188.51470947265625, "logps/rejected": -208.46986389160156, "loss": 0.3864, "rewards/accuracies": 0.75, "rewards/chosen": -0.7985163331031799, "rewards/margins": 2.8208892345428467, "rewards/rejected": -3.6194052696228027, "step": 3945 }, { "epoch": 0.46, "learning_rate": 1.6475729301995983e-07, "logits/chosen": -2.1538877487182617, "logits/rejected": -2.3655076026916504, "logps/chosen": -393.18975830078125, "logps/rejected": -236.78524780273438, "loss": 0.5358, "rewards/accuracies": 0.875, "rewards/chosen": -1.0000369548797607, "rewards/margins": 0.9976277351379395, "rewards/rejected": -1.9976646900177002, "step": 3946 }, { "epoch": 0.46, "learning_rate": 1.6472186134404158e-07, "logits/chosen": -2.4258294105529785, "logits/rejected": -2.6544787883758545, "logps/chosen": -347.4997253417969, "logps/rejected": -257.243896484375, "loss": 0.402, "rewards/accuracies": 0.75, "rewards/chosen": -0.6963886618614197, "rewards/margins": 2.235750913619995, "rewards/rejected": -2.9321393966674805, "step": 3947 }, { "epoch": 0.46, "learning_rate": 1.646864296681233e-07, "logits/chosen": -2.3404111862182617, "logits/rejected": -2.274977684020996, "logps/chosen": -228.67739868164062, "logps/rejected": -218.5743865966797, "loss": 0.3371, "rewards/accuracies": 0.875, "rewards/chosen": -0.22307144105434418, "rewards/margins": 1.3316730260849, "rewards/rejected": -1.5547446012496948, "step": 3948 }, { "epoch": 0.46, "learning_rate": 1.6465099799220502e-07, "logits/chosen": -2.677840232849121, "logits/rejected": -2.5003280639648438, "logps/chosen": -248.92935180664062, "logps/rejected": -246.9120330810547, "loss": 0.3518, "rewards/accuracies": 0.875, "rewards/chosen": -1.265497088432312, "rewards/margins": 1.9520766735076904, "rewards/rejected": -3.217573642730713, "step": 3949 }, { "epoch": 0.46, "learning_rate": 1.6461556631628675e-07, "logits/chosen": -2.2416977882385254, "logits/rejected": -2.5748939514160156, "logps/chosen": -490.63818359375, "logps/rejected": -232.35504150390625, "loss": 0.329, "rewards/accuracies": 0.875, "rewards/chosen": -0.8970564603805542, "rewards/margins": 2.0748486518859863, "rewards/rejected": -2.971904993057251, "step": 3950 }, { "epoch": 0.46, "learning_rate": 1.6458013464036847e-07, "logits/chosen": -1.8287098407745361, "logits/rejected": -1.8242080211639404, "logps/chosen": -376.67529296875, "logps/rejected": -436.81256103515625, "loss": 0.3504, "rewards/accuracies": 0.875, "rewards/chosen": -0.960844874382019, "rewards/margins": 3.16070556640625, "rewards/rejected": -4.1215500831604, "step": 3951 }, { "epoch": 0.46, "learning_rate": 1.645447029644502e-07, "logits/chosen": -2.2309231758117676, "logits/rejected": -2.4436192512512207, "logps/chosen": -208.72853088378906, "logps/rejected": -285.3692626953125, "loss": 0.5293, "rewards/accuracies": 0.875, "rewards/chosen": -0.6695567965507507, "rewards/margins": 3.393733263015747, "rewards/rejected": -4.063289642333984, "step": 3952 }, { "epoch": 0.46, "learning_rate": 1.6450927128853194e-07, "logits/chosen": -2.13130521774292, "logits/rejected": -2.097860813140869, "logps/chosen": -367.9957275390625, "logps/rejected": -439.1777648925781, "loss": 0.2111, "rewards/accuracies": 0.875, "rewards/chosen": -0.41047775745391846, "rewards/margins": 2.7937567234039307, "rewards/rejected": -3.2042346000671387, "step": 3953 }, { "epoch": 0.46, "learning_rate": 1.6447383961261369e-07, "logits/chosen": -2.4099628925323486, "logits/rejected": -2.640214681625366, "logps/chosen": -150.7002716064453, "logps/rejected": -159.5825653076172, "loss": 0.3707, "rewards/accuracies": 0.75, "rewards/chosen": -0.8288857340812683, "rewards/margins": 1.8333728313446045, "rewards/rejected": -2.6622586250305176, "step": 3954 }, { "epoch": 0.46, "learning_rate": 1.644384079366954e-07, "logits/chosen": -2.637030601501465, "logits/rejected": -2.2473363876342773, "logps/chosen": -88.22962188720703, "logps/rejected": -226.756591796875, "loss": 0.866, "rewards/accuracies": 0.75, "rewards/chosen": -1.9672958850860596, "rewards/margins": 1.7017645835876465, "rewards/rejected": -3.669060468673706, "step": 3955 }, { "epoch": 0.46, "learning_rate": 1.6440297626077713e-07, "logits/chosen": -2.813427686691284, "logits/rejected": -2.8487465381622314, "logps/chosen": -121.28565979003906, "logps/rejected": -201.32872009277344, "loss": 1.0488, "rewards/accuracies": 0.75, "rewards/chosen": -2.074873447418213, "rewards/margins": 1.1152749061584473, "rewards/rejected": -3.1901485919952393, "step": 3956 }, { "epoch": 0.46, "learning_rate": 1.6436754458485885e-07, "logits/chosen": -2.063302516937256, "logits/rejected": -2.305544853210449, "logps/chosen": -258.9646911621094, "logps/rejected": -160.5577850341797, "loss": 0.2897, "rewards/accuracies": 0.875, "rewards/chosen": -0.2232809215784073, "rewards/margins": 1.6698435544967651, "rewards/rejected": -1.8931243419647217, "step": 3957 }, { "epoch": 0.46, "learning_rate": 1.643321129089406e-07, "logits/chosen": -2.4128925800323486, "logits/rejected": -2.6066646575927734, "logps/chosen": -262.2380676269531, "logps/rejected": -278.1166687011719, "loss": 0.566, "rewards/accuracies": 0.75, "rewards/chosen": -1.0172700881958008, "rewards/margins": 1.6816158294677734, "rewards/rejected": -2.698885917663574, "step": 3958 }, { "epoch": 0.46, "learning_rate": 1.6429668123302232e-07, "logits/chosen": -2.2732763290405273, "logits/rejected": -2.3244009017944336, "logps/chosen": -201.82440185546875, "logps/rejected": -236.677490234375, "loss": 0.3603, "rewards/accuracies": 0.75, "rewards/chosen": -0.6881381869316101, "rewards/margins": 1.4232892990112305, "rewards/rejected": -2.1114275455474854, "step": 3959 }, { "epoch": 0.46, "learning_rate": 1.6426124955710405e-07, "logits/chosen": -1.8601810932159424, "logits/rejected": -2.181622266769409, "logps/chosen": -626.0748901367188, "logps/rejected": -449.293212890625, "loss": 0.467, "rewards/accuracies": 0.75, "rewards/chosen": -0.7602765560150146, "rewards/margins": 1.8340922594070435, "rewards/rejected": -2.5943689346313477, "step": 3960 }, { "epoch": 0.46, "learning_rate": 1.6422581788118577e-07, "logits/chosen": -2.671356201171875, "logits/rejected": -2.751289129257202, "logps/chosen": -184.88131713867188, "logps/rejected": -280.28546142578125, "loss": 0.6385, "rewards/accuracies": 0.625, "rewards/chosen": -1.0858287811279297, "rewards/margins": 2.511718511581421, "rewards/rejected": -3.5975472927093506, "step": 3961 }, { "epoch": 0.46, "learning_rate": 1.641903862052675e-07, "logits/chosen": -2.3870863914489746, "logits/rejected": -2.158416748046875, "logps/chosen": -412.8333740234375, "logps/rejected": -390.12017822265625, "loss": 0.7054, "rewards/accuracies": 0.875, "rewards/chosen": -1.2829443216323853, "rewards/margins": 1.6606245040893555, "rewards/rejected": -2.943568706512451, "step": 3962 }, { "epoch": 0.46, "learning_rate": 1.641549545293492e-07, "logits/chosen": -2.340913772583008, "logits/rejected": -2.1604578495025635, "logps/chosen": -206.87167358398438, "logps/rejected": -278.2321472167969, "loss": 0.2374, "rewards/accuracies": 0.875, "rewards/chosen": -1.433334469795227, "rewards/margins": 3.97241473197937, "rewards/rejected": -5.405749320983887, "step": 3963 }, { "epoch": 0.46, "learning_rate": 1.6411952285343093e-07, "logits/chosen": -2.6634902954101562, "logits/rejected": -2.224545955657959, "logps/chosen": -224.65077209472656, "logps/rejected": -189.88925170898438, "loss": 0.3033, "rewards/accuracies": 0.75, "rewards/chosen": -0.794945240020752, "rewards/margins": 3.1383814811706543, "rewards/rejected": -3.9333269596099854, "step": 3964 }, { "epoch": 0.46, "learning_rate": 1.6408409117751268e-07, "logits/chosen": -1.9960612058639526, "logits/rejected": -2.2806451320648193, "logps/chosen": -326.1009521484375, "logps/rejected": -243.41036987304688, "loss": 0.2488, "rewards/accuracies": 0.875, "rewards/chosen": 0.0011952295899391174, "rewards/margins": 2.0707128047943115, "rewards/rejected": -2.0695176124572754, "step": 3965 }, { "epoch": 0.46, "learning_rate": 1.6404865950159443e-07, "logits/chosen": -2.4857444763183594, "logits/rejected": -2.0633797645568848, "logps/chosen": -213.16876220703125, "logps/rejected": -275.69049072265625, "loss": 0.186, "rewards/accuracies": 1.0, "rewards/chosen": 0.09581833332777023, "rewards/margins": 2.343961715698242, "rewards/rejected": -2.248143196105957, "step": 3966 }, { "epoch": 0.46, "learning_rate": 1.6401322782567615e-07, "logits/chosen": -1.9964426755905151, "logits/rejected": -2.133298397064209, "logps/chosen": -258.18109130859375, "logps/rejected": -334.46783447265625, "loss": 0.3318, "rewards/accuracies": 0.75, "rewards/chosen": -0.8978146314620972, "rewards/margins": 3.4809908866882324, "rewards/rejected": -4.378805160522461, "step": 3967 }, { "epoch": 0.46, "learning_rate": 1.6397779614975788e-07, "logits/chosen": -2.0834102630615234, "logits/rejected": -2.2590813636779785, "logps/chosen": -514.2142333984375, "logps/rejected": -407.45263671875, "loss": 0.1985, "rewards/accuracies": 0.875, "rewards/chosen": -0.47627344727516174, "rewards/margins": 2.7707161903381348, "rewards/rejected": -3.2469894886016846, "step": 3968 }, { "epoch": 0.46, "learning_rate": 1.6394236447383962e-07, "logits/chosen": -2.384265899658203, "logits/rejected": -2.5301554203033447, "logps/chosen": -262.34918212890625, "logps/rejected": -281.2230529785156, "loss": 0.2304, "rewards/accuracies": 1.0, "rewards/chosen": -0.5596275925636292, "rewards/margins": 2.131875514984131, "rewards/rejected": -2.691502809524536, "step": 3969 }, { "epoch": 0.46, "learning_rate": 1.6390693279792135e-07, "logits/chosen": -1.831903100013733, "logits/rejected": -2.494029998779297, "logps/chosen": -374.9416198730469, "logps/rejected": -189.73104858398438, "loss": 0.3027, "rewards/accuracies": 0.875, "rewards/chosen": -0.9706882238388062, "rewards/margins": 2.267364263534546, "rewards/rejected": -3.2380526065826416, "step": 3970 }, { "epoch": 0.46, "learning_rate": 1.6387150112200307e-07, "logits/chosen": -2.2646617889404297, "logits/rejected": -2.242034673690796, "logps/chosen": -423.46746826171875, "logps/rejected": -329.1251525878906, "loss": 0.2035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8615195751190186, "rewards/margins": 2.820004463195801, "rewards/rejected": -3.6815240383148193, "step": 3971 }, { "epoch": 0.46, "learning_rate": 1.638360694460848e-07, "logits/chosen": -2.369971513748169, "logits/rejected": -2.3231699466705322, "logps/chosen": -140.62388610839844, "logps/rejected": -196.24606323242188, "loss": 0.1569, "rewards/accuracies": 1.0, "rewards/chosen": 0.23546014726161957, "rewards/margins": 3.0574488639831543, "rewards/rejected": -2.821988821029663, "step": 3972 }, { "epoch": 0.46, "learning_rate": 1.638006377701665e-07, "logits/chosen": -1.8697021007537842, "logits/rejected": -1.9884072542190552, "logps/chosen": -368.5655822753906, "logps/rejected": -383.21734619140625, "loss": 1.0667, "rewards/accuracies": 0.375, "rewards/chosen": -1.8240963220596313, "rewards/margins": -0.5172037482261658, "rewards/rejected": -1.3068926334381104, "step": 3973 }, { "epoch": 0.46, "learning_rate": 1.6376520609424823e-07, "logits/chosen": -2.50384521484375, "logits/rejected": -2.335244655609131, "logps/chosen": -205.60092163085938, "logps/rejected": -285.5077209472656, "loss": 0.2135, "rewards/accuracies": 1.0, "rewards/chosen": -1.3143959045410156, "rewards/margins": 3.9873692989349365, "rewards/rejected": -5.301765441894531, "step": 3974 }, { "epoch": 0.46, "learning_rate": 1.6372977441832996e-07, "logits/chosen": -1.9997432231903076, "logits/rejected": -2.0570549964904785, "logps/chosen": -313.7476501464844, "logps/rejected": -355.53228759765625, "loss": 0.2643, "rewards/accuracies": 0.875, "rewards/chosen": -0.7654287815093994, "rewards/margins": 2.790038585662842, "rewards/rejected": -3.555467128753662, "step": 3975 }, { "epoch": 0.46, "learning_rate": 1.636943427424117e-07, "logits/chosen": -2.423778533935547, "logits/rejected": -2.629275321960449, "logps/chosen": -353.6055603027344, "logps/rejected": -295.0767517089844, "loss": 0.1995, "rewards/accuracies": 0.875, "rewards/chosen": -0.2444050908088684, "rewards/margins": 2.5270917415618896, "rewards/rejected": -2.7714967727661133, "step": 3976 }, { "epoch": 0.46, "learning_rate": 1.6365891106649343e-07, "logits/chosen": -2.3694143295288086, "logits/rejected": -2.3338515758514404, "logps/chosen": -216.8291778564453, "logps/rejected": -205.60296630859375, "loss": 0.6496, "rewards/accuracies": 0.75, "rewards/chosen": -1.2938556671142578, "rewards/margins": 0.5532680749893188, "rewards/rejected": -1.8471238613128662, "step": 3977 }, { "epoch": 0.46, "learning_rate": 1.6362347939057518e-07, "logits/chosen": -1.847383975982666, "logits/rejected": -1.8584564924240112, "logps/chosen": -289.0009765625, "logps/rejected": -408.8981628417969, "loss": 0.4967, "rewards/accuracies": 0.75, "rewards/chosen": -0.825747549533844, "rewards/margins": 3.1812713146209717, "rewards/rejected": -4.00701904296875, "step": 3978 }, { "epoch": 0.46, "learning_rate": 1.635880477146569e-07, "logits/chosen": -2.7667036056518555, "logits/rejected": -2.4772160053253174, "logps/chosen": -203.95765686035156, "logps/rejected": -288.8656921386719, "loss": 0.4296, "rewards/accuracies": 0.875, "rewards/chosen": -1.0858700275421143, "rewards/margins": 1.9554939270019531, "rewards/rejected": -3.0413639545440674, "step": 3979 }, { "epoch": 0.46, "learning_rate": 1.6355261603873862e-07, "logits/chosen": -2.5727648735046387, "logits/rejected": -2.4634881019592285, "logps/chosen": -170.88404846191406, "logps/rejected": -319.9769287109375, "loss": 0.5968, "rewards/accuracies": 0.75, "rewards/chosen": -1.2404130697250366, "rewards/margins": 1.5059418678283691, "rewards/rejected": -2.7463550567626953, "step": 3980 }, { "epoch": 0.46, "learning_rate": 1.6351718436282037e-07, "logits/chosen": -2.6819450855255127, "logits/rejected": -2.6617238521575928, "logps/chosen": -184.51055908203125, "logps/rejected": -146.26290893554688, "loss": 0.3572, "rewards/accuracies": 0.875, "rewards/chosen": -1.4780151844024658, "rewards/margins": 1.1833654642105103, "rewards/rejected": -2.6613807678222656, "step": 3981 }, { "epoch": 0.46, "learning_rate": 1.634817526869021e-07, "logits/chosen": -2.5229270458221436, "logits/rejected": -2.326030969619751, "logps/chosen": -141.95672607421875, "logps/rejected": -308.0113220214844, "loss": 0.2517, "rewards/accuracies": 0.875, "rewards/chosen": -1.2896267175674438, "rewards/margins": 1.9409444332122803, "rewards/rejected": -3.2305712699890137, "step": 3982 }, { "epoch": 0.46, "learning_rate": 1.6344632101098381e-07, "logits/chosen": -2.587704658508301, "logits/rejected": -2.8659188747406006, "logps/chosen": -302.96575927734375, "logps/rejected": -321.80291748046875, "loss": 0.406, "rewards/accuracies": 0.75, "rewards/chosen": -1.1690610647201538, "rewards/margins": 2.596224546432495, "rewards/rejected": -3.7652854919433594, "step": 3983 }, { "epoch": 0.46, "learning_rate": 1.6341088933506554e-07, "logits/chosen": -2.233531951904297, "logits/rejected": -1.998093605041504, "logps/chosen": -349.4864501953125, "logps/rejected": -276.64874267578125, "loss": 0.1877, "rewards/accuracies": 1.0, "rewards/chosen": 0.20434747636318207, "rewards/margins": 2.5251004695892334, "rewards/rejected": -2.3207528591156006, "step": 3984 }, { "epoch": 0.46, "learning_rate": 1.6337545765914726e-07, "logits/chosen": -1.958150029182434, "logits/rejected": -1.9585254192352295, "logps/chosen": -341.4512023925781, "logps/rejected": -237.99676513671875, "loss": 0.3805, "rewards/accuracies": 0.75, "rewards/chosen": -0.6409279704093933, "rewards/margins": 1.2214231491088867, "rewards/rejected": -1.8623511791229248, "step": 3985 }, { "epoch": 0.46, "learning_rate": 1.6334002598322898e-07, "logits/chosen": -2.1044516563415527, "logits/rejected": -2.0845272541046143, "logps/chosen": -341.22723388671875, "logps/rejected": -256.0553894042969, "loss": 0.3522, "rewards/accuracies": 0.75, "rewards/chosen": -0.6865931749343872, "rewards/margins": 2.3085055351257324, "rewards/rejected": -2.9950990676879883, "step": 3986 }, { "epoch": 0.46, "learning_rate": 1.6330459430731073e-07, "logits/chosen": -1.354891061782837, "logits/rejected": -1.3291728496551514, "logps/chosen": -388.2460632324219, "logps/rejected": -345.3612060546875, "loss": 0.8292, "rewards/accuracies": 0.75, "rewards/chosen": -1.0791282653808594, "rewards/margins": 0.8187131881713867, "rewards/rejected": -1.897841453552246, "step": 3987 }, { "epoch": 0.46, "learning_rate": 1.6326916263139245e-07, "logits/chosen": -2.8874642848968506, "logits/rejected": -2.9945285320281982, "logps/chosen": -246.10214233398438, "logps/rejected": -215.39735412597656, "loss": 0.3906, "rewards/accuracies": 0.875, "rewards/chosen": -0.8647282123565674, "rewards/margins": 1.9278221130371094, "rewards/rejected": -2.7925500869750977, "step": 3988 }, { "epoch": 0.46, "learning_rate": 1.632337309554742e-07, "logits/chosen": -1.7505451440811157, "logits/rejected": -2.356766939163208, "logps/chosen": -469.513671875, "logps/rejected": -173.0124969482422, "loss": 0.3861, "rewards/accuracies": 0.75, "rewards/chosen": 0.017610162496566772, "rewards/margins": 1.6261199712753296, "rewards/rejected": -1.6085097789764404, "step": 3989 }, { "epoch": 0.46, "learning_rate": 1.6319829927955592e-07, "logits/chosen": -2.2524514198303223, "logits/rejected": -2.233635663986206, "logps/chosen": -185.6726531982422, "logps/rejected": -206.17462158203125, "loss": 0.4314, "rewards/accuracies": 0.75, "rewards/chosen": -0.4770694077014923, "rewards/margins": 1.2357823848724365, "rewards/rejected": -1.712851881980896, "step": 3990 }, { "epoch": 0.46, "learning_rate": 1.6316286760363764e-07, "logits/chosen": -2.6384668350219727, "logits/rejected": -2.7295751571655273, "logps/chosen": -211.89797973632812, "logps/rejected": -157.75331115722656, "loss": 0.4633, "rewards/accuracies": 0.75, "rewards/chosen": -0.7456852793693542, "rewards/margins": 0.7141335010528564, "rewards/rejected": -1.459818720817566, "step": 3991 }, { "epoch": 0.46, "learning_rate": 1.631274359277194e-07, "logits/chosen": -2.5034780502319336, "logits/rejected": -2.6355082988739014, "logps/chosen": -458.5139465332031, "logps/rejected": -365.8216247558594, "loss": 0.413, "rewards/accuracies": 0.75, "rewards/chosen": -1.2951167821884155, "rewards/margins": 2.463738203048706, "rewards/rejected": -3.758854866027832, "step": 3992 }, { "epoch": 0.46, "learning_rate": 1.6309200425180111e-07, "logits/chosen": -2.2143714427948, "logits/rejected": -1.9750316143035889, "logps/chosen": -236.6873016357422, "logps/rejected": -306.2986755371094, "loss": 0.4948, "rewards/accuracies": 0.75, "rewards/chosen": -0.8285441398620605, "rewards/margins": 0.9282800555229187, "rewards/rejected": -1.756824254989624, "step": 3993 }, { "epoch": 0.46, "learning_rate": 1.6305657257588284e-07, "logits/chosen": -2.1235861778259277, "logits/rejected": -2.258441686630249, "logps/chosen": -251.4058837890625, "logps/rejected": -255.95050048828125, "loss": 0.2592, "rewards/accuracies": 0.875, "rewards/chosen": -0.5137506723403931, "rewards/margins": 2.896247625350952, "rewards/rejected": -3.4099984169006348, "step": 3994 }, { "epoch": 0.46, "learning_rate": 1.6302114089996456e-07, "logits/chosen": -2.4660308361053467, "logits/rejected": -2.448822498321533, "logps/chosen": -462.6173095703125, "logps/rejected": -464.00909423828125, "loss": 0.2487, "rewards/accuracies": 0.875, "rewards/chosen": -0.06790051609277725, "rewards/margins": 3.669581413269043, "rewards/rejected": -3.7374818325042725, "step": 3995 }, { "epoch": 0.46, "learning_rate": 1.6298570922404628e-07, "logits/chosen": -2.359591007232666, "logits/rejected": -2.1683249473571777, "logps/chosen": -205.99949645996094, "logps/rejected": -257.1788330078125, "loss": 0.3221, "rewards/accuracies": 0.875, "rewards/chosen": -2.3679707050323486, "rewards/margins": 1.4290827512741089, "rewards/rejected": -3.797053098678589, "step": 3996 }, { "epoch": 0.46, "learning_rate": 1.62950277548128e-07, "logits/chosen": -2.207282543182373, "logits/rejected": -2.295748710632324, "logps/chosen": -243.1979522705078, "logps/rejected": -279.11920166015625, "loss": 1.4723, "rewards/accuracies": 0.625, "rewards/chosen": -2.282073974609375, "rewards/margins": -0.4423837661743164, "rewards/rejected": -1.8396903276443481, "step": 3997 }, { "epoch": 0.47, "learning_rate": 1.6291484587220975e-07, "logits/chosen": -1.6296473741531372, "logits/rejected": -1.9392175674438477, "logps/chosen": -398.195556640625, "logps/rejected": -332.976806640625, "loss": 0.2, "rewards/accuracies": 0.875, "rewards/chosen": -1.1722580194473267, "rewards/margins": 2.830291986465454, "rewards/rejected": -4.00255012512207, "step": 3998 }, { "epoch": 0.47, "learning_rate": 1.6287941419629147e-07, "logits/chosen": -2.768129825592041, "logits/rejected": -2.815741539001465, "logps/chosen": -319.0478820800781, "logps/rejected": -249.6585693359375, "loss": 0.5352, "rewards/accuracies": 0.5, "rewards/chosen": -1.5207188129425049, "rewards/margins": 0.9085509181022644, "rewards/rejected": -2.429269790649414, "step": 3999 }, { "epoch": 0.47, "learning_rate": 1.628439825203732e-07, "logits/chosen": -2.1106624603271484, "logits/rejected": -2.1956934928894043, "logps/chosen": -253.6958770751953, "logps/rejected": -309.3354797363281, "loss": 0.4074, "rewards/accuracies": 0.875, "rewards/chosen": -1.4726953506469727, "rewards/margins": 2.2677605152130127, "rewards/rejected": -3.7404556274414062, "step": 4000 }, { "epoch": 0.47, "eval_logits/chosen": -1.7543429136276245, "eval_logits/rejected": -1.756016492843628, "eval_logps/chosen": -279.1766662597656, "eval_logps/rejected": -278.8044738769531, "eval_loss": 0.3739631474018097, "eval_rewards/accuracies": 0.8390804529190063, "eval_rewards/chosen": -0.7011234164237976, "eval_rewards/margins": 2.0786702632904053, "eval_rewards/rejected": -2.7797935009002686, "eval_runtime": 265.312, "eval_samples_per_second": 2.62, "eval_steps_per_second": 1.312, "step": 4000 }, { "epoch": 0.47, "learning_rate": 1.6280855084445494e-07, "logits/chosen": -2.379930257797241, "logits/rejected": -2.430798292160034, "logps/chosen": -296.9425354003906, "logps/rejected": -471.7687072753906, "loss": 0.524, "rewards/accuracies": 0.75, "rewards/chosen": -1.2584030628204346, "rewards/margins": 2.2124128341674805, "rewards/rejected": -3.470816135406494, "step": 4001 }, { "epoch": 0.47, "learning_rate": 1.6277311916853667e-07, "logits/chosen": -2.2328290939331055, "logits/rejected": -2.102674961090088, "logps/chosen": -243.14926147460938, "logps/rejected": -316.7289123535156, "loss": 0.6844, "rewards/accuracies": 0.625, "rewards/chosen": -1.2766386270523071, "rewards/margins": 1.322392225265503, "rewards/rejected": -2.5990309715270996, "step": 4002 }, { "epoch": 0.47, "learning_rate": 1.6273768749261841e-07, "logits/chosen": -2.6471667289733887, "logits/rejected": -2.703214645385742, "logps/chosen": -133.6107635498047, "logps/rejected": -128.947509765625, "loss": 0.267, "rewards/accuracies": 1.0, "rewards/chosen": 0.002619832754135132, "rewards/margins": 1.599892258644104, "rewards/rejected": -1.5972723960876465, "step": 4003 }, { "epoch": 0.47, "learning_rate": 1.6270225581670014e-07, "logits/chosen": -2.3063712120056152, "logits/rejected": -2.1044745445251465, "logps/chosen": -530.57666015625, "logps/rejected": -378.1014404296875, "loss": 0.1876, "rewards/accuracies": 1.0, "rewards/chosen": -0.38555681705474854, "rewards/margins": 2.2903881072998047, "rewards/rejected": -2.6759450435638428, "step": 4004 }, { "epoch": 0.47, "learning_rate": 1.6266682414078186e-07, "logits/chosen": -1.9888043403625488, "logits/rejected": -2.1537792682647705, "logps/chosen": -323.05401611328125, "logps/rejected": -311.66796875, "loss": 0.7458, "rewards/accuracies": 0.5, "rewards/chosen": -0.8847439885139465, "rewards/margins": 0.5592479109764099, "rewards/rejected": -1.4439918994903564, "step": 4005 }, { "epoch": 0.47, "learning_rate": 1.6263139246486358e-07, "logits/chosen": -2.0223121643066406, "logits/rejected": -1.7573957443237305, "logps/chosen": -235.67868041992188, "logps/rejected": -324.9088134765625, "loss": 0.7484, "rewards/accuracies": 0.625, "rewards/chosen": -1.4342806339263916, "rewards/margins": 0.368630588054657, "rewards/rejected": -1.8029110431671143, "step": 4006 }, { "epoch": 0.47, "learning_rate": 1.625959607889453e-07, "logits/chosen": -2.4409708976745605, "logits/rejected": -2.517169952392578, "logps/chosen": -314.3721923828125, "logps/rejected": -290.0794677734375, "loss": 0.1322, "rewards/accuracies": 1.0, "rewards/chosen": -0.5039801001548767, "rewards/margins": 3.0455732345581055, "rewards/rejected": -3.549553394317627, "step": 4007 }, { "epoch": 0.47, "learning_rate": 1.6256052911302703e-07, "logits/chosen": -1.6862294673919678, "logits/rejected": -1.5986900329589844, "logps/chosen": -420.53533935546875, "logps/rejected": -433.80242919921875, "loss": 0.2507, "rewards/accuracies": 0.875, "rewards/chosen": -0.2712683379650116, "rewards/margins": 3.392768383026123, "rewards/rejected": -3.664036750793457, "step": 4008 }, { "epoch": 0.47, "learning_rate": 1.6252509743710875e-07, "logits/chosen": -2.7687180042266846, "logits/rejected": -2.7921576499938965, "logps/chosen": -264.06744384765625, "logps/rejected": -234.9364471435547, "loss": 0.2674, "rewards/accuracies": 0.875, "rewards/chosen": -0.845574140548706, "rewards/margins": 1.548851490020752, "rewards/rejected": -2.394425630569458, "step": 4009 }, { "epoch": 0.47, "learning_rate": 1.624896657611905e-07, "logits/chosen": -2.2160019874572754, "logits/rejected": -1.870922565460205, "logps/chosen": -162.74537658691406, "logps/rejected": -244.1003875732422, "loss": 0.2429, "rewards/accuracies": 1.0, "rewards/chosen": -1.959694504737854, "rewards/margins": 1.8619040250778198, "rewards/rejected": -3.821598529815674, "step": 4010 }, { "epoch": 0.47, "learning_rate": 1.6245423408527222e-07, "logits/chosen": -2.296600341796875, "logits/rejected": -2.544523239135742, "logps/chosen": -322.9901123046875, "logps/rejected": -284.6758117675781, "loss": 0.3879, "rewards/accuracies": 0.75, "rewards/chosen": -0.1900469809770584, "rewards/margins": 2.621302366256714, "rewards/rejected": -2.811349391937256, "step": 4011 }, { "epoch": 0.47, "learning_rate": 1.6241880240935394e-07, "logits/chosen": -2.365767002105713, "logits/rejected": -2.323242425918579, "logps/chosen": -409.7174072265625, "logps/rejected": -527.154296875, "loss": 0.3994, "rewards/accuracies": 0.75, "rewards/chosen": -0.7204976081848145, "rewards/margins": 2.4530062675476074, "rewards/rejected": -3.173503875732422, "step": 4012 }, { "epoch": 0.47, "learning_rate": 1.623833707334357e-07, "logits/chosen": -2.03511118888855, "logits/rejected": -2.220942735671997, "logps/chosen": -379.9289855957031, "logps/rejected": -317.8266296386719, "loss": 0.2949, "rewards/accuracies": 0.875, "rewards/chosen": -0.5238538980484009, "rewards/margins": 2.7420990467071533, "rewards/rejected": -3.2659528255462646, "step": 4013 }, { "epoch": 0.47, "learning_rate": 1.6234793905751744e-07, "logits/chosen": -2.1484086513519287, "logits/rejected": -2.218473196029663, "logps/chosen": -293.8561706542969, "logps/rejected": -332.3392028808594, "loss": 0.4194, "rewards/accuracies": 0.75, "rewards/chosen": -1.9928566217422485, "rewards/margins": 2.919572114944458, "rewards/rejected": -4.912428855895996, "step": 4014 }, { "epoch": 0.47, "learning_rate": 1.6231250738159916e-07, "logits/chosen": -2.280313014984131, "logits/rejected": -2.080308437347412, "logps/chosen": -256.4566650390625, "logps/rejected": -292.1198425292969, "loss": 0.2401, "rewards/accuracies": 0.875, "rewards/chosen": -1.376746654510498, "rewards/margins": 2.990668535232544, "rewards/rejected": -4.367414951324463, "step": 4015 }, { "epoch": 0.47, "learning_rate": 1.6227707570568088e-07, "logits/chosen": -2.017169237136841, "logits/rejected": -2.0995397567749023, "logps/chosen": -162.23745727539062, "logps/rejected": -257.9078674316406, "loss": 0.3163, "rewards/accuracies": 0.875, "rewards/chosen": -0.6523802280426025, "rewards/margins": 2.626143217086792, "rewards/rejected": -3.2785236835479736, "step": 4016 }, { "epoch": 0.47, "learning_rate": 1.622416440297626e-07, "logits/chosen": -2.786778688430786, "logits/rejected": -2.7719759941101074, "logps/chosen": -323.81829833984375, "logps/rejected": -327.05670166015625, "loss": 0.3937, "rewards/accuracies": 0.875, "rewards/chosen": -1.182190179824829, "rewards/margins": 3.739973545074463, "rewards/rejected": -4.922163963317871, "step": 4017 }, { "epoch": 0.47, "learning_rate": 1.6220621235384433e-07, "logits/chosen": -2.2916903495788574, "logits/rejected": -2.186828136444092, "logps/chosen": -242.97518920898438, "logps/rejected": -259.56756591796875, "loss": 0.3234, "rewards/accuracies": 0.875, "rewards/chosen": -0.6445357799530029, "rewards/margins": 1.6932637691497803, "rewards/rejected": -2.3377997875213623, "step": 4018 }, { "epoch": 0.47, "learning_rate": 1.6217078067792605e-07, "logits/chosen": -2.265206813812256, "logits/rejected": -2.304734230041504, "logps/chosen": -319.0094299316406, "logps/rejected": -331.67279052734375, "loss": 0.3195, "rewards/accuracies": 0.875, "rewards/chosen": -0.7656305432319641, "rewards/margins": 1.9768619537353516, "rewards/rejected": -2.742492198944092, "step": 4019 }, { "epoch": 0.47, "learning_rate": 1.6213534900200777e-07, "logits/chosen": -2.470607042312622, "logits/rejected": -2.414360523223877, "logps/chosen": -269.27008056640625, "logps/rejected": -349.06365966796875, "loss": 0.3613, "rewards/accuracies": 0.875, "rewards/chosen": -1.120682954788208, "rewards/margins": 2.846076011657715, "rewards/rejected": -3.966758966445923, "step": 4020 }, { "epoch": 0.47, "learning_rate": 1.6209991732608952e-07, "logits/chosen": -2.4460277557373047, "logits/rejected": -2.6244146823883057, "logps/chosen": -230.62615966796875, "logps/rejected": -254.40890502929688, "loss": 0.1826, "rewards/accuracies": 1.0, "rewards/chosen": -0.789209246635437, "rewards/margins": 2.248028516769409, "rewards/rejected": -3.0372378826141357, "step": 4021 }, { "epoch": 0.47, "learning_rate": 1.6206448565017124e-07, "logits/chosen": -1.6611381769180298, "logits/rejected": -2.2368080615997314, "logps/chosen": -403.5237121582031, "logps/rejected": -284.08453369140625, "loss": 0.2781, "rewards/accuracies": 0.875, "rewards/chosen": -1.1248193979263306, "rewards/margins": 3.752411365509033, "rewards/rejected": -4.877230644226074, "step": 4022 }, { "epoch": 0.47, "learning_rate": 1.6202905397425296e-07, "logits/chosen": -2.951423406600952, "logits/rejected": -2.8697543144226074, "logps/chosen": -335.9925842285156, "logps/rejected": -313.7688293457031, "loss": 0.5323, "rewards/accuracies": 0.75, "rewards/chosen": -1.5580132007598877, "rewards/margins": 2.8239974975585938, "rewards/rejected": -4.382010459899902, "step": 4023 }, { "epoch": 0.47, "learning_rate": 1.619936222983347e-07, "logits/chosen": -2.9665780067443848, "logits/rejected": -2.9253180027008057, "logps/chosen": -212.20179748535156, "logps/rejected": -164.77145385742188, "loss": 0.1098, "rewards/accuracies": 1.0, "rewards/chosen": -0.747307538986206, "rewards/margins": 2.8341691493988037, "rewards/rejected": -3.5814766883850098, "step": 4024 }, { "epoch": 0.47, "learning_rate": 1.6195819062241643e-07, "logits/chosen": -2.271594524383545, "logits/rejected": -2.4642539024353027, "logps/chosen": -204.7490692138672, "logps/rejected": -216.47128295898438, "loss": 0.2789, "rewards/accuracies": 0.875, "rewards/chosen": -0.9359766244888306, "rewards/margins": 1.6395865678787231, "rewards/rejected": -2.5755631923675537, "step": 4025 }, { "epoch": 0.47, "learning_rate": 1.6192275894649818e-07, "logits/chosen": -2.7245378494262695, "logits/rejected": -2.629551887512207, "logps/chosen": -186.0338134765625, "logps/rejected": -227.85263061523438, "loss": 0.2886, "rewards/accuracies": 0.75, "rewards/chosen": -0.5112806558609009, "rewards/margins": 2.917816162109375, "rewards/rejected": -3.4290971755981445, "step": 4026 }, { "epoch": 0.47, "learning_rate": 1.618873272705799e-07, "logits/chosen": -2.020596981048584, "logits/rejected": -2.4954113960266113, "logps/chosen": -400.50286865234375, "logps/rejected": -185.51773071289062, "loss": 0.4373, "rewards/accuracies": 0.875, "rewards/chosen": -1.0377007722854614, "rewards/margins": 1.5791009664535522, "rewards/rejected": -2.6168017387390137, "step": 4027 }, { "epoch": 0.47, "learning_rate": 1.6185189559466163e-07, "logits/chosen": -2.401359796524048, "logits/rejected": -2.3842291831970215, "logps/chosen": -283.03997802734375, "logps/rejected": -235.09490966796875, "loss": 0.5316, "rewards/accuracies": 0.75, "rewards/chosen": -1.3585634231567383, "rewards/margins": 1.1304881572723389, "rewards/rejected": -2.4890518188476562, "step": 4028 }, { "epoch": 0.47, "learning_rate": 1.6181646391874335e-07, "logits/chosen": -2.611067771911621, "logits/rejected": -2.8740954399108887, "logps/chosen": -274.0760803222656, "logps/rejected": -225.32354736328125, "loss": 0.4676, "rewards/accuracies": 0.875, "rewards/chosen": -1.0485639572143555, "rewards/margins": 2.136234760284424, "rewards/rejected": -3.1847987174987793, "step": 4029 }, { "epoch": 0.47, "learning_rate": 1.6178103224282507e-07, "logits/chosen": -2.02046275138855, "logits/rejected": -2.1551530361175537, "logps/chosen": -363.42169189453125, "logps/rejected": -458.1724853515625, "loss": 0.0779, "rewards/accuracies": 1.0, "rewards/chosen": -0.396560400724411, "rewards/margins": 5.026959419250488, "rewards/rejected": -5.423519611358643, "step": 4030 }, { "epoch": 0.47, "learning_rate": 1.617456005669068e-07, "logits/chosen": -2.393760919570923, "logits/rejected": -2.4512062072753906, "logps/chosen": -159.62484741210938, "logps/rejected": -175.38662719726562, "loss": 0.2828, "rewards/accuracies": 0.875, "rewards/chosen": -0.8472874760627747, "rewards/margins": 2.7341525554656982, "rewards/rejected": -3.581439733505249, "step": 4031 }, { "epoch": 0.47, "learning_rate": 1.6171016889098854e-07, "logits/chosen": -2.2262816429138184, "logits/rejected": -2.1051530838012695, "logps/chosen": -315.9046630859375, "logps/rejected": -330.21575927734375, "loss": 0.2684, "rewards/accuracies": 1.0, "rewards/chosen": -0.9145344495773315, "rewards/margins": 1.866511583328247, "rewards/rejected": -2.781046152114868, "step": 4032 }, { "epoch": 0.47, "learning_rate": 1.6167473721507026e-07, "logits/chosen": -1.9460225105285645, "logits/rejected": -2.028550624847412, "logps/chosen": -381.16546630859375, "logps/rejected": -288.1831970214844, "loss": 0.6179, "rewards/accuracies": 0.625, "rewards/chosen": -1.0359983444213867, "rewards/margins": 0.8157005310058594, "rewards/rejected": -1.8516989946365356, "step": 4033 }, { "epoch": 0.47, "learning_rate": 1.6163930553915199e-07, "logits/chosen": -2.157963752746582, "logits/rejected": -2.2237064838409424, "logps/chosen": -233.5794677734375, "logps/rejected": -245.76197814941406, "loss": 0.625, "rewards/accuracies": 0.625, "rewards/chosen": -0.5960309505462646, "rewards/margins": 1.6215870380401611, "rewards/rejected": -2.217617988586426, "step": 4034 }, { "epoch": 0.47, "learning_rate": 1.616038738632337e-07, "logits/chosen": -2.289233684539795, "logits/rejected": -2.1838035583496094, "logps/chosen": -342.93853759765625, "logps/rejected": -457.87554931640625, "loss": 0.4264, "rewards/accuracies": 0.75, "rewards/chosen": -0.35494521260261536, "rewards/margins": 1.5172770023345947, "rewards/rejected": -1.8722221851348877, "step": 4035 }, { "epoch": 0.47, "learning_rate": 1.6156844218731546e-07, "logits/chosen": -2.4482433795928955, "logits/rejected": -2.6665053367614746, "logps/chosen": -303.666259765625, "logps/rejected": -302.1606750488281, "loss": 0.3323, "rewards/accuracies": 0.75, "rewards/chosen": -0.5726577043533325, "rewards/margins": 2.3571507930755615, "rewards/rejected": -2.9298083782196045, "step": 4036 }, { "epoch": 0.47, "learning_rate": 1.615330105113972e-07, "logits/chosen": -2.490072250366211, "logits/rejected": -2.8387701511383057, "logps/chosen": -169.9790802001953, "logps/rejected": -199.3741912841797, "loss": 0.1145, "rewards/accuracies": 1.0, "rewards/chosen": -0.4634712338447571, "rewards/margins": 3.913933277130127, "rewards/rejected": -4.37740421295166, "step": 4037 }, { "epoch": 0.47, "learning_rate": 1.6149757883547893e-07, "logits/chosen": -1.8796353340148926, "logits/rejected": -2.210146903991699, "logps/chosen": -304.7650451660156, "logps/rejected": -210.4641876220703, "loss": 0.491, "rewards/accuracies": 0.625, "rewards/chosen": -1.0575072765350342, "rewards/margins": 0.9071345329284668, "rewards/rejected": -1.964641809463501, "step": 4038 }, { "epoch": 0.47, "learning_rate": 1.6146214715956065e-07, "logits/chosen": -2.542036771774292, "logits/rejected": -2.674370765686035, "logps/chosen": -286.69110107421875, "logps/rejected": -243.807861328125, "loss": 0.1299, "rewards/accuracies": 1.0, "rewards/chosen": -1.1560657024383545, "rewards/margins": 3.1867973804473877, "rewards/rejected": -4.342863082885742, "step": 4039 }, { "epoch": 0.47, "learning_rate": 1.6142671548364237e-07, "logits/chosen": -2.040658712387085, "logits/rejected": -2.1781704425811768, "logps/chosen": -408.151611328125, "logps/rejected": -346.5936279296875, "loss": 0.4058, "rewards/accuracies": 0.875, "rewards/chosen": -1.6145296096801758, "rewards/margins": 2.5092685222625732, "rewards/rejected": -4.123798370361328, "step": 4040 }, { "epoch": 0.47, "learning_rate": 1.613912838077241e-07, "logits/chosen": -2.2706711292266846, "logits/rejected": -2.2583773136138916, "logps/chosen": -281.1419982910156, "logps/rejected": -217.33255004882812, "loss": 0.5457, "rewards/accuracies": 0.75, "rewards/chosen": -0.9789040684700012, "rewards/margins": 1.506022572517395, "rewards/rejected": -2.484926700592041, "step": 4041 }, { "epoch": 0.47, "learning_rate": 1.6135585213180582e-07, "logits/chosen": -2.363219738006592, "logits/rejected": -2.7590720653533936, "logps/chosen": -337.92718505859375, "logps/rejected": -150.04483032226562, "loss": 0.3396, "rewards/accuracies": 0.875, "rewards/chosen": -0.47705310583114624, "rewards/margins": 1.2572790384292603, "rewards/rejected": -1.7343320846557617, "step": 4042 }, { "epoch": 0.47, "learning_rate": 1.6132042045588756e-07, "logits/chosen": -2.3465445041656494, "logits/rejected": -2.5133755207061768, "logps/chosen": -339.67559814453125, "logps/rejected": -287.059326171875, "loss": 0.4527, "rewards/accuracies": 0.625, "rewards/chosen": -1.5442863702774048, "rewards/margins": 1.576307773590088, "rewards/rejected": -3.120594024658203, "step": 4043 }, { "epoch": 0.47, "learning_rate": 1.6128498877996929e-07, "logits/chosen": -2.4933533668518066, "logits/rejected": -2.4782967567443848, "logps/chosen": -320.68670654296875, "logps/rejected": -287.8603210449219, "loss": 0.193, "rewards/accuracies": 0.875, "rewards/chosen": -1.1807019710540771, "rewards/margins": 2.167166233062744, "rewards/rejected": -3.347867965698242, "step": 4044 }, { "epoch": 0.47, "learning_rate": 1.61249557104051e-07, "logits/chosen": -2.078871488571167, "logits/rejected": -2.25752329826355, "logps/chosen": -307.66552734375, "logps/rejected": -325.8763427734375, "loss": 0.6708, "rewards/accuracies": 0.875, "rewards/chosen": -1.390794038772583, "rewards/margins": 1.597348928451538, "rewards/rejected": -2.988142967224121, "step": 4045 }, { "epoch": 0.47, "learning_rate": 1.6121412542813273e-07, "logits/chosen": -2.2558836936950684, "logits/rejected": -2.182671546936035, "logps/chosen": -374.37921142578125, "logps/rejected": -371.9806823730469, "loss": 0.3568, "rewards/accuracies": 0.875, "rewards/chosen": -1.3891608715057373, "rewards/margins": 1.6558938026428223, "rewards/rejected": -3.0450546741485596, "step": 4046 }, { "epoch": 0.47, "learning_rate": 1.6117869375221445e-07, "logits/chosen": -2.5904879570007324, "logits/rejected": -2.591486930847168, "logps/chosen": -168.71810913085938, "logps/rejected": -211.0722198486328, "loss": 0.3098, "rewards/accuracies": 0.625, "rewards/chosen": -0.5889180302619934, "rewards/margins": 2.0322959423065186, "rewards/rejected": -2.6212141513824463, "step": 4047 }, { "epoch": 0.47, "learning_rate": 1.6114326207629623e-07, "logits/chosen": -2.299152374267578, "logits/rejected": -2.5416386127471924, "logps/chosen": -449.2962646484375, "logps/rejected": -310.5745849609375, "loss": 0.3183, "rewards/accuracies": 0.875, "rewards/chosen": -1.6246856451034546, "rewards/margins": 2.300295829772949, "rewards/rejected": -3.9249815940856934, "step": 4048 }, { "epoch": 0.47, "learning_rate": 1.6110783040037795e-07, "logits/chosen": -2.633873462677002, "logits/rejected": -2.4719269275665283, "logps/chosen": -178.69044494628906, "logps/rejected": -172.24151611328125, "loss": 0.3892, "rewards/accuracies": 0.625, "rewards/chosen": -1.914673089981079, "rewards/margins": 2.1106061935424805, "rewards/rejected": -4.0252790451049805, "step": 4049 }, { "epoch": 0.47, "learning_rate": 1.6107239872445967e-07, "logits/chosen": -2.881516933441162, "logits/rejected": -2.8404793739318848, "logps/chosen": -152.82778930664062, "logps/rejected": -167.233154296875, "loss": 0.5245, "rewards/accuracies": 0.75, "rewards/chosen": -1.4401497840881348, "rewards/margins": 1.0958932638168335, "rewards/rejected": -2.5360429286956787, "step": 4050 }, { "epoch": 0.47, "learning_rate": 1.610369670485414e-07, "logits/chosen": -2.2415738105773926, "logits/rejected": -2.407860279083252, "logps/chosen": -380.449951171875, "logps/rejected": -198.9984588623047, "loss": 0.5611, "rewards/accuracies": 0.625, "rewards/chosen": -0.6974926590919495, "rewards/margins": 0.98536616563797, "rewards/rejected": -1.6828587055206299, "step": 4051 }, { "epoch": 0.47, "learning_rate": 1.6100153537262312e-07, "logits/chosen": -2.1791763305664062, "logits/rejected": -1.7162070274353027, "logps/chosen": -139.60792541503906, "logps/rejected": -235.7552947998047, "loss": 0.3149, "rewards/accuracies": 0.875, "rewards/chosen": -1.2683665752410889, "rewards/margins": 1.5227947235107422, "rewards/rejected": -2.791161298751831, "step": 4052 }, { "epoch": 0.47, "learning_rate": 1.6096610369670484e-07, "logits/chosen": -2.2600324153900146, "logits/rejected": -2.2509241104125977, "logps/chosen": -395.8753967285156, "logps/rejected": -469.9883117675781, "loss": 0.2143, "rewards/accuracies": 1.0, "rewards/chosen": -1.1634414196014404, "rewards/margins": 3.703289031982422, "rewards/rejected": -4.866730213165283, "step": 4053 }, { "epoch": 0.47, "learning_rate": 1.6093067202078656e-07, "logits/chosen": -2.444830894470215, "logits/rejected": -2.54936146736145, "logps/chosen": -269.7178955078125, "logps/rejected": -260.8194580078125, "loss": 1.0573, "rewards/accuracies": 0.625, "rewards/chosen": -2.498363494873047, "rewards/margins": 0.09302544593811035, "rewards/rejected": -2.5913891792297363, "step": 4054 }, { "epoch": 0.47, "learning_rate": 1.608952403448683e-07, "logits/chosen": -2.7121853828430176, "logits/rejected": -2.7707598209381104, "logps/chosen": -262.52447509765625, "logps/rejected": -308.056640625, "loss": 0.396, "rewards/accuracies": 0.75, "rewards/chosen": -1.2341034412384033, "rewards/margins": 3.3376901149749756, "rewards/rejected": -4.571793556213379, "step": 4055 }, { "epoch": 0.47, "learning_rate": 1.6085980866895003e-07, "logits/chosen": -2.401360273361206, "logits/rejected": -2.4464221000671387, "logps/chosen": -441.3764953613281, "logps/rejected": -197.8928985595703, "loss": 0.2244, "rewards/accuracies": 1.0, "rewards/chosen": -0.35251086950302124, "rewards/margins": 2.2192182540893555, "rewards/rejected": -2.5717291831970215, "step": 4056 }, { "epoch": 0.47, "learning_rate": 1.6082437699303175e-07, "logits/chosen": -2.2656633853912354, "logits/rejected": -1.8890137672424316, "logps/chosen": -175.5037078857422, "logps/rejected": -265.0185546875, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": -0.972220778465271, "rewards/margins": 2.718860626220703, "rewards/rejected": -3.6910812854766846, "step": 4057 }, { "epoch": 0.47, "learning_rate": 1.6078894531711348e-07, "logits/chosen": -2.0339388847351074, "logits/rejected": -1.7547874450683594, "logps/chosen": -382.5373229980469, "logps/rejected": -415.5988464355469, "loss": 0.5108, "rewards/accuracies": 0.625, "rewards/chosen": -0.6526508927345276, "rewards/margins": 0.6679400205612183, "rewards/rejected": -1.3205909729003906, "step": 4058 }, { "epoch": 0.47, "learning_rate": 1.6075351364119525e-07, "logits/chosen": -2.1825973987579346, "logits/rejected": -2.397676467895508, "logps/chosen": -401.91998291015625, "logps/rejected": -306.63262939453125, "loss": 0.2271, "rewards/accuracies": 0.875, "rewards/chosen": 0.04859080910682678, "rewards/margins": 2.8722405433654785, "rewards/rejected": -2.8236498832702637, "step": 4059 }, { "epoch": 0.47, "learning_rate": 1.6071808196527697e-07, "logits/chosen": -2.354980230331421, "logits/rejected": -2.0915491580963135, "logps/chosen": -425.04241943359375, "logps/rejected": -349.2367858886719, "loss": 0.6154, "rewards/accuracies": 0.625, "rewards/chosen": -0.7031338214874268, "rewards/margins": 0.9441513419151306, "rewards/rejected": -1.6472852230072021, "step": 4060 }, { "epoch": 0.47, "learning_rate": 1.606826502893587e-07, "logits/chosen": -2.585381031036377, "logits/rejected": -2.640963554382324, "logps/chosen": -232.6954345703125, "logps/rejected": -179.76553344726562, "loss": 0.7165, "rewards/accuracies": 0.625, "rewards/chosen": -0.6204997897148132, "rewards/margins": 0.6255846619606018, "rewards/rejected": -1.2460843324661255, "step": 4061 }, { "epoch": 0.47, "learning_rate": 1.6064721861344042e-07, "logits/chosen": -1.9641785621643066, "logits/rejected": -2.026552677154541, "logps/chosen": -245.78564453125, "logps/rejected": -314.75531005859375, "loss": 0.1802, "rewards/accuracies": 1.0, "rewards/chosen": -1.6634852886199951, "rewards/margins": 2.6505494117736816, "rewards/rejected": -4.314034461975098, "step": 4062 }, { "epoch": 0.47, "learning_rate": 1.6061178693752214e-07, "logits/chosen": -3.1083731651306152, "logits/rejected": -2.98187255859375, "logps/chosen": -331.98712158203125, "logps/rejected": -261.7937316894531, "loss": 0.5732, "rewards/accuracies": 0.75, "rewards/chosen": -1.6876945495605469, "rewards/margins": 1.7389991283416748, "rewards/rejected": -3.4266934394836426, "step": 4063 }, { "epoch": 0.47, "learning_rate": 1.6057635526160386e-07, "logits/chosen": -2.554898500442505, "logits/rejected": -2.4133505821228027, "logps/chosen": -208.2342071533203, "logps/rejected": -204.43707275390625, "loss": 0.5257, "rewards/accuracies": 0.875, "rewards/chosen": -0.8244565725326538, "rewards/margins": 1.4461679458618164, "rewards/rejected": -2.2706246376037598, "step": 4064 }, { "epoch": 0.47, "learning_rate": 1.6054092358568558e-07, "logits/chosen": -1.821702480316162, "logits/rejected": -2.0414624214172363, "logps/chosen": -290.4552001953125, "logps/rejected": -256.58544921875, "loss": 0.6059, "rewards/accuracies": 0.875, "rewards/chosen": -1.5079951286315918, "rewards/margins": 1.7242575883865356, "rewards/rejected": -3.232252836227417, "step": 4065 }, { "epoch": 0.47, "learning_rate": 1.6050549190976733e-07, "logits/chosen": -2.4960572719573975, "logits/rejected": -2.640667676925659, "logps/chosen": -139.92697143554688, "logps/rejected": -181.45993041992188, "loss": 0.1263, "rewards/accuracies": 1.0, "rewards/chosen": -0.49396538734436035, "rewards/margins": 3.050597906112671, "rewards/rejected": -3.5445632934570312, "step": 4066 }, { "epoch": 0.47, "learning_rate": 1.6047006023384905e-07, "logits/chosen": -2.3991169929504395, "logits/rejected": -2.4583399295806885, "logps/chosen": -395.4876708984375, "logps/rejected": -282.66326904296875, "loss": 0.0929, "rewards/accuracies": 1.0, "rewards/chosen": -0.769955039024353, "rewards/margins": 3.436938762664795, "rewards/rejected": -4.2068939208984375, "step": 4067 }, { "epoch": 0.47, "learning_rate": 1.6043462855793078e-07, "logits/chosen": -2.444474220275879, "logits/rejected": -2.571960210800171, "logps/chosen": -295.9311218261719, "logps/rejected": -317.5608825683594, "loss": 0.1707, "rewards/accuracies": 0.875, "rewards/chosen": -0.20013374090194702, "rewards/margins": 3.0900442600250244, "rewards/rejected": -3.290178060531616, "step": 4068 }, { "epoch": 0.47, "learning_rate": 1.603991968820125e-07, "logits/chosen": -2.5156807899475098, "logits/rejected": -2.5111725330352783, "logps/chosen": -198.70034790039062, "logps/rejected": -290.62298583984375, "loss": 0.6655, "rewards/accuracies": 0.625, "rewards/chosen": -0.8097915053367615, "rewards/margins": 1.1327472925186157, "rewards/rejected": -1.9425387382507324, "step": 4069 }, { "epoch": 0.47, "learning_rate": 1.6036376520609422e-07, "logits/chosen": -2.0453906059265137, "logits/rejected": -2.0513157844543457, "logps/chosen": -274.03863525390625, "logps/rejected": -367.76739501953125, "loss": 0.7074, "rewards/accuracies": 0.875, "rewards/chosen": -1.3181802034378052, "rewards/margins": 2.529111623764038, "rewards/rejected": -3.8472914695739746, "step": 4070 }, { "epoch": 0.47, "learning_rate": 1.60328333530176e-07, "logits/chosen": -1.9644604921340942, "logits/rejected": -1.909451961517334, "logps/chosen": -193.8740234375, "logps/rejected": -261.7751159667969, "loss": 0.2701, "rewards/accuracies": 0.875, "rewards/chosen": -0.5871630907058716, "rewards/margins": 2.0218992233276367, "rewards/rejected": -2.6090621948242188, "step": 4071 }, { "epoch": 0.47, "learning_rate": 1.6029290185425772e-07, "logits/chosen": -2.579878807067871, "logits/rejected": -2.503446340560913, "logps/chosen": -220.50244140625, "logps/rejected": -192.82559204101562, "loss": 0.6029, "rewards/accuracies": 0.75, "rewards/chosen": -1.6051865816116333, "rewards/margins": 0.7177762985229492, "rewards/rejected": -2.322962760925293, "step": 4072 }, { "epoch": 0.47, "learning_rate": 1.6025747017833944e-07, "logits/chosen": -1.7583746910095215, "logits/rejected": -1.6010172367095947, "logps/chosen": -183.11566162109375, "logps/rejected": -325.06402587890625, "loss": 0.6041, "rewards/accuracies": 0.625, "rewards/chosen": -0.6765381693840027, "rewards/margins": 1.2048523426055908, "rewards/rejected": -1.8813905715942383, "step": 4073 }, { "epoch": 0.47, "learning_rate": 1.6022203850242116e-07, "logits/chosen": -2.0507805347442627, "logits/rejected": -1.8608496189117432, "logps/chosen": -138.75413513183594, "logps/rejected": -261.38214111328125, "loss": 0.7084, "rewards/accuracies": 0.875, "rewards/chosen": -0.5281267166137695, "rewards/margins": 1.9847915172576904, "rewards/rejected": -2.512917995452881, "step": 4074 }, { "epoch": 0.47, "learning_rate": 1.6018660682650288e-07, "logits/chosen": -2.4153058528900146, "logits/rejected": -2.544142246246338, "logps/chosen": -447.7703857421875, "logps/rejected": -368.1720886230469, "loss": 0.3818, "rewards/accuracies": 0.875, "rewards/chosen": -0.8595754504203796, "rewards/margins": 1.5147007703781128, "rewards/rejected": -2.3742763996124268, "step": 4075 }, { "epoch": 0.47, "learning_rate": 1.601511751505846e-07, "logits/chosen": -1.6977249383926392, "logits/rejected": -1.8011817932128906, "logps/chosen": -472.72723388671875, "logps/rejected": -351.3356018066406, "loss": 0.181, "rewards/accuracies": 1.0, "rewards/chosen": -0.4079134166240692, "rewards/margins": 2.84122896194458, "rewards/rejected": -3.2491424083709717, "step": 4076 }, { "epoch": 0.47, "learning_rate": 1.6011574347466635e-07, "logits/chosen": -2.35325288772583, "logits/rejected": -2.475297451019287, "logps/chosen": -141.10743713378906, "logps/rejected": -267.9314270019531, "loss": 0.5449, "rewards/accuracies": 0.75, "rewards/chosen": -0.9425958395004272, "rewards/margins": 2.1664507389068604, "rewards/rejected": -3.109046697616577, "step": 4077 }, { "epoch": 0.47, "learning_rate": 1.6008031179874808e-07, "logits/chosen": -1.7343977689743042, "logits/rejected": -1.6708441972732544, "logps/chosen": -221.2890625, "logps/rejected": -274.0637512207031, "loss": 0.9271, "rewards/accuracies": 0.625, "rewards/chosen": -0.9744048118591309, "rewards/margins": 1.1185492277145386, "rewards/rejected": -2.09295392036438, "step": 4078 }, { "epoch": 0.47, "learning_rate": 1.600448801228298e-07, "logits/chosen": -1.8908538818359375, "logits/rejected": -2.3978636264801025, "logps/chosen": -457.4634704589844, "logps/rejected": -279.3254699707031, "loss": 0.6214, "rewards/accuracies": 0.625, "rewards/chosen": -1.2218141555786133, "rewards/margins": 1.3700751066207886, "rewards/rejected": -2.5918893814086914, "step": 4079 }, { "epoch": 0.47, "learning_rate": 1.6000944844691152e-07, "logits/chosen": -2.451700448989868, "logits/rejected": -2.6494975090026855, "logps/chosen": -253.4405517578125, "logps/rejected": -237.72079467773438, "loss": 0.253, "rewards/accuracies": 0.875, "rewards/chosen": -0.920867919921875, "rewards/margins": 2.4679102897644043, "rewards/rejected": -3.3887784481048584, "step": 4080 }, { "epoch": 0.47, "learning_rate": 1.5997401677099324e-07, "logits/chosen": -2.4073398113250732, "logits/rejected": -2.6285667419433594, "logps/chosen": -224.21771240234375, "logps/rejected": -178.3167724609375, "loss": 1.4038, "rewards/accuracies": 0.625, "rewards/chosen": -2.1904518604278564, "rewards/margins": 1.5713820457458496, "rewards/rejected": -3.761833906173706, "step": 4081 }, { "epoch": 0.47, "learning_rate": 1.5993858509507497e-07, "logits/chosen": -2.6058974266052246, "logits/rejected": -2.5536632537841797, "logps/chosen": -338.1208190917969, "logps/rejected": -302.50103759765625, "loss": 1.5742, "rewards/accuracies": 0.5, "rewards/chosen": -2.7505910396575928, "rewards/margins": 0.7087265253067017, "rewards/rejected": -3.459317684173584, "step": 4082 }, { "epoch": 0.47, "learning_rate": 1.5990315341915674e-07, "logits/chosen": -2.1105403900146484, "logits/rejected": -2.048630714416504, "logps/chosen": -329.95867919921875, "logps/rejected": -260.979736328125, "loss": 0.4957, "rewards/accuracies": 0.75, "rewards/chosen": -0.794643759727478, "rewards/margins": 1.9263921976089478, "rewards/rejected": -2.721035957336426, "step": 4083 }, { "epoch": 0.48, "learning_rate": 1.5986772174323846e-07, "logits/chosen": -2.377633810043335, "logits/rejected": -2.227105140686035, "logps/chosen": -237.14263916015625, "logps/rejected": -223.47979736328125, "loss": 0.3091, "rewards/accuracies": 0.875, "rewards/chosen": -0.7461654543876648, "rewards/margins": 2.60564923286438, "rewards/rejected": -3.3518147468566895, "step": 4084 }, { "epoch": 0.48, "learning_rate": 1.5983229006732018e-07, "logits/chosen": -2.4711110591888428, "logits/rejected": -2.490372657775879, "logps/chosen": -279.9700927734375, "logps/rejected": -370.2623291015625, "loss": 0.4187, "rewards/accuracies": 0.875, "rewards/chosen": -0.9257657527923584, "rewards/margins": 2.35088849067688, "rewards/rejected": -3.2766542434692383, "step": 4085 }, { "epoch": 0.48, "learning_rate": 1.597968583914019e-07, "logits/chosen": -2.845172166824341, "logits/rejected": -2.797882556915283, "logps/chosen": -378.9098205566406, "logps/rejected": -258.28448486328125, "loss": 0.2672, "rewards/accuracies": 0.75, "rewards/chosen": -1.5431761741638184, "rewards/margins": 2.036651849746704, "rewards/rejected": -3.5798280239105225, "step": 4086 }, { "epoch": 0.48, "learning_rate": 1.5976142671548363e-07, "logits/chosen": -1.8797632455825806, "logits/rejected": -2.002561330795288, "logps/chosen": -183.81027221679688, "logps/rejected": -206.70887756347656, "loss": 0.261, "rewards/accuracies": 0.875, "rewards/chosen": -0.6311947107315063, "rewards/margins": 2.5990540981292725, "rewards/rejected": -3.2302486896514893, "step": 4087 }, { "epoch": 0.48, "learning_rate": 1.5972599503956538e-07, "logits/chosen": -2.213416576385498, "logits/rejected": -2.044665575027466, "logps/chosen": -259.0989685058594, "logps/rejected": -218.49151611328125, "loss": 0.3329, "rewards/accuracies": 0.875, "rewards/chosen": -0.5532823801040649, "rewards/margins": 1.7879688739776611, "rewards/rejected": -2.3412511348724365, "step": 4088 }, { "epoch": 0.48, "learning_rate": 1.596905633636471e-07, "logits/chosen": -2.1516809463500977, "logits/rejected": -2.295790672302246, "logps/chosen": -256.18621826171875, "logps/rejected": -427.3019104003906, "loss": 0.2658, "rewards/accuracies": 0.875, "rewards/chosen": -0.7278546094894409, "rewards/margins": 2.1314985752105713, "rewards/rejected": -2.8593530654907227, "step": 4089 }, { "epoch": 0.48, "learning_rate": 1.5965513168772882e-07, "logits/chosen": -2.514441967010498, "logits/rejected": -2.64673113822937, "logps/chosen": -296.86993408203125, "logps/rejected": -305.88250732421875, "loss": 0.2891, "rewards/accuracies": 0.875, "rewards/chosen": -0.6698538064956665, "rewards/margins": 2.2777507305145264, "rewards/rejected": -2.9476046562194824, "step": 4090 }, { "epoch": 0.48, "learning_rate": 1.5961970001181054e-07, "logits/chosen": -2.4124927520751953, "logits/rejected": -2.173229455947876, "logps/chosen": -278.9635314941406, "logps/rejected": -202.28460693359375, "loss": 0.8594, "rewards/accuracies": 0.625, "rewards/chosen": -0.8075892329216003, "rewards/margins": 0.6399751901626587, "rewards/rejected": -1.4475644826889038, "step": 4091 }, { "epoch": 0.48, "learning_rate": 1.5958426833589227e-07, "logits/chosen": -2.3530056476593018, "logits/rejected": -2.238673686981201, "logps/chosen": -447.05438232421875, "logps/rejected": -315.23736572265625, "loss": 0.6116, "rewards/accuracies": 0.75, "rewards/chosen": -1.0672606229782104, "rewards/margins": 0.6063184142112732, "rewards/rejected": -1.6735789775848389, "step": 4092 }, { "epoch": 0.48, "learning_rate": 1.59548836659974e-07, "logits/chosen": -1.638364553451538, "logits/rejected": -1.8158390522003174, "logps/chosen": -623.6826782226562, "logps/rejected": -539.3189697265625, "loss": 0.6915, "rewards/accuracies": 0.625, "rewards/chosen": -0.9681775569915771, "rewards/margins": 1.3104493618011475, "rewards/rejected": -2.2786269187927246, "step": 4093 }, { "epoch": 0.48, "learning_rate": 1.5951340498405576e-07, "logits/chosen": -3.0527052879333496, "logits/rejected": -3.051730155944824, "logps/chosen": -354.683837890625, "logps/rejected": -333.9454650878906, "loss": 0.5658, "rewards/accuracies": 0.875, "rewards/chosen": -1.9278634786605835, "rewards/margins": 1.8784294128417969, "rewards/rejected": -3.80629301071167, "step": 4094 }, { "epoch": 0.48, "learning_rate": 1.5947797330813749e-07, "logits/chosen": -2.32759690284729, "logits/rejected": -2.39555025100708, "logps/chosen": -192.4286651611328, "logps/rejected": -249.15121459960938, "loss": 0.387, "rewards/accuracies": 0.875, "rewards/chosen": -1.2137242555618286, "rewards/margins": 1.6946457624435425, "rewards/rejected": -2.908370018005371, "step": 4095 }, { "epoch": 0.48, "learning_rate": 1.594425416322192e-07, "logits/chosen": -2.1371660232543945, "logits/rejected": -2.4133615493774414, "logps/chosen": -466.2069091796875, "logps/rejected": -310.89532470703125, "loss": 0.4371, "rewards/accuracies": 0.75, "rewards/chosen": 0.050594180822372437, "rewards/margins": 1.6599353551864624, "rewards/rejected": -1.6093411445617676, "step": 4096 }, { "epoch": 0.48, "learning_rate": 1.5940710995630093e-07, "logits/chosen": -2.536724090576172, "logits/rejected": -2.2177071571350098, "logps/chosen": -216.52041625976562, "logps/rejected": -206.5153350830078, "loss": 0.3491, "rewards/accuracies": 0.75, "rewards/chosen": -0.17656826972961426, "rewards/margins": 1.8213014602661133, "rewards/rejected": -1.997869610786438, "step": 4097 }, { "epoch": 0.48, "learning_rate": 1.5937167828038265e-07, "logits/chosen": -2.7953038215637207, "logits/rejected": -2.7435901165008545, "logps/chosen": -159.49229431152344, "logps/rejected": -181.7948455810547, "loss": 0.9458, "rewards/accuracies": 0.5, "rewards/chosen": -1.6305214166641235, "rewards/margins": -0.2839755713939667, "rewards/rejected": -1.346545696258545, "step": 4098 }, { "epoch": 0.48, "learning_rate": 1.5933624660446437e-07, "logits/chosen": -1.999406337738037, "logits/rejected": -1.912480115890503, "logps/chosen": -356.0205078125, "logps/rejected": -362.4521789550781, "loss": 0.2611, "rewards/accuracies": 0.875, "rewards/chosen": -1.212924599647522, "rewards/margins": 3.8814027309417725, "rewards/rejected": -5.094327449798584, "step": 4099 }, { "epoch": 0.48, "learning_rate": 1.5930081492854612e-07, "logits/chosen": -2.679166316986084, "logits/rejected": -2.674785852432251, "logps/chosen": -166.195556640625, "logps/rejected": -218.36532592773438, "loss": 0.4153, "rewards/accuracies": 0.875, "rewards/chosen": -0.6651751399040222, "rewards/margins": 0.944736659526825, "rewards/rejected": -1.6099117994308472, "step": 4100 }, { "epoch": 0.48, "learning_rate": 1.5926538325262784e-07, "logits/chosen": -2.332929849624634, "logits/rejected": -2.505073070526123, "logps/chosen": -280.653564453125, "logps/rejected": -260.27862548828125, "loss": 1.727, "rewards/accuracies": 0.375, "rewards/chosen": -2.04426646232605, "rewards/margins": -0.043112486600875854, "rewards/rejected": -2.0011541843414307, "step": 4101 }, { "epoch": 0.48, "learning_rate": 1.5922995157670957e-07, "logits/chosen": -2.8159780502319336, "logits/rejected": -2.74099063873291, "logps/chosen": -162.16282653808594, "logps/rejected": -133.0289306640625, "loss": 1.6611, "rewards/accuracies": 0.375, "rewards/chosen": -1.957200288772583, "rewards/margins": -0.46663814783096313, "rewards/rejected": -1.4905623197555542, "step": 4102 }, { "epoch": 0.48, "learning_rate": 1.591945199007913e-07, "logits/chosen": -2.060239315032959, "logits/rejected": -2.409789562225342, "logps/chosen": -339.5497741699219, "logps/rejected": -237.48089599609375, "loss": 0.262, "rewards/accuracies": 0.875, "rewards/chosen": -0.6689355969429016, "rewards/margins": 3.102839231491089, "rewards/rejected": -3.7717747688293457, "step": 4103 }, { "epoch": 0.48, "learning_rate": 1.59159088224873e-07, "logits/chosen": -2.528618574142456, "logits/rejected": -2.521982431411743, "logps/chosen": -451.046630859375, "logps/rejected": -358.0485534667969, "loss": 0.25, "rewards/accuracies": 0.875, "rewards/chosen": -0.5276907682418823, "rewards/margins": 2.8991539478302, "rewards/rejected": -3.426844835281372, "step": 4104 }, { "epoch": 0.48, "learning_rate": 1.5912365654895473e-07, "logits/chosen": -1.9094429016113281, "logits/rejected": -1.9235280752182007, "logps/chosen": -239.66941833496094, "logps/rejected": -322.7490539550781, "loss": 1.1488, "rewards/accuracies": 0.5, "rewards/chosen": -1.6217199563980103, "rewards/margins": 0.2931760549545288, "rewards/rejected": -1.914896011352539, "step": 4105 }, { "epoch": 0.48, "learning_rate": 1.590882248730365e-07, "logits/chosen": -2.2975056171417236, "logits/rejected": -1.9994398355484009, "logps/chosen": -250.85430908203125, "logps/rejected": -336.4378662109375, "loss": 0.1773, "rewards/accuracies": 0.875, "rewards/chosen": -0.6649555563926697, "rewards/margins": 3.0838699340820312, "rewards/rejected": -3.7488255500793457, "step": 4106 }, { "epoch": 0.48, "learning_rate": 1.5905279319711823e-07, "logits/chosen": -2.58534836769104, "logits/rejected": -2.168038845062256, "logps/chosen": -102.24021911621094, "logps/rejected": -228.93296813964844, "loss": 0.2939, "rewards/accuracies": 0.75, "rewards/chosen": -0.9426016211509705, "rewards/margins": 2.437493324279785, "rewards/rejected": -3.3800950050354004, "step": 4107 }, { "epoch": 0.48, "learning_rate": 1.5901736152119995e-07, "logits/chosen": -2.071634292602539, "logits/rejected": -2.1887335777282715, "logps/chosen": -284.48193359375, "logps/rejected": -284.4460144042969, "loss": 0.5139, "rewards/accuracies": 0.875, "rewards/chosen": -0.7219847440719604, "rewards/margins": 1.8136142492294312, "rewards/rejected": -2.5355989933013916, "step": 4108 }, { "epoch": 0.48, "learning_rate": 1.5898192984528167e-07, "logits/chosen": -2.9952168464660645, "logits/rejected": -3.003607988357544, "logps/chosen": -132.6145477294922, "logps/rejected": -196.7735595703125, "loss": 0.4071, "rewards/accuracies": 0.75, "rewards/chosen": -0.9975700378417969, "rewards/margins": 2.131071090698242, "rewards/rejected": -3.128641128540039, "step": 4109 }, { "epoch": 0.48, "learning_rate": 1.589464981693634e-07, "logits/chosen": -2.0340311527252197, "logits/rejected": -1.9708693027496338, "logps/chosen": -295.36181640625, "logps/rejected": -339.752685546875, "loss": 0.4462, "rewards/accuracies": 0.75, "rewards/chosen": -0.7982165217399597, "rewards/margins": 1.0012586116790771, "rewards/rejected": -1.799475073814392, "step": 4110 }, { "epoch": 0.48, "learning_rate": 1.5891106649344515e-07, "logits/chosen": -1.9854971170425415, "logits/rejected": -2.1044232845306396, "logps/chosen": -179.58389282226562, "logps/rejected": -238.39712524414062, "loss": 0.3881, "rewards/accuracies": 0.75, "rewards/chosen": -0.8694287538528442, "rewards/margins": 3.079129695892334, "rewards/rejected": -3.9485583305358887, "step": 4111 }, { "epoch": 0.48, "learning_rate": 1.5887563481752687e-07, "logits/chosen": -2.178144693374634, "logits/rejected": -2.2932960987091064, "logps/chosen": -400.01080322265625, "logps/rejected": -340.9632873535156, "loss": 0.3896, "rewards/accuracies": 0.75, "rewards/chosen": -0.8133999109268188, "rewards/margins": 1.5972542762756348, "rewards/rejected": -2.410654067993164, "step": 4112 }, { "epoch": 0.48, "learning_rate": 1.588402031416086e-07, "logits/chosen": -2.5651512145996094, "logits/rejected": -2.5871524810791016, "logps/chosen": -261.5509033203125, "logps/rejected": -305.1123046875, "loss": 0.6294, "rewards/accuracies": 0.625, "rewards/chosen": -1.9217718839645386, "rewards/margins": 1.7191444635391235, "rewards/rejected": -3.640916109085083, "step": 4113 }, { "epoch": 0.48, "learning_rate": 1.588047714656903e-07, "logits/chosen": -2.842311382293701, "logits/rejected": -2.912226438522339, "logps/chosen": -350.75164794921875, "logps/rejected": -340.19842529296875, "loss": 0.3128, "rewards/accuracies": 0.875, "rewards/chosen": -1.7577451467514038, "rewards/margins": 2.4276111125946045, "rewards/rejected": -4.185356140136719, "step": 4114 }, { "epoch": 0.48, "learning_rate": 1.5876933978977203e-07, "logits/chosen": -2.1172144412994385, "logits/rejected": -2.4964070320129395, "logps/chosen": -241.9571533203125, "logps/rejected": -245.68923950195312, "loss": 0.4407, "rewards/accuracies": 0.5, "rewards/chosen": -1.0332794189453125, "rewards/margins": 4.227479934692383, "rewards/rejected": -5.260759353637695, "step": 4115 }, { "epoch": 0.48, "learning_rate": 1.5873390811385376e-07, "logits/chosen": -1.5568325519561768, "logits/rejected": -1.7480478286743164, "logps/chosen": -393.9427185058594, "logps/rejected": -322.64398193359375, "loss": 0.449, "rewards/accuracies": 0.75, "rewards/chosen": -0.7210593223571777, "rewards/margins": 2.0277280807495117, "rewards/rejected": -2.7487874031066895, "step": 4116 }, { "epoch": 0.48, "learning_rate": 1.5869847643793548e-07, "logits/chosen": -2.088031530380249, "logits/rejected": -2.176208734512329, "logps/chosen": -573.229736328125, "logps/rejected": -345.135498046875, "loss": 0.2559, "rewards/accuracies": 0.875, "rewards/chosen": -0.8664940595626831, "rewards/margins": 2.7311201095581055, "rewards/rejected": -3.597614288330078, "step": 4117 }, { "epoch": 0.48, "learning_rate": 1.5866304476201725e-07, "logits/chosen": -2.3836605548858643, "logits/rejected": -2.455425500869751, "logps/chosen": -275.0123291015625, "logps/rejected": -281.6199035644531, "loss": 0.2118, "rewards/accuracies": 1.0, "rewards/chosen": -0.23598292469978333, "rewards/margins": 2.445526123046875, "rewards/rejected": -2.681509017944336, "step": 4118 }, { "epoch": 0.48, "learning_rate": 1.5862761308609898e-07, "logits/chosen": -2.4276106357574463, "logits/rejected": -2.5549001693725586, "logps/chosen": -298.1375427246094, "logps/rejected": -156.0191650390625, "loss": 0.5989, "rewards/accuracies": 0.75, "rewards/chosen": -1.3395609855651855, "rewards/margins": 0.7980355620384216, "rewards/rejected": -2.137596607208252, "step": 4119 }, { "epoch": 0.48, "learning_rate": 1.585921814101807e-07, "logits/chosen": -2.9116568565368652, "logits/rejected": -2.7068023681640625, "logps/chosen": -616.156494140625, "logps/rejected": -286.8794250488281, "loss": 0.3279, "rewards/accuracies": 0.875, "rewards/chosen": -1.8033970594406128, "rewards/margins": 2.395587682723999, "rewards/rejected": -4.198984622955322, "step": 4120 }, { "epoch": 0.48, "learning_rate": 1.5855674973426242e-07, "logits/chosen": -2.0914127826690674, "logits/rejected": -2.4580025672912598, "logps/chosen": -394.1136169433594, "logps/rejected": -217.47097778320312, "loss": 0.226, "rewards/accuracies": 1.0, "rewards/chosen": -0.8979743123054504, "rewards/margins": 1.8563799858093262, "rewards/rejected": -2.754354476928711, "step": 4121 }, { "epoch": 0.48, "learning_rate": 1.5852131805834417e-07, "logits/chosen": -1.9182684421539307, "logits/rejected": -2.076828956604004, "logps/chosen": -334.1504821777344, "logps/rejected": -262.4606628417969, "loss": 0.2653, "rewards/accuracies": 1.0, "rewards/chosen": -1.2752124071121216, "rewards/margins": 1.6516660451889038, "rewards/rejected": -2.9268784523010254, "step": 4122 }, { "epoch": 0.48, "learning_rate": 1.584858863824259e-07, "logits/chosen": -2.4182639122009277, "logits/rejected": -2.109921455383301, "logps/chosen": -290.3524475097656, "logps/rejected": -365.86029052734375, "loss": 0.1722, "rewards/accuracies": 1.0, "rewards/chosen": -0.711117148399353, "rewards/margins": 2.268132209777832, "rewards/rejected": -2.9792490005493164, "step": 4123 }, { "epoch": 0.48, "learning_rate": 1.584504547065076e-07, "logits/chosen": -2.4011192321777344, "logits/rejected": -2.567085027694702, "logps/chosen": -213.08224487304688, "logps/rejected": -247.150146484375, "loss": 0.2504, "rewards/accuracies": 1.0, "rewards/chosen": -1.1518572568893433, "rewards/margins": 2.173279047012329, "rewards/rejected": -3.325136423110962, "step": 4124 }, { "epoch": 0.48, "learning_rate": 1.5841502303058933e-07, "logits/chosen": -2.5838942527770996, "logits/rejected": -2.3286519050598145, "logps/chosen": -199.90118408203125, "logps/rejected": -423.9735107421875, "loss": 0.1645, "rewards/accuracies": 1.0, "rewards/chosen": -0.9944974184036255, "rewards/margins": 2.408916711807251, "rewards/rejected": -3.403414011001587, "step": 4125 }, { "epoch": 0.48, "learning_rate": 1.5837959135467106e-07, "logits/chosen": -2.046470880508423, "logits/rejected": -1.9612995386123657, "logps/chosen": -279.3263854980469, "logps/rejected": -294.11248779296875, "loss": 0.3631, "rewards/accuracies": 0.75, "rewards/chosen": -0.8511017560958862, "rewards/margins": 2.7475969791412354, "rewards/rejected": -3.5986990928649902, "step": 4126 }, { "epoch": 0.48, "learning_rate": 1.5834415967875278e-07, "logits/chosen": -1.8600528240203857, "logits/rejected": -1.7321062088012695, "logps/chosen": -350.84381103515625, "logps/rejected": -332.35809326171875, "loss": 0.4289, "rewards/accuracies": 0.625, "rewards/chosen": -1.3021193742752075, "rewards/margins": 2.328277587890625, "rewards/rejected": -3.630396842956543, "step": 4127 }, { "epoch": 0.48, "learning_rate": 1.583087280028345e-07, "logits/chosen": -2.6455893516540527, "logits/rejected": -2.674375295639038, "logps/chosen": -324.5091247558594, "logps/rejected": -201.71070861816406, "loss": 0.4566, "rewards/accuracies": 0.625, "rewards/chosen": -0.7309709787368774, "rewards/margins": 0.9964444637298584, "rewards/rejected": -1.7274155616760254, "step": 4128 }, { "epoch": 0.48, "learning_rate": 1.5827329632691628e-07, "logits/chosen": -2.0027313232421875, "logits/rejected": -2.3337321281433105, "logps/chosen": -365.75262451171875, "logps/rejected": -234.27976989746094, "loss": 0.24, "rewards/accuracies": 1.0, "rewards/chosen": -0.5847543478012085, "rewards/margins": 1.6078301668167114, "rewards/rejected": -2.19258451461792, "step": 4129 }, { "epoch": 0.48, "learning_rate": 1.58237864650998e-07, "logits/chosen": -2.267455577850342, "logits/rejected": -2.1600022315979004, "logps/chosen": -188.8294677734375, "logps/rejected": -262.0408935546875, "loss": 0.2313, "rewards/accuracies": 0.875, "rewards/chosen": -0.8660821914672852, "rewards/margins": 3.101630687713623, "rewards/rejected": -3.967712640762329, "step": 4130 }, { "epoch": 0.48, "learning_rate": 1.5820243297507972e-07, "logits/chosen": -1.854107141494751, "logits/rejected": -2.101840019226074, "logps/chosen": -271.6152038574219, "logps/rejected": -181.65872192382812, "loss": 0.4207, "rewards/accuracies": 0.875, "rewards/chosen": -0.6665902733802795, "rewards/margins": 1.7577829360961914, "rewards/rejected": -2.424373149871826, "step": 4131 }, { "epoch": 0.48, "learning_rate": 1.5816700129916144e-07, "logits/chosen": -2.2030980587005615, "logits/rejected": -2.2924413681030273, "logps/chosen": -308.7123718261719, "logps/rejected": -295.85333251953125, "loss": 0.0961, "rewards/accuracies": 1.0, "rewards/chosen": 0.08946412801742554, "rewards/margins": 4.345136642456055, "rewards/rejected": -4.255672454833984, "step": 4132 }, { "epoch": 0.48, "learning_rate": 1.5813156962324316e-07, "logits/chosen": -2.695335865020752, "logits/rejected": -2.5826609134674072, "logps/chosen": -172.8690948486328, "logps/rejected": -188.6759796142578, "loss": 0.4017, "rewards/accuracies": 0.75, "rewards/chosen": -1.2125296592712402, "rewards/margins": 1.624316930770874, "rewards/rejected": -2.836846351623535, "step": 4133 }, { "epoch": 0.48, "learning_rate": 1.580961379473249e-07, "logits/chosen": -2.611358404159546, "logits/rejected": -2.7531533241271973, "logps/chosen": -252.48388671875, "logps/rejected": -139.24212646484375, "loss": 0.1829, "rewards/accuracies": 1.0, "rewards/chosen": -0.39181241393089294, "rewards/margins": 2.7404918670654297, "rewards/rejected": -3.1323046684265137, "step": 4134 }, { "epoch": 0.48, "learning_rate": 1.5806070627140664e-07, "logits/chosen": -2.60723614692688, "logits/rejected": -2.730010986328125, "logps/chosen": -276.92657470703125, "logps/rejected": -236.3046112060547, "loss": 0.2371, "rewards/accuracies": 0.875, "rewards/chosen": -0.2439155876636505, "rewards/margins": 2.052091360092163, "rewards/rejected": -2.2960071563720703, "step": 4135 }, { "epoch": 0.48, "learning_rate": 1.5802527459548836e-07, "logits/chosen": -2.461400270462036, "logits/rejected": -2.0530707836151123, "logps/chosen": -259.42596435546875, "logps/rejected": -479.298095703125, "loss": 0.5264, "rewards/accuracies": 0.5, "rewards/chosen": -1.4358010292053223, "rewards/margins": 1.455265998840332, "rewards/rejected": -2.8910670280456543, "step": 4136 }, { "epoch": 0.48, "learning_rate": 1.5798984291957008e-07, "logits/chosen": -2.2360522747039795, "logits/rejected": -2.1889970302581787, "logps/chosen": -196.19100952148438, "logps/rejected": -318.1798095703125, "loss": 0.227, "rewards/accuracies": 0.875, "rewards/chosen": -0.723287045955658, "rewards/margins": 3.6495487689971924, "rewards/rejected": -4.372836112976074, "step": 4137 }, { "epoch": 0.48, "learning_rate": 1.579544112436518e-07, "logits/chosen": -1.7125202417373657, "logits/rejected": -2.2954537868499756, "logps/chosen": -376.6832275390625, "logps/rejected": -325.34698486328125, "loss": 0.2094, "rewards/accuracies": 1.0, "rewards/chosen": -0.4437797963619232, "rewards/margins": 1.9978491067886353, "rewards/rejected": -2.441628932952881, "step": 4138 }, { "epoch": 0.48, "learning_rate": 1.5791897956773352e-07, "logits/chosen": -2.237619161605835, "logits/rejected": -1.9758802652359009, "logps/chosen": -234.68589782714844, "logps/rejected": -300.09912109375, "loss": 0.5729, "rewards/accuracies": 0.75, "rewards/chosen": -0.5957270264625549, "rewards/margins": 1.801027774810791, "rewards/rejected": -2.396754741668701, "step": 4139 }, { "epoch": 0.48, "learning_rate": 1.5788354789181527e-07, "logits/chosen": -2.1142961978912354, "logits/rejected": -2.160720109939575, "logps/chosen": -289.5484924316406, "logps/rejected": -194.80850219726562, "loss": 0.4188, "rewards/accuracies": 0.75, "rewards/chosen": -0.11683648824691772, "rewards/margins": 1.3440399169921875, "rewards/rejected": -1.46087646484375, "step": 4140 }, { "epoch": 0.48, "learning_rate": 1.5784811621589702e-07, "logits/chosen": -2.1995062828063965, "logits/rejected": -2.1393966674804688, "logps/chosen": -362.2760009765625, "logps/rejected": -277.73358154296875, "loss": 0.7324, "rewards/accuracies": 0.75, "rewards/chosen": -1.5141454935073853, "rewards/margins": 1.1892544031143188, "rewards/rejected": -2.703399658203125, "step": 4141 }, { "epoch": 0.48, "learning_rate": 1.5781268453997874e-07, "logits/chosen": -2.1861867904663086, "logits/rejected": -1.904154896736145, "logps/chosen": -286.0841979980469, "logps/rejected": -327.94866943359375, "loss": 0.3459, "rewards/accuracies": 1.0, "rewards/chosen": -1.0573030710220337, "rewards/margins": 1.0182193517684937, "rewards/rejected": -2.0755224227905273, "step": 4142 }, { "epoch": 0.48, "learning_rate": 1.5777725286406046e-07, "logits/chosen": -2.6749157905578613, "logits/rejected": -2.546511173248291, "logps/chosen": -224.00933837890625, "logps/rejected": -326.5504150390625, "loss": 0.359, "rewards/accuracies": 0.875, "rewards/chosen": -1.0685029029846191, "rewards/margins": 3.0021939277648926, "rewards/rejected": -4.070696830749512, "step": 4143 }, { "epoch": 0.48, "learning_rate": 1.577418211881422e-07, "logits/chosen": -2.2558960914611816, "logits/rejected": -2.281399726867676, "logps/chosen": -259.3851013183594, "logps/rejected": -226.3347625732422, "loss": 0.4838, "rewards/accuracies": 0.75, "rewards/chosen": -0.8091917037963867, "rewards/margins": 1.419957160949707, "rewards/rejected": -2.229149103164673, "step": 4144 }, { "epoch": 0.48, "learning_rate": 1.5770638951222394e-07, "logits/chosen": -2.080216884613037, "logits/rejected": -1.7466087341308594, "logps/chosen": -274.3110656738281, "logps/rejected": -319.20849609375, "loss": 0.6257, "rewards/accuracies": 0.875, "rewards/chosen": -1.1373041868209839, "rewards/margins": 0.7189969420433044, "rewards/rejected": -1.8563013076782227, "step": 4145 }, { "epoch": 0.48, "learning_rate": 1.5767095783630566e-07, "logits/chosen": -2.1442840099334717, "logits/rejected": -1.6273415088653564, "logps/chosen": -298.6656188964844, "logps/rejected": -490.8800048828125, "loss": 0.5133, "rewards/accuracies": 0.75, "rewards/chosen": -0.8488000631332397, "rewards/margins": 1.258759617805481, "rewards/rejected": -2.1075596809387207, "step": 4146 }, { "epoch": 0.48, "learning_rate": 1.5763552616038738e-07, "logits/chosen": -2.706585168838501, "logits/rejected": -2.980534076690674, "logps/chosen": -304.9236145019531, "logps/rejected": -270.6559753417969, "loss": 0.1493, "rewards/accuracies": 1.0, "rewards/chosen": -1.7622177600860596, "rewards/margins": 3.7300093173980713, "rewards/rejected": -5.492227077484131, "step": 4147 }, { "epoch": 0.48, "learning_rate": 1.576000944844691e-07, "logits/chosen": -2.0274767875671387, "logits/rejected": -2.4374520778656006, "logps/chosen": -405.0201110839844, "logps/rejected": -247.704833984375, "loss": 0.3328, "rewards/accuracies": 0.875, "rewards/chosen": -1.3348127603530884, "rewards/margins": 1.4745140075683594, "rewards/rejected": -2.8093268871307373, "step": 4148 }, { "epoch": 0.48, "learning_rate": 1.5756466280855082e-07, "logits/chosen": -2.0397393703460693, "logits/rejected": -1.807332992553711, "logps/chosen": -270.2458801269531, "logps/rejected": -342.57452392578125, "loss": 0.4034, "rewards/accuracies": 0.875, "rewards/chosen": -0.4827522933483124, "rewards/margins": 1.4381444454193115, "rewards/rejected": -1.9208967685699463, "step": 4149 }, { "epoch": 0.48, "learning_rate": 1.5752923113263255e-07, "logits/chosen": -2.0220909118652344, "logits/rejected": -2.1483194828033447, "logps/chosen": -427.98199462890625, "logps/rejected": -390.4346008300781, "loss": 0.4177, "rewards/accuracies": 0.75, "rewards/chosen": -0.7513718605041504, "rewards/margins": 2.4837427139282227, "rewards/rejected": -3.235114574432373, "step": 4150 }, { "epoch": 0.48, "learning_rate": 1.574937994567143e-07, "logits/chosen": -2.6240487098693848, "logits/rejected": -2.6398673057556152, "logps/chosen": -194.1738739013672, "logps/rejected": -287.0479431152344, "loss": 0.334, "rewards/accuracies": 0.875, "rewards/chosen": -1.160439133644104, "rewards/margins": 2.7707040309906006, "rewards/rejected": -3.931143283843994, "step": 4151 }, { "epoch": 0.48, "learning_rate": 1.5745836778079602e-07, "logits/chosen": -2.2501888275146484, "logits/rejected": -2.395749807357788, "logps/chosen": -316.8781433105469, "logps/rejected": -320.2548828125, "loss": 0.324, "rewards/accuracies": 0.625, "rewards/chosen": -0.20750093460083008, "rewards/margins": 2.4453327655792236, "rewards/rejected": -2.6528334617614746, "step": 4152 }, { "epoch": 0.48, "learning_rate": 1.5742293610487777e-07, "logits/chosen": -2.6261677742004395, "logits/rejected": -2.5325379371643066, "logps/chosen": -253.69210815429688, "logps/rejected": -237.56460571289062, "loss": 0.4457, "rewards/accuracies": 0.875, "rewards/chosen": -1.8740341663360596, "rewards/margins": 1.1637897491455078, "rewards/rejected": -3.0378239154815674, "step": 4153 }, { "epoch": 0.48, "learning_rate": 1.573875044289595e-07, "logits/chosen": -2.260078191757202, "logits/rejected": -2.154541254043579, "logps/chosen": -241.07449340820312, "logps/rejected": -356.5749816894531, "loss": 0.2393, "rewards/accuracies": 1.0, "rewards/chosen": -0.6519558429718018, "rewards/margins": 2.1383867263793945, "rewards/rejected": -2.7903428077697754, "step": 4154 }, { "epoch": 0.48, "learning_rate": 1.573520727530412e-07, "logits/chosen": -2.6148130893707275, "logits/rejected": -2.574190139770508, "logps/chosen": -248.4049072265625, "logps/rejected": -290.5125732421875, "loss": 0.2695, "rewards/accuracies": 0.75, "rewards/chosen": -0.9471781253814697, "rewards/margins": 2.7451679706573486, "rewards/rejected": -3.6923460960388184, "step": 4155 }, { "epoch": 0.48, "learning_rate": 1.5731664107712296e-07, "logits/chosen": -2.799419641494751, "logits/rejected": -2.7524819374084473, "logps/chosen": -230.13363647460938, "logps/rejected": -192.35418701171875, "loss": 0.5201, "rewards/accuracies": 0.625, "rewards/chosen": -0.5818525552749634, "rewards/margins": 1.5065317153930664, "rewards/rejected": -2.0883843898773193, "step": 4156 }, { "epoch": 0.48, "learning_rate": 1.5728120940120468e-07, "logits/chosen": -2.4714205265045166, "logits/rejected": -2.495512008666992, "logps/chosen": -191.78375244140625, "logps/rejected": -246.6481475830078, "loss": 0.8582, "rewards/accuracies": 0.5, "rewards/chosen": -1.9701004028320312, "rewards/margins": 0.6615644097328186, "rewards/rejected": -2.631664752960205, "step": 4157 }, { "epoch": 0.48, "learning_rate": 1.572457777252864e-07, "logits/chosen": -2.223565101623535, "logits/rejected": -2.2625820636749268, "logps/chosen": -158.71435546875, "logps/rejected": -229.4991455078125, "loss": 0.4696, "rewards/accuracies": 0.75, "rewards/chosen": -1.460983395576477, "rewards/margins": 1.9164379835128784, "rewards/rejected": -3.3774213790893555, "step": 4158 }, { "epoch": 0.48, "learning_rate": 1.5721034604936812e-07, "logits/chosen": -2.4221091270446777, "logits/rejected": -2.606306552886963, "logps/chosen": -254.51182556152344, "logps/rejected": -415.460693359375, "loss": 0.5508, "rewards/accuracies": 0.75, "rewards/chosen": -1.2438970804214478, "rewards/margins": 0.9935634136199951, "rewards/rejected": -2.2374606132507324, "step": 4159 }, { "epoch": 0.48, "learning_rate": 1.5717491437344985e-07, "logits/chosen": -1.8214982748031616, "logits/rejected": -2.2387146949768066, "logps/chosen": -465.3339538574219, "logps/rejected": -340.72564697265625, "loss": 0.3403, "rewards/accuracies": 0.75, "rewards/chosen": -1.0367165803909302, "rewards/margins": 1.889784812927246, "rewards/rejected": -2.9265012741088867, "step": 4160 }, { "epoch": 0.48, "learning_rate": 1.5713948269753157e-07, "logits/chosen": -1.5033838748931885, "logits/rejected": -1.6752598285675049, "logps/chosen": -345.5698547363281, "logps/rejected": -322.150390625, "loss": 0.2213, "rewards/accuracies": 1.0, "rewards/chosen": -0.4426257908344269, "rewards/margins": 2.2727503776550293, "rewards/rejected": -2.715376138687134, "step": 4161 }, { "epoch": 0.48, "learning_rate": 1.571040510216133e-07, "logits/chosen": -2.3698413372039795, "logits/rejected": -2.273284673690796, "logps/chosen": -183.62930297851562, "logps/rejected": -240.69412231445312, "loss": 0.259, "rewards/accuracies": 0.875, "rewards/chosen": -0.8423725962638855, "rewards/margins": 2.6861393451690674, "rewards/rejected": -3.5285120010375977, "step": 4162 }, { "epoch": 0.48, "learning_rate": 1.5706861934569504e-07, "logits/chosen": -2.5375421047210693, "logits/rejected": -2.6934731006622314, "logps/chosen": -296.34173583984375, "logps/rejected": -322.0248107910156, "loss": 0.4092, "rewards/accuracies": 0.875, "rewards/chosen": -0.8352963924407959, "rewards/margins": 2.5348501205444336, "rewards/rejected": -3.3701465129852295, "step": 4163 }, { "epoch": 0.48, "learning_rate": 1.570331876697768e-07, "logits/chosen": -2.068100929260254, "logits/rejected": -2.138357162475586, "logps/chosen": -307.0281982421875, "logps/rejected": -287.1894226074219, "loss": 0.7942, "rewards/accuracies": 0.75, "rewards/chosen": -0.8522269129753113, "rewards/margins": 0.883599579334259, "rewards/rejected": -1.7358264923095703, "step": 4164 }, { "epoch": 0.48, "learning_rate": 1.569977559938585e-07, "logits/chosen": -2.755688428878784, "logits/rejected": -2.6583595275878906, "logps/chosen": -311.52374267578125, "logps/rejected": -204.33914184570312, "loss": 1.0426, "rewards/accuracies": 0.875, "rewards/chosen": -1.6276205778121948, "rewards/margins": 1.5248422622680664, "rewards/rejected": -3.1524627208709717, "step": 4165 }, { "epoch": 0.48, "learning_rate": 1.5696232431794023e-07, "logits/chosen": -2.16886043548584, "logits/rejected": -2.1241908073425293, "logps/chosen": -201.84625244140625, "logps/rejected": -233.87863159179688, "loss": 0.6916, "rewards/accuracies": 0.75, "rewards/chosen": -1.2230740785598755, "rewards/margins": 1.1839728355407715, "rewards/rejected": -2.4070467948913574, "step": 4166 }, { "epoch": 0.48, "learning_rate": 1.5692689264202198e-07, "logits/chosen": -2.7029006481170654, "logits/rejected": -2.643820285797119, "logps/chosen": -332.34967041015625, "logps/rejected": -276.01239013671875, "loss": 0.2798, "rewards/accuracies": 0.875, "rewards/chosen": -0.7206541299819946, "rewards/margins": 2.4563968181610107, "rewards/rejected": -3.177050828933716, "step": 4167 }, { "epoch": 0.48, "learning_rate": 1.568914609661037e-07, "logits/chosen": -1.797261357307434, "logits/rejected": -2.235586166381836, "logps/chosen": -223.82232666015625, "logps/rejected": -209.6043243408203, "loss": 0.6156, "rewards/accuracies": 0.625, "rewards/chosen": -1.105198621749878, "rewards/margins": 1.9258440732955933, "rewards/rejected": -3.0310428142547607, "step": 4168 }, { "epoch": 0.48, "learning_rate": 1.5685602929018543e-07, "logits/chosen": -2.632432222366333, "logits/rejected": -2.5465149879455566, "logps/chosen": -178.07418823242188, "logps/rejected": -260.344970703125, "loss": 0.5325, "rewards/accuracies": 0.875, "rewards/chosen": -0.18315337598323822, "rewards/margins": 1.485952377319336, "rewards/rejected": -1.6691056489944458, "step": 4169 }, { "epoch": 0.49, "learning_rate": 1.5682059761426715e-07, "logits/chosen": -2.151028633117676, "logits/rejected": -2.2559762001037598, "logps/chosen": -321.5174865722656, "logps/rejected": -207.8048095703125, "loss": 0.5687, "rewards/accuracies": 0.625, "rewards/chosen": -1.7346638441085815, "rewards/margins": 0.9496192336082458, "rewards/rejected": -2.6842830181121826, "step": 4170 }, { "epoch": 0.49, "learning_rate": 1.5678516593834887e-07, "logits/chosen": -2.592406749725342, "logits/rejected": -2.3248939514160156, "logps/chosen": -221.30752563476562, "logps/rejected": -205.48233032226562, "loss": 0.6085, "rewards/accuracies": 0.75, "rewards/chosen": -1.0589115619659424, "rewards/margins": 1.013702630996704, "rewards/rejected": -2.0726141929626465, "step": 4171 }, { "epoch": 0.49, "learning_rate": 1.567497342624306e-07, "logits/chosen": -2.9331204891204834, "logits/rejected": -2.8310275077819824, "logps/chosen": -171.8787384033203, "logps/rejected": -287.83935546875, "loss": 0.1835, "rewards/accuracies": 1.0, "rewards/chosen": -1.1742606163024902, "rewards/margins": 2.7964389324188232, "rewards/rejected": -3.9706995487213135, "step": 4172 }, { "epoch": 0.49, "learning_rate": 1.5671430258651231e-07, "logits/chosen": -2.329993724822998, "logits/rejected": -2.3867180347442627, "logps/chosen": -462.095458984375, "logps/rejected": -252.86248779296875, "loss": 0.8832, "rewards/accuracies": 0.75, "rewards/chosen": -1.6894361972808838, "rewards/margins": 0.31260111927986145, "rewards/rejected": -2.002037286758423, "step": 4173 }, { "epoch": 0.49, "learning_rate": 1.5667887091059406e-07, "logits/chosen": -2.70963191986084, "logits/rejected": -2.8010990619659424, "logps/chosen": -439.6603088378906, "logps/rejected": -278.14447021484375, "loss": 0.2254, "rewards/accuracies": 1.0, "rewards/chosen": -0.47038334608078003, "rewards/margins": 2.2186856269836426, "rewards/rejected": -2.6890687942504883, "step": 4174 }, { "epoch": 0.49, "learning_rate": 1.5664343923467578e-07, "logits/chosen": -2.080286979675293, "logits/rejected": -2.2805774211883545, "logps/chosen": -304.99261474609375, "logps/rejected": -224.95518493652344, "loss": 0.5609, "rewards/accuracies": 0.625, "rewards/chosen": -0.6011609435081482, "rewards/margins": 1.2242482900619507, "rewards/rejected": -1.8254092931747437, "step": 4175 }, { "epoch": 0.49, "learning_rate": 1.5660800755875753e-07, "logits/chosen": -2.211413860321045, "logits/rejected": -2.255418539047241, "logps/chosen": -278.8165283203125, "logps/rejected": -318.9987487792969, "loss": 0.4567, "rewards/accuracies": 0.75, "rewards/chosen": -1.0322880744934082, "rewards/margins": 1.083324670791626, "rewards/rejected": -2.115612745285034, "step": 4176 }, { "epoch": 0.49, "learning_rate": 1.5657257588283926e-07, "logits/chosen": -2.354156255722046, "logits/rejected": -2.682738780975342, "logps/chosen": -574.1495361328125, "logps/rejected": -373.4350891113281, "loss": 0.2529, "rewards/accuracies": 1.0, "rewards/chosen": -0.6412387490272522, "rewards/margins": 1.6676678657531738, "rewards/rejected": -2.3089065551757812, "step": 4177 }, { "epoch": 0.49, "learning_rate": 1.5653714420692098e-07, "logits/chosen": -1.8615033626556396, "logits/rejected": -2.2277145385742188, "logps/chosen": -489.3848876953125, "logps/rejected": -266.11309814453125, "loss": 0.3086, "rewards/accuracies": 0.875, "rewards/chosen": 0.45478957891464233, "rewards/margins": 1.8817253112792969, "rewards/rejected": -1.4269357919692993, "step": 4178 }, { "epoch": 0.49, "learning_rate": 1.5650171253100273e-07, "logits/chosen": -2.7533535957336426, "logits/rejected": -2.787672996520996, "logps/chosen": -273.18682861328125, "logps/rejected": -213.01812744140625, "loss": 0.3894, "rewards/accuracies": 0.75, "rewards/chosen": -1.62388277053833, "rewards/margins": 1.7281194925308228, "rewards/rejected": -3.3520023822784424, "step": 4179 }, { "epoch": 0.49, "learning_rate": 1.5646628085508445e-07, "logits/chosen": -1.9549129009246826, "logits/rejected": -2.45469069480896, "logps/chosen": -457.2225341796875, "logps/rejected": -286.02703857421875, "loss": 0.5814, "rewards/accuracies": 0.75, "rewards/chosen": -0.7884546518325806, "rewards/margins": 0.9379298686981201, "rewards/rejected": -1.7263846397399902, "step": 4180 }, { "epoch": 0.49, "learning_rate": 1.5643084917916617e-07, "logits/chosen": -2.4998726844787598, "logits/rejected": -2.3361010551452637, "logps/chosen": -239.412109375, "logps/rejected": -254.39218139648438, "loss": 0.2945, "rewards/accuracies": 0.875, "rewards/chosen": -0.6998356580734253, "rewards/margins": 2.188354969024658, "rewards/rejected": -2.888190269470215, "step": 4181 }, { "epoch": 0.49, "learning_rate": 1.563954175032479e-07, "logits/chosen": -2.622525453567505, "logits/rejected": -2.7748355865478516, "logps/chosen": -162.9314422607422, "logps/rejected": -152.78211975097656, "loss": 0.1348, "rewards/accuracies": 1.0, "rewards/chosen": -0.5086824893951416, "rewards/margins": 3.160824775695801, "rewards/rejected": -3.6695075035095215, "step": 4182 }, { "epoch": 0.49, "learning_rate": 1.5635998582732961e-07, "logits/chosen": -2.2013843059539795, "logits/rejected": -2.3301632404327393, "logps/chosen": -381.08221435546875, "logps/rejected": -308.3149108886719, "loss": 0.9379, "rewards/accuracies": 0.625, "rewards/chosen": -1.129316806793213, "rewards/margins": 0.7018431425094604, "rewards/rejected": -1.8311598300933838, "step": 4183 }, { "epoch": 0.49, "learning_rate": 1.5632455415141134e-07, "logits/chosen": -2.5435256958007812, "logits/rejected": -2.4566032886505127, "logps/chosen": -309.3577575683594, "logps/rejected": -262.3311767578125, "loss": 0.8936, "rewards/accuracies": 0.75, "rewards/chosen": -1.7115654945373535, "rewards/margins": 1.763122797012329, "rewards/rejected": -3.4746882915496826, "step": 4184 }, { "epoch": 0.49, "learning_rate": 1.5628912247549309e-07, "logits/chosen": -2.1664795875549316, "logits/rejected": -2.3976573944091797, "logps/chosen": -270.93133544921875, "logps/rejected": -300.8971862792969, "loss": 0.4149, "rewards/accuracies": 0.5, "rewards/chosen": -0.8059624433517456, "rewards/margins": 4.1478471755981445, "rewards/rejected": -4.95380973815918, "step": 4185 }, { "epoch": 0.49, "learning_rate": 1.562536907995748e-07, "logits/chosen": -2.7821292877197266, "logits/rejected": -2.4968490600585938, "logps/chosen": -203.15623474121094, "logps/rejected": -301.80682373046875, "loss": 0.169, "rewards/accuracies": 1.0, "rewards/chosen": -0.9515830874443054, "rewards/margins": 3.129225730895996, "rewards/rejected": -4.080808639526367, "step": 4186 }, { "epoch": 0.49, "learning_rate": 1.5621825912365653e-07, "logits/chosen": -2.0574231147766113, "logits/rejected": -1.9697329998016357, "logps/chosen": -359.9841003417969, "logps/rejected": -399.0179138183594, "loss": 0.4878, "rewards/accuracies": 0.75, "rewards/chosen": -0.5525528788566589, "rewards/margins": 2.3682796955108643, "rewards/rejected": -2.920832395553589, "step": 4187 }, { "epoch": 0.49, "learning_rate": 1.5618282744773828e-07, "logits/chosen": -2.276045322418213, "logits/rejected": -2.499728202819824, "logps/chosen": -389.3753967285156, "logps/rejected": -294.92919921875, "loss": 0.4174, "rewards/accuracies": 0.875, "rewards/chosen": -1.3425109386444092, "rewards/margins": 1.598778486251831, "rewards/rejected": -2.9412894248962402, "step": 4188 }, { "epoch": 0.49, "learning_rate": 1.5614739577182e-07, "logits/chosen": -2.2978153228759766, "logits/rejected": -2.251972198486328, "logps/chosen": -106.7737808227539, "logps/rejected": -218.66073608398438, "loss": 0.3172, "rewards/accuracies": 0.875, "rewards/chosen": -0.36114704608917236, "rewards/margins": 2.5171573162078857, "rewards/rejected": -2.8783042430877686, "step": 4189 }, { "epoch": 0.49, "learning_rate": 1.5611196409590175e-07, "logits/chosen": -2.1270995140075684, "logits/rejected": -2.393162727355957, "logps/chosen": -248.9272918701172, "logps/rejected": -202.97271728515625, "loss": 0.226, "rewards/accuracies": 0.875, "rewards/chosen": -0.39361095428466797, "rewards/margins": 2.41978120803833, "rewards/rejected": -2.813392400741577, "step": 4190 }, { "epoch": 0.49, "learning_rate": 1.5607653241998347e-07, "logits/chosen": -1.7374160289764404, "logits/rejected": -2.2144100666046143, "logps/chosen": -345.683349609375, "logps/rejected": -242.41519165039062, "loss": 0.6332, "rewards/accuracies": 0.5, "rewards/chosen": -1.2654688358306885, "rewards/margins": 0.6928931474685669, "rewards/rejected": -1.958362102508545, "step": 4191 }, { "epoch": 0.49, "learning_rate": 1.560411007440652e-07, "logits/chosen": -1.8285834789276123, "logits/rejected": -2.087998390197754, "logps/chosen": -367.35260009765625, "logps/rejected": -230.97195434570312, "loss": 0.3991, "rewards/accuracies": 0.75, "rewards/chosen": -0.6413822770118713, "rewards/margins": 1.4651374816894531, "rewards/rejected": -2.1065196990966797, "step": 4192 }, { "epoch": 0.49, "learning_rate": 1.5600566906814692e-07, "logits/chosen": -2.4793336391448975, "logits/rejected": -2.39030385017395, "logps/chosen": -294.58233642578125, "logps/rejected": -314.3695983886719, "loss": 0.3441, "rewards/accuracies": 0.75, "rewards/chosen": -0.9479573369026184, "rewards/margins": 1.3553906679153442, "rewards/rejected": -2.3033480644226074, "step": 4193 }, { "epoch": 0.49, "learning_rate": 1.5597023739222864e-07, "logits/chosen": -2.1798839569091797, "logits/rejected": -2.373538017272949, "logps/chosen": -434.94158935546875, "logps/rejected": -276.64691162109375, "loss": 0.3779, "rewards/accuracies": 0.75, "rewards/chosen": -0.7101126909255981, "rewards/margins": 1.8202152252197266, "rewards/rejected": -2.530327796936035, "step": 4194 }, { "epoch": 0.49, "learning_rate": 1.5593480571631036e-07, "logits/chosen": -2.0496816635131836, "logits/rejected": -2.1611108779907227, "logps/chosen": -282.61993408203125, "logps/rejected": -208.932373046875, "loss": 0.4921, "rewards/accuracies": 0.75, "rewards/chosen": -1.06795072555542, "rewards/margins": 0.9938252568244934, "rewards/rejected": -2.0617761611938477, "step": 4195 }, { "epoch": 0.49, "learning_rate": 1.558993740403921e-07, "logits/chosen": -1.8750758171081543, "logits/rejected": -2.384117841720581, "logps/chosen": -344.7598571777344, "logps/rejected": -205.66404724121094, "loss": 0.1731, "rewards/accuracies": 1.0, "rewards/chosen": -0.18671998381614685, "rewards/margins": 2.0612125396728516, "rewards/rejected": -2.2479326725006104, "step": 4196 }, { "epoch": 0.49, "learning_rate": 1.5586394236447383e-07, "logits/chosen": -1.8473994731903076, "logits/rejected": -2.2152717113494873, "logps/chosen": -345.2440490722656, "logps/rejected": -263.2420654296875, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": -0.4963598847389221, "rewards/margins": 3.757162094116211, "rewards/rejected": -4.253521919250488, "step": 4197 }, { "epoch": 0.49, "learning_rate": 1.5582851068855555e-07, "logits/chosen": -1.9456243515014648, "logits/rejected": -2.1886649131774902, "logps/chosen": -451.83697509765625, "logps/rejected": -452.882568359375, "loss": 1.0008, "rewards/accuracies": 0.875, "rewards/chosen": -1.3961187601089478, "rewards/margins": 1.994165301322937, "rewards/rejected": -3.3902840614318848, "step": 4198 }, { "epoch": 0.49, "learning_rate": 1.557930790126373e-07, "logits/chosen": -2.069958209991455, "logits/rejected": -2.069769859313965, "logps/chosen": -400.955810546875, "logps/rejected": -371.6534423828125, "loss": 0.6835, "rewards/accuracies": 0.75, "rewards/chosen": -1.7135387659072876, "rewards/margins": 1.782979965209961, "rewards/rejected": -3.496519088745117, "step": 4199 }, { "epoch": 0.49, "learning_rate": 1.5575764733671902e-07, "logits/chosen": -2.3866684436798096, "logits/rejected": -2.308103561401367, "logps/chosen": -201.48760986328125, "logps/rejected": -269.93402099609375, "loss": 0.5399, "rewards/accuracies": 0.625, "rewards/chosen": -1.0937325954437256, "rewards/margins": 1.3600457906723022, "rewards/rejected": -2.4537785053253174, "step": 4200 }, { "epoch": 0.49, "learning_rate": 1.5572221566080077e-07, "logits/chosen": -2.1886377334594727, "logits/rejected": -2.0658164024353027, "logps/chosen": -247.9052734375, "logps/rejected": -273.84002685546875, "loss": 0.7834, "rewards/accuracies": 0.625, "rewards/chosen": -1.1680951118469238, "rewards/margins": 1.1702641248703003, "rewards/rejected": -2.3383591175079346, "step": 4201 }, { "epoch": 0.49, "learning_rate": 1.556867839848825e-07, "logits/chosen": -2.33994722366333, "logits/rejected": -2.2882912158966064, "logps/chosen": -290.715087890625, "logps/rejected": -357.2547607421875, "loss": 0.1872, "rewards/accuracies": 0.875, "rewards/chosen": -0.2784448564052582, "rewards/margins": 4.278828144073486, "rewards/rejected": -4.557272911071777, "step": 4202 }, { "epoch": 0.49, "learning_rate": 1.5565135230896422e-07, "logits/chosen": -1.9057643413543701, "logits/rejected": -1.6760625839233398, "logps/chosen": -162.06646728515625, "logps/rejected": -604.0193481445312, "loss": 0.239, "rewards/accuracies": 0.875, "rewards/chosen": -0.35089200735092163, "rewards/margins": 3.8521459102630615, "rewards/rejected": -4.203037738800049, "step": 4203 }, { "epoch": 0.49, "learning_rate": 1.5561592063304594e-07, "logits/chosen": -2.204662799835205, "logits/rejected": -2.542515516281128, "logps/chosen": -375.55401611328125, "logps/rejected": -143.68309020996094, "loss": 0.6425, "rewards/accuracies": 0.625, "rewards/chosen": -0.8651166558265686, "rewards/margins": 1.170993685722351, "rewards/rejected": -2.0361101627349854, "step": 4204 }, { "epoch": 0.49, "learning_rate": 1.5558048895712766e-07, "logits/chosen": -2.539971113204956, "logits/rejected": -2.4846532344818115, "logps/chosen": -270.4479064941406, "logps/rejected": -263.5343322753906, "loss": 0.2834, "rewards/accuracies": 0.875, "rewards/chosen": -0.8396803140640259, "rewards/margins": 2.8850934505462646, "rewards/rejected": -3.72477388381958, "step": 4205 }, { "epoch": 0.49, "learning_rate": 1.5554505728120938e-07, "logits/chosen": -2.2344443798065186, "logits/rejected": -2.551969051361084, "logps/chosen": -395.16827392578125, "logps/rejected": -268.52093505859375, "loss": 0.2452, "rewards/accuracies": 1.0, "rewards/chosen": -0.5827472805976868, "rewards/margins": 2.541327953338623, "rewards/rejected": -3.124075174331665, "step": 4206 }, { "epoch": 0.49, "learning_rate": 1.555096256052911e-07, "logits/chosen": -2.7171030044555664, "logits/rejected": -2.8946352005004883, "logps/chosen": -148.84063720703125, "logps/rejected": -175.6888427734375, "loss": 0.5884, "rewards/accuracies": 0.75, "rewards/chosen": -1.2476805448532104, "rewards/margins": 1.5699149370193481, "rewards/rejected": -2.8175952434539795, "step": 4207 }, { "epoch": 0.49, "learning_rate": 1.5547419392937285e-07, "logits/chosen": -1.9031076431274414, "logits/rejected": -2.1907944679260254, "logps/chosen": -531.751220703125, "logps/rejected": -388.6005859375, "loss": 0.218, "rewards/accuracies": 0.875, "rewards/chosen": -0.6631029844284058, "rewards/margins": 1.9835073947906494, "rewards/rejected": -2.6466104984283447, "step": 4208 }, { "epoch": 0.49, "learning_rate": 1.5543876225345458e-07, "logits/chosen": -1.951372504234314, "logits/rejected": -2.36936616897583, "logps/chosen": -346.507568359375, "logps/rejected": -148.57513427734375, "loss": 0.6355, "rewards/accuracies": 0.5, "rewards/chosen": -1.4825596809387207, "rewards/margins": 0.8358340859413147, "rewards/rejected": -2.3183937072753906, "step": 4209 }, { "epoch": 0.49, "learning_rate": 1.554033305775363e-07, "logits/chosen": -2.337108850479126, "logits/rejected": -1.8362160921096802, "logps/chosen": -74.19679260253906, "logps/rejected": -226.6238250732422, "loss": 0.5865, "rewards/accuracies": 0.875, "rewards/chosen": -0.653024435043335, "rewards/margins": 1.8003034591674805, "rewards/rejected": -2.4533281326293945, "step": 4210 }, { "epoch": 0.49, "learning_rate": 1.5536789890161805e-07, "logits/chosen": -2.4479455947875977, "logits/rejected": -2.4788005352020264, "logps/chosen": -225.18568420410156, "logps/rejected": -279.6341247558594, "loss": 0.2967, "rewards/accuracies": 0.875, "rewards/chosen": -0.6062159538269043, "rewards/margins": 2.6959404945373535, "rewards/rejected": -3.302156448364258, "step": 4211 }, { "epoch": 0.49, "learning_rate": 1.553324672256998e-07, "logits/chosen": -1.8374024629592896, "logits/rejected": -2.0174384117126465, "logps/chosen": -353.2993469238281, "logps/rejected": -298.183349609375, "loss": 0.2056, "rewards/accuracies": 1.0, "rewards/chosen": -0.21171113848686218, "rewards/margins": 2.379586696624756, "rewards/rejected": -2.5912978649139404, "step": 4212 }, { "epoch": 0.49, "learning_rate": 1.5529703554978152e-07, "logits/chosen": -2.0530803203582764, "logits/rejected": -2.0405948162078857, "logps/chosen": -388.78680419921875, "logps/rejected": -314.1803894042969, "loss": 0.4966, "rewards/accuracies": 0.75, "rewards/chosen": -1.1562906503677368, "rewards/margins": 2.565918445587158, "rewards/rejected": -3.7222089767456055, "step": 4213 }, { "epoch": 0.49, "learning_rate": 1.5526160387386324e-07, "logits/chosen": -2.235501527786255, "logits/rejected": -2.1123273372650146, "logps/chosen": -362.783935546875, "logps/rejected": -330.083251953125, "loss": 0.358, "rewards/accuracies": 0.75, "rewards/chosen": -1.1011303663253784, "rewards/margins": 1.2766927480697632, "rewards/rejected": -2.3778231143951416, "step": 4214 }, { "epoch": 0.49, "learning_rate": 1.5522617219794496e-07, "logits/chosen": -2.780184745788574, "logits/rejected": -2.807072877883911, "logps/chosen": -138.5372772216797, "logps/rejected": -178.1685791015625, "loss": 0.1168, "rewards/accuracies": 1.0, "rewards/chosen": -0.6500250101089478, "rewards/margins": 3.2305593490600586, "rewards/rejected": -3.880584239959717, "step": 4215 }, { "epoch": 0.49, "learning_rate": 1.5519074052202668e-07, "logits/chosen": -2.601388454437256, "logits/rejected": -2.641458749771118, "logps/chosen": -211.64303588867188, "logps/rejected": -240.1005096435547, "loss": 0.4351, "rewards/accuracies": 0.875, "rewards/chosen": -0.974123477935791, "rewards/margins": 1.745782732963562, "rewards/rejected": -2.7199063301086426, "step": 4216 }, { "epoch": 0.49, "learning_rate": 1.551553088461084e-07, "logits/chosen": -2.091341495513916, "logits/rejected": -1.9680372476577759, "logps/chosen": -373.9072265625, "logps/rejected": -366.6389465332031, "loss": 0.4754, "rewards/accuracies": 0.625, "rewards/chosen": -0.6733686923980713, "rewards/margins": 2.289907455444336, "rewards/rejected": -2.9632761478424072, "step": 4217 }, { "epoch": 0.49, "learning_rate": 1.5511987717019013e-07, "logits/chosen": -2.360729217529297, "logits/rejected": -2.2917418479919434, "logps/chosen": -483.8581848144531, "logps/rejected": -440.9471740722656, "loss": 0.164, "rewards/accuracies": 0.875, "rewards/chosen": -0.3209574222564697, "rewards/margins": 3.651742696762085, "rewards/rejected": -3.9727001190185547, "step": 4218 }, { "epoch": 0.49, "learning_rate": 1.5508444549427188e-07, "logits/chosen": -2.3913514614105225, "logits/rejected": -2.2192699909210205, "logps/chosen": -162.5697479248047, "logps/rejected": -177.00511169433594, "loss": 0.238, "rewards/accuracies": 1.0, "rewards/chosen": -0.3811604976654053, "rewards/margins": 1.5962038040161133, "rewards/rejected": -1.9773643016815186, "step": 4219 }, { "epoch": 0.49, "learning_rate": 1.550490138183536e-07, "logits/chosen": -2.2838666439056396, "logits/rejected": -2.416447639465332, "logps/chosen": -250.20236206054688, "logps/rejected": -213.37826538085938, "loss": 0.2526, "rewards/accuracies": 0.875, "rewards/chosen": -0.21300038695335388, "rewards/margins": 3.25825834274292, "rewards/rejected": -3.4712586402893066, "step": 4220 }, { "epoch": 0.49, "learning_rate": 1.5501358214243532e-07, "logits/chosen": -2.152167797088623, "logits/rejected": -2.3065335750579834, "logps/chosen": -392.2319030761719, "logps/rejected": -257.9918518066406, "loss": 0.6436, "rewards/accuracies": 0.625, "rewards/chosen": -0.463146448135376, "rewards/margins": 1.1819086074829102, "rewards/rejected": -1.6450550556182861, "step": 4221 }, { "epoch": 0.49, "learning_rate": 1.5497815046651704e-07, "logits/chosen": -1.6172950267791748, "logits/rejected": -1.8373398780822754, "logps/chosen": -487.0157470703125, "logps/rejected": -464.99652099609375, "loss": 0.3284, "rewards/accuracies": 0.75, "rewards/chosen": -0.6367915868759155, "rewards/margins": 1.657346248626709, "rewards/rejected": -2.294137954711914, "step": 4222 }, { "epoch": 0.49, "learning_rate": 1.549427187905988e-07, "logits/chosen": -1.9449739456176758, "logits/rejected": -2.0185787677764893, "logps/chosen": -158.22862243652344, "logps/rejected": -205.89923095703125, "loss": 0.4544, "rewards/accuracies": 0.625, "rewards/chosen": -0.7416872978210449, "rewards/margins": 1.414862036705017, "rewards/rejected": -2.1565492153167725, "step": 4223 }, { "epoch": 0.49, "learning_rate": 1.5490728711468054e-07, "logits/chosen": -2.492893695831299, "logits/rejected": -2.1953775882720947, "logps/chosen": -155.6333465576172, "logps/rejected": -250.72544860839844, "loss": 0.3481, "rewards/accuracies": 0.875, "rewards/chosen": -1.5742707252502441, "rewards/margins": 1.864371418952942, "rewards/rejected": -3.4386420249938965, "step": 4224 }, { "epoch": 0.49, "learning_rate": 1.5487185543876226e-07, "logits/chosen": -1.9641962051391602, "logits/rejected": -1.933701753616333, "logps/chosen": -325.01434326171875, "logps/rejected": -335.8203125, "loss": 0.4156, "rewards/accuracies": 0.625, "rewards/chosen": -0.6353504657745361, "rewards/margins": 2.295146942138672, "rewards/rejected": -2.930497646331787, "step": 4225 }, { "epoch": 0.49, "learning_rate": 1.5483642376284398e-07, "logits/chosen": -2.008098840713501, "logits/rejected": -2.1107101440429688, "logps/chosen": -263.98382568359375, "logps/rejected": -286.8031005859375, "loss": 0.4547, "rewards/accuracies": 0.75, "rewards/chosen": -0.7853267788887024, "rewards/margins": 1.1948570013046265, "rewards/rejected": -1.9801836013793945, "step": 4226 }, { "epoch": 0.49, "learning_rate": 1.548009920869257e-07, "logits/chosen": -2.7829713821411133, "logits/rejected": -2.796865701675415, "logps/chosen": -198.48440551757812, "logps/rejected": -240.46401977539062, "loss": 0.1052, "rewards/accuracies": 1.0, "rewards/chosen": -0.12219832092523575, "rewards/margins": 4.2068891525268555, "rewards/rejected": -4.329087257385254, "step": 4227 }, { "epoch": 0.49, "learning_rate": 1.5476556041100743e-07, "logits/chosen": -2.8691625595092773, "logits/rejected": -2.90232253074646, "logps/chosen": -259.3534240722656, "logps/rejected": -187.81802368164062, "loss": 0.2825, "rewards/accuracies": 0.875, "rewards/chosen": -0.2154710292816162, "rewards/margins": 2.3499207496643066, "rewards/rejected": -2.565391778945923, "step": 4228 }, { "epoch": 0.49, "learning_rate": 1.5473012873508915e-07, "logits/chosen": -2.8094186782836914, "logits/rejected": -2.8581786155700684, "logps/chosen": -147.36273193359375, "logps/rejected": -115.0368881225586, "loss": 0.4565, "rewards/accuracies": 0.875, "rewards/chosen": -0.014792539179325104, "rewards/margins": 1.1600532531738281, "rewards/rejected": -1.1748459339141846, "step": 4229 }, { "epoch": 0.49, "learning_rate": 1.546946970591709e-07, "logits/chosen": -2.225023031234741, "logits/rejected": -1.918619155883789, "logps/chosen": -170.2281951904297, "logps/rejected": -236.80023193359375, "loss": 0.3079, "rewards/accuracies": 0.875, "rewards/chosen": -1.0115596055984497, "rewards/margins": 1.7705206871032715, "rewards/rejected": -2.7820801734924316, "step": 4230 }, { "epoch": 0.49, "learning_rate": 1.5465926538325262e-07, "logits/chosen": -2.6455230712890625, "logits/rejected": -2.620805263519287, "logps/chosen": -242.54000854492188, "logps/rejected": -371.3762512207031, "loss": 0.4212, "rewards/accuracies": 0.875, "rewards/chosen": -1.157359004020691, "rewards/margins": 1.5229735374450684, "rewards/rejected": -2.6803324222564697, "step": 4231 }, { "epoch": 0.49, "learning_rate": 1.5462383370733434e-07, "logits/chosen": -2.147968292236328, "logits/rejected": -2.2863383293151855, "logps/chosen": -489.2957763671875, "logps/rejected": -332.1770935058594, "loss": 0.3851, "rewards/accuracies": 0.875, "rewards/chosen": -1.0014922618865967, "rewards/margins": 1.895164966583252, "rewards/rejected": -2.8966572284698486, "step": 4232 }, { "epoch": 0.49, "learning_rate": 1.5458840203141606e-07, "logits/chosen": -2.0664100646972656, "logits/rejected": -2.5880985260009766, "logps/chosen": -236.2112579345703, "logps/rejected": -217.5749053955078, "loss": 0.2151, "rewards/accuracies": 1.0, "rewards/chosen": -0.22217904031276703, "rewards/margins": 2.874112844467163, "rewards/rejected": -3.0962917804718018, "step": 4233 }, { "epoch": 0.49, "learning_rate": 1.545529703554978e-07, "logits/chosen": -2.154740810394287, "logits/rejected": -2.2495694160461426, "logps/chosen": -223.77037048339844, "logps/rejected": -263.9287109375, "loss": 0.1402, "rewards/accuracies": 0.875, "rewards/chosen": -0.31739386916160583, "rewards/margins": 3.3459737300872803, "rewards/rejected": -3.663367509841919, "step": 4234 }, { "epoch": 0.49, "learning_rate": 1.5451753867957956e-07, "logits/chosen": -1.8370375633239746, "logits/rejected": -1.6753730773925781, "logps/chosen": -233.3326873779297, "logps/rejected": -267.75634765625, "loss": 0.283, "rewards/accuracies": 0.75, "rewards/chosen": -0.9684141874313354, "rewards/margins": 2.9151928424835205, "rewards/rejected": -3.8836069107055664, "step": 4235 }, { "epoch": 0.49, "learning_rate": 1.5448210700366128e-07, "logits/chosen": -2.000152587890625, "logits/rejected": -2.1005642414093018, "logps/chosen": -285.3634338378906, "logps/rejected": -356.9669494628906, "loss": 0.2536, "rewards/accuracies": 0.75, "rewards/chosen": -0.7302247285842896, "rewards/margins": 3.0643539428710938, "rewards/rejected": -3.7945785522460938, "step": 4236 }, { "epoch": 0.49, "learning_rate": 1.54446675327743e-07, "logits/chosen": -1.9809647798538208, "logits/rejected": -2.2148256301879883, "logps/chosen": -403.2225341796875, "logps/rejected": -360.6706848144531, "loss": 0.4555, "rewards/accuracies": 0.75, "rewards/chosen": -0.049302101135253906, "rewards/margins": 1.2935963869094849, "rewards/rejected": -1.3428986072540283, "step": 4237 }, { "epoch": 0.49, "learning_rate": 1.5441124365182473e-07, "logits/chosen": -2.1211373805999756, "logits/rejected": -2.3110873699188232, "logps/chosen": -296.46356201171875, "logps/rejected": -260.5908508300781, "loss": 0.3092, "rewards/accuracies": 1.0, "rewards/chosen": -0.8240818977355957, "rewards/margins": 1.2839206457138062, "rewards/rejected": -2.1080026626586914, "step": 4238 }, { "epoch": 0.49, "learning_rate": 1.5437581197590645e-07, "logits/chosen": -2.1035172939300537, "logits/rejected": -2.0641393661499023, "logps/chosen": -356.18536376953125, "logps/rejected": -260.9963073730469, "loss": 0.6869, "rewards/accuracies": 0.75, "rewards/chosen": -0.8862773180007935, "rewards/margins": 1.2397949695587158, "rewards/rejected": -2.1260721683502197, "step": 4239 }, { "epoch": 0.49, "learning_rate": 1.5434038029998817e-07, "logits/chosen": -2.2825767993927, "logits/rejected": -1.6988734006881714, "logps/chosen": -344.5052795410156, "logps/rejected": -399.63690185546875, "loss": 0.4786, "rewards/accuracies": 0.875, "rewards/chosen": -1.2615984678268433, "rewards/margins": 1.814473032951355, "rewards/rejected": -3.0760715007781982, "step": 4240 }, { "epoch": 0.49, "learning_rate": 1.5430494862406992e-07, "logits/chosen": -2.268984794616699, "logits/rejected": -2.490394115447998, "logps/chosen": -345.7473449707031, "logps/rejected": -179.95140075683594, "loss": 0.2446, "rewards/accuracies": 1.0, "rewards/chosen": -0.34967711567878723, "rewards/margins": 2.02929425239563, "rewards/rejected": -2.3789713382720947, "step": 4241 }, { "epoch": 0.49, "learning_rate": 1.5426951694815164e-07, "logits/chosen": -2.028651714324951, "logits/rejected": -1.8188753128051758, "logps/chosen": -318.8696594238281, "logps/rejected": -366.59368896484375, "loss": 0.6116, "rewards/accuracies": 0.5, "rewards/chosen": -0.06175708770751953, "rewards/margins": 0.4544529914855957, "rewards/rejected": -0.5162100791931152, "step": 4242 }, { "epoch": 0.49, "learning_rate": 1.5423408527223337e-07, "logits/chosen": -1.949172019958496, "logits/rejected": -2.052313804626465, "logps/chosen": -155.8046875, "logps/rejected": -164.6605682373047, "loss": 0.316, "rewards/accuracies": 1.0, "rewards/chosen": -0.4880596995353699, "rewards/margins": 1.4798177480697632, "rewards/rejected": -1.9678773880004883, "step": 4243 }, { "epoch": 0.49, "learning_rate": 1.541986535963151e-07, "logits/chosen": -2.3810529708862305, "logits/rejected": -2.328674077987671, "logps/chosen": -164.00653076171875, "logps/rejected": -309.40423583984375, "loss": 0.2108, "rewards/accuracies": 0.875, "rewards/chosen": -1.5997473001480103, "rewards/margins": 3.881175994873047, "rewards/rejected": -5.480923175811768, "step": 4244 }, { "epoch": 0.49, "learning_rate": 1.541632219203968e-07, "logits/chosen": -2.868957996368408, "logits/rejected": -2.790341854095459, "logps/chosen": -196.22256469726562, "logps/rejected": -313.3121032714844, "loss": 0.7041, "rewards/accuracies": 0.625, "rewards/chosen": -1.494431495666504, "rewards/margins": 2.2776315212249756, "rewards/rejected": -3.7720630168914795, "step": 4245 }, { "epoch": 0.49, "learning_rate": 1.5412779024447858e-07, "logits/chosen": -2.7504994869232178, "logits/rejected": -2.8103044033050537, "logps/chosen": -220.88858032226562, "logps/rejected": -320.0758972167969, "loss": 0.1679, "rewards/accuracies": 1.0, "rewards/chosen": -0.2829967141151428, "rewards/margins": 2.1074719429016113, "rewards/rejected": -2.3904685974121094, "step": 4246 }, { "epoch": 0.49, "learning_rate": 1.540923585685603e-07, "logits/chosen": -2.6229262351989746, "logits/rejected": -2.5707273483276367, "logps/chosen": -241.9980926513672, "logps/rejected": -267.77801513671875, "loss": 0.2288, "rewards/accuracies": 0.875, "rewards/chosen": -0.5068312287330627, "rewards/margins": 3.1791698932647705, "rewards/rejected": -3.6860010623931885, "step": 4247 }, { "epoch": 0.49, "learning_rate": 1.5405692689264203e-07, "logits/chosen": -2.339515209197998, "logits/rejected": -2.3078110218048096, "logps/chosen": -315.0824279785156, "logps/rejected": -359.0888671875, "loss": 0.6425, "rewards/accuracies": 0.625, "rewards/chosen": -1.257819652557373, "rewards/margins": 0.9051704406738281, "rewards/rejected": -2.162990093231201, "step": 4248 }, { "epoch": 0.49, "learning_rate": 1.5402149521672375e-07, "logits/chosen": -2.543381452560425, "logits/rejected": -2.650830030441284, "logps/chosen": -246.1514892578125, "logps/rejected": -257.54052734375, "loss": 0.2973, "rewards/accuracies": 1.0, "rewards/chosen": -0.5963013172149658, "rewards/margins": 2.495551109313965, "rewards/rejected": -3.0918524265289307, "step": 4249 }, { "epoch": 0.49, "learning_rate": 1.5398606354080547e-07, "logits/chosen": -2.1643595695495605, "logits/rejected": -2.356955051422119, "logps/chosen": -295.4329833984375, "logps/rejected": -198.77330017089844, "loss": 0.8915, "rewards/accuracies": 0.625, "rewards/chosen": -1.2637898921966553, "rewards/margins": 0.5414870381355286, "rewards/rejected": -1.8052769899368286, "step": 4250 }, { "epoch": 0.49, "learning_rate": 1.539506318648872e-07, "logits/chosen": -2.211146354675293, "logits/rejected": -2.2021608352661133, "logps/chosen": -102.38069152832031, "logps/rejected": -225.507568359375, "loss": 0.5021, "rewards/accuracies": 0.875, "rewards/chosen": -1.6190791130065918, "rewards/margins": 2.0115549564361572, "rewards/rejected": -3.630634069442749, "step": 4251 }, { "epoch": 0.49, "learning_rate": 1.5391520018896892e-07, "logits/chosen": -1.987952709197998, "logits/rejected": -1.972144365310669, "logps/chosen": -251.96258544921875, "logps/rejected": -364.8106689453125, "loss": 0.2861, "rewards/accuracies": 0.875, "rewards/chosen": -1.1091543436050415, "rewards/margins": 2.4591972827911377, "rewards/rejected": -3.5683517456054688, "step": 4252 }, { "epoch": 0.49, "learning_rate": 1.5387976851305067e-07, "logits/chosen": -2.130178451538086, "logits/rejected": -2.0667145252227783, "logps/chosen": -268.03118896484375, "logps/rejected": -377.71929931640625, "loss": 0.2815, "rewards/accuracies": 0.875, "rewards/chosen": -0.8018983602523804, "rewards/margins": 2.1182358264923096, "rewards/rejected": -2.9201340675354004, "step": 4253 }, { "epoch": 0.49, "learning_rate": 1.538443368371324e-07, "logits/chosen": -1.9856798648834229, "logits/rejected": -2.466815948486328, "logps/chosen": -427.8575134277344, "logps/rejected": -270.6650390625, "loss": 0.1404, "rewards/accuracies": 1.0, "rewards/chosen": -0.02171120047569275, "rewards/margins": 2.6592845916748047, "rewards/rejected": -2.6809957027435303, "step": 4254 }, { "epoch": 0.49, "learning_rate": 1.538089051612141e-07, "logits/chosen": -2.3740897178649902, "logits/rejected": -2.353344202041626, "logps/chosen": -217.17514038085938, "logps/rejected": -310.5447998046875, "loss": 0.2201, "rewards/accuracies": 1.0, "rewards/chosen": -1.5423228740692139, "rewards/margins": 2.296145439147949, "rewards/rejected": -3.838468313217163, "step": 4255 }, { "epoch": 0.5, "learning_rate": 1.5377347348529583e-07, "logits/chosen": -1.9890837669372559, "logits/rejected": -1.9718835353851318, "logps/chosen": -411.16162109375, "logps/rejected": -361.01019287109375, "loss": 0.4568, "rewards/accuracies": 0.75, "rewards/chosen": -1.001009225845337, "rewards/margins": 1.7641944885253906, "rewards/rejected": -2.7652037143707275, "step": 4256 }, { "epoch": 0.5, "learning_rate": 1.5373804180937755e-07, "logits/chosen": -1.9421712160110474, "logits/rejected": -1.934262752532959, "logps/chosen": -340.1602783203125, "logps/rejected": -348.1224365234375, "loss": 0.3906, "rewards/accuracies": 0.875, "rewards/chosen": -1.1256228685379028, "rewards/margins": 1.5955533981323242, "rewards/rejected": -2.7211761474609375, "step": 4257 }, { "epoch": 0.5, "learning_rate": 1.5370261013345933e-07, "logits/chosen": -2.2184653282165527, "logits/rejected": -2.3386740684509277, "logps/chosen": -181.63800048828125, "logps/rejected": -248.48541259765625, "loss": 0.2732, "rewards/accuracies": 1.0, "rewards/chosen": -0.6168058514595032, "rewards/margins": 1.8257722854614258, "rewards/rejected": -2.442578077316284, "step": 4258 }, { "epoch": 0.5, "learning_rate": 1.5366717845754105e-07, "logits/chosen": -2.403015613555908, "logits/rejected": -2.0070316791534424, "logps/chosen": -289.396484375, "logps/rejected": -358.3860778808594, "loss": 0.1974, "rewards/accuracies": 0.875, "rewards/chosen": -0.5178790092468262, "rewards/margins": 2.545314311981201, "rewards/rejected": -3.0631933212280273, "step": 4259 }, { "epoch": 0.5, "learning_rate": 1.5363174678162277e-07, "logits/chosen": -2.5418899059295654, "logits/rejected": -2.352541208267212, "logps/chosen": -319.2630920410156, "logps/rejected": -313.2183532714844, "loss": 0.2209, "rewards/accuracies": 0.875, "rewards/chosen": -0.4045635461807251, "rewards/margins": 3.0415146350860596, "rewards/rejected": -3.446078300476074, "step": 4260 }, { "epoch": 0.5, "learning_rate": 1.535963151057045e-07, "logits/chosen": -2.1589102745056152, "logits/rejected": -2.0008838176727295, "logps/chosen": -375.5217590332031, "logps/rejected": -358.9851989746094, "loss": 0.4773, "rewards/accuracies": 0.75, "rewards/chosen": -0.6098263263702393, "rewards/margins": 1.4764554500579834, "rewards/rejected": -2.0862817764282227, "step": 4261 }, { "epoch": 0.5, "learning_rate": 1.5356088342978622e-07, "logits/chosen": -2.1733298301696777, "logits/rejected": -2.0634188652038574, "logps/chosen": -229.7484130859375, "logps/rejected": -291.6719055175781, "loss": 0.4286, "rewards/accuracies": 0.75, "rewards/chosen": -1.3729580640792847, "rewards/margins": 1.4149776697158813, "rewards/rejected": -2.787935733795166, "step": 4262 }, { "epoch": 0.5, "learning_rate": 1.5352545175386794e-07, "logits/chosen": -2.0859203338623047, "logits/rejected": -2.4530980587005615, "logps/chosen": -251.95108032226562, "logps/rejected": -252.70628356933594, "loss": 0.6782, "rewards/accuracies": 0.75, "rewards/chosen": -0.5641568899154663, "rewards/margins": 1.3612463474273682, "rewards/rejected": -1.9254032373428345, "step": 4263 }, { "epoch": 0.5, "learning_rate": 1.534900200779497e-07, "logits/chosen": -2.016542434692383, "logits/rejected": -2.209221601486206, "logps/chosen": -214.40408325195312, "logps/rejected": -244.93080139160156, "loss": 1.1063, "rewards/accuracies": 0.625, "rewards/chosen": -1.9619942903518677, "rewards/margins": 0.07737934589385986, "rewards/rejected": -2.0393736362457275, "step": 4264 }, { "epoch": 0.5, "learning_rate": 1.534545884020314e-07, "logits/chosen": -2.290271043777466, "logits/rejected": -2.236297607421875, "logps/chosen": -267.8720397949219, "logps/rejected": -325.80584716796875, "loss": 0.1913, "rewards/accuracies": 1.0, "rewards/chosen": -0.17568451166152954, "rewards/margins": 2.413940906524658, "rewards/rejected": -2.589625358581543, "step": 4265 }, { "epoch": 0.5, "learning_rate": 1.5341915672611313e-07, "logits/chosen": -2.362647533416748, "logits/rejected": -2.214596748352051, "logps/chosen": -406.8388671875, "logps/rejected": -354.97894287109375, "loss": 0.3288, "rewards/accuracies": 0.75, "rewards/chosen": -1.1725614070892334, "rewards/margins": 2.5254924297332764, "rewards/rejected": -3.698054075241089, "step": 4266 }, { "epoch": 0.5, "learning_rate": 1.5338372505019486e-07, "logits/chosen": -2.920297861099243, "logits/rejected": -2.980464458465576, "logps/chosen": -157.0888214111328, "logps/rejected": -225.21641540527344, "loss": 0.2012, "rewards/accuracies": 0.875, "rewards/chosen": -0.5910956859588623, "rewards/margins": 2.5340590476989746, "rewards/rejected": -3.125154495239258, "step": 4267 }, { "epoch": 0.5, "learning_rate": 1.5334829337427658e-07, "logits/chosen": -2.490616798400879, "logits/rejected": -2.14863657951355, "logps/chosen": -233.7051239013672, "logps/rejected": -330.0008544921875, "loss": 0.2783, "rewards/accuracies": 0.875, "rewards/chosen": -1.1076128482818604, "rewards/margins": 3.4714250564575195, "rewards/rejected": -4.579038143157959, "step": 4268 }, { "epoch": 0.5, "learning_rate": 1.533128616983583e-07, "logits/chosen": -2.8016772270202637, "logits/rejected": -2.7487833499908447, "logps/chosen": -246.37582397460938, "logps/rejected": -213.29348754882812, "loss": 0.651, "rewards/accuracies": 0.625, "rewards/chosen": -1.8931890726089478, "rewards/margins": 0.5293000936508179, "rewards/rejected": -2.4224891662597656, "step": 4269 }, { "epoch": 0.5, "learning_rate": 1.5327743002244007e-07, "logits/chosen": -2.752962112426758, "logits/rejected": -2.6458139419555664, "logps/chosen": -211.2667999267578, "logps/rejected": -336.0350341796875, "loss": 0.1565, "rewards/accuracies": 1.0, "rewards/chosen": -0.8676900267601013, "rewards/margins": 3.5591137409210205, "rewards/rejected": -4.4268035888671875, "step": 4270 }, { "epoch": 0.5, "learning_rate": 1.532419983465218e-07, "logits/chosen": -2.3950929641723633, "logits/rejected": -2.710789918899536, "logps/chosen": -198.43841552734375, "logps/rejected": -225.40200805664062, "loss": 0.1764, "rewards/accuracies": 1.0, "rewards/chosen": -0.6378539204597473, "rewards/margins": 2.739706039428711, "rewards/rejected": -3.3775601387023926, "step": 4271 }, { "epoch": 0.5, "learning_rate": 1.5320656667060352e-07, "logits/chosen": -2.662505626678467, "logits/rejected": -2.634950637817383, "logps/chosen": -206.844482421875, "logps/rejected": -257.8430480957031, "loss": 0.2815, "rewards/accuracies": 0.875, "rewards/chosen": -0.48766660690307617, "rewards/margins": 2.2955124378204346, "rewards/rejected": -2.78317928314209, "step": 4272 }, { "epoch": 0.5, "learning_rate": 1.5317113499468524e-07, "logits/chosen": -2.4974470138549805, "logits/rejected": -2.734520435333252, "logps/chosen": -296.371826171875, "logps/rejected": -186.68031311035156, "loss": 0.2344, "rewards/accuracies": 1.0, "rewards/chosen": -0.6894539594650269, "rewards/margins": 1.7265090942382812, "rewards/rejected": -2.4159629344940186, "step": 4273 }, { "epoch": 0.5, "learning_rate": 1.5313570331876696e-07, "logits/chosen": -2.2007408142089844, "logits/rejected": -2.4097177982330322, "logps/chosen": -382.18585205078125, "logps/rejected": -311.4286804199219, "loss": 1.0448, "rewards/accuracies": 0.625, "rewards/chosen": -1.0980472564697266, "rewards/margins": 2.0328850746154785, "rewards/rejected": -3.130932331085205, "step": 4274 }, { "epoch": 0.5, "learning_rate": 1.531002716428487e-07, "logits/chosen": -1.8832228183746338, "logits/rejected": -1.780751347541809, "logps/chosen": -309.95599365234375, "logps/rejected": -387.327392578125, "loss": 0.5056, "rewards/accuracies": 0.625, "rewards/chosen": -1.1864901781082153, "rewards/margins": 2.0453972816467285, "rewards/rejected": -3.2318875789642334, "step": 4275 }, { "epoch": 0.5, "learning_rate": 1.5306483996693043e-07, "logits/chosen": -2.6675634384155273, "logits/rejected": -2.880091905593872, "logps/chosen": -301.7516174316406, "logps/rejected": -199.12220764160156, "loss": 0.2024, "rewards/accuracies": 1.0, "rewards/chosen": -0.7444517612457275, "rewards/margins": 2.3038105964660645, "rewards/rejected": -3.048262596130371, "step": 4276 }, { "epoch": 0.5, "learning_rate": 1.5302940829101216e-07, "logits/chosen": -2.2409582138061523, "logits/rejected": -2.323681354522705, "logps/chosen": -318.89453125, "logps/rejected": -600.5203247070312, "loss": 0.1329, "rewards/accuracies": 1.0, "rewards/chosen": -0.9868684411048889, "rewards/margins": 3.8234286308288574, "rewards/rejected": -4.810297012329102, "step": 4277 }, { "epoch": 0.5, "learning_rate": 1.5299397661509388e-07, "logits/chosen": -2.3955976963043213, "logits/rejected": -2.700786590576172, "logps/chosen": -288.8055114746094, "logps/rejected": -182.29733276367188, "loss": 0.8295, "rewards/accuracies": 0.625, "rewards/chosen": -1.8316454887390137, "rewards/margins": 0.45133861899375916, "rewards/rejected": -2.2829842567443848, "step": 4278 }, { "epoch": 0.5, "learning_rate": 1.529585449391756e-07, "logits/chosen": -2.7779595851898193, "logits/rejected": -2.877051830291748, "logps/chosen": -260.39215087890625, "logps/rejected": -281.03155517578125, "loss": 0.4675, "rewards/accuracies": 0.625, "rewards/chosen": -1.139236569404602, "rewards/margins": 1.7268693447113037, "rewards/rejected": -2.866105794906616, "step": 4279 }, { "epoch": 0.5, "learning_rate": 1.5292311326325732e-07, "logits/chosen": -2.23439359664917, "logits/rejected": -2.4839017391204834, "logps/chosen": -236.51361083984375, "logps/rejected": -312.40667724609375, "loss": 0.3296, "rewards/accuracies": 0.875, "rewards/chosen": -1.3737927675247192, "rewards/margins": 3.157532215118408, "rewards/rejected": -4.531325340270996, "step": 4280 }, { "epoch": 0.5, "learning_rate": 1.528876815873391e-07, "logits/chosen": -1.8599714040756226, "logits/rejected": -2.1752562522888184, "logps/chosen": -378.04608154296875, "logps/rejected": -410.76947021484375, "loss": 0.6835, "rewards/accuracies": 0.625, "rewards/chosen": -1.2734568119049072, "rewards/margins": 0.7233983278274536, "rewards/rejected": -1.9968551397323608, "step": 4281 }, { "epoch": 0.5, "learning_rate": 1.5285224991142082e-07, "logits/chosen": -2.559166193008423, "logits/rejected": -2.6843860149383545, "logps/chosen": -154.65261840820312, "logps/rejected": -276.34112548828125, "loss": 0.3374, "rewards/accuracies": 0.75, "rewards/chosen": -0.5990694761276245, "rewards/margins": 3.390658378601074, "rewards/rejected": -3.9897279739379883, "step": 4282 }, { "epoch": 0.5, "learning_rate": 1.5281681823550254e-07, "logits/chosen": -2.544830799102783, "logits/rejected": -2.8360440731048584, "logps/chosen": -294.1865539550781, "logps/rejected": -204.4533233642578, "loss": 0.2522, "rewards/accuracies": 0.875, "rewards/chosen": -0.8448627591133118, "rewards/margins": 2.4339609146118164, "rewards/rejected": -3.2788236141204834, "step": 4283 }, { "epoch": 0.5, "learning_rate": 1.5278138655958426e-07, "logits/chosen": -2.765310049057007, "logits/rejected": -2.611494541168213, "logps/chosen": -355.063232421875, "logps/rejected": -397.748046875, "loss": 0.2973, "rewards/accuracies": 0.75, "rewards/chosen": -1.2748652696609497, "rewards/margins": 2.362043857574463, "rewards/rejected": -3.6369094848632812, "step": 4284 }, { "epoch": 0.5, "learning_rate": 1.5274595488366599e-07, "logits/chosen": -1.9682056903839111, "logits/rejected": -1.7485270500183105, "logps/chosen": -248.74639892578125, "logps/rejected": -302.92681884765625, "loss": 0.3395, "rewards/accuracies": 0.75, "rewards/chosen": -1.1154016256332397, "rewards/margins": 1.7855627536773682, "rewards/rejected": -2.9009644985198975, "step": 4285 }, { "epoch": 0.5, "learning_rate": 1.527105232077477e-07, "logits/chosen": -1.521147608757019, "logits/rejected": -1.682644009590149, "logps/chosen": -415.6841735839844, "logps/rejected": -354.0915222167969, "loss": 0.2539, "rewards/accuracies": 1.0, "rewards/chosen": -1.2118538618087769, "rewards/margins": 2.22906756401062, "rewards/rejected": -3.4409215450286865, "step": 4286 }, { "epoch": 0.5, "learning_rate": 1.5267509153182946e-07, "logits/chosen": -1.7822232246398926, "logits/rejected": -2.1787190437316895, "logps/chosen": -442.8018798828125, "logps/rejected": -217.33682250976562, "loss": 0.5106, "rewards/accuracies": 0.75, "rewards/chosen": -0.1929667592048645, "rewards/margins": 1.4088464975357056, "rewards/rejected": -1.6018133163452148, "step": 4287 }, { "epoch": 0.5, "learning_rate": 1.5263965985591118e-07, "logits/chosen": -2.2087419033050537, "logits/rejected": -2.0138778686523438, "logps/chosen": -293.10589599609375, "logps/rejected": -243.96463012695312, "loss": 0.4307, "rewards/accuracies": 0.625, "rewards/chosen": -0.6133863925933838, "rewards/margins": 1.2432156801223755, "rewards/rejected": -1.8566019535064697, "step": 4288 }, { "epoch": 0.5, "learning_rate": 1.526042281799929e-07, "logits/chosen": -2.241809606552124, "logits/rejected": -2.357722520828247, "logps/chosen": -194.8080596923828, "logps/rejected": -257.8428039550781, "loss": 0.4751, "rewards/accuracies": 0.75, "rewards/chosen": -1.1622743606567383, "rewards/margins": 2.955214262008667, "rewards/rejected": -4.117488861083984, "step": 4289 }, { "epoch": 0.5, "learning_rate": 1.5256879650407462e-07, "logits/chosen": -2.113306999206543, "logits/rejected": -2.2567076683044434, "logps/chosen": -385.4450988769531, "logps/rejected": -252.20614624023438, "loss": 0.4274, "rewards/accuracies": 0.75, "rewards/chosen": -0.6423571705818176, "rewards/margins": 1.3067986965179443, "rewards/rejected": -1.9491558074951172, "step": 4290 }, { "epoch": 0.5, "learning_rate": 1.5253336482815635e-07, "logits/chosen": -2.92417573928833, "logits/rejected": -2.781428813934326, "logps/chosen": -370.0450744628906, "logps/rejected": -304.86083984375, "loss": 0.3689, "rewards/accuracies": 0.75, "rewards/chosen": -1.5746009349822998, "rewards/margins": 3.243821620941162, "rewards/rejected": -4.818422317504883, "step": 4291 }, { "epoch": 0.5, "learning_rate": 1.5249793315223807e-07, "logits/chosen": -2.130568027496338, "logits/rejected": -2.474905014038086, "logps/chosen": -342.45050048828125, "logps/rejected": -217.0756378173828, "loss": 0.6094, "rewards/accuracies": 0.5, "rewards/chosen": -1.0228044986724854, "rewards/margins": 1.1828935146331787, "rewards/rejected": -2.205698013305664, "step": 4292 }, { "epoch": 0.5, "learning_rate": 1.5246250147631984e-07, "logits/chosen": -2.3553173542022705, "logits/rejected": -2.137087821960449, "logps/chosen": -278.73541259765625, "logps/rejected": -270.9977111816406, "loss": 0.1765, "rewards/accuracies": 1.0, "rewards/chosen": -1.2739839553833008, "rewards/margins": 3.074730157852173, "rewards/rejected": -4.3487138748168945, "step": 4293 }, { "epoch": 0.5, "learning_rate": 1.5242706980040156e-07, "logits/chosen": -2.162520408630371, "logits/rejected": -1.8134863376617432, "logps/chosen": -192.6844482421875, "logps/rejected": -314.5346374511719, "loss": 0.2077, "rewards/accuracies": 0.875, "rewards/chosen": -0.3970511555671692, "rewards/margins": 3.701812267303467, "rewards/rejected": -4.09886360168457, "step": 4294 }, { "epoch": 0.5, "learning_rate": 1.5239163812448329e-07, "logits/chosen": -2.7187843322753906, "logits/rejected": -2.7966771125793457, "logps/chosen": -209.6675262451172, "logps/rejected": -219.25469970703125, "loss": 0.408, "rewards/accuracies": 0.75, "rewards/chosen": -0.9427275657653809, "rewards/margins": 1.8578760623931885, "rewards/rejected": -2.8006036281585693, "step": 4295 }, { "epoch": 0.5, "learning_rate": 1.52356206448565e-07, "logits/chosen": -2.3302390575408936, "logits/rejected": -2.1605334281921387, "logps/chosen": -111.98717498779297, "logps/rejected": -207.28973388671875, "loss": 0.2261, "rewards/accuracies": 0.875, "rewards/chosen": -0.5030367970466614, "rewards/margins": 2.518113851547241, "rewards/rejected": -3.0211503505706787, "step": 4296 }, { "epoch": 0.5, "learning_rate": 1.5232077477264673e-07, "logits/chosen": -2.2052013874053955, "logits/rejected": -2.083253860473633, "logps/chosen": -113.82157897949219, "logps/rejected": -227.1693115234375, "loss": 0.2351, "rewards/accuracies": 0.875, "rewards/chosen": -0.15822666883468628, "rewards/margins": 3.32332181930542, "rewards/rejected": -3.48154878616333, "step": 4297 }, { "epoch": 0.5, "learning_rate": 1.5228534309672848e-07, "logits/chosen": -2.181292772293091, "logits/rejected": -1.952545404434204, "logps/chosen": -142.2515411376953, "logps/rejected": -185.93263244628906, "loss": 0.3966, "rewards/accuracies": 0.875, "rewards/chosen": -0.958098292350769, "rewards/margins": 2.0660760402679443, "rewards/rejected": -3.024174451828003, "step": 4298 }, { "epoch": 0.5, "learning_rate": 1.522499114208102e-07, "logits/chosen": -2.4294776916503906, "logits/rejected": -2.37638258934021, "logps/chosen": -154.66912841796875, "logps/rejected": -226.6961669921875, "loss": 0.7055, "rewards/accuracies": 0.625, "rewards/chosen": -1.5065608024597168, "rewards/margins": 2.1269843578338623, "rewards/rejected": -3.633544921875, "step": 4299 }, { "epoch": 0.5, "learning_rate": 1.5221447974489192e-07, "logits/chosen": -2.714020013809204, "logits/rejected": -2.652264356613159, "logps/chosen": -299.5177001953125, "logps/rejected": -310.5702819824219, "loss": 0.2176, "rewards/accuracies": 0.875, "rewards/chosen": -0.32908204197883606, "rewards/margins": 2.784719467163086, "rewards/rejected": -3.1138014793395996, "step": 4300 }, { "epoch": 0.5, "learning_rate": 1.5217904806897365e-07, "logits/chosen": -2.453956127166748, "logits/rejected": -2.624359607696533, "logps/chosen": -331.8744201660156, "logps/rejected": -269.48162841796875, "loss": 0.2403, "rewards/accuracies": 0.75, "rewards/chosen": -0.8841144442558289, "rewards/margins": 2.9946963787078857, "rewards/rejected": -3.8788108825683594, "step": 4301 }, { "epoch": 0.5, "learning_rate": 1.5214361639305537e-07, "logits/chosen": -2.4486608505249023, "logits/rejected": -2.586564540863037, "logps/chosen": -411.9715881347656, "logps/rejected": -282.53778076171875, "loss": 0.4723, "rewards/accuracies": 0.875, "rewards/chosen": -0.4530561864376068, "rewards/margins": 1.5842235088348389, "rewards/rejected": -2.0372796058654785, "step": 4302 }, { "epoch": 0.5, "learning_rate": 1.521081847171371e-07, "logits/chosen": -2.1105005741119385, "logits/rejected": -2.1378371715545654, "logps/chosen": -321.71551513671875, "logps/rejected": -283.59991455078125, "loss": 0.3345, "rewards/accuracies": 0.875, "rewards/chosen": -0.8658463954925537, "rewards/margins": 1.7376617193222046, "rewards/rejected": -2.603508234024048, "step": 4303 }, { "epoch": 0.5, "learning_rate": 1.5207275304121884e-07, "logits/chosen": -2.7593157291412354, "logits/rejected": -2.7036802768707275, "logps/chosen": -353.3402099609375, "logps/rejected": -270.9657897949219, "loss": 0.4833, "rewards/accuracies": 0.875, "rewards/chosen": -0.824008584022522, "rewards/margins": 3.386936664581299, "rewards/rejected": -4.210945129394531, "step": 4304 }, { "epoch": 0.5, "learning_rate": 1.520373213653006e-07, "logits/chosen": -2.701078414916992, "logits/rejected": -2.4819047451019287, "logps/chosen": -403.67633056640625, "logps/rejected": -249.9696044921875, "loss": 0.4415, "rewards/accuracies": 0.75, "rewards/chosen": -0.8156898021697998, "rewards/margins": 1.8898921012878418, "rewards/rejected": -2.7055821418762207, "step": 4305 }, { "epoch": 0.5, "learning_rate": 1.520018896893823e-07, "logits/chosen": -1.8966426849365234, "logits/rejected": -2.2716963291168213, "logps/chosen": -561.7974853515625, "logps/rejected": -248.41207885742188, "loss": 0.6988, "rewards/accuracies": 0.625, "rewards/chosen": -1.078560471534729, "rewards/margins": 1.3422222137451172, "rewards/rejected": -2.4207825660705566, "step": 4306 }, { "epoch": 0.5, "learning_rate": 1.5196645801346403e-07, "logits/chosen": -2.3306541442871094, "logits/rejected": -2.763977527618408, "logps/chosen": -325.7136535644531, "logps/rejected": -264.84893798828125, "loss": 0.0761, "rewards/accuracies": 1.0, "rewards/chosen": -0.9100849628448486, "rewards/margins": 5.037590980529785, "rewards/rejected": -5.947675704956055, "step": 4307 }, { "epoch": 0.5, "learning_rate": 1.5193102633754575e-07, "logits/chosen": -1.6247892379760742, "logits/rejected": -1.7137699127197266, "logps/chosen": -417.07354736328125, "logps/rejected": -409.0220947265625, "loss": 0.2535, "rewards/accuracies": 0.875, "rewards/chosen": -0.4209728240966797, "rewards/margins": 2.9849705696105957, "rewards/rejected": -3.4059433937072754, "step": 4308 }, { "epoch": 0.5, "learning_rate": 1.518955946616275e-07, "logits/chosen": -2.698357343673706, "logits/rejected": -2.264911413192749, "logps/chosen": -358.6046142578125, "logps/rejected": -266.30230712890625, "loss": 0.2344, "rewards/accuracies": 0.875, "rewards/chosen": -0.6404500603675842, "rewards/margins": 3.1649279594421387, "rewards/rejected": -3.805377960205078, "step": 4309 }, { "epoch": 0.5, "learning_rate": 1.5186016298570922e-07, "logits/chosen": -2.084332227706909, "logits/rejected": -2.0502638816833496, "logps/chosen": -247.8494873046875, "logps/rejected": -284.5571594238281, "loss": 0.6324, "rewards/accuracies": 0.625, "rewards/chosen": -1.420001745223999, "rewards/margins": 0.6811508536338806, "rewards/rejected": -2.1011524200439453, "step": 4310 }, { "epoch": 0.5, "learning_rate": 1.5182473130979095e-07, "logits/chosen": -2.403601884841919, "logits/rejected": -2.3329012393951416, "logps/chosen": -211.25611877441406, "logps/rejected": -217.35263061523438, "loss": 0.6067, "rewards/accuracies": 0.625, "rewards/chosen": -1.3527064323425293, "rewards/margins": 1.0846030712127686, "rewards/rejected": -2.437309741973877, "step": 4311 }, { "epoch": 0.5, "learning_rate": 1.5178929963387267e-07, "logits/chosen": -2.3866121768951416, "logits/rejected": -2.626359224319458, "logps/chosen": -329.42840576171875, "logps/rejected": -222.88406372070312, "loss": 0.0971, "rewards/accuracies": 1.0, "rewards/chosen": -0.04774979501962662, "rewards/margins": 3.0765109062194824, "rewards/rejected": -3.124260425567627, "step": 4312 }, { "epoch": 0.5, "learning_rate": 1.517538679579544e-07, "logits/chosen": -2.5660974979400635, "logits/rejected": -2.4182331562042236, "logps/chosen": -186.1149139404297, "logps/rejected": -166.7367401123047, "loss": 0.3587, "rewards/accuracies": 1.0, "rewards/chosen": -0.5715744495391846, "rewards/margins": 1.1570802927017212, "rewards/rejected": -1.7286546230316162, "step": 4313 }, { "epoch": 0.5, "learning_rate": 1.517184362820361e-07, "logits/chosen": -1.7928569316864014, "logits/rejected": -2.027052402496338, "logps/chosen": -167.12301635742188, "logps/rejected": -238.40406799316406, "loss": 0.3288, "rewards/accuracies": 0.875, "rewards/chosen": -0.8394443392753601, "rewards/margins": 2.688851833343506, "rewards/rejected": -3.5282962322235107, "step": 4314 }, { "epoch": 0.5, "learning_rate": 1.5168300460611783e-07, "logits/chosen": -2.561551094055176, "logits/rejected": -2.8854410648345947, "logps/chosen": -488.9709777832031, "logps/rejected": -257.8424377441406, "loss": 0.3089, "rewards/accuracies": 0.75, "rewards/chosen": -0.989282488822937, "rewards/margins": 2.291337013244629, "rewards/rejected": -3.2806191444396973, "step": 4315 }, { "epoch": 0.5, "learning_rate": 1.516475729301996e-07, "logits/chosen": -2.6186091899871826, "logits/rejected": -2.6712095737457275, "logps/chosen": -118.32412719726562, "logps/rejected": -271.76690673828125, "loss": 0.1932, "rewards/accuracies": 0.875, "rewards/chosen": -0.1257067173719406, "rewards/margins": 3.8512825965881348, "rewards/rejected": -3.976989507675171, "step": 4316 }, { "epoch": 0.5, "learning_rate": 1.5161214125428133e-07, "logits/chosen": -2.5843405723571777, "logits/rejected": -2.538076877593994, "logps/chosen": -104.80885314941406, "logps/rejected": -149.2321014404297, "loss": 0.3903, "rewards/accuracies": 0.875, "rewards/chosen": -0.8238985538482666, "rewards/margins": 1.055555820465088, "rewards/rejected": -1.8794543743133545, "step": 4317 }, { "epoch": 0.5, "learning_rate": 1.5157670957836305e-07, "logits/chosen": -2.530078411102295, "logits/rejected": -2.4148571491241455, "logps/chosen": -242.86953735351562, "logps/rejected": -237.532470703125, "loss": 0.2974, "rewards/accuracies": 1.0, "rewards/chosen": -0.7064067125320435, "rewards/margins": 1.3039573431015015, "rewards/rejected": -2.010364055633545, "step": 4318 }, { "epoch": 0.5, "learning_rate": 1.5154127790244478e-07, "logits/chosen": -2.362574577331543, "logits/rejected": -2.3392527103424072, "logps/chosen": -281.58203125, "logps/rejected": -280.62469482421875, "loss": 0.8528, "rewards/accuracies": 0.625, "rewards/chosen": -1.3266464471817017, "rewards/margins": 0.6573824882507324, "rewards/rejected": -1.984028935432434, "step": 4319 }, { "epoch": 0.5, "learning_rate": 1.5150584622652653e-07, "logits/chosen": -1.9832206964492798, "logits/rejected": -2.1096866130828857, "logps/chosen": -112.9889907836914, "logps/rejected": -166.43572998046875, "loss": 0.5653, "rewards/accuracies": 0.75, "rewards/chosen": -1.1626429557800293, "rewards/margins": 1.6260852813720703, "rewards/rejected": -2.7887282371520996, "step": 4320 }, { "epoch": 0.5, "learning_rate": 1.5147041455060825e-07, "logits/chosen": -2.2844033241271973, "logits/rejected": -2.5145881175994873, "logps/chosen": -370.1441345214844, "logps/rejected": -331.8449401855469, "loss": 0.8432, "rewards/accuracies": 0.625, "rewards/chosen": -1.2318850755691528, "rewards/margins": 1.7976487874984741, "rewards/rejected": -3.029533863067627, "step": 4321 }, { "epoch": 0.5, "learning_rate": 1.5143498287468997e-07, "logits/chosen": -2.946277618408203, "logits/rejected": -2.8118672370910645, "logps/chosen": -190.28897094726562, "logps/rejected": -156.46151733398438, "loss": 0.6061, "rewards/accuracies": 0.625, "rewards/chosen": -1.6267460584640503, "rewards/margins": 1.3819265365600586, "rewards/rejected": -3.0086727142333984, "step": 4322 }, { "epoch": 0.5, "learning_rate": 1.513995511987717e-07, "logits/chosen": -2.6600160598754883, "logits/rejected": -2.845024347305298, "logps/chosen": -251.10476684570312, "logps/rejected": -264.8751220703125, "loss": 0.7116, "rewards/accuracies": 0.75, "rewards/chosen": -0.7866148352622986, "rewards/margins": 2.1639723777770996, "rewards/rejected": -2.9505867958068848, "step": 4323 }, { "epoch": 0.5, "learning_rate": 1.5136411952285341e-07, "logits/chosen": -2.060102939605713, "logits/rejected": -2.5831716060638428, "logps/chosen": -360.67413330078125, "logps/rejected": -242.4019775390625, "loss": 0.1068, "rewards/accuracies": 1.0, "rewards/chosen": -0.17287471890449524, "rewards/margins": 2.8815369606018066, "rewards/rejected": -3.0544118881225586, "step": 4324 }, { "epoch": 0.5, "learning_rate": 1.5132868784693514e-07, "logits/chosen": -2.660162925720215, "logits/rejected": -2.615445613861084, "logps/chosen": -285.03900146484375, "logps/rejected": -338.6811218261719, "loss": 0.2661, "rewards/accuracies": 1.0, "rewards/chosen": -0.5105436444282532, "rewards/margins": 3.0604135990142822, "rewards/rejected": -3.5709569454193115, "step": 4325 }, { "epoch": 0.5, "learning_rate": 1.5129325617101686e-07, "logits/chosen": -2.029813289642334, "logits/rejected": -2.363755941390991, "logps/chosen": -285.2708740234375, "logps/rejected": -256.00823974609375, "loss": 0.4949, "rewards/accuracies": 0.75, "rewards/chosen": -1.0765560865402222, "rewards/margins": 1.385362148284912, "rewards/rejected": -2.461918354034424, "step": 4326 }, { "epoch": 0.5, "learning_rate": 1.512578244950986e-07, "logits/chosen": -2.7684950828552246, "logits/rejected": -2.7302324771881104, "logps/chosen": -179.16163635253906, "logps/rejected": -250.98574829101562, "loss": 0.4748, "rewards/accuracies": 0.75, "rewards/chosen": -0.5737764239311218, "rewards/margins": 1.9058587551116943, "rewards/rejected": -2.479635238647461, "step": 4327 }, { "epoch": 0.5, "learning_rate": 1.5122239281918035e-07, "logits/chosen": -2.225161075592041, "logits/rejected": -2.1888508796691895, "logps/chosen": -250.87237548828125, "logps/rejected": -346.39227294921875, "loss": 0.193, "rewards/accuracies": 1.0, "rewards/chosen": -1.5558536052703857, "rewards/margins": 2.6224405765533447, "rewards/rejected": -4.1782941818237305, "step": 4328 }, { "epoch": 0.5, "learning_rate": 1.5118696114326208e-07, "logits/chosen": -2.0678539276123047, "logits/rejected": -2.03318190574646, "logps/chosen": -193.02719116210938, "logps/rejected": -209.3085479736328, "loss": 0.6492, "rewards/accuracies": 0.75, "rewards/chosen": -1.3768494129180908, "rewards/margins": 2.378671407699585, "rewards/rejected": -3.755520820617676, "step": 4329 }, { "epoch": 0.5, "learning_rate": 1.511515294673438e-07, "logits/chosen": -2.3158762454986572, "logits/rejected": -2.334256410598755, "logps/chosen": -293.54150390625, "logps/rejected": -356.881591796875, "loss": 0.3303, "rewards/accuracies": 0.75, "rewards/chosen": -1.3904640674591064, "rewards/margins": 3.342280626296997, "rewards/rejected": -4.732744216918945, "step": 4330 }, { "epoch": 0.5, "learning_rate": 1.5111609779142552e-07, "logits/chosen": -2.3744165897369385, "logits/rejected": -2.518599033355713, "logps/chosen": -288.6755065917969, "logps/rejected": -232.468505859375, "loss": 0.5281, "rewards/accuracies": 0.75, "rewards/chosen": -1.306557536125183, "rewards/margins": 2.2666637897491455, "rewards/rejected": -3.573221445083618, "step": 4331 }, { "epoch": 0.5, "learning_rate": 1.5108066611550727e-07, "logits/chosen": -1.9691746234893799, "logits/rejected": -2.1620397567749023, "logps/chosen": -304.16387939453125, "logps/rejected": -257.9756164550781, "loss": 0.4329, "rewards/accuracies": 0.625, "rewards/chosen": -0.6263852119445801, "rewards/margins": 2.8525993824005127, "rewards/rejected": -3.478984832763672, "step": 4332 }, { "epoch": 0.5, "learning_rate": 1.51045234439589e-07, "logits/chosen": -1.9688916206359863, "logits/rejected": -1.6499273777008057, "logps/chosen": -211.80352783203125, "logps/rejected": -318.6793518066406, "loss": 0.2711, "rewards/accuracies": 0.875, "rewards/chosen": -0.6547039747238159, "rewards/margins": 2.51863694190979, "rewards/rejected": -3.1733407974243164, "step": 4333 }, { "epoch": 0.5, "learning_rate": 1.5100980276367071e-07, "logits/chosen": -2.2902441024780273, "logits/rejected": -2.2905776500701904, "logps/chosen": -171.33584594726562, "logps/rejected": -261.6302185058594, "loss": 0.6412, "rewards/accuracies": 0.875, "rewards/chosen": -1.7318154573440552, "rewards/margins": 1.3136053085327148, "rewards/rejected": -3.0454208850860596, "step": 4334 }, { "epoch": 0.5, "learning_rate": 1.5097437108775244e-07, "logits/chosen": -1.7281849384307861, "logits/rejected": -1.8013017177581787, "logps/chosen": -307.65008544921875, "logps/rejected": -346.1015625, "loss": 0.3856, "rewards/accuracies": 0.625, "rewards/chosen": -0.6965197324752808, "rewards/margins": 2.1779327392578125, "rewards/rejected": -2.8744523525238037, "step": 4335 }, { "epoch": 0.5, "learning_rate": 1.5093893941183416e-07, "logits/chosen": -2.8501462936401367, "logits/rejected": -2.8830363750457764, "logps/chosen": -104.16026306152344, "logps/rejected": -134.36285400390625, "loss": 0.4108, "rewards/accuracies": 1.0, "rewards/chosen": -0.9449959993362427, "rewards/margins": 0.7604342699050903, "rewards/rejected": -1.7054301500320435, "step": 4336 }, { "epoch": 0.5, "learning_rate": 1.5090350773591588e-07, "logits/chosen": -2.0073204040527344, "logits/rejected": -2.323707103729248, "logps/chosen": -470.0325927734375, "logps/rejected": -294.54913330078125, "loss": 0.422, "rewards/accuracies": 0.875, "rewards/chosen": -1.2827104330062866, "rewards/margins": 2.0566630363464355, "rewards/rejected": -3.3393735885620117, "step": 4337 }, { "epoch": 0.5, "learning_rate": 1.5086807605999763e-07, "logits/chosen": -2.486642360687256, "logits/rejected": -2.4480013847351074, "logps/chosen": -102.0362777709961, "logps/rejected": -188.59942626953125, "loss": 0.3011, "rewards/accuracies": 0.875, "rewards/chosen": -0.07007420063018799, "rewards/margins": 2.4947991371154785, "rewards/rejected": -2.564873218536377, "step": 4338 }, { "epoch": 0.5, "learning_rate": 1.5083264438407935e-07, "logits/chosen": -2.397627830505371, "logits/rejected": -2.5381393432617188, "logps/chosen": -217.27732849121094, "logps/rejected": -283.69842529296875, "loss": 0.2487, "rewards/accuracies": 1.0, "rewards/chosen": -0.8965975642204285, "rewards/margins": 1.9952188730239868, "rewards/rejected": -2.8918166160583496, "step": 4339 }, { "epoch": 0.5, "learning_rate": 1.507972127081611e-07, "logits/chosen": -2.234879732131958, "logits/rejected": -2.138388156890869, "logps/chosen": -252.5362548828125, "logps/rejected": -264.9149169921875, "loss": 0.5781, "rewards/accuracies": 0.75, "rewards/chosen": -0.392747163772583, "rewards/margins": 2.000958204269409, "rewards/rejected": -2.393705129623413, "step": 4340 }, { "epoch": 0.5, "learning_rate": 1.5076178103224282e-07, "logits/chosen": -1.8806730508804321, "logits/rejected": -1.704809308052063, "logps/chosen": -543.2327880859375, "logps/rejected": -658.5618896484375, "loss": 0.7936, "rewards/accuracies": 0.75, "rewards/chosen": -1.0469233989715576, "rewards/margins": 1.0340230464935303, "rewards/rejected": -2.080946445465088, "step": 4341 }, { "epoch": 0.51, "learning_rate": 1.5072634935632454e-07, "logits/chosen": -2.2330739498138428, "logits/rejected": -2.3611886501312256, "logps/chosen": -332.44549560546875, "logps/rejected": -313.6429443359375, "loss": 0.1205, "rewards/accuracies": 1.0, "rewards/chosen": -0.4973880648612976, "rewards/margins": 2.983412742614746, "rewards/rejected": -3.4808008670806885, "step": 4342 }, { "epoch": 0.51, "learning_rate": 1.506909176804063e-07, "logits/chosen": -2.603106737136841, "logits/rejected": -2.329122543334961, "logps/chosen": -359.9208984375, "logps/rejected": -405.07568359375, "loss": 0.5659, "rewards/accuracies": 0.625, "rewards/chosen": -0.4199979305267334, "rewards/margins": 1.9746787548065186, "rewards/rejected": -2.394676685333252, "step": 4343 }, { "epoch": 0.51, "learning_rate": 1.5065548600448801e-07, "logits/chosen": -2.4392142295837402, "logits/rejected": -2.474942684173584, "logps/chosen": -165.6344451904297, "logps/rejected": -204.64625549316406, "loss": 0.5239, "rewards/accuracies": 0.75, "rewards/chosen": -1.3761308193206787, "rewards/margins": 2.425549030303955, "rewards/rejected": -3.8016796112060547, "step": 4344 }, { "epoch": 0.51, "learning_rate": 1.5062005432856974e-07, "logits/chosen": -2.1699328422546387, "logits/rejected": -2.277768611907959, "logps/chosen": -276.71563720703125, "logps/rejected": -281.0272521972656, "loss": 0.8349, "rewards/accuracies": 0.625, "rewards/chosen": -0.9548039436340332, "rewards/margins": 1.033818244934082, "rewards/rejected": -1.9886221885681152, "step": 4345 }, { "epoch": 0.51, "learning_rate": 1.5058462265265146e-07, "logits/chosen": -2.006967544555664, "logits/rejected": -2.0948214530944824, "logps/chosen": -393.83221435546875, "logps/rejected": -394.51702880859375, "loss": 0.6173, "rewards/accuracies": 0.75, "rewards/chosen": -1.5407195091247559, "rewards/margins": 2.1803205013275146, "rewards/rejected": -3.7210402488708496, "step": 4346 }, { "epoch": 0.51, "learning_rate": 1.5054919097673318e-07, "logits/chosen": -2.2900214195251465, "logits/rejected": -2.2937660217285156, "logps/chosen": -327.0431823730469, "logps/rejected": -348.63165283203125, "loss": 0.4819, "rewards/accuracies": 0.75, "rewards/chosen": -1.1783134937286377, "rewards/margins": 1.6505095958709717, "rewards/rejected": -2.8288230895996094, "step": 4347 }, { "epoch": 0.51, "learning_rate": 1.505137593008149e-07, "logits/chosen": -2.187046766281128, "logits/rejected": -2.378138542175293, "logps/chosen": -345.8704833984375, "logps/rejected": -278.2528076171875, "loss": 0.1878, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015912294387817, "rewards/margins": 2.045687675476074, "rewards/rejected": -3.1472787857055664, "step": 4348 }, { "epoch": 0.51, "learning_rate": 1.5047832762489665e-07, "logits/chosen": -2.5904488563537598, "logits/rejected": -2.6615853309631348, "logps/chosen": -236.78591918945312, "logps/rejected": -144.87545776367188, "loss": 0.81, "rewards/accuracies": 0.625, "rewards/chosen": -1.2676069736480713, "rewards/margins": 1.2389637231826782, "rewards/rejected": -2.506570816040039, "step": 4349 }, { "epoch": 0.51, "learning_rate": 1.5044289594897837e-07, "logits/chosen": -2.5963644981384277, "logits/rejected": -2.5106048583984375, "logps/chosen": -210.99586486816406, "logps/rejected": -260.60223388671875, "loss": 0.3051, "rewards/accuracies": 0.875, "rewards/chosen": -0.7144404053688049, "rewards/margins": 3.857468605041504, "rewards/rejected": -4.571908950805664, "step": 4350 }, { "epoch": 0.51, "learning_rate": 1.5040746427306012e-07, "logits/chosen": -1.7436047792434692, "logits/rejected": -2.170863628387451, "logps/chosen": -430.507568359375, "logps/rejected": -254.6909942626953, "loss": 0.4358, "rewards/accuracies": 0.875, "rewards/chosen": -0.5758795738220215, "rewards/margins": 0.9057127833366394, "rewards/rejected": -1.4815922975540161, "step": 4351 }, { "epoch": 0.51, "learning_rate": 1.5037203259714184e-07, "logits/chosen": -1.9794533252716064, "logits/rejected": -2.282529592514038, "logps/chosen": -358.34979248046875, "logps/rejected": -335.7793884277344, "loss": 0.5172, "rewards/accuracies": 0.625, "rewards/chosen": -1.638986349105835, "rewards/margins": 0.9403858184814453, "rewards/rejected": -2.5793724060058594, "step": 4352 }, { "epoch": 0.51, "learning_rate": 1.5033660092122357e-07, "logits/chosen": -2.3798699378967285, "logits/rejected": -2.5058705806732178, "logps/chosen": -274.5151672363281, "logps/rejected": -346.3312072753906, "loss": 0.3919, "rewards/accuracies": 0.75, "rewards/chosen": -1.5503833293914795, "rewards/margins": 1.507620096206665, "rewards/rejected": -3.0580034255981445, "step": 4353 }, { "epoch": 0.51, "learning_rate": 1.5030116924530532e-07, "logits/chosen": -1.9549859762191772, "logits/rejected": -2.54410719871521, "logps/chosen": -278.669189453125, "logps/rejected": -287.09942626953125, "loss": 0.1684, "rewards/accuracies": 1.0, "rewards/chosen": -0.6237742900848389, "rewards/margins": 3.6327781677246094, "rewards/rejected": -4.256552219390869, "step": 4354 }, { "epoch": 0.51, "learning_rate": 1.5026573756938704e-07, "logits/chosen": -2.3094558715820312, "logits/rejected": -2.359281063079834, "logps/chosen": -216.14822387695312, "logps/rejected": -372.9872741699219, "loss": 0.1787, "rewards/accuracies": 0.875, "rewards/chosen": -0.7349266409873962, "rewards/margins": 3.896794557571411, "rewards/rejected": -4.631720542907715, "step": 4355 }, { "epoch": 0.51, "learning_rate": 1.5023030589346876e-07, "logits/chosen": -1.9102256298065186, "logits/rejected": -1.9716451168060303, "logps/chosen": -174.44606018066406, "logps/rejected": -179.79747009277344, "loss": 0.975, "rewards/accuracies": 0.625, "rewards/chosen": -2.0311594009399414, "rewards/margins": 0.7269107103347778, "rewards/rejected": -2.7580699920654297, "step": 4356 }, { "epoch": 0.51, "learning_rate": 1.5019487421755048e-07, "logits/chosen": -1.8558202981948853, "logits/rejected": -1.714080572128296, "logps/chosen": -280.8904113769531, "logps/rejected": -355.36968994140625, "loss": 0.4609, "rewards/accuracies": 0.875, "rewards/chosen": -1.142195701599121, "rewards/margins": 1.453489065170288, "rewards/rejected": -2.59568452835083, "step": 4357 }, { "epoch": 0.51, "learning_rate": 1.501594425416322e-07, "logits/chosen": -2.3113255500793457, "logits/rejected": -2.1292450428009033, "logps/chosen": -381.748046875, "logps/rejected": -215.47662353515625, "loss": 0.1957, "rewards/accuracies": 1.0, "rewards/chosen": -1.3703558444976807, "rewards/margins": 2.1256959438323975, "rewards/rejected": -3.496051788330078, "step": 4358 }, { "epoch": 0.51, "learning_rate": 1.5012401086571393e-07, "logits/chosen": -2.406862735748291, "logits/rejected": -2.487161159515381, "logps/chosen": -254.95794677734375, "logps/rejected": -344.4727783203125, "loss": 0.1961, "rewards/accuracies": 1.0, "rewards/chosen": 0.06246212124824524, "rewards/margins": 3.110140800476074, "rewards/rejected": -3.0476784706115723, "step": 4359 }, { "epoch": 0.51, "learning_rate": 1.5008857918979565e-07, "logits/chosen": -2.265291690826416, "logits/rejected": -2.060199737548828, "logps/chosen": -194.21383666992188, "logps/rejected": -244.22549438476562, "loss": 0.4298, "rewards/accuracies": 0.75, "rewards/chosen": -0.9951977729797363, "rewards/margins": 1.6636674404144287, "rewards/rejected": -2.658865213394165, "step": 4360 }, { "epoch": 0.51, "learning_rate": 1.500531475138774e-07, "logits/chosen": -1.9943825006484985, "logits/rejected": -1.9677015542984009, "logps/chosen": -496.36578369140625, "logps/rejected": -272.0693359375, "loss": 0.2804, "rewards/accuracies": 0.875, "rewards/chosen": -0.6931813359260559, "rewards/margins": 2.0923101902008057, "rewards/rejected": -2.785491466522217, "step": 4361 }, { "epoch": 0.51, "learning_rate": 1.5001771583795912e-07, "logits/chosen": -2.1320672035217285, "logits/rejected": -2.006071090698242, "logps/chosen": -220.083251953125, "logps/rejected": -283.9471130371094, "loss": 0.4156, "rewards/accuracies": 0.75, "rewards/chosen": -0.47227972745895386, "rewards/margins": 1.1195602416992188, "rewards/rejected": -1.5918400287628174, "step": 4362 }, { "epoch": 0.51, "learning_rate": 1.4998228416204087e-07, "logits/chosen": -2.6416006088256836, "logits/rejected": -2.790005683898926, "logps/chosen": -261.884521484375, "logps/rejected": -266.50653076171875, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": -0.1440531611442566, "rewards/margins": 3.558241128921509, "rewards/rejected": -3.70229434967041, "step": 4363 }, { "epoch": 0.51, "learning_rate": 1.499468524861226e-07, "logits/chosen": -2.9722530841827393, "logits/rejected": -2.9671471118927, "logps/chosen": -206.3912811279297, "logps/rejected": -178.916015625, "loss": 0.5078, "rewards/accuracies": 0.75, "rewards/chosen": -0.5252015590667725, "rewards/margins": 1.6576200723648071, "rewards/rejected": -2.182821750640869, "step": 4364 }, { "epoch": 0.51, "learning_rate": 1.499114208102043e-07, "logits/chosen": -2.6765661239624023, "logits/rejected": -2.647322416305542, "logps/chosen": -225.3341827392578, "logps/rejected": -263.080810546875, "loss": 0.2772, "rewards/accuracies": 0.875, "rewards/chosen": -0.9254167675971985, "rewards/margins": 2.201167106628418, "rewards/rejected": -3.1265838146209717, "step": 4365 }, { "epoch": 0.51, "learning_rate": 1.4987598913428606e-07, "logits/chosen": -2.1692962646484375, "logits/rejected": -2.252351760864258, "logps/chosen": -307.15081787109375, "logps/rejected": -212.50759887695312, "loss": 0.4129, "rewards/accuracies": 0.875, "rewards/chosen": -0.5721405148506165, "rewards/margins": 3.0663905143737793, "rewards/rejected": -3.638530969619751, "step": 4366 }, { "epoch": 0.51, "learning_rate": 1.4984055745836778e-07, "logits/chosen": -2.7623291015625, "logits/rejected": -2.8632280826568604, "logps/chosen": -217.71620178222656, "logps/rejected": -228.7214813232422, "loss": 0.2426, "rewards/accuracies": 0.875, "rewards/chosen": -1.0911099910736084, "rewards/margins": 2.347404956817627, "rewards/rejected": -3.4385149478912354, "step": 4367 }, { "epoch": 0.51, "learning_rate": 1.498051257824495e-07, "logits/chosen": -2.310675859451294, "logits/rejected": -2.348601818084717, "logps/chosen": -225.42730712890625, "logps/rejected": -225.019775390625, "loss": 0.2782, "rewards/accuracies": 0.75, "rewards/chosen": -0.36424386501312256, "rewards/margins": 2.0844473838806152, "rewards/rejected": -2.4486913681030273, "step": 4368 }, { "epoch": 0.51, "learning_rate": 1.4976969410653123e-07, "logits/chosen": -2.5015225410461426, "logits/rejected": -2.5293760299682617, "logps/chosen": -541.6838989257812, "logps/rejected": -507.81488037109375, "loss": 0.315, "rewards/accuracies": 0.875, "rewards/chosen": -1.2868618965148926, "rewards/margins": 2.9761388301849365, "rewards/rejected": -4.26300048828125, "step": 4369 }, { "epoch": 0.51, "learning_rate": 1.4973426243061295e-07, "logits/chosen": -2.5372400283813477, "logits/rejected": -2.498185396194458, "logps/chosen": -418.7004089355469, "logps/rejected": -464.050537109375, "loss": 0.3025, "rewards/accuracies": 0.75, "rewards/chosen": -0.9781321287155151, "rewards/margins": 3.6465299129486084, "rewards/rejected": -4.624661922454834, "step": 4370 }, { "epoch": 0.51, "learning_rate": 1.4969883075469467e-07, "logits/chosen": -2.75283145904541, "logits/rejected": -2.5639398097991943, "logps/chosen": -294.8426208496094, "logps/rejected": -266.1232604980469, "loss": 0.2143, "rewards/accuracies": 1.0, "rewards/chosen": -1.4081199169158936, "rewards/margins": 1.7354803085327148, "rewards/rejected": -3.1436002254486084, "step": 4371 }, { "epoch": 0.51, "learning_rate": 1.4966339907877642e-07, "logits/chosen": -2.316554546356201, "logits/rejected": -2.0293965339660645, "logps/chosen": -406.33233642578125, "logps/rejected": -498.1932067871094, "loss": 0.5817, "rewards/accuracies": 0.625, "rewards/chosen": -1.1302292346954346, "rewards/margins": 1.7246274948120117, "rewards/rejected": -2.854856491088867, "step": 4372 }, { "epoch": 0.51, "learning_rate": 1.4962796740285814e-07, "logits/chosen": -1.9122366905212402, "logits/rejected": -2.1462059020996094, "logps/chosen": -390.9963684082031, "logps/rejected": -290.6221923828125, "loss": 1.1691, "rewards/accuracies": 0.625, "rewards/chosen": -1.8259488344192505, "rewards/margins": 0.15067905187606812, "rewards/rejected": -1.9766278266906738, "step": 4373 }, { "epoch": 0.51, "learning_rate": 1.495925357269399e-07, "logits/chosen": -2.0770342350006104, "logits/rejected": -2.274329662322998, "logps/chosen": -452.0794677734375, "logps/rejected": -383.7308349609375, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -1.4660171270370483, "rewards/margins": 3.7271156311035156, "rewards/rejected": -5.1931328773498535, "step": 4374 }, { "epoch": 0.51, "learning_rate": 1.495571040510216e-07, "logits/chosen": -1.9078186750411987, "logits/rejected": -1.688300609588623, "logps/chosen": -308.55194091796875, "logps/rejected": -286.681884765625, "loss": 0.5419, "rewards/accuracies": 0.875, "rewards/chosen": -0.6246774196624756, "rewards/margins": 1.6940176486968994, "rewards/rejected": -2.318695068359375, "step": 4375 }, { "epoch": 0.51, "learning_rate": 1.4952167237510333e-07, "logits/chosen": -2.62184476852417, "logits/rejected": -2.839738368988037, "logps/chosen": -353.78607177734375, "logps/rejected": -205.7127685546875, "loss": 0.2973, "rewards/accuracies": 0.875, "rewards/chosen": -0.9544470310211182, "rewards/margins": 2.1276042461395264, "rewards/rejected": -3.0820512771606445, "step": 4376 }, { "epoch": 0.51, "learning_rate": 1.4948624069918506e-07, "logits/chosen": -2.182478189468384, "logits/rejected": -2.4130959510803223, "logps/chosen": -299.7957763671875, "logps/rejected": -209.65316772460938, "loss": 0.2684, "rewards/accuracies": 0.875, "rewards/chosen": -0.726485013961792, "rewards/margins": 2.0807764530181885, "rewards/rejected": -2.8072617053985596, "step": 4377 }, { "epoch": 0.51, "learning_rate": 1.494508090232668e-07, "logits/chosen": -2.3520045280456543, "logits/rejected": -2.3126676082611084, "logps/chosen": -255.115234375, "logps/rejected": -276.49151611328125, "loss": 0.3904, "rewards/accuracies": 0.875, "rewards/chosen": -0.7623100280761719, "rewards/margins": 2.470048666000366, "rewards/rejected": -3.232358694076538, "step": 4378 }, { "epoch": 0.51, "learning_rate": 1.4941537734734853e-07, "logits/chosen": -2.218036651611328, "logits/rejected": -2.0261313915252686, "logps/chosen": -233.1011962890625, "logps/rejected": -399.63427734375, "loss": 0.5243, "rewards/accuracies": 0.875, "rewards/chosen": -1.193806767463684, "rewards/margins": 1.6197278499603271, "rewards/rejected": -2.8135344982147217, "step": 4379 }, { "epoch": 0.51, "learning_rate": 1.4937994567143025e-07, "logits/chosen": -2.521239757537842, "logits/rejected": -2.2199630737304688, "logps/chosen": -218.27894592285156, "logps/rejected": -271.52215576171875, "loss": 0.2735, "rewards/accuracies": 0.875, "rewards/chosen": -0.9013968706130981, "rewards/margins": 2.6468002796173096, "rewards/rejected": -3.5481972694396973, "step": 4380 }, { "epoch": 0.51, "learning_rate": 1.4934451399551197e-07, "logits/chosen": -2.236525297164917, "logits/rejected": -2.069474220275879, "logps/chosen": -226.61947631835938, "logps/rejected": -200.11383056640625, "loss": 1.4552, "rewards/accuracies": 0.875, "rewards/chosen": -2.448176860809326, "rewards/margins": 0.2535504698753357, "rewards/rejected": -2.7017273902893066, "step": 4381 }, { "epoch": 0.51, "learning_rate": 1.493090823195937e-07, "logits/chosen": -2.589669704437256, "logits/rejected": -2.5461068153381348, "logps/chosen": -339.33685302734375, "logps/rejected": -250.9718780517578, "loss": 0.2535, "rewards/accuracies": 0.875, "rewards/chosen": -0.37482020258903503, "rewards/margins": 3.1432723999023438, "rewards/rejected": -3.518092632293701, "step": 4382 }, { "epoch": 0.51, "learning_rate": 1.4927365064367544e-07, "logits/chosen": -2.2279052734375, "logits/rejected": -2.6070244312286377, "logps/chosen": -253.30596923828125, "logps/rejected": -228.40545654296875, "loss": 0.6172, "rewards/accuracies": 0.875, "rewards/chosen": -1.5758841037750244, "rewards/margins": 1.280324101448059, "rewards/rejected": -2.856208324432373, "step": 4383 }, { "epoch": 0.51, "learning_rate": 1.4923821896775716e-07, "logits/chosen": -2.454094648361206, "logits/rejected": -2.856034755706787, "logps/chosen": -446.74072265625, "logps/rejected": -245.69976806640625, "loss": 0.5204, "rewards/accuracies": 0.75, "rewards/chosen": -1.0648690462112427, "rewards/margins": 1.5602524280548096, "rewards/rejected": -2.625121593475342, "step": 4384 }, { "epoch": 0.51, "learning_rate": 1.4920278729183889e-07, "logits/chosen": -2.4626951217651367, "logits/rejected": -2.562631368637085, "logps/chosen": -234.2470245361328, "logps/rejected": -256.95556640625, "loss": 0.5188, "rewards/accuracies": 0.5, "rewards/chosen": -0.9999050498008728, "rewards/margins": 1.2314295768737793, "rewards/rejected": -2.231334686279297, "step": 4385 }, { "epoch": 0.51, "learning_rate": 1.4916735561592064e-07, "logits/chosen": -2.3105568885803223, "logits/rejected": -2.1869077682495117, "logps/chosen": -333.7840881347656, "logps/rejected": -324.4123229980469, "loss": 1.0571, "rewards/accuracies": 0.75, "rewards/chosen": -1.9736287593841553, "rewards/margins": 0.6410728096961975, "rewards/rejected": -2.614701509475708, "step": 4386 }, { "epoch": 0.51, "learning_rate": 1.4913192394000236e-07, "logits/chosen": -2.420621871948242, "logits/rejected": -2.5317068099975586, "logps/chosen": -210.7329864501953, "logps/rejected": -255.68417358398438, "loss": 0.5233, "rewards/accuracies": 0.75, "rewards/chosen": -1.2133358716964722, "rewards/margins": 1.0521609783172607, "rewards/rejected": -2.2654967308044434, "step": 4387 }, { "epoch": 0.51, "learning_rate": 1.4909649226408408e-07, "logits/chosen": -2.8436591625213623, "logits/rejected": -2.812608003616333, "logps/chosen": -200.7388458251953, "logps/rejected": -187.2425537109375, "loss": 0.244, "rewards/accuracies": 1.0, "rewards/chosen": -1.3373832702636719, "rewards/margins": 1.6355093717575073, "rewards/rejected": -2.9728925228118896, "step": 4388 }, { "epoch": 0.51, "learning_rate": 1.4906106058816583e-07, "logits/chosen": -2.505821466445923, "logits/rejected": -2.4162914752960205, "logps/chosen": -119.76532745361328, "logps/rejected": -171.96142578125, "loss": 0.6685, "rewards/accuracies": 0.75, "rewards/chosen": -1.2128705978393555, "rewards/margins": 1.1468853950500488, "rewards/rejected": -2.3597559928894043, "step": 4389 }, { "epoch": 0.51, "learning_rate": 1.4902562891224755e-07, "logits/chosen": -1.8850551843643188, "logits/rejected": -2.1123485565185547, "logps/chosen": -312.9224548339844, "logps/rejected": -252.53005981445312, "loss": 0.2902, "rewards/accuracies": 0.875, "rewards/chosen": -0.5672345757484436, "rewards/margins": 1.911978006362915, "rewards/rejected": -2.479212522506714, "step": 4390 }, { "epoch": 0.51, "learning_rate": 1.4899019723632927e-07, "logits/chosen": -2.2002546787261963, "logits/rejected": -2.296029806137085, "logps/chosen": -425.8719787597656, "logps/rejected": -395.3763122558594, "loss": 0.3651, "rewards/accuracies": 0.75, "rewards/chosen": -1.2021698951721191, "rewards/margins": 2.863576889038086, "rewards/rejected": -4.065746784210205, "step": 4391 }, { "epoch": 0.51, "learning_rate": 1.48954765560411e-07, "logits/chosen": -1.8527302742004395, "logits/rejected": -1.6333009004592896, "logps/chosen": -221.0084686279297, "logps/rejected": -291.35211181640625, "loss": 0.2237, "rewards/accuracies": 0.875, "rewards/chosen": -0.4222627878189087, "rewards/margins": 3.366766929626465, "rewards/rejected": -3.789029836654663, "step": 4392 }, { "epoch": 0.51, "learning_rate": 1.4891933388449272e-07, "logits/chosen": -2.5589468479156494, "logits/rejected": -2.4643869400024414, "logps/chosen": -170.20748901367188, "logps/rejected": -152.09963989257812, "loss": 0.2349, "rewards/accuracies": 1.0, "rewards/chosen": -0.28161168098449707, "rewards/margins": 2.009808301925659, "rewards/rejected": -2.2914199829101562, "step": 4393 }, { "epoch": 0.51, "learning_rate": 1.4888390220857447e-07, "logits/chosen": -2.6228833198547363, "logits/rejected": -2.5463998317718506, "logps/chosen": -133.10140991210938, "logps/rejected": -128.49986267089844, "loss": 0.5215, "rewards/accuracies": 0.75, "rewards/chosen": -0.37436443567276, "rewards/margins": 0.9123578071594238, "rewards/rejected": -1.286722183227539, "step": 4394 }, { "epoch": 0.51, "learning_rate": 1.488484705326562e-07, "logits/chosen": -2.649916172027588, "logits/rejected": -2.5899627208709717, "logps/chosen": -186.29647827148438, "logps/rejected": -350.657958984375, "loss": 0.2673, "rewards/accuracies": 0.875, "rewards/chosen": -0.7304617166519165, "rewards/margins": 2.3380022048950195, "rewards/rejected": -3.0684638023376465, "step": 4395 }, { "epoch": 0.51, "learning_rate": 1.488130388567379e-07, "logits/chosen": -1.977373480796814, "logits/rejected": -1.6695935726165771, "logps/chosen": -342.6892395019531, "logps/rejected": -328.6878662109375, "loss": 0.5453, "rewards/accuracies": 0.625, "rewards/chosen": -0.7726660966873169, "rewards/margins": 1.2011258602142334, "rewards/rejected": -1.9737920761108398, "step": 4396 }, { "epoch": 0.51, "learning_rate": 1.4877760718081966e-07, "logits/chosen": -2.2806236743927, "logits/rejected": -2.449937105178833, "logps/chosen": -289.75189208984375, "logps/rejected": -220.74557495117188, "loss": 0.2903, "rewards/accuracies": 0.75, "rewards/chosen": -0.6180912256240845, "rewards/margins": 2.3723080158233643, "rewards/rejected": -2.990399122238159, "step": 4397 }, { "epoch": 0.51, "learning_rate": 1.4874217550490138e-07, "logits/chosen": -2.4363765716552734, "logits/rejected": -2.4653265476226807, "logps/chosen": -263.3052978515625, "logps/rejected": -339.00994873046875, "loss": 0.4572, "rewards/accuracies": 0.875, "rewards/chosen": -1.726157546043396, "rewards/margins": 1.8498708009719849, "rewards/rejected": -3.576028347015381, "step": 4398 }, { "epoch": 0.51, "learning_rate": 1.487067438289831e-07, "logits/chosen": -1.8131859302520752, "logits/rejected": -2.1827306747436523, "logps/chosen": -344.66522216796875, "logps/rejected": -271.425048828125, "loss": 0.2509, "rewards/accuracies": 0.875, "rewards/chosen": -1.028161644935608, "rewards/margins": 2.6561880111694336, "rewards/rejected": -3.684349536895752, "step": 4399 }, { "epoch": 0.51, "learning_rate": 1.4867131215306482e-07, "logits/chosen": -2.0924620628356934, "logits/rejected": -2.1976428031921387, "logps/chosen": -265.1179504394531, "logps/rejected": -318.68109130859375, "loss": 0.2819, "rewards/accuracies": 0.875, "rewards/chosen": -0.9029265642166138, "rewards/margins": 2.494323492050171, "rewards/rejected": -3.397249937057495, "step": 4400 }, { "epoch": 0.51, "learning_rate": 1.4863588047714657e-07, "logits/chosen": -2.968360662460327, "logits/rejected": -2.9503936767578125, "logps/chosen": -182.49832153320312, "logps/rejected": -177.2061309814453, "loss": 0.2897, "rewards/accuracies": 0.875, "rewards/chosen": -1.1017954349517822, "rewards/margins": 2.0853538513183594, "rewards/rejected": -3.1871492862701416, "step": 4401 }, { "epoch": 0.51, "learning_rate": 1.486004488012283e-07, "logits/chosen": -1.8406156301498413, "logits/rejected": -2.0943050384521484, "logps/chosen": -295.34283447265625, "logps/rejected": -261.4505920410156, "loss": 0.6362, "rewards/accuracies": 0.5, "rewards/chosen": -1.893394947052002, "rewards/margins": 1.4385179281234741, "rewards/rejected": -3.3319129943847656, "step": 4402 }, { "epoch": 0.51, "learning_rate": 1.4856501712531002e-07, "logits/chosen": -2.2200498580932617, "logits/rejected": -2.3146586418151855, "logps/chosen": -244.54991149902344, "logps/rejected": -291.24224853515625, "loss": 0.3311, "rewards/accuracies": 0.875, "rewards/chosen": -1.3029348850250244, "rewards/margins": 2.7298858165740967, "rewards/rejected": -4.032820701599121, "step": 4403 }, { "epoch": 0.51, "learning_rate": 1.4852958544939174e-07, "logits/chosen": -2.4234228134155273, "logits/rejected": -2.4797098636627197, "logps/chosen": -295.424072265625, "logps/rejected": -202.92066955566406, "loss": 0.5093, "rewards/accuracies": 0.75, "rewards/chosen": -1.8641774654388428, "rewards/margins": 1.2280616760253906, "rewards/rejected": -3.0922389030456543, "step": 4404 }, { "epoch": 0.51, "learning_rate": 1.4849415377347346e-07, "logits/chosen": -2.034693956375122, "logits/rejected": -2.1512560844421387, "logps/chosen": -396.93121337890625, "logps/rejected": -347.546142578125, "loss": 0.2333, "rewards/accuracies": 1.0, "rewards/chosen": -1.4289146661758423, "rewards/margins": 1.821908712387085, "rewards/rejected": -3.2508232593536377, "step": 4405 }, { "epoch": 0.51, "learning_rate": 1.484587220975552e-07, "logits/chosen": -2.6328794956207275, "logits/rejected": -2.6797876358032227, "logps/chosen": -166.774658203125, "logps/rejected": -209.33944702148438, "loss": 0.2129, "rewards/accuracies": 0.875, "rewards/chosen": -0.5647311806678772, "rewards/margins": 3.3435425758361816, "rewards/rejected": -3.908273696899414, "step": 4406 }, { "epoch": 0.51, "learning_rate": 1.4842329042163693e-07, "logits/chosen": -2.381096601486206, "logits/rejected": -2.547952890396118, "logps/chosen": -295.4427795410156, "logps/rejected": -223.9435577392578, "loss": 0.1637, "rewards/accuracies": 1.0, "rewards/chosen": -0.3597848117351532, "rewards/margins": 2.4268367290496826, "rewards/rejected": -2.786621570587158, "step": 4407 }, { "epoch": 0.51, "learning_rate": 1.4838785874571868e-07, "logits/chosen": -2.4607720375061035, "logits/rejected": -2.2779037952423096, "logps/chosen": -189.45005798339844, "logps/rejected": -266.0784912109375, "loss": 0.7671, "rewards/accuracies": 0.625, "rewards/chosen": -1.7229597568511963, "rewards/margins": 1.09784734249115, "rewards/rejected": -2.8208069801330566, "step": 4408 }, { "epoch": 0.51, "learning_rate": 1.483524270698004e-07, "logits/chosen": -2.4146833419799805, "logits/rejected": -2.670118808746338, "logps/chosen": -327.0577392578125, "logps/rejected": -250.93777465820312, "loss": 0.1746, "rewards/accuracies": 1.0, "rewards/chosen": -0.9281965494155884, "rewards/margins": 2.2258803844451904, "rewards/rejected": -3.1540770530700684, "step": 4409 }, { "epoch": 0.51, "learning_rate": 1.4831699539388212e-07, "logits/chosen": -2.5412850379943848, "logits/rejected": -2.3235068321228027, "logps/chosen": -214.45562744140625, "logps/rejected": -299.60821533203125, "loss": 0.201, "rewards/accuracies": 1.0, "rewards/chosen": -0.3515143096446991, "rewards/margins": 2.426133155822754, "rewards/rejected": -2.7776474952697754, "step": 4410 }, { "epoch": 0.51, "learning_rate": 1.4828156371796385e-07, "logits/chosen": -2.643850803375244, "logits/rejected": -2.778143882751465, "logps/chosen": -317.8985900878906, "logps/rejected": -266.9397277832031, "loss": 0.9565, "rewards/accuracies": 0.5, "rewards/chosen": -1.8946664333343506, "rewards/margins": 1.142998456954956, "rewards/rejected": -3.0376646518707275, "step": 4411 }, { "epoch": 0.51, "learning_rate": 1.4824613204204557e-07, "logits/chosen": -1.8103883266448975, "logits/rejected": -1.7367323637008667, "logps/chosen": -161.3958740234375, "logps/rejected": -241.13726806640625, "loss": 0.1599, "rewards/accuracies": 1.0, "rewards/chosen": -1.0110852718353271, "rewards/margins": 1.9242312908172607, "rewards/rejected": -2.935316562652588, "step": 4412 }, { "epoch": 0.51, "learning_rate": 1.4821070036612732e-07, "logits/chosen": -2.3891959190368652, "logits/rejected": -2.6364450454711914, "logps/chosen": -238.0491943359375, "logps/rejected": -217.40872192382812, "loss": 0.2476, "rewards/accuracies": 1.0, "rewards/chosen": -1.0949835777282715, "rewards/margins": 2.981060028076172, "rewards/rejected": -4.076043128967285, "step": 4413 }, { "epoch": 0.51, "learning_rate": 1.4817526869020904e-07, "logits/chosen": -2.5794312953948975, "logits/rejected": -2.549661874771118, "logps/chosen": -116.14244079589844, "logps/rejected": -149.17257690429688, "loss": 0.7781, "rewards/accuracies": 0.75, "rewards/chosen": -0.9410846829414368, "rewards/margins": 2.013380765914917, "rewards/rejected": -2.954465389251709, "step": 4414 }, { "epoch": 0.51, "learning_rate": 1.4813983701429076e-07, "logits/chosen": -2.1912097930908203, "logits/rejected": -2.1807496547698975, "logps/chosen": -250.3771514892578, "logps/rejected": -289.1433410644531, "loss": 0.4719, "rewards/accuracies": 0.625, "rewards/chosen": -0.304254949092865, "rewards/margins": 1.1198935508728027, "rewards/rejected": -1.4241485595703125, "step": 4415 }, { "epoch": 0.51, "learning_rate": 1.4810440533837248e-07, "logits/chosen": -2.538388252258301, "logits/rejected": -2.57651424407959, "logps/chosen": -218.90145874023438, "logps/rejected": -254.50723266601562, "loss": 0.2231, "rewards/accuracies": 0.875, "rewards/chosen": -1.0254383087158203, "rewards/margins": 2.687488079071045, "rewards/rejected": -3.712926149368286, "step": 4416 }, { "epoch": 0.51, "learning_rate": 1.4806897366245423e-07, "logits/chosen": -2.8534984588623047, "logits/rejected": -2.792548179626465, "logps/chosen": -134.83261108398438, "logps/rejected": -221.30496215820312, "loss": 0.088, "rewards/accuracies": 1.0, "rewards/chosen": 0.060146212577819824, "rewards/margins": 5.51931095123291, "rewards/rejected": -5.459164619445801, "step": 4417 }, { "epoch": 0.51, "learning_rate": 1.4803354198653595e-07, "logits/chosen": -2.43074107170105, "logits/rejected": -2.2295186519622803, "logps/chosen": -218.0709228515625, "logps/rejected": -277.57666015625, "loss": 0.4934, "rewards/accuracies": 0.75, "rewards/chosen": -1.8720704317092896, "rewards/margins": 3.302969217300415, "rewards/rejected": -5.175039768218994, "step": 4418 }, { "epoch": 0.51, "learning_rate": 1.479981103106177e-07, "logits/chosen": -2.0074965953826904, "logits/rejected": -1.83356773853302, "logps/chosen": -345.2950439453125, "logps/rejected": -298.92523193359375, "loss": 0.3687, "rewards/accuracies": 0.875, "rewards/chosen": -0.9536505341529846, "rewards/margins": 2.594271659851074, "rewards/rejected": -3.547922134399414, "step": 4419 }, { "epoch": 0.51, "learning_rate": 1.4796267863469943e-07, "logits/chosen": -2.479872465133667, "logits/rejected": -2.3713173866271973, "logps/chosen": -270.0908508300781, "logps/rejected": -262.513427734375, "loss": 0.361, "rewards/accuracies": 0.875, "rewards/chosen": -0.39090344309806824, "rewards/margins": 1.1342356204986572, "rewards/rejected": -1.5251390933990479, "step": 4420 }, { "epoch": 0.51, "learning_rate": 1.4792724695878115e-07, "logits/chosen": -2.397322654724121, "logits/rejected": -2.4213998317718506, "logps/chosen": -303.75518798828125, "logps/rejected": -256.4100036621094, "loss": 0.7796, "rewards/accuracies": 0.5, "rewards/chosen": -1.7788350582122803, "rewards/margins": 0.48534438014030457, "rewards/rejected": -2.264179229736328, "step": 4421 }, { "epoch": 0.51, "learning_rate": 1.4789181528286287e-07, "logits/chosen": -2.20271372795105, "logits/rejected": -2.0609002113342285, "logps/chosen": -251.00485229492188, "logps/rejected": -272.7604675292969, "loss": 0.3942, "rewards/accuracies": 0.75, "rewards/chosen": -0.9944652318954468, "rewards/margins": 3.0154619216918945, "rewards/rejected": -4.009926795959473, "step": 4422 }, { "epoch": 0.51, "learning_rate": 1.478563836069446e-07, "logits/chosen": -1.7998576164245605, "logits/rejected": -2.386542320251465, "logps/chosen": -406.647216796875, "logps/rejected": -297.8609619140625, "loss": 0.2157, "rewards/accuracies": 0.75, "rewards/chosen": -0.5721414089202881, "rewards/margins": 3.177448034286499, "rewards/rejected": -3.749589681625366, "step": 4423 }, { "epoch": 0.51, "learning_rate": 1.4782095193102634e-07, "logits/chosen": -1.617243766784668, "logits/rejected": -2.1043570041656494, "logps/chosen": -391.259033203125, "logps/rejected": -318.3969421386719, "loss": 0.2043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8498345017433167, "rewards/margins": 2.8927087783813477, "rewards/rejected": -3.7425432205200195, "step": 4424 }, { "epoch": 0.51, "learning_rate": 1.4778552025510806e-07, "logits/chosen": -2.149286985397339, "logits/rejected": -2.4479012489318848, "logps/chosen": -375.099853515625, "logps/rejected": -217.349853515625, "loss": 0.7245, "rewards/accuracies": 0.75, "rewards/chosen": -1.0832116603851318, "rewards/margins": 1.632344126701355, "rewards/rejected": -2.7155556678771973, "step": 4425 }, { "epoch": 0.51, "learning_rate": 1.4775008857918978e-07, "logits/chosen": -2.6721880435943604, "logits/rejected": -2.7408366203308105, "logps/chosen": -339.01959228515625, "logps/rejected": -261.10089111328125, "loss": 0.3962, "rewards/accuracies": 0.875, "rewards/chosen": -0.6538711190223694, "rewards/margins": 1.8800548315048218, "rewards/rejected": -2.533926010131836, "step": 4426 }, { "epoch": 0.51, "learning_rate": 1.477146569032715e-07, "logits/chosen": -2.9296205043792725, "logits/rejected": -2.9541945457458496, "logps/chosen": -251.08499145507812, "logps/rejected": -216.32086181640625, "loss": 0.4743, "rewards/accuracies": 0.625, "rewards/chosen": -1.308314323425293, "rewards/margins": 1.8914165496826172, "rewards/rejected": -3.19973087310791, "step": 4427 }, { "epoch": 0.52, "learning_rate": 1.4767922522735326e-07, "logits/chosen": -2.1725010871887207, "logits/rejected": -2.2895779609680176, "logps/chosen": -275.2348327636719, "logps/rejected": -251.96725463867188, "loss": 0.2433, "rewards/accuracies": 1.0, "rewards/chosen": -1.5583562850952148, "rewards/margins": 2.2301840782165527, "rewards/rejected": -3.7885403633117676, "step": 4428 }, { "epoch": 0.52, "learning_rate": 1.4764379355143498e-07, "logits/chosen": -1.9671940803527832, "logits/rejected": -1.940125584602356, "logps/chosen": -391.9891357421875, "logps/rejected": -355.71856689453125, "loss": 0.501, "rewards/accuracies": 0.75, "rewards/chosen": -1.09218168258667, "rewards/margins": 1.0796053409576416, "rewards/rejected": -2.1717872619628906, "step": 4429 }, { "epoch": 0.52, "learning_rate": 1.476083618755167e-07, "logits/chosen": -2.0464701652526855, "logits/rejected": -1.9772510528564453, "logps/chosen": -318.3564147949219, "logps/rejected": -268.5296630859375, "loss": 0.434, "rewards/accuracies": 0.875, "rewards/chosen": -0.5642836689949036, "rewards/margins": 0.9671038389205933, "rewards/rejected": -1.531387448310852, "step": 4430 }, { "epoch": 0.52, "learning_rate": 1.4757293019959845e-07, "logits/chosen": -2.512221574783325, "logits/rejected": -2.619988441467285, "logps/chosen": -248.73089599609375, "logps/rejected": -254.08273315429688, "loss": 0.2285, "rewards/accuracies": 0.875, "rewards/chosen": -0.9480062127113342, "rewards/margins": 2.524616241455078, "rewards/rejected": -3.4726223945617676, "step": 4431 }, { "epoch": 0.52, "learning_rate": 1.4753749852368017e-07, "logits/chosen": -2.988757848739624, "logits/rejected": -3.057882070541382, "logps/chosen": -137.3933563232422, "logps/rejected": -209.21343994140625, "loss": 0.2188, "rewards/accuracies": 1.0, "rewards/chosen": -0.7389181852340698, "rewards/margins": 2.2509453296661377, "rewards/rejected": -2.989863634109497, "step": 4432 }, { "epoch": 0.52, "learning_rate": 1.475020668477619e-07, "logits/chosen": -2.1609950065612793, "logits/rejected": -2.303598165512085, "logps/chosen": -278.7253112792969, "logps/rejected": -179.31640625, "loss": 0.1657, "rewards/accuracies": 1.0, "rewards/chosen": -0.08576703071594238, "rewards/margins": 2.3501505851745605, "rewards/rejected": -2.435917615890503, "step": 4433 }, { "epoch": 0.52, "learning_rate": 1.4746663517184361e-07, "logits/chosen": -2.4267539978027344, "logits/rejected": -2.0118002891540527, "logps/chosen": -270.8675537109375, "logps/rejected": -290.1181945800781, "loss": 0.2195, "rewards/accuracies": 1.0, "rewards/chosen": -0.43265408277511597, "rewards/margins": 2.4803638458251953, "rewards/rejected": -2.913018226623535, "step": 4434 }, { "epoch": 0.52, "learning_rate": 1.4743120349592534e-07, "logits/chosen": -2.362781047821045, "logits/rejected": -2.33396577835083, "logps/chosen": -341.7763671875, "logps/rejected": -331.458740234375, "loss": 0.2306, "rewards/accuracies": 1.0, "rewards/chosen": -0.8991145491600037, "rewards/margins": 1.691605806350708, "rewards/rejected": -2.5907204151153564, "step": 4435 }, { "epoch": 0.52, "learning_rate": 1.4739577182000709e-07, "logits/chosen": -2.4533236026763916, "logits/rejected": -2.331653594970703, "logps/chosen": -338.546630859375, "logps/rejected": -323.87261962890625, "loss": 0.4362, "rewards/accuracies": 0.75, "rewards/chosen": -0.7489994764328003, "rewards/margins": 1.180591106414795, "rewards/rejected": -1.9295908212661743, "step": 4436 }, { "epoch": 0.52, "learning_rate": 1.473603401440888e-07, "logits/chosen": -2.4877233505249023, "logits/rejected": -2.321103096008301, "logps/chosen": -132.04811096191406, "logps/rejected": -275.34332275390625, "loss": 0.1804, "rewards/accuracies": 0.875, "rewards/chosen": -0.7433214783668518, "rewards/margins": 4.282045364379883, "rewards/rejected": -5.02536678314209, "step": 4437 }, { "epoch": 0.52, "learning_rate": 1.4732490846817053e-07, "logits/chosen": -1.9070383310317993, "logits/rejected": -2.264163017272949, "logps/chosen": -512.89208984375, "logps/rejected": -258.14520263671875, "loss": 0.4108, "rewards/accuracies": 0.875, "rewards/chosen": -0.8968722224235535, "rewards/margins": 1.2877230644226074, "rewards/rejected": -2.1845953464508057, "step": 4438 }, { "epoch": 0.52, "learning_rate": 1.4728947679225228e-07, "logits/chosen": -2.262068033218384, "logits/rejected": -2.207988739013672, "logps/chosen": -318.7604675292969, "logps/rejected": -328.91162109375, "loss": 0.3935, "rewards/accuracies": 0.875, "rewards/chosen": -0.6034828424453735, "rewards/margins": 3.05527400970459, "rewards/rejected": -3.658756732940674, "step": 4439 }, { "epoch": 0.52, "learning_rate": 1.47254045116334e-07, "logits/chosen": -2.025137424468994, "logits/rejected": -1.870969533920288, "logps/chosen": -315.0353698730469, "logps/rejected": -304.2069091796875, "loss": 0.4461, "rewards/accuracies": 0.75, "rewards/chosen": -1.385631799697876, "rewards/margins": 0.9987223744392395, "rewards/rejected": -2.3843541145324707, "step": 4440 }, { "epoch": 0.52, "learning_rate": 1.4721861344041572e-07, "logits/chosen": -2.0687079429626465, "logits/rejected": -2.2213711738586426, "logps/chosen": -292.3248291015625, "logps/rejected": -275.29608154296875, "loss": 1.4275, "rewards/accuracies": 0.375, "rewards/chosen": -1.931654930114746, "rewards/margins": -0.6592932939529419, "rewards/rejected": -1.2723616361618042, "step": 4441 }, { "epoch": 0.52, "learning_rate": 1.4718318176449747e-07, "logits/chosen": -1.7691656351089478, "logits/rejected": -2.208822727203369, "logps/chosen": -556.1945190429688, "logps/rejected": -302.4568786621094, "loss": 0.2766, "rewards/accuracies": 1.0, "rewards/chosen": -0.6165403127670288, "rewards/margins": 1.6032326221466064, "rewards/rejected": -2.2197728157043457, "step": 4442 }, { "epoch": 0.52, "learning_rate": 1.471477500885792e-07, "logits/chosen": -2.3519840240478516, "logits/rejected": -2.558540105819702, "logps/chosen": -389.3567810058594, "logps/rejected": -326.2566833496094, "loss": 0.3025, "rewards/accuracies": 1.0, "rewards/chosen": -1.0227785110473633, "rewards/margins": 1.8923583030700684, "rewards/rejected": -2.9151368141174316, "step": 4443 }, { "epoch": 0.52, "learning_rate": 1.4711231841266092e-07, "logits/chosen": -2.569638729095459, "logits/rejected": -2.523052215576172, "logps/chosen": -205.23114013671875, "logps/rejected": -353.7296142578125, "loss": 0.1027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6614469885826111, "rewards/margins": 3.8840219974517822, "rewards/rejected": -4.545469284057617, "step": 4444 }, { "epoch": 0.52, "learning_rate": 1.4707688673674264e-07, "logits/chosen": -1.8673027753829956, "logits/rejected": -2.145742654800415, "logps/chosen": -298.67291259765625, "logps/rejected": -344.64013671875, "loss": 0.5412, "rewards/accuracies": 0.875, "rewards/chosen": -1.011865496635437, "rewards/margins": 3.21443510055542, "rewards/rejected": -4.2263007164001465, "step": 4445 }, { "epoch": 0.52, "learning_rate": 1.4704145506082436e-07, "logits/chosen": -2.2536473274230957, "logits/rejected": -2.128931760787964, "logps/chosen": -321.917236328125, "logps/rejected": -322.8045654296875, "loss": 0.4554, "rewards/accuracies": 0.875, "rewards/chosen": -1.0958231687545776, "rewards/margins": 1.1885547637939453, "rewards/rejected": -2.2843778133392334, "step": 4446 }, { "epoch": 0.52, "learning_rate": 1.4700602338490608e-07, "logits/chosen": -2.766233205795288, "logits/rejected": -2.8095147609710693, "logps/chosen": -245.27239990234375, "logps/rejected": -294.1472473144531, "loss": 0.2607, "rewards/accuracies": 0.875, "rewards/chosen": -0.7898604869842529, "rewards/margins": 2.7628350257873535, "rewards/rejected": -3.5526952743530273, "step": 4447 }, { "epoch": 0.52, "learning_rate": 1.4697059170898783e-07, "logits/chosen": -2.4857375621795654, "logits/rejected": -2.342219591140747, "logps/chosen": -318.2737121582031, "logps/rejected": -278.203857421875, "loss": 0.1923, "rewards/accuracies": 0.875, "rewards/chosen": -0.6695927381515503, "rewards/margins": 3.376140594482422, "rewards/rejected": -4.045733451843262, "step": 4448 }, { "epoch": 0.52, "learning_rate": 1.4693516003306955e-07, "logits/chosen": -2.0723586082458496, "logits/rejected": -2.108640432357788, "logps/chosen": -201.29833984375, "logps/rejected": -223.54495239257812, "loss": 0.3859, "rewards/accuracies": 0.875, "rewards/chosen": -1.5666395425796509, "rewards/margins": 2.223055839538574, "rewards/rejected": -3.7896952629089355, "step": 4449 }, { "epoch": 0.52, "learning_rate": 1.4689972835715127e-07, "logits/chosen": -2.2349987030029297, "logits/rejected": -2.4069197177886963, "logps/chosen": -450.22802734375, "logps/rejected": -362.393310546875, "loss": 0.5329, "rewards/accuracies": 0.75, "rewards/chosen": -1.1170331239700317, "rewards/margins": 2.380876064300537, "rewards/rejected": -3.4979093074798584, "step": 4450 }, { "epoch": 0.52, "learning_rate": 1.4686429668123302e-07, "logits/chosen": -2.5749642848968506, "logits/rejected": -2.612610101699829, "logps/chosen": -285.2718200683594, "logps/rejected": -244.83592224121094, "loss": 0.3427, "rewards/accuracies": 0.875, "rewards/chosen": -1.0477691888809204, "rewards/margins": 2.3544816970825195, "rewards/rejected": -3.4022510051727295, "step": 4451 }, { "epoch": 0.52, "learning_rate": 1.4682886500531475e-07, "logits/chosen": -2.2493345737457275, "logits/rejected": -2.1813173294067383, "logps/chosen": -245.28187561035156, "logps/rejected": -329.4098205566406, "loss": 0.4165, "rewards/accuracies": 0.875, "rewards/chosen": -0.3334898352622986, "rewards/margins": 2.6428403854370117, "rewards/rejected": -2.976330041885376, "step": 4452 }, { "epoch": 0.52, "learning_rate": 1.4679343332939647e-07, "logits/chosen": -2.0255753993988037, "logits/rejected": -2.0662360191345215, "logps/chosen": -304.1136169433594, "logps/rejected": -213.90328979492188, "loss": 0.5954, "rewards/accuracies": 0.625, "rewards/chosen": -1.030884027481079, "rewards/margins": 1.6108708381652832, "rewards/rejected": -2.6417548656463623, "step": 4453 }, { "epoch": 0.52, "learning_rate": 1.4675800165347822e-07, "logits/chosen": -2.625333309173584, "logits/rejected": -2.5561375617980957, "logps/chosen": -99.22433471679688, "logps/rejected": -215.77012634277344, "loss": 0.4811, "rewards/accuracies": 0.75, "rewards/chosen": -0.8753434419631958, "rewards/margins": 2.069636821746826, "rewards/rejected": -2.9449803829193115, "step": 4454 }, { "epoch": 0.52, "learning_rate": 1.4672256997755994e-07, "logits/chosen": -2.2792840003967285, "logits/rejected": -2.1142284870147705, "logps/chosen": -287.46429443359375, "logps/rejected": -327.88873291015625, "loss": 0.3179, "rewards/accuracies": 0.875, "rewards/chosen": -1.4376065731048584, "rewards/margins": 2.2809529304504395, "rewards/rejected": -3.718559741973877, "step": 4455 }, { "epoch": 0.52, "learning_rate": 1.4668713830164166e-07, "logits/chosen": -2.701112985610962, "logits/rejected": -2.6729681491851807, "logps/chosen": -222.4083709716797, "logps/rejected": -388.5052185058594, "loss": 0.295, "rewards/accuracies": 0.75, "rewards/chosen": -0.7814595699310303, "rewards/margins": 3.6106553077697754, "rewards/rejected": -4.392114639282227, "step": 4456 }, { "epoch": 0.52, "learning_rate": 1.4665170662572338e-07, "logits/chosen": -2.605877161026001, "logits/rejected": -2.68992280960083, "logps/chosen": -307.8085021972656, "logps/rejected": -307.5794677734375, "loss": 0.253, "rewards/accuracies": 0.875, "rewards/chosen": -0.3876197338104248, "rewards/margins": 5.3514580726623535, "rewards/rejected": -5.739077568054199, "step": 4457 }, { "epoch": 0.52, "learning_rate": 1.466162749498051e-07, "logits/chosen": -2.809152841567993, "logits/rejected": -2.6513051986694336, "logps/chosen": -197.27479553222656, "logps/rejected": -157.20521545410156, "loss": 0.1316, "rewards/accuracies": 1.0, "rewards/chosen": -0.18291398882865906, "rewards/margins": 2.7612855434417725, "rewards/rejected": -2.944199323654175, "step": 4458 }, { "epoch": 0.52, "learning_rate": 1.4658084327388685e-07, "logits/chosen": -2.5619490146636963, "logits/rejected": -2.363804340362549, "logps/chosen": -190.09835815429688, "logps/rejected": -348.0328063964844, "loss": 0.3735, "rewards/accuracies": 0.75, "rewards/chosen": -0.13806763291358948, "rewards/margins": 2.8524105548858643, "rewards/rejected": -2.990478038787842, "step": 4459 }, { "epoch": 0.52, "learning_rate": 1.4654541159796858e-07, "logits/chosen": -2.495720624923706, "logits/rejected": -2.3665590286254883, "logps/chosen": -138.03118896484375, "logps/rejected": -193.0414276123047, "loss": 0.4064, "rewards/accuracies": 0.75, "rewards/chosen": -1.4779880046844482, "rewards/margins": 2.1500084400177, "rewards/rejected": -3.6279964447021484, "step": 4460 }, { "epoch": 0.52, "learning_rate": 1.465099799220503e-07, "logits/chosen": -2.2872748374938965, "logits/rejected": -2.305588483810425, "logps/chosen": -250.34957885742188, "logps/rejected": -186.43698120117188, "loss": 0.5969, "rewards/accuracies": 0.625, "rewards/chosen": -1.0724120140075684, "rewards/margins": 0.6916897296905518, "rewards/rejected": -1.7641019821166992, "step": 4461 }, { "epoch": 0.52, "learning_rate": 1.4647454824613205e-07, "logits/chosen": -1.1987195014953613, "logits/rejected": -2.02854061126709, "logps/chosen": -445.2873229980469, "logps/rejected": -198.29734802246094, "loss": 0.5596, "rewards/accuracies": 0.75, "rewards/chosen": -0.6525984406471252, "rewards/margins": 1.8891656398773193, "rewards/rejected": -2.541764259338379, "step": 4462 }, { "epoch": 0.52, "learning_rate": 1.4643911657021377e-07, "logits/chosen": -2.0466513633728027, "logits/rejected": -1.8301678895950317, "logps/chosen": -178.20452880859375, "logps/rejected": -253.1067352294922, "loss": 0.3468, "rewards/accuracies": 0.875, "rewards/chosen": -1.0872677564620972, "rewards/margins": 1.8799058198928833, "rewards/rejected": -2.9671733379364014, "step": 4463 }, { "epoch": 0.52, "learning_rate": 1.464036848942955e-07, "logits/chosen": -2.5822713375091553, "logits/rejected": -2.498356819152832, "logps/chosen": -208.46078491210938, "logps/rejected": -286.2212829589844, "loss": 0.4677, "rewards/accuracies": 0.625, "rewards/chosen": -1.3653315305709839, "rewards/margins": 1.6777690649032593, "rewards/rejected": -3.043100357055664, "step": 4464 }, { "epoch": 0.52, "learning_rate": 1.4636825321837724e-07, "logits/chosen": -2.2921411991119385, "logits/rejected": -2.2969882488250732, "logps/chosen": -188.29824829101562, "logps/rejected": -204.5272216796875, "loss": 0.3132, "rewards/accuracies": 0.875, "rewards/chosen": -1.3106204271316528, "rewards/margins": 1.6569228172302246, "rewards/rejected": -2.967543601989746, "step": 4465 }, { "epoch": 0.52, "learning_rate": 1.4633282154245896e-07, "logits/chosen": -1.8771297931671143, "logits/rejected": -2.386168956756592, "logps/chosen": -513.5070190429688, "logps/rejected": -304.9075927734375, "loss": 0.1891, "rewards/accuracies": 1.0, "rewards/chosen": -0.9737687110900879, "rewards/margins": 2.774693012237549, "rewards/rejected": -3.7484617233276367, "step": 4466 }, { "epoch": 0.52, "learning_rate": 1.4629738986654068e-07, "logits/chosen": -2.164501667022705, "logits/rejected": -2.154564380645752, "logps/chosen": -377.2845458984375, "logps/rejected": -317.6806335449219, "loss": 0.291, "rewards/accuracies": 0.75, "rewards/chosen": -0.7112680673599243, "rewards/margins": 3.057116746902466, "rewards/rejected": -3.7683846950531006, "step": 4467 }, { "epoch": 0.52, "learning_rate": 1.462619581906224e-07, "logits/chosen": -2.7060515880584717, "logits/rejected": -2.898784875869751, "logps/chosen": -225.28872680664062, "logps/rejected": -237.53672790527344, "loss": 0.3125, "rewards/accuracies": 0.875, "rewards/chosen": -1.2169175148010254, "rewards/margins": 1.9486417770385742, "rewards/rejected": -3.1655595302581787, "step": 4468 }, { "epoch": 0.52, "learning_rate": 1.4622652651470413e-07, "logits/chosen": -2.081125259399414, "logits/rejected": -2.3295023441314697, "logps/chosen": -90.39495849609375, "logps/rejected": -78.12962341308594, "loss": 0.3544, "rewards/accuracies": 0.75, "rewards/chosen": -0.5589986443519592, "rewards/margins": 1.2361222505569458, "rewards/rejected": -1.7951208353042603, "step": 4469 }, { "epoch": 0.52, "learning_rate": 1.4619109483878585e-07, "logits/chosen": -2.8612313270568848, "logits/rejected": -2.902712821960449, "logps/chosen": -129.88836669921875, "logps/rejected": -233.85845947265625, "loss": 0.335, "rewards/accuracies": 0.875, "rewards/chosen": -1.039818525314331, "rewards/margins": 3.191118001937866, "rewards/rejected": -4.230936527252197, "step": 4470 }, { "epoch": 0.52, "learning_rate": 1.461556631628676e-07, "logits/chosen": -1.7133870124816895, "logits/rejected": -2.015822410583496, "logps/chosen": -348.4404602050781, "logps/rejected": -243.8916015625, "loss": 0.7743, "rewards/accuracies": 0.75, "rewards/chosen": -1.3400354385375977, "rewards/margins": 1.526307225227356, "rewards/rejected": -2.866342544555664, "step": 4471 }, { "epoch": 0.52, "learning_rate": 1.4612023148694932e-07, "logits/chosen": -2.9060301780700684, "logits/rejected": -2.969766616821289, "logps/chosen": -90.10565185546875, "logps/rejected": -144.2926025390625, "loss": 0.5102, "rewards/accuracies": 0.625, "rewards/chosen": -0.7778643369674683, "rewards/margins": 1.5235484838485718, "rewards/rejected": -2.30141282081604, "step": 4472 }, { "epoch": 0.52, "learning_rate": 1.4608479981103107e-07, "logits/chosen": -2.423031806945801, "logits/rejected": -2.318917989730835, "logps/chosen": -319.1260986328125, "logps/rejected": -259.3294372558594, "loss": 0.3037, "rewards/accuracies": 0.875, "rewards/chosen": -0.7504431009292603, "rewards/margins": 2.212116241455078, "rewards/rejected": -2.962559223175049, "step": 4473 }, { "epoch": 0.52, "learning_rate": 1.460493681351128e-07, "logits/chosen": -2.179124355316162, "logits/rejected": -2.1011312007904053, "logps/chosen": -242.96009826660156, "logps/rejected": -248.56988525390625, "loss": 0.7599, "rewards/accuracies": 0.875, "rewards/chosen": -1.0776407718658447, "rewards/margins": 2.032346248626709, "rewards/rejected": -3.1099870204925537, "step": 4474 }, { "epoch": 0.52, "learning_rate": 1.460139364591945e-07, "logits/chosen": -2.8879988193511963, "logits/rejected": -2.660626173019409, "logps/chosen": -208.4625244140625, "logps/rejected": -293.02532958984375, "loss": 0.2156, "rewards/accuracies": 1.0, "rewards/chosen": -0.5916134715080261, "rewards/margins": 2.238893985748291, "rewards/rejected": -2.830507278442383, "step": 4475 }, { "epoch": 0.52, "learning_rate": 1.4597850478327624e-07, "logits/chosen": -1.5811975002288818, "logits/rejected": -1.9255549907684326, "logps/chosen": -517.3548583984375, "logps/rejected": -401.31170654296875, "loss": 0.6448, "rewards/accuracies": 0.625, "rewards/chosen": -0.6135041117668152, "rewards/margins": 0.8250095248222351, "rewards/rejected": -1.4385137557983398, "step": 4476 }, { "epoch": 0.52, "learning_rate": 1.4594307310735798e-07, "logits/chosen": -1.8996312618255615, "logits/rejected": -1.7134811878204346, "logps/chosen": -181.67945861816406, "logps/rejected": -250.56008911132812, "loss": 0.3931, "rewards/accuracies": 0.75, "rewards/chosen": -0.5300594568252563, "rewards/margins": 1.6830741167068481, "rewards/rejected": -2.2131335735321045, "step": 4477 }, { "epoch": 0.52, "learning_rate": 1.459076414314397e-07, "logits/chosen": -1.8532767295837402, "logits/rejected": -1.7179992198944092, "logps/chosen": -219.01376342773438, "logps/rejected": -284.02264404296875, "loss": 0.333, "rewards/accuracies": 0.75, "rewards/chosen": -0.8556686639785767, "rewards/margins": 1.981954574584961, "rewards/rejected": -2.837623119354248, "step": 4478 }, { "epoch": 0.52, "learning_rate": 1.4587220975552143e-07, "logits/chosen": -2.4246692657470703, "logits/rejected": -2.4030208587646484, "logps/chosen": -160.03680419921875, "logps/rejected": -246.8701629638672, "loss": 0.1299, "rewards/accuracies": 1.0, "rewards/chosen": -0.25532275438308716, "rewards/margins": 2.757906675338745, "rewards/rejected": -3.0132293701171875, "step": 4479 }, { "epoch": 0.52, "learning_rate": 1.4583677807960315e-07, "logits/chosen": -1.6362032890319824, "logits/rejected": -1.732161521911621, "logps/chosen": -249.58319091796875, "logps/rejected": -300.4599304199219, "loss": 0.2851, "rewards/accuracies": 1.0, "rewards/chosen": -0.7609021663665771, "rewards/margins": 1.6288145780563354, "rewards/rejected": -2.389716625213623, "step": 4480 }, { "epoch": 0.52, "learning_rate": 1.4580134640368487e-07, "logits/chosen": -2.402101516723633, "logits/rejected": -2.631615400314331, "logps/chosen": -481.0220947265625, "logps/rejected": -290.1775207519531, "loss": 0.4889, "rewards/accuracies": 0.875, "rewards/chosen": -1.410093069076538, "rewards/margins": 1.709699273109436, "rewards/rejected": -3.1197924613952637, "step": 4481 }, { "epoch": 0.52, "learning_rate": 1.4576591472776662e-07, "logits/chosen": -2.4820668697357178, "logits/rejected": -2.071434259414673, "logps/chosen": -81.98477935791016, "logps/rejected": -273.60540771484375, "loss": 0.0979, "rewards/accuracies": 1.0, "rewards/chosen": 0.019898168742656708, "rewards/margins": 2.942033290863037, "rewards/rejected": -2.922135353088379, "step": 4482 }, { "epoch": 0.52, "learning_rate": 1.4573048305184834e-07, "logits/chosen": -2.4995055198669434, "logits/rejected": -2.6168086528778076, "logps/chosen": -285.3377990722656, "logps/rejected": -166.1121368408203, "loss": 0.5506, "rewards/accuracies": 0.625, "rewards/chosen": -0.9135602712631226, "rewards/margins": 1.1943471431732178, "rewards/rejected": -2.10790753364563, "step": 4483 }, { "epoch": 0.52, "learning_rate": 1.4569505137593007e-07, "logits/chosen": -2.2843873500823975, "logits/rejected": -2.759885787963867, "logps/chosen": -331.755615234375, "logps/rejected": -216.98208618164062, "loss": 0.8224, "rewards/accuracies": 0.5, "rewards/chosen": -1.3159453868865967, "rewards/margins": 0.15904884040355682, "rewards/rejected": -1.4749943017959595, "step": 4484 }, { "epoch": 0.52, "learning_rate": 1.4565961970001181e-07, "logits/chosen": -2.084843397140503, "logits/rejected": -2.456906318664551, "logps/chosen": -267.0753173828125, "logps/rejected": -247.46316528320312, "loss": 0.4102, "rewards/accuracies": 0.75, "rewards/chosen": -0.7693420052528381, "rewards/margins": 1.8761694431304932, "rewards/rejected": -2.6455113887786865, "step": 4485 }, { "epoch": 0.52, "learning_rate": 1.4562418802409354e-07, "logits/chosen": -2.6055572032928467, "logits/rejected": -2.7297492027282715, "logps/chosen": -351.4302673339844, "logps/rejected": -348.66851806640625, "loss": 0.3002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4020070433616638, "rewards/margins": 1.6331117153167725, "rewards/rejected": -2.035118818283081, "step": 4486 }, { "epoch": 0.52, "learning_rate": 1.4558875634817526e-07, "logits/chosen": -2.4523234367370605, "logits/rejected": -2.682985782623291, "logps/chosen": -685.8344116210938, "logps/rejected": -258.84661865234375, "loss": 0.4713, "rewards/accuracies": 0.75, "rewards/chosen": -1.0630455017089844, "rewards/margins": 2.051617383956909, "rewards/rejected": -3.1146628856658936, "step": 4487 }, { "epoch": 0.52, "learning_rate": 1.4555332467225698e-07, "logits/chosen": -2.6613357067108154, "logits/rejected": -2.6569604873657227, "logps/chosen": -301.30035400390625, "logps/rejected": -367.19561767578125, "loss": 0.3322, "rewards/accuracies": 0.875, "rewards/chosen": -0.6537983417510986, "rewards/margins": 2.7344160079956055, "rewards/rejected": -3.388214111328125, "step": 4488 }, { "epoch": 0.52, "learning_rate": 1.4551789299633873e-07, "logits/chosen": -1.8501089811325073, "logits/rejected": -2.242534637451172, "logps/chosen": -236.48529052734375, "logps/rejected": -188.29867553710938, "loss": 0.1387, "rewards/accuracies": 1.0, "rewards/chosen": -0.2772791087627411, "rewards/margins": 2.63201642036438, "rewards/rejected": -2.9092957973480225, "step": 4489 }, { "epoch": 0.52, "learning_rate": 1.4548246132042045e-07, "logits/chosen": -1.9406241178512573, "logits/rejected": -1.9414557218551636, "logps/chosen": -154.046630859375, "logps/rejected": -223.7883758544922, "loss": 0.6037, "rewards/accuracies": 0.75, "rewards/chosen": -1.2056920528411865, "rewards/margins": 0.9932038187980652, "rewards/rejected": -2.1988959312438965, "step": 4490 }, { "epoch": 0.52, "learning_rate": 1.4544702964450217e-07, "logits/chosen": -2.4831719398498535, "logits/rejected": -2.5373644828796387, "logps/chosen": -288.54156494140625, "logps/rejected": -162.93429565429688, "loss": 0.2924, "rewards/accuracies": 1.0, "rewards/chosen": -1.3155889511108398, "rewards/margins": 2.4275221824645996, "rewards/rejected": -3.7431111335754395, "step": 4491 }, { "epoch": 0.52, "learning_rate": 1.454115979685839e-07, "logits/chosen": -2.6326756477355957, "logits/rejected": -2.6019070148468018, "logps/chosen": -322.994873046875, "logps/rejected": -321.88037109375, "loss": 0.6734, "rewards/accuracies": 0.75, "rewards/chosen": -0.8376266360282898, "rewards/margins": 1.9719176292419434, "rewards/rejected": -2.809544324874878, "step": 4492 }, { "epoch": 0.52, "learning_rate": 1.4537616629266564e-07, "logits/chosen": -2.6733832359313965, "logits/rejected": -2.63567852973938, "logps/chosen": -350.3821105957031, "logps/rejected": -275.8485107421875, "loss": 0.4188, "rewards/accuracies": 0.875, "rewards/chosen": -1.144783616065979, "rewards/margins": 1.749941110610962, "rewards/rejected": -2.8947248458862305, "step": 4493 }, { "epoch": 0.52, "learning_rate": 1.4534073461674737e-07, "logits/chosen": -2.6723110675811768, "logits/rejected": -2.6978647708892822, "logps/chosen": -277.2783203125, "logps/rejected": -321.64306640625, "loss": 0.1699, "rewards/accuracies": 0.875, "rewards/chosen": -0.7720358371734619, "rewards/margins": 4.211016654968262, "rewards/rejected": -4.9830522537231445, "step": 4494 }, { "epoch": 0.52, "learning_rate": 1.453053029408291e-07, "logits/chosen": -2.287447452545166, "logits/rejected": -1.9264509677886963, "logps/chosen": -255.82891845703125, "logps/rejected": -374.95501708984375, "loss": 0.3699, "rewards/accuracies": 0.75, "rewards/chosen": -0.4010649025440216, "rewards/margins": 2.2390036582946777, "rewards/rejected": -2.640068531036377, "step": 4495 }, { "epoch": 0.52, "learning_rate": 1.4526987126491084e-07, "logits/chosen": -2.0508456230163574, "logits/rejected": -2.5995490550994873, "logps/chosen": -473.290771484375, "logps/rejected": -195.6824951171875, "loss": 0.3038, "rewards/accuracies": 0.875, "rewards/chosen": -0.29383036494255066, "rewards/margins": 1.712847113609314, "rewards/rejected": -2.0066773891448975, "step": 4496 }, { "epoch": 0.52, "learning_rate": 1.4523443958899256e-07, "logits/chosen": -1.8114819526672363, "logits/rejected": -1.8963158130645752, "logps/chosen": -249.40232849121094, "logps/rejected": -203.25222778320312, "loss": 0.7529, "rewards/accuracies": 0.625, "rewards/chosen": -0.933061957359314, "rewards/margins": 0.19214923679828644, "rewards/rejected": -1.1252111196517944, "step": 4497 }, { "epoch": 0.52, "learning_rate": 1.4519900791307428e-07, "logits/chosen": -2.3821778297424316, "logits/rejected": -2.6239259243011475, "logps/chosen": -479.08837890625, "logps/rejected": -271.6213073730469, "loss": 0.7108, "rewards/accuracies": 0.875, "rewards/chosen": -1.279365062713623, "rewards/margins": 1.2666561603546143, "rewards/rejected": -2.5460212230682373, "step": 4498 }, { "epoch": 0.52, "learning_rate": 1.45163576237156e-07, "logits/chosen": -3.1678967475891113, "logits/rejected": -2.9324545860290527, "logps/chosen": -244.92601013183594, "logps/rejected": -169.06808471679688, "loss": 0.2878, "rewards/accuracies": 0.75, "rewards/chosen": -1.077366828918457, "rewards/margins": 1.9760448932647705, "rewards/rejected": -3.0534117221832275, "step": 4499 }, { "epoch": 0.52, "learning_rate": 1.4512814456123775e-07, "logits/chosen": -2.3210928440093994, "logits/rejected": -1.9703279733657837, "logps/chosen": -236.15118408203125, "logps/rejected": -333.5020751953125, "loss": 0.1707, "rewards/accuracies": 0.875, "rewards/chosen": -0.6681629419326782, "rewards/margins": 3.728099822998047, "rewards/rejected": -4.396262168884277, "step": 4500 }, { "epoch": 0.52, "learning_rate": 1.4509271288531947e-07, "logits/chosen": -2.6626482009887695, "logits/rejected": -2.9056429862976074, "logps/chosen": -328.17657470703125, "logps/rejected": -297.45220947265625, "loss": 0.6198, "rewards/accuracies": 0.75, "rewards/chosen": -1.637305498123169, "rewards/margins": 2.253784418106079, "rewards/rejected": -3.891089916229248, "step": 4501 }, { "epoch": 0.52, "learning_rate": 1.450572812094012e-07, "logits/chosen": -2.532075881958008, "logits/rejected": -2.2468795776367188, "logps/chosen": -230.837646484375, "logps/rejected": -175.48361206054688, "loss": 0.5205, "rewards/accuracies": 0.625, "rewards/chosen": -0.9369468092918396, "rewards/margins": 0.6457905769348145, "rewards/rejected": -1.5827374458312988, "step": 4502 }, { "epoch": 0.52, "learning_rate": 1.4502184953348292e-07, "logits/chosen": -2.6798038482666016, "logits/rejected": -2.317324638366699, "logps/chosen": -198.99826049804688, "logps/rejected": -425.55682373046875, "loss": 0.3911, "rewards/accuracies": 0.75, "rewards/chosen": -0.397950679063797, "rewards/margins": 1.8767015933990479, "rewards/rejected": -2.2746522426605225, "step": 4503 }, { "epoch": 0.52, "learning_rate": 1.4498641785756464e-07, "logits/chosen": -2.2641334533691406, "logits/rejected": -2.2100231647491455, "logps/chosen": -285.7616271972656, "logps/rejected": -288.3416748046875, "loss": 0.3013, "rewards/accuracies": 0.875, "rewards/chosen": -0.9303148984909058, "rewards/margins": 1.9492205381393433, "rewards/rejected": -2.879535436630249, "step": 4504 }, { "epoch": 0.52, "learning_rate": 1.449509861816464e-07, "logits/chosen": -2.534092426300049, "logits/rejected": -2.621316432952881, "logps/chosen": -415.91070556640625, "logps/rejected": -248.5422821044922, "loss": 0.6313, "rewards/accuracies": 0.625, "rewards/chosen": -1.2406803369522095, "rewards/margins": 1.3138843774795532, "rewards/rejected": -2.554564952850342, "step": 4505 }, { "epoch": 0.52, "learning_rate": 1.449155545057281e-07, "logits/chosen": -2.1466965675354004, "logits/rejected": -2.2590575218200684, "logps/chosen": -377.6200866699219, "logps/rejected": -338.7649841308594, "loss": 0.1773, "rewards/accuracies": 1.0, "rewards/chosen": -0.29249611496925354, "rewards/margins": 3.055487632751465, "rewards/rejected": -3.3479838371276855, "step": 4506 }, { "epoch": 0.52, "learning_rate": 1.4488012282980986e-07, "logits/chosen": -1.8711618185043335, "logits/rejected": -2.2112176418304443, "logps/chosen": -424.2223815917969, "logps/rejected": -454.1842346191406, "loss": 0.3042, "rewards/accuracies": 0.875, "rewards/chosen": -0.15294623374938965, "rewards/margins": 1.6848701238632202, "rewards/rejected": -1.8378162384033203, "step": 4507 }, { "epoch": 0.52, "learning_rate": 1.4484469115389158e-07, "logits/chosen": -1.8297598361968994, "logits/rejected": -2.1137871742248535, "logps/chosen": -552.373291015625, "logps/rejected": -322.032958984375, "loss": 0.1635, "rewards/accuracies": 1.0, "rewards/chosen": -0.377629816532135, "rewards/margins": 2.665693759918213, "rewards/rejected": -3.043323516845703, "step": 4508 }, { "epoch": 0.52, "learning_rate": 1.448092594779733e-07, "logits/chosen": -2.17305326461792, "logits/rejected": -2.3085408210754395, "logps/chosen": -215.98678588867188, "logps/rejected": -167.53207397460938, "loss": 0.6224, "rewards/accuracies": 0.75, "rewards/chosen": -0.7037274241447449, "rewards/margins": 0.3869680166244507, "rewards/rejected": -1.0906953811645508, "step": 4509 }, { "epoch": 0.52, "learning_rate": 1.4477382780205503e-07, "logits/chosen": -2.492035388946533, "logits/rejected": -2.3766770362854004, "logps/chosen": -96.54740905761719, "logps/rejected": -123.30646514892578, "loss": 0.4338, "rewards/accuracies": 0.875, "rewards/chosen": -1.1493990421295166, "rewards/margins": 1.530091404914856, "rewards/rejected": -2.679490566253662, "step": 4510 }, { "epoch": 0.52, "learning_rate": 1.4473839612613675e-07, "logits/chosen": -2.7520909309387207, "logits/rejected": -2.6018526554107666, "logps/chosen": -228.30950927734375, "logps/rejected": -279.6526794433594, "loss": 0.6753, "rewards/accuracies": 0.625, "rewards/chosen": -1.4812743663787842, "rewards/margins": 1.1138497591018677, "rewards/rejected": -2.5951240062713623, "step": 4511 }, { "epoch": 0.52, "learning_rate": 1.447029644502185e-07, "logits/chosen": -2.7976064682006836, "logits/rejected": -2.594771385192871, "logps/chosen": -276.5714111328125, "logps/rejected": -221.60467529296875, "loss": 0.2525, "rewards/accuracies": 0.875, "rewards/chosen": -0.21855135262012482, "rewards/margins": 3.214864730834961, "rewards/rejected": -3.433415651321411, "step": 4512 }, { "epoch": 0.52, "learning_rate": 1.4466753277430022e-07, "logits/chosen": -2.695167064666748, "logits/rejected": -2.552050828933716, "logps/chosen": -127.60615539550781, "logps/rejected": -241.47325134277344, "loss": 0.2782, "rewards/accuracies": 0.875, "rewards/chosen": -0.7626305818557739, "rewards/margins": 2.4334850311279297, "rewards/rejected": -3.196115732192993, "step": 4513 }, { "epoch": 0.53, "learning_rate": 1.4463210109838194e-07, "logits/chosen": -2.5713653564453125, "logits/rejected": -2.457108736038208, "logps/chosen": -234.27789306640625, "logps/rejected": -367.5045471191406, "loss": 0.2575, "rewards/accuracies": 0.875, "rewards/chosen": -1.4797108173370361, "rewards/margins": 2.2798445224761963, "rewards/rejected": -3.7595553398132324, "step": 4514 }, { "epoch": 0.53, "learning_rate": 1.4459666942246366e-07, "logits/chosen": -2.323906898498535, "logits/rejected": -2.237969160079956, "logps/chosen": -218.26930236816406, "logps/rejected": -295.8759460449219, "loss": 0.2498, "rewards/accuracies": 0.875, "rewards/chosen": -0.545875608921051, "rewards/margins": 2.273963689804077, "rewards/rejected": -2.8198394775390625, "step": 4515 }, { "epoch": 0.53, "learning_rate": 1.445612377465454e-07, "logits/chosen": -2.5516295433044434, "logits/rejected": -2.4767520427703857, "logps/chosen": -217.02427673339844, "logps/rejected": -250.64633178710938, "loss": 0.5553, "rewards/accuracies": 0.625, "rewards/chosen": -1.0402499437332153, "rewards/margins": 2.0265557765960693, "rewards/rejected": -3.066805839538574, "step": 4516 }, { "epoch": 0.53, "learning_rate": 1.4452580607062713e-07, "logits/chosen": -2.4409875869750977, "logits/rejected": -2.286607503890991, "logps/chosen": -223.46481323242188, "logps/rejected": -322.7868347167969, "loss": 0.1258, "rewards/accuracies": 1.0, "rewards/chosen": -0.7761311531066895, "rewards/margins": 3.540062189102173, "rewards/rejected": -4.316193580627441, "step": 4517 }, { "epoch": 0.53, "learning_rate": 1.4449037439470888e-07, "logits/chosen": -2.1901299953460693, "logits/rejected": -2.258115768432617, "logps/chosen": -190.00509643554688, "logps/rejected": -183.37911987304688, "loss": 0.478, "rewards/accuracies": 0.875, "rewards/chosen": -0.8091549277305603, "rewards/margins": 1.242027759552002, "rewards/rejected": -2.051182508468628, "step": 4518 }, { "epoch": 0.53, "learning_rate": 1.444549427187906e-07, "logits/chosen": -2.595181465148926, "logits/rejected": -2.555992603302002, "logps/chosen": -112.34892272949219, "logps/rejected": -113.49689483642578, "loss": 0.3377, "rewards/accuracies": 0.875, "rewards/chosen": -0.5503670573234558, "rewards/margins": 1.5663617849349976, "rewards/rejected": -2.1167287826538086, "step": 4519 }, { "epoch": 0.53, "learning_rate": 1.4441951104287233e-07, "logits/chosen": -1.6047509908676147, "logits/rejected": -2.100574016571045, "logps/chosen": -320.12371826171875, "logps/rejected": -213.81773376464844, "loss": 0.5428, "rewards/accuracies": 0.75, "rewards/chosen": -0.6118432283401489, "rewards/margins": 1.4327383041381836, "rewards/rejected": -2.044581651687622, "step": 4520 }, { "epoch": 0.53, "learning_rate": 1.4438407936695405e-07, "logits/chosen": -1.3728859424591064, "logits/rejected": -1.734853982925415, "logps/chosen": -554.4150390625, "logps/rejected": -481.1383056640625, "loss": 0.421, "rewards/accuracies": 0.75, "rewards/chosen": -0.31647610664367676, "rewards/margins": 1.1784223318099976, "rewards/rejected": -1.4948984384536743, "step": 4521 }, { "epoch": 0.53, "learning_rate": 1.4434864769103577e-07, "logits/chosen": -2.1794750690460205, "logits/rejected": -2.0429024696350098, "logps/chosen": -391.2835998535156, "logps/rejected": -379.21368408203125, "loss": 1.075, "rewards/accuracies": 0.5, "rewards/chosen": -1.484217643737793, "rewards/margins": 0.5887270569801331, "rewards/rejected": -2.0729446411132812, "step": 4522 }, { "epoch": 0.53, "learning_rate": 1.443132160151175e-07, "logits/chosen": -1.181962013244629, "logits/rejected": -1.734419822692871, "logps/chosen": -562.5978393554688, "logps/rejected": -291.0789489746094, "loss": 0.5154, "rewards/accuracies": 0.75, "rewards/chosen": -0.6563250422477722, "rewards/margins": 1.4066563844680786, "rewards/rejected": -2.062981367111206, "step": 4523 }, { "epoch": 0.53, "learning_rate": 1.4427778433919924e-07, "logits/chosen": -2.309689521789551, "logits/rejected": -2.1552557945251465, "logps/chosen": -203.62754821777344, "logps/rejected": -229.5712127685547, "loss": 0.2164, "rewards/accuracies": 0.875, "rewards/chosen": 0.08582200109958649, "rewards/margins": 2.134716272354126, "rewards/rejected": -2.048894166946411, "step": 4524 }, { "epoch": 0.53, "learning_rate": 1.4424235266328096e-07, "logits/chosen": -2.534682512283325, "logits/rejected": -2.4873056411743164, "logps/chosen": -122.90055847167969, "logps/rejected": -207.57847595214844, "loss": 0.5359, "rewards/accuracies": 0.75, "rewards/chosen": -1.1138291358947754, "rewards/margins": 1.6452456712722778, "rewards/rejected": -2.7590749263763428, "step": 4525 }, { "epoch": 0.53, "learning_rate": 1.4420692098736269e-07, "logits/chosen": -2.8869314193725586, "logits/rejected": -3.0136208534240723, "logps/chosen": -184.37977600097656, "logps/rejected": -291.44024658203125, "loss": 0.5312, "rewards/accuracies": 0.75, "rewards/chosen": -1.41671884059906, "rewards/margins": 2.3138339519500732, "rewards/rejected": -3.7305526733398438, "step": 4526 }, { "epoch": 0.53, "learning_rate": 1.4417148931144443e-07, "logits/chosen": -2.4688076972961426, "logits/rejected": -2.436426877975464, "logps/chosen": -101.77072143554688, "logps/rejected": -163.4958038330078, "loss": 0.3662, "rewards/accuracies": 0.875, "rewards/chosen": -0.8609261512756348, "rewards/margins": 2.0007665157318115, "rewards/rejected": -2.8616926670074463, "step": 4527 }, { "epoch": 0.53, "learning_rate": 1.4413605763552616e-07, "logits/chosen": -1.7943674325942993, "logits/rejected": -2.15969181060791, "logps/chosen": -460.53460693359375, "logps/rejected": -393.0439453125, "loss": 0.4543, "rewards/accuracies": 0.875, "rewards/chosen": -1.407240390777588, "rewards/margins": 1.0326753854751587, "rewards/rejected": -2.439915895462036, "step": 4528 }, { "epoch": 0.53, "learning_rate": 1.4410062595960788e-07, "logits/chosen": -2.8639392852783203, "logits/rejected": -2.7998557090759277, "logps/chosen": -217.3195343017578, "logps/rejected": -258.15679931640625, "loss": 0.36, "rewards/accuracies": 0.875, "rewards/chosen": -0.40182435512542725, "rewards/margins": 1.919093370437622, "rewards/rejected": -2.3209176063537598, "step": 4529 }, { "epoch": 0.53, "learning_rate": 1.4406519428368963e-07, "logits/chosen": -2.055154323577881, "logits/rejected": -2.263939619064331, "logps/chosen": -222.68838500976562, "logps/rejected": -175.53009033203125, "loss": 0.288, "rewards/accuracies": 0.75, "rewards/chosen": -0.2472681850194931, "rewards/margins": 1.9792189598083496, "rewards/rejected": -2.226487159729004, "step": 4530 }, { "epoch": 0.53, "learning_rate": 1.4402976260777135e-07, "logits/chosen": -2.527340888977051, "logits/rejected": -2.203871965408325, "logps/chosen": -157.57061767578125, "logps/rejected": -161.94886779785156, "loss": 0.3919, "rewards/accuracies": 0.875, "rewards/chosen": -0.559215784072876, "rewards/margins": 1.808696985244751, "rewards/rejected": -2.367912769317627, "step": 4531 }, { "epoch": 0.53, "learning_rate": 1.4399433093185307e-07, "logits/chosen": -2.4067320823669434, "logits/rejected": -2.3116726875305176, "logps/chosen": -225.63128662109375, "logps/rejected": -309.308349609375, "loss": 0.2273, "rewards/accuracies": 0.875, "rewards/chosen": -1.132466435432434, "rewards/margins": 2.7170751094818115, "rewards/rejected": -3.849541664123535, "step": 4532 }, { "epoch": 0.53, "learning_rate": 1.439588992559348e-07, "logits/chosen": -1.8923847675323486, "logits/rejected": -2.040466785430908, "logps/chosen": -331.92962646484375, "logps/rejected": -275.8675537109375, "loss": 0.3431, "rewards/accuracies": 0.875, "rewards/chosen": -1.3218657970428467, "rewards/margins": 1.3280314207077026, "rewards/rejected": -2.6498970985412598, "step": 4533 }, { "epoch": 0.53, "learning_rate": 1.4392346758001652e-07, "logits/chosen": -1.9332072734832764, "logits/rejected": -2.268585443496704, "logps/chosen": -331.6237487792969, "logps/rejected": -237.0109100341797, "loss": 1.2664, "rewards/accuracies": 0.625, "rewards/chosen": -2.5782623291015625, "rewards/margins": 0.922237753868103, "rewards/rejected": -3.500500202178955, "step": 4534 }, { "epoch": 0.53, "learning_rate": 1.4388803590409826e-07, "logits/chosen": -2.3668246269226074, "logits/rejected": -2.5182385444641113, "logps/chosen": -298.1815490722656, "logps/rejected": -338.0044860839844, "loss": 0.4519, "rewards/accuracies": 0.625, "rewards/chosen": -0.9222475290298462, "rewards/margins": 3.586411952972412, "rewards/rejected": -4.508659362792969, "step": 4535 }, { "epoch": 0.53, "learning_rate": 1.4385260422817999e-07, "logits/chosen": -1.7934093475341797, "logits/rejected": -1.9206140041351318, "logps/chosen": -637.3226318359375, "logps/rejected": -405.039794921875, "loss": 0.4205, "rewards/accuracies": 0.875, "rewards/chosen": -0.7445183396339417, "rewards/margins": 1.1738955974578857, "rewards/rejected": -1.9184138774871826, "step": 4536 }, { "epoch": 0.53, "learning_rate": 1.438171725522617e-07, "logits/chosen": -2.119317054748535, "logits/rejected": -2.1920840740203857, "logps/chosen": -153.34149169921875, "logps/rejected": -165.95285034179688, "loss": 0.5006, "rewards/accuracies": 0.75, "rewards/chosen": -0.7456852197647095, "rewards/margins": 1.7846500873565674, "rewards/rejected": -2.5303354263305664, "step": 4537 }, { "epoch": 0.53, "learning_rate": 1.4378174087634346e-07, "logits/chosen": -1.9258193969726562, "logits/rejected": -2.2234652042388916, "logps/chosen": -403.2442626953125, "logps/rejected": -284.0052490234375, "loss": 0.3428, "rewards/accuracies": 0.625, "rewards/chosen": 0.08641433715820312, "rewards/margins": 2.7570629119873047, "rewards/rejected": -2.6706485748291016, "step": 4538 }, { "epoch": 0.53, "learning_rate": 1.4374630920042518e-07, "logits/chosen": -2.1453492641448975, "logits/rejected": -2.4140048027038574, "logps/chosen": -276.9625244140625, "logps/rejected": -274.12261962890625, "loss": 0.3794, "rewards/accuracies": 0.875, "rewards/chosen": -0.9709456562995911, "rewards/margins": 3.295370578765869, "rewards/rejected": -4.2663164138793945, "step": 4539 }, { "epoch": 0.53, "learning_rate": 1.437108775245069e-07, "logits/chosen": -2.187670946121216, "logits/rejected": -1.9425549507141113, "logps/chosen": -378.585693359375, "logps/rejected": -300.03009033203125, "loss": 0.5154, "rewards/accuracies": 0.875, "rewards/chosen": -1.0079045295715332, "rewards/margins": 0.5125000476837158, "rewards/rejected": -1.520404577255249, "step": 4540 }, { "epoch": 0.53, "learning_rate": 1.4367544584858865e-07, "logits/chosen": -2.5555591583251953, "logits/rejected": -2.438849449157715, "logps/chosen": -183.1067352294922, "logps/rejected": -143.8574676513672, "loss": 0.4778, "rewards/accuracies": 0.875, "rewards/chosen": -1.0507951974868774, "rewards/margins": 1.8909821510314941, "rewards/rejected": -2.941777229309082, "step": 4541 }, { "epoch": 0.53, "learning_rate": 1.4364001417267037e-07, "logits/chosen": -2.7133073806762695, "logits/rejected": -2.596078872680664, "logps/chosen": -163.97145080566406, "logps/rejected": -263.69207763671875, "loss": 0.5348, "rewards/accuracies": 0.75, "rewards/chosen": -1.0817997455596924, "rewards/margins": 1.4727386236190796, "rewards/rejected": -2.5545387268066406, "step": 4542 }, { "epoch": 0.53, "learning_rate": 1.436045824967521e-07, "logits/chosen": -1.5911260843276978, "logits/rejected": -1.4432694911956787, "logps/chosen": -305.38671875, "logps/rejected": -438.78729248046875, "loss": 0.4665, "rewards/accuracies": 0.625, "rewards/chosen": -1.3665088415145874, "rewards/margins": 1.8471590280532837, "rewards/rejected": -3.213667631149292, "step": 4543 }, { "epoch": 0.53, "learning_rate": 1.4356915082083382e-07, "logits/chosen": -2.4066531658172607, "logits/rejected": -2.2318778038024902, "logps/chosen": -146.27267456054688, "logps/rejected": -181.6082763671875, "loss": 0.5859, "rewards/accuracies": 0.75, "rewards/chosen": -1.6682428121566772, "rewards/margins": 1.59196138381958, "rewards/rejected": -3.2602040767669678, "step": 4544 }, { "epoch": 0.53, "learning_rate": 1.4353371914491554e-07, "logits/chosen": -2.579958915710449, "logits/rejected": -2.521435260772705, "logps/chosen": -284.2271728515625, "logps/rejected": -334.385009765625, "loss": 0.1399, "rewards/accuracies": 1.0, "rewards/chosen": -0.4357292652130127, "rewards/margins": 3.7329325675964355, "rewards/rejected": -4.168662071228027, "step": 4545 }, { "epoch": 0.53, "learning_rate": 1.4349828746899726e-07, "logits/chosen": -2.657261371612549, "logits/rejected": -2.6024065017700195, "logps/chosen": -416.35235595703125, "logps/rejected": -299.7491455078125, "loss": 0.1677, "rewards/accuracies": 1.0, "rewards/chosen": -0.8758676052093506, "rewards/margins": 2.643146276473999, "rewards/rejected": -3.5190141201019287, "step": 4546 }, { "epoch": 0.53, "learning_rate": 1.43462855793079e-07, "logits/chosen": -2.011324405670166, "logits/rejected": -1.9053081274032593, "logps/chosen": -246.220703125, "logps/rejected": -231.98468017578125, "loss": 0.4304, "rewards/accuracies": 0.875, "rewards/chosen": -0.8850646018981934, "rewards/margins": 1.8854163885116577, "rewards/rejected": -2.7704808712005615, "step": 4547 }, { "epoch": 0.53, "learning_rate": 1.4342742411716073e-07, "logits/chosen": -2.2863762378692627, "logits/rejected": -2.182316541671753, "logps/chosen": -175.74551391601562, "logps/rejected": -281.2192687988281, "loss": 0.2808, "rewards/accuracies": 0.875, "rewards/chosen": -0.2007759064435959, "rewards/margins": 3.655919313430786, "rewards/rejected": -3.8566954135894775, "step": 4548 }, { "epoch": 0.53, "learning_rate": 1.4339199244124245e-07, "logits/chosen": -2.1655240058898926, "logits/rejected": -2.4422080516815186, "logps/chosen": -340.9500427246094, "logps/rejected": -290.5348815917969, "loss": 0.3238, "rewards/accuracies": 0.875, "rewards/chosen": -0.14776934683322906, "rewards/margins": 1.8168530464172363, "rewards/rejected": -1.9646224975585938, "step": 4549 }, { "epoch": 0.53, "learning_rate": 1.433565607653242e-07, "logits/chosen": -1.4708178043365479, "logits/rejected": -2.040053606033325, "logps/chosen": -497.3683776855469, "logps/rejected": -358.2477111816406, "loss": 0.5847, "rewards/accuracies": 0.875, "rewards/chosen": -0.8377424478530884, "rewards/margins": 1.123195767402649, "rewards/rejected": -1.9609382152557373, "step": 4550 }, { "epoch": 0.53, "learning_rate": 1.4332112908940592e-07, "logits/chosen": -1.9071755409240723, "logits/rejected": -1.8008759021759033, "logps/chosen": -438.569580078125, "logps/rejected": -374.8845520019531, "loss": 0.4042, "rewards/accuracies": 0.75, "rewards/chosen": -0.9436010122299194, "rewards/margins": 2.160256862640381, "rewards/rejected": -3.1038577556610107, "step": 4551 }, { "epoch": 0.53, "learning_rate": 1.4328569741348765e-07, "logits/chosen": -2.0289788246154785, "logits/rejected": -1.8425214290618896, "logps/chosen": -377.4927978515625, "logps/rejected": -387.34515380859375, "loss": 0.9871, "rewards/accuracies": 0.5, "rewards/chosen": -1.4708130359649658, "rewards/margins": 0.5777369737625122, "rewards/rejected": -2.0485498905181885, "step": 4552 }, { "epoch": 0.53, "learning_rate": 1.432502657375694e-07, "logits/chosen": -1.9287724494934082, "logits/rejected": -1.932666301727295, "logps/chosen": -203.9404754638672, "logps/rejected": -348.5835266113281, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": -0.39140504598617554, "rewards/margins": 5.071662425994873, "rewards/rejected": -5.463068008422852, "step": 4553 }, { "epoch": 0.53, "learning_rate": 1.4321483406165112e-07, "logits/chosen": -1.6115461587905884, "logits/rejected": -2.1434059143066406, "logps/chosen": -453.5948791503906, "logps/rejected": -257.0216369628906, "loss": 0.3281, "rewards/accuracies": 0.875, "rewards/chosen": -0.6525439620018005, "rewards/margins": 1.3091933727264404, "rewards/rejected": -1.9617373943328857, "step": 4554 }, { "epoch": 0.53, "learning_rate": 1.4317940238573284e-07, "logits/chosen": -2.5339245796203613, "logits/rejected": -2.7047104835510254, "logps/chosen": -200.31126403808594, "logps/rejected": -189.361572265625, "loss": 0.2267, "rewards/accuracies": 1.0, "rewards/chosen": 0.6265925765037537, "rewards/margins": 3.195383071899414, "rewards/rejected": -2.5687904357910156, "step": 4555 }, { "epoch": 0.53, "learning_rate": 1.4314397070981456e-07, "logits/chosen": -2.073204517364502, "logits/rejected": -2.312370777130127, "logps/chosen": -493.60052490234375, "logps/rejected": -383.4974060058594, "loss": 0.4152, "rewards/accuracies": 0.625, "rewards/chosen": -0.9416556358337402, "rewards/margins": 1.3919603824615479, "rewards/rejected": -2.333616018295288, "step": 4556 }, { "epoch": 0.53, "learning_rate": 1.4310853903389628e-07, "logits/chosen": -2.1404306888580322, "logits/rejected": -1.9542174339294434, "logps/chosen": -324.094482421875, "logps/rejected": -447.409912109375, "loss": 0.8349, "rewards/accuracies": 0.625, "rewards/chosen": -1.15049147605896, "rewards/margins": 0.7797902226448059, "rewards/rejected": -1.930281639099121, "step": 4557 }, { "epoch": 0.53, "learning_rate": 1.43073107357978e-07, "logits/chosen": -1.772026777267456, "logits/rejected": -2.270484447479248, "logps/chosen": -646.3619995117188, "logps/rejected": -470.0079345703125, "loss": 0.6181, "rewards/accuracies": 0.625, "rewards/chosen": -1.1211533546447754, "rewards/margins": 0.42548900842666626, "rewards/rejected": -1.5466423034667969, "step": 4558 }, { "epoch": 0.53, "learning_rate": 1.4303767568205975e-07, "logits/chosen": -2.7580230236053467, "logits/rejected": -2.7416677474975586, "logps/chosen": -259.8551025390625, "logps/rejected": -291.7745666503906, "loss": 0.3658, "rewards/accuracies": 0.75, "rewards/chosen": -1.0203540325164795, "rewards/margins": 2.654365301132202, "rewards/rejected": -3.6747193336486816, "step": 4559 }, { "epoch": 0.53, "learning_rate": 1.4300224400614148e-07, "logits/chosen": -2.2695071697235107, "logits/rejected": -2.325585126876831, "logps/chosen": -187.5009307861328, "logps/rejected": -250.90591430664062, "loss": 0.4016, "rewards/accuracies": 0.75, "rewards/chosen": -0.7131271958351135, "rewards/margins": 1.8361079692840576, "rewards/rejected": -2.5492351055145264, "step": 4560 }, { "epoch": 0.53, "learning_rate": 1.4296681233022322e-07, "logits/chosen": -2.750105857849121, "logits/rejected": -2.4683191776275635, "logps/chosen": -270.112548828125, "logps/rejected": -304.95965576171875, "loss": 0.4085, "rewards/accuracies": 0.875, "rewards/chosen": -0.6515185832977295, "rewards/margins": 1.5852755308151245, "rewards/rejected": -2.2367942333221436, "step": 4561 }, { "epoch": 0.53, "learning_rate": 1.4293138065430495e-07, "logits/chosen": -2.3127176761627197, "logits/rejected": -2.3834595680236816, "logps/chosen": -371.79388427734375, "logps/rejected": -259.0505065917969, "loss": 0.7814, "rewards/accuracies": 0.5, "rewards/chosen": -1.9851155281066895, "rewards/margins": 1.247124433517456, "rewards/rejected": -3.2322397232055664, "step": 4562 }, { "epoch": 0.53, "learning_rate": 1.4289594897838667e-07, "logits/chosen": -2.1493802070617676, "logits/rejected": -2.200692653656006, "logps/chosen": -275.173583984375, "logps/rejected": -332.397216796875, "loss": 0.2149, "rewards/accuracies": 0.875, "rewards/chosen": -0.3591339588165283, "rewards/margins": 2.6663687229156494, "rewards/rejected": -3.0255024433135986, "step": 4563 }, { "epoch": 0.53, "learning_rate": 1.4286051730246842e-07, "logits/chosen": -2.340829372406006, "logits/rejected": -2.2817506790161133, "logps/chosen": -303.1248779296875, "logps/rejected": -231.0667724609375, "loss": 0.6783, "rewards/accuracies": 0.75, "rewards/chosen": -1.3850717544555664, "rewards/margins": 1.4177653789520264, "rewards/rejected": -2.802837371826172, "step": 4564 }, { "epoch": 0.53, "learning_rate": 1.4282508562655014e-07, "logits/chosen": -2.807140350341797, "logits/rejected": -2.882100820541382, "logps/chosen": -257.6672668457031, "logps/rejected": -265.2479248046875, "loss": 0.481, "rewards/accuracies": 0.875, "rewards/chosen": -1.363244891166687, "rewards/margins": 1.60715913772583, "rewards/rejected": -2.9704041481018066, "step": 4565 }, { "epoch": 0.53, "learning_rate": 1.4278965395063186e-07, "logits/chosen": -2.681138038635254, "logits/rejected": -2.7151107788085938, "logps/chosen": -221.9573516845703, "logps/rejected": -280.098876953125, "loss": 0.1009, "rewards/accuracies": 1.0, "rewards/chosen": -0.3648585379123688, "rewards/margins": 2.921353578567505, "rewards/rejected": -3.2862119674682617, "step": 4566 }, { "epoch": 0.53, "learning_rate": 1.4275422227471358e-07, "logits/chosen": -1.8168375492095947, "logits/rejected": -2.171895742416382, "logps/chosen": -390.2123718261719, "logps/rejected": -309.4970397949219, "loss": 1.4804, "rewards/accuracies": 0.75, "rewards/chosen": -2.6768441200256348, "rewards/margins": 0.007744848728179932, "rewards/rejected": -2.68458890914917, "step": 4567 }, { "epoch": 0.53, "learning_rate": 1.427187905987953e-07, "logits/chosen": -2.327075481414795, "logits/rejected": -2.245744228363037, "logps/chosen": -231.1329345703125, "logps/rejected": -319.91912841796875, "loss": 0.4647, "rewards/accuracies": 0.875, "rewards/chosen": -0.734835147857666, "rewards/margins": 0.8618031144142151, "rewards/rejected": -1.5966382026672363, "step": 4568 }, { "epoch": 0.53, "learning_rate": 1.4268335892287703e-07, "logits/chosen": -2.2313027381896973, "logits/rejected": -2.457914352416992, "logps/chosen": -406.89227294921875, "logps/rejected": -373.0696716308594, "loss": 0.2248, "rewards/accuracies": 0.875, "rewards/chosen": -1.1542465686798096, "rewards/margins": 3.549875020980835, "rewards/rejected": -4.7041215896606445, "step": 4569 }, { "epoch": 0.53, "learning_rate": 1.4264792724695878e-07, "logits/chosen": -2.6116902828216553, "logits/rejected": -2.4388294219970703, "logps/chosen": -169.36260986328125, "logps/rejected": -192.33892822265625, "loss": 0.8204, "rewards/accuracies": 0.625, "rewards/chosen": -1.3113291263580322, "rewards/margins": 0.9894109964370728, "rewards/rejected": -2.3007402420043945, "step": 4570 }, { "epoch": 0.53, "learning_rate": 1.426124955710405e-07, "logits/chosen": -2.2877373695373535, "logits/rejected": -2.3286972045898438, "logps/chosen": -414.1171875, "logps/rejected": -380.396484375, "loss": 0.3828, "rewards/accuracies": 0.75, "rewards/chosen": 0.19829264283180237, "rewards/margins": 1.762611985206604, "rewards/rejected": -1.5643192529678345, "step": 4571 }, { "epoch": 0.53, "learning_rate": 1.4257706389512225e-07, "logits/chosen": -2.172844409942627, "logits/rejected": -2.2091777324676514, "logps/chosen": -401.7303771972656, "logps/rejected": -249.33340454101562, "loss": 0.4236, "rewards/accuracies": 0.875, "rewards/chosen": -1.0799400806427002, "rewards/margins": 1.86809504032135, "rewards/rejected": -2.94803524017334, "step": 4572 }, { "epoch": 0.53, "learning_rate": 1.4254163221920397e-07, "logits/chosen": -2.473447322845459, "logits/rejected": -2.342238426208496, "logps/chosen": -287.98779296875, "logps/rejected": -195.54818725585938, "loss": 0.3523, "rewards/accuracies": 0.75, "rewards/chosen": -0.5149829983711243, "rewards/margins": 1.5939180850982666, "rewards/rejected": -2.108901023864746, "step": 4573 }, { "epoch": 0.53, "learning_rate": 1.425062005432857e-07, "logits/chosen": -2.347651720046997, "logits/rejected": -2.516204833984375, "logps/chosen": -457.66217041015625, "logps/rejected": -350.29998779296875, "loss": 0.3574, "rewards/accuracies": 0.75, "rewards/chosen": -0.7880032062530518, "rewards/margins": 2.1141960620880127, "rewards/rejected": -2.9021992683410645, "step": 4574 }, { "epoch": 0.53, "learning_rate": 1.4247076886736741e-07, "logits/chosen": -2.8588175773620605, "logits/rejected": -2.772818088531494, "logps/chosen": -490.4708251953125, "logps/rejected": -440.9217224121094, "loss": 0.1517, "rewards/accuracies": 0.875, "rewards/chosen": -1.1680539846420288, "rewards/margins": 4.840087890625, "rewards/rejected": -6.00814151763916, "step": 4575 }, { "epoch": 0.53, "learning_rate": 1.4243533719144916e-07, "logits/chosen": -1.696918249130249, "logits/rejected": -1.8250718116760254, "logps/chosen": -586.201416015625, "logps/rejected": -441.43475341796875, "loss": 0.5173, "rewards/accuracies": 0.625, "rewards/chosen": -0.13461264967918396, "rewards/margins": 2.304497480392456, "rewards/rejected": -2.439110279083252, "step": 4576 }, { "epoch": 0.53, "learning_rate": 1.4239990551553088e-07, "logits/chosen": -2.141784906387329, "logits/rejected": -1.6939605474472046, "logps/chosen": -307.0180358886719, "logps/rejected": -432.68695068359375, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -1.0283546447753906, "rewards/margins": 4.45158576965332, "rewards/rejected": -5.479939937591553, "step": 4577 }, { "epoch": 0.53, "learning_rate": 1.423644738396126e-07, "logits/chosen": -2.4585256576538086, "logits/rejected": -2.557363271713257, "logps/chosen": -283.5745544433594, "logps/rejected": -299.0815124511719, "loss": 0.3576, "rewards/accuracies": 0.75, "rewards/chosen": -0.5687925815582275, "rewards/margins": 2.9264378547668457, "rewards/rejected": -3.495230197906494, "step": 4578 }, { "epoch": 0.53, "learning_rate": 1.4232904216369433e-07, "logits/chosen": -2.364220142364502, "logits/rejected": -2.2840168476104736, "logps/chosen": -301.54241943359375, "logps/rejected": -222.2979278564453, "loss": 0.3328, "rewards/accuracies": 0.875, "rewards/chosen": -1.1832081079483032, "rewards/margins": 1.9876179695129395, "rewards/rejected": -3.1708261966705322, "step": 4579 }, { "epoch": 0.53, "learning_rate": 1.4229361048777605e-07, "logits/chosen": -2.1571855545043945, "logits/rejected": -2.1304352283477783, "logps/chosen": -223.14669799804688, "logps/rejected": -215.6138458251953, "loss": 0.554, "rewards/accuracies": 0.625, "rewards/chosen": -0.4926740229129791, "rewards/margins": 2.303830862045288, "rewards/rejected": -2.7965049743652344, "step": 4580 }, { "epoch": 0.53, "learning_rate": 1.422581788118578e-07, "logits/chosen": -2.50447678565979, "logits/rejected": -2.2331831455230713, "logps/chosen": -210.81455993652344, "logps/rejected": -391.03717041015625, "loss": 0.3002, "rewards/accuracies": 0.875, "rewards/chosen": -2.0266237258911133, "rewards/margins": 4.442818641662598, "rewards/rejected": -6.469442367553711, "step": 4581 }, { "epoch": 0.53, "learning_rate": 1.4222274713593952e-07, "logits/chosen": -2.205030918121338, "logits/rejected": -2.0796868801116943, "logps/chosen": -351.6739807128906, "logps/rejected": -240.228759765625, "loss": 0.4903, "rewards/accuracies": 0.625, "rewards/chosen": -1.6667861938476562, "rewards/margins": 1.360234022140503, "rewards/rejected": -3.0270204544067383, "step": 4582 }, { "epoch": 0.53, "learning_rate": 1.4218731546002124e-07, "logits/chosen": -1.9994723796844482, "logits/rejected": -2.1341073513031006, "logps/chosen": -216.28082275390625, "logps/rejected": -256.68499755859375, "loss": 0.4098, "rewards/accuracies": 0.75, "rewards/chosen": -1.4084702730178833, "rewards/margins": 2.0712666511535645, "rewards/rejected": -3.479736804962158, "step": 4583 }, { "epoch": 0.53, "learning_rate": 1.42151883784103e-07, "logits/chosen": -1.9265532493591309, "logits/rejected": -1.692497968673706, "logps/chosen": -363.0285339355469, "logps/rejected": -354.53936767578125, "loss": 0.3624, "rewards/accuracies": 0.75, "rewards/chosen": -1.044895052909851, "rewards/margins": 2.8845739364624023, "rewards/rejected": -3.9294686317443848, "step": 4584 }, { "epoch": 0.53, "learning_rate": 1.4211645210818471e-07, "logits/chosen": -2.676792621612549, "logits/rejected": -2.5210492610931396, "logps/chosen": -299.9329833984375, "logps/rejected": -339.12322998046875, "loss": 0.3146, "rewards/accuracies": 1.0, "rewards/chosen": -1.4944720268249512, "rewards/margins": 1.8063539266586304, "rewards/rejected": -3.300826072692871, "step": 4585 }, { "epoch": 0.53, "learning_rate": 1.4208102043226644e-07, "logits/chosen": -2.399466037750244, "logits/rejected": -2.7145683765411377, "logps/chosen": -222.4784698486328, "logps/rejected": -162.9097900390625, "loss": 0.5488, "rewards/accuracies": 0.625, "rewards/chosen": -1.3615578413009644, "rewards/margins": 1.9946962594985962, "rewards/rejected": -3.3562541007995605, "step": 4586 }, { "epoch": 0.53, "learning_rate": 1.4204558875634816e-07, "logits/chosen": -2.5588457584381104, "logits/rejected": -2.474881410598755, "logps/chosen": -405.8204650878906, "logps/rejected": -335.6864013671875, "loss": 0.1903, "rewards/accuracies": 0.875, "rewards/chosen": -0.6817061305046082, "rewards/margins": 2.8251466751098633, "rewards/rejected": -3.5068531036376953, "step": 4587 }, { "epoch": 0.53, "learning_rate": 1.420101570804299e-07, "logits/chosen": -2.753014326095581, "logits/rejected": -2.9598236083984375, "logps/chosen": -447.74066162109375, "logps/rejected": -271.0645446777344, "loss": 0.3964, "rewards/accuracies": 0.875, "rewards/chosen": -0.9950798153877258, "rewards/margins": 3.0899975299835205, "rewards/rejected": -4.085077285766602, "step": 4588 }, { "epoch": 0.53, "learning_rate": 1.4197472540451163e-07, "logits/chosen": -2.3595938682556152, "logits/rejected": -2.1703238487243652, "logps/chosen": -516.5152587890625, "logps/rejected": -401.35333251953125, "loss": 0.618, "rewards/accuracies": 0.875, "rewards/chosen": -1.0838663578033447, "rewards/margins": 1.3270374536514282, "rewards/rejected": -2.4109039306640625, "step": 4589 }, { "epoch": 0.53, "learning_rate": 1.4193929372859335e-07, "logits/chosen": -3.195132255554199, "logits/rejected": -3.0004518032073975, "logps/chosen": -404.9744873046875, "logps/rejected": -344.565673828125, "loss": 0.2132, "rewards/accuracies": 0.875, "rewards/chosen": -1.2071678638458252, "rewards/margins": 2.50435733795166, "rewards/rejected": -3.7115249633789062, "step": 4590 }, { "epoch": 0.53, "learning_rate": 1.4190386205267507e-07, "logits/chosen": -2.6674184799194336, "logits/rejected": -2.7029495239257812, "logps/chosen": -196.17269897460938, "logps/rejected": -201.492431640625, "loss": 1.266, "rewards/accuracies": 0.5, "rewards/chosen": -1.6181941032409668, "rewards/margins": 0.8772364854812622, "rewards/rejected": -2.4954304695129395, "step": 4591 }, { "epoch": 0.53, "learning_rate": 1.4186843037675682e-07, "logits/chosen": -2.2206661701202393, "logits/rejected": -2.1138124465942383, "logps/chosen": -190.13931274414062, "logps/rejected": -309.45965576171875, "loss": 0.1392, "rewards/accuracies": 1.0, "rewards/chosen": -1.2348183393478394, "rewards/margins": 3.169175386428833, "rewards/rejected": -4.403993606567383, "step": 4592 }, { "epoch": 0.53, "learning_rate": 1.4183299870083854e-07, "logits/chosen": -2.281700611114502, "logits/rejected": -2.081501007080078, "logps/chosen": -241.08177185058594, "logps/rejected": -304.19610595703125, "loss": 0.3404, "rewards/accuracies": 0.75, "rewards/chosen": -1.5322091579437256, "rewards/margins": 3.235095977783203, "rewards/rejected": -4.76730489730835, "step": 4593 }, { "epoch": 0.53, "learning_rate": 1.4179756702492027e-07, "logits/chosen": -2.2878401279449463, "logits/rejected": -2.3796496391296387, "logps/chosen": -261.31817626953125, "logps/rejected": -415.4995422363281, "loss": 0.4587, "rewards/accuracies": 0.75, "rewards/chosen": -1.3747403621673584, "rewards/margins": 1.494633674621582, "rewards/rejected": -2.8693740367889404, "step": 4594 }, { "epoch": 0.53, "learning_rate": 1.4176213534900201e-07, "logits/chosen": -1.9523630142211914, "logits/rejected": -2.4192006587982178, "logps/chosen": -432.78082275390625, "logps/rejected": -225.52833557128906, "loss": 0.6136, "rewards/accuracies": 0.625, "rewards/chosen": -1.544884443283081, "rewards/margins": 0.46388956904411316, "rewards/rejected": -2.0087740421295166, "step": 4595 }, { "epoch": 0.53, "learning_rate": 1.4172670367308374e-07, "logits/chosen": -2.486020088195801, "logits/rejected": -2.5494656562805176, "logps/chosen": -252.11700439453125, "logps/rejected": -252.0416717529297, "loss": 0.8167, "rewards/accuracies": 0.75, "rewards/chosen": -1.458574891090393, "rewards/margins": 0.9344184398651123, "rewards/rejected": -2.392993450164795, "step": 4596 }, { "epoch": 0.53, "learning_rate": 1.4169127199716546e-07, "logits/chosen": -2.645944356918335, "logits/rejected": -2.810886859893799, "logps/chosen": -442.96990966796875, "logps/rejected": -246.04653930664062, "loss": 0.3937, "rewards/accuracies": 0.875, "rewards/chosen": -1.1360650062561035, "rewards/margins": 1.7514028549194336, "rewards/rejected": -2.887467861175537, "step": 4597 }, { "epoch": 0.53, "learning_rate": 1.4165584032124718e-07, "logits/chosen": -2.5343689918518066, "logits/rejected": -2.5346174240112305, "logps/chosen": -378.7255859375, "logps/rejected": -269.8512268066406, "loss": 0.1363, "rewards/accuracies": 1.0, "rewards/chosen": -0.3184240460395813, "rewards/margins": 2.716642379760742, "rewards/rejected": -3.0350663661956787, "step": 4598 }, { "epoch": 0.54, "learning_rate": 1.4162040864532893e-07, "logits/chosen": -2.649057626724243, "logits/rejected": -2.9146320819854736, "logps/chosen": -252.0757293701172, "logps/rejected": -328.2381286621094, "loss": 0.1329, "rewards/accuracies": 1.0, "rewards/chosen": -0.4550088047981262, "rewards/margins": 3.642380475997925, "rewards/rejected": -4.097389221191406, "step": 4599 }, { "epoch": 0.54, "learning_rate": 1.4158497696941065e-07, "logits/chosen": -2.34460186958313, "logits/rejected": -2.5735206604003906, "logps/chosen": -246.5498046875, "logps/rejected": -218.96018981933594, "loss": 0.3431, "rewards/accuracies": 0.875, "rewards/chosen": -1.3370246887207031, "rewards/margins": 1.77339768409729, "rewards/rejected": -3.1104226112365723, "step": 4600 }, { "epoch": 0.54, "learning_rate": 1.4154954529349237e-07, "logits/chosen": -2.1869618892669678, "logits/rejected": -2.1773574352264404, "logps/chosen": -438.0602722167969, "logps/rejected": -352.83837890625, "loss": 0.2702, "rewards/accuracies": 0.75, "rewards/chosen": -0.8134980201721191, "rewards/margins": 2.5503172874450684, "rewards/rejected": -3.3638153076171875, "step": 4601 }, { "epoch": 0.54, "learning_rate": 1.415141136175741e-07, "logits/chosen": -2.126478910446167, "logits/rejected": -2.192349433898926, "logps/chosen": -199.8409423828125, "logps/rejected": -173.22769165039062, "loss": 0.3361, "rewards/accuracies": 0.875, "rewards/chosen": -0.4917775094509125, "rewards/margins": 1.407465934753418, "rewards/rejected": -1.8992433547973633, "step": 4602 }, { "epoch": 0.54, "learning_rate": 1.4147868194165582e-07, "logits/chosen": -3.004516839981079, "logits/rejected": -2.9348459243774414, "logps/chosen": -276.03460693359375, "logps/rejected": -167.3862762451172, "loss": 0.5129, "rewards/accuracies": 0.75, "rewards/chosen": -1.775209903717041, "rewards/margins": 0.9002853631973267, "rewards/rejected": -2.675495147705078, "step": 4603 }, { "epoch": 0.54, "learning_rate": 1.4144325026573757e-07, "logits/chosen": -1.9522188901901245, "logits/rejected": -2.4306392669677734, "logps/chosen": -487.74267578125, "logps/rejected": -278.35675048828125, "loss": 0.5755, "rewards/accuracies": 0.75, "rewards/chosen": -0.5592601895332336, "rewards/margins": 2.151123523712158, "rewards/rejected": -2.710383653640747, "step": 4604 }, { "epoch": 0.54, "learning_rate": 1.414078185898193e-07, "logits/chosen": -2.3156447410583496, "logits/rejected": -2.3067805767059326, "logps/chosen": -340.23406982421875, "logps/rejected": -345.88482666015625, "loss": 0.2681, "rewards/accuracies": 0.875, "rewards/chosen": -1.124963641166687, "rewards/margins": 2.833944797515869, "rewards/rejected": -3.9589083194732666, "step": 4605 }, { "epoch": 0.54, "learning_rate": 1.4137238691390104e-07, "logits/chosen": -2.551785469055176, "logits/rejected": -2.4820475578308105, "logps/chosen": -125.06780242919922, "logps/rejected": -146.87893676757812, "loss": 0.3509, "rewards/accuracies": 0.75, "rewards/chosen": -0.24461446702480316, "rewards/margins": 2.1002471446990967, "rewards/rejected": -2.3448615074157715, "step": 4606 }, { "epoch": 0.54, "learning_rate": 1.4133695523798276e-07, "logits/chosen": -2.117509365081787, "logits/rejected": -1.907294750213623, "logps/chosen": -291.6663818359375, "logps/rejected": -308.48748779296875, "loss": 0.232, "rewards/accuracies": 1.0, "rewards/chosen": 0.009315751492977142, "rewards/margins": 2.9148430824279785, "rewards/rejected": -2.905527353286743, "step": 4607 }, { "epoch": 0.54, "learning_rate": 1.4130152356206448e-07, "logits/chosen": -2.799347400665283, "logits/rejected": -2.826687812805176, "logps/chosen": -168.2774200439453, "logps/rejected": -243.99237060546875, "loss": 0.4035, "rewards/accuracies": 0.75, "rewards/chosen": -0.7215653657913208, "rewards/margins": 3.433682441711426, "rewards/rejected": -4.155247211456299, "step": 4608 }, { "epoch": 0.54, "learning_rate": 1.412660918861462e-07, "logits/chosen": -2.273547410964966, "logits/rejected": -2.332209587097168, "logps/chosen": -258.91265869140625, "logps/rejected": -195.6422119140625, "loss": 0.4132, "rewards/accuracies": 0.75, "rewards/chosen": -0.717590868473053, "rewards/margins": 1.4587090015411377, "rewards/rejected": -2.176299810409546, "step": 4609 }, { "epoch": 0.54, "learning_rate": 1.4123066021022793e-07, "logits/chosen": -2.2808103561401367, "logits/rejected": -2.3269219398498535, "logps/chosen": -109.20295715332031, "logps/rejected": -170.42051696777344, "loss": 0.3024, "rewards/accuracies": 1.0, "rewards/chosen": -0.9899467825889587, "rewards/margins": 2.274631977081299, "rewards/rejected": -3.2645788192749023, "step": 4610 }, { "epoch": 0.54, "learning_rate": 1.4119522853430967e-07, "logits/chosen": -2.561711072921753, "logits/rejected": -2.775409460067749, "logps/chosen": -359.1471862792969, "logps/rejected": -271.50225830078125, "loss": 0.2375, "rewards/accuracies": 0.875, "rewards/chosen": -0.707338273525238, "rewards/margins": 3.361579418182373, "rewards/rejected": -4.068917751312256, "step": 4611 }, { "epoch": 0.54, "learning_rate": 1.411597968583914e-07, "logits/chosen": -2.586505651473999, "logits/rejected": -2.336860179901123, "logps/chosen": -319.4090576171875, "logps/rejected": -272.719482421875, "loss": 0.5567, "rewards/accuracies": 0.625, "rewards/chosen": -1.7085068225860596, "rewards/margins": 1.4417999982833862, "rewards/rejected": -3.1503071784973145, "step": 4612 }, { "epoch": 0.54, "learning_rate": 1.4112436518247312e-07, "logits/chosen": -2.1734559535980225, "logits/rejected": -2.213317394256592, "logps/chosen": -371.52569580078125, "logps/rejected": -415.55364990234375, "loss": 0.2925, "rewards/accuracies": 0.875, "rewards/chosen": -0.3319993019104004, "rewards/margins": 2.779252052307129, "rewards/rejected": -3.1112515926361084, "step": 4613 }, { "epoch": 0.54, "learning_rate": 1.4108893350655484e-07, "logits/chosen": -2.28395676612854, "logits/rejected": -2.529528856277466, "logps/chosen": -212.99978637695312, "logps/rejected": -212.356689453125, "loss": 0.5409, "rewards/accuracies": 0.75, "rewards/chosen": -0.9368150234222412, "rewards/margins": 2.1434431076049805, "rewards/rejected": -3.0802578926086426, "step": 4614 }, { "epoch": 0.54, "learning_rate": 1.410535018306366e-07, "logits/chosen": -2.139815092086792, "logits/rejected": -1.7968776226043701, "logps/chosen": -289.36297607421875, "logps/rejected": -325.81439208984375, "loss": 0.5681, "rewards/accuracies": 0.625, "rewards/chosen": -1.2703001499176025, "rewards/margins": 2.5040411949157715, "rewards/rejected": -3.774341344833374, "step": 4615 }, { "epoch": 0.54, "learning_rate": 1.410180701547183e-07, "logits/chosen": -1.8466485738754272, "logits/rejected": -1.7690751552581787, "logps/chosen": -411.89129638671875, "logps/rejected": -319.6438293457031, "loss": 0.4478, "rewards/accuracies": 0.875, "rewards/chosen": -1.4901155233383179, "rewards/margins": 2.7898993492126465, "rewards/rejected": -4.280014991760254, "step": 4616 }, { "epoch": 0.54, "learning_rate": 1.4098263847880006e-07, "logits/chosen": -1.8233273029327393, "logits/rejected": -1.7715038061141968, "logps/chosen": -186.27394104003906, "logps/rejected": -228.92381286621094, "loss": 1.0429, "rewards/accuracies": 0.5, "rewards/chosen": -1.3475981950759888, "rewards/margins": 0.15446317195892334, "rewards/rejected": -1.502061367034912, "step": 4617 }, { "epoch": 0.54, "learning_rate": 1.4094720680288178e-07, "logits/chosen": -2.3038926124572754, "logits/rejected": -1.980542540550232, "logps/chosen": -259.0091857910156, "logps/rejected": -387.54620361328125, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": -0.36256974935531616, "rewards/margins": 3.3528194427490234, "rewards/rejected": -3.7153892517089844, "step": 4618 }, { "epoch": 0.54, "learning_rate": 1.409117751269635e-07, "logits/chosen": -2.442410469055176, "logits/rejected": -2.2034428119659424, "logps/chosen": -261.4752197265625, "logps/rejected": -327.8642883300781, "loss": 0.149, "rewards/accuracies": 1.0, "rewards/chosen": -0.3813626766204834, "rewards/margins": 3.0767712593078613, "rewards/rejected": -3.458134174346924, "step": 4619 }, { "epoch": 0.54, "learning_rate": 1.4087634345104523e-07, "logits/chosen": -2.488736867904663, "logits/rejected": -2.3907792568206787, "logps/chosen": -170.3992156982422, "logps/rejected": -167.36102294921875, "loss": 0.3796, "rewards/accuracies": 0.875, "rewards/chosen": -0.5916995406150818, "rewards/margins": 1.6883834600448608, "rewards/rejected": -2.280082941055298, "step": 4620 }, { "epoch": 0.54, "learning_rate": 1.4084091177512695e-07, "logits/chosen": -1.9242665767669678, "logits/rejected": -1.6609541177749634, "logps/chosen": -314.72540283203125, "logps/rejected": -314.41571044921875, "loss": 0.5002, "rewards/accuracies": 0.875, "rewards/chosen": -0.3276955783367157, "rewards/margins": 1.068794846534729, "rewards/rejected": -1.3964905738830566, "step": 4621 }, { "epoch": 0.54, "learning_rate": 1.4080548009920867e-07, "logits/chosen": -1.883446455001831, "logits/rejected": -2.056710720062256, "logps/chosen": -283.36773681640625, "logps/rejected": -178.4051513671875, "loss": 0.5106, "rewards/accuracies": 0.5, "rewards/chosen": -1.092626690864563, "rewards/margins": 1.982210397720337, "rewards/rejected": -3.0748372077941895, "step": 4622 }, { "epoch": 0.54, "learning_rate": 1.4077004842329042e-07, "logits/chosen": -2.173339366912842, "logits/rejected": -2.258835792541504, "logps/chosen": -275.67449951171875, "logps/rejected": -359.0852966308594, "loss": 0.4557, "rewards/accuracies": 0.875, "rewards/chosen": -1.3687057495117188, "rewards/margins": 1.614367961883545, "rewards/rejected": -2.9830737113952637, "step": 4623 }, { "epoch": 0.54, "learning_rate": 1.4073461674737214e-07, "logits/chosen": -2.7102890014648438, "logits/rejected": -2.9394314289093018, "logps/chosen": -182.8770294189453, "logps/rejected": -151.47421264648438, "loss": 0.2996, "rewards/accuracies": 0.875, "rewards/chosen": -0.30483877658843994, "rewards/margins": 1.905653715133667, "rewards/rejected": -2.2104926109313965, "step": 4624 }, { "epoch": 0.54, "learning_rate": 1.4069918507145386e-07, "logits/chosen": -2.2822136878967285, "logits/rejected": -2.544072389602661, "logps/chosen": -244.72389221191406, "logps/rejected": -256.6878967285156, "loss": 0.6769, "rewards/accuracies": 0.625, "rewards/chosen": -1.010319471359253, "rewards/margins": 1.0113540887832642, "rewards/rejected": -2.0216736793518066, "step": 4625 }, { "epoch": 0.54, "learning_rate": 1.406637533955356e-07, "logits/chosen": -2.387230634689331, "logits/rejected": -2.511632204055786, "logps/chosen": -270.7560119628906, "logps/rejected": -293.37054443359375, "loss": 0.5062, "rewards/accuracies": 0.75, "rewards/chosen": -0.7429796457290649, "rewards/margins": 1.2903060913085938, "rewards/rejected": -2.033285617828369, "step": 4626 }, { "epoch": 0.54, "learning_rate": 1.4062832171961733e-07, "logits/chosen": -1.650795817375183, "logits/rejected": -1.7438623905181885, "logps/chosen": -258.4410095214844, "logps/rejected": -238.97073364257812, "loss": 0.4304, "rewards/accuracies": 0.875, "rewards/chosen": -0.5347384214401245, "rewards/margins": 2.169691801071167, "rewards/rejected": -2.704430103302002, "step": 4627 }, { "epoch": 0.54, "learning_rate": 1.4059289004369906e-07, "logits/chosen": -2.37996506690979, "logits/rejected": -2.5870180130004883, "logps/chosen": -318.6277770996094, "logps/rejected": -313.6494140625, "loss": 0.258, "rewards/accuracies": 0.875, "rewards/chosen": 0.08037789165973663, "rewards/margins": 2.271235704421997, "rewards/rejected": -2.1908576488494873, "step": 4628 }, { "epoch": 0.54, "learning_rate": 1.405574583677808e-07, "logits/chosen": -2.2125773429870605, "logits/rejected": -2.4222450256347656, "logps/chosen": -283.5555725097656, "logps/rejected": -285.8691711425781, "loss": 0.4535, "rewards/accuracies": 0.75, "rewards/chosen": -1.113051176071167, "rewards/margins": 1.6194679737091064, "rewards/rejected": -2.7325193881988525, "step": 4629 }, { "epoch": 0.54, "learning_rate": 1.4052202669186253e-07, "logits/chosen": -2.072291135787964, "logits/rejected": -2.268136501312256, "logps/chosen": -505.2510681152344, "logps/rejected": -397.8077697753906, "loss": 0.3293, "rewards/accuracies": 0.75, "rewards/chosen": -1.1535784006118774, "rewards/margins": 2.3244216442108154, "rewards/rejected": -3.477999687194824, "step": 4630 }, { "epoch": 0.54, "learning_rate": 1.4048659501594425e-07, "logits/chosen": -2.4458773136138916, "logits/rejected": -2.586926221847534, "logps/chosen": -231.9804229736328, "logps/rejected": -313.34405517578125, "loss": 0.1751, "rewards/accuracies": 0.875, "rewards/chosen": -1.3741075992584229, "rewards/margins": 5.375438690185547, "rewards/rejected": -6.749546527862549, "step": 4631 }, { "epoch": 0.54, "learning_rate": 1.4045116334002597e-07, "logits/chosen": -1.819756031036377, "logits/rejected": -1.9599452018737793, "logps/chosen": -321.6615295410156, "logps/rejected": -229.91293334960938, "loss": 0.2858, "rewards/accuracies": 0.75, "rewards/chosen": -0.32386845350265503, "rewards/margins": 3.019430160522461, "rewards/rejected": -3.3432986736297607, "step": 4632 }, { "epoch": 0.54, "learning_rate": 1.404157316641077e-07, "logits/chosen": -2.6913838386535645, "logits/rejected": -2.713521957397461, "logps/chosen": -297.64520263671875, "logps/rejected": -294.28460693359375, "loss": 0.2131, "rewards/accuracies": 1.0, "rewards/chosen": 0.054552413523197174, "rewards/margins": 2.3052990436553955, "rewards/rejected": -2.250746726989746, "step": 4633 }, { "epoch": 0.54, "learning_rate": 1.4038029998818942e-07, "logits/chosen": -2.0544722080230713, "logits/rejected": -2.0543596744537354, "logps/chosen": -301.5877685546875, "logps/rejected": -407.3842468261719, "loss": 0.3057, "rewards/accuracies": 0.875, "rewards/chosen": -0.7007627487182617, "rewards/margins": 2.8040943145751953, "rewards/rejected": -3.504857301712036, "step": 4634 }, { "epoch": 0.54, "learning_rate": 1.4034486831227116e-07, "logits/chosen": -1.9697105884552002, "logits/rejected": -2.2083659172058105, "logps/chosen": -353.19879150390625, "logps/rejected": -354.5985412597656, "loss": 0.65, "rewards/accuracies": 0.625, "rewards/chosen": -0.9303594827651978, "rewards/margins": 1.6507543325424194, "rewards/rejected": -2.581113815307617, "step": 4635 }, { "epoch": 0.54, "learning_rate": 1.4030943663635289e-07, "logits/chosen": -2.281644821166992, "logits/rejected": -2.199143171310425, "logps/chosen": -99.43919372558594, "logps/rejected": -201.25436401367188, "loss": 0.3422, "rewards/accuracies": 0.75, "rewards/chosen": -0.3843918442726135, "rewards/margins": 1.9153754711151123, "rewards/rejected": -2.299767255783081, "step": 4636 }, { "epoch": 0.54, "learning_rate": 1.402740049604346e-07, "logits/chosen": -2.4119224548339844, "logits/rejected": -2.4123127460479736, "logps/chosen": -291.9481506347656, "logps/rejected": -280.2415466308594, "loss": 0.4058, "rewards/accuracies": 0.75, "rewards/chosen": -0.13311628997325897, "rewards/margins": 2.172309637069702, "rewards/rejected": -2.3054258823394775, "step": 4637 }, { "epoch": 0.54, "learning_rate": 1.4023857328451636e-07, "logits/chosen": -1.960544466972351, "logits/rejected": -2.323045492172241, "logps/chosen": -338.75592041015625, "logps/rejected": -333.9443359375, "loss": 0.5377, "rewards/accuracies": 0.75, "rewards/chosen": -1.0249533653259277, "rewards/margins": 1.5529719591140747, "rewards/rejected": -2.577925205230713, "step": 4638 }, { "epoch": 0.54, "learning_rate": 1.4020314160859808e-07, "logits/chosen": -2.006157875061035, "logits/rejected": -2.267416477203369, "logps/chosen": -610.21728515625, "logps/rejected": -498.1329650878906, "loss": 0.145, "rewards/accuracies": 1.0, "rewards/chosen": -0.391754150390625, "rewards/margins": 2.823577880859375, "rewards/rejected": -3.21533203125, "step": 4639 }, { "epoch": 0.54, "learning_rate": 1.4016770993267983e-07, "logits/chosen": -2.5585439205169678, "logits/rejected": -2.4318017959594727, "logps/chosen": -200.657958984375, "logps/rejected": -302.74822998046875, "loss": 0.2177, "rewards/accuracies": 0.875, "rewards/chosen": -0.2509542405605316, "rewards/margins": 2.479598045349121, "rewards/rejected": -2.7305524349212646, "step": 4640 }, { "epoch": 0.54, "learning_rate": 1.4013227825676155e-07, "logits/chosen": -2.2312424182891846, "logits/rejected": -1.9213645458221436, "logps/chosen": -193.17190551757812, "logps/rejected": -327.869140625, "loss": 0.2002, "rewards/accuracies": 1.0, "rewards/chosen": -0.567861795425415, "rewards/margins": 2.085513114929199, "rewards/rejected": -2.6533749103546143, "step": 4641 }, { "epoch": 0.54, "learning_rate": 1.4009684658084327e-07, "logits/chosen": -2.707710027694702, "logits/rejected": -2.7420802116394043, "logps/chosen": -356.7666931152344, "logps/rejected": -231.7493896484375, "loss": 0.316, "rewards/accuracies": 0.875, "rewards/chosen": -0.1910066157579422, "rewards/margins": 1.9518628120422363, "rewards/rejected": -2.142869472503662, "step": 4642 }, { "epoch": 0.54, "learning_rate": 1.40061414904925e-07, "logits/chosen": -2.6794207096099854, "logits/rejected": -2.668992519378662, "logps/chosen": -300.8631286621094, "logps/rejected": -225.80633544921875, "loss": 0.4465, "rewards/accuracies": 0.75, "rewards/chosen": -1.5382250547409058, "rewards/margins": 1.927200436592102, "rewards/rejected": -3.465425491333008, "step": 4643 }, { "epoch": 0.54, "learning_rate": 1.4002598322900672e-07, "logits/chosen": -2.4029369354248047, "logits/rejected": -2.6959807872772217, "logps/chosen": -396.9256286621094, "logps/rejected": -257.730224609375, "loss": 0.205, "rewards/accuracies": 0.875, "rewards/chosen": -0.36849308013916016, "rewards/margins": 2.134237766265869, "rewards/rejected": -2.5027308464050293, "step": 4644 }, { "epoch": 0.54, "learning_rate": 1.3999055155308844e-07, "logits/chosen": -1.8774166107177734, "logits/rejected": -2.235417366027832, "logps/chosen": -432.1070251464844, "logps/rejected": -348.6182861328125, "loss": 0.7733, "rewards/accuracies": 0.75, "rewards/chosen": -1.7194104194641113, "rewards/margins": 2.225750684738159, "rewards/rejected": -3.9451608657836914, "step": 4645 }, { "epoch": 0.54, "learning_rate": 1.399551198771702e-07, "logits/chosen": -2.5603129863739014, "logits/rejected": -2.5383780002593994, "logps/chosen": -239.2616729736328, "logps/rejected": -214.18389892578125, "loss": 0.3212, "rewards/accuracies": 0.875, "rewards/chosen": -1.0744171142578125, "rewards/margins": 1.9511613845825195, "rewards/rejected": -3.025578498840332, "step": 4646 }, { "epoch": 0.54, "learning_rate": 1.399196882012519e-07, "logits/chosen": -2.394824504852295, "logits/rejected": -2.751594305038452, "logps/chosen": -489.6965026855469, "logps/rejected": -259.13055419921875, "loss": 0.2797, "rewards/accuracies": 0.875, "rewards/chosen": -0.5138629674911499, "rewards/margins": 2.649033546447754, "rewards/rejected": -3.1628966331481934, "step": 4647 }, { "epoch": 0.54, "learning_rate": 1.3988425652533363e-07, "logits/chosen": -2.4699554443359375, "logits/rejected": -2.6205644607543945, "logps/chosen": -131.47962951660156, "logps/rejected": -106.37858581542969, "loss": 0.562, "rewards/accuracies": 0.625, "rewards/chosen": -0.9913872480392456, "rewards/margins": 1.586961030960083, "rewards/rejected": -2.578348159790039, "step": 4648 }, { "epoch": 0.54, "learning_rate": 1.3984882484941538e-07, "logits/chosen": -2.3861100673675537, "logits/rejected": -2.3119096755981445, "logps/chosen": -201.8434600830078, "logps/rejected": -240.46542358398438, "loss": 0.6545, "rewards/accuracies": 0.625, "rewards/chosen": -0.9000641703605652, "rewards/margins": 0.5334414839744568, "rewards/rejected": -1.4335055351257324, "step": 4649 }, { "epoch": 0.54, "learning_rate": 1.398133931734971e-07, "logits/chosen": -2.801379680633545, "logits/rejected": -2.4593918323516846, "logps/chosen": -176.1024169921875, "logps/rejected": -420.041748046875, "loss": 0.3521, "rewards/accuracies": 0.875, "rewards/chosen": -0.6378292441368103, "rewards/margins": 2.119774341583252, "rewards/rejected": -2.757603168487549, "step": 4650 }, { "epoch": 0.54, "learning_rate": 1.3977796149757882e-07, "logits/chosen": -2.583631992340088, "logits/rejected": -2.66263747215271, "logps/chosen": -291.7658996582031, "logps/rejected": -235.30886840820312, "loss": 0.2924, "rewards/accuracies": 0.875, "rewards/chosen": -0.6669131517410278, "rewards/margins": 2.2146811485290527, "rewards/rejected": -2.881594181060791, "step": 4651 }, { "epoch": 0.54, "learning_rate": 1.3974252982166057e-07, "logits/chosen": -1.9886165857315063, "logits/rejected": -2.325502634048462, "logps/chosen": -229.90707397460938, "logps/rejected": -199.75570678710938, "loss": 1.1368, "rewards/accuracies": 0.625, "rewards/chosen": -1.4292023181915283, "rewards/margins": 0.6366958618164062, "rewards/rejected": -2.0658984184265137, "step": 4652 }, { "epoch": 0.54, "learning_rate": 1.397070981457423e-07, "logits/chosen": -2.815807342529297, "logits/rejected": -2.6347432136535645, "logps/chosen": -175.99752807617188, "logps/rejected": -253.67666625976562, "loss": 0.5253, "rewards/accuracies": 0.875, "rewards/chosen": -0.6726453900337219, "rewards/margins": 2.51926326751709, "rewards/rejected": -3.191908359527588, "step": 4653 }, { "epoch": 0.54, "learning_rate": 1.3967166646982402e-07, "logits/chosen": -1.6682302951812744, "logits/rejected": -1.4753551483154297, "logps/chosen": -253.54891967773438, "logps/rejected": -253.41029357910156, "loss": 0.2703, "rewards/accuracies": 0.875, "rewards/chosen": -0.5850856304168701, "rewards/margins": 2.6897430419921875, "rewards/rejected": -3.2748286724090576, "step": 4654 }, { "epoch": 0.54, "learning_rate": 1.3963623479390574e-07, "logits/chosen": -1.7669637203216553, "logits/rejected": -2.0146374702453613, "logps/chosen": -185.62783813476562, "logps/rejected": -218.7101593017578, "loss": 0.806, "rewards/accuracies": 0.75, "rewards/chosen": -0.8467280864715576, "rewards/margins": 2.182321071624756, "rewards/rejected": -3.0290489196777344, "step": 4655 }, { "epoch": 0.54, "learning_rate": 1.3960080311798746e-07, "logits/chosen": -2.125117778778076, "logits/rejected": -1.9678285121917725, "logps/chosen": -237.483154296875, "logps/rejected": -243.57662963867188, "loss": 0.7358, "rewards/accuracies": 0.75, "rewards/chosen": -1.8209328651428223, "rewards/margins": 0.890250027179718, "rewards/rejected": -2.7111828327178955, "step": 4656 }, { "epoch": 0.54, "learning_rate": 1.3956537144206918e-07, "logits/chosen": -2.0038907527923584, "logits/rejected": -2.0493030548095703, "logps/chosen": -457.79010009765625, "logps/rejected": -313.58001708984375, "loss": 0.1384, "rewards/accuracies": 1.0, "rewards/chosen": -0.8392704725265503, "rewards/margins": 2.8022499084472656, "rewards/rejected": -3.6415202617645264, "step": 4657 }, { "epoch": 0.54, "learning_rate": 1.3952993976615093e-07, "logits/chosen": -2.310718297958374, "logits/rejected": -2.179898977279663, "logps/chosen": -281.24920654296875, "logps/rejected": -340.8134460449219, "loss": 0.5465, "rewards/accuracies": 0.625, "rewards/chosen": -0.508171796798706, "rewards/margins": 1.042060375213623, "rewards/rejected": -1.550232172012329, "step": 4658 }, { "epoch": 0.54, "learning_rate": 1.3949450809023265e-07, "logits/chosen": -1.5177257061004639, "logits/rejected": -1.9910404682159424, "logps/chosen": -389.15533447265625, "logps/rejected": -218.09645080566406, "loss": 0.2477, "rewards/accuracies": 0.875, "rewards/chosen": -0.6872767210006714, "rewards/margins": 1.7604118585586548, "rewards/rejected": -2.447688579559326, "step": 4659 }, { "epoch": 0.54, "learning_rate": 1.394590764143144e-07, "logits/chosen": -2.5335559844970703, "logits/rejected": -2.4346776008605957, "logps/chosen": -249.88473510742188, "logps/rejected": -276.7055358886719, "loss": 0.367, "rewards/accuracies": 0.875, "rewards/chosen": -0.6952215433120728, "rewards/margins": 2.3285441398620605, "rewards/rejected": -3.0237655639648438, "step": 4660 }, { "epoch": 0.54, "learning_rate": 1.3942364473839613e-07, "logits/chosen": -2.495677947998047, "logits/rejected": -2.275684118270874, "logps/chosen": -242.21231079101562, "logps/rejected": -212.3388671875, "loss": 0.3379, "rewards/accuracies": 0.75, "rewards/chosen": -0.8226564526557922, "rewards/margins": 2.640901565551758, "rewards/rejected": -3.463557720184326, "step": 4661 }, { "epoch": 0.54, "learning_rate": 1.3938821306247785e-07, "logits/chosen": -2.7891387939453125, "logits/rejected": -2.728475332260132, "logps/chosen": -292.6061096191406, "logps/rejected": -174.230224609375, "loss": 0.572, "rewards/accuracies": 0.75, "rewards/chosen": -1.2863845825195312, "rewards/margins": 0.8544105887413025, "rewards/rejected": -2.1407952308654785, "step": 4662 }, { "epoch": 0.54, "learning_rate": 1.3935278138655957e-07, "logits/chosen": -2.4309675693511963, "logits/rejected": -2.6914401054382324, "logps/chosen": -381.6159362792969, "logps/rejected": -181.98321533203125, "loss": 0.5442, "rewards/accuracies": 0.625, "rewards/chosen": -1.3593753576278687, "rewards/margins": 1.077452301979065, "rewards/rejected": -2.4368276596069336, "step": 4663 }, { "epoch": 0.54, "learning_rate": 1.3931734971064132e-07, "logits/chosen": -2.058439016342163, "logits/rejected": -1.926108956336975, "logps/chosen": -470.2954406738281, "logps/rejected": -412.153564453125, "loss": 0.2975, "rewards/accuracies": 0.875, "rewards/chosen": -0.5503692626953125, "rewards/margins": 3.129492998123169, "rewards/rejected": -3.6798622608184814, "step": 4664 }, { "epoch": 0.54, "learning_rate": 1.3928191803472304e-07, "logits/chosen": -1.5848381519317627, "logits/rejected": -1.6899890899658203, "logps/chosen": -250.3109893798828, "logps/rejected": -230.82565307617188, "loss": 0.5727, "rewards/accuracies": 0.5, "rewards/chosen": -1.1744331121444702, "rewards/margins": 0.5719725489616394, "rewards/rejected": -1.7464056015014648, "step": 4665 }, { "epoch": 0.54, "learning_rate": 1.3924648635880476e-07, "logits/chosen": -2.111471652984619, "logits/rejected": -2.015836715698242, "logps/chosen": -176.54747009277344, "logps/rejected": -309.32293701171875, "loss": 0.3586, "rewards/accuracies": 0.875, "rewards/chosen": -1.5898741483688354, "rewards/margins": 7.034238815307617, "rewards/rejected": -8.624113082885742, "step": 4666 }, { "epoch": 0.54, "learning_rate": 1.3921105468288648e-07, "logits/chosen": -2.110163927078247, "logits/rejected": -1.9838898181915283, "logps/chosen": -241.6524658203125, "logps/rejected": -274.14068603515625, "loss": 0.1318, "rewards/accuracies": 1.0, "rewards/chosen": -0.2469930648803711, "rewards/margins": 3.0673561096191406, "rewards/rejected": -3.3143491744995117, "step": 4667 }, { "epoch": 0.54, "learning_rate": 1.391756230069682e-07, "logits/chosen": -1.9278613328933716, "logits/rejected": -2.2332634925842285, "logps/chosen": -288.4875793457031, "logps/rejected": -289.66937255859375, "loss": 0.3831, "rewards/accuracies": 0.75, "rewards/chosen": -0.5456650853157043, "rewards/margins": 2.013261556625366, "rewards/rejected": -2.558926582336426, "step": 4668 }, { "epoch": 0.54, "learning_rate": 1.3914019133104996e-07, "logits/chosen": -3.0415592193603516, "logits/rejected": -3.0117597579956055, "logps/chosen": -269.69622802734375, "logps/rejected": -287.282470703125, "loss": 0.6016, "rewards/accuracies": 0.875, "rewards/chosen": -1.765359878540039, "rewards/margins": 1.8982093334197998, "rewards/rejected": -3.663569450378418, "step": 4669 }, { "epoch": 0.54, "learning_rate": 1.3910475965513168e-07, "logits/chosen": -2.3206262588500977, "logits/rejected": -2.4040768146514893, "logps/chosen": -436.53192138671875, "logps/rejected": -506.7242431640625, "loss": 0.3097, "rewards/accuracies": 0.875, "rewards/chosen": -0.7087792158126831, "rewards/margins": 3.074434757232666, "rewards/rejected": -3.7832140922546387, "step": 4670 }, { "epoch": 0.54, "learning_rate": 1.3906932797921343e-07, "logits/chosen": -2.6716668605804443, "logits/rejected": -2.3149075508117676, "logps/chosen": -356.38958740234375, "logps/rejected": -274.56005859375, "loss": 0.1791, "rewards/accuracies": 1.0, "rewards/chosen": -0.4026784896850586, "rewards/margins": 3.229523181915283, "rewards/rejected": -3.632201671600342, "step": 4671 }, { "epoch": 0.54, "learning_rate": 1.3903389630329515e-07, "logits/chosen": -1.9392061233520508, "logits/rejected": -2.220991611480713, "logps/chosen": -500.70428466796875, "logps/rejected": -271.74481201171875, "loss": 0.326, "rewards/accuracies": 0.75, "rewards/chosen": -0.2649695575237274, "rewards/margins": 2.1823410987854004, "rewards/rejected": -2.44731068611145, "step": 4672 }, { "epoch": 0.54, "learning_rate": 1.3899846462737687e-07, "logits/chosen": -2.6198482513427734, "logits/rejected": -2.862626314163208, "logps/chosen": -349.3892822265625, "logps/rejected": -256.7392883300781, "loss": 0.1228, "rewards/accuracies": 1.0, "rewards/chosen": -0.8212947249412537, "rewards/margins": 2.8251118659973145, "rewards/rejected": -3.6464061737060547, "step": 4673 }, { "epoch": 0.54, "learning_rate": 1.389630329514586e-07, "logits/chosen": -1.9378951787948608, "logits/rejected": -1.890894889831543, "logps/chosen": -110.85163879394531, "logps/rejected": -160.02059936523438, "loss": 0.9412, "rewards/accuracies": 0.875, "rewards/chosen": -0.6994227766990662, "rewards/margins": 0.14792358875274658, "rewards/rejected": -0.8473464250564575, "step": 4674 }, { "epoch": 0.54, "learning_rate": 1.3892760127554034e-07, "logits/chosen": -2.189277410507202, "logits/rejected": -2.0028505325317383, "logps/chosen": -216.20535278320312, "logps/rejected": -396.2156982421875, "loss": 0.6757, "rewards/accuracies": 0.625, "rewards/chosen": -0.5002159476280212, "rewards/margins": 0.8469866514205933, "rewards/rejected": -1.3472027778625488, "step": 4675 }, { "epoch": 0.54, "learning_rate": 1.3889216959962206e-07, "logits/chosen": -2.6757733821868896, "logits/rejected": -2.8256826400756836, "logps/chosen": -168.033203125, "logps/rejected": -135.13600158691406, "loss": 0.4276, "rewards/accuracies": 0.875, "rewards/chosen": -0.9229838848114014, "rewards/margins": 1.540279746055603, "rewards/rejected": -2.463263511657715, "step": 4676 }, { "epoch": 0.54, "learning_rate": 1.3885673792370378e-07, "logits/chosen": -2.509420871734619, "logits/rejected": -2.6747665405273438, "logps/chosen": -218.97369384765625, "logps/rejected": -202.3203887939453, "loss": 0.3462, "rewards/accuracies": 0.75, "rewards/chosen": -0.9100318551063538, "rewards/margins": 2.2310853004455566, "rewards/rejected": -3.1411168575286865, "step": 4677 }, { "epoch": 0.54, "learning_rate": 1.388213062477855e-07, "logits/chosen": -2.481872320175171, "logits/rejected": -2.381617307662964, "logps/chosen": -302.3758850097656, "logps/rejected": -365.77215576171875, "loss": 0.4199, "rewards/accuracies": 0.875, "rewards/chosen": -1.096962332725525, "rewards/margins": 1.1807422637939453, "rewards/rejected": -2.2777047157287598, "step": 4678 }, { "epoch": 0.54, "learning_rate": 1.3878587457186723e-07, "logits/chosen": -2.293200969696045, "logits/rejected": -1.9653453826904297, "logps/chosen": -179.04466247558594, "logps/rejected": -383.2584533691406, "loss": 0.3732, "rewards/accuracies": 0.75, "rewards/chosen": -2.005512237548828, "rewards/margins": 2.4436733722686768, "rewards/rejected": -4.449185371398926, "step": 4679 }, { "epoch": 0.54, "learning_rate": 1.3875044289594898e-07, "logits/chosen": -2.8790366649627686, "logits/rejected": -2.766206741333008, "logps/chosen": -244.59999084472656, "logps/rejected": -306.4901428222656, "loss": 0.3218, "rewards/accuracies": 0.875, "rewards/chosen": -0.8741177916526794, "rewards/margins": 1.7850703001022339, "rewards/rejected": -2.6591880321502686, "step": 4680 }, { "epoch": 0.54, "learning_rate": 1.387150112200307e-07, "logits/chosen": -2.016005754470825, "logits/rejected": -1.959578275680542, "logps/chosen": -217.69479370117188, "logps/rejected": -301.8997802734375, "loss": 0.5459, "rewards/accuracies": 0.625, "rewards/chosen": -0.8471136093139648, "rewards/margins": 1.2172771692276, "rewards/rejected": -2.0643906593322754, "step": 4681 }, { "epoch": 0.54, "learning_rate": 1.3867957954411242e-07, "logits/chosen": -2.341768980026245, "logits/rejected": -2.155132293701172, "logps/chosen": -264.2984313964844, "logps/rejected": -386.89739990234375, "loss": 0.2159, "rewards/accuracies": 0.875, "rewards/chosen": -0.1487504541873932, "rewards/margins": 2.5707225799560547, "rewards/rejected": -2.719472885131836, "step": 4682 }, { "epoch": 0.54, "learning_rate": 1.3864414786819417e-07, "logits/chosen": -2.7579574584960938, "logits/rejected": -2.5138916969299316, "logps/chosen": -239.72943115234375, "logps/rejected": -270.1615295410156, "loss": 0.1711, "rewards/accuracies": 1.0, "rewards/chosen": -0.47561120986938477, "rewards/margins": 2.6028003692626953, "rewards/rejected": -3.07841157913208, "step": 4683 }, { "epoch": 0.54, "learning_rate": 1.386087161922759e-07, "logits/chosen": -2.7259068489074707, "logits/rejected": -2.6352415084838867, "logps/chosen": -407.44451904296875, "logps/rejected": -316.0409240722656, "loss": 0.2891, "rewards/accuracies": 0.75, "rewards/chosen": -1.430635690689087, "rewards/margins": 2.992611885070801, "rewards/rejected": -4.423247337341309, "step": 4684 }, { "epoch": 0.55, "learning_rate": 1.3857328451635761e-07, "logits/chosen": -1.679327368736267, "logits/rejected": -1.8333697319030762, "logps/chosen": -228.38636779785156, "logps/rejected": -256.81866455078125, "loss": 0.3637, "rewards/accuracies": 0.875, "rewards/chosen": -0.03204267472028732, "rewards/margins": 2.2337722778320312, "rewards/rejected": -2.265814781188965, "step": 4685 }, { "epoch": 0.55, "learning_rate": 1.3853785284043934e-07, "logits/chosen": -2.3476147651672363, "logits/rejected": -2.2785134315490723, "logps/chosen": -134.52001953125, "logps/rejected": -199.80426025390625, "loss": 0.7286, "rewards/accuracies": 0.625, "rewards/chosen": -1.2154312133789062, "rewards/margins": 1.8141987323760986, "rewards/rejected": -3.029629707336426, "step": 4686 }, { "epoch": 0.55, "learning_rate": 1.3850242116452109e-07, "logits/chosen": -2.2354538440704346, "logits/rejected": -1.8113107681274414, "logps/chosen": -361.2423400878906, "logps/rejected": -377.1839904785156, "loss": 0.201, "rewards/accuracies": 1.0, "rewards/chosen": -0.7918009757995605, "rewards/margins": 2.529996871948242, "rewards/rejected": -3.3217978477478027, "step": 4687 }, { "epoch": 0.55, "learning_rate": 1.384669894886028e-07, "logits/chosen": -2.1865453720092773, "logits/rejected": -2.2889394760131836, "logps/chosen": -268.517333984375, "logps/rejected": -435.1257019042969, "loss": 0.6081, "rewards/accuracies": 0.625, "rewards/chosen": -1.2378592491149902, "rewards/margins": 0.7339532375335693, "rewards/rejected": -1.9718124866485596, "step": 4688 }, { "epoch": 0.55, "learning_rate": 1.3843155781268453e-07, "logits/chosen": -2.7144253253936768, "logits/rejected": -2.900031566619873, "logps/chosen": -371.5560607910156, "logps/rejected": -180.7022247314453, "loss": 0.2157, "rewards/accuracies": 1.0, "rewards/chosen": 0.1344684213399887, "rewards/margins": 1.8678932189941406, "rewards/rejected": -1.7334246635437012, "step": 4689 }, { "epoch": 0.55, "learning_rate": 1.3839612613676625e-07, "logits/chosen": -2.494483709335327, "logits/rejected": -2.3249659538269043, "logps/chosen": -207.7342071533203, "logps/rejected": -250.6759033203125, "loss": 0.2622, "rewards/accuracies": 0.875, "rewards/chosen": -0.8450804948806763, "rewards/margins": 2.7127411365509033, "rewards/rejected": -3.557821750640869, "step": 4690 }, { "epoch": 0.55, "learning_rate": 1.38360694460848e-07, "logits/chosen": -2.713585615158081, "logits/rejected": -2.4504356384277344, "logps/chosen": -360.0636901855469, "logps/rejected": -457.97808837890625, "loss": 0.0938, "rewards/accuracies": 1.0, "rewards/chosen": -1.4330697059631348, "rewards/margins": 4.008028984069824, "rewards/rejected": -5.441098690032959, "step": 4691 }, { "epoch": 0.55, "learning_rate": 1.3832526278492972e-07, "logits/chosen": -2.8238022327423096, "logits/rejected": -2.8606646060943604, "logps/chosen": -124.88056182861328, "logps/rejected": -162.61724853515625, "loss": 0.5353, "rewards/accuracies": 0.75, "rewards/chosen": -1.848009705543518, "rewards/margins": 1.0346837043762207, "rewards/rejected": -2.882693290710449, "step": 4692 }, { "epoch": 0.55, "learning_rate": 1.3828983110901144e-07, "logits/chosen": -2.189094066619873, "logits/rejected": -2.449833631515503, "logps/chosen": -208.824951171875, "logps/rejected": -168.07872009277344, "loss": 0.8181, "rewards/accuracies": 0.75, "rewards/chosen": -1.0666577816009521, "rewards/margins": 1.6400065422058105, "rewards/rejected": -2.7066640853881836, "step": 4693 }, { "epoch": 0.55, "learning_rate": 1.382543994330932e-07, "logits/chosen": -1.7471110820770264, "logits/rejected": -2.0172762870788574, "logps/chosen": -298.93609619140625, "logps/rejected": -297.5482482910156, "loss": 0.8843, "rewards/accuracies": 0.625, "rewards/chosen": -2.0852224826812744, "rewards/margins": 1.1522774696350098, "rewards/rejected": -3.237499952316284, "step": 4694 }, { "epoch": 0.55, "learning_rate": 1.3821896775717492e-07, "logits/chosen": -2.2259209156036377, "logits/rejected": -2.430196762084961, "logps/chosen": -203.9559326171875, "logps/rejected": -199.2470703125, "loss": 0.4614, "rewards/accuracies": 0.75, "rewards/chosen": -1.1949338912963867, "rewards/margins": 2.9847564697265625, "rewards/rejected": -4.179690361022949, "step": 4695 }, { "epoch": 0.55, "learning_rate": 1.3818353608125664e-07, "logits/chosen": -1.8091020584106445, "logits/rejected": -1.9365923404693604, "logps/chosen": -361.91192626953125, "logps/rejected": -263.3755187988281, "loss": 0.1276, "rewards/accuracies": 1.0, "rewards/chosen": -0.5637113451957703, "rewards/margins": 3.1464719772338867, "rewards/rejected": -3.7101831436157227, "step": 4696 }, { "epoch": 0.55, "learning_rate": 1.3814810440533836e-07, "logits/chosen": -1.8597486019134521, "logits/rejected": -2.0216007232666016, "logps/chosen": -285.0445861816406, "logps/rejected": -253.76336669921875, "loss": 0.3094, "rewards/accuracies": 0.75, "rewards/chosen": -0.599220871925354, "rewards/margins": 3.3847367763519287, "rewards/rejected": -3.9839577674865723, "step": 4697 }, { "epoch": 0.55, "learning_rate": 1.3811267272942008e-07, "logits/chosen": -2.0665149688720703, "logits/rejected": -2.2510690689086914, "logps/chosen": -484.36590576171875, "logps/rejected": -386.00677490234375, "loss": 0.4242, "rewards/accuracies": 0.75, "rewards/chosen": -0.7342631816864014, "rewards/margins": 1.4957157373428345, "rewards/rejected": -2.2299790382385254, "step": 4698 }, { "epoch": 0.55, "learning_rate": 1.3807724105350183e-07, "logits/chosen": -2.6513190269470215, "logits/rejected": -2.3595383167266846, "logps/chosen": -214.05511474609375, "logps/rejected": -370.12371826171875, "loss": 0.3292, "rewards/accuracies": 0.75, "rewards/chosen": -0.8504027128219604, "rewards/margins": 3.853813409805298, "rewards/rejected": -4.704216480255127, "step": 4699 }, { "epoch": 0.55, "learning_rate": 1.3804180937758355e-07, "logits/chosen": -2.69779634475708, "logits/rejected": -2.609147548675537, "logps/chosen": -289.087158203125, "logps/rejected": -250.0310821533203, "loss": 0.1946, "rewards/accuracies": 1.0, "rewards/chosen": -1.4147191047668457, "rewards/margins": 2.46034574508667, "rewards/rejected": -3.8750648498535156, "step": 4700 }, { "epoch": 0.55, "learning_rate": 1.3800637770166527e-07, "logits/chosen": -2.40474009513855, "logits/rejected": -2.6059038639068604, "logps/chosen": -146.51187133789062, "logps/rejected": -143.2216033935547, "loss": 0.782, "rewards/accuracies": 0.625, "rewards/chosen": -2.0190911293029785, "rewards/margins": 0.3009237051010132, "rewards/rejected": -2.3200149536132812, "step": 4701 }, { "epoch": 0.55, "learning_rate": 1.37970946025747e-07, "logits/chosen": -1.8882046937942505, "logits/rejected": -1.9263839721679688, "logps/chosen": -469.96246337890625, "logps/rejected": -328.43328857421875, "loss": 0.5794, "rewards/accuracies": 0.75, "rewards/chosen": -1.505752444267273, "rewards/margins": 0.8482300043106079, "rewards/rejected": -2.353982448577881, "step": 4702 }, { "epoch": 0.55, "learning_rate": 1.3793551434982875e-07, "logits/chosen": -2.121264696121216, "logits/rejected": -2.2644078731536865, "logps/chosen": -405.9455871582031, "logps/rejected": -291.00872802734375, "loss": 0.3004, "rewards/accuracies": 0.875, "rewards/chosen": -0.8322147130966187, "rewards/margins": 2.7103993892669678, "rewards/rejected": -3.542613983154297, "step": 4703 }, { "epoch": 0.55, "learning_rate": 1.3790008267391047e-07, "logits/chosen": -1.8543821573257446, "logits/rejected": -1.9413037300109863, "logps/chosen": -197.53298950195312, "logps/rejected": -193.8294677734375, "loss": 0.3714, "rewards/accuracies": 1.0, "rewards/chosen": -0.4261317849159241, "rewards/margins": 1.0919054746627808, "rewards/rejected": -1.5180373191833496, "step": 4704 }, { "epoch": 0.55, "learning_rate": 1.3786465099799222e-07, "logits/chosen": -2.7397310733795166, "logits/rejected": -2.6462512016296387, "logps/chosen": -422.5434265136719, "logps/rejected": -268.18060302734375, "loss": 0.7928, "rewards/accuracies": 0.5, "rewards/chosen": -0.715246319770813, "rewards/margins": 1.0085017681121826, "rewards/rejected": -1.7237482070922852, "step": 4705 }, { "epoch": 0.55, "learning_rate": 1.3782921932207394e-07, "logits/chosen": -2.4241180419921875, "logits/rejected": -2.1593668460845947, "logps/chosen": -154.4390106201172, "logps/rejected": -309.258544921875, "loss": 0.36, "rewards/accuracies": 0.75, "rewards/chosen": -0.7173805236816406, "rewards/margins": 1.622138500213623, "rewards/rejected": -2.3395190238952637, "step": 4706 }, { "epoch": 0.55, "learning_rate": 1.3779378764615566e-07, "logits/chosen": -2.3804335594177246, "logits/rejected": -2.7067556381225586, "logps/chosen": -326.6612243652344, "logps/rejected": -178.73138427734375, "loss": 0.9157, "rewards/accuracies": 0.5, "rewards/chosen": -1.6315735578536987, "rewards/margins": 1.2318180799484253, "rewards/rejected": -2.863391637802124, "step": 4707 }, { "epoch": 0.55, "learning_rate": 1.3775835597023738e-07, "logits/chosen": -1.770754337310791, "logits/rejected": -1.8549132347106934, "logps/chosen": -521.8173828125, "logps/rejected": -479.09674072265625, "loss": 0.262, "rewards/accuracies": 0.875, "rewards/chosen": -1.039337396621704, "rewards/margins": 2.142674207687378, "rewards/rejected": -3.182011604309082, "step": 4708 }, { "epoch": 0.55, "learning_rate": 1.377229242943191e-07, "logits/chosen": -2.6086838245391846, "logits/rejected": -2.4712724685668945, "logps/chosen": -66.51902770996094, "logps/rejected": -156.0189208984375, "loss": 0.2868, "rewards/accuracies": 0.875, "rewards/chosen": -1.0162909030914307, "rewards/margins": 1.5565400123596191, "rewards/rejected": -2.57283091545105, "step": 4709 }, { "epoch": 0.55, "learning_rate": 1.3768749261840085e-07, "logits/chosen": -2.367035150527954, "logits/rejected": -2.553280830383301, "logps/chosen": -342.42669677734375, "logps/rejected": -290.5960998535156, "loss": 0.2666, "rewards/accuracies": 0.875, "rewards/chosen": -1.1148009300231934, "rewards/margins": 2.8048911094665527, "rewards/rejected": -3.919692039489746, "step": 4710 }, { "epoch": 0.55, "learning_rate": 1.3765206094248258e-07, "logits/chosen": -2.0284957885742188, "logits/rejected": -1.9715826511383057, "logps/chosen": -243.28414916992188, "logps/rejected": -251.28651428222656, "loss": 0.2181, "rewards/accuracies": 1.0, "rewards/chosen": -0.5676429271697998, "rewards/margins": 1.7687158584594727, "rewards/rejected": -2.3363590240478516, "step": 4711 }, { "epoch": 0.55, "learning_rate": 1.376166292665643e-07, "logits/chosen": -2.0973639488220215, "logits/rejected": -2.218979835510254, "logps/chosen": -269.1494445800781, "logps/rejected": -254.00726318359375, "loss": 0.4269, "rewards/accuracies": 0.75, "rewards/chosen": -0.6902115345001221, "rewards/margins": 1.4208730459213257, "rewards/rejected": -2.111084461212158, "step": 4712 }, { "epoch": 0.55, "learning_rate": 1.3758119759064602e-07, "logits/chosen": -2.340134382247925, "logits/rejected": -2.2144041061401367, "logps/chosen": -252.20388793945312, "logps/rejected": -304.486572265625, "loss": 0.6198, "rewards/accuracies": 0.75, "rewards/chosen": -1.1328516006469727, "rewards/margins": 1.602946162223816, "rewards/rejected": -2.735797643661499, "step": 4713 }, { "epoch": 0.55, "learning_rate": 1.3754576591472777e-07, "logits/chosen": -2.6191177368164062, "logits/rejected": -2.5225486755371094, "logps/chosen": -202.28863525390625, "logps/rejected": -244.33389282226562, "loss": 0.2784, "rewards/accuracies": 0.875, "rewards/chosen": -1.9176826477050781, "rewards/margins": 2.280834913253784, "rewards/rejected": -4.198517799377441, "step": 4714 }, { "epoch": 0.55, "learning_rate": 1.375103342388095e-07, "logits/chosen": -2.647246837615967, "logits/rejected": -2.380121946334839, "logps/chosen": -420.21881103515625, "logps/rejected": -450.28741455078125, "loss": 0.0914, "rewards/accuracies": 1.0, "rewards/chosen": 0.055417682975530624, "rewards/margins": 3.6382675170898438, "rewards/rejected": -3.5828499794006348, "step": 4715 }, { "epoch": 0.55, "learning_rate": 1.3747490256289124e-07, "logits/chosen": -1.604865312576294, "logits/rejected": -2.0989575386047363, "logps/chosen": -501.2213439941406, "logps/rejected": -253.29586791992188, "loss": 0.7798, "rewards/accuracies": 0.625, "rewards/chosen": -1.2217791080474854, "rewards/margins": 1.272356390953064, "rewards/rejected": -2.4941353797912598, "step": 4716 }, { "epoch": 0.55, "learning_rate": 1.3743947088697296e-07, "logits/chosen": -2.3008460998535156, "logits/rejected": -2.3037962913513184, "logps/chosen": -292.83856201171875, "logps/rejected": -360.1231689453125, "loss": 0.2687, "rewards/accuracies": 0.875, "rewards/chosen": -0.9344025254249573, "rewards/margins": 2.109858274459839, "rewards/rejected": -3.0442609786987305, "step": 4717 }, { "epoch": 0.55, "learning_rate": 1.3740403921105468e-07, "logits/chosen": -1.9269311428070068, "logits/rejected": -1.9583978652954102, "logps/chosen": -271.16510009765625, "logps/rejected": -293.09326171875, "loss": 0.2718, "rewards/accuracies": 0.75, "rewards/chosen": -1.1010171175003052, "rewards/margins": 2.7805399894714355, "rewards/rejected": -3.8815574645996094, "step": 4718 }, { "epoch": 0.55, "learning_rate": 1.373686075351364e-07, "logits/chosen": -2.4832842350006104, "logits/rejected": -2.166980504989624, "logps/chosen": -187.32382202148438, "logps/rejected": -352.697021484375, "loss": 0.3458, "rewards/accuracies": 0.75, "rewards/chosen": -0.8647884130477905, "rewards/margins": 2.2715275287628174, "rewards/rejected": -3.1363158226013184, "step": 4719 }, { "epoch": 0.55, "learning_rate": 1.3733317585921813e-07, "logits/chosen": -2.352123975753784, "logits/rejected": -2.5097413063049316, "logps/chosen": -181.93553161621094, "logps/rejected": -152.21847534179688, "loss": 1.1801, "rewards/accuracies": 0.625, "rewards/chosen": -1.9667631387710571, "rewards/margins": 0.5230333805084229, "rewards/rejected": -2.4897966384887695, "step": 4720 }, { "epoch": 0.55, "learning_rate": 1.3729774418329985e-07, "logits/chosen": -2.164426803588867, "logits/rejected": -1.9944628477096558, "logps/chosen": -359.66937255859375, "logps/rejected": -393.1251220703125, "loss": 0.5241, "rewards/accuracies": 0.875, "rewards/chosen": -1.1965500116348267, "rewards/margins": 0.8995264172554016, "rewards/rejected": -2.096076488494873, "step": 4721 }, { "epoch": 0.55, "learning_rate": 1.372623125073816e-07, "logits/chosen": -2.9935872554779053, "logits/rejected": -3.0012893676757812, "logps/chosen": -99.36637878417969, "logps/rejected": -197.3230743408203, "loss": 0.2159, "rewards/accuracies": 0.875, "rewards/chosen": 0.11535173654556274, "rewards/margins": 3.1650686264038086, "rewards/rejected": -3.0497169494628906, "step": 4722 }, { "epoch": 0.55, "learning_rate": 1.3722688083146332e-07, "logits/chosen": -2.070338249206543, "logits/rejected": -2.2104783058166504, "logps/chosen": -357.1840515136719, "logps/rejected": -304.708251953125, "loss": 0.4139, "rewards/accuracies": 0.875, "rewards/chosen": -0.4379235804080963, "rewards/margins": 1.8970017433166504, "rewards/rejected": -2.334925413131714, "step": 4723 }, { "epoch": 0.55, "learning_rate": 1.3719144915554504e-07, "logits/chosen": -2.229290008544922, "logits/rejected": -2.1809616088867188, "logps/chosen": -360.4341125488281, "logps/rejected": -275.5603332519531, "loss": 0.3426, "rewards/accuracies": 0.625, "rewards/chosen": 0.06838201731443405, "rewards/margins": 2.1629765033721924, "rewards/rejected": -2.0945944786071777, "step": 4724 }, { "epoch": 0.55, "learning_rate": 1.371560174796268e-07, "logits/chosen": -2.532602310180664, "logits/rejected": -2.4986557960510254, "logps/chosen": -221.0703125, "logps/rejected": -182.41732788085938, "loss": 0.317, "rewards/accuracies": 0.875, "rewards/chosen": -0.8399113416671753, "rewards/margins": 1.9128930568695068, "rewards/rejected": -2.7528042793273926, "step": 4725 }, { "epoch": 0.55, "learning_rate": 1.371205858037085e-07, "logits/chosen": -2.1773502826690674, "logits/rejected": -2.338958740234375, "logps/chosen": -322.22442626953125, "logps/rejected": -322.6492919921875, "loss": 0.5321, "rewards/accuracies": 0.75, "rewards/chosen": -0.862139105796814, "rewards/margins": 1.6975926160812378, "rewards/rejected": -2.5597317218780518, "step": 4726 }, { "epoch": 0.55, "learning_rate": 1.3708515412779024e-07, "logits/chosen": -2.2198712825775146, "logits/rejected": -2.6037635803222656, "logps/chosen": -304.070556640625, "logps/rejected": -226.2404327392578, "loss": 0.5697, "rewards/accuracies": 0.75, "rewards/chosen": -1.0426892042160034, "rewards/margins": 2.4267349243164062, "rewards/rejected": -3.46942400932312, "step": 4727 }, { "epoch": 0.55, "learning_rate": 1.3704972245187198e-07, "logits/chosen": -3.0108132362365723, "logits/rejected": -2.9294087886810303, "logps/chosen": -197.01809692382812, "logps/rejected": -132.1771240234375, "loss": 0.9249, "rewards/accuracies": 0.5, "rewards/chosen": -1.764293909072876, "rewards/margins": 0.0008790642023086548, "rewards/rejected": -1.7651731967926025, "step": 4728 }, { "epoch": 0.55, "learning_rate": 1.370142907759537e-07, "logits/chosen": -1.5378971099853516, "logits/rejected": -2.147143840789795, "logps/chosen": -494.5244140625, "logps/rejected": -325.88848876953125, "loss": 0.4332, "rewards/accuracies": 0.875, "rewards/chosen": -0.5163771510124207, "rewards/margins": 1.7954797744750977, "rewards/rejected": -2.311856985092163, "step": 4729 }, { "epoch": 0.55, "learning_rate": 1.3697885910003543e-07, "logits/chosen": -1.9862951040267944, "logits/rejected": -1.9674913883209229, "logps/chosen": -266.73162841796875, "logps/rejected": -300.3538818359375, "loss": 0.8311, "rewards/accuracies": 0.75, "rewards/chosen": -1.9404716491699219, "rewards/margins": 0.46428078413009644, "rewards/rejected": -2.404752254486084, "step": 4730 }, { "epoch": 0.55, "learning_rate": 1.3694342742411715e-07, "logits/chosen": -2.472410202026367, "logits/rejected": -2.536156415939331, "logps/chosen": -448.71917724609375, "logps/rejected": -346.9501037597656, "loss": 0.1916, "rewards/accuracies": 1.0, "rewards/chosen": -0.9298628568649292, "rewards/margins": 1.9345213174819946, "rewards/rejected": -2.864384174346924, "step": 4731 }, { "epoch": 0.55, "learning_rate": 1.3690799574819887e-07, "logits/chosen": -2.6315619945526123, "logits/rejected": -2.526231527328491, "logps/chosen": -118.54723358154297, "logps/rejected": -164.55337524414062, "loss": 0.438, "rewards/accuracies": 0.75, "rewards/chosen": -0.24835769832134247, "rewards/margins": 0.9508744478225708, "rewards/rejected": -1.1992321014404297, "step": 4732 }, { "epoch": 0.55, "learning_rate": 1.368725640722806e-07, "logits/chosen": -2.6464016437530518, "logits/rejected": -2.5927212238311768, "logps/chosen": -124.32131958007812, "logps/rejected": -206.15338134765625, "loss": 0.4494, "rewards/accuracies": 0.875, "rewards/chosen": -0.8903191685676575, "rewards/margins": 1.5845801830291748, "rewards/rejected": -2.4748992919921875, "step": 4733 }, { "epoch": 0.55, "learning_rate": 1.3683713239636234e-07, "logits/chosen": -1.7996280193328857, "logits/rejected": -1.994588017463684, "logps/chosen": -335.42291259765625, "logps/rejected": -233.84210205078125, "loss": 1.1187, "rewards/accuracies": 0.875, "rewards/chosen": -1.6044812202453613, "rewards/margins": 0.3401778042316437, "rewards/rejected": -1.9446592330932617, "step": 4734 }, { "epoch": 0.55, "learning_rate": 1.3680170072044407e-07, "logits/chosen": -2.220944404602051, "logits/rejected": -2.4758520126342773, "logps/chosen": -356.01873779296875, "logps/rejected": -304.5627136230469, "loss": 0.1366, "rewards/accuracies": 1.0, "rewards/chosen": -0.4546217620372772, "rewards/margins": 2.6972200870513916, "rewards/rejected": -3.151841878890991, "step": 4735 }, { "epoch": 0.55, "learning_rate": 1.367662690445258e-07, "logits/chosen": -2.905578136444092, "logits/rejected": -2.778278350830078, "logps/chosen": -343.2167053222656, "logps/rejected": -269.4004821777344, "loss": 0.4228, "rewards/accuracies": 0.75, "rewards/chosen": -1.3371495008468628, "rewards/margins": 1.0278897285461426, "rewards/rejected": -2.365039348602295, "step": 4736 }, { "epoch": 0.55, "learning_rate": 1.3673083736860754e-07, "logits/chosen": -2.912095308303833, "logits/rejected": -2.8371334075927734, "logps/chosen": -344.013671875, "logps/rejected": -376.49749755859375, "loss": 0.0898, "rewards/accuracies": 1.0, "rewards/chosen": -0.872443675994873, "rewards/margins": 2.715012550354004, "rewards/rejected": -3.587456226348877, "step": 4737 }, { "epoch": 0.55, "learning_rate": 1.3669540569268926e-07, "logits/chosen": -2.9281985759735107, "logits/rejected": -2.8419225215911865, "logps/chosen": -478.85797119140625, "logps/rejected": -256.4886474609375, "loss": 0.4489, "rewards/accuracies": 0.75, "rewards/chosen": -1.0417009592056274, "rewards/margins": 2.2816970348358154, "rewards/rejected": -3.3233981132507324, "step": 4738 }, { "epoch": 0.55, "learning_rate": 1.3665997401677098e-07, "logits/chosen": -1.8992586135864258, "logits/rejected": -1.7674548625946045, "logps/chosen": -416.49981689453125, "logps/rejected": -396.7330322265625, "loss": 0.3283, "rewards/accuracies": 0.875, "rewards/chosen": -0.38935524225234985, "rewards/margins": 2.3906056880950928, "rewards/rejected": -2.779960870742798, "step": 4739 }, { "epoch": 0.55, "learning_rate": 1.3662454234085273e-07, "logits/chosen": -2.078162670135498, "logits/rejected": -2.069307327270508, "logps/chosen": -545.8004150390625, "logps/rejected": -420.8433532714844, "loss": 0.2558, "rewards/accuracies": 0.875, "rewards/chosen": -0.8821578025817871, "rewards/margins": 1.8744664192199707, "rewards/rejected": -2.756624221801758, "step": 4740 }, { "epoch": 0.55, "learning_rate": 1.3658911066493445e-07, "logits/chosen": -2.3431458473205566, "logits/rejected": -2.387803554534912, "logps/chosen": -176.67857360839844, "logps/rejected": -194.964111328125, "loss": 0.2211, "rewards/accuracies": 0.875, "rewards/chosen": -0.5938167572021484, "rewards/margins": 2.395796298980713, "rewards/rejected": -2.9896132946014404, "step": 4741 }, { "epoch": 0.55, "learning_rate": 1.3655367898901617e-07, "logits/chosen": -2.518853187561035, "logits/rejected": -2.461524724960327, "logps/chosen": -139.9394989013672, "logps/rejected": -213.30068969726562, "loss": 0.1163, "rewards/accuracies": 1.0, "rewards/chosen": -1.2897334098815918, "rewards/margins": 3.6248629093170166, "rewards/rejected": -4.914596080780029, "step": 4742 }, { "epoch": 0.55, "learning_rate": 1.365182473130979e-07, "logits/chosen": -2.4313161373138428, "logits/rejected": -2.3159656524658203, "logps/chosen": -251.19412231445312, "logps/rejected": -220.88414001464844, "loss": 0.2752, "rewards/accuracies": 0.875, "rewards/chosen": -1.0568633079528809, "rewards/margins": 2.02512788772583, "rewards/rejected": -3.081991195678711, "step": 4743 }, { "epoch": 0.55, "learning_rate": 1.3648281563717962e-07, "logits/chosen": -2.3864357471466064, "logits/rejected": -2.261124610900879, "logps/chosen": -267.0919189453125, "logps/rejected": -232.5522918701172, "loss": 0.4799, "rewards/accuracies": 0.75, "rewards/chosen": -1.3057079315185547, "rewards/margins": 3.514192581176758, "rewards/rejected": -4.8199005126953125, "step": 4744 }, { "epoch": 0.55, "learning_rate": 1.3644738396126137e-07, "logits/chosen": -2.3787012100219727, "logits/rejected": -2.3395094871520996, "logps/chosen": -430.6835021972656, "logps/rejected": -283.790283203125, "loss": 0.2827, "rewards/accuracies": 0.875, "rewards/chosen": -0.6828256845474243, "rewards/margins": 2.3110737800598145, "rewards/rejected": -2.993899345397949, "step": 4745 }, { "epoch": 0.55, "learning_rate": 1.364119522853431e-07, "logits/chosen": -1.7207090854644775, "logits/rejected": -1.6952297687530518, "logps/chosen": -426.17578125, "logps/rejected": -274.68133544921875, "loss": 0.5377, "rewards/accuracies": 0.625, "rewards/chosen": -0.6128113269805908, "rewards/margins": 1.0564544200897217, "rewards/rejected": -1.6692657470703125, "step": 4746 }, { "epoch": 0.55, "learning_rate": 1.363765206094248e-07, "logits/chosen": -2.077094793319702, "logits/rejected": -2.339543581008911, "logps/chosen": -223.3112030029297, "logps/rejected": -96.54255676269531, "loss": 0.7157, "rewards/accuracies": 0.75, "rewards/chosen": -0.44476884603500366, "rewards/margins": 0.5776172876358032, "rewards/rejected": -1.022386074066162, "step": 4747 }, { "epoch": 0.55, "learning_rate": 1.3634108893350656e-07, "logits/chosen": -1.8230571746826172, "logits/rejected": -1.8413864374160767, "logps/chosen": -512.7545166015625, "logps/rejected": -464.5118408203125, "loss": 0.3476, "rewards/accuracies": 0.875, "rewards/chosen": -0.9805612564086914, "rewards/margins": 2.1053879261016846, "rewards/rejected": -3.085949182510376, "step": 4748 }, { "epoch": 0.55, "learning_rate": 1.3630565725758828e-07, "logits/chosen": -2.8873627185821533, "logits/rejected": -2.7339439392089844, "logps/chosen": -261.1265869140625, "logps/rejected": -279.05743408203125, "loss": 0.4337, "rewards/accuracies": 0.875, "rewards/chosen": -0.8882686495780945, "rewards/margins": 2.327861785888672, "rewards/rejected": -3.216130495071411, "step": 4749 }, { "epoch": 0.55, "learning_rate": 1.3627022558167e-07, "logits/chosen": -2.30623197555542, "logits/rejected": -2.5109710693359375, "logps/chosen": -171.9878692626953, "logps/rejected": -147.439208984375, "loss": 0.6633, "rewards/accuracies": 0.625, "rewards/chosen": -0.5696159601211548, "rewards/margins": 0.6513950824737549, "rewards/rejected": -1.2210111618041992, "step": 4750 }, { "epoch": 0.55, "learning_rate": 1.3623479390575175e-07, "logits/chosen": -2.2310564517974854, "logits/rejected": -2.0480871200561523, "logps/chosen": -239.11392211914062, "logps/rejected": -265.633544921875, "loss": 0.2221, "rewards/accuracies": 1.0, "rewards/chosen": -0.5356967449188232, "rewards/margins": 1.997672438621521, "rewards/rejected": -2.5333690643310547, "step": 4751 }, { "epoch": 0.55, "learning_rate": 1.3619936222983347e-07, "logits/chosen": -2.2992911338806152, "logits/rejected": -2.5622501373291016, "logps/chosen": -367.2425842285156, "logps/rejected": -227.37042236328125, "loss": 0.1911, "rewards/accuracies": 1.0, "rewards/chosen": -0.2531914710998535, "rewards/margins": 2.030552864074707, "rewards/rejected": -2.2837443351745605, "step": 4752 }, { "epoch": 0.55, "learning_rate": 1.361639305539152e-07, "logits/chosen": -2.5114846229553223, "logits/rejected": -2.123502492904663, "logps/chosen": -174.34194946289062, "logps/rejected": -305.5697326660156, "loss": 0.5361, "rewards/accuracies": 0.75, "rewards/chosen": -1.3875906467437744, "rewards/margins": 1.8313299417495728, "rewards/rejected": -3.2189204692840576, "step": 4753 }, { "epoch": 0.55, "learning_rate": 1.3612849887799692e-07, "logits/chosen": -2.238067865371704, "logits/rejected": -2.1840944290161133, "logps/chosen": -224.88929748535156, "logps/rejected": -303.97052001953125, "loss": 0.1842, "rewards/accuracies": 1.0, "rewards/chosen": -0.4682645797729492, "rewards/margins": 2.753000020980835, "rewards/rejected": -3.2212648391723633, "step": 4754 }, { "epoch": 0.55, "learning_rate": 1.3609306720207864e-07, "logits/chosen": -2.536285877227783, "logits/rejected": -2.688255786895752, "logps/chosen": -292.4500427246094, "logps/rejected": -170.01797485351562, "loss": 0.4144, "rewards/accuracies": 0.625, "rewards/chosen": -1.0263276100158691, "rewards/margins": 1.9013323783874512, "rewards/rejected": -2.9276599884033203, "step": 4755 }, { "epoch": 0.55, "learning_rate": 1.3605763552616036e-07, "logits/chosen": -2.3432910442352295, "logits/rejected": -2.406921863555908, "logps/chosen": -202.25233459472656, "logps/rejected": -272.17340087890625, "loss": 0.2041, "rewards/accuracies": 1.0, "rewards/chosen": -0.37929433584213257, "rewards/margins": 3.773375988006592, "rewards/rejected": -4.152669906616211, "step": 4756 }, { "epoch": 0.55, "learning_rate": 1.360222038502421e-07, "logits/chosen": -2.0680954456329346, "logits/rejected": -2.0888071060180664, "logps/chosen": -401.71368408203125, "logps/rejected": -423.43536376953125, "loss": 0.1192, "rewards/accuracies": 1.0, "rewards/chosen": -0.6841655969619751, "rewards/margins": 2.5926270484924316, "rewards/rejected": -3.2767927646636963, "step": 4757 }, { "epoch": 0.55, "learning_rate": 1.3598677217432383e-07, "logits/chosen": -2.6374566555023193, "logits/rejected": -2.5904271602630615, "logps/chosen": -162.10430908203125, "logps/rejected": -238.37954711914062, "loss": 0.3869, "rewards/accuracies": 0.75, "rewards/chosen": -1.4259127378463745, "rewards/margins": 2.817147970199585, "rewards/rejected": -4.24306058883667, "step": 4758 }, { "epoch": 0.55, "learning_rate": 1.3595134049840558e-07, "logits/chosen": -2.7538092136383057, "logits/rejected": -2.74617338180542, "logps/chosen": -102.0278549194336, "logps/rejected": -153.0843505859375, "loss": 0.3391, "rewards/accuracies": 0.875, "rewards/chosen": -1.0910463333129883, "rewards/margins": 2.8075437545776367, "rewards/rejected": -3.898589849472046, "step": 4759 }, { "epoch": 0.55, "learning_rate": 1.359159088224873e-07, "logits/chosen": -2.9400899410247803, "logits/rejected": -2.6833057403564453, "logps/chosen": -284.5726318359375, "logps/rejected": -229.6984405517578, "loss": 0.2192, "rewards/accuracies": 0.875, "rewards/chosen": -0.8604476451873779, "rewards/margins": 2.701650381088257, "rewards/rejected": -3.5620980262756348, "step": 4760 }, { "epoch": 0.55, "learning_rate": 1.3588047714656903e-07, "logits/chosen": -2.5903029441833496, "logits/rejected": -2.21964168548584, "logps/chosen": -93.775634765625, "logps/rejected": -223.4354248046875, "loss": 0.2285, "rewards/accuracies": 0.875, "rewards/chosen": -0.7259180545806885, "rewards/margins": 3.0232691764831543, "rewards/rejected": -3.749187469482422, "step": 4761 }, { "epoch": 0.55, "learning_rate": 1.3584504547065075e-07, "logits/chosen": -2.806459426879883, "logits/rejected": -2.6126503944396973, "logps/chosen": -295.15472412109375, "logps/rejected": -313.27337646484375, "loss": 0.269, "rewards/accuracies": 0.875, "rewards/chosen": -0.7095727920532227, "rewards/margins": 2.582773208618164, "rewards/rejected": -3.292346239089966, "step": 4762 }, { "epoch": 0.55, "learning_rate": 1.358096137947325e-07, "logits/chosen": -2.0713870525360107, "logits/rejected": -2.4536073207855225, "logps/chosen": -366.06610107421875, "logps/rejected": -269.1665344238281, "loss": 0.4244, "rewards/accuracies": 0.75, "rewards/chosen": -0.8157504796981812, "rewards/margins": 2.530465602874756, "rewards/rejected": -3.3462159633636475, "step": 4763 }, { "epoch": 0.55, "learning_rate": 1.3577418211881422e-07, "logits/chosen": -2.917710065841675, "logits/rejected": -2.874675989151001, "logps/chosen": -202.31622314453125, "logps/rejected": -312.83831787109375, "loss": 0.1906, "rewards/accuracies": 1.0, "rewards/chosen": -1.237585425376892, "rewards/margins": 2.4143404960632324, "rewards/rejected": -3.651926040649414, "step": 4764 }, { "epoch": 0.55, "learning_rate": 1.3573875044289594e-07, "logits/chosen": -2.169966220855713, "logits/rejected": -2.217585563659668, "logps/chosen": -247.94467163085938, "logps/rejected": -266.3641357421875, "loss": 0.6586, "rewards/accuracies": 0.625, "rewards/chosen": -1.3053717613220215, "rewards/margins": 0.8699508905410767, "rewards/rejected": -2.1753227710723877, "step": 4765 }, { "epoch": 0.55, "learning_rate": 1.3570331876697766e-07, "logits/chosen": -2.398695707321167, "logits/rejected": -2.5150675773620605, "logps/chosen": -229.18109130859375, "logps/rejected": -276.24615478515625, "loss": 0.3697, "rewards/accuracies": 0.875, "rewards/chosen": -0.5252599120140076, "rewards/margins": 2.5795392990112305, "rewards/rejected": -3.1047987937927246, "step": 4766 }, { "epoch": 0.55, "learning_rate": 1.3566788709105938e-07, "logits/chosen": -1.8994649648666382, "logits/rejected": -1.733847737312317, "logps/chosen": -327.17645263671875, "logps/rejected": -403.698486328125, "loss": 0.1568, "rewards/accuracies": 1.0, "rewards/chosen": -0.6792294979095459, "rewards/margins": 2.6266674995422363, "rewards/rejected": -3.3058969974517822, "step": 4767 }, { "epoch": 0.55, "learning_rate": 1.3563245541514113e-07, "logits/chosen": -2.5133297443389893, "logits/rejected": -2.310063362121582, "logps/chosen": -200.16807556152344, "logps/rejected": -413.283203125, "loss": 0.3174, "rewards/accuracies": 1.0, "rewards/chosen": -0.5755150318145752, "rewards/margins": 3.5376152992248535, "rewards/rejected": -4.113130569458008, "step": 4768 }, { "epoch": 0.55, "learning_rate": 1.3559702373922286e-07, "logits/chosen": -2.288316249847412, "logits/rejected": -2.5732479095458984, "logps/chosen": -516.8251953125, "logps/rejected": -342.52630615234375, "loss": 0.2207, "rewards/accuracies": 0.875, "rewards/chosen": -0.3692968487739563, "rewards/margins": 3.4012653827667236, "rewards/rejected": -3.770561933517456, "step": 4769 }, { "epoch": 0.55, "learning_rate": 1.355615920633046e-07, "logits/chosen": -2.344167470932007, "logits/rejected": -2.4296417236328125, "logps/chosen": -193.3877716064453, "logps/rejected": -227.2036895751953, "loss": 0.2942, "rewards/accuracies": 1.0, "rewards/chosen": -1.7753921747207642, "rewards/margins": 1.6635266542434692, "rewards/rejected": -3.4389188289642334, "step": 4770 }, { "epoch": 0.56, "learning_rate": 1.3552616038738633e-07, "logits/chosen": -1.6669954061508179, "logits/rejected": -1.824260950088501, "logps/chosen": -262.6709899902344, "logps/rejected": -247.2725830078125, "loss": 0.6974, "rewards/accuracies": 0.625, "rewards/chosen": -1.8396344184875488, "rewards/margins": 1.2455623149871826, "rewards/rejected": -3.0851964950561523, "step": 4771 }, { "epoch": 0.56, "learning_rate": 1.3549072871146805e-07, "logits/chosen": -2.3296360969543457, "logits/rejected": -2.1609036922454834, "logps/chosen": -272.47064208984375, "logps/rejected": -259.47625732421875, "loss": 0.42, "rewards/accuracies": 0.625, "rewards/chosen": -1.517580270767212, "rewards/margins": 1.9313591718673706, "rewards/rejected": -3.448939561843872, "step": 4772 }, { "epoch": 0.56, "learning_rate": 1.3545529703554977e-07, "logits/chosen": -2.0885562896728516, "logits/rejected": -2.075273036956787, "logps/chosen": -419.1817321777344, "logps/rejected": -517.1770629882812, "loss": 0.1955, "rewards/accuracies": 0.875, "rewards/chosen": -0.3954195976257324, "rewards/margins": 3.0735950469970703, "rewards/rejected": -3.4690146446228027, "step": 4773 }, { "epoch": 0.56, "learning_rate": 1.354198653596315e-07, "logits/chosen": -1.9208297729492188, "logits/rejected": -1.8786579370498657, "logps/chosen": -452.5075378417969, "logps/rejected": -447.77264404296875, "loss": 0.5056, "rewards/accuracies": 0.75, "rewards/chosen": -0.6049402952194214, "rewards/margins": 2.9190948009490967, "rewards/rejected": -3.5240354537963867, "step": 4774 }, { "epoch": 0.56, "learning_rate": 1.3538443368371324e-07, "logits/chosen": -2.519357442855835, "logits/rejected": -2.3466873168945312, "logps/chosen": -233.63198852539062, "logps/rejected": -249.32061767578125, "loss": 0.129, "rewards/accuracies": 1.0, "rewards/chosen": -0.7203342914581299, "rewards/margins": 3.793567180633545, "rewards/rejected": -4.513901710510254, "step": 4775 }, { "epoch": 0.56, "learning_rate": 1.3534900200779496e-07, "logits/chosen": -1.8717317581176758, "logits/rejected": -2.5876221656799316, "logps/chosen": -420.82080078125, "logps/rejected": -180.8782958984375, "loss": 0.4866, "rewards/accuracies": 0.75, "rewards/chosen": -1.7359131574630737, "rewards/margins": 1.2096350193023682, "rewards/rejected": -2.9455482959747314, "step": 4776 }, { "epoch": 0.56, "learning_rate": 1.3531357033187669e-07, "logits/chosen": -2.190742015838623, "logits/rejected": -2.647852897644043, "logps/chosen": -365.7681884765625, "logps/rejected": -149.70355224609375, "loss": 0.3357, "rewards/accuracies": 0.75, "rewards/chosen": -0.9764113426208496, "rewards/margins": 1.548378825187683, "rewards/rejected": -2.5247902870178223, "step": 4777 }, { "epoch": 0.56, "learning_rate": 1.352781386559584e-07, "logits/chosen": -2.2249386310577393, "logits/rejected": -2.38775372505188, "logps/chosen": -311.9693908691406, "logps/rejected": -337.367919921875, "loss": 0.2288, "rewards/accuracies": 1.0, "rewards/chosen": -0.015069536864757538, "rewards/margins": 2.728260040283203, "rewards/rejected": -2.7433295249938965, "step": 4778 }, { "epoch": 0.56, "learning_rate": 1.3524270698004016e-07, "logits/chosen": -2.4999024868011475, "logits/rejected": -2.50923490524292, "logps/chosen": -427.73797607421875, "logps/rejected": -306.4647521972656, "loss": 0.543, "rewards/accuracies": 0.625, "rewards/chosen": -0.5849019289016724, "rewards/margins": 0.9016056060791016, "rewards/rejected": -1.486507534980774, "step": 4779 }, { "epoch": 0.56, "learning_rate": 1.3520727530412188e-07, "logits/chosen": -2.5646355152130127, "logits/rejected": -2.732895851135254, "logps/chosen": -287.9400634765625, "logps/rejected": -222.17672729492188, "loss": 0.1746, "rewards/accuracies": 0.875, "rewards/chosen": -1.2416011095046997, "rewards/margins": 2.3624701499938965, "rewards/rejected": -3.6040711402893066, "step": 4780 }, { "epoch": 0.56, "learning_rate": 1.351718436282036e-07, "logits/chosen": -2.016615390777588, "logits/rejected": -1.7967348098754883, "logps/chosen": -378.30487060546875, "logps/rejected": -256.3493347167969, "loss": 0.1898, "rewards/accuracies": 1.0, "rewards/chosen": -0.31932157278060913, "rewards/margins": 3.3373537063598633, "rewards/rejected": -3.656675338745117, "step": 4781 }, { "epoch": 0.56, "learning_rate": 1.3513641195228535e-07, "logits/chosen": -2.447911262512207, "logits/rejected": -2.3912229537963867, "logps/chosen": -186.66824340820312, "logps/rejected": -194.14654541015625, "loss": 0.1741, "rewards/accuracies": 1.0, "rewards/chosen": -0.300320029258728, "rewards/margins": 3.0581483840942383, "rewards/rejected": -3.3584680557250977, "step": 4782 }, { "epoch": 0.56, "learning_rate": 1.3510098027636707e-07, "logits/chosen": -2.2296741008758545, "logits/rejected": -2.618868112564087, "logps/chosen": -329.87921142578125, "logps/rejected": -176.23382568359375, "loss": 0.4824, "rewards/accuracies": 0.875, "rewards/chosen": -0.5391423106193542, "rewards/margins": 1.2522938251495361, "rewards/rejected": -1.7914361953735352, "step": 4783 }, { "epoch": 0.56, "learning_rate": 1.350655486004488e-07, "logits/chosen": -2.0495851039886475, "logits/rejected": -1.8757215738296509, "logps/chosen": -206.16131591796875, "logps/rejected": -252.47427368164062, "loss": 0.3389, "rewards/accuracies": 0.75, "rewards/chosen": -1.2968755960464478, "rewards/margins": 2.275651216506958, "rewards/rejected": -3.572526693344116, "step": 4784 }, { "epoch": 0.56, "learning_rate": 1.3503011692453052e-07, "logits/chosen": -2.601973533630371, "logits/rejected": -2.4654483795166016, "logps/chosen": -182.08126831054688, "logps/rejected": -233.82476806640625, "loss": 0.4083, "rewards/accuracies": 0.875, "rewards/chosen": -0.5412182807922363, "rewards/margins": 3.332303047180176, "rewards/rejected": -3.873521327972412, "step": 4785 }, { "epoch": 0.56, "learning_rate": 1.3499468524861226e-07, "logits/chosen": -1.9240566492080688, "logits/rejected": -2.2552785873413086, "logps/chosen": -363.001953125, "logps/rejected": -204.51611328125, "loss": 0.4472, "rewards/accuracies": 0.75, "rewards/chosen": -0.6791923642158508, "rewards/margins": 1.8439723253250122, "rewards/rejected": -2.523164749145508, "step": 4786 }, { "epoch": 0.56, "learning_rate": 1.3495925357269399e-07, "logits/chosen": -2.605337381362915, "logits/rejected": -2.6551809310913086, "logps/chosen": -328.428955078125, "logps/rejected": -267.11083984375, "loss": 0.3106, "rewards/accuracies": 0.75, "rewards/chosen": -0.6002723574638367, "rewards/margins": 2.5191709995269775, "rewards/rejected": -3.119443416595459, "step": 4787 }, { "epoch": 0.56, "learning_rate": 1.349238218967757e-07, "logits/chosen": -2.5228638648986816, "logits/rejected": -2.4802961349487305, "logps/chosen": -201.95742797851562, "logps/rejected": -264.07879638671875, "loss": 0.8355, "rewards/accuracies": 0.75, "rewards/chosen": -0.3274090886116028, "rewards/margins": 2.3236615657806396, "rewards/rejected": -2.6510705947875977, "step": 4788 }, { "epoch": 0.56, "learning_rate": 1.3488839022085743e-07, "logits/chosen": -2.6290879249572754, "logits/rejected": -2.689253091812134, "logps/chosen": -314.3952331542969, "logps/rejected": -363.89154052734375, "loss": 0.5544, "rewards/accuracies": 0.75, "rewards/chosen": -0.8368784189224243, "rewards/margins": 2.710062026977539, "rewards/rejected": -3.546940326690674, "step": 4789 }, { "epoch": 0.56, "learning_rate": 1.3485295854493918e-07, "logits/chosen": -2.65145206451416, "logits/rejected": -2.515990734100342, "logps/chosen": -100.15419006347656, "logps/rejected": -181.13455200195312, "loss": 0.24, "rewards/accuracies": 0.875, "rewards/chosen": -0.6860624551773071, "rewards/margins": 3.311164140701294, "rewards/rejected": -3.9972264766693115, "step": 4790 }, { "epoch": 0.56, "learning_rate": 1.348175268690209e-07, "logits/chosen": -2.135190010070801, "logits/rejected": -2.3148250579833984, "logps/chosen": -328.19061279296875, "logps/rejected": -286.62579345703125, "loss": 0.3627, "rewards/accuracies": 0.75, "rewards/chosen": -0.45264920592308044, "rewards/margins": 1.4107916355133057, "rewards/rejected": -1.863440990447998, "step": 4791 }, { "epoch": 0.56, "learning_rate": 1.3478209519310262e-07, "logits/chosen": -2.938114881515503, "logits/rejected": -2.871096134185791, "logps/chosen": -220.356689453125, "logps/rejected": -168.50709533691406, "loss": 0.6429, "rewards/accuracies": 0.625, "rewards/chosen": -0.837807297706604, "rewards/margins": 0.6550407409667969, "rewards/rejected": -1.4928481578826904, "step": 4792 }, { "epoch": 0.56, "learning_rate": 1.3474666351718437e-07, "logits/chosen": -2.187373161315918, "logits/rejected": -2.391545057296753, "logps/chosen": -363.452880859375, "logps/rejected": -248.85845947265625, "loss": 0.7714, "rewards/accuracies": 0.5, "rewards/chosen": -1.3433905839920044, "rewards/margins": 1.3724381923675537, "rewards/rejected": -2.7158284187316895, "step": 4793 }, { "epoch": 0.56, "learning_rate": 1.347112318412661e-07, "logits/chosen": -2.000432014465332, "logits/rejected": -1.950591802597046, "logps/chosen": -194.1404571533203, "logps/rejected": -262.76641845703125, "loss": 0.5324, "rewards/accuracies": 0.625, "rewards/chosen": -1.1445708274841309, "rewards/margins": 2.57920241355896, "rewards/rejected": -3.723773241043091, "step": 4794 }, { "epoch": 0.56, "learning_rate": 1.3467580016534782e-07, "logits/chosen": -2.598007917404175, "logits/rejected": -2.4690425395965576, "logps/chosen": -495.1966552734375, "logps/rejected": -379.8099365234375, "loss": 0.204, "rewards/accuracies": 0.875, "rewards/chosen": -0.9442806839942932, "rewards/margins": 4.1000847816467285, "rewards/rejected": -5.044365882873535, "step": 4795 }, { "epoch": 0.56, "learning_rate": 1.3464036848942954e-07, "logits/chosen": -2.4276440143585205, "logits/rejected": -2.5586822032928467, "logps/chosen": -243.9134063720703, "logps/rejected": -232.67581176757812, "loss": 0.4682, "rewards/accuracies": 0.625, "rewards/chosen": -0.9979997873306274, "rewards/margins": 1.949330449104309, "rewards/rejected": -2.9473302364349365, "step": 4796 }, { "epoch": 0.56, "learning_rate": 1.3460493681351126e-07, "logits/chosen": -3.004756450653076, "logits/rejected": -2.931865930557251, "logps/chosen": -304.8893127441406, "logps/rejected": -187.06324768066406, "loss": 0.3823, "rewards/accuracies": 0.75, "rewards/chosen": -0.9813922643661499, "rewards/margins": 1.6206156015396118, "rewards/rejected": -2.6020078659057617, "step": 4797 }, { "epoch": 0.56, "learning_rate": 1.34569505137593e-07, "logits/chosen": -2.0442163944244385, "logits/rejected": -1.897104024887085, "logps/chosen": -396.56964111328125, "logps/rejected": -377.3663330078125, "loss": 0.0927, "rewards/accuracies": 1.0, "rewards/chosen": -1.1797077655792236, "rewards/margins": 3.4593849182128906, "rewards/rejected": -4.639092922210693, "step": 4798 }, { "epoch": 0.56, "learning_rate": 1.3453407346167473e-07, "logits/chosen": -2.215677261352539, "logits/rejected": -2.229221820831299, "logps/chosen": -306.0623474121094, "logps/rejected": -232.78497314453125, "loss": 0.5427, "rewards/accuracies": 0.75, "rewards/chosen": -1.6663211584091187, "rewards/margins": 1.0750013589859009, "rewards/rejected": -2.7413225173950195, "step": 4799 }, { "epoch": 0.56, "learning_rate": 1.3449864178575645e-07, "logits/chosen": -1.8940424919128418, "logits/rejected": -2.1272988319396973, "logps/chosen": -351.45703125, "logps/rejected": -211.61441040039062, "loss": 0.2793, "rewards/accuracies": 1.0, "rewards/chosen": -1.5950030088424683, "rewards/margins": 1.6930532455444336, "rewards/rejected": -3.2880563735961914, "step": 4800 }, { "epoch": 0.56, "learning_rate": 1.3446321010983818e-07, "logits/chosen": -1.9469788074493408, "logits/rejected": -1.5836882591247559, "logps/chosen": -257.44671630859375, "logps/rejected": -432.7026672363281, "loss": 0.3106, "rewards/accuracies": 0.875, "rewards/chosen": -0.7608668208122253, "rewards/margins": 1.80302894115448, "rewards/rejected": -2.5638957023620605, "step": 4801 }, { "epoch": 0.56, "learning_rate": 1.3442777843391992e-07, "logits/chosen": -1.705657720565796, "logits/rejected": -2.434072256088257, "logps/chosen": -673.6923828125, "logps/rejected": -386.9703369140625, "loss": 0.5533, "rewards/accuracies": 0.625, "rewards/chosen": -1.3115854263305664, "rewards/margins": 1.8436658382415771, "rewards/rejected": -3.1552510261535645, "step": 4802 }, { "epoch": 0.56, "learning_rate": 1.3439234675800165e-07, "logits/chosen": -2.311337947845459, "logits/rejected": -2.7397637367248535, "logps/chosen": -347.7208251953125, "logps/rejected": -285.2784729003906, "loss": 0.5595, "rewards/accuracies": 0.875, "rewards/chosen": -1.8334397077560425, "rewards/margins": 1.8213317394256592, "rewards/rejected": -3.654771327972412, "step": 4803 }, { "epoch": 0.56, "learning_rate": 1.343569150820834e-07, "logits/chosen": -1.9491355419158936, "logits/rejected": -2.209559917449951, "logps/chosen": -436.05426025390625, "logps/rejected": -305.9840393066406, "loss": 0.2113, "rewards/accuracies": 0.875, "rewards/chosen": -0.4000500738620758, "rewards/margins": 2.8863682746887207, "rewards/rejected": -3.2864181995391846, "step": 4804 }, { "epoch": 0.56, "learning_rate": 1.3432148340616512e-07, "logits/chosen": -1.976977825164795, "logits/rejected": -1.8404932022094727, "logps/chosen": -285.21246337890625, "logps/rejected": -340.08251953125, "loss": 0.4755, "rewards/accuracies": 0.75, "rewards/chosen": -1.107237696647644, "rewards/margins": 1.2450755834579468, "rewards/rejected": -2.352313280105591, "step": 4805 }, { "epoch": 0.56, "learning_rate": 1.3428605173024684e-07, "logits/chosen": -2.2803890705108643, "logits/rejected": -2.1740174293518066, "logps/chosen": -198.27061462402344, "logps/rejected": -295.6157531738281, "loss": 0.5108, "rewards/accuracies": 0.625, "rewards/chosen": -0.5683337450027466, "rewards/margins": 1.9810936450958252, "rewards/rejected": -2.549427032470703, "step": 4806 }, { "epoch": 0.56, "learning_rate": 1.3425062005432856e-07, "logits/chosen": -2.4764933586120605, "logits/rejected": -2.278775453567505, "logps/chosen": -293.0487365722656, "logps/rejected": -266.87969970703125, "loss": 0.3205, "rewards/accuracies": 0.75, "rewards/chosen": -0.7209728956222534, "rewards/margins": 1.8965500593185425, "rewards/rejected": -2.617522954940796, "step": 4807 }, { "epoch": 0.56, "learning_rate": 1.3421518837841028e-07, "logits/chosen": -2.81520414352417, "logits/rejected": -2.5558152198791504, "logps/chosen": -125.82276916503906, "logps/rejected": -222.4290771484375, "loss": 0.1832, "rewards/accuracies": 1.0, "rewards/chosen": -0.679972231388092, "rewards/margins": 2.7240750789642334, "rewards/rejected": -3.4040474891662598, "step": 4808 }, { "epoch": 0.56, "learning_rate": 1.34179756702492e-07, "logits/chosen": -2.25893497467041, "logits/rejected": -2.5644800662994385, "logps/chosen": -239.9957275390625, "logps/rejected": -234.2500762939453, "loss": 0.323, "rewards/accuracies": 0.875, "rewards/chosen": -0.46591848134994507, "rewards/margins": 1.7638959884643555, "rewards/rejected": -2.229814291000366, "step": 4809 }, { "epoch": 0.56, "learning_rate": 1.3414432502657375e-07, "logits/chosen": -2.82350754737854, "logits/rejected": -2.8753676414489746, "logps/chosen": -278.1351013183594, "logps/rejected": -324.8166198730469, "loss": 0.239, "rewards/accuracies": 0.875, "rewards/chosen": -0.23239001631736755, "rewards/margins": 2.864569664001465, "rewards/rejected": -3.0969595909118652, "step": 4810 }, { "epoch": 0.56, "learning_rate": 1.3410889335065548e-07, "logits/chosen": -2.4252471923828125, "logits/rejected": -2.318361282348633, "logps/chosen": -227.4571533203125, "logps/rejected": -326.63665771484375, "loss": 0.4243, "rewards/accuracies": 0.875, "rewards/chosen": -1.1358206272125244, "rewards/margins": 2.5583138465881348, "rewards/rejected": -3.694134473800659, "step": 4811 }, { "epoch": 0.56, "learning_rate": 1.340734616747372e-07, "logits/chosen": -2.5086593627929688, "logits/rejected": -2.4786484241485596, "logps/chosen": -269.8622741699219, "logps/rejected": -278.1499938964844, "loss": 0.5303, "rewards/accuracies": 0.75, "rewards/chosen": -0.7806077599525452, "rewards/margins": 2.248934268951416, "rewards/rejected": -3.0295422077178955, "step": 4812 }, { "epoch": 0.56, "learning_rate": 1.3403802999881895e-07, "logits/chosen": -2.1105756759643555, "logits/rejected": -2.3248865604400635, "logps/chosen": -411.33251953125, "logps/rejected": -215.0321044921875, "loss": 0.4388, "rewards/accuracies": 0.875, "rewards/chosen": -0.7294637560844421, "rewards/margins": 1.6476856470108032, "rewards/rejected": -2.3771495819091797, "step": 4813 }, { "epoch": 0.56, "learning_rate": 1.3400259832290067e-07, "logits/chosen": -2.7094483375549316, "logits/rejected": -2.8354251384735107, "logps/chosen": -122.00049591064453, "logps/rejected": -134.54893493652344, "loss": 0.2746, "rewards/accuracies": 1.0, "rewards/chosen": -0.5770488977432251, "rewards/margins": 1.9796063899993896, "rewards/rejected": -2.556655168533325, "step": 4814 }, { "epoch": 0.56, "learning_rate": 1.339671666469824e-07, "logits/chosen": -2.7510125637054443, "logits/rejected": -2.75443434715271, "logps/chosen": -324.27734375, "logps/rejected": -222.93609619140625, "loss": 0.2876, "rewards/accuracies": 0.875, "rewards/chosen": -0.40036338567733765, "rewards/margins": 2.5495941638946533, "rewards/rejected": -2.9499573707580566, "step": 4815 }, { "epoch": 0.56, "learning_rate": 1.3393173497106414e-07, "logits/chosen": -2.780465602874756, "logits/rejected": -2.619685649871826, "logps/chosen": -72.43958282470703, "logps/rejected": -139.71014404296875, "loss": 0.3446, "rewards/accuracies": 0.75, "rewards/chosen": -0.4918566644191742, "rewards/margins": 1.4393720626831055, "rewards/rejected": -1.931228756904602, "step": 4816 }, { "epoch": 0.56, "learning_rate": 1.3389630329514586e-07, "logits/chosen": -2.473820209503174, "logits/rejected": -2.506575345993042, "logps/chosen": -180.6910400390625, "logps/rejected": -273.0887145996094, "loss": 0.301, "rewards/accuracies": 0.875, "rewards/chosen": -1.0271449089050293, "rewards/margins": 1.8369653224945068, "rewards/rejected": -2.8641104698181152, "step": 4817 }, { "epoch": 0.56, "learning_rate": 1.3386087161922758e-07, "logits/chosen": -2.257101535797119, "logits/rejected": -2.4931561946868896, "logps/chosen": -180.65956115722656, "logps/rejected": -288.21527099609375, "loss": 0.3151, "rewards/accuracies": 0.875, "rewards/chosen": -1.3197062015533447, "rewards/margins": 3.4818365573883057, "rewards/rejected": -4.80154275894165, "step": 4818 }, { "epoch": 0.56, "learning_rate": 1.338254399433093e-07, "logits/chosen": -2.418492317199707, "logits/rejected": -2.545842170715332, "logps/chosen": -227.50601196289062, "logps/rejected": -240.3156280517578, "loss": 0.399, "rewards/accuracies": 0.875, "rewards/chosen": -0.2040722668170929, "rewards/margins": 2.158360481262207, "rewards/rejected": -2.3624327182769775, "step": 4819 }, { "epoch": 0.56, "learning_rate": 1.3379000826739103e-07, "logits/chosen": -2.0776755809783936, "logits/rejected": -2.0328803062438965, "logps/chosen": -154.8576202392578, "logps/rejected": -282.4368591308594, "loss": 0.7125, "rewards/accuracies": 0.75, "rewards/chosen": -1.3417311906814575, "rewards/margins": 2.869412660598755, "rewards/rejected": -4.211143970489502, "step": 4820 }, { "epoch": 0.56, "learning_rate": 1.3375457659147278e-07, "logits/chosen": -2.480557441711426, "logits/rejected": -2.1764285564422607, "logps/chosen": -295.97210693359375, "logps/rejected": -312.1411437988281, "loss": 0.1127, "rewards/accuracies": 1.0, "rewards/chosen": -0.5396378636360168, "rewards/margins": 3.5547091960906982, "rewards/rejected": -4.09434700012207, "step": 4821 }, { "epoch": 0.56, "learning_rate": 1.337191449155545e-07, "logits/chosen": -2.230681896209717, "logits/rejected": -2.3632616996765137, "logps/chosen": -169.21902465820312, "logps/rejected": -159.06396484375, "loss": 0.4943, "rewards/accuracies": 0.625, "rewards/chosen": -0.6703957319259644, "rewards/margins": 1.2191112041473389, "rewards/rejected": -1.8895070552825928, "step": 4822 }, { "epoch": 0.56, "learning_rate": 1.3368371323963622e-07, "logits/chosen": -1.9889390468597412, "logits/rejected": -2.282945156097412, "logps/chosen": -345.9639892578125, "logps/rejected": -224.836181640625, "loss": 0.2828, "rewards/accuracies": 0.75, "rewards/chosen": -0.23787352442741394, "rewards/margins": 2.421614646911621, "rewards/rejected": -2.6594882011413574, "step": 4823 }, { "epoch": 0.56, "learning_rate": 1.3364828156371797e-07, "logits/chosen": -2.402397394180298, "logits/rejected": -2.666292667388916, "logps/chosen": -304.6971130371094, "logps/rejected": -269.91107177734375, "loss": 0.1813, "rewards/accuracies": 1.0, "rewards/chosen": -0.5197363495826721, "rewards/margins": 2.6802635192871094, "rewards/rejected": -3.1999998092651367, "step": 4824 }, { "epoch": 0.56, "learning_rate": 1.336128498877997e-07, "logits/chosen": -2.1652965545654297, "logits/rejected": -1.5787073373794556, "logps/chosen": -247.16275024414062, "logps/rejected": -412.2457275390625, "loss": 0.3239, "rewards/accuracies": 0.75, "rewards/chosen": -1.1093870401382446, "rewards/margins": 2.5163068771362305, "rewards/rejected": -3.6256937980651855, "step": 4825 }, { "epoch": 0.56, "learning_rate": 1.3357741821188141e-07, "logits/chosen": -2.1708731651306152, "logits/rejected": -1.7457194328308105, "logps/chosen": -249.27047729492188, "logps/rejected": -324.54345703125, "loss": 0.8201, "rewards/accuracies": 0.625, "rewards/chosen": -1.219892144203186, "rewards/margins": 2.578939199447632, "rewards/rejected": -3.7988312244415283, "step": 4826 }, { "epoch": 0.56, "learning_rate": 1.3354198653596316e-07, "logits/chosen": -2.128981590270996, "logits/rejected": -2.3160088062286377, "logps/chosen": -405.2899475097656, "logps/rejected": -197.04275512695312, "loss": 0.3419, "rewards/accuracies": 0.75, "rewards/chosen": -1.2086031436920166, "rewards/margins": 1.5092394351959229, "rewards/rejected": -2.7178425788879395, "step": 4827 }, { "epoch": 0.56, "learning_rate": 1.3350655486004488e-07, "logits/chosen": -2.1194238662719727, "logits/rejected": -2.510593891143799, "logps/chosen": -425.111083984375, "logps/rejected": -190.42495727539062, "loss": 1.2198, "rewards/accuracies": 0.375, "rewards/chosen": -0.964802086353302, "rewards/margins": 0.17127111554145813, "rewards/rejected": -1.1360732316970825, "step": 4828 }, { "epoch": 0.56, "learning_rate": 1.334711231841266e-07, "logits/chosen": -2.0685296058654785, "logits/rejected": -1.7098164558410645, "logps/chosen": -191.93795776367188, "logps/rejected": -285.0353698730469, "loss": 0.3717, "rewards/accuracies": 0.875, "rewards/chosen": -1.195899248123169, "rewards/margins": 2.1681838035583496, "rewards/rejected": -3.3640830516815186, "step": 4829 }, { "epoch": 0.56, "learning_rate": 1.3343569150820833e-07, "logits/chosen": -3.022818088531494, "logits/rejected": -2.7374424934387207, "logps/chosen": -178.90403747558594, "logps/rejected": -270.30865478515625, "loss": 0.2087, "rewards/accuracies": 1.0, "rewards/chosen": -0.7888580560684204, "rewards/margins": 2.3844919204711914, "rewards/rejected": -3.1733498573303223, "step": 4830 }, { "epoch": 0.56, "learning_rate": 1.3340025983229005e-07, "logits/chosen": -2.62302827835083, "logits/rejected": -2.3632349967956543, "logps/chosen": -127.32085418701172, "logps/rejected": -195.50387573242188, "loss": 0.5467, "rewards/accuracies": 0.75, "rewards/chosen": -1.6348721981048584, "rewards/margins": 1.4908071756362915, "rewards/rejected": -3.1256794929504395, "step": 4831 }, { "epoch": 0.56, "learning_rate": 1.3336482815637177e-07, "logits/chosen": -2.259399890899658, "logits/rejected": -2.3931002616882324, "logps/chosen": -356.15362548828125, "logps/rejected": -289.8577880859375, "loss": 0.1294, "rewards/accuracies": 1.0, "rewards/chosen": -0.29262447357177734, "rewards/margins": 3.9053874015808105, "rewards/rejected": -4.198011875152588, "step": 4832 }, { "epoch": 0.56, "learning_rate": 1.3332939648045352e-07, "logits/chosen": -1.8416881561279297, "logits/rejected": -1.9172431230545044, "logps/chosen": -181.92164611816406, "logps/rejected": -177.23358154296875, "loss": 1.6134, "rewards/accuracies": 0.625, "rewards/chosen": -3.1789870262145996, "rewards/margins": 0.3244028091430664, "rewards/rejected": -3.503389835357666, "step": 4833 }, { "epoch": 0.56, "learning_rate": 1.3329396480453524e-07, "logits/chosen": -2.12557053565979, "logits/rejected": -2.4236819744110107, "logps/chosen": -490.1539611816406, "logps/rejected": -305.9359130859375, "loss": 0.1106, "rewards/accuracies": 1.0, "rewards/chosen": -0.5510066747665405, "rewards/margins": 3.167400360107422, "rewards/rejected": -3.718407154083252, "step": 4834 }, { "epoch": 0.56, "learning_rate": 1.3325853312861697e-07, "logits/chosen": -2.686554431915283, "logits/rejected": -2.3159339427948, "logps/chosen": -247.21380615234375, "logps/rejected": -349.305908203125, "loss": 0.4349, "rewards/accuracies": 0.75, "rewards/chosen": -0.4346218407154083, "rewards/margins": 2.3758535385131836, "rewards/rejected": -2.8104751110076904, "step": 4835 }, { "epoch": 0.56, "learning_rate": 1.3322310145269871e-07, "logits/chosen": -2.0885813236236572, "logits/rejected": -2.272174119949341, "logps/chosen": -249.76986694335938, "logps/rejected": -216.489990234375, "loss": 0.324, "rewards/accuracies": 0.75, "rewards/chosen": -1.2069412469863892, "rewards/margins": 2.692007541656494, "rewards/rejected": -3.898948907852173, "step": 4836 }, { "epoch": 0.56, "learning_rate": 1.3318766977678044e-07, "logits/chosen": -2.343043804168701, "logits/rejected": -2.4121859073638916, "logps/chosen": -264.8946533203125, "logps/rejected": -274.0367126464844, "loss": 0.4428, "rewards/accuracies": 0.75, "rewards/chosen": -1.5555753707885742, "rewards/margins": 1.7139980792999268, "rewards/rejected": -3.269573211669922, "step": 4837 }, { "epoch": 0.56, "learning_rate": 1.3315223810086216e-07, "logits/chosen": -2.117399215698242, "logits/rejected": -2.284200429916382, "logps/chosen": -455.9658203125, "logps/rejected": -248.62051391601562, "loss": 0.357, "rewards/accuracies": 0.75, "rewards/chosen": -1.0770058631896973, "rewards/margins": 2.6469650268554688, "rewards/rejected": -3.723970890045166, "step": 4838 }, { "epoch": 0.56, "learning_rate": 1.331168064249439e-07, "logits/chosen": -2.541111707687378, "logits/rejected": -2.5157470703125, "logps/chosen": -205.6208038330078, "logps/rejected": -294.0926513671875, "loss": 0.1257, "rewards/accuracies": 1.0, "rewards/chosen": -0.5124972462654114, "rewards/margins": 3.315567970275879, "rewards/rejected": -3.8280653953552246, "step": 4839 }, { "epoch": 0.56, "learning_rate": 1.3308137474902563e-07, "logits/chosen": -2.4285888671875, "logits/rejected": -2.3759918212890625, "logps/chosen": -177.8372039794922, "logps/rejected": -229.3604736328125, "loss": 0.5861, "rewards/accuracies": 0.875, "rewards/chosen": -0.8160719275474548, "rewards/margins": 1.2868075370788574, "rewards/rejected": -2.102879524230957, "step": 4840 }, { "epoch": 0.56, "learning_rate": 1.3304594307310735e-07, "logits/chosen": -2.774801015853882, "logits/rejected": -2.7022886276245117, "logps/chosen": -269.7401123046875, "logps/rejected": -330.5428466796875, "loss": 0.2129, "rewards/accuracies": 1.0, "rewards/chosen": -0.2108013927936554, "rewards/margins": 3.0106611251831055, "rewards/rejected": -3.2214624881744385, "step": 4841 }, { "epoch": 0.56, "learning_rate": 1.3301051139718907e-07, "logits/chosen": -2.108685255050659, "logits/rejected": -2.2624566555023193, "logps/chosen": -334.28363037109375, "logps/rejected": -306.8539733886719, "loss": 0.3423, "rewards/accuracies": 1.0, "rewards/chosen": -0.8335934281349182, "rewards/margins": 1.175866961479187, "rewards/rejected": -2.00946044921875, "step": 4842 }, { "epoch": 0.56, "learning_rate": 1.329750797212708e-07, "logits/chosen": -2.9858222007751465, "logits/rejected": -2.8743252754211426, "logps/chosen": -126.69331359863281, "logps/rejected": -106.56228637695312, "loss": 0.4461, "rewards/accuracies": 0.75, "rewards/chosen": -0.9517567753791809, "rewards/margins": 0.8791329264640808, "rewards/rejected": -1.8308898210525513, "step": 4843 }, { "epoch": 0.56, "learning_rate": 1.3293964804535254e-07, "logits/chosen": -2.4178996086120605, "logits/rejected": -2.299884796142578, "logps/chosen": -305.438232421875, "logps/rejected": -422.0162048339844, "loss": 0.1978, "rewards/accuracies": 1.0, "rewards/chosen": -1.2280575037002563, "rewards/margins": 2.228874683380127, "rewards/rejected": -3.4569315910339355, "step": 4844 }, { "epoch": 0.56, "learning_rate": 1.3290421636943427e-07, "logits/chosen": -2.4443774223327637, "logits/rejected": -2.4611291885375977, "logps/chosen": -174.09104919433594, "logps/rejected": -258.4566650390625, "loss": 0.3092, "rewards/accuracies": 0.875, "rewards/chosen": -0.15403929352760315, "rewards/margins": 1.492767095565796, "rewards/rejected": -1.646806240081787, "step": 4845 }, { "epoch": 0.56, "learning_rate": 1.32868784693516e-07, "logits/chosen": -2.3672800064086914, "logits/rejected": -2.4519002437591553, "logps/chosen": -461.7724609375, "logps/rejected": -310.1210632324219, "loss": 0.1368, "rewards/accuracies": 1.0, "rewards/chosen": -1.2834198474884033, "rewards/margins": 3.6446213722229004, "rewards/rejected": -4.928041458129883, "step": 4846 }, { "epoch": 0.56, "learning_rate": 1.3283335301759774e-07, "logits/chosen": -2.8313148021698, "logits/rejected": -2.7516262531280518, "logps/chosen": -251.89149475097656, "logps/rejected": -222.3126220703125, "loss": 0.2957, "rewards/accuracies": 0.75, "rewards/chosen": -0.6945430636405945, "rewards/margins": 2.3216400146484375, "rewards/rejected": -3.0161828994750977, "step": 4847 }, { "epoch": 0.56, "learning_rate": 1.3279792134167946e-07, "logits/chosen": -3.0157265663146973, "logits/rejected": -3.0970239639282227, "logps/chosen": -145.85411071777344, "logps/rejected": -221.014892578125, "loss": 0.396, "rewards/accuracies": 0.875, "rewards/chosen": -1.3150124549865723, "rewards/margins": 3.540337085723877, "rewards/rejected": -4.855349540710449, "step": 4848 }, { "epoch": 0.56, "learning_rate": 1.3276248966576118e-07, "logits/chosen": -2.439927339553833, "logits/rejected": -2.446319580078125, "logps/chosen": -259.73175048828125, "logps/rejected": -321.76318359375, "loss": 0.3532, "rewards/accuracies": 0.875, "rewards/chosen": -1.218989610671997, "rewards/margins": 5.209564208984375, "rewards/rejected": -6.428553104400635, "step": 4849 }, { "epoch": 0.56, "learning_rate": 1.327270579898429e-07, "logits/chosen": -2.3646745681762695, "logits/rejected": -2.2422561645507812, "logps/chosen": -441.7309875488281, "logps/rejected": -337.9956359863281, "loss": 0.1635, "rewards/accuracies": 0.875, "rewards/chosen": -0.6864237785339355, "rewards/margins": 3.0727810859680176, "rewards/rejected": -3.759204626083374, "step": 4850 }, { "epoch": 0.56, "learning_rate": 1.3269162631392465e-07, "logits/chosen": -2.370487928390503, "logits/rejected": -2.206157684326172, "logps/chosen": -179.42941284179688, "logps/rejected": -268.83154296875, "loss": 0.2961, "rewards/accuracies": 0.75, "rewards/chosen": -0.6279398202896118, "rewards/margins": 2.9814999103546143, "rewards/rejected": -3.6094396114349365, "step": 4851 }, { "epoch": 0.56, "learning_rate": 1.3265619463800637e-07, "logits/chosen": -2.6876261234283447, "logits/rejected": -2.956812858581543, "logps/chosen": -194.4202880859375, "logps/rejected": -210.02622985839844, "loss": 0.48, "rewards/accuracies": 0.75, "rewards/chosen": -0.8661454916000366, "rewards/margins": 2.4760854244232178, "rewards/rejected": -3.342231035232544, "step": 4852 }, { "epoch": 0.56, "learning_rate": 1.326207629620881e-07, "logits/chosen": -2.470817804336548, "logits/rejected": -2.579787254333496, "logps/chosen": -322.1881103515625, "logps/rejected": -264.3547668457031, "loss": 0.6813, "rewards/accuracies": 0.75, "rewards/chosen": -0.9738895297050476, "rewards/margins": 1.1918505430221558, "rewards/rejected": -2.1657400131225586, "step": 4853 }, { "epoch": 0.56, "learning_rate": 1.3258533128616982e-07, "logits/chosen": -2.5028185844421387, "logits/rejected": -2.430607557296753, "logps/chosen": -353.4879150390625, "logps/rejected": -386.04644775390625, "loss": 0.0647, "rewards/accuracies": 1.0, "rewards/chosen": -0.6412597894668579, "rewards/margins": 4.909400463104248, "rewards/rejected": -5.550660133361816, "step": 4854 }, { "epoch": 0.56, "learning_rate": 1.3254989961025154e-07, "logits/chosen": -1.897416353225708, "logits/rejected": -2.0023114681243896, "logps/chosen": -308.72210693359375, "logps/rejected": -294.2940368652344, "loss": 0.2903, "rewards/accuracies": 0.75, "rewards/chosen": -1.8157027959823608, "rewards/margins": 3.429597854614258, "rewards/rejected": -5.245300769805908, "step": 4855 }, { "epoch": 0.56, "learning_rate": 1.325144679343333e-07, "logits/chosen": -1.9952653646469116, "logits/rejected": -1.9006515741348267, "logps/chosen": -290.20819091796875, "logps/rejected": -328.07598876953125, "loss": 0.2381, "rewards/accuracies": 0.875, "rewards/chosen": -2.353482723236084, "rewards/margins": 2.819632053375244, "rewards/rejected": -5.173114776611328, "step": 4856 }, { "epoch": 0.57, "learning_rate": 1.32479036258415e-07, "logits/chosen": -1.9845229387283325, "logits/rejected": -1.805362343788147, "logps/chosen": -318.6240234375, "logps/rejected": -258.4371337890625, "loss": 0.6876, "rewards/accuracies": 0.625, "rewards/chosen": -1.0806046724319458, "rewards/margins": 0.9584529399871826, "rewards/rejected": -2.039057731628418, "step": 4857 }, { "epoch": 0.57, "learning_rate": 1.3244360458249676e-07, "logits/chosen": -3.006474733352661, "logits/rejected": -3.0107474327087402, "logps/chosen": -316.58062744140625, "logps/rejected": -293.6958923339844, "loss": 0.3161, "rewards/accuracies": 1.0, "rewards/chosen": -0.7412282228469849, "rewards/margins": 2.3549535274505615, "rewards/rejected": -3.096181869506836, "step": 4858 }, { "epoch": 0.57, "learning_rate": 1.3240817290657848e-07, "logits/chosen": -2.1828246116638184, "logits/rejected": -2.2827301025390625, "logps/chosen": -229.5074005126953, "logps/rejected": -239.6103515625, "loss": 0.8888, "rewards/accuracies": 0.5, "rewards/chosen": -1.1257296800613403, "rewards/margins": 1.176745891571045, "rewards/rejected": -2.302475690841675, "step": 4859 }, { "epoch": 0.57, "learning_rate": 1.323727412306602e-07, "logits/chosen": -2.203692674636841, "logits/rejected": -2.305471420288086, "logps/chosen": -271.7756652832031, "logps/rejected": -220.06240844726562, "loss": 0.1923, "rewards/accuracies": 1.0, "rewards/chosen": -0.4497028589248657, "rewards/margins": 3.1304235458374023, "rewards/rejected": -3.5801262855529785, "step": 4860 }, { "epoch": 0.57, "learning_rate": 1.3233730955474193e-07, "logits/chosen": -2.17549467086792, "logits/rejected": -2.468386650085449, "logps/chosen": -189.46514892578125, "logps/rejected": -135.9783935546875, "loss": 0.5541, "rewards/accuracies": 0.625, "rewards/chosen": -0.9483795762062073, "rewards/margins": 1.000497817993164, "rewards/rejected": -1.9488774538040161, "step": 4861 }, { "epoch": 0.57, "learning_rate": 1.3230187787882367e-07, "logits/chosen": -2.4614248275756836, "logits/rejected": -2.5643181800842285, "logps/chosen": -193.00230407714844, "logps/rejected": -211.10606384277344, "loss": 0.845, "rewards/accuracies": 0.75, "rewards/chosen": -0.8061829209327698, "rewards/margins": 1.0074421167373657, "rewards/rejected": -1.8136252164840698, "step": 4862 }, { "epoch": 0.57, "learning_rate": 1.322664462029054e-07, "logits/chosen": -2.0437021255493164, "logits/rejected": -1.9147361516952515, "logps/chosen": -133.8846893310547, "logps/rejected": -176.849609375, "loss": 0.303, "rewards/accuracies": 0.875, "rewards/chosen": -1.0504157543182373, "rewards/margins": 1.2902220487594604, "rewards/rejected": -2.340637683868408, "step": 4863 }, { "epoch": 0.57, "learning_rate": 1.3223101452698712e-07, "logits/chosen": -2.1211376190185547, "logits/rejected": -1.8958401679992676, "logps/chosen": -433.8878173828125, "logps/rejected": -361.3505859375, "loss": 0.3309, "rewards/accuracies": 0.875, "rewards/chosen": -1.1925612688064575, "rewards/margins": 2.390378475189209, "rewards/rejected": -3.582939863204956, "step": 4864 }, { "epoch": 0.57, "learning_rate": 1.3219558285106884e-07, "logits/chosen": -2.0353870391845703, "logits/rejected": -2.105717897415161, "logps/chosen": -387.3922119140625, "logps/rejected": -396.1689147949219, "loss": 0.2847, "rewards/accuracies": 0.875, "rewards/chosen": -0.1892784833908081, "rewards/margins": 2.9928905963897705, "rewards/rejected": -3.182169198989868, "step": 4865 }, { "epoch": 0.57, "learning_rate": 1.3216015117515056e-07, "logits/chosen": -2.613842487335205, "logits/rejected": -2.770437240600586, "logps/chosen": -233.72418212890625, "logps/rejected": -385.05657958984375, "loss": 0.4216, "rewards/accuracies": 0.625, "rewards/chosen": -0.7680480480194092, "rewards/margins": 1.6731019020080566, "rewards/rejected": -2.441149950027466, "step": 4866 }, { "epoch": 0.57, "learning_rate": 1.321247194992323e-07, "logits/chosen": -1.9144995212554932, "logits/rejected": -2.162978172302246, "logps/chosen": -464.622802734375, "logps/rejected": -352.81146240234375, "loss": 0.6391, "rewards/accuracies": 0.875, "rewards/chosen": -1.4449738264083862, "rewards/margins": 1.3397817611694336, "rewards/rejected": -2.7847557067871094, "step": 4867 }, { "epoch": 0.57, "learning_rate": 1.3208928782331403e-07, "logits/chosen": -1.799444317817688, "logits/rejected": -1.8814607858657837, "logps/chosen": -219.76414489746094, "logps/rejected": -175.86529541015625, "loss": 0.7493, "rewards/accuracies": 0.625, "rewards/chosen": -0.753049373626709, "rewards/margins": 0.5366894602775574, "rewards/rejected": -1.2897388935089111, "step": 4868 }, { "epoch": 0.57, "learning_rate": 1.3205385614739578e-07, "logits/chosen": -2.827772378921509, "logits/rejected": -2.8862557411193848, "logps/chosen": -352.2066650390625, "logps/rejected": -271.43707275390625, "loss": 0.45, "rewards/accuracies": 0.75, "rewards/chosen": -1.8628345727920532, "rewards/margins": 2.104935646057129, "rewards/rejected": -3.967770576477051, "step": 4869 }, { "epoch": 0.57, "learning_rate": 1.320184244714775e-07, "logits/chosen": -2.436633825302124, "logits/rejected": -2.4843311309814453, "logps/chosen": -381.75897216796875, "logps/rejected": -330.3065185546875, "loss": 0.3081, "rewards/accuracies": 0.875, "rewards/chosen": -0.44621357321739197, "rewards/margins": 1.949384093284607, "rewards/rejected": -2.3955976963043213, "step": 4870 }, { "epoch": 0.57, "learning_rate": 1.3198299279555923e-07, "logits/chosen": -2.0412540435791016, "logits/rejected": -1.8859896659851074, "logps/chosen": -370.6697082519531, "logps/rejected": -390.9971923828125, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": -1.0785162448883057, "rewards/margins": 5.060270309448242, "rewards/rejected": -6.138786315917969, "step": 4871 }, { "epoch": 0.57, "learning_rate": 1.3194756111964095e-07, "logits/chosen": -2.2092747688293457, "logits/rejected": -2.2519941329956055, "logps/chosen": -274.89813232421875, "logps/rejected": -236.9650421142578, "loss": 0.2435, "rewards/accuracies": 1.0, "rewards/chosen": -0.9307136535644531, "rewards/margins": 2.247997283935547, "rewards/rejected": -3.178710699081421, "step": 4872 }, { "epoch": 0.57, "learning_rate": 1.3191212944372267e-07, "logits/chosen": -2.460153102874756, "logits/rejected": -2.5517916679382324, "logps/chosen": -446.050537109375, "logps/rejected": -486.3163146972656, "loss": 0.883, "rewards/accuracies": 0.625, "rewards/chosen": -1.7914328575134277, "rewards/margins": 2.1654887199401855, "rewards/rejected": -3.9569215774536133, "step": 4873 }, { "epoch": 0.57, "learning_rate": 1.3187669776780442e-07, "logits/chosen": -2.3170366287231445, "logits/rejected": -2.4324302673339844, "logps/chosen": -209.82150268554688, "logps/rejected": -207.75918579101562, "loss": 0.1772, "rewards/accuracies": 0.875, "rewards/chosen": -0.6023393273353577, "rewards/margins": 3.063223361968994, "rewards/rejected": -3.665562629699707, "step": 4874 }, { "epoch": 0.57, "learning_rate": 1.3184126609188614e-07, "logits/chosen": -1.707032561302185, "logits/rejected": -2.0126800537109375, "logps/chosen": -246.61380004882812, "logps/rejected": -240.6519012451172, "loss": 0.5848, "rewards/accuracies": 0.625, "rewards/chosen": -0.7620730400085449, "rewards/margins": 0.9961439371109009, "rewards/rejected": -1.7582168579101562, "step": 4875 }, { "epoch": 0.57, "learning_rate": 1.3180583441596786e-07, "logits/chosen": -2.6139612197875977, "logits/rejected": -2.566134452819824, "logps/chosen": -303.848876953125, "logps/rejected": -236.66749572753906, "loss": 0.647, "rewards/accuracies": 0.625, "rewards/chosen": -1.46212899684906, "rewards/margins": 1.2747955322265625, "rewards/rejected": -2.736924648284912, "step": 4876 }, { "epoch": 0.57, "learning_rate": 1.3177040274004959e-07, "logits/chosen": -1.9026943445205688, "logits/rejected": -2.0661065578460693, "logps/chosen": -299.0624084472656, "logps/rejected": -335.7823181152344, "loss": 0.2385, "rewards/accuracies": 0.875, "rewards/chosen": 0.0012033730745315552, "rewards/margins": 3.6685149669647217, "rewards/rejected": -3.667311191558838, "step": 4877 }, { "epoch": 0.57, "learning_rate": 1.3173497106413133e-07, "logits/chosen": -2.267727851867676, "logits/rejected": -2.2949419021606445, "logps/chosen": -471.922607421875, "logps/rejected": -413.017333984375, "loss": 0.7689, "rewards/accuracies": 0.625, "rewards/chosen": -0.7593218088150024, "rewards/margins": 1.7323665618896484, "rewards/rejected": -2.4916884899139404, "step": 4878 }, { "epoch": 0.57, "learning_rate": 1.3169953938821306e-07, "logits/chosen": -3.0514931678771973, "logits/rejected": -2.9911398887634277, "logps/chosen": -320.5049133300781, "logps/rejected": -279.6960144042969, "loss": 0.2605, "rewards/accuracies": 0.875, "rewards/chosen": -1.0562995672225952, "rewards/margins": 2.326406717300415, "rewards/rejected": -3.3827064037323, "step": 4879 }, { "epoch": 0.57, "learning_rate": 1.3166410771229478e-07, "logits/chosen": -1.982433557510376, "logits/rejected": -2.322680711746216, "logps/chosen": -263.6446533203125, "logps/rejected": -138.51004028320312, "loss": 1.1905, "rewards/accuracies": 0.5, "rewards/chosen": -1.344170331954956, "rewards/margins": 0.2974928617477417, "rewards/rejected": -1.6416631937026978, "step": 4880 }, { "epoch": 0.57, "learning_rate": 1.3162867603637653e-07, "logits/chosen": -2.6239237785339355, "logits/rejected": -2.734391212463379, "logps/chosen": -423.7042236328125, "logps/rejected": -205.45262145996094, "loss": 0.2122, "rewards/accuracies": 0.875, "rewards/chosen": -0.13174742460250854, "rewards/margins": 2.2522153854370117, "rewards/rejected": -2.383962631225586, "step": 4881 }, { "epoch": 0.57, "learning_rate": 1.3159324436045825e-07, "logits/chosen": -2.698192834854126, "logits/rejected": -2.869889497756958, "logps/chosen": -211.55145263671875, "logps/rejected": -242.32992553710938, "loss": 0.3798, "rewards/accuracies": 0.75, "rewards/chosen": -1.1815402507781982, "rewards/margins": 1.5053143501281738, "rewards/rejected": -2.686854600906372, "step": 4882 }, { "epoch": 0.57, "learning_rate": 1.3155781268453997e-07, "logits/chosen": -2.5411503314971924, "logits/rejected": -2.6909987926483154, "logps/chosen": -267.0626220703125, "logps/rejected": -189.8928680419922, "loss": 0.3885, "rewards/accuracies": 0.875, "rewards/chosen": -0.8078060150146484, "rewards/margins": 2.268554449081421, "rewards/rejected": -3.0763604640960693, "step": 4883 }, { "epoch": 0.57, "learning_rate": 1.315223810086217e-07, "logits/chosen": -1.7500556707382202, "logits/rejected": -1.8007586002349854, "logps/chosen": -305.53662109375, "logps/rejected": -268.64923095703125, "loss": 0.306, "rewards/accuracies": 0.875, "rewards/chosen": -0.6195812225341797, "rewards/margins": 1.7717528343200684, "rewards/rejected": -2.391334056854248, "step": 4884 }, { "epoch": 0.57, "learning_rate": 1.3148694933270342e-07, "logits/chosen": -1.868788242340088, "logits/rejected": -2.3687856197357178, "logps/chosen": -341.5501708984375, "logps/rejected": -237.43983459472656, "loss": 0.2567, "rewards/accuracies": 0.875, "rewards/chosen": -0.2975740432739258, "rewards/margins": 2.755711793899536, "rewards/rejected": -3.053285837173462, "step": 4885 }, { "epoch": 0.57, "learning_rate": 1.3145151765678516e-07, "logits/chosen": -2.1739184856414795, "logits/rejected": -2.3760266304016113, "logps/chosen": -368.3441162109375, "logps/rejected": -329.6139831542969, "loss": 0.2495, "rewards/accuracies": 0.875, "rewards/chosen": -0.23234453797340393, "rewards/margins": 1.9916355609893799, "rewards/rejected": -2.223980188369751, "step": 4886 }, { "epoch": 0.57, "learning_rate": 1.314160859808669e-07, "logits/chosen": -2.9032163619995117, "logits/rejected": -2.8782596588134766, "logps/chosen": -442.82098388671875, "logps/rejected": -311.3962097167969, "loss": 0.1337, "rewards/accuracies": 0.875, "rewards/chosen": -1.0057841539382935, "rewards/margins": 3.781269073486328, "rewards/rejected": -4.78705358505249, "step": 4887 }, { "epoch": 0.57, "learning_rate": 1.313806543049486e-07, "logits/chosen": -2.726161479949951, "logits/rejected": -2.6783595085144043, "logps/chosen": -170.22482299804688, "logps/rejected": -212.1488800048828, "loss": 0.4659, "rewards/accuracies": 0.625, "rewards/chosen": -0.49220770597457886, "rewards/margins": 2.2177610397338867, "rewards/rejected": -2.7099685668945312, "step": 4888 }, { "epoch": 0.57, "learning_rate": 1.3134522262903036e-07, "logits/chosen": -2.894850969314575, "logits/rejected": -2.6442928314208984, "logps/chosen": -273.3145751953125, "logps/rejected": -368.74462890625, "loss": 0.5498, "rewards/accuracies": 0.75, "rewards/chosen": -1.1080833673477173, "rewards/margins": 1.0282264947891235, "rewards/rejected": -2.136309862136841, "step": 4889 }, { "epoch": 0.57, "learning_rate": 1.3130979095311208e-07, "logits/chosen": -2.6371946334838867, "logits/rejected": -2.7554733753204346, "logps/chosen": -229.92892456054688, "logps/rejected": -203.5054473876953, "loss": 1.3496, "rewards/accuracies": 0.5, "rewards/chosen": -2.0637826919555664, "rewards/margins": 0.538162112236023, "rewards/rejected": -2.6019446849823, "step": 4890 }, { "epoch": 0.57, "learning_rate": 1.312743592771938e-07, "logits/chosen": -2.473237991333008, "logits/rejected": -2.447319746017456, "logps/chosen": -336.63250732421875, "logps/rejected": -258.8016357421875, "loss": 0.3093, "rewards/accuracies": 0.875, "rewards/chosen": -0.7821365594863892, "rewards/margins": 3.529296875, "rewards/rejected": -4.3114333152771, "step": 4891 }, { "epoch": 0.57, "learning_rate": 1.3123892760127555e-07, "logits/chosen": -2.5205070972442627, "logits/rejected": -2.4741930961608887, "logps/chosen": -364.9985046386719, "logps/rejected": -345.62335205078125, "loss": 0.1629, "rewards/accuracies": 1.0, "rewards/chosen": -0.6785728335380554, "rewards/margins": 2.938798666000366, "rewards/rejected": -3.6173715591430664, "step": 4892 }, { "epoch": 0.57, "learning_rate": 1.3120349592535727e-07, "logits/chosen": -1.9815500974655151, "logits/rejected": -2.0793333053588867, "logps/chosen": -483.670654296875, "logps/rejected": -364.821044921875, "loss": 0.2565, "rewards/accuracies": 0.875, "rewards/chosen": -0.5529549717903137, "rewards/margins": 2.605999708175659, "rewards/rejected": -3.158954620361328, "step": 4893 }, { "epoch": 0.57, "learning_rate": 1.31168064249439e-07, "logits/chosen": -2.16487717628479, "logits/rejected": -2.245985746383667, "logps/chosen": -356.5289001464844, "logps/rejected": -220.52694702148438, "loss": 0.4326, "rewards/accuracies": 0.625, "rewards/chosen": -0.8610485792160034, "rewards/margins": 1.593369722366333, "rewards/rejected": -2.454418182373047, "step": 4894 }, { "epoch": 0.57, "learning_rate": 1.3113263257352072e-07, "logits/chosen": -2.204349994659424, "logits/rejected": -1.989993691444397, "logps/chosen": -175.68948364257812, "logps/rejected": -308.66546630859375, "loss": 0.1432, "rewards/accuracies": 1.0, "rewards/chosen": -0.7373576164245605, "rewards/margins": 2.210646152496338, "rewards/rejected": -2.9480037689208984, "step": 4895 }, { "epoch": 0.57, "learning_rate": 1.3109720089760244e-07, "logits/chosen": -2.0302274227142334, "logits/rejected": -2.102696180343628, "logps/chosen": -375.5113830566406, "logps/rejected": -268.3872985839844, "loss": 0.5566, "rewards/accuracies": 0.625, "rewards/chosen": -0.7611495852470398, "rewards/margins": 1.7323479652404785, "rewards/rejected": -2.493497610092163, "step": 4896 }, { "epoch": 0.57, "learning_rate": 1.310617692216842e-07, "logits/chosen": -2.8365378379821777, "logits/rejected": -2.5993003845214844, "logps/chosen": -338.25006103515625, "logps/rejected": -276.83038330078125, "loss": 0.1655, "rewards/accuracies": 1.0, "rewards/chosen": -0.611274003982544, "rewards/margins": 2.212188959121704, "rewards/rejected": -2.823462963104248, "step": 4897 }, { "epoch": 0.57, "learning_rate": 1.310263375457659e-07, "logits/chosen": -2.490060806274414, "logits/rejected": -2.5275661945343018, "logps/chosen": -254.22674560546875, "logps/rejected": -168.57431030273438, "loss": 0.5016, "rewards/accuracies": 0.625, "rewards/chosen": -0.2124173939228058, "rewards/margins": 2.4431004524230957, "rewards/rejected": -2.655517578125, "step": 4898 }, { "epoch": 0.57, "learning_rate": 1.3099090586984763e-07, "logits/chosen": -2.5266757011413574, "logits/rejected": -2.522172451019287, "logps/chosen": -160.60398864746094, "logps/rejected": -308.7193908691406, "loss": 0.4777, "rewards/accuracies": 0.75, "rewards/chosen": -0.6654006242752075, "rewards/margins": 1.9512474536895752, "rewards/rejected": -2.6166481971740723, "step": 4899 }, { "epoch": 0.57, "learning_rate": 1.3095547419392935e-07, "logits/chosen": -2.098836898803711, "logits/rejected": -2.050503730773926, "logps/chosen": -448.47149658203125, "logps/rejected": -325.55328369140625, "loss": 0.3791, "rewards/accuracies": 0.75, "rewards/chosen": -1.776012659072876, "rewards/margins": 2.8517580032348633, "rewards/rejected": -4.627770900726318, "step": 4900 }, { "epoch": 0.57, "learning_rate": 1.309200425180111e-07, "logits/chosen": -2.446624994277954, "logits/rejected": -2.5571067333221436, "logps/chosen": -279.81982421875, "logps/rejected": -250.8458251953125, "loss": 0.3028, "rewards/accuracies": 0.875, "rewards/chosen": -0.9273287653923035, "rewards/margins": 2.4733033180236816, "rewards/rejected": -3.400631904602051, "step": 4901 }, { "epoch": 0.57, "learning_rate": 1.3088461084209282e-07, "logits/chosen": -2.344827175140381, "logits/rejected": -2.4614408016204834, "logps/chosen": -210.6289825439453, "logps/rejected": -169.69204711914062, "loss": 1.2876, "rewards/accuracies": 0.5, "rewards/chosen": -2.4216456413269043, "rewards/margins": 0.2642451524734497, "rewards/rejected": -2.6858906745910645, "step": 4902 }, { "epoch": 0.57, "learning_rate": 1.3084917916617457e-07, "logits/chosen": -1.6561877727508545, "logits/rejected": -1.7253623008728027, "logps/chosen": -416.9621887207031, "logps/rejected": -438.3257141113281, "loss": 0.3237, "rewards/accuracies": 0.875, "rewards/chosen": -0.993068277835846, "rewards/margins": 2.3853416442871094, "rewards/rejected": -3.3784096240997314, "step": 4903 }, { "epoch": 0.57, "learning_rate": 1.308137474902563e-07, "logits/chosen": -2.4472110271453857, "logits/rejected": -2.5242342948913574, "logps/chosen": -139.63827514648438, "logps/rejected": -107.35699462890625, "loss": 0.3066, "rewards/accuracies": 1.0, "rewards/chosen": -0.9662624597549438, "rewards/margins": 1.2339637279510498, "rewards/rejected": -2.200226306915283, "step": 4904 }, { "epoch": 0.57, "learning_rate": 1.3077831581433802e-07, "logits/chosen": -2.2800920009613037, "logits/rejected": -2.4133448600769043, "logps/chosen": -389.1122131347656, "logps/rejected": -299.1798095703125, "loss": 0.6629, "rewards/accuracies": 0.75, "rewards/chosen": -1.003803014755249, "rewards/margins": 0.7560297250747681, "rewards/rejected": -1.759832739830017, "step": 4905 }, { "epoch": 0.57, "learning_rate": 1.3074288413841974e-07, "logits/chosen": -2.2172834873199463, "logits/rejected": -2.3301074504852295, "logps/chosen": -286.9349670410156, "logps/rejected": -189.72958374023438, "loss": 0.4249, "rewards/accuracies": 0.75, "rewards/chosen": -0.9784653186798096, "rewards/margins": 1.6188820600509644, "rewards/rejected": -2.5973474979400635, "step": 4906 }, { "epoch": 0.57, "learning_rate": 1.3070745246250146e-07, "logits/chosen": -2.4728925228118896, "logits/rejected": -2.2771623134613037, "logps/chosen": -242.75350952148438, "logps/rejected": -359.1866455078125, "loss": 0.3923, "rewards/accuracies": 0.75, "rewards/chosen": -1.3976023197174072, "rewards/margins": 2.6350767612457275, "rewards/rejected": -4.032679080963135, "step": 4907 }, { "epoch": 0.57, "learning_rate": 1.3067202078658318e-07, "logits/chosen": -1.7711801528930664, "logits/rejected": -1.9588677883148193, "logps/chosen": -487.91558837890625, "logps/rejected": -500.8773498535156, "loss": 0.5148, "rewards/accuracies": 0.75, "rewards/chosen": -0.9819821715354919, "rewards/margins": 1.5080817937850952, "rewards/rejected": -2.4900641441345215, "step": 4908 }, { "epoch": 0.57, "learning_rate": 1.3063658911066493e-07, "logits/chosen": -2.076648235321045, "logits/rejected": -2.0835564136505127, "logps/chosen": -409.09454345703125, "logps/rejected": -374.7704162597656, "loss": 0.3552, "rewards/accuracies": 0.75, "rewards/chosen": -0.6248995065689087, "rewards/margins": 1.6097500324249268, "rewards/rejected": -2.234649419784546, "step": 4909 }, { "epoch": 0.57, "learning_rate": 1.3060115743474665e-07, "logits/chosen": -2.5695855617523193, "logits/rejected": -2.6179418563842773, "logps/chosen": -298.19384765625, "logps/rejected": -229.6575927734375, "loss": 0.281, "rewards/accuracies": 1.0, "rewards/chosen": -0.6266980171203613, "rewards/margins": 2.1781461238861084, "rewards/rejected": -2.8048441410064697, "step": 4910 }, { "epoch": 0.57, "learning_rate": 1.3056572575882838e-07, "logits/chosen": -2.7867047786712646, "logits/rejected": -2.6262154579162598, "logps/chosen": -199.03916931152344, "logps/rejected": -239.67872619628906, "loss": 0.6788, "rewards/accuracies": 0.625, "rewards/chosen": -1.1712989807128906, "rewards/margins": 1.0142991542816162, "rewards/rejected": -2.185598134994507, "step": 4911 }, { "epoch": 0.57, "learning_rate": 1.3053029408291013e-07, "logits/chosen": -1.9262363910675049, "logits/rejected": -1.9955253601074219, "logps/chosen": -288.3578796386719, "logps/rejected": -347.6042175292969, "loss": 0.3021, "rewards/accuracies": 0.875, "rewards/chosen": -0.3174705505371094, "rewards/margins": 3.5497567653656006, "rewards/rejected": -3.867227554321289, "step": 4912 }, { "epoch": 0.57, "learning_rate": 1.3049486240699185e-07, "logits/chosen": -2.6033542156219482, "logits/rejected": -2.6996002197265625, "logps/chosen": -321.653564453125, "logps/rejected": -319.6850891113281, "loss": 0.4123, "rewards/accuracies": 0.875, "rewards/chosen": -0.665565013885498, "rewards/margins": 1.6352014541625977, "rewards/rejected": -2.3007664680480957, "step": 4913 }, { "epoch": 0.57, "learning_rate": 1.3045943073107357e-07, "logits/chosen": -2.157122850418091, "logits/rejected": -2.4260237216949463, "logps/chosen": -208.73158264160156, "logps/rejected": -236.58409118652344, "loss": 0.9194, "rewards/accuracies": 0.75, "rewards/chosen": -2.3800923824310303, "rewards/margins": 1.466701865196228, "rewards/rejected": -3.8467941284179688, "step": 4914 }, { "epoch": 0.57, "learning_rate": 1.3042399905515532e-07, "logits/chosen": -2.285346269607544, "logits/rejected": -2.0704169273376465, "logps/chosen": -194.35081481933594, "logps/rejected": -261.7137756347656, "loss": 0.5341, "rewards/accuracies": 0.75, "rewards/chosen": -1.1006237268447876, "rewards/margins": 1.5816081762313843, "rewards/rejected": -2.682232141494751, "step": 4915 }, { "epoch": 0.57, "learning_rate": 1.3038856737923704e-07, "logits/chosen": -2.4884984493255615, "logits/rejected": -2.668881893157959, "logps/chosen": -307.3558654785156, "logps/rejected": -310.4018249511719, "loss": 0.2357, "rewards/accuracies": 0.875, "rewards/chosen": -1.4041770696640015, "rewards/margins": 3.434743881225586, "rewards/rejected": -4.838920593261719, "step": 4916 }, { "epoch": 0.57, "learning_rate": 1.3035313570331876e-07, "logits/chosen": -2.060009479522705, "logits/rejected": -1.9025694131851196, "logps/chosen": -305.6308288574219, "logps/rejected": -409.52362060546875, "loss": 0.1882, "rewards/accuracies": 0.875, "rewards/chosen": -0.2018437683582306, "rewards/margins": 2.9881582260131836, "rewards/rejected": -3.190001964569092, "step": 4917 }, { "epoch": 0.57, "learning_rate": 1.3031770402740048e-07, "logits/chosen": -2.2658984661102295, "logits/rejected": -2.102811098098755, "logps/chosen": -297.4826354980469, "logps/rejected": -248.32949829101562, "loss": 0.2389, "rewards/accuracies": 1.0, "rewards/chosen": -0.8426122069358826, "rewards/margins": 2.482356309890747, "rewards/rejected": -3.3249683380126953, "step": 4918 }, { "epoch": 0.57, "learning_rate": 1.302822723514822e-07, "logits/chosen": -1.955812931060791, "logits/rejected": -1.858857274055481, "logps/chosen": -425.9463806152344, "logps/rejected": -325.12017822265625, "loss": 0.3759, "rewards/accuracies": 0.875, "rewards/chosen": -0.2555692791938782, "rewards/margins": 1.5322794914245605, "rewards/rejected": -1.787848711013794, "step": 4919 }, { "epoch": 0.57, "learning_rate": 1.3024684067556393e-07, "logits/chosen": -2.217649459838867, "logits/rejected": -2.6436541080474854, "logps/chosen": -200.87335205078125, "logps/rejected": -145.0616455078125, "loss": 0.2793, "rewards/accuracies": 1.0, "rewards/chosen": -0.23772066831588745, "rewards/margins": 1.3452272415161133, "rewards/rejected": -1.582947850227356, "step": 4920 }, { "epoch": 0.57, "learning_rate": 1.3021140899964568e-07, "logits/chosen": -1.978952169418335, "logits/rejected": -1.83791184425354, "logps/chosen": -224.68748474121094, "logps/rejected": -212.7786865234375, "loss": 0.375, "rewards/accuracies": 0.875, "rewards/chosen": -0.7447393536567688, "rewards/margins": 1.1644201278686523, "rewards/rejected": -1.9091594219207764, "step": 4921 }, { "epoch": 0.57, "learning_rate": 1.301759773237274e-07, "logits/chosen": -2.1389050483703613, "logits/rejected": -2.4295268058776855, "logps/chosen": -353.09808349609375, "logps/rejected": -212.7467041015625, "loss": 0.1531, "rewards/accuracies": 1.0, "rewards/chosen": -0.3820021450519562, "rewards/margins": 2.6475329399108887, "rewards/rejected": -3.0295348167419434, "step": 4922 }, { "epoch": 0.57, "learning_rate": 1.3014054564780915e-07, "logits/chosen": -2.3991661071777344, "logits/rejected": -2.320427656173706, "logps/chosen": -275.2611083984375, "logps/rejected": -264.94873046875, "loss": 0.3954, "rewards/accuracies": 0.75, "rewards/chosen": -1.6660456657409668, "rewards/margins": 2.668148994445801, "rewards/rejected": -4.334194660186768, "step": 4923 }, { "epoch": 0.57, "learning_rate": 1.3010511397189087e-07, "logits/chosen": -2.319770574569702, "logits/rejected": -2.308619260787964, "logps/chosen": -346.07904052734375, "logps/rejected": -257.2724609375, "loss": 0.1931, "rewards/accuracies": 1.0, "rewards/chosen": -0.17696575820446014, "rewards/margins": 1.8882322311401367, "rewards/rejected": -2.0651979446411133, "step": 4924 }, { "epoch": 0.57, "learning_rate": 1.300696822959726e-07, "logits/chosen": -2.877318859100342, "logits/rejected": -2.7470200061798096, "logps/chosen": -611.4945678710938, "logps/rejected": -264.7587890625, "loss": 0.1791, "rewards/accuracies": 1.0, "rewards/chosen": -0.5746270418167114, "rewards/margins": 2.7248153686523438, "rewards/rejected": -3.2994425296783447, "step": 4925 }, { "epoch": 0.57, "learning_rate": 1.3003425062005434e-07, "logits/chosen": -2.028644561767578, "logits/rejected": -2.4943199157714844, "logps/chosen": -410.6567687988281, "logps/rejected": -272.51849365234375, "loss": 0.9134, "rewards/accuracies": 0.75, "rewards/chosen": -1.186037302017212, "rewards/margins": 0.3469725549221039, "rewards/rejected": -1.5330097675323486, "step": 4926 }, { "epoch": 0.57, "learning_rate": 1.2999881894413606e-07, "logits/chosen": -2.436856508255005, "logits/rejected": -2.447812557220459, "logps/chosen": -299.63763427734375, "logps/rejected": -242.04632568359375, "loss": 0.2011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6836138963699341, "rewards/margins": 2.127715587615967, "rewards/rejected": -2.8113296031951904, "step": 4927 }, { "epoch": 0.57, "learning_rate": 1.2996338726821779e-07, "logits/chosen": -2.5746853351593018, "logits/rejected": -2.750491142272949, "logps/chosen": -257.95562744140625, "logps/rejected": -217.0816650390625, "loss": 0.2242, "rewards/accuracies": 0.875, "rewards/chosen": 0.011576220393180847, "rewards/margins": 1.949467420578003, "rewards/rejected": -1.9378911256790161, "step": 4928 }, { "epoch": 0.57, "learning_rate": 1.299279555922995e-07, "logits/chosen": -2.0329642295837402, "logits/rejected": -2.0717101097106934, "logps/chosen": -517.856689453125, "logps/rejected": -297.9531555175781, "loss": 1.1188, "rewards/accuracies": 0.75, "rewards/chosen": -2.7898049354553223, "rewards/margins": 0.4849618077278137, "rewards/rejected": -3.2747669219970703, "step": 4929 }, { "epoch": 0.57, "learning_rate": 1.2989252391638123e-07, "logits/chosen": -2.0974202156066895, "logits/rejected": -2.110975503921509, "logps/chosen": -306.4818115234375, "logps/rejected": -287.61492919921875, "loss": 0.4578, "rewards/accuracies": 0.625, "rewards/chosen": -1.3665285110473633, "rewards/margins": 2.160068988800049, "rewards/rejected": -3.526597738265991, "step": 4930 }, { "epoch": 0.57, "learning_rate": 1.2985709224046295e-07, "logits/chosen": -2.7785258293151855, "logits/rejected": -2.3334174156188965, "logps/chosen": -100.2549057006836, "logps/rejected": -258.3974609375, "loss": 0.289, "rewards/accuracies": 0.75, "rewards/chosen": -0.3372487425804138, "rewards/margins": 2.211582899093628, "rewards/rejected": -2.5488317012786865, "step": 4931 }, { "epoch": 0.57, "learning_rate": 1.298216605645447e-07, "logits/chosen": -2.43625807762146, "logits/rejected": -2.4659969806671143, "logps/chosen": -253.04092407226562, "logps/rejected": -274.9180908203125, "loss": 0.7277, "rewards/accuracies": 0.625, "rewards/chosen": -0.7942510843276978, "rewards/margins": 1.4348478317260742, "rewards/rejected": -2.2290987968444824, "step": 4932 }, { "epoch": 0.57, "learning_rate": 1.2978622888862642e-07, "logits/chosen": -2.2840816974639893, "logits/rejected": -2.0167858600616455, "logps/chosen": -285.70452880859375, "logps/rejected": -347.6697998046875, "loss": 0.2512, "rewards/accuracies": 0.875, "rewards/chosen": -0.909662127494812, "rewards/margins": 3.3156776428222656, "rewards/rejected": -4.225339889526367, "step": 4933 }, { "epoch": 0.57, "learning_rate": 1.2975079721270814e-07, "logits/chosen": -1.9259463548660278, "logits/rejected": -2.447626829147339, "logps/chosen": -491.7336120605469, "logps/rejected": -251.5966033935547, "loss": 0.1199, "rewards/accuracies": 1.0, "rewards/chosen": -0.6146990060806274, "rewards/margins": 3.2180466651916504, "rewards/rejected": -3.8327455520629883, "step": 4934 }, { "epoch": 0.57, "learning_rate": 1.297153655367899e-07, "logits/chosen": -2.542686939239502, "logits/rejected": -2.3447842597961426, "logps/chosen": -197.29840087890625, "logps/rejected": -261.6246643066406, "loss": 0.1429, "rewards/accuracies": 1.0, "rewards/chosen": -0.7434940934181213, "rewards/margins": 3.6033902168273926, "rewards/rejected": -4.346884250640869, "step": 4935 }, { "epoch": 0.57, "learning_rate": 1.2967993386087162e-07, "logits/chosen": -2.0600674152374268, "logits/rejected": -2.3497314453125, "logps/chosen": -593.2804565429688, "logps/rejected": -509.93865966796875, "loss": 0.212, "rewards/accuracies": 0.875, "rewards/chosen": -0.5343054533004761, "rewards/margins": 2.4115982055664062, "rewards/rejected": -2.945903778076172, "step": 4936 }, { "epoch": 0.57, "learning_rate": 1.2964450218495334e-07, "logits/chosen": -1.8445041179656982, "logits/rejected": -2.0008301734924316, "logps/chosen": -416.8793640136719, "logps/rejected": -287.35028076171875, "loss": 0.6965, "rewards/accuracies": 0.625, "rewards/chosen": -1.1846888065338135, "rewards/margins": 1.1824102401733398, "rewards/rejected": -2.3670990467071533, "step": 4937 }, { "epoch": 0.57, "learning_rate": 1.2960907050903509e-07, "logits/chosen": -2.4627020359039307, "logits/rejected": -2.567291259765625, "logps/chosen": -175.21295166015625, "logps/rejected": -330.9231262207031, "loss": 0.2549, "rewards/accuracies": 0.875, "rewards/chosen": -0.6372267603874207, "rewards/margins": 4.0871171951293945, "rewards/rejected": -4.724343776702881, "step": 4938 }, { "epoch": 0.57, "learning_rate": 1.295736388331168e-07, "logits/chosen": -1.842760682106018, "logits/rejected": -2.2526907920837402, "logps/chosen": -184.8463897705078, "logps/rejected": -159.6168212890625, "loss": 0.4453, "rewards/accuracies": 0.75, "rewards/chosen": -1.8329417705535889, "rewards/margins": 1.5737833976745605, "rewards/rejected": -3.4067251682281494, "step": 4939 }, { "epoch": 0.57, "learning_rate": 1.2953820715719853e-07, "logits/chosen": -1.8504279851913452, "logits/rejected": -1.9069013595581055, "logps/chosen": -457.6588134765625, "logps/rejected": -323.8978271484375, "loss": 0.2648, "rewards/accuracies": 0.875, "rewards/chosen": -0.4876168370246887, "rewards/margins": 2.654024600982666, "rewards/rejected": -3.141641616821289, "step": 4940 }, { "epoch": 0.57, "learning_rate": 1.2950277548128025e-07, "logits/chosen": -2.4690239429473877, "logits/rejected": -2.1209776401519775, "logps/chosen": -197.31777954101562, "logps/rejected": -310.25421142578125, "loss": 0.684, "rewards/accuracies": 0.75, "rewards/chosen": -0.8896211981773376, "rewards/margins": 1.9191218614578247, "rewards/rejected": -2.8087430000305176, "step": 4941 }, { "epoch": 0.57, "learning_rate": 1.2946734380536197e-07, "logits/chosen": -1.7603150606155396, "logits/rejected": -1.8750998973846436, "logps/chosen": -444.46527099609375, "logps/rejected": -456.8905029296875, "loss": 0.3339, "rewards/accuracies": 0.875, "rewards/chosen": -1.44185471534729, "rewards/margins": 1.7989068031311035, "rewards/rejected": -3.2407615184783936, "step": 4942 }, { "epoch": 0.58, "learning_rate": 1.2943191212944372e-07, "logits/chosen": -2.0848381519317627, "logits/rejected": -2.1048665046691895, "logps/chosen": -144.6287384033203, "logps/rejected": -234.92112731933594, "loss": 0.6171, "rewards/accuracies": 0.75, "rewards/chosen": -1.722733736038208, "rewards/margins": 1.6206884384155273, "rewards/rejected": -3.3434219360351562, "step": 4943 }, { "epoch": 0.58, "learning_rate": 1.2939648045352544e-07, "logits/chosen": -2.1562881469726562, "logits/rejected": -2.2121524810791016, "logps/chosen": -321.2801513671875, "logps/rejected": -332.8514709472656, "loss": 0.2755, "rewards/accuracies": 1.0, "rewards/chosen": -0.23874785006046295, "rewards/margins": 1.8395302295684814, "rewards/rejected": -2.078278064727783, "step": 4944 }, { "epoch": 0.58, "learning_rate": 1.2936104877760717e-07, "logits/chosen": -2.019896984100342, "logits/rejected": -2.0468990802764893, "logps/chosen": -408.7285461425781, "logps/rejected": -414.41168212890625, "loss": 0.3691, "rewards/accuracies": 0.875, "rewards/chosen": -0.17786996066570282, "rewards/margins": 2.8183822631835938, "rewards/rejected": -2.9962522983551025, "step": 4945 }, { "epoch": 0.58, "learning_rate": 1.2932561710168892e-07, "logits/chosen": -2.4705793857574463, "logits/rejected": -2.3821911811828613, "logps/chosen": -373.4603271484375, "logps/rejected": -298.5753173828125, "loss": 0.4047, "rewards/accuracies": 0.75, "rewards/chosen": -1.4362245798110962, "rewards/margins": 2.996866226196289, "rewards/rejected": -4.4330902099609375, "step": 4946 }, { "epoch": 0.58, "learning_rate": 1.2929018542577064e-07, "logits/chosen": -2.129570960998535, "logits/rejected": -1.9283666610717773, "logps/chosen": -232.6303253173828, "logps/rejected": -336.94580078125, "loss": 0.8464, "rewards/accuracies": 0.75, "rewards/chosen": -1.099475622177124, "rewards/margins": 1.0745607614517212, "rewards/rejected": -2.1740365028381348, "step": 4947 }, { "epoch": 0.58, "learning_rate": 1.2925475374985236e-07, "logits/chosen": -2.154878616333008, "logits/rejected": -2.4892845153808594, "logps/chosen": -366.5902404785156, "logps/rejected": -261.05780029296875, "loss": 1.1211, "rewards/accuracies": 0.375, "rewards/chosen": -1.5631541013717651, "rewards/margins": 0.4615755081176758, "rewards/rejected": -2.0247294902801514, "step": 4948 }, { "epoch": 0.58, "learning_rate": 1.2921932207393408e-07, "logits/chosen": -2.460223913192749, "logits/rejected": -2.3769500255584717, "logps/chosen": -182.82952880859375, "logps/rejected": -279.4175720214844, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": -0.0462646558880806, "rewards/margins": 4.038254261016846, "rewards/rejected": -4.084518909454346, "step": 4949 }, { "epoch": 0.58, "learning_rate": 1.2918389039801583e-07, "logits/chosen": -2.2186965942382812, "logits/rejected": -2.3140652179718018, "logps/chosen": -318.793212890625, "logps/rejected": -272.7854309082031, "loss": 0.1681, "rewards/accuracies": 0.875, "rewards/chosen": -0.9923732280731201, "rewards/margins": 2.9132678508758545, "rewards/rejected": -3.9056410789489746, "step": 4950 }, { "epoch": 0.58, "learning_rate": 1.2914845872209755e-07, "logits/chosen": -2.2563459873199463, "logits/rejected": -1.9658119678497314, "logps/chosen": -136.78271484375, "logps/rejected": -277.77044677734375, "loss": 0.4569, "rewards/accuracies": 0.875, "rewards/chosen": -1.0255968570709229, "rewards/margins": 3.470048427581787, "rewards/rejected": -4.495645523071289, "step": 4951 }, { "epoch": 0.58, "learning_rate": 1.2911302704617927e-07, "logits/chosen": -2.6663882732391357, "logits/rejected": -2.6697287559509277, "logps/chosen": -288.6773681640625, "logps/rejected": -310.09735107421875, "loss": 0.5662, "rewards/accuracies": 0.75, "rewards/chosen": -1.3684192895889282, "rewards/margins": 3.438345193862915, "rewards/rejected": -4.806764602661133, "step": 4952 }, { "epoch": 0.58, "learning_rate": 1.29077595370261e-07, "logits/chosen": -2.1657121181488037, "logits/rejected": -2.286780834197998, "logps/chosen": -295.60107421875, "logps/rejected": -234.94326782226562, "loss": 0.3945, "rewards/accuracies": 0.875, "rewards/chosen": -1.619848608970642, "rewards/margins": 1.8318440914154053, "rewards/rejected": -3.451692581176758, "step": 4953 }, { "epoch": 0.58, "learning_rate": 1.2904216369434272e-07, "logits/chosen": -2.4368579387664795, "logits/rejected": -2.4067797660827637, "logps/chosen": -322.55755615234375, "logps/rejected": -322.501953125, "loss": 0.4102, "rewards/accuracies": 0.625, "rewards/chosen": -1.3796987533569336, "rewards/margins": 2.9810991287231445, "rewards/rejected": -4.360797882080078, "step": 4954 }, { "epoch": 0.58, "learning_rate": 1.2900673201842447e-07, "logits/chosen": -2.537346124649048, "logits/rejected": -2.3009536266326904, "logps/chosen": -214.00863647460938, "logps/rejected": -293.10614013671875, "loss": 0.1763, "rewards/accuracies": 0.875, "rewards/chosen": -0.5657811164855957, "rewards/margins": 2.8408422470092773, "rewards/rejected": -3.406623601913452, "step": 4955 }, { "epoch": 0.58, "learning_rate": 1.289713003425062e-07, "logits/chosen": -1.6116416454315186, "logits/rejected": -2.1356890201568604, "logps/chosen": -331.9773254394531, "logps/rejected": -177.7339324951172, "loss": 0.6966, "rewards/accuracies": 0.5, "rewards/chosen": -1.3787846565246582, "rewards/margins": 0.2444840520620346, "rewards/rejected": -1.6232688426971436, "step": 4956 }, { "epoch": 0.58, "learning_rate": 1.2893586866658794e-07, "logits/chosen": -2.2807111740112305, "logits/rejected": -2.329294204711914, "logps/chosen": -575.661376953125, "logps/rejected": -377.9850158691406, "loss": 0.7078, "rewards/accuracies": 0.875, "rewards/chosen": -1.6326277256011963, "rewards/margins": 0.8870232105255127, "rewards/rejected": -2.519650936126709, "step": 4957 }, { "epoch": 0.58, "learning_rate": 1.2890043699066966e-07, "logits/chosen": -2.7589313983917236, "logits/rejected": -2.727663993835449, "logps/chosen": -179.0670166015625, "logps/rejected": -101.14022064208984, "loss": 0.6405, "rewards/accuracies": 0.875, "rewards/chosen": -1.3017739057540894, "rewards/margins": 0.6723085045814514, "rewards/rejected": -1.9740824699401855, "step": 4958 }, { "epoch": 0.58, "learning_rate": 1.2886500531475138e-07, "logits/chosen": -2.42645001411438, "logits/rejected": -2.6815199851989746, "logps/chosen": -459.3863525390625, "logps/rejected": -242.99600219726562, "loss": 0.2489, "rewards/accuracies": 1.0, "rewards/chosen": -0.7337267994880676, "rewards/margins": 1.9174344539642334, "rewards/rejected": -2.6511611938476562, "step": 4959 }, { "epoch": 0.58, "learning_rate": 1.288295736388331e-07, "logits/chosen": -2.365729808807373, "logits/rejected": -2.226304531097412, "logps/chosen": -160.03855895996094, "logps/rejected": -255.910400390625, "loss": 0.2548, "rewards/accuracies": 1.0, "rewards/chosen": -1.6791954040527344, "rewards/margins": 2.042104721069336, "rewards/rejected": -3.7213003635406494, "step": 4960 }, { "epoch": 0.58, "learning_rate": 1.2879414196291483e-07, "logits/chosen": -2.7303483486175537, "logits/rejected": -2.609713315963745, "logps/chosen": -220.66314697265625, "logps/rejected": -252.3252410888672, "loss": 0.8425, "rewards/accuracies": 0.625, "rewards/chosen": -2.0714027881622314, "rewards/margins": 0.8686571717262268, "rewards/rejected": -2.9400601387023926, "step": 4961 }, { "epoch": 0.58, "learning_rate": 1.2875871028699658e-07, "logits/chosen": -2.04184889793396, "logits/rejected": -2.089648485183716, "logps/chosen": -111.89359283447266, "logps/rejected": -250.9869384765625, "loss": 0.3175, "rewards/accuracies": 0.75, "rewards/chosen": -1.081406593322754, "rewards/margins": 2.8639979362487793, "rewards/rejected": -3.945404529571533, "step": 4962 }, { "epoch": 0.58, "learning_rate": 1.287232786110783e-07, "logits/chosen": -2.818045139312744, "logits/rejected": -2.88004732131958, "logps/chosen": -187.65798950195312, "logps/rejected": -168.46170043945312, "loss": 0.3209, "rewards/accuracies": 0.75, "rewards/chosen": -0.692453145980835, "rewards/margins": 2.398386001586914, "rewards/rejected": -3.090839385986328, "step": 4963 }, { "epoch": 0.58, "learning_rate": 1.2868784693516002e-07, "logits/chosen": -2.3949756622314453, "logits/rejected": -2.437565565109253, "logps/chosen": -298.41552734375, "logps/rejected": -308.6033935546875, "loss": 0.0999, "rewards/accuracies": 1.0, "rewards/chosen": -0.9097143411636353, "rewards/margins": 2.430297374725342, "rewards/rejected": -3.3400118350982666, "step": 4964 }, { "epoch": 0.58, "learning_rate": 1.2865241525924174e-07, "logits/chosen": -2.1086783409118652, "logits/rejected": -2.0637729167938232, "logps/chosen": -396.76043701171875, "logps/rejected": -425.26995849609375, "loss": 0.4722, "rewards/accuracies": 0.875, "rewards/chosen": -1.0683314800262451, "rewards/margins": 2.0109074115753174, "rewards/rejected": -3.0792388916015625, "step": 4965 }, { "epoch": 0.58, "learning_rate": 1.286169835833235e-07, "logits/chosen": -2.108046531677246, "logits/rejected": -2.0814669132232666, "logps/chosen": -291.6985168457031, "logps/rejected": -388.8736572265625, "loss": 0.5962, "rewards/accuracies": 0.75, "rewards/chosen": -1.4186750650405884, "rewards/margins": 1.1423519849777222, "rewards/rejected": -2.5610270500183105, "step": 4966 }, { "epoch": 0.58, "learning_rate": 1.285815519074052e-07, "logits/chosen": -2.1042392253875732, "logits/rejected": -2.4184608459472656, "logps/chosen": -267.4323425292969, "logps/rejected": -193.12094116210938, "loss": 0.5841, "rewards/accuracies": 0.75, "rewards/chosen": -0.5966074466705322, "rewards/margins": 1.0796830654144287, "rewards/rejected": -1.6762906312942505, "step": 4967 }, { "epoch": 0.58, "learning_rate": 1.2854612023148696e-07, "logits/chosen": -1.851239800453186, "logits/rejected": -2.01800537109375, "logps/chosen": -352.7012634277344, "logps/rejected": -262.5887756347656, "loss": 0.2342, "rewards/accuracies": 1.0, "rewards/chosen": -0.08510750532150269, "rewards/margins": 1.5105820894241333, "rewards/rejected": -1.5956895351409912, "step": 4968 }, { "epoch": 0.58, "learning_rate": 1.2851068855556868e-07, "logits/chosen": -2.432558536529541, "logits/rejected": -2.633359670639038, "logps/chosen": -182.19703674316406, "logps/rejected": -198.9443359375, "loss": 0.1765, "rewards/accuracies": 1.0, "rewards/chosen": -0.720443069934845, "rewards/margins": 3.0327236652374268, "rewards/rejected": -3.753166913986206, "step": 4969 }, { "epoch": 0.58, "learning_rate": 1.284752568796504e-07, "logits/chosen": -2.5202078819274902, "logits/rejected": -2.4729084968566895, "logps/chosen": -219.0676727294922, "logps/rejected": -187.67848205566406, "loss": 0.2653, "rewards/accuracies": 0.875, "rewards/chosen": -0.11130917072296143, "rewards/margins": 1.8346264362335205, "rewards/rejected": -1.945935606956482, "step": 4970 }, { "epoch": 0.58, "learning_rate": 1.2843982520373213e-07, "logits/chosen": -2.0898869037628174, "logits/rejected": -2.237042188644409, "logps/chosen": -540.52294921875, "logps/rejected": -342.38055419921875, "loss": 0.6953, "rewards/accuracies": 0.5, "rewards/chosen": -1.0757588148117065, "rewards/margins": 0.9184942841529846, "rewards/rejected": -1.994253158569336, "step": 4971 }, { "epoch": 0.58, "learning_rate": 1.2840439352781385e-07, "logits/chosen": -2.341604471206665, "logits/rejected": -2.5139918327331543, "logps/chosen": -544.1693115234375, "logps/rejected": -362.1025695800781, "loss": 0.6268, "rewards/accuracies": 0.875, "rewards/chosen": -0.33385393023490906, "rewards/margins": 2.2152416706085205, "rewards/rejected": -2.54909610748291, "step": 4972 }, { "epoch": 0.58, "learning_rate": 1.283689618518956e-07, "logits/chosen": -2.2023181915283203, "logits/rejected": -1.9675989151000977, "logps/chosen": -285.7481689453125, "logps/rejected": -354.8930358886719, "loss": 1.1541, "rewards/accuracies": 0.375, "rewards/chosen": -0.5335869789123535, "rewards/margins": 1.3830063343048096, "rewards/rejected": -1.9165929555892944, "step": 4973 }, { "epoch": 0.58, "learning_rate": 1.2833353017597732e-07, "logits/chosen": -2.7190661430358887, "logits/rejected": -2.7233190536499023, "logps/chosen": -278.17254638671875, "logps/rejected": -219.3129119873047, "loss": 0.314, "rewards/accuracies": 0.875, "rewards/chosen": -0.7535488605499268, "rewards/margins": 1.7164993286132812, "rewards/rejected": -2.470048189163208, "step": 4974 }, { "epoch": 0.58, "learning_rate": 1.2829809850005904e-07, "logits/chosen": -1.973333477973938, "logits/rejected": -2.2589316368103027, "logps/chosen": -499.8240661621094, "logps/rejected": -332.400634765625, "loss": 0.2862, "rewards/accuracies": 0.875, "rewards/chosen": -1.5119386911392212, "rewards/margins": 2.5388479232788086, "rewards/rejected": -4.050786018371582, "step": 4975 }, { "epoch": 0.58, "learning_rate": 1.2826266682414076e-07, "logits/chosen": -2.306122303009033, "logits/rejected": -2.555237293243408, "logps/chosen": -371.9221496582031, "logps/rejected": -290.66473388671875, "loss": 0.3225, "rewards/accuracies": 0.75, "rewards/chosen": -0.6776841282844543, "rewards/margins": 2.9434800148010254, "rewards/rejected": -3.621164321899414, "step": 4976 }, { "epoch": 0.58, "learning_rate": 1.2822723514822251e-07, "logits/chosen": -2.2051119804382324, "logits/rejected": -2.614405632019043, "logps/chosen": -307.7746887207031, "logps/rejected": -169.33465576171875, "loss": 0.1548, "rewards/accuracies": 1.0, "rewards/chosen": -0.17593073844909668, "rewards/margins": 2.823361873626709, "rewards/rejected": -2.9992928504943848, "step": 4977 }, { "epoch": 0.58, "learning_rate": 1.2819180347230424e-07, "logits/chosen": -2.281564712524414, "logits/rejected": -2.2349283695220947, "logps/chosen": -245.7202606201172, "logps/rejected": -341.51123046875, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": -1.369429588317871, "rewards/margins": 3.81427001953125, "rewards/rejected": -5.183699607849121, "step": 4978 }, { "epoch": 0.58, "learning_rate": 1.2815637179638596e-07, "logits/chosen": -2.3892860412597656, "logits/rejected": -2.5508804321289062, "logps/chosen": -299.2299499511719, "logps/rejected": -252.72314453125, "loss": 0.1828, "rewards/accuracies": 1.0, "rewards/chosen": -0.7366554737091064, "rewards/margins": 2.0160117149353027, "rewards/rejected": -2.752667188644409, "step": 4979 }, { "epoch": 0.58, "learning_rate": 1.281209401204677e-07, "logits/chosen": -2.0227205753326416, "logits/rejected": -2.236707925796509, "logps/chosen": -281.2027587890625, "logps/rejected": -257.28509521484375, "loss": 0.3239, "rewards/accuracies": 0.875, "rewards/chosen": -0.5506935119628906, "rewards/margins": 2.8614561557769775, "rewards/rejected": -3.412149429321289, "step": 4980 }, { "epoch": 0.58, "learning_rate": 1.2808550844454943e-07, "logits/chosen": -2.5172176361083984, "logits/rejected": -2.4444332122802734, "logps/chosen": -250.2740478515625, "logps/rejected": -206.22427368164062, "loss": 0.7018, "rewards/accuracies": 0.75, "rewards/chosen": -0.5766663551330566, "rewards/margins": 0.5567880272865295, "rewards/rejected": -1.1334543228149414, "step": 4981 }, { "epoch": 0.58, "learning_rate": 1.2805007676863115e-07, "logits/chosen": -2.141328811645508, "logits/rejected": -2.5219016075134277, "logps/chosen": -192.51229858398438, "logps/rejected": -245.0391082763672, "loss": 0.6426, "rewards/accuracies": 0.75, "rewards/chosen": -0.724065899848938, "rewards/margins": 1.4952632188796997, "rewards/rejected": -2.2193291187286377, "step": 4982 }, { "epoch": 0.58, "learning_rate": 1.2801464509271287e-07, "logits/chosen": -2.025484561920166, "logits/rejected": -2.4529287815093994, "logps/chosen": -396.4825744628906, "logps/rejected": -275.458984375, "loss": 0.2005, "rewards/accuracies": 0.875, "rewards/chosen": -0.30254507064819336, "rewards/margins": 3.6435177326202393, "rewards/rejected": -3.9460625648498535, "step": 4983 }, { "epoch": 0.58, "learning_rate": 1.279792134167946e-07, "logits/chosen": -2.470404624938965, "logits/rejected": -2.4293644428253174, "logps/chosen": -223.46432495117188, "logps/rejected": -399.7251281738281, "loss": 0.3438, "rewards/accuracies": 0.875, "rewards/chosen": -0.42662739753723145, "rewards/margins": 2.4852964878082275, "rewards/rejected": -2.911923885345459, "step": 4984 }, { "epoch": 0.58, "learning_rate": 1.2794378174087634e-07, "logits/chosen": -2.445805549621582, "logits/rejected": -2.464209794998169, "logps/chosen": -300.95196533203125, "logps/rejected": -306.5870056152344, "loss": 0.4353, "rewards/accuracies": 0.625, "rewards/chosen": -0.34047120809555054, "rewards/margins": 1.87259840965271, "rewards/rejected": -2.2130696773529053, "step": 4985 }, { "epoch": 0.58, "learning_rate": 1.2790835006495807e-07, "logits/chosen": -2.698894500732422, "logits/rejected": -2.7770156860351562, "logps/chosen": -135.4595489501953, "logps/rejected": -216.386962890625, "loss": 0.5819, "rewards/accuracies": 0.875, "rewards/chosen": -1.5836890935897827, "rewards/margins": 1.689714789390564, "rewards/rejected": -3.2734038829803467, "step": 4986 }, { "epoch": 0.58, "learning_rate": 1.278729183890398e-07, "logits/chosen": -1.9910471439361572, "logits/rejected": -1.9543753862380981, "logps/chosen": -275.806640625, "logps/rejected": -292.7551574707031, "loss": 1.0247, "rewards/accuracies": 0.75, "rewards/chosen": -2.3051960468292236, "rewards/margins": 1.457802414894104, "rewards/rejected": -3.762998580932617, "step": 4987 }, { "epoch": 0.58, "learning_rate": 1.2783748671312154e-07, "logits/chosen": -1.911210536956787, "logits/rejected": -2.034813642501831, "logps/chosen": -415.4217834472656, "logps/rejected": -308.489990234375, "loss": 0.8522, "rewards/accuracies": 0.625, "rewards/chosen": -0.9848672747612, "rewards/margins": 1.3181178569793701, "rewards/rejected": -2.302985191345215, "step": 4988 }, { "epoch": 0.58, "learning_rate": 1.2780205503720326e-07, "logits/chosen": -2.555696487426758, "logits/rejected": -2.5398035049438477, "logps/chosen": -242.5450897216797, "logps/rejected": -246.6356201171875, "loss": 0.089, "rewards/accuracies": 1.0, "rewards/chosen": -0.20990701019763947, "rewards/margins": 3.8510372638702393, "rewards/rejected": -4.060944557189941, "step": 4989 }, { "epoch": 0.58, "learning_rate": 1.2776662336128498e-07, "logits/chosen": -1.5355135202407837, "logits/rejected": -1.7660880088806152, "logps/chosen": -620.6427612304688, "logps/rejected": -544.0104370117188, "loss": 0.1109, "rewards/accuracies": 1.0, "rewards/chosen": -0.01670283079147339, "rewards/margins": 2.954514741897583, "rewards/rejected": -2.971217393875122, "step": 4990 }, { "epoch": 0.58, "learning_rate": 1.2773119168536673e-07, "logits/chosen": -2.4170894622802734, "logits/rejected": -2.593413829803467, "logps/chosen": -456.5002136230469, "logps/rejected": -227.36859130859375, "loss": 0.4416, "rewards/accuracies": 0.625, "rewards/chosen": -2.525996685028076, "rewards/margins": 2.124033212661743, "rewards/rejected": -4.650030136108398, "step": 4991 }, { "epoch": 0.58, "learning_rate": 1.2769576000944845e-07, "logits/chosen": -2.352752923965454, "logits/rejected": -2.2720630168914795, "logps/chosen": -287.6003112792969, "logps/rejected": -338.2923583984375, "loss": 0.7006, "rewards/accuracies": 0.875, "rewards/chosen": -1.1352601051330566, "rewards/margins": 2.8906590938568115, "rewards/rejected": -4.025918960571289, "step": 4992 }, { "epoch": 0.58, "learning_rate": 1.2766032833353017e-07, "logits/chosen": -2.8374853134155273, "logits/rejected": -2.8801887035369873, "logps/chosen": -128.32022094726562, "logps/rejected": -193.45614624023438, "loss": 0.1513, "rewards/accuracies": 1.0, "rewards/chosen": -0.7128094434738159, "rewards/margins": 2.7571961879730225, "rewards/rejected": -3.470005750656128, "step": 4993 }, { "epoch": 0.58, "learning_rate": 1.276248966576119e-07, "logits/chosen": -2.778282642364502, "logits/rejected": -2.7775795459747314, "logps/chosen": -201.17347717285156, "logps/rejected": -273.3324279785156, "loss": 0.5322, "rewards/accuracies": 0.625, "rewards/chosen": -1.0694972276687622, "rewards/margins": 0.9708243608474731, "rewards/rejected": -2.0403215885162354, "step": 4994 }, { "epoch": 0.58, "learning_rate": 1.2758946498169362e-07, "logits/chosen": -2.6118276119232178, "logits/rejected": -2.902498483657837, "logps/chosen": -322.518798828125, "logps/rejected": -275.03021240234375, "loss": 0.2206, "rewards/accuracies": 1.0, "rewards/chosen": -0.7189247608184814, "rewards/margins": 2.8431403636932373, "rewards/rejected": -3.5620648860931396, "step": 4995 }, { "epoch": 0.58, "learning_rate": 1.2755403330577534e-07, "logits/chosen": -2.91837215423584, "logits/rejected": -2.8293378353118896, "logps/chosen": -413.57830810546875, "logps/rejected": -419.4398193359375, "loss": 0.7061, "rewards/accuracies": 0.625, "rewards/chosen": -1.7350621223449707, "rewards/margins": 2.0849318504333496, "rewards/rejected": -3.8199942111968994, "step": 4996 }, { "epoch": 0.58, "learning_rate": 1.275186016298571e-07, "logits/chosen": -2.713042736053467, "logits/rejected": -2.5138919353485107, "logps/chosen": -273.91021728515625, "logps/rejected": -206.5056610107422, "loss": 0.2816, "rewards/accuracies": 0.875, "rewards/chosen": -0.8296372890472412, "rewards/margins": 1.9194782972335815, "rewards/rejected": -2.749115467071533, "step": 4997 }, { "epoch": 0.58, "learning_rate": 1.274831699539388e-07, "logits/chosen": -2.042919397354126, "logits/rejected": -2.1612935066223145, "logps/chosen": -317.7601623535156, "logps/rejected": -307.3943786621094, "loss": 0.184, "rewards/accuracies": 1.0, "rewards/chosen": -0.9101721048355103, "rewards/margins": 2.3210501670837402, "rewards/rejected": -3.231222152709961, "step": 4998 }, { "epoch": 0.58, "learning_rate": 1.2744773827802053e-07, "logits/chosen": -2.478100299835205, "logits/rejected": -2.489065408706665, "logps/chosen": -333.7314147949219, "logps/rejected": -303.79534912109375, "loss": 0.4342, "rewards/accuracies": 0.75, "rewards/chosen": -0.8440136909484863, "rewards/margins": 1.564299464225769, "rewards/rejected": -2.408313035964966, "step": 4999 }, { "epoch": 0.58, "learning_rate": 1.2741230660210228e-07, "logits/chosen": -2.436861753463745, "logits/rejected": -2.4165420532226562, "logps/chosen": -125.89694213867188, "logps/rejected": -180.00918579101562, "loss": 0.6934, "rewards/accuracies": 0.625, "rewards/chosen": -1.9185258150100708, "rewards/margins": 0.7490828037261963, "rewards/rejected": -2.6676084995269775, "step": 5000 }, { "epoch": 0.58, "eval_logits/chosen": -1.7493003606796265, "eval_logits/rejected": -1.749708890914917, "eval_logps/chosen": -278.6475524902344, "eval_logps/rejected": -278.8118591308594, "eval_loss": 0.3704482614994049, "eval_rewards/accuracies": 0.8491379022598267, "eval_rewards/chosen": -0.6482082009315491, "eval_rewards/margins": 2.1323232650756836, "eval_rewards/rejected": -2.780531167984009, "eval_runtime": 237.4807, "eval_samples_per_second": 2.927, "eval_steps_per_second": 1.465, "step": 5000 }, { "epoch": 0.58, "learning_rate": 1.27376874926184e-07, "logits/chosen": -2.448072671890259, "logits/rejected": -2.5658533573150635, "logps/chosen": -314.3951416015625, "logps/rejected": -193.90748596191406, "loss": 0.1856, "rewards/accuracies": 1.0, "rewards/chosen": 0.1025920882821083, "rewards/margins": 2.0626649856567383, "rewards/rejected": -1.9600727558135986, "step": 5001 }, { "epoch": 0.58, "learning_rate": 1.2734144325026575e-07, "logits/chosen": -2.0485544204711914, "logits/rejected": -2.0625832080841064, "logps/chosen": -367.2135925292969, "logps/rejected": -305.0137023925781, "loss": 0.2455, "rewards/accuracies": 0.875, "rewards/chosen": -1.1647567749023438, "rewards/margins": 2.722200393676758, "rewards/rejected": -3.8869569301605225, "step": 5002 }, { "epoch": 0.58, "learning_rate": 1.2730601157434747e-07, "logits/chosen": -2.3442068099975586, "logits/rejected": -2.1199841499328613, "logps/chosen": -235.193359375, "logps/rejected": -238.95765686035156, "loss": 0.3038, "rewards/accuracies": 0.75, "rewards/chosen": -0.5088386535644531, "rewards/margins": 1.9518119096755981, "rewards/rejected": -2.4606504440307617, "step": 5003 }, { "epoch": 0.58, "learning_rate": 1.272705798984292e-07, "logits/chosen": -2.8053717613220215, "logits/rejected": -2.8121883869171143, "logps/chosen": -122.35606384277344, "logps/rejected": -124.37657165527344, "loss": 0.5249, "rewards/accuracies": 0.625, "rewards/chosen": -0.5447511076927185, "rewards/margins": 2.140857219696045, "rewards/rejected": -2.685608386993408, "step": 5004 }, { "epoch": 0.58, "learning_rate": 1.2723514822251092e-07, "logits/chosen": -2.2128281593322754, "logits/rejected": -2.4840023517608643, "logps/chosen": -337.5320129394531, "logps/rejected": -206.02890014648438, "loss": 0.4322, "rewards/accuracies": 0.75, "rewards/chosen": -0.7329486012458801, "rewards/margins": 1.5698072910308838, "rewards/rejected": -2.3027560710906982, "step": 5005 }, { "epoch": 0.58, "learning_rate": 1.2719971654659264e-07, "logits/chosen": -2.4379220008850098, "logits/rejected": -2.643517017364502, "logps/chosen": -141.06040954589844, "logps/rejected": -135.20481872558594, "loss": 1.3239, "rewards/accuracies": 0.75, "rewards/chosen": -1.6664382219314575, "rewards/margins": 1.3340415954589844, "rewards/rejected": -3.0004799365997314, "step": 5006 }, { "epoch": 0.58, "learning_rate": 1.2716428487067436e-07, "logits/chosen": -1.9871236085891724, "logits/rejected": -2.002516984939575, "logps/chosen": -324.06298828125, "logps/rejected": -282.7936706542969, "loss": 0.2882, "rewards/accuracies": 1.0, "rewards/chosen": -0.38419878482818604, "rewards/margins": 2.0457749366760254, "rewards/rejected": -2.429973840713501, "step": 5007 }, { "epoch": 0.58, "learning_rate": 1.271288531947561e-07, "logits/chosen": -2.1172099113464355, "logits/rejected": -2.522083282470703, "logps/chosen": -374.1548767089844, "logps/rejected": -217.88128662109375, "loss": 0.1255, "rewards/accuracies": 1.0, "rewards/chosen": -0.34124788641929626, "rewards/margins": 2.3673157691955566, "rewards/rejected": -2.708563804626465, "step": 5008 }, { "epoch": 0.58, "learning_rate": 1.2709342151883783e-07, "logits/chosen": -2.397780179977417, "logits/rejected": -2.3363850116729736, "logps/chosen": -191.94326782226562, "logps/rejected": -253.56504821777344, "loss": 0.0883, "rewards/accuracies": 1.0, "rewards/chosen": -0.5940826535224915, "rewards/margins": 3.58428955078125, "rewards/rejected": -4.178372383117676, "step": 5009 }, { "epoch": 0.58, "learning_rate": 1.2705798984291956e-07, "logits/chosen": -2.1975460052490234, "logits/rejected": -2.6020331382751465, "logps/chosen": -347.7258605957031, "logps/rejected": -190.0863037109375, "loss": 0.3145, "rewards/accuracies": 0.875, "rewards/chosen": -1.6098583936691284, "rewards/margins": 1.2346560955047607, "rewards/rejected": -2.8445146083831787, "step": 5010 }, { "epoch": 0.58, "learning_rate": 1.270225581670013e-07, "logits/chosen": -2.1591057777404785, "logits/rejected": -1.8677372932434082, "logps/chosen": -235.5383758544922, "logps/rejected": -205.22998046875, "loss": 0.3431, "rewards/accuracies": 0.875, "rewards/chosen": -0.9226926565170288, "rewards/margins": 1.9067730903625488, "rewards/rejected": -2.829465866088867, "step": 5011 }, { "epoch": 0.58, "learning_rate": 1.2698712649108303e-07, "logits/chosen": -2.4727463722229004, "logits/rejected": -2.201777458190918, "logps/chosen": -246.04653930664062, "logps/rejected": -364.3743896484375, "loss": 0.2528, "rewards/accuracies": 0.875, "rewards/chosen": -0.37532657384872437, "rewards/margins": 2.0345726013183594, "rewards/rejected": -2.4098987579345703, "step": 5012 }, { "epoch": 0.58, "learning_rate": 1.2695169481516475e-07, "logits/chosen": -2.5710182189941406, "logits/rejected": -2.420264482498169, "logps/chosen": -138.28521728515625, "logps/rejected": -238.74826049804688, "loss": 0.2853, "rewards/accuracies": 0.875, "rewards/chosen": -0.8522183895111084, "rewards/margins": 2.1393890380859375, "rewards/rejected": -2.991607666015625, "step": 5013 }, { "epoch": 0.58, "learning_rate": 1.269162631392465e-07, "logits/chosen": -2.347460985183716, "logits/rejected": -2.742182731628418, "logps/chosen": -566.0645141601562, "logps/rejected": -380.82244873046875, "loss": 0.2336, "rewards/accuracies": 0.75, "rewards/chosen": -0.3380976915359497, "rewards/margins": 2.815774440765381, "rewards/rejected": -3.153872013092041, "step": 5014 }, { "epoch": 0.58, "learning_rate": 1.2688083146332822e-07, "logits/chosen": -2.326587200164795, "logits/rejected": -2.3138411045074463, "logps/chosen": -237.60281372070312, "logps/rejected": -310.2696533203125, "loss": 0.5868, "rewards/accuracies": 0.75, "rewards/chosen": -0.8882647156715393, "rewards/margins": 1.0294536352157593, "rewards/rejected": -1.9177184104919434, "step": 5015 }, { "epoch": 0.58, "learning_rate": 1.2684539978740994e-07, "logits/chosen": -2.3812763690948486, "logits/rejected": -2.2785987854003906, "logps/chosen": -188.69285583496094, "logps/rejected": -301.3819885253906, "loss": 0.696, "rewards/accuracies": 0.625, "rewards/chosen": -1.787169098854065, "rewards/margins": 1.3538234233856201, "rewards/rejected": -3.1409926414489746, "step": 5016 }, { "epoch": 0.58, "learning_rate": 1.2680996811149166e-07, "logits/chosen": -2.047483205795288, "logits/rejected": -2.25661301612854, "logps/chosen": -443.06365966796875, "logps/rejected": -320.4031066894531, "loss": 0.2352, "rewards/accuracies": 1.0, "rewards/chosen": -1.0828160047531128, "rewards/margins": 2.474048376083374, "rewards/rejected": -3.5568642616271973, "step": 5017 }, { "epoch": 0.58, "learning_rate": 1.2677453643557339e-07, "logits/chosen": -2.493412733078003, "logits/rejected": -2.387420654296875, "logps/chosen": -358.2150573730469, "logps/rejected": -341.5381774902344, "loss": 0.6698, "rewards/accuracies": 0.625, "rewards/chosen": -1.9517525434494019, "rewards/margins": 0.8550430536270142, "rewards/rejected": -2.806795597076416, "step": 5018 }, { "epoch": 0.58, "learning_rate": 1.267391047596551e-07, "logits/chosen": -2.293483257293701, "logits/rejected": -2.324214458465576, "logps/chosen": -151.86395263671875, "logps/rejected": -178.8552703857422, "loss": 0.2737, "rewards/accuracies": 0.875, "rewards/chosen": -0.8797059059143066, "rewards/margins": 2.2820637226104736, "rewards/rejected": -3.161769390106201, "step": 5019 }, { "epoch": 0.58, "learning_rate": 1.2670367308373686e-07, "logits/chosen": -2.3212504386901855, "logits/rejected": -2.4591965675354004, "logps/chosen": -446.8814697265625, "logps/rejected": -331.9065246582031, "loss": 0.2569, "rewards/accuracies": 0.875, "rewards/chosen": -0.9165117740631104, "rewards/margins": 2.397764205932617, "rewards/rejected": -3.3142757415771484, "step": 5020 }, { "epoch": 0.58, "learning_rate": 1.2666824140781858e-07, "logits/chosen": -1.9619414806365967, "logits/rejected": -2.2968997955322266, "logps/chosen": -216.6094512939453, "logps/rejected": -178.23741149902344, "loss": 0.9064, "rewards/accuracies": 0.75, "rewards/chosen": -1.2431081533432007, "rewards/margins": 2.0532853603363037, "rewards/rejected": -3.296393394470215, "step": 5021 }, { "epoch": 0.58, "learning_rate": 1.2663280973190033e-07, "logits/chosen": -2.5364489555358887, "logits/rejected": -2.5714333057403564, "logps/chosen": -255.55294799804688, "logps/rejected": -284.2391357421875, "loss": 0.682, "rewards/accuracies": 0.75, "rewards/chosen": -1.2542182207107544, "rewards/margins": 1.6134649515151978, "rewards/rejected": -2.867683172225952, "step": 5022 }, { "epoch": 0.58, "learning_rate": 1.2659737805598205e-07, "logits/chosen": -2.0139377117156982, "logits/rejected": -1.9455764293670654, "logps/chosen": -268.730224609375, "logps/rejected": -264.1647033691406, "loss": 0.4835, "rewards/accuracies": 0.75, "rewards/chosen": -0.8141624331474304, "rewards/margins": 1.4268033504486084, "rewards/rejected": -2.2409658432006836, "step": 5023 }, { "epoch": 0.58, "learning_rate": 1.2656194638006377e-07, "logits/chosen": -2.6256308555603027, "logits/rejected": -2.6258370876312256, "logps/chosen": -274.5530700683594, "logps/rejected": -157.1398468017578, "loss": 0.6867, "rewards/accuracies": 0.75, "rewards/chosen": -1.627955675125122, "rewards/margins": 0.9628015756607056, "rewards/rejected": -2.590757369995117, "step": 5024 }, { "epoch": 0.58, "learning_rate": 1.265265147041455e-07, "logits/chosen": -2.555377960205078, "logits/rejected": -2.477292776107788, "logps/chosen": -344.78662109375, "logps/rejected": -194.75804138183594, "loss": 0.7569, "rewards/accuracies": 0.875, "rewards/chosen": -1.2015328407287598, "rewards/margins": 1.2914994955062866, "rewards/rejected": -2.493032455444336, "step": 5025 }, { "epoch": 0.58, "learning_rate": 1.2649108302822724e-07, "logits/chosen": -2.5004374980926514, "logits/rejected": -2.6832122802734375, "logps/chosen": -161.03521728515625, "logps/rejected": -169.54747009277344, "loss": 0.4776, "rewards/accuracies": 0.75, "rewards/chosen": -0.5409038066864014, "rewards/margins": 1.6216109991073608, "rewards/rejected": -2.1625146865844727, "step": 5026 }, { "epoch": 0.58, "learning_rate": 1.2645565135230896e-07, "logits/chosen": -2.5424160957336426, "logits/rejected": -2.774426221847534, "logps/chosen": -139.35586547851562, "logps/rejected": -283.0226745605469, "loss": 0.0689, "rewards/accuracies": 1.0, "rewards/chosen": -0.5186126828193665, "rewards/margins": 4.47479248046875, "rewards/rejected": -4.993404388427734, "step": 5027 }, { "epoch": 0.58, "learning_rate": 1.2642021967639069e-07, "logits/chosen": -2.5196285247802734, "logits/rejected": -2.599818229675293, "logps/chosen": -298.28143310546875, "logps/rejected": -204.89028930664062, "loss": 0.3603, "rewards/accuracies": 0.75, "rewards/chosen": -1.0066965818405151, "rewards/margins": 2.8453176021575928, "rewards/rejected": -3.8520145416259766, "step": 5028 }, { "epoch": 0.59, "learning_rate": 1.263847880004724e-07, "logits/chosen": -1.6583380699157715, "logits/rejected": -1.8511111736297607, "logps/chosen": -310.0856628417969, "logps/rejected": -266.33209228515625, "loss": 0.3913, "rewards/accuracies": 0.875, "rewards/chosen": -1.3269226551055908, "rewards/margins": 2.3249833583831787, "rewards/rejected": -3.6519060134887695, "step": 5029 }, { "epoch": 0.59, "learning_rate": 1.2634935632455413e-07, "logits/chosen": -2.902507781982422, "logits/rejected": -2.9840400218963623, "logps/chosen": -345.1080322265625, "logps/rejected": -233.337646484375, "loss": 0.2349, "rewards/accuracies": 0.875, "rewards/chosen": -1.1498950719833374, "rewards/margins": 2.3578126430511475, "rewards/rejected": -3.5077078342437744, "step": 5030 }, { "epoch": 0.59, "learning_rate": 1.2631392464863588e-07, "logits/chosen": -1.8298745155334473, "logits/rejected": -1.8847832679748535, "logps/chosen": -464.0446472167969, "logps/rejected": -550.2960815429688, "loss": 0.6852, "rewards/accuracies": 0.75, "rewards/chosen": -1.2549769878387451, "rewards/margins": 0.6575993299484253, "rewards/rejected": -1.9125761985778809, "step": 5031 }, { "epoch": 0.59, "learning_rate": 1.262784929727176e-07, "logits/chosen": -2.466765880584717, "logits/rejected": -2.5501484870910645, "logps/chosen": -215.99560546875, "logps/rejected": -126.53117370605469, "loss": 1.1182, "rewards/accuracies": 0.625, "rewards/chosen": -1.5513098239898682, "rewards/margins": -0.04120063781738281, "rewards/rejected": -1.510109305381775, "step": 5032 }, { "epoch": 0.59, "learning_rate": 1.2624306129679932e-07, "logits/chosen": -2.2486977577209473, "logits/rejected": -2.3930084705352783, "logps/chosen": -349.591796875, "logps/rejected": -250.3293914794922, "loss": 0.2727, "rewards/accuracies": 1.0, "rewards/chosen": -0.8739951848983765, "rewards/margins": 1.664348840713501, "rewards/rejected": -2.538343906402588, "step": 5033 }, { "epoch": 0.59, "learning_rate": 1.2620762962088107e-07, "logits/chosen": -2.7339017391204834, "logits/rejected": -2.8964338302612305, "logps/chosen": -229.69888305664062, "logps/rejected": -166.30776977539062, "loss": 0.23, "rewards/accuracies": 0.875, "rewards/chosen": -0.36191827058792114, "rewards/margins": 3.2997090816497803, "rewards/rejected": -3.6616272926330566, "step": 5034 }, { "epoch": 0.59, "learning_rate": 1.261721979449628e-07, "logits/chosen": -1.7661529779434204, "logits/rejected": -2.319608211517334, "logps/chosen": -468.3108215332031, "logps/rejected": -266.27642822265625, "loss": 0.184, "rewards/accuracies": 1.0, "rewards/chosen": -0.013813894242048264, "rewards/margins": 2.166487216949463, "rewards/rejected": -2.1803011894226074, "step": 5035 }, { "epoch": 0.59, "learning_rate": 1.2613676626904452e-07, "logits/chosen": -2.4165430068969727, "logits/rejected": -2.3733725547790527, "logps/chosen": -298.70361328125, "logps/rejected": -296.5895080566406, "loss": 0.3693, "rewards/accuracies": 0.875, "rewards/chosen": -2.04870343208313, "rewards/margins": 2.629744052886963, "rewards/rejected": -4.678447246551514, "step": 5036 }, { "epoch": 0.59, "learning_rate": 1.2610133459312626e-07, "logits/chosen": -2.704406499862671, "logits/rejected": -2.5900254249572754, "logps/chosen": -302.1014099121094, "logps/rejected": -253.86624145507812, "loss": 0.4055, "rewards/accuracies": 0.75, "rewards/chosen": -0.9303373098373413, "rewards/margins": 1.9476597309112549, "rewards/rejected": -2.8779969215393066, "step": 5037 }, { "epoch": 0.59, "learning_rate": 1.2606590291720799e-07, "logits/chosen": -1.9082858562469482, "logits/rejected": -1.8875195980072021, "logps/chosen": -182.53204345703125, "logps/rejected": -184.48992919921875, "loss": 0.407, "rewards/accuracies": 0.875, "rewards/chosen": -1.0560022592544556, "rewards/margins": 2.435718059539795, "rewards/rejected": -3.491720199584961, "step": 5038 }, { "epoch": 0.59, "learning_rate": 1.260304712412897e-07, "logits/chosen": -2.369805335998535, "logits/rejected": -2.6521146297454834, "logps/chosen": -279.2736511230469, "logps/rejected": -175.94692993164062, "loss": 0.4506, "rewards/accuracies": 0.75, "rewards/chosen": -0.8370301723480225, "rewards/margins": 1.264455795288086, "rewards/rejected": -2.1014859676361084, "step": 5039 }, { "epoch": 0.59, "learning_rate": 1.2599503956537143e-07, "logits/chosen": -2.2333791255950928, "logits/rejected": -2.167800188064575, "logps/chosen": -185.82015991210938, "logps/rejected": -203.60986328125, "loss": 0.966, "rewards/accuracies": 0.5, "rewards/chosen": -1.9001622200012207, "rewards/margins": 0.5284583568572998, "rewards/rejected": -2.4286205768585205, "step": 5040 }, { "epoch": 0.59, "learning_rate": 1.2595960788945315e-07, "logits/chosen": -1.598362684249878, "logits/rejected": -1.490558385848999, "logps/chosen": -267.8580322265625, "logps/rejected": -372.0643005371094, "loss": 0.3532, "rewards/accuracies": 0.875, "rewards/chosen": -0.8839406967163086, "rewards/margins": 1.3490755558013916, "rewards/rejected": -2.233016014099121, "step": 5041 }, { "epoch": 0.59, "learning_rate": 1.259241762135349e-07, "logits/chosen": -2.785994529724121, "logits/rejected": -2.7436108589172363, "logps/chosen": -126.50717163085938, "logps/rejected": -271.67034912109375, "loss": 0.13, "rewards/accuracies": 1.0, "rewards/chosen": -0.9907951354980469, "rewards/margins": 3.335768699645996, "rewards/rejected": -4.326563835144043, "step": 5042 }, { "epoch": 0.59, "learning_rate": 1.2588874453761662e-07, "logits/chosen": -2.8406522274017334, "logits/rejected": -2.847862482070923, "logps/chosen": -285.98150634765625, "logps/rejected": -243.86953735351562, "loss": 0.2045, "rewards/accuracies": 0.875, "rewards/chosen": -0.040951959788799286, "rewards/margins": 3.677973747253418, "rewards/rejected": -3.718925714492798, "step": 5043 }, { "epoch": 0.59, "learning_rate": 1.2585331286169835e-07, "logits/chosen": -2.5393457412719727, "logits/rejected": -2.2833058834075928, "logps/chosen": -103.62020111083984, "logps/rejected": -287.2734680175781, "loss": 0.1982, "rewards/accuracies": 0.875, "rewards/chosen": -0.639127790927887, "rewards/margins": 3.255880117416382, "rewards/rejected": -3.895008087158203, "step": 5044 }, { "epoch": 0.59, "learning_rate": 1.258178811857801e-07, "logits/chosen": -2.3160483837127686, "logits/rejected": -2.445261001586914, "logps/chosen": -301.4508361816406, "logps/rejected": -235.83108520507812, "loss": 0.172, "rewards/accuracies": 1.0, "rewards/chosen": -0.6346229910850525, "rewards/margins": 3.6629855632781982, "rewards/rejected": -4.297608375549316, "step": 5045 }, { "epoch": 0.59, "learning_rate": 1.2578244950986182e-07, "logits/chosen": -2.4213991165161133, "logits/rejected": -2.1528828144073486, "logps/chosen": -260.78173828125, "logps/rejected": -286.6127014160156, "loss": 0.2338, "rewards/accuracies": 0.875, "rewards/chosen": -0.5604619979858398, "rewards/margins": 1.9202547073364258, "rewards/rejected": -2.4807167053222656, "step": 5046 }, { "epoch": 0.59, "learning_rate": 1.2574701783394354e-07, "logits/chosen": -2.1239657402038574, "logits/rejected": -2.2135210037231445, "logps/chosen": -197.57440185546875, "logps/rejected": -247.21759033203125, "loss": 0.1699, "rewards/accuracies": 1.0, "rewards/chosen": -0.5139496922492981, "rewards/margins": 2.620455265045166, "rewards/rejected": -3.1344048976898193, "step": 5047 }, { "epoch": 0.59, "learning_rate": 1.2571158615802526e-07, "logits/chosen": -1.8035229444503784, "logits/rejected": -2.262049674987793, "logps/chosen": -442.8195495605469, "logps/rejected": -245.49559020996094, "loss": 0.4067, "rewards/accuracies": 0.75, "rewards/chosen": -0.9732864499092102, "rewards/margins": 1.5596760511398315, "rewards/rejected": -2.5329623222351074, "step": 5048 }, { "epoch": 0.59, "learning_rate": 1.25676154482107e-07, "logits/chosen": -1.9747023582458496, "logits/rejected": -1.9992501735687256, "logps/chosen": -203.44345092773438, "logps/rejected": -287.6009521484375, "loss": 0.2356, "rewards/accuracies": 0.875, "rewards/chosen": -0.6251349449157715, "rewards/margins": 2.7157435417175293, "rewards/rejected": -3.340878486633301, "step": 5049 }, { "epoch": 0.59, "learning_rate": 1.2564072280618873e-07, "logits/chosen": -2.2412095069885254, "logits/rejected": -2.173689365386963, "logps/chosen": -277.09002685546875, "logps/rejected": -331.48907470703125, "loss": 0.2863, "rewards/accuracies": 0.875, "rewards/chosen": -0.5114327073097229, "rewards/margins": 2.143389940261841, "rewards/rejected": -2.654822826385498, "step": 5050 }, { "epoch": 0.59, "learning_rate": 1.2560529113027045e-07, "logits/chosen": -2.911099672317505, "logits/rejected": -2.8257720470428467, "logps/chosen": -302.1712646484375, "logps/rejected": -275.680908203125, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -0.5880608558654785, "rewards/margins": 4.239800930023193, "rewards/rejected": -4.827861309051514, "step": 5051 }, { "epoch": 0.59, "learning_rate": 1.2556985945435218e-07, "logits/chosen": -2.51224422454834, "logits/rejected": -2.4394235610961914, "logps/chosen": -178.4909210205078, "logps/rejected": -292.4670715332031, "loss": 0.5162, "rewards/accuracies": 0.75, "rewards/chosen": -1.0399235486984253, "rewards/margins": 2.5417978763580322, "rewards/rejected": -3.581721782684326, "step": 5052 }, { "epoch": 0.59, "learning_rate": 1.255344277784339e-07, "logits/chosen": -2.345547676086426, "logits/rejected": -2.367676019668579, "logps/chosen": -337.0796203613281, "logps/rejected": -343.7296447753906, "loss": 0.4611, "rewards/accuracies": 0.75, "rewards/chosen": -1.2996642589569092, "rewards/margins": 2.2832274436950684, "rewards/rejected": -3.5828917026519775, "step": 5053 }, { "epoch": 0.59, "learning_rate": 1.2549899610251565e-07, "logits/chosen": -2.0981104373931885, "logits/rejected": -1.7766231298446655, "logps/chosen": -267.132568359375, "logps/rejected": -426.4649353027344, "loss": 0.4391, "rewards/accuracies": 0.75, "rewards/chosen": -1.1440658569335938, "rewards/margins": 1.9454902410507202, "rewards/rejected": -3.0895562171936035, "step": 5054 }, { "epoch": 0.59, "learning_rate": 1.2546356442659737e-07, "logits/chosen": -2.27182936668396, "logits/rejected": -2.4739980697631836, "logps/chosen": -197.70553588867188, "logps/rejected": -299.1203918457031, "loss": 0.2109, "rewards/accuracies": 1.0, "rewards/chosen": -0.9340135455131531, "rewards/margins": 2.7556979656219482, "rewards/rejected": -3.689711570739746, "step": 5055 }, { "epoch": 0.59, "learning_rate": 1.2542813275067912e-07, "logits/chosen": -2.1973214149475098, "logits/rejected": -1.7851725816726685, "logps/chosen": -161.9153289794922, "logps/rejected": -316.9391784667969, "loss": 0.5271, "rewards/accuracies": 0.75, "rewards/chosen": -0.31334853172302246, "rewards/margins": 2.3823776245117188, "rewards/rejected": -2.695726156234741, "step": 5056 }, { "epoch": 0.59, "learning_rate": 1.2539270107476084e-07, "logits/chosen": -2.6415936946868896, "logits/rejected": -2.725944995880127, "logps/chosen": -345.1324157714844, "logps/rejected": -281.581298828125, "loss": 0.2281, "rewards/accuracies": 0.875, "rewards/chosen": -0.9726064801216125, "rewards/margins": 3.1831581592559814, "rewards/rejected": -4.155764579772949, "step": 5057 }, { "epoch": 0.59, "learning_rate": 1.2535726939884256e-07, "logits/chosen": -1.9976046085357666, "logits/rejected": -2.064516067504883, "logps/chosen": -300.18182373046875, "logps/rejected": -240.24188232421875, "loss": 0.3908, "rewards/accuracies": 0.875, "rewards/chosen": -1.1462929248809814, "rewards/margins": 1.6564544439315796, "rewards/rejected": -2.8027472496032715, "step": 5058 }, { "epoch": 0.59, "learning_rate": 1.2532183772292428e-07, "logits/chosen": -2.2003281116485596, "logits/rejected": -2.2003026008605957, "logps/chosen": -330.9859619140625, "logps/rejected": -335.26641845703125, "loss": 0.577, "rewards/accuracies": 0.75, "rewards/chosen": -0.4647020697593689, "rewards/margins": 1.023249626159668, "rewards/rejected": -1.487951636314392, "step": 5059 }, { "epoch": 0.59, "learning_rate": 1.25286406047006e-07, "logits/chosen": -1.8503432273864746, "logits/rejected": -2.23567271232605, "logps/chosen": -391.1458740234375, "logps/rejected": -315.9316101074219, "loss": 0.5458, "rewards/accuracies": 0.75, "rewards/chosen": -0.9872946739196777, "rewards/margins": 1.5065724849700928, "rewards/rejected": -2.4938669204711914, "step": 5060 }, { "epoch": 0.59, "learning_rate": 1.2525097437108775e-07, "logits/chosen": -2.194896697998047, "logits/rejected": -2.2195792198181152, "logps/chosen": -180.734619140625, "logps/rejected": -298.43365478515625, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.11201455444097519, "rewards/margins": 4.649348258972168, "rewards/rejected": -4.7613630294799805, "step": 5061 }, { "epoch": 0.59, "learning_rate": 1.2521554269516948e-07, "logits/chosen": -2.935894727706909, "logits/rejected": -2.930459976196289, "logps/chosen": -343.04119873046875, "logps/rejected": -215.00030517578125, "loss": 0.8591, "rewards/accuracies": 0.75, "rewards/chosen": -1.3056944608688354, "rewards/margins": 1.528649926185608, "rewards/rejected": -2.8343443870544434, "step": 5062 }, { "epoch": 0.59, "learning_rate": 1.251801110192512e-07, "logits/chosen": -2.2215189933776855, "logits/rejected": -2.328315258026123, "logps/chosen": -236.15066528320312, "logps/rejected": -286.737060546875, "loss": 0.421, "rewards/accuracies": 0.75, "rewards/chosen": -1.0598161220550537, "rewards/margins": 2.3775668144226074, "rewards/rejected": -3.437382936477661, "step": 5063 }, { "epoch": 0.59, "learning_rate": 1.2514467934333292e-07, "logits/chosen": -2.3271660804748535, "logits/rejected": -2.3771748542785645, "logps/chosen": -340.5474853515625, "logps/rejected": -338.4072265625, "loss": 0.0636, "rewards/accuracies": 1.0, "rewards/chosen": -0.47275155782699585, "rewards/margins": 4.264046669006348, "rewards/rejected": -4.736798286437988, "step": 5064 }, { "epoch": 0.59, "learning_rate": 1.2510924766741467e-07, "logits/chosen": -2.382052421569824, "logits/rejected": -2.1117660999298096, "logps/chosen": -200.75177001953125, "logps/rejected": -258.7510070800781, "loss": 0.4138, "rewards/accuracies": 0.625, "rewards/chosen": -1.4100629091262817, "rewards/margins": 1.7193949222564697, "rewards/rejected": -3.129457712173462, "step": 5065 }, { "epoch": 0.59, "learning_rate": 1.250738159914964e-07, "logits/chosen": -2.3961143493652344, "logits/rejected": -2.742340564727783, "logps/chosen": -419.90472412109375, "logps/rejected": -283.8713684082031, "loss": 0.2118, "rewards/accuracies": 1.0, "rewards/chosen": -0.06645327806472778, "rewards/margins": 2.4807984828948975, "rewards/rejected": -2.5472517013549805, "step": 5066 }, { "epoch": 0.59, "learning_rate": 1.2503838431557814e-07, "logits/chosen": -2.512465238571167, "logits/rejected": -2.6450307369232178, "logps/chosen": -202.74526977539062, "logps/rejected": -253.4952392578125, "loss": 0.1445, "rewards/accuracies": 1.0, "rewards/chosen": -0.9202558994293213, "rewards/margins": 4.262088298797607, "rewards/rejected": -5.18234395980835, "step": 5067 }, { "epoch": 0.59, "learning_rate": 1.2500295263965986e-07, "logits/chosen": -2.7043914794921875, "logits/rejected": -2.47170352935791, "logps/chosen": -239.700439453125, "logps/rejected": -351.883056640625, "loss": 0.1642, "rewards/accuracies": 1.0, "rewards/chosen": -0.07963220030069351, "rewards/margins": 3.2495436668395996, "rewards/rejected": -3.3291759490966797, "step": 5068 }, { "epoch": 0.59, "learning_rate": 1.2496752096374158e-07, "logits/chosen": -2.0160913467407227, "logits/rejected": -2.293389320373535, "logps/chosen": -295.4750061035156, "logps/rejected": -246.48040771484375, "loss": 0.3468, "rewards/accuracies": 0.875, "rewards/chosen": -0.42977944016456604, "rewards/margins": 2.011566162109375, "rewards/rejected": -2.441345691680908, "step": 5069 }, { "epoch": 0.59, "learning_rate": 1.249320892878233e-07, "logits/chosen": -2.523953676223755, "logits/rejected": -2.2209982872009277, "logps/chosen": -145.88943481445312, "logps/rejected": -168.9512481689453, "loss": 0.4348, "rewards/accuracies": 0.75, "rewards/chosen": -1.4279842376708984, "rewards/margins": 1.55070161819458, "rewards/rejected": -2.9786860942840576, "step": 5070 }, { "epoch": 0.59, "learning_rate": 1.2489665761190503e-07, "logits/chosen": -2.184809446334839, "logits/rejected": -2.3422513008117676, "logps/chosen": -519.1162109375, "logps/rejected": -286.68035888671875, "loss": 0.3948, "rewards/accuracies": 0.75, "rewards/chosen": -1.020301342010498, "rewards/margins": 1.8226814270019531, "rewards/rejected": -2.842982769012451, "step": 5071 }, { "epoch": 0.59, "learning_rate": 1.2486122593598678e-07, "logits/chosen": -1.9471588134765625, "logits/rejected": -2.1414012908935547, "logps/chosen": -340.64569091796875, "logps/rejected": -212.9463348388672, "loss": 0.4739, "rewards/accuracies": 0.875, "rewards/chosen": -0.4951516389846802, "rewards/margins": 1.3302807807922363, "rewards/rejected": -1.825432300567627, "step": 5072 }, { "epoch": 0.59, "learning_rate": 1.248257942600685e-07, "logits/chosen": -2.471249580383301, "logits/rejected": -2.372087001800537, "logps/chosen": -235.6441650390625, "logps/rejected": -155.7246856689453, "loss": 0.7172, "rewards/accuracies": 0.5, "rewards/chosen": -1.1826435327529907, "rewards/margins": 0.6697194576263428, "rewards/rejected": -1.8523629903793335, "step": 5073 }, { "epoch": 0.59, "learning_rate": 1.2479036258415022e-07, "logits/chosen": -2.237401247024536, "logits/rejected": -2.372328758239746, "logps/chosen": -366.558349609375, "logps/rejected": -316.90704345703125, "loss": 0.4739, "rewards/accuracies": 0.875, "rewards/chosen": -0.2678741216659546, "rewards/margins": 2.0118048191070557, "rewards/rejected": -2.2796788215637207, "step": 5074 }, { "epoch": 0.59, "learning_rate": 1.2475493090823194e-07, "logits/chosen": -2.4794363975524902, "logits/rejected": -2.4415979385375977, "logps/chosen": -338.049072265625, "logps/rejected": -335.29071044921875, "loss": 0.6196, "rewards/accuracies": 0.75, "rewards/chosen": -1.2521889209747314, "rewards/margins": 2.6171348094940186, "rewards/rejected": -3.869323492050171, "step": 5075 }, { "epoch": 0.59, "learning_rate": 1.247194992323137e-07, "logits/chosen": -2.2891132831573486, "logits/rejected": -2.5519914627075195, "logps/chosen": -269.9117431640625, "logps/rejected": -178.40011596679688, "loss": 0.3299, "rewards/accuracies": 0.875, "rewards/chosen": -0.5464590787887573, "rewards/margins": 1.6039543151855469, "rewards/rejected": -2.1504135131835938, "step": 5076 }, { "epoch": 0.59, "learning_rate": 1.2468406755639541e-07, "logits/chosen": -2.5970542430877686, "logits/rejected": -2.608846664428711, "logps/chosen": -197.98886108398438, "logps/rejected": -222.43438720703125, "loss": 0.6745, "rewards/accuracies": 0.625, "rewards/chosen": -1.4875085353851318, "rewards/margins": 1.0582644939422607, "rewards/rejected": -2.5457730293273926, "step": 5077 }, { "epoch": 0.59, "learning_rate": 1.2464863588047714e-07, "logits/chosen": -2.1260392665863037, "logits/rejected": -2.1436879634857178, "logps/chosen": -159.30738830566406, "logps/rejected": -213.78289794921875, "loss": 0.4453, "rewards/accuracies": 0.875, "rewards/chosen": -0.7945365905761719, "rewards/margins": 1.9845962524414062, "rewards/rejected": -2.779132843017578, "step": 5078 }, { "epoch": 0.59, "learning_rate": 1.2461320420455888e-07, "logits/chosen": -2.6719436645507812, "logits/rejected": -2.6745734214782715, "logps/chosen": -174.7235870361328, "logps/rejected": -230.06301879882812, "loss": 0.3497, "rewards/accuracies": 0.875, "rewards/chosen": -0.6107015609741211, "rewards/margins": 3.4982619285583496, "rewards/rejected": -4.108963966369629, "step": 5079 }, { "epoch": 0.59, "learning_rate": 1.245777725286406e-07, "logits/chosen": -2.4551637172698975, "logits/rejected": -2.3567707538604736, "logps/chosen": -173.47279357910156, "logps/rejected": -266.3307189941406, "loss": 0.2706, "rewards/accuracies": 0.875, "rewards/chosen": -0.3563610911369324, "rewards/margins": 2.4658963680267334, "rewards/rejected": -2.8222575187683105, "step": 5080 }, { "epoch": 0.59, "learning_rate": 1.2454234085272233e-07, "logits/chosen": -2.1030921936035156, "logits/rejected": -2.0538699626922607, "logps/chosen": -305.3489990234375, "logps/rejected": -312.9396057128906, "loss": 0.4411, "rewards/accuracies": 0.75, "rewards/chosen": -0.7178651094436646, "rewards/margins": 0.9211158752441406, "rewards/rejected": -1.6389811038970947, "step": 5081 }, { "epoch": 0.59, "learning_rate": 1.2450690917680405e-07, "logits/chosen": -1.7618861198425293, "logits/rejected": -1.5333662033081055, "logps/chosen": -280.0419616699219, "logps/rejected": -291.8614196777344, "loss": 0.5739, "rewards/accuracies": 0.75, "rewards/chosen": -1.2153081893920898, "rewards/margins": 0.7899072170257568, "rewards/rejected": -2.0052154064178467, "step": 5082 }, { "epoch": 0.59, "learning_rate": 1.2447147750088577e-07, "logits/chosen": -2.7937374114990234, "logits/rejected": -2.775571584701538, "logps/chosen": -226.0089111328125, "logps/rejected": -182.79132080078125, "loss": 0.4453, "rewards/accuracies": 0.75, "rewards/chosen": -0.8861177563667297, "rewards/margins": 1.7546391487121582, "rewards/rejected": -2.6407570838928223, "step": 5083 }, { "epoch": 0.59, "learning_rate": 1.2443604582496752e-07, "logits/chosen": -1.9664208889007568, "logits/rejected": -1.9980876445770264, "logps/chosen": -268.162841796875, "logps/rejected": -444.4507751464844, "loss": 0.8689, "rewards/accuracies": 0.625, "rewards/chosen": -1.7266005277633667, "rewards/margins": 0.22701841592788696, "rewards/rejected": -1.953619122505188, "step": 5084 }, { "epoch": 0.59, "learning_rate": 1.2440061414904924e-07, "logits/chosen": -1.9957945346832275, "logits/rejected": -1.7428536415100098, "logps/chosen": -296.41778564453125, "logps/rejected": -272.56500244140625, "loss": 0.2183, "rewards/accuracies": 1.0, "rewards/chosen": -0.6645196080207825, "rewards/margins": 2.1741867065429688, "rewards/rejected": -2.8387064933776855, "step": 5085 }, { "epoch": 0.59, "learning_rate": 1.2436518247313097e-07, "logits/chosen": -2.274955987930298, "logits/rejected": -2.2444496154785156, "logps/chosen": -224.04209899902344, "logps/rejected": -292.93817138671875, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": -0.7449394464492798, "rewards/margins": 3.2602670192718506, "rewards/rejected": -4.00520658493042, "step": 5086 }, { "epoch": 0.59, "learning_rate": 1.243297507972127e-07, "logits/chosen": -2.248771905899048, "logits/rejected": -2.696075916290283, "logps/chosen": -350.410400390625, "logps/rejected": -282.1456298828125, "loss": 0.2712, "rewards/accuracies": 0.875, "rewards/chosen": -0.3212772011756897, "rewards/margins": 2.8136792182922363, "rewards/rejected": -3.1349563598632812, "step": 5087 }, { "epoch": 0.59, "learning_rate": 1.2429431912129444e-07, "logits/chosen": -2.1303837299346924, "logits/rejected": -2.1944024562835693, "logps/chosen": -279.9931640625, "logps/rejected": -341.1676025390625, "loss": 0.3759, "rewards/accuracies": 0.75, "rewards/chosen": -0.7397469282150269, "rewards/margins": 3.070223331451416, "rewards/rejected": -3.8099701404571533, "step": 5088 }, { "epoch": 0.59, "learning_rate": 1.2425888744537616e-07, "logits/chosen": -1.941174864768982, "logits/rejected": -2.042607069015503, "logps/chosen": -346.42877197265625, "logps/rejected": -241.75503540039062, "loss": 0.4349, "rewards/accuracies": 0.75, "rewards/chosen": -0.7154902219772339, "rewards/margins": 1.3025219440460205, "rewards/rejected": -2.018012046813965, "step": 5089 }, { "epoch": 0.59, "learning_rate": 1.242234557694579e-07, "logits/chosen": -2.345172166824341, "logits/rejected": -2.315709114074707, "logps/chosen": -247.92958068847656, "logps/rejected": -323.3211364746094, "loss": 0.6432, "rewards/accuracies": 0.625, "rewards/chosen": -1.3852211236953735, "rewards/margins": 0.7937406897544861, "rewards/rejected": -2.178961753845215, "step": 5090 }, { "epoch": 0.59, "learning_rate": 1.2418802409353963e-07, "logits/chosen": -2.0898611545562744, "logits/rejected": -2.470738649368286, "logps/chosen": -389.3899230957031, "logps/rejected": -188.8907470703125, "loss": 0.6823, "rewards/accuracies": 0.625, "rewards/chosen": -1.1931101083755493, "rewards/margins": 0.7235841155052185, "rewards/rejected": -1.9166940450668335, "step": 5091 }, { "epoch": 0.59, "learning_rate": 1.2415259241762135e-07, "logits/chosen": -2.558830738067627, "logits/rejected": -2.4750943183898926, "logps/chosen": -196.71957397460938, "logps/rejected": -158.61727905273438, "loss": 0.248, "rewards/accuracies": 0.875, "rewards/chosen": -0.9535135626792908, "rewards/margins": 2.2230215072631836, "rewards/rejected": -3.176535129547119, "step": 5092 }, { "epoch": 0.59, "learning_rate": 1.2411716074170307e-07, "logits/chosen": -2.383772850036621, "logits/rejected": -2.226917028427124, "logps/chosen": -258.3792724609375, "logps/rejected": -299.1199645996094, "loss": 0.3076, "rewards/accuracies": 0.875, "rewards/chosen": -0.44346946477890015, "rewards/margins": 3.150388479232788, "rewards/rejected": -3.593857526779175, "step": 5093 }, { "epoch": 0.59, "learning_rate": 1.240817290657848e-07, "logits/chosen": -2.029862642288208, "logits/rejected": -1.8136718273162842, "logps/chosen": -163.6396026611328, "logps/rejected": -278.8581237792969, "loss": 0.8742, "rewards/accuracies": 0.625, "rewards/chosen": -2.1036734580993652, "rewards/margins": 0.5058338642120361, "rewards/rejected": -2.6095073223114014, "step": 5094 }, { "epoch": 0.59, "learning_rate": 1.2404629738986652e-07, "logits/chosen": -2.629366397857666, "logits/rejected": -2.671375036239624, "logps/chosen": -194.59194946289062, "logps/rejected": -304.2836608886719, "loss": 0.1737, "rewards/accuracies": 0.875, "rewards/chosen": -0.5404179096221924, "rewards/margins": 3.2201313972473145, "rewards/rejected": -3.7605490684509277, "step": 5095 }, { "epoch": 0.59, "learning_rate": 1.2401086571394827e-07, "logits/chosen": -2.552117347717285, "logits/rejected": -2.544235944747925, "logps/chosen": -204.56903076171875, "logps/rejected": -282.14569091796875, "loss": 0.7417, "rewards/accuracies": 0.625, "rewards/chosen": -0.6886357069015503, "rewards/margins": 2.2382373809814453, "rewards/rejected": -2.926873207092285, "step": 5096 }, { "epoch": 0.59, "learning_rate": 1.2397543403803e-07, "logits/chosen": -2.887909412384033, "logits/rejected": -2.8467471599578857, "logps/chosen": -165.85572814941406, "logps/rejected": -163.55233764648438, "loss": 0.3839, "rewards/accuracies": 0.875, "rewards/chosen": -0.7653634548187256, "rewards/margins": 1.6618831157684326, "rewards/rejected": -2.427246570587158, "step": 5097 }, { "epoch": 0.59, "learning_rate": 1.239400023621117e-07, "logits/chosen": -2.587955951690674, "logits/rejected": -2.579162836074829, "logps/chosen": -212.80560302734375, "logps/rejected": -275.4340515136719, "loss": 0.3655, "rewards/accuracies": 0.875, "rewards/chosen": -0.9853346943855286, "rewards/margins": 1.573212742805481, "rewards/rejected": -2.5585474967956543, "step": 5098 }, { "epoch": 0.59, "learning_rate": 1.2390457068619346e-07, "logits/chosen": -1.8810653686523438, "logits/rejected": -2.345719337463379, "logps/chosen": -426.538818359375, "logps/rejected": -319.029296875, "loss": 0.3472, "rewards/accuracies": 0.875, "rewards/chosen": -1.2576731443405151, "rewards/margins": 2.5477428436279297, "rewards/rejected": -3.8054158687591553, "step": 5099 }, { "epoch": 0.59, "learning_rate": 1.2386913901027518e-07, "logits/chosen": -2.380269765853882, "logits/rejected": -2.322206735610962, "logps/chosen": -339.3162536621094, "logps/rejected": -346.724609375, "loss": 0.3273, "rewards/accuracies": 0.75, "rewards/chosen": -0.554209291934967, "rewards/margins": 2.468238592147827, "rewards/rejected": -3.0224475860595703, "step": 5100 }, { "epoch": 0.59, "learning_rate": 1.238337073343569e-07, "logits/chosen": -2.410684108734131, "logits/rejected": -2.34245228767395, "logps/chosen": -312.950439453125, "logps/rejected": -315.50244140625, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -0.8407118320465088, "rewards/margins": 3.945054531097412, "rewards/rejected": -4.7857666015625, "step": 5101 }, { "epoch": 0.59, "learning_rate": 1.2379827565843865e-07, "logits/chosen": -2.69411563873291, "logits/rejected": -2.7130560874938965, "logps/chosen": -221.516845703125, "logps/rejected": -260.2184753417969, "loss": 0.36, "rewards/accuracies": 0.75, "rewards/chosen": -0.5093930959701538, "rewards/margins": 1.8112454414367676, "rewards/rejected": -2.320638656616211, "step": 5102 }, { "epoch": 0.59, "learning_rate": 1.2376284398252037e-07, "logits/chosen": -2.017139434814453, "logits/rejected": -2.154606580734253, "logps/chosen": -291.7240905761719, "logps/rejected": -346.83135986328125, "loss": 0.1228, "rewards/accuracies": 1.0, "rewards/chosen": -0.3959539532661438, "rewards/margins": 2.9272682666778564, "rewards/rejected": -3.3232223987579346, "step": 5103 }, { "epoch": 0.59, "learning_rate": 1.237274123066021e-07, "logits/chosen": -2.241389751434326, "logits/rejected": -2.1206982135772705, "logps/chosen": -223.3714599609375, "logps/rejected": -238.818115234375, "loss": 0.4368, "rewards/accuracies": 0.75, "rewards/chosen": -0.5930240750312805, "rewards/margins": 1.6766254901885986, "rewards/rejected": -2.2696497440338135, "step": 5104 }, { "epoch": 0.59, "learning_rate": 1.2369198063068382e-07, "logits/chosen": -2.1535606384277344, "logits/rejected": -2.0892107486724854, "logps/chosen": -302.6767883300781, "logps/rejected": -277.99493408203125, "loss": 0.2835, "rewards/accuracies": 0.875, "rewards/chosen": 0.24181517958641052, "rewards/margins": 3.8903353214263916, "rewards/rejected": -3.6485202312469482, "step": 5105 }, { "epoch": 0.59, "learning_rate": 1.2365654895476554e-07, "logits/chosen": -2.332160234451294, "logits/rejected": -2.3231637477874756, "logps/chosen": -197.3474578857422, "logps/rejected": -264.1737060546875, "loss": 0.1381, "rewards/accuracies": 1.0, "rewards/chosen": -0.21212142705917358, "rewards/margins": 3.0222043991088867, "rewards/rejected": -3.234325647354126, "step": 5106 }, { "epoch": 0.59, "learning_rate": 1.2362111727884726e-07, "logits/chosen": -2.438993453979492, "logits/rejected": -2.334308624267578, "logps/chosen": -242.27081298828125, "logps/rejected": -166.3047332763672, "loss": 0.5015, "rewards/accuracies": 0.625, "rewards/chosen": -1.5868017673492432, "rewards/margins": 1.2155275344848633, "rewards/rejected": -2.8023295402526855, "step": 5107 }, { "epoch": 0.59, "learning_rate": 1.23585685602929e-07, "logits/chosen": -2.239373207092285, "logits/rejected": -2.245403528213501, "logps/chosen": -261.1809387207031, "logps/rejected": -278.2749938964844, "loss": 0.5542, "rewards/accuracies": 0.75, "rewards/chosen": -1.9882194995880127, "rewards/margins": 0.8726952075958252, "rewards/rejected": -2.860914707183838, "step": 5108 }, { "epoch": 0.59, "learning_rate": 1.2355025392701073e-07, "logits/chosen": -1.9228613376617432, "logits/rejected": -1.977163314819336, "logps/chosen": -350.00604248046875, "logps/rejected": -308.2041015625, "loss": 0.4457, "rewards/accuracies": 0.75, "rewards/chosen": -1.6628162860870361, "rewards/margins": 0.7396672964096069, "rewards/rejected": -2.4024834632873535, "step": 5109 }, { "epoch": 0.59, "learning_rate": 1.2351482225109248e-07, "logits/chosen": -2.5530567169189453, "logits/rejected": -2.439129590988159, "logps/chosen": -318.56402587890625, "logps/rejected": -388.52862548828125, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/chosen": -0.9836808443069458, "rewards/margins": 4.30989933013916, "rewards/rejected": -5.293580055236816, "step": 5110 }, { "epoch": 0.59, "learning_rate": 1.234793905751742e-07, "logits/chosen": -2.426469087600708, "logits/rejected": -2.5056188106536865, "logps/chosen": -357.5161437988281, "logps/rejected": -311.189697265625, "loss": 0.5073, "rewards/accuracies": 0.625, "rewards/chosen": -0.9797780513763428, "rewards/margins": 1.274430274963379, "rewards/rejected": -2.2542080879211426, "step": 5111 }, { "epoch": 0.59, "learning_rate": 1.2344395889925593e-07, "logits/chosen": -2.766845703125, "logits/rejected": -2.656970739364624, "logps/chosen": -159.90008544921875, "logps/rejected": -202.7606964111328, "loss": 0.5815, "rewards/accuracies": 0.75, "rewards/chosen": -0.8860570788383484, "rewards/margins": 2.4095001220703125, "rewards/rejected": -3.2955570220947266, "step": 5112 }, { "epoch": 0.59, "learning_rate": 1.2340852722333768e-07, "logits/chosen": -1.8868474960327148, "logits/rejected": -2.2078776359558105, "logps/chosen": -450.057861328125, "logps/rejected": -270.5749206542969, "loss": 0.3439, "rewards/accuracies": 0.75, "rewards/chosen": -0.8930110931396484, "rewards/margins": 1.7636840343475342, "rewards/rejected": -2.6566951274871826, "step": 5113 }, { "epoch": 0.59, "learning_rate": 1.233730955474194e-07, "logits/chosen": -1.9442545175552368, "logits/rejected": -2.323054552078247, "logps/chosen": -335.75250244140625, "logps/rejected": -210.75537109375, "loss": 0.1562, "rewards/accuracies": 1.0, "rewards/chosen": -0.488150030374527, "rewards/margins": 3.3689723014831543, "rewards/rejected": -3.8571221828460693, "step": 5114 }, { "epoch": 0.6, "learning_rate": 1.2333766387150112e-07, "logits/chosen": -2.5916597843170166, "logits/rejected": -2.6921615600585938, "logps/chosen": -324.7310791015625, "logps/rejected": -280.6888122558594, "loss": 0.1767, "rewards/accuracies": 1.0, "rewards/chosen": -0.8536218404769897, "rewards/margins": 2.0352423191070557, "rewards/rejected": -2.888864040374756, "step": 5115 }, { "epoch": 0.6, "learning_rate": 1.2330223219558284e-07, "logits/chosen": -2.1391282081604004, "logits/rejected": -2.338430166244507, "logps/chosen": -225.7545166015625, "logps/rejected": -315.41571044921875, "loss": 0.5856, "rewards/accuracies": 0.75, "rewards/chosen": -1.2772448062896729, "rewards/margins": 2.337618827819824, "rewards/rejected": -3.614863395690918, "step": 5116 }, { "epoch": 0.6, "learning_rate": 1.2326680051966456e-07, "logits/chosen": -2.834620237350464, "logits/rejected": -2.682323932647705, "logps/chosen": -157.98403930664062, "logps/rejected": -248.41929626464844, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": 0.03156479448080063, "rewards/margins": 3.235058307647705, "rewards/rejected": -3.203493595123291, "step": 5117 }, { "epoch": 0.6, "learning_rate": 1.2323136884374629e-07, "logits/chosen": -2.515840530395508, "logits/rejected": -2.6033060550689697, "logps/chosen": -210.25086975097656, "logps/rejected": -170.61021423339844, "loss": 0.4969, "rewards/accuracies": 0.625, "rewards/chosen": -0.07243821769952774, "rewards/margins": 1.0937703847885132, "rewards/rejected": -1.1662086248397827, "step": 5118 }, { "epoch": 0.6, "learning_rate": 1.2319593716782803e-07, "logits/chosen": -2.3279213905334473, "logits/rejected": -2.2057008743286133, "logps/chosen": -219.17718505859375, "logps/rejected": -266.13714599609375, "loss": 0.3275, "rewards/accuracies": 0.75, "rewards/chosen": -0.9945389628410339, "rewards/margins": 2.227848529815674, "rewards/rejected": -3.2223877906799316, "step": 5119 }, { "epoch": 0.6, "learning_rate": 1.2316050549190976e-07, "logits/chosen": -2.2559127807617188, "logits/rejected": -2.129075050354004, "logps/chosen": -354.2698669433594, "logps/rejected": -306.61846923828125, "loss": 0.2272, "rewards/accuracies": 1.0, "rewards/chosen": -1.2929072380065918, "rewards/margins": 2.0850305557250977, "rewards/rejected": -3.3779377937316895, "step": 5120 }, { "epoch": 0.6, "learning_rate": 1.231250738159915e-07, "logits/chosen": -2.3452606201171875, "logits/rejected": -2.077998638153076, "logps/chosen": -314.56597900390625, "logps/rejected": -476.0709533691406, "loss": 0.1039, "rewards/accuracies": 1.0, "rewards/chosen": -0.7632959485054016, "rewards/margins": 3.0705442428588867, "rewards/rejected": -3.8338401317596436, "step": 5121 }, { "epoch": 0.6, "learning_rate": 1.2308964214007323e-07, "logits/chosen": -2.2064530849456787, "logits/rejected": -2.591062545776367, "logps/chosen": -342.0685119628906, "logps/rejected": -286.3999328613281, "loss": 0.2422, "rewards/accuracies": 0.875, "rewards/chosen": -0.9231036901473999, "rewards/margins": 1.549333095550537, "rewards/rejected": -2.4724369049072266, "step": 5122 }, { "epoch": 0.6, "learning_rate": 1.2305421046415495e-07, "logits/chosen": -2.798582077026367, "logits/rejected": -2.571605920791626, "logps/chosen": -218.5767822265625, "logps/rejected": -199.66358947753906, "loss": 0.185, "rewards/accuracies": 0.875, "rewards/chosen": -0.8188111186027527, "rewards/margins": 2.7973177433013916, "rewards/rejected": -3.616128921508789, "step": 5123 }, { "epoch": 0.6, "learning_rate": 1.2301877878823667e-07, "logits/chosen": -2.2091310024261475, "logits/rejected": -2.1715455055236816, "logps/chosen": -241.8619842529297, "logps/rejected": -365.66448974609375, "loss": 0.2226, "rewards/accuracies": 0.875, "rewards/chosen": -0.9224108457565308, "rewards/margins": 2.6204233169555664, "rewards/rejected": -3.5428342819213867, "step": 5124 }, { "epoch": 0.6, "learning_rate": 1.2298334711231842e-07, "logits/chosen": -2.339036226272583, "logits/rejected": -2.054238796234131, "logps/chosen": -260.60540771484375, "logps/rejected": -274.2049560546875, "loss": 0.3287, "rewards/accuracies": 0.875, "rewards/chosen": -0.6425458192825317, "rewards/margins": 2.4909396171569824, "rewards/rejected": -3.1334853172302246, "step": 5125 }, { "epoch": 0.6, "learning_rate": 1.2294791543640014e-07, "logits/chosen": -2.0086510181427, "logits/rejected": -2.50942325592041, "logps/chosen": -301.9664306640625, "logps/rejected": -234.12249755859375, "loss": 0.2727, "rewards/accuracies": 0.875, "rewards/chosen": -0.33487218618392944, "rewards/margins": 2.5505857467651367, "rewards/rejected": -2.885457992553711, "step": 5126 }, { "epoch": 0.6, "learning_rate": 1.2291248376048186e-07, "logits/chosen": -2.1895875930786133, "logits/rejected": -2.5083351135253906, "logps/chosen": -352.14532470703125, "logps/rejected": -148.89776611328125, "loss": 0.3725, "rewards/accuracies": 0.75, "rewards/chosen": -0.339913547039032, "rewards/margins": 1.6847015619277954, "rewards/rejected": -2.0246152877807617, "step": 5127 }, { "epoch": 0.6, "learning_rate": 1.2287705208456359e-07, "logits/chosen": -1.7136218547821045, "logits/rejected": -1.9946284294128418, "logps/chosen": -507.454345703125, "logps/rejected": -328.8541564941406, "loss": 0.6494, "rewards/accuracies": 0.625, "rewards/chosen": -0.8290361166000366, "rewards/margins": 2.297891139984131, "rewards/rejected": -3.126927137374878, "step": 5128 }, { "epoch": 0.6, "learning_rate": 1.228416204086453e-07, "logits/chosen": -1.7071917057037354, "logits/rejected": -2.1851866245269775, "logps/chosen": -278.64117431640625, "logps/rejected": -177.24632263183594, "loss": 0.6272, "rewards/accuracies": 0.75, "rewards/chosen": -1.5662418603897095, "rewards/margins": 0.5421977639198303, "rewards/rejected": -2.1084396839141846, "step": 5129 }, { "epoch": 0.6, "learning_rate": 1.2280618873272706e-07, "logits/chosen": -2.3276870250701904, "logits/rejected": -2.3523080348968506, "logps/chosen": -220.08668518066406, "logps/rejected": -296.5013732910156, "loss": 0.2328, "rewards/accuracies": 1.0, "rewards/chosen": -0.418956995010376, "rewards/margins": 1.9940530061721802, "rewards/rejected": -2.4130098819732666, "step": 5130 }, { "epoch": 0.6, "learning_rate": 1.2277075705680878e-07, "logits/chosen": -2.3106372356414795, "logits/rejected": -2.7049717903137207, "logps/chosen": -327.0578308105469, "logps/rejected": -191.98822021484375, "loss": 0.4681, "rewards/accuracies": 0.625, "rewards/chosen": -1.9463878870010376, "rewards/margins": 1.4810173511505127, "rewards/rejected": -3.4274051189422607, "step": 5131 }, { "epoch": 0.6, "learning_rate": 1.227353253808905e-07, "logits/chosen": -2.7277674674987793, "logits/rejected": -2.8120040893554688, "logps/chosen": -403.78070068359375, "logps/rejected": -338.1508483886719, "loss": 0.2603, "rewards/accuracies": 0.875, "rewards/chosen": -0.6966190338134766, "rewards/margins": 1.779030442237854, "rewards/rejected": -2.475649356842041, "step": 5132 }, { "epoch": 0.6, "learning_rate": 1.2269989370497225e-07, "logits/chosen": -2.506174325942993, "logits/rejected": -2.473383903503418, "logps/chosen": -446.2240905761719, "logps/rejected": -369.17724609375, "loss": 0.3913, "rewards/accuracies": 0.75, "rewards/chosen": -0.8596564531326294, "rewards/margins": 1.538688063621521, "rewards/rejected": -2.3983445167541504, "step": 5133 }, { "epoch": 0.6, "learning_rate": 1.2266446202905397e-07, "logits/chosen": -2.4449896812438965, "logits/rejected": -2.3463964462280273, "logps/chosen": -279.21490478515625, "logps/rejected": -218.77459716796875, "loss": 0.4912, "rewards/accuracies": 0.625, "rewards/chosen": -1.5418498516082764, "rewards/margins": 1.5551363229751587, "rewards/rejected": -3.0969860553741455, "step": 5134 }, { "epoch": 0.6, "learning_rate": 1.226290303531357e-07, "logits/chosen": -2.219984531402588, "logits/rejected": -2.3935954570770264, "logps/chosen": -318.04443359375, "logps/rejected": -289.8975830078125, "loss": 0.4663, "rewards/accuracies": 0.75, "rewards/chosen": -1.0296363830566406, "rewards/margins": 1.558647871017456, "rewards/rejected": -2.588284492492676, "step": 5135 }, { "epoch": 0.6, "learning_rate": 1.2259359867721742e-07, "logits/chosen": -2.4949724674224854, "logits/rejected": -2.6152076721191406, "logps/chosen": -250.11190795898438, "logps/rejected": -212.3108367919922, "loss": 0.5792, "rewards/accuracies": 0.625, "rewards/chosen": -0.8342756032943726, "rewards/margins": 1.760995626449585, "rewards/rejected": -2.595271587371826, "step": 5136 }, { "epoch": 0.6, "learning_rate": 1.2255816700129916e-07, "logits/chosen": -2.0042953491210938, "logits/rejected": -1.8382179737091064, "logps/chosen": -162.68508911132812, "logps/rejected": -209.60682678222656, "loss": 0.3356, "rewards/accuracies": 0.75, "rewards/chosen": -0.5327480435371399, "rewards/margins": 2.375957727432251, "rewards/rejected": -2.908705711364746, "step": 5137 }, { "epoch": 0.6, "learning_rate": 1.225227353253809e-07, "logits/chosen": -2.084937572479248, "logits/rejected": -1.8819053173065186, "logps/chosen": -314.6112976074219, "logps/rejected": -353.434814453125, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -1.1111476421356201, "rewards/margins": 1.1149036884307861, "rewards/rejected": -2.226051092147827, "step": 5138 }, { "epoch": 0.6, "learning_rate": 1.224873036494626e-07, "logits/chosen": -1.7486134767532349, "logits/rejected": -2.2891416549682617, "logps/chosen": -475.4940185546875, "logps/rejected": -223.64321899414062, "loss": 0.1939, "rewards/accuracies": 1.0, "rewards/chosen": -0.5282847285270691, "rewards/margins": 2.0656471252441406, "rewards/rejected": -2.5939316749572754, "step": 5139 }, { "epoch": 0.6, "learning_rate": 1.2245187197354433e-07, "logits/chosen": -2.7553608417510986, "logits/rejected": -2.8568062782287598, "logps/chosen": -213.13465881347656, "logps/rejected": -194.9080047607422, "loss": 0.4253, "rewards/accuracies": 0.875, "rewards/chosen": -1.0414444208145142, "rewards/margins": 2.980316400527954, "rewards/rejected": -4.021760940551758, "step": 5140 }, { "epoch": 0.6, "learning_rate": 1.2241644029762608e-07, "logits/chosen": -2.0805649757385254, "logits/rejected": -2.1005215644836426, "logps/chosen": -263.3174743652344, "logps/rejected": -338.59869384765625, "loss": 0.3109, "rewards/accuracies": 0.875, "rewards/chosen": -1.4862335920333862, "rewards/margins": 2.7639541625976562, "rewards/rejected": -4.250187873840332, "step": 5141 }, { "epoch": 0.6, "learning_rate": 1.223810086217078e-07, "logits/chosen": -2.1309316158294678, "logits/rejected": -2.3310976028442383, "logps/chosen": -197.66177368164062, "logps/rejected": -265.501708984375, "loss": 0.2532, "rewards/accuracies": 0.875, "rewards/chosen": -0.6281002759933472, "rewards/margins": 3.1413731575012207, "rewards/rejected": -3.7694733142852783, "step": 5142 }, { "epoch": 0.6, "learning_rate": 1.2234557694578952e-07, "logits/chosen": -2.7278285026550293, "logits/rejected": -2.5791237354278564, "logps/chosen": -153.018798828125, "logps/rejected": -188.00750732421875, "loss": 0.1447, "rewards/accuracies": 1.0, "rewards/chosen": -0.8224601149559021, "rewards/margins": 2.6298556327819824, "rewards/rejected": -3.4523158073425293, "step": 5143 }, { "epoch": 0.6, "learning_rate": 1.2231014526987127e-07, "logits/chosen": -2.438666343688965, "logits/rejected": -2.2669496536254883, "logps/chosen": -217.2213592529297, "logps/rejected": -223.17330932617188, "loss": 0.2411, "rewards/accuracies": 0.875, "rewards/chosen": 0.29590755701065063, "rewards/margins": 1.7999703884124756, "rewards/rejected": -1.5040628910064697, "step": 5144 }, { "epoch": 0.6, "learning_rate": 1.22274713593953e-07, "logits/chosen": -2.2915568351745605, "logits/rejected": -2.361651659011841, "logps/chosen": -323.3281555175781, "logps/rejected": -283.2708740234375, "loss": 0.1435, "rewards/accuracies": 1.0, "rewards/chosen": -0.28033846616744995, "rewards/margins": 2.2793521881103516, "rewards/rejected": -2.5596907138824463, "step": 5145 }, { "epoch": 0.6, "learning_rate": 1.2223928191803472e-07, "logits/chosen": -2.8296799659729004, "logits/rejected": -2.785123825073242, "logps/chosen": -529.276611328125, "logps/rejected": -332.7628173828125, "loss": 0.3218, "rewards/accuracies": 0.75, "rewards/chosen": -1.1657543182373047, "rewards/margins": 3.213261604309082, "rewards/rejected": -4.379015922546387, "step": 5146 }, { "epoch": 0.6, "learning_rate": 1.2220385024211644e-07, "logits/chosen": -1.9739183187484741, "logits/rejected": -1.744006633758545, "logps/chosen": -179.52938842773438, "logps/rejected": -307.3097839355469, "loss": 0.1305, "rewards/accuracies": 1.0, "rewards/chosen": -0.25576990842819214, "rewards/margins": 3.3169713020324707, "rewards/rejected": -3.5727415084838867, "step": 5147 }, { "epoch": 0.6, "learning_rate": 1.221684185661982e-07, "logits/chosen": -2.251734495162964, "logits/rejected": -2.5340583324432373, "logps/chosen": -259.9818115234375, "logps/rejected": -270.37744140625, "loss": 0.2404, "rewards/accuracies": 0.875, "rewards/chosen": -0.9523962140083313, "rewards/margins": 2.2593045234680176, "rewards/rejected": -3.2117011547088623, "step": 5148 }, { "epoch": 0.6, "learning_rate": 1.221329868902799e-07, "logits/chosen": -2.4059689044952393, "logits/rejected": -2.4282827377319336, "logps/chosen": -132.5740509033203, "logps/rejected": -270.3348693847656, "loss": 0.6176, "rewards/accuracies": 0.625, "rewards/chosen": -1.3161264657974243, "rewards/margins": 2.3858261108398438, "rewards/rejected": -3.7019524574279785, "step": 5149 }, { "epoch": 0.6, "learning_rate": 1.2209755521436163e-07, "logits/chosen": -2.2006330490112305, "logits/rejected": -2.2719225883483887, "logps/chosen": -210.8809051513672, "logps/rejected": -195.230712890625, "loss": 0.3965, "rewards/accuracies": 0.875, "rewards/chosen": -0.6237286329269409, "rewards/margins": 2.2334136962890625, "rewards/rejected": -2.857142448425293, "step": 5150 }, { "epoch": 0.6, "learning_rate": 1.2206212353844335e-07, "logits/chosen": -2.324965476989746, "logits/rejected": -2.412593126296997, "logps/chosen": -319.0421142578125, "logps/rejected": -341.6940002441406, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": 0.053290314972400665, "rewards/margins": 4.247989654541016, "rewards/rejected": -4.194699764251709, "step": 5151 }, { "epoch": 0.6, "learning_rate": 1.2202669186252508e-07, "logits/chosen": -2.9868533611297607, "logits/rejected": -2.844783306121826, "logps/chosen": -183.92478942871094, "logps/rejected": -183.93618774414062, "loss": 0.4751, "rewards/accuracies": 0.625, "rewards/chosen": -1.1546136140823364, "rewards/margins": 1.3894068002700806, "rewards/rejected": -2.544020414352417, "step": 5152 }, { "epoch": 0.6, "learning_rate": 1.2199126018660682e-07, "logits/chosen": -2.487842082977295, "logits/rejected": -2.0799882411956787, "logps/chosen": -104.04010772705078, "logps/rejected": -201.80337524414062, "loss": 0.4485, "rewards/accuracies": 0.75, "rewards/chosen": -0.8131652474403381, "rewards/margins": 1.4487608671188354, "rewards/rejected": -2.2619261741638184, "step": 5153 }, { "epoch": 0.6, "learning_rate": 1.2195582851068855e-07, "logits/chosen": -2.5003345012664795, "logits/rejected": -2.3973002433776855, "logps/chosen": -309.2982482910156, "logps/rejected": -252.57940673828125, "loss": 0.4983, "rewards/accuracies": 0.625, "rewards/chosen": -1.9559922218322754, "rewards/margins": 1.5331485271453857, "rewards/rejected": -3.489140510559082, "step": 5154 }, { "epoch": 0.6, "learning_rate": 1.219203968347703e-07, "logits/chosen": -2.2191805839538574, "logits/rejected": -2.3995485305786133, "logps/chosen": -262.9721984863281, "logps/rejected": -234.10498046875, "loss": 0.1613, "rewards/accuracies": 1.0, "rewards/chosen": -0.953752338886261, "rewards/margins": 2.3778343200683594, "rewards/rejected": -3.3315868377685547, "step": 5155 }, { "epoch": 0.6, "learning_rate": 1.2188496515885202e-07, "logits/chosen": -2.3358514308929443, "logits/rejected": -2.3938379287719727, "logps/chosen": -221.9315948486328, "logps/rejected": -218.56625366210938, "loss": 0.1779, "rewards/accuracies": 1.0, "rewards/chosen": -1.2708457708358765, "rewards/margins": 2.123467445373535, "rewards/rejected": -3.3943135738372803, "step": 5156 }, { "epoch": 0.6, "learning_rate": 1.2184953348293374e-07, "logits/chosen": -2.707411289215088, "logits/rejected": -2.5543408393859863, "logps/chosen": -164.876953125, "logps/rejected": -199.887451171875, "loss": 0.5665, "rewards/accuracies": 0.75, "rewards/chosen": -1.4612798690795898, "rewards/margins": 2.03262996673584, "rewards/rejected": -3.4939098358154297, "step": 5157 }, { "epoch": 0.6, "learning_rate": 1.2181410180701546e-07, "logits/chosen": -1.7999128103256226, "logits/rejected": -1.9087313413619995, "logps/chosen": -407.53643798828125, "logps/rejected": -379.4078674316406, "loss": 1.3412, "rewards/accuracies": 0.5, "rewards/chosen": -1.8983010053634644, "rewards/margins": -0.7385311126708984, "rewards/rejected": -1.1597700119018555, "step": 5158 }, { "epoch": 0.6, "learning_rate": 1.2177867013109718e-07, "logits/chosen": -2.223919630050659, "logits/rejected": -2.1439318656921387, "logps/chosen": -355.631103515625, "logps/rejected": -359.44580078125, "loss": 0.1598, "rewards/accuracies": 1.0, "rewards/chosen": -0.6269824504852295, "rewards/margins": 3.4270691871643066, "rewards/rejected": -4.054051876068115, "step": 5159 }, { "epoch": 0.6, "learning_rate": 1.2174323845517893e-07, "logits/chosen": -2.5781381130218506, "logits/rejected": -2.576977252960205, "logps/chosen": -70.28850555419922, "logps/rejected": -136.47933959960938, "loss": 0.5058, "rewards/accuracies": 0.625, "rewards/chosen": -0.5026009678840637, "rewards/margins": 2.0700178146362305, "rewards/rejected": -2.5726184844970703, "step": 5160 }, { "epoch": 0.6, "learning_rate": 1.2170780677926065e-07, "logits/chosen": -2.138638496398926, "logits/rejected": -2.0703682899475098, "logps/chosen": -300.4554748535156, "logps/rejected": -322.74853515625, "loss": 0.5386, "rewards/accuracies": 0.75, "rewards/chosen": -0.48631101846694946, "rewards/margins": 1.663435697555542, "rewards/rejected": -2.1497466564178467, "step": 5161 }, { "epoch": 0.6, "learning_rate": 1.2167237510334238e-07, "logits/chosen": -2.5677742958068848, "logits/rejected": -2.5401296615600586, "logps/chosen": -165.1081085205078, "logps/rejected": -186.43771362304688, "loss": 0.2137, "rewards/accuracies": 1.0, "rewards/chosen": -0.8036798238754272, "rewards/margins": 1.856041431427002, "rewards/rejected": -2.6597213745117188, "step": 5162 }, { "epoch": 0.6, "learning_rate": 1.216369434274241e-07, "logits/chosen": -2.3227591514587402, "logits/rejected": -2.3879740238189697, "logps/chosen": -349.0043029785156, "logps/rejected": -313.2303466796875, "loss": 0.2557, "rewards/accuracies": 0.875, "rewards/chosen": -0.5349323153495789, "rewards/margins": 2.2743756771087646, "rewards/rejected": -2.809307813644409, "step": 5163 }, { "epoch": 0.6, "learning_rate": 1.2160151175150585e-07, "logits/chosen": -2.1383869647979736, "logits/rejected": -2.2058053016662598, "logps/chosen": -156.2996063232422, "logps/rejected": -216.99822998046875, "loss": 0.1436, "rewards/accuracies": 1.0, "rewards/chosen": -0.3586556911468506, "rewards/margins": 3.022068500518799, "rewards/rejected": -3.3807241916656494, "step": 5164 }, { "epoch": 0.6, "learning_rate": 1.2156608007558757e-07, "logits/chosen": -1.791621446609497, "logits/rejected": -1.9342632293701172, "logps/chosen": -347.428466796875, "logps/rejected": -304.0482177734375, "loss": 0.67, "rewards/accuracies": 0.5, "rewards/chosen": -1.0351691246032715, "rewards/margins": 0.9751028418540955, "rewards/rejected": -2.0102720260620117, "step": 5165 }, { "epoch": 0.6, "learning_rate": 1.2153064839966932e-07, "logits/chosen": -1.8173198699951172, "logits/rejected": -2.090070962905884, "logps/chosen": -452.77154541015625, "logps/rejected": -263.6712341308594, "loss": 0.5891, "rewards/accuracies": 0.625, "rewards/chosen": -1.331755518913269, "rewards/margins": 0.7883865237236023, "rewards/rejected": -2.1201422214508057, "step": 5166 }, { "epoch": 0.6, "learning_rate": 1.2149521672375104e-07, "logits/chosen": -2.16621470451355, "logits/rejected": -1.9819211959838867, "logps/chosen": -343.8999328613281, "logps/rejected": -258.97271728515625, "loss": 0.6547, "rewards/accuracies": 0.625, "rewards/chosen": -1.405301809310913, "rewards/margins": 1.1323615312576294, "rewards/rejected": -2.537663221359253, "step": 5167 }, { "epoch": 0.6, "learning_rate": 1.2145978504783276e-07, "logits/chosen": -1.9440858364105225, "logits/rejected": -2.302837610244751, "logps/chosen": -394.0356750488281, "logps/rejected": -283.26116943359375, "loss": 0.6152, "rewards/accuracies": 0.875, "rewards/chosen": -0.9935222268104553, "rewards/margins": 1.1876939535140991, "rewards/rejected": -2.181216239929199, "step": 5168 }, { "epoch": 0.6, "learning_rate": 1.2142435337191448e-07, "logits/chosen": -2.5852365493774414, "logits/rejected": -2.6824610233306885, "logps/chosen": -295.34490966796875, "logps/rejected": -241.04922485351562, "loss": 0.383, "rewards/accuracies": 0.875, "rewards/chosen": -1.1216545104980469, "rewards/margins": 1.8454992771148682, "rewards/rejected": -2.967153787612915, "step": 5169 }, { "epoch": 0.6, "learning_rate": 1.213889216959962e-07, "logits/chosen": -2.3654069900512695, "logits/rejected": -2.5773026943206787, "logps/chosen": -331.6851806640625, "logps/rejected": -270.00738525390625, "loss": 0.1689, "rewards/accuracies": 0.875, "rewards/chosen": -0.13827908039093018, "rewards/margins": 2.8870933055877686, "rewards/rejected": -3.025372266769409, "step": 5170 }, { "epoch": 0.6, "learning_rate": 1.2135349002007793e-07, "logits/chosen": -1.98057222366333, "logits/rejected": -1.5026447772979736, "logps/chosen": -305.1203918457031, "logps/rejected": -406.29541015625, "loss": 0.3438, "rewards/accuracies": 0.875, "rewards/chosen": -0.9541860222816467, "rewards/margins": 1.725627064704895, "rewards/rejected": -2.6798131465911865, "step": 5171 }, { "epoch": 0.6, "learning_rate": 1.2131805834415968e-07, "logits/chosen": -2.2253262996673584, "logits/rejected": -2.284430503845215, "logps/chosen": -262.65576171875, "logps/rejected": -226.43206787109375, "loss": 0.3293, "rewards/accuracies": 0.875, "rewards/chosen": -0.5622559785842896, "rewards/margins": 2.163120985031128, "rewards/rejected": -2.725377082824707, "step": 5172 }, { "epoch": 0.6, "learning_rate": 1.212826266682414e-07, "logits/chosen": -2.8781540393829346, "logits/rejected": -2.610921621322632, "logps/chosen": -379.34783935546875, "logps/rejected": -342.66961669921875, "loss": 0.234, "rewards/accuracies": 0.875, "rewards/chosen": -0.2945258617401123, "rewards/margins": 2.5202436447143555, "rewards/rejected": -2.8147695064544678, "step": 5173 }, { "epoch": 0.6, "learning_rate": 1.2124719499232312e-07, "logits/chosen": -1.9700376987457275, "logits/rejected": -2.028470039367676, "logps/chosen": -399.7346496582031, "logps/rejected": -259.3099365234375, "loss": 0.4372, "rewards/accuracies": 0.75, "rewards/chosen": -0.6947767734527588, "rewards/margins": 1.506657600402832, "rewards/rejected": -2.20143461227417, "step": 5174 }, { "epoch": 0.6, "learning_rate": 1.2121176331640487e-07, "logits/chosen": -2.315840721130371, "logits/rejected": -2.1478283405303955, "logps/chosen": -220.9124755859375, "logps/rejected": -265.1065979003906, "loss": 0.3572, "rewards/accuracies": 0.75, "rewards/chosen": -1.022269368171692, "rewards/margins": 2.4682207107543945, "rewards/rejected": -3.490490198135376, "step": 5175 }, { "epoch": 0.6, "learning_rate": 1.211763316404866e-07, "logits/chosen": -2.0749850273132324, "logits/rejected": -2.399956226348877, "logps/chosen": -643.225830078125, "logps/rejected": -465.65826416015625, "loss": 0.2581, "rewards/accuracies": 0.875, "rewards/chosen": -1.0667566061019897, "rewards/margins": 3.2370879650115967, "rewards/rejected": -4.303844451904297, "step": 5176 }, { "epoch": 0.6, "learning_rate": 1.2114089996456831e-07, "logits/chosen": -2.569577217102051, "logits/rejected": -2.4923605918884277, "logps/chosen": -197.9288330078125, "logps/rejected": -171.02896118164062, "loss": 0.472, "rewards/accuracies": 0.75, "rewards/chosen": -0.3524514138698578, "rewards/margins": 0.7974086999893188, "rewards/rejected": -1.149860143661499, "step": 5177 }, { "epoch": 0.6, "learning_rate": 1.2110546828865006e-07, "logits/chosen": -2.1022799015045166, "logits/rejected": -2.0434725284576416, "logps/chosen": -528.9983520507812, "logps/rejected": -302.42633056640625, "loss": 0.1902, "rewards/accuracies": 1.0, "rewards/chosen": -0.20870739221572876, "rewards/margins": 3.252781629562378, "rewards/rejected": -3.461488723754883, "step": 5178 }, { "epoch": 0.6, "learning_rate": 1.2107003661273179e-07, "logits/chosen": -2.251540422439575, "logits/rejected": -2.0910701751708984, "logps/chosen": -204.24888610839844, "logps/rejected": -243.0397186279297, "loss": 0.4986, "rewards/accuracies": 0.75, "rewards/chosen": -0.9263660907745361, "rewards/margins": 2.4848403930664062, "rewards/rejected": -3.4112062454223633, "step": 5179 }, { "epoch": 0.6, "learning_rate": 1.210346049368135e-07, "logits/chosen": -2.4439024925231934, "logits/rejected": -2.450456142425537, "logps/chosen": -248.8252716064453, "logps/rejected": -215.6868896484375, "loss": 0.2755, "rewards/accuracies": 0.875, "rewards/chosen": -0.7359672784805298, "rewards/margins": 2.913820266723633, "rewards/rejected": -3.6497879028320312, "step": 5180 }, { "epoch": 0.6, "learning_rate": 1.2099917326089523e-07, "logits/chosen": -2.243943214416504, "logits/rejected": -2.1298763751983643, "logps/chosen": -183.43824768066406, "logps/rejected": -205.67611694335938, "loss": 0.3913, "rewards/accuracies": 0.875, "rewards/chosen": -0.6132287383079529, "rewards/margins": 1.9868220090866089, "rewards/rejected": -2.600050926208496, "step": 5181 }, { "epoch": 0.6, "learning_rate": 1.2096374158497695e-07, "logits/chosen": -1.9042142629623413, "logits/rejected": -1.5784499645233154, "logps/chosen": -272.8074645996094, "logps/rejected": -383.36474609375, "loss": 0.6092, "rewards/accuracies": 0.75, "rewards/chosen": -0.789008617401123, "rewards/margins": 0.6373796463012695, "rewards/rejected": -1.4263882637023926, "step": 5182 }, { "epoch": 0.6, "learning_rate": 1.209283099090587e-07, "logits/chosen": -2.452303171157837, "logits/rejected": -2.2016170024871826, "logps/chosen": -355.0323181152344, "logps/rejected": -339.13128662109375, "loss": 0.1151, "rewards/accuracies": 1.0, "rewards/chosen": -0.21731846034526825, "rewards/margins": 2.855644702911377, "rewards/rejected": -3.072962999343872, "step": 5183 }, { "epoch": 0.6, "learning_rate": 1.2089287823314042e-07, "logits/chosen": -2.503416061401367, "logits/rejected": -2.621192455291748, "logps/chosen": -436.8119201660156, "logps/rejected": -177.7572479248047, "loss": 0.493, "rewards/accuracies": 0.875, "rewards/chosen": -0.9297093749046326, "rewards/margins": 0.5880588293075562, "rewards/rejected": -1.517768144607544, "step": 5184 }, { "epoch": 0.6, "learning_rate": 1.2085744655722214e-07, "logits/chosen": -2.1680829524993896, "logits/rejected": -2.223102569580078, "logps/chosen": -310.1890869140625, "logps/rejected": -204.76429748535156, "loss": 0.3985, "rewards/accuracies": 0.75, "rewards/chosen": -0.7580963373184204, "rewards/margins": 1.1790990829467773, "rewards/rejected": -1.9371954202651978, "step": 5185 }, { "epoch": 0.6, "learning_rate": 1.2082201488130387e-07, "logits/chosen": -2.4249343872070312, "logits/rejected": -2.531165838241577, "logps/chosen": -264.2085266113281, "logps/rejected": -270.48541259765625, "loss": 0.6262, "rewards/accuracies": 0.625, "rewards/chosen": -1.496390461921692, "rewards/margins": 1.8428950309753418, "rewards/rejected": -3.3392858505249023, "step": 5186 }, { "epoch": 0.6, "learning_rate": 1.2078658320538562e-07, "logits/chosen": -2.6830625534057617, "logits/rejected": -2.6153247356414795, "logps/chosen": -295.1492919921875, "logps/rejected": -180.11886596679688, "loss": 0.2245, "rewards/accuracies": 1.0, "rewards/chosen": -1.0302248001098633, "rewards/margins": 2.0213284492492676, "rewards/rejected": -3.0515530109405518, "step": 5187 }, { "epoch": 0.6, "learning_rate": 1.2075115152946734e-07, "logits/chosen": -2.1325323581695557, "logits/rejected": -2.2552058696746826, "logps/chosen": -174.17930603027344, "logps/rejected": -190.2611846923828, "loss": 0.5903, "rewards/accuracies": 0.75, "rewards/chosen": -1.2608143091201782, "rewards/margins": 1.8889987468719482, "rewards/rejected": -3.149813175201416, "step": 5188 }, { "epoch": 0.6, "learning_rate": 1.2071571985354909e-07, "logits/chosen": -2.1447079181671143, "logits/rejected": -2.4205920696258545, "logps/chosen": -328.7030334472656, "logps/rejected": -189.96087646484375, "loss": 0.441, "rewards/accuracies": 0.625, "rewards/chosen": -0.5200481414794922, "rewards/margins": 1.9817452430725098, "rewards/rejected": -2.501793384552002, "step": 5189 }, { "epoch": 0.6, "learning_rate": 1.206802881776308e-07, "logits/chosen": -2.840146541595459, "logits/rejected": -2.8637099266052246, "logps/chosen": -145.47161865234375, "logps/rejected": -233.88980102539062, "loss": 0.4143, "rewards/accuracies": 0.875, "rewards/chosen": -1.1663498878479004, "rewards/margins": 4.163477420806885, "rewards/rejected": -5.329827308654785, "step": 5190 }, { "epoch": 0.6, "learning_rate": 1.2064485650171253e-07, "logits/chosen": -2.148712635040283, "logits/rejected": -2.5300376415252686, "logps/chosen": -285.4311828613281, "logps/rejected": -205.98638916015625, "loss": 0.3611, "rewards/accuracies": 0.75, "rewards/chosen": -0.8600549697875977, "rewards/margins": 2.062434434890747, "rewards/rejected": -2.9224894046783447, "step": 5191 }, { "epoch": 0.6, "learning_rate": 1.2060942482579425e-07, "logits/chosen": -2.138774871826172, "logits/rejected": -2.0490779876708984, "logps/chosen": -215.04754638671875, "logps/rejected": -260.30718994140625, "loss": 0.3514, "rewards/accuracies": 0.75, "rewards/chosen": -0.3328709006309509, "rewards/margins": 1.851085901260376, "rewards/rejected": -2.1839566230773926, "step": 5192 }, { "epoch": 0.6, "learning_rate": 1.2057399314987597e-07, "logits/chosen": -2.6361491680145264, "logits/rejected": -2.4686880111694336, "logps/chosen": -226.85906982421875, "logps/rejected": -348.58514404296875, "loss": 0.5946, "rewards/accuracies": 0.75, "rewards/chosen": -0.7174818515777588, "rewards/margins": 1.592439889907837, "rewards/rejected": -2.3099217414855957, "step": 5193 }, { "epoch": 0.6, "learning_rate": 1.205385614739577e-07, "logits/chosen": -2.2250266075134277, "logits/rejected": -2.1893231868743896, "logps/chosen": -188.78280639648438, "logps/rejected": -262.77947998046875, "loss": 0.3641, "rewards/accuracies": 0.875, "rewards/chosen": -1.2067149877548218, "rewards/margins": 1.190011978149414, "rewards/rejected": -2.3967268466949463, "step": 5194 }, { "epoch": 0.6, "learning_rate": 1.2050312979803945e-07, "logits/chosen": -2.6227099895477295, "logits/rejected": -2.554823875427246, "logps/chosen": -171.34768676757812, "logps/rejected": -264.05963134765625, "loss": 0.7432, "rewards/accuracies": 0.75, "rewards/chosen": -0.9436321258544922, "rewards/margins": 1.6460413932800293, "rewards/rejected": -2.5896735191345215, "step": 5195 }, { "epoch": 0.6, "learning_rate": 1.2046769812212117e-07, "logits/chosen": -1.9634126424789429, "logits/rejected": -2.0565199851989746, "logps/chosen": -320.5899658203125, "logps/rejected": -301.64447021484375, "loss": 0.2464, "rewards/accuracies": 0.875, "rewards/chosen": -1.125593662261963, "rewards/margins": 2.3610596656799316, "rewards/rejected": -3.4866530895233154, "step": 5196 }, { "epoch": 0.6, "learning_rate": 1.204322664462029e-07, "logits/chosen": -2.60831880569458, "logits/rejected": -2.817276954650879, "logps/chosen": -182.84112548828125, "logps/rejected": -207.76422119140625, "loss": 0.4499, "rewards/accuracies": 0.625, "rewards/chosen": -0.9005789160728455, "rewards/margins": 1.7930991649627686, "rewards/rejected": -2.693678140640259, "step": 5197 }, { "epoch": 0.6, "learning_rate": 1.2039683477028464e-07, "logits/chosen": -2.356882095336914, "logits/rejected": -2.6087868213653564, "logps/chosen": -250.39501953125, "logps/rejected": -182.9837188720703, "loss": 0.325, "rewards/accuracies": 0.875, "rewards/chosen": -0.9269205331802368, "rewards/margins": 2.7070443630218506, "rewards/rejected": -3.633965015411377, "step": 5198 }, { "epoch": 0.6, "learning_rate": 1.2036140309436636e-07, "logits/chosen": -2.054184675216675, "logits/rejected": -2.367687463760376, "logps/chosen": -417.39190673828125, "logps/rejected": -318.114990234375, "loss": 0.6066, "rewards/accuracies": 0.75, "rewards/chosen": -0.9740835428237915, "rewards/margins": 0.8706832528114319, "rewards/rejected": -1.8447667360305786, "step": 5199 }, { "epoch": 0.6, "learning_rate": 1.2032597141844808e-07, "logits/chosen": -1.9945340156555176, "logits/rejected": -1.6193487644195557, "logps/chosen": -334.8529052734375, "logps/rejected": -464.32940673828125, "loss": 0.1595, "rewards/accuracies": 0.875, "rewards/chosen": -0.13162831962108612, "rewards/margins": 2.6609864234924316, "rewards/rejected": -2.792614698410034, "step": 5200 }, { "epoch": 0.61, "learning_rate": 1.2029053974252983e-07, "logits/chosen": -2.262388229370117, "logits/rejected": -2.6312084197998047, "logps/chosen": -342.1814270019531, "logps/rejected": -281.8554382324219, "loss": 0.5054, "rewards/accuracies": 0.75, "rewards/chosen": -1.0466265678405762, "rewards/margins": 3.318283796310425, "rewards/rejected": -4.364910125732422, "step": 5201 }, { "epoch": 0.61, "learning_rate": 1.2025510806661155e-07, "logits/chosen": -2.728365182876587, "logits/rejected": -2.6839518547058105, "logps/chosen": -205.17190551757812, "logps/rejected": -205.14405822753906, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": -0.2852359116077423, "rewards/margins": 3.2778968811035156, "rewards/rejected": -3.5631327629089355, "step": 5202 }, { "epoch": 0.61, "learning_rate": 1.2021967639069328e-07, "logits/chosen": -2.496018886566162, "logits/rejected": -2.448404312133789, "logps/chosen": -232.7915802001953, "logps/rejected": -225.1036376953125, "loss": 0.4538, "rewards/accuracies": 0.875, "rewards/chosen": -0.7520129680633545, "rewards/margins": 1.928113579750061, "rewards/rejected": -2.680126428604126, "step": 5203 }, { "epoch": 0.61, "learning_rate": 1.20184244714775e-07, "logits/chosen": -1.822451114654541, "logits/rejected": -2.1021296977996826, "logps/chosen": -327.4976806640625, "logps/rejected": -192.91050720214844, "loss": 0.7255, "rewards/accuracies": 0.625, "rewards/chosen": -0.9814808368682861, "rewards/margins": 0.8993427753448486, "rewards/rejected": -1.8808236122131348, "step": 5204 }, { "epoch": 0.61, "learning_rate": 1.2014881303885672e-07, "logits/chosen": -2.415126323699951, "logits/rejected": -2.2385666370391846, "logps/chosen": -423.7177734375, "logps/rejected": -362.6585693359375, "loss": 0.4231, "rewards/accuracies": 0.875, "rewards/chosen": -0.7231408357620239, "rewards/margins": 2.2093112468719482, "rewards/rejected": -2.9324522018432617, "step": 5205 }, { "epoch": 0.61, "learning_rate": 1.2011338136293844e-07, "logits/chosen": -2.4968698024749756, "logits/rejected": -2.541084051132202, "logps/chosen": -435.7365417480469, "logps/rejected": -429.32476806640625, "loss": 0.4976, "rewards/accuracies": 0.875, "rewards/chosen": -0.930698037147522, "rewards/margins": 2.9863271713256836, "rewards/rejected": -3.917025089263916, "step": 5206 }, { "epoch": 0.61, "learning_rate": 1.200779496870202e-07, "logits/chosen": -2.710352659225464, "logits/rejected": -2.6802237033843994, "logps/chosen": -287.5000305175781, "logps/rejected": -210.6776123046875, "loss": 0.5631, "rewards/accuracies": 0.625, "rewards/chosen": -0.449428528547287, "rewards/margins": 1.4352753162384033, "rewards/rejected": -1.8847038745880127, "step": 5207 }, { "epoch": 0.61, "learning_rate": 1.200425180111019e-07, "logits/chosen": -3.0040903091430664, "logits/rejected": -2.9331958293914795, "logps/chosen": -182.52853393554688, "logps/rejected": -195.39010620117188, "loss": 0.3667, "rewards/accuracies": 0.625, "rewards/chosen": -0.9012411832809448, "rewards/margins": 2.3392202854156494, "rewards/rejected": -3.2404613494873047, "step": 5208 }, { "epoch": 0.61, "learning_rate": 1.2000708633518366e-07, "logits/chosen": -2.3674371242523193, "logits/rejected": -2.2761852741241455, "logps/chosen": -159.58273315429688, "logps/rejected": -285.1098937988281, "loss": 0.1553, "rewards/accuracies": 0.875, "rewards/chosen": -0.5408338904380798, "rewards/margins": 4.506514072418213, "rewards/rejected": -5.0473480224609375, "step": 5209 }, { "epoch": 0.61, "learning_rate": 1.1997165465926538e-07, "logits/chosen": -2.3298394680023193, "logits/rejected": -2.3637166023254395, "logps/chosen": -214.53109741210938, "logps/rejected": -160.7362060546875, "loss": 0.3904, "rewards/accuracies": 0.875, "rewards/chosen": -1.010250210762024, "rewards/margins": 1.2797260284423828, "rewards/rejected": -2.2899763584136963, "step": 5210 }, { "epoch": 0.61, "learning_rate": 1.199362229833471e-07, "logits/chosen": -2.3432834148406982, "logits/rejected": -2.530444622039795, "logps/chosen": -189.16932678222656, "logps/rejected": -195.637451171875, "loss": 1.2276, "rewards/accuracies": 0.625, "rewards/chosen": -1.7863121032714844, "rewards/margins": 0.4323478937149048, "rewards/rejected": -2.2186601161956787, "step": 5211 }, { "epoch": 0.61, "learning_rate": 1.1990079130742883e-07, "logits/chosen": -2.657667636871338, "logits/rejected": -2.3885087966918945, "logps/chosen": -231.29000854492188, "logps/rejected": -212.68896484375, "loss": 0.2373, "rewards/accuracies": 1.0, "rewards/chosen": -1.2781105041503906, "rewards/margins": 2.279245138168335, "rewards/rejected": -3.5573554039001465, "step": 5212 }, { "epoch": 0.61, "learning_rate": 1.1986535963151058e-07, "logits/chosen": -2.538577079772949, "logits/rejected": -2.4338741302490234, "logps/chosen": -370.89520263671875, "logps/rejected": -400.186279296875, "loss": 0.2402, "rewards/accuracies": 0.875, "rewards/chosen": -0.8703994750976562, "rewards/margins": 3.467846393585205, "rewards/rejected": -4.3382463455200195, "step": 5213 }, { "epoch": 0.61, "learning_rate": 1.198299279555923e-07, "logits/chosen": -2.149869680404663, "logits/rejected": -2.3282487392425537, "logps/chosen": -309.484375, "logps/rejected": -193.91026306152344, "loss": 0.7639, "rewards/accuracies": 0.5, "rewards/chosen": -1.2988252639770508, "rewards/margins": 0.3297821581363678, "rewards/rejected": -1.6286073923110962, "step": 5214 }, { "epoch": 0.61, "learning_rate": 1.1979449627967402e-07, "logits/chosen": -2.143472194671631, "logits/rejected": -2.3911712169647217, "logps/chosen": -236.49417114257812, "logps/rejected": -196.71041870117188, "loss": 0.3835, "rewards/accuracies": 0.75, "rewards/chosen": -0.4488144516944885, "rewards/margins": 2.5874552726745605, "rewards/rejected": -3.0362696647644043, "step": 5215 }, { "epoch": 0.61, "learning_rate": 1.1975906460375574e-07, "logits/chosen": -2.7759060859680176, "logits/rejected": -2.816784381866455, "logps/chosen": -131.42872619628906, "logps/rejected": -189.11538696289062, "loss": 0.1664, "rewards/accuracies": 1.0, "rewards/chosen": -0.5083937048912048, "rewards/margins": 3.1335415840148926, "rewards/rejected": -3.6419355869293213, "step": 5216 }, { "epoch": 0.61, "learning_rate": 1.1972363292783746e-07, "logits/chosen": -2.3794891834259033, "logits/rejected": -2.2853145599365234, "logps/chosen": -223.5705108642578, "logps/rejected": -220.18338012695312, "loss": 0.4644, "rewards/accuracies": 0.875, "rewards/chosen": -0.15381933748722076, "rewards/margins": 1.4701015949249268, "rewards/rejected": -1.6239209175109863, "step": 5217 }, { "epoch": 0.61, "learning_rate": 1.196882012519192e-07, "logits/chosen": -2.3720836639404297, "logits/rejected": -2.232619524002075, "logps/chosen": -233.04229736328125, "logps/rejected": -313.33953857421875, "loss": 0.555, "rewards/accuracies": 0.625, "rewards/chosen": -0.6587268710136414, "rewards/margins": 1.468065857887268, "rewards/rejected": -2.1267926692962646, "step": 5218 }, { "epoch": 0.61, "learning_rate": 1.1965276957600093e-07, "logits/chosen": -2.04050612449646, "logits/rejected": -2.323744535446167, "logps/chosen": -202.7533416748047, "logps/rejected": -183.7977752685547, "loss": 0.7175, "rewards/accuracies": 0.625, "rewards/chosen": -1.008948802947998, "rewards/margins": 1.5431543588638306, "rewards/rejected": -2.552103042602539, "step": 5219 }, { "epoch": 0.61, "learning_rate": 1.1961733790008268e-07, "logits/chosen": -2.806553840637207, "logits/rejected": -2.6314992904663086, "logps/chosen": -228.44308471679688, "logps/rejected": -172.6291046142578, "loss": 0.1837, "rewards/accuracies": 1.0, "rewards/chosen": -0.5104161500930786, "rewards/margins": 2.000979423522949, "rewards/rejected": -2.5113954544067383, "step": 5220 }, { "epoch": 0.61, "learning_rate": 1.195819062241644e-07, "logits/chosen": -2.5998787879943848, "logits/rejected": -2.66147518157959, "logps/chosen": -208.212158203125, "logps/rejected": -166.71063232421875, "loss": 0.217, "rewards/accuracies": 1.0, "rewards/chosen": -0.8014048933982849, "rewards/margins": 1.714307427406311, "rewards/rejected": -2.5157124996185303, "step": 5221 }, { "epoch": 0.61, "learning_rate": 1.1954647454824613e-07, "logits/chosen": -2.4281039237976074, "logits/rejected": -2.5322682857513428, "logps/chosen": -108.86763763427734, "logps/rejected": -232.72381591796875, "loss": 0.4878, "rewards/accuracies": 0.875, "rewards/chosen": -1.0086349248886108, "rewards/margins": 2.048588752746582, "rewards/rejected": -3.0572235584259033, "step": 5222 }, { "epoch": 0.61, "learning_rate": 1.1951104287232785e-07, "logits/chosen": -2.0509934425354004, "logits/rejected": -2.138908863067627, "logps/chosen": -198.68914794921875, "logps/rejected": -257.7431945800781, "loss": 0.5984, "rewards/accuracies": 0.75, "rewards/chosen": -1.283691644668579, "rewards/margins": 1.7401546239852905, "rewards/rejected": -3.023846387863159, "step": 5223 }, { "epoch": 0.61, "learning_rate": 1.194756111964096e-07, "logits/chosen": -2.2003681659698486, "logits/rejected": -1.9633464813232422, "logps/chosen": -236.55325317382812, "logps/rejected": -340.1285400390625, "loss": 0.3148, "rewards/accuracies": 0.875, "rewards/chosen": -0.757728099822998, "rewards/margins": 3.0098705291748047, "rewards/rejected": -3.7675986289978027, "step": 5224 }, { "epoch": 0.61, "learning_rate": 1.1944017952049132e-07, "logits/chosen": -2.143829107284546, "logits/rejected": -2.2712697982788086, "logps/chosen": -323.12054443359375, "logps/rejected": -245.2421112060547, "loss": 0.4124, "rewards/accuracies": 0.75, "rewards/chosen": -1.4978917837142944, "rewards/margins": 1.7034579515457153, "rewards/rejected": -3.2013497352600098, "step": 5225 }, { "epoch": 0.61, "learning_rate": 1.1940474784457304e-07, "logits/chosen": -1.8851680755615234, "logits/rejected": -2.0490682125091553, "logps/chosen": -361.4912414550781, "logps/rejected": -319.59429931640625, "loss": 0.2101, "rewards/accuracies": 1.0, "rewards/chosen": -0.6845676898956299, "rewards/margins": 3.6948163509368896, "rewards/rejected": -4.3793840408325195, "step": 5226 }, { "epoch": 0.61, "learning_rate": 1.1936931616865476e-07, "logits/chosen": -1.7843022346496582, "logits/rejected": -2.1901628971099854, "logps/chosen": -405.79974365234375, "logps/rejected": -239.50567626953125, "loss": 0.415, "rewards/accuracies": 0.875, "rewards/chosen": -0.6462787985801697, "rewards/margins": 2.27803373336792, "rewards/rejected": -2.9243125915527344, "step": 5227 }, { "epoch": 0.61, "learning_rate": 1.193338844927365e-07, "logits/chosen": -2.101116418838501, "logits/rejected": -2.230097532272339, "logps/chosen": -385.493896484375, "logps/rejected": -392.6390075683594, "loss": 0.4562, "rewards/accuracies": 0.625, "rewards/chosen": -0.9440162777900696, "rewards/margins": 1.1745153665542603, "rewards/rejected": -2.1185317039489746, "step": 5228 }, { "epoch": 0.61, "learning_rate": 1.1929845281681824e-07, "logits/chosen": -2.578500747680664, "logits/rejected": -2.8369140625, "logps/chosen": -295.1000671386719, "logps/rejected": -178.70208740234375, "loss": 0.4898, "rewards/accuracies": 0.875, "rewards/chosen": -1.14497709274292, "rewards/margins": 2.224788188934326, "rewards/rejected": -3.369765520095825, "step": 5229 }, { "epoch": 0.61, "learning_rate": 1.1926302114089996e-07, "logits/chosen": -2.27799654006958, "logits/rejected": -2.21203351020813, "logps/chosen": -251.28021240234375, "logps/rejected": -311.6126403808594, "loss": 0.603, "rewards/accuracies": 0.75, "rewards/chosen": -1.18759024143219, "rewards/margins": 0.6488876342773438, "rewards/rejected": -1.8364778757095337, "step": 5230 }, { "epoch": 0.61, "learning_rate": 1.1922758946498168e-07, "logits/chosen": -2.394235849380493, "logits/rejected": -2.4152748584747314, "logps/chosen": -255.47183227539062, "logps/rejected": -313.38677978515625, "loss": 0.5667, "rewards/accuracies": 0.75, "rewards/chosen": -1.433862566947937, "rewards/margins": 1.1317811012268066, "rewards/rejected": -2.565643787384033, "step": 5231 }, { "epoch": 0.61, "learning_rate": 1.1919215778906342e-07, "logits/chosen": -2.4488236904144287, "logits/rejected": -2.5489354133605957, "logps/chosen": -115.35789489746094, "logps/rejected": -170.82884216308594, "loss": 0.8249, "rewards/accuracies": 0.625, "rewards/chosen": -2.0399065017700195, "rewards/margins": 1.2633881568908691, "rewards/rejected": -3.3032946586608887, "step": 5232 }, { "epoch": 0.61, "learning_rate": 1.1915672611314515e-07, "logits/chosen": -2.3067777156829834, "logits/rejected": -2.19870662689209, "logps/chosen": -204.93234252929688, "logps/rejected": -291.32086181640625, "loss": 0.5137, "rewards/accuracies": 0.75, "rewards/chosen": -1.7462971210479736, "rewards/margins": 2.242022752761841, "rewards/rejected": -3.9883198738098145, "step": 5233 }, { "epoch": 0.61, "learning_rate": 1.1912129443722687e-07, "logits/chosen": -2.205462694168091, "logits/rejected": -2.445732593536377, "logps/chosen": -392.052001953125, "logps/rejected": -245.5328826904297, "loss": 0.2757, "rewards/accuracies": 1.0, "rewards/chosen": -0.6796283721923828, "rewards/margins": 1.7002649307250977, "rewards/rejected": -2.3798933029174805, "step": 5234 }, { "epoch": 0.61, "learning_rate": 1.190858627613086e-07, "logits/chosen": -2.9344794750213623, "logits/rejected": -2.6877963542938232, "logps/chosen": -308.64727783203125, "logps/rejected": -244.89910888671875, "loss": 0.8133, "rewards/accuracies": 0.75, "rewards/chosen": -0.8858540058135986, "rewards/margins": 1.695253849029541, "rewards/rejected": -2.5811080932617188, "step": 5235 }, { "epoch": 0.61, "learning_rate": 1.1905043108539034e-07, "logits/chosen": -2.739502191543579, "logits/rejected": -2.7095513343811035, "logps/chosen": -219.41241455078125, "logps/rejected": -220.99903869628906, "loss": 0.2125, "rewards/accuracies": 1.0, "rewards/chosen": -0.6793192625045776, "rewards/margins": 2.1359171867370605, "rewards/rejected": -2.815236806869507, "step": 5236 }, { "epoch": 0.61, "learning_rate": 1.1901499940947207e-07, "logits/chosen": -2.078427791595459, "logits/rejected": -2.3817410469055176, "logps/chosen": -263.18310546875, "logps/rejected": -178.794677734375, "loss": 0.403, "rewards/accuracies": 0.875, "rewards/chosen": -0.7582724094390869, "rewards/margins": 1.5949277877807617, "rewards/rejected": -2.3532001972198486, "step": 5237 }, { "epoch": 0.61, "learning_rate": 1.1897956773355379e-07, "logits/chosen": -1.7767460346221924, "logits/rejected": -1.7624926567077637, "logps/chosen": -347.53662109375, "logps/rejected": -288.1228332519531, "loss": 0.409, "rewards/accuracies": 0.75, "rewards/chosen": -0.6475794911384583, "rewards/margins": 1.3134269714355469, "rewards/rejected": -1.96100652217865, "step": 5238 }, { "epoch": 0.61, "learning_rate": 1.1894413605763552e-07, "logits/chosen": -2.9477145671844482, "logits/rejected": -2.900980234146118, "logps/chosen": -164.41928100585938, "logps/rejected": -148.6708526611328, "loss": 0.6513, "rewards/accuracies": 0.625, "rewards/chosen": -0.7536442875862122, "rewards/margins": 1.8062372207641602, "rewards/rejected": -2.5598816871643066, "step": 5239 }, { "epoch": 0.61, "learning_rate": 1.1890870438171725e-07, "logits/chosen": -2.2992303371429443, "logits/rejected": -2.37612247467041, "logps/chosen": -432.0656433105469, "logps/rejected": -209.2398681640625, "loss": 0.3639, "rewards/accuracies": 0.875, "rewards/chosen": -0.9445242881774902, "rewards/margins": 1.641132116317749, "rewards/rejected": -2.5856566429138184, "step": 5240 }, { "epoch": 0.61, "learning_rate": 1.1887327270579897e-07, "logits/chosen": -2.08923077583313, "logits/rejected": -2.3481321334838867, "logps/chosen": -402.72210693359375, "logps/rejected": -232.14007568359375, "loss": 0.1195, "rewards/accuracies": 1.0, "rewards/chosen": -0.6301354169845581, "rewards/margins": 2.508328914642334, "rewards/rejected": -3.1384642124176025, "step": 5241 }, { "epoch": 0.61, "learning_rate": 1.1883784102988072e-07, "logits/chosen": -2.5205092430114746, "logits/rejected": -2.5372467041015625, "logps/chosen": -195.1888427734375, "logps/rejected": -166.8831787109375, "loss": 0.4237, "rewards/accuracies": 0.625, "rewards/chosen": -1.0291069746017456, "rewards/margins": 1.6552293300628662, "rewards/rejected": -2.6843361854553223, "step": 5242 }, { "epoch": 0.61, "learning_rate": 1.1880240935396244e-07, "logits/chosen": -2.6876614093780518, "logits/rejected": -2.643460750579834, "logps/chosen": -239.96884155273438, "logps/rejected": -219.08084106445312, "loss": 0.4498, "rewards/accuracies": 0.875, "rewards/chosen": -0.2831932604312897, "rewards/margins": 2.206418991088867, "rewards/rejected": -2.489612102508545, "step": 5243 }, { "epoch": 0.61, "learning_rate": 1.1876697767804417e-07, "logits/chosen": -1.6111646890640259, "logits/rejected": -1.8130854368209839, "logps/chosen": -486.34893798828125, "logps/rejected": -336.9454040527344, "loss": 0.606, "rewards/accuracies": 0.625, "rewards/chosen": -1.1003403663635254, "rewards/margins": 0.7380590438842773, "rewards/rejected": -1.8383995294570923, "step": 5244 }, { "epoch": 0.61, "learning_rate": 1.187315460021259e-07, "logits/chosen": -2.204624652862549, "logits/rejected": -2.4227869510650635, "logps/chosen": -212.95664978027344, "logps/rejected": -263.3576965332031, "loss": 0.4663, "rewards/accuracies": 0.625, "rewards/chosen": -1.0531284809112549, "rewards/margins": 2.0488672256469727, "rewards/rejected": -3.1019954681396484, "step": 5245 }, { "epoch": 0.61, "learning_rate": 1.1869611432620762e-07, "logits/chosen": -2.648639440536499, "logits/rejected": -2.70845365524292, "logps/chosen": -345.9920654296875, "logps/rejected": -344.4086608886719, "loss": 0.1256, "rewards/accuracies": 1.0, "rewards/chosen": -0.9703644514083862, "rewards/margins": 3.282701015472412, "rewards/rejected": -4.253065586090088, "step": 5246 }, { "epoch": 0.61, "learning_rate": 1.1866068265028934e-07, "logits/chosen": -2.1289336681365967, "logits/rejected": -2.2007386684417725, "logps/chosen": -416.67645263671875, "logps/rejected": -435.2542724609375, "loss": 0.2219, "rewards/accuracies": 1.0, "rewards/chosen": -0.20128487050533295, "rewards/margins": 2.78542423248291, "rewards/rejected": -2.9867093563079834, "step": 5247 }, { "epoch": 0.61, "learning_rate": 1.1862525097437109e-07, "logits/chosen": -2.453432083129883, "logits/rejected": -2.5474154949188232, "logps/chosen": -189.3832244873047, "logps/rejected": -186.490478515625, "loss": 0.7441, "rewards/accuracies": 0.75, "rewards/chosen": -0.7731969356536865, "rewards/margins": 1.908055305480957, "rewards/rejected": -2.6812522411346436, "step": 5248 }, { "epoch": 0.61, "learning_rate": 1.1858981929845281e-07, "logits/chosen": -2.6520328521728516, "logits/rejected": -2.3169631958007812, "logps/chosen": -184.47994995117188, "logps/rejected": -342.31231689453125, "loss": 0.1786, "rewards/accuracies": 1.0, "rewards/chosen": -1.0655137300491333, "rewards/margins": 3.17734956741333, "rewards/rejected": -4.242863178253174, "step": 5249 }, { "epoch": 0.61, "learning_rate": 1.1855438762253455e-07, "logits/chosen": -2.10669207572937, "logits/rejected": -1.9971015453338623, "logps/chosen": -125.4957275390625, "logps/rejected": -237.892333984375, "loss": 0.5254, "rewards/accuracies": 0.75, "rewards/chosen": -0.857762336730957, "rewards/margins": 1.807873249053955, "rewards/rejected": -2.665635585784912, "step": 5250 }, { "epoch": 0.61, "learning_rate": 1.1851895594661627e-07, "logits/chosen": -2.4672963619232178, "logits/rejected": -2.4392919540405273, "logps/chosen": -364.3828430175781, "logps/rejected": -357.29742431640625, "loss": 0.2152, "rewards/accuracies": 0.875, "rewards/chosen": 0.07487642765045166, "rewards/margins": 3.1160213947296143, "rewards/rejected": -3.0411453247070312, "step": 5251 }, { "epoch": 0.61, "learning_rate": 1.1848352427069799e-07, "logits/chosen": -2.539534330368042, "logits/rejected": -2.6610054969787598, "logps/chosen": -348.3246154785156, "logps/rejected": -225.27252197265625, "loss": 0.3165, "rewards/accuracies": 0.75, "rewards/chosen": -0.714199423789978, "rewards/margins": 2.5985474586486816, "rewards/rejected": -3.31274676322937, "step": 5252 }, { "epoch": 0.61, "learning_rate": 1.1844809259477974e-07, "logits/chosen": -1.9542683362960815, "logits/rejected": -2.215526580810547, "logps/chosen": -202.9188995361328, "logps/rejected": -197.16281127929688, "loss": 0.3073, "rewards/accuracies": 1.0, "rewards/chosen": -0.1514233648777008, "rewards/margins": 1.2696161270141602, "rewards/rejected": -1.4210395812988281, "step": 5253 }, { "epoch": 0.61, "learning_rate": 1.1841266091886146e-07, "logits/chosen": -2.6250462532043457, "logits/rejected": -2.8109302520751953, "logps/chosen": -354.80133056640625, "logps/rejected": -291.2445983886719, "loss": 0.6602, "rewards/accuracies": 0.5, "rewards/chosen": -1.7423521280288696, "rewards/margins": 1.6311440467834473, "rewards/rejected": -3.3734960556030273, "step": 5254 }, { "epoch": 0.61, "learning_rate": 1.1837722924294318e-07, "logits/chosen": -2.140439510345459, "logits/rejected": -1.7785425186157227, "logps/chosen": -396.25030517578125, "logps/rejected": -451.5938720703125, "loss": 0.3564, "rewards/accuracies": 0.875, "rewards/chosen": -0.484535276889801, "rewards/margins": 3.134540319442749, "rewards/rejected": -3.6190755367279053, "step": 5255 }, { "epoch": 0.61, "learning_rate": 1.1834179756702492e-07, "logits/chosen": -2.1213769912719727, "logits/rejected": -1.976830005645752, "logps/chosen": -162.17613220214844, "logps/rejected": -218.14202880859375, "loss": 0.3676, "rewards/accuracies": 0.75, "rewards/chosen": -1.1742827892303467, "rewards/margins": 1.604225516319275, "rewards/rejected": -2.778508186340332, "step": 5256 }, { "epoch": 0.61, "learning_rate": 1.1830636589110664e-07, "logits/chosen": -3.048300266265869, "logits/rejected": -3.05981707572937, "logps/chosen": -300.2829284667969, "logps/rejected": -255.86085510253906, "loss": 0.2255, "rewards/accuracies": 0.875, "rewards/chosen": -1.1522409915924072, "rewards/margins": 3.170846939086914, "rewards/rejected": -4.3230881690979, "step": 5257 }, { "epoch": 0.61, "learning_rate": 1.1827093421518836e-07, "logits/chosen": -2.165253162384033, "logits/rejected": -2.09025239944458, "logps/chosen": -425.52862548828125, "logps/rejected": -309.7415771484375, "loss": 0.2786, "rewards/accuracies": 0.875, "rewards/chosen": -0.412817120552063, "rewards/margins": 2.428269147872925, "rewards/rejected": -2.8410863876342773, "step": 5258 }, { "epoch": 0.61, "learning_rate": 1.1823550253927011e-07, "logits/chosen": -1.8609963655471802, "logits/rejected": -1.7712948322296143, "logps/chosen": -261.3502197265625, "logps/rejected": -180.3934326171875, "loss": 0.3166, "rewards/accuracies": 0.875, "rewards/chosen": -0.3460504412651062, "rewards/margins": 1.8323123455047607, "rewards/rejected": -2.1783628463745117, "step": 5259 }, { "epoch": 0.61, "learning_rate": 1.1820007086335183e-07, "logits/chosen": -2.299649238586426, "logits/rejected": -2.5753514766693115, "logps/chosen": -296.44781494140625, "logps/rejected": -239.747314453125, "loss": 2.5189, "rewards/accuracies": 0.375, "rewards/chosen": -3.460230827331543, "rewards/margins": -0.9753597974777222, "rewards/rejected": -2.4848709106445312, "step": 5260 }, { "epoch": 0.61, "learning_rate": 1.1816463918743357e-07, "logits/chosen": -2.736708164215088, "logits/rejected": -2.3362793922424316, "logps/chosen": -134.73666381835938, "logps/rejected": -189.3877716064453, "loss": 0.3995, "rewards/accuracies": 0.625, "rewards/chosen": -0.29064756631851196, "rewards/margins": 1.4641447067260742, "rewards/rejected": -1.7547922134399414, "step": 5261 }, { "epoch": 0.61, "learning_rate": 1.1812920751151529e-07, "logits/chosen": -1.4951770305633545, "logits/rejected": -1.9950357675552368, "logps/chosen": -302.83184814453125, "logps/rejected": -196.23143005371094, "loss": 0.3007, "rewards/accuracies": 0.875, "rewards/chosen": -0.618175745010376, "rewards/margins": 1.6477444171905518, "rewards/rejected": -2.2659201622009277, "step": 5262 }, { "epoch": 0.61, "learning_rate": 1.1809377583559701e-07, "logits/chosen": -2.048804759979248, "logits/rejected": -2.1541364192962646, "logps/chosen": -321.9862976074219, "logps/rejected": -218.3290557861328, "loss": 0.2272, "rewards/accuracies": 1.0, "rewards/chosen": -0.5715718865394592, "rewards/margins": 1.8715957403182983, "rewards/rejected": -2.4431676864624023, "step": 5263 }, { "epoch": 0.61, "learning_rate": 1.1805834415967873e-07, "logits/chosen": -2.424882173538208, "logits/rejected": -2.6252951622009277, "logps/chosen": -404.2431945800781, "logps/rejected": -268.65484619140625, "loss": 0.1829, "rewards/accuracies": 0.875, "rewards/chosen": -0.5891562104225159, "rewards/margins": 2.7115139961242676, "rewards/rejected": -3.3006701469421387, "step": 5264 }, { "epoch": 0.61, "learning_rate": 1.1802291248376048e-07, "logits/chosen": -1.9521167278289795, "logits/rejected": -2.2592697143554688, "logps/chosen": -360.3934326171875, "logps/rejected": -245.67391967773438, "loss": 0.253, "rewards/accuracies": 1.0, "rewards/chosen": -0.08789709210395813, "rewards/margins": 1.7856574058532715, "rewards/rejected": -1.8735544681549072, "step": 5265 }, { "epoch": 0.61, "learning_rate": 1.179874808078422e-07, "logits/chosen": -2.68017840385437, "logits/rejected": -2.656266212463379, "logps/chosen": -294.82415771484375, "logps/rejected": -192.40499877929688, "loss": 0.2723, "rewards/accuracies": 0.875, "rewards/chosen": -0.7716482281684875, "rewards/margins": 1.8566749095916748, "rewards/rejected": -2.6283230781555176, "step": 5266 }, { "epoch": 0.61, "learning_rate": 1.1795204913192394e-07, "logits/chosen": -2.299527168273926, "logits/rejected": -2.500115394592285, "logps/chosen": -347.5217590332031, "logps/rejected": -284.2477722167969, "loss": 0.5421, "rewards/accuracies": 0.875, "rewards/chosen": -0.2631189227104187, "rewards/margins": 1.4383418560028076, "rewards/rejected": -1.701460838317871, "step": 5267 }, { "epoch": 0.61, "learning_rate": 1.1791661745600566e-07, "logits/chosen": -2.304253578186035, "logits/rejected": -2.298375129699707, "logps/chosen": -341.73443603515625, "logps/rejected": -320.4442138671875, "loss": 0.48, "rewards/accuracies": 0.75, "rewards/chosen": -0.8404834270477295, "rewards/margins": 3.031440496444702, "rewards/rejected": -3.8719236850738525, "step": 5268 }, { "epoch": 0.61, "learning_rate": 1.1788118578008739e-07, "logits/chosen": -2.5838303565979004, "logits/rejected": -2.1793441772460938, "logps/chosen": -158.73004150390625, "logps/rejected": -322.43341064453125, "loss": 0.6354, "rewards/accuracies": 0.875, "rewards/chosen": -1.3438777923583984, "rewards/margins": 0.8087022304534912, "rewards/rejected": -2.1525802612304688, "step": 5269 }, { "epoch": 0.61, "learning_rate": 1.1784575410416912e-07, "logits/chosen": -1.9441759586334229, "logits/rejected": -1.7470110654830933, "logps/chosen": -265.01055908203125, "logps/rejected": -297.27178955078125, "loss": 0.3428, "rewards/accuracies": 0.875, "rewards/chosen": -0.7761585116386414, "rewards/margins": 1.5984714031219482, "rewards/rejected": -2.3746297359466553, "step": 5270 }, { "epoch": 0.61, "learning_rate": 1.1781032242825086e-07, "logits/chosen": -1.7500593662261963, "logits/rejected": -1.811596393585205, "logps/chosen": -409.02783203125, "logps/rejected": -427.439697265625, "loss": 0.3294, "rewards/accuracies": 0.875, "rewards/chosen": -0.22791367769241333, "rewards/margins": 2.1136672496795654, "rewards/rejected": -2.341580867767334, "step": 5271 }, { "epoch": 0.61, "learning_rate": 1.1777489075233258e-07, "logits/chosen": -2.2640557289123535, "logits/rejected": -2.283284902572632, "logps/chosen": -259.98687744140625, "logps/rejected": -206.06663513183594, "loss": 0.395, "rewards/accuracies": 0.75, "rewards/chosen": -0.5183029770851135, "rewards/margins": 3.388443946838379, "rewards/rejected": -3.9067466259002686, "step": 5272 }, { "epoch": 0.61, "learning_rate": 1.1773945907641431e-07, "logits/chosen": -2.5991086959838867, "logits/rejected": -2.5637495517730713, "logps/chosen": -248.1488800048828, "logps/rejected": -237.92465209960938, "loss": 0.4125, "rewards/accuracies": 0.875, "rewards/chosen": -0.9665053486824036, "rewards/margins": 1.0807572603225708, "rewards/rejected": -2.047262668609619, "step": 5273 }, { "epoch": 0.61, "learning_rate": 1.1770402740049604e-07, "logits/chosen": -2.4517123699188232, "logits/rejected": -2.3901944160461426, "logps/chosen": -272.49908447265625, "logps/rejected": -339.0950012207031, "loss": 0.4553, "rewards/accuracies": 0.875, "rewards/chosen": -0.5718115568161011, "rewards/margins": 3.2487692832946777, "rewards/rejected": -3.8205807209014893, "step": 5274 }, { "epoch": 0.61, "learning_rate": 1.1766859572457776e-07, "logits/chosen": -2.2701027393341064, "logits/rejected": -2.2160847187042236, "logps/chosen": -351.8022766113281, "logps/rejected": -298.6549072265625, "loss": 0.3985, "rewards/accuracies": 0.875, "rewards/chosen": -0.33423224091529846, "rewards/margins": 1.826180338859558, "rewards/rejected": -2.160412549972534, "step": 5275 }, { "epoch": 0.61, "learning_rate": 1.1763316404865949e-07, "logits/chosen": -2.407958984375, "logits/rejected": -2.7222139835357666, "logps/chosen": -481.1859130859375, "logps/rejected": -296.8026123046875, "loss": 0.53, "rewards/accuracies": 0.625, "rewards/chosen": -1.6143051385879517, "rewards/margins": 0.7701908349990845, "rewards/rejected": -2.3844962120056152, "step": 5276 }, { "epoch": 0.61, "learning_rate": 1.1759773237274123e-07, "logits/chosen": -2.577221632003784, "logits/rejected": -2.4440903663635254, "logps/chosen": -337.23126220703125, "logps/rejected": -292.03350830078125, "loss": 0.1974, "rewards/accuracies": 1.0, "rewards/chosen": -1.6447274684906006, "rewards/margins": 2.385129928588867, "rewards/rejected": -4.029857158660889, "step": 5277 }, { "epoch": 0.61, "learning_rate": 1.1756230069682296e-07, "logits/chosen": -2.5873584747314453, "logits/rejected": -2.8606417179107666, "logps/chosen": -229.9432373046875, "logps/rejected": -121.72816467285156, "loss": 0.2367, "rewards/accuracies": 1.0, "rewards/chosen": -0.17829251289367676, "rewards/margins": 2.6714322566986084, "rewards/rejected": -2.849724769592285, "step": 5278 }, { "epoch": 0.61, "learning_rate": 1.1752686902090469e-07, "logits/chosen": -2.697476863861084, "logits/rejected": -2.474026679992676, "logps/chosen": -229.49891662597656, "logps/rejected": -279.8469543457031, "loss": 0.3051, "rewards/accuracies": 0.875, "rewards/chosen": -0.5056782960891724, "rewards/margins": 2.6878819465637207, "rewards/rejected": -3.1935601234436035, "step": 5279 }, { "epoch": 0.61, "learning_rate": 1.1749143734498641e-07, "logits/chosen": -2.7505712509155273, "logits/rejected": -2.721304416656494, "logps/chosen": -376.27740478515625, "logps/rejected": -289.19873046875, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": -0.931423008441925, "rewards/margins": 3.0802342891693115, "rewards/rejected": -4.011657238006592, "step": 5280 }, { "epoch": 0.61, "learning_rate": 1.1745600566906813e-07, "logits/chosen": -2.03131103515625, "logits/rejected": -2.1894495487213135, "logps/chosen": -283.6620178222656, "logps/rejected": -214.67050170898438, "loss": 0.5055, "rewards/accuracies": 0.75, "rewards/chosen": -1.1678783893585205, "rewards/margins": 1.2625603675842285, "rewards/rejected": -2.430438756942749, "step": 5281 }, { "epoch": 0.61, "learning_rate": 1.1742057399314987e-07, "logits/chosen": -2.5550785064697266, "logits/rejected": -2.517813205718994, "logps/chosen": -139.17137145996094, "logps/rejected": -179.29153442382812, "loss": 0.3278, "rewards/accuracies": 0.75, "rewards/chosen": -1.7388405799865723, "rewards/margins": 2.288928508758545, "rewards/rejected": -4.027769088745117, "step": 5282 }, { "epoch": 0.61, "learning_rate": 1.173851423172316e-07, "logits/chosen": -2.7774856090545654, "logits/rejected": -2.6158766746520996, "logps/chosen": -198.04556274414062, "logps/rejected": -222.92657470703125, "loss": 0.2694, "rewards/accuracies": 0.75, "rewards/chosen": -0.6858325004577637, "rewards/margins": 2.2583394050598145, "rewards/rejected": -2.944171905517578, "step": 5283 }, { "epoch": 0.61, "learning_rate": 1.1734971064131334e-07, "logits/chosen": -2.2488937377929688, "logits/rejected": -2.3045594692230225, "logps/chosen": -491.4161071777344, "logps/rejected": -429.3929138183594, "loss": 0.3314, "rewards/accuracies": 0.875, "rewards/chosen": -0.27050426602363586, "rewards/margins": 1.6516382694244385, "rewards/rejected": -1.922142505645752, "step": 5284 }, { "epoch": 0.61, "learning_rate": 1.1731427896539506e-07, "logits/chosen": -1.9537928104400635, "logits/rejected": -1.8455679416656494, "logps/chosen": -270.1189270019531, "logps/rejected": -272.5382080078125, "loss": 0.3381, "rewards/accuracies": 0.875, "rewards/chosen": -0.603519856929779, "rewards/margins": 1.5209020376205444, "rewards/rejected": -2.1244218349456787, "step": 5285 }, { "epoch": 0.61, "learning_rate": 1.1727884728947678e-07, "logits/chosen": -2.286505699157715, "logits/rejected": -2.25192928314209, "logps/chosen": -357.8780517578125, "logps/rejected": -378.70257568359375, "loss": 0.2016, "rewards/accuracies": 0.875, "rewards/chosen": -0.5772948265075684, "rewards/margins": 3.2156221866607666, "rewards/rejected": -3.792917013168335, "step": 5286 }, { "epoch": 0.62, "learning_rate": 1.1724341561355852e-07, "logits/chosen": -2.4928858280181885, "logits/rejected": -2.6404378414154053, "logps/chosen": -260.2478332519531, "logps/rejected": -251.83070373535156, "loss": 0.2312, "rewards/accuracies": 0.875, "rewards/chosen": 0.005236834287643433, "rewards/margins": 3.4741201400756836, "rewards/rejected": -3.4688832759857178, "step": 5287 }, { "epoch": 0.62, "learning_rate": 1.1720798393764024e-07, "logits/chosen": -2.8182716369628906, "logits/rejected": -2.631560802459717, "logps/chosen": -380.1495666503906, "logps/rejected": -362.5925598144531, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": -0.05954378843307495, "rewards/margins": 4.238600730895996, "rewards/rejected": -4.298144817352295, "step": 5288 }, { "epoch": 0.62, "learning_rate": 1.1717255226172197e-07, "logits/chosen": -2.528200626373291, "logits/rejected": -2.6138134002685547, "logps/chosen": -261.32989501953125, "logps/rejected": -225.1515350341797, "loss": 0.7444, "rewards/accuracies": 0.875, "rewards/chosen": -1.2387347221374512, "rewards/margins": 2.447575092315674, "rewards/rejected": -3.686309576034546, "step": 5289 }, { "epoch": 0.62, "learning_rate": 1.1713712058580371e-07, "logits/chosen": -2.6036620140075684, "logits/rejected": -2.635847806930542, "logps/chosen": -143.15992736816406, "logps/rejected": -219.8861083984375, "loss": 0.2193, "rewards/accuracies": 0.875, "rewards/chosen": -0.3654455840587616, "rewards/margins": 2.343755006790161, "rewards/rejected": -2.709200859069824, "step": 5290 }, { "epoch": 0.62, "learning_rate": 1.1710168890988543e-07, "logits/chosen": -1.97577965259552, "logits/rejected": -2.262843132019043, "logps/chosen": -334.03173828125, "logps/rejected": -256.16815185546875, "loss": 0.4685, "rewards/accuracies": 0.875, "rewards/chosen": -0.5646648406982422, "rewards/margins": 1.7210431098937988, "rewards/rejected": -2.28570818901062, "step": 5291 }, { "epoch": 0.62, "learning_rate": 1.1706625723396715e-07, "logits/chosen": -2.8297572135925293, "logits/rejected": -2.787078619003296, "logps/chosen": -490.55078125, "logps/rejected": -354.1529235839844, "loss": 0.2165, "rewards/accuracies": 1.0, "rewards/chosen": -0.7940122485160828, "rewards/margins": 2.617335796356201, "rewards/rejected": -3.411348342895508, "step": 5292 }, { "epoch": 0.62, "learning_rate": 1.1703082555804889e-07, "logits/chosen": -1.3562649488449097, "logits/rejected": -1.8054382801055908, "logps/chosen": -468.336669921875, "logps/rejected": -263.1922607421875, "loss": 0.3449, "rewards/accuracies": 0.875, "rewards/chosen": -0.7919514179229736, "rewards/margins": 1.5936633348464966, "rewards/rejected": -2.3856146335601807, "step": 5293 }, { "epoch": 0.62, "learning_rate": 1.1699539388213062e-07, "logits/chosen": -2.2575552463531494, "logits/rejected": -2.2675092220306396, "logps/chosen": -193.53082275390625, "logps/rejected": -230.63601684570312, "loss": 0.4661, "rewards/accuracies": 0.5, "rewards/chosen": -1.1878875494003296, "rewards/margins": 1.904371738433838, "rewards/rejected": -3.092259407043457, "step": 5294 }, { "epoch": 0.62, "learning_rate": 1.1695996220621236e-07, "logits/chosen": -1.7577286958694458, "logits/rejected": -1.8983138799667358, "logps/chosen": -314.48284912109375, "logps/rejected": -276.42279052734375, "loss": 0.8828, "rewards/accuracies": 0.75, "rewards/chosen": -1.8643360137939453, "rewards/margins": 0.9514135718345642, "rewards/rejected": -2.8157496452331543, "step": 5295 }, { "epoch": 0.62, "learning_rate": 1.1692453053029408e-07, "logits/chosen": -2.4131877422332764, "logits/rejected": -2.2702126502990723, "logps/chosen": -124.94967651367188, "logps/rejected": -150.53201293945312, "loss": 0.6018, "rewards/accuracies": 0.875, "rewards/chosen": 0.1203969344496727, "rewards/margins": 0.7300107479095459, "rewards/rejected": -0.6096138954162598, "step": 5296 }, { "epoch": 0.62, "learning_rate": 1.168890988543758e-07, "logits/chosen": -2.5262932777404785, "logits/rejected": -2.503272771835327, "logps/chosen": -207.5669403076172, "logps/rejected": -263.85546875, "loss": 0.6127, "rewards/accuracies": 0.75, "rewards/chosen": -0.9905283451080322, "rewards/margins": 1.005749225616455, "rewards/rejected": -1.9962774515151978, "step": 5297 }, { "epoch": 0.62, "learning_rate": 1.1685366717845754e-07, "logits/chosen": -2.1826329231262207, "logits/rejected": -2.363375186920166, "logps/chosen": -406.6322021484375, "logps/rejected": -311.2193908691406, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": 0.057021260261535645, "rewards/margins": 2.5437793731689453, "rewards/rejected": -2.486758232116699, "step": 5298 }, { "epoch": 0.62, "learning_rate": 1.1681823550253926e-07, "logits/chosen": -2.3317127227783203, "logits/rejected": -2.0828559398651123, "logps/chosen": -120.66413879394531, "logps/rejected": -179.9330596923828, "loss": 0.3736, "rewards/accuracies": 1.0, "rewards/chosen": -0.6312776207923889, "rewards/margins": 1.0773752927780151, "rewards/rejected": -1.7086528539657593, "step": 5299 }, { "epoch": 0.62, "learning_rate": 1.16782803826621e-07, "logits/chosen": -2.1027679443359375, "logits/rejected": -2.2560501098632812, "logps/chosen": -353.9442138671875, "logps/rejected": -382.62567138671875, "loss": 0.7868, "rewards/accuracies": 0.75, "rewards/chosen": -0.801364541053772, "rewards/margins": 1.6687010526657104, "rewards/rejected": -2.4700655937194824, "step": 5300 }, { "epoch": 0.62, "learning_rate": 1.1674737215070273e-07, "logits/chosen": -1.9048185348510742, "logits/rejected": -2.0334906578063965, "logps/chosen": -267.0411376953125, "logps/rejected": -209.96096801757812, "loss": 0.6011, "rewards/accuracies": 0.5, "rewards/chosen": -0.6247832179069519, "rewards/margins": 1.2106143236160278, "rewards/rejected": -1.835397481918335, "step": 5301 }, { "epoch": 0.62, "learning_rate": 1.1671194047478445e-07, "logits/chosen": -2.2913894653320312, "logits/rejected": -2.5170986652374268, "logps/chosen": -397.20947265625, "logps/rejected": -285.99493408203125, "loss": 0.3304, "rewards/accuracies": 0.75, "rewards/chosen": -0.037906989455223083, "rewards/margins": 1.9353054761886597, "rewards/rejected": -1.973212480545044, "step": 5302 }, { "epoch": 0.62, "learning_rate": 1.1667650879886618e-07, "logits/chosen": -1.8038997650146484, "logits/rejected": -1.9182120561599731, "logps/chosen": -362.06390380859375, "logps/rejected": -321.1721496582031, "loss": 0.4378, "rewards/accuracies": 0.875, "rewards/chosen": -0.24410377442836761, "rewards/margins": 1.5396840572357178, "rewards/rejected": -1.7837879657745361, "step": 5303 }, { "epoch": 0.62, "learning_rate": 1.1664107712294791e-07, "logits/chosen": -2.421043872833252, "logits/rejected": -2.266037940979004, "logps/chosen": -261.90008544921875, "logps/rejected": -283.0332946777344, "loss": 0.4065, "rewards/accuracies": 0.75, "rewards/chosen": -0.12445847690105438, "rewards/margins": 2.1373724937438965, "rewards/rejected": -2.261831045150757, "step": 5304 }, { "epoch": 0.62, "learning_rate": 1.1660564544702963e-07, "logits/chosen": -2.56738018989563, "logits/rejected": -2.5271265506744385, "logps/chosen": -124.43252563476562, "logps/rejected": -237.01507568359375, "loss": 0.1104, "rewards/accuracies": 1.0, "rewards/chosen": -0.5398470163345337, "rewards/margins": 3.5363097190856934, "rewards/rejected": -4.0761566162109375, "step": 5305 }, { "epoch": 0.62, "learning_rate": 1.1657021377111138e-07, "logits/chosen": -2.570099353790283, "logits/rejected": -2.3443877696990967, "logps/chosen": -232.73184204101562, "logps/rejected": -283.84576416015625, "loss": 0.2621, "rewards/accuracies": 0.875, "rewards/chosen": -0.7346103191375732, "rewards/margins": 1.7430243492126465, "rewards/rejected": -2.4776346683502197, "step": 5306 }, { "epoch": 0.62, "learning_rate": 1.165347820951931e-07, "logits/chosen": -2.7462167739868164, "logits/rejected": -2.8138694763183594, "logps/chosen": -282.20477294921875, "logps/rejected": -305.8728942871094, "loss": 0.4351, "rewards/accuracies": 0.625, "rewards/chosen": -0.747832179069519, "rewards/margins": 1.5048048496246338, "rewards/rejected": -2.2526373863220215, "step": 5307 }, { "epoch": 0.62, "learning_rate": 1.1649935041927483e-07, "logits/chosen": -2.152541160583496, "logits/rejected": -2.078327178955078, "logps/chosen": -166.49655151367188, "logps/rejected": -204.6988067626953, "loss": 0.7467, "rewards/accuracies": 0.625, "rewards/chosen": -2.225314140319824, "rewards/margins": 0.4870728850364685, "rewards/rejected": -2.7123870849609375, "step": 5308 }, { "epoch": 0.62, "learning_rate": 1.1646391874335655e-07, "logits/chosen": -2.783792734146118, "logits/rejected": -2.323777914047241, "logps/chosen": -157.93177795410156, "logps/rejected": -353.5307922363281, "loss": 0.0901, "rewards/accuracies": 1.0, "rewards/chosen": -1.0466148853302002, "rewards/margins": 5.291600227355957, "rewards/rejected": -6.33821439743042, "step": 5309 }, { "epoch": 0.62, "learning_rate": 1.1642848706743828e-07, "logits/chosen": -2.063654899597168, "logits/rejected": -1.9952524900436401, "logps/chosen": -407.60308837890625, "logps/rejected": -301.52935791015625, "loss": 0.2581, "rewards/accuracies": 1.0, "rewards/chosen": -0.4817858338356018, "rewards/margins": 2.077347755432129, "rewards/rejected": -2.559133529663086, "step": 5310 }, { "epoch": 0.62, "learning_rate": 1.1639305539152e-07, "logits/chosen": -2.5035603046417236, "logits/rejected": -2.645695447921753, "logps/chosen": -445.1630859375, "logps/rejected": -417.775146484375, "loss": 0.5402, "rewards/accuracies": 0.625, "rewards/chosen": -1.149680256843567, "rewards/margins": 1.2361507415771484, "rewards/rejected": -2.385831117630005, "step": 5311 }, { "epoch": 0.62, "learning_rate": 1.1635762371560175e-07, "logits/chosen": -2.6594557762145996, "logits/rejected": -2.549384355545044, "logps/chosen": -248.6284637451172, "logps/rejected": -242.2784423828125, "loss": 0.3487, "rewards/accuracies": 0.875, "rewards/chosen": -1.7310099601745605, "rewards/margins": 1.6637134552001953, "rewards/rejected": -3.394723415374756, "step": 5312 }, { "epoch": 0.62, "learning_rate": 1.1632219203968348e-07, "logits/chosen": -2.1348092555999756, "logits/rejected": -2.3476319313049316, "logps/chosen": -274.33599853515625, "logps/rejected": -204.35475158691406, "loss": 0.8265, "rewards/accuracies": 0.5, "rewards/chosen": -1.228765845298767, "rewards/margins": 1.54368257522583, "rewards/rejected": -2.7724485397338867, "step": 5313 }, { "epoch": 0.62, "learning_rate": 1.162867603637652e-07, "logits/chosen": -2.0720951557159424, "logits/rejected": -2.3176915645599365, "logps/chosen": -274.685546875, "logps/rejected": -226.14480590820312, "loss": 0.3857, "rewards/accuracies": 0.875, "rewards/chosen": -0.49419498443603516, "rewards/margins": 1.8361411094665527, "rewards/rejected": -2.330336093902588, "step": 5314 }, { "epoch": 0.62, "learning_rate": 1.1625132868784693e-07, "logits/chosen": -1.8268955945968628, "logits/rejected": -2.127134323120117, "logps/chosen": -307.13525390625, "logps/rejected": -189.55226135253906, "loss": 0.3793, "rewards/accuracies": 0.75, "rewards/chosen": -0.5510401725769043, "rewards/margins": 2.268220901489258, "rewards/rejected": -2.819261074066162, "step": 5315 }, { "epoch": 0.62, "learning_rate": 1.1621589701192866e-07, "logits/chosen": -2.281550645828247, "logits/rejected": -1.7439478635787964, "logps/chosen": -218.48455810546875, "logps/rejected": -279.8666076660156, "loss": 0.5154, "rewards/accuracies": 0.875, "rewards/chosen": -1.0256143808364868, "rewards/margins": 1.0093443393707275, "rewards/rejected": -2.034958839416504, "step": 5316 }, { "epoch": 0.62, "learning_rate": 1.1618046533601038e-07, "logits/chosen": -1.5144336223602295, "logits/rejected": -1.6629301309585571, "logps/chosen": -289.74267578125, "logps/rejected": -322.62677001953125, "loss": 0.3827, "rewards/accuracies": 0.875, "rewards/chosen": -0.9496184587478638, "rewards/margins": 1.3883254528045654, "rewards/rejected": -2.3379440307617188, "step": 5317 }, { "epoch": 0.62, "learning_rate": 1.1614503366009213e-07, "logits/chosen": -2.073505401611328, "logits/rejected": -1.981796383857727, "logps/chosen": -271.1490478515625, "logps/rejected": -284.88458251953125, "loss": 0.2236, "rewards/accuracies": 0.875, "rewards/chosen": -0.7359215617179871, "rewards/margins": 2.2834086418151855, "rewards/rejected": -3.0193305015563965, "step": 5318 }, { "epoch": 0.62, "learning_rate": 1.1610960198417385e-07, "logits/chosen": -2.1382827758789062, "logits/rejected": -2.2655844688415527, "logps/chosen": -190.14688110351562, "logps/rejected": -262.853515625, "loss": 0.5129, "rewards/accuracies": 0.625, "rewards/chosen": -0.8454734086990356, "rewards/margins": 1.6533787250518799, "rewards/rejected": -2.498852252960205, "step": 5319 }, { "epoch": 0.62, "learning_rate": 1.1607417030825557e-07, "logits/chosen": -2.4889798164367676, "logits/rejected": -2.230797290802002, "logps/chosen": -135.45750427246094, "logps/rejected": -424.9656982421875, "loss": 0.1801, "rewards/accuracies": 1.0, "rewards/chosen": -0.34320545196533203, "rewards/margins": 2.677400827407837, "rewards/rejected": -3.02060604095459, "step": 5320 }, { "epoch": 0.62, "learning_rate": 1.160387386323373e-07, "logits/chosen": -2.0018177032470703, "logits/rejected": -1.8580577373504639, "logps/chosen": -344.46063232421875, "logps/rejected": -311.4071960449219, "loss": 0.4515, "rewards/accuracies": 0.875, "rewards/chosen": -0.644399106502533, "rewards/margins": 1.293992519378662, "rewards/rejected": -1.9383918046951294, "step": 5321 }, { "epoch": 0.62, "learning_rate": 1.1600330695641903e-07, "logits/chosen": -2.153836250305176, "logits/rejected": -2.3938827514648438, "logps/chosen": -326.6735534667969, "logps/rejected": -279.66485595703125, "loss": 0.2744, "rewards/accuracies": 0.875, "rewards/chosen": -1.3750455379486084, "rewards/margins": 1.6775996685028076, "rewards/rejected": -3.052645206451416, "step": 5322 }, { "epoch": 0.62, "learning_rate": 1.1596787528050075e-07, "logits/chosen": -2.844278335571289, "logits/rejected": -2.741501569747925, "logps/chosen": -130.61639404296875, "logps/rejected": -207.46424865722656, "loss": 0.667, "rewards/accuracies": 0.75, "rewards/chosen": -0.9724605083465576, "rewards/margins": 1.2323251962661743, "rewards/rejected": -2.2047853469848633, "step": 5323 }, { "epoch": 0.62, "learning_rate": 1.159324436045825e-07, "logits/chosen": -2.1201515197753906, "logits/rejected": -1.880948781967163, "logps/chosen": -336.17327880859375, "logps/rejected": -437.87567138671875, "loss": 0.7724, "rewards/accuracies": 0.625, "rewards/chosen": -0.5723652839660645, "rewards/margins": 1.1516891717910767, "rewards/rejected": -1.7240545749664307, "step": 5324 }, { "epoch": 0.62, "learning_rate": 1.1589701192866422e-07, "logits/chosen": -2.077421188354492, "logits/rejected": -2.178849697113037, "logps/chosen": -310.98297119140625, "logps/rejected": -265.6669616699219, "loss": 0.4356, "rewards/accuracies": 0.75, "rewards/chosen": -0.9609177112579346, "rewards/margins": 2.256821632385254, "rewards/rejected": -3.2177393436431885, "step": 5325 }, { "epoch": 0.62, "learning_rate": 1.1586158025274594e-07, "logits/chosen": -2.277005910873413, "logits/rejected": -2.312925338745117, "logps/chosen": -168.8188934326172, "logps/rejected": -220.8189697265625, "loss": 0.1608, "rewards/accuracies": 1.0, "rewards/chosen": -0.17282895743846893, "rewards/margins": 3.451234817504883, "rewards/rejected": -3.6240639686584473, "step": 5326 }, { "epoch": 0.62, "learning_rate": 1.1582614857682768e-07, "logits/chosen": -2.8460745811462402, "logits/rejected": -2.5967495441436768, "logps/chosen": -400.2037353515625, "logps/rejected": -283.1959228515625, "loss": 0.2978, "rewards/accuracies": 0.875, "rewards/chosen": -0.5235212445259094, "rewards/margins": 1.8115437030792236, "rewards/rejected": -2.3350648880004883, "step": 5327 }, { "epoch": 0.62, "learning_rate": 1.157907169009094e-07, "logits/chosen": -2.116426944732666, "logits/rejected": -2.1622731685638428, "logps/chosen": -385.31842041015625, "logps/rejected": -330.441650390625, "loss": 0.1049, "rewards/accuracies": 1.0, "rewards/chosen": -0.21976371109485626, "rewards/margins": 2.940495491027832, "rewards/rejected": -3.160259246826172, "step": 5328 }, { "epoch": 0.62, "learning_rate": 1.1575528522499115e-07, "logits/chosen": -1.7466893196105957, "logits/rejected": -2.0879921913146973, "logps/chosen": -270.5606689453125, "logps/rejected": -219.05068969726562, "loss": 1.85, "rewards/accuracies": 0.625, "rewards/chosen": -2.194166898727417, "rewards/margins": -0.014158755540847778, "rewards/rejected": -2.1800081729888916, "step": 5329 }, { "epoch": 0.62, "learning_rate": 1.1571985354907287e-07, "logits/chosen": -2.3867106437683105, "logits/rejected": -2.0094375610351562, "logps/chosen": -157.71710205078125, "logps/rejected": -297.6543273925781, "loss": 0.1551, "rewards/accuracies": 1.0, "rewards/chosen": -0.6314519643783569, "rewards/margins": 3.869785785675049, "rewards/rejected": -4.501237869262695, "step": 5330 }, { "epoch": 0.62, "learning_rate": 1.156844218731546e-07, "logits/chosen": -2.599961042404175, "logits/rejected": -2.5207571983337402, "logps/chosen": -123.40969848632812, "logps/rejected": -188.2261505126953, "loss": 0.44, "rewards/accuracies": 0.625, "rewards/chosen": -1.3310739994049072, "rewards/margins": 1.702453374862671, "rewards/rejected": -3.033527374267578, "step": 5331 }, { "epoch": 0.62, "learning_rate": 1.1564899019723633e-07, "logits/chosen": -1.8369178771972656, "logits/rejected": -1.98710036277771, "logps/chosen": -536.4494018554688, "logps/rejected": -325.2657775878906, "loss": 0.5485, "rewards/accuracies": 0.75, "rewards/chosen": -1.0740693807601929, "rewards/margins": 1.0516221523284912, "rewards/rejected": -2.1256914138793945, "step": 5332 }, { "epoch": 0.62, "learning_rate": 1.1561355852131805e-07, "logits/chosen": -2.590642213821411, "logits/rejected": -2.4282639026641846, "logps/chosen": -515.46826171875, "logps/rejected": -465.89794921875, "loss": 0.2467, "rewards/accuracies": 0.875, "rewards/chosen": -1.1489574909210205, "rewards/margins": 2.1513025760650635, "rewards/rejected": -3.300260305404663, "step": 5333 }, { "epoch": 0.62, "learning_rate": 1.1557812684539977e-07, "logits/chosen": -2.315420389175415, "logits/rejected": -2.304166316986084, "logps/chosen": -266.9661560058594, "logps/rejected": -252.50802612304688, "loss": 0.4396, "rewards/accuracies": 0.75, "rewards/chosen": -1.7433305978775024, "rewards/margins": 1.637261152267456, "rewards/rejected": -3.380591869354248, "step": 5334 }, { "epoch": 0.62, "learning_rate": 1.1554269516948152e-07, "logits/chosen": -2.2451415061950684, "logits/rejected": -2.2613637447357178, "logps/chosen": -338.59979248046875, "logps/rejected": -402.37005615234375, "loss": 0.6928, "rewards/accuracies": 0.75, "rewards/chosen": -0.9381811618804932, "rewards/margins": 2.0637519359588623, "rewards/rejected": -3.0019330978393555, "step": 5335 }, { "epoch": 0.62, "learning_rate": 1.1550726349356324e-07, "logits/chosen": -2.4758846759796143, "logits/rejected": -2.21002459526062, "logps/chosen": -422.95269775390625, "logps/rejected": -268.5511474609375, "loss": 0.532, "rewards/accuracies": 0.625, "rewards/chosen": -1.1272928714752197, "rewards/margins": 1.7336997985839844, "rewards/rejected": -2.860992908477783, "step": 5336 }, { "epoch": 0.62, "learning_rate": 1.1547183181764497e-07, "logits/chosen": -2.6345410346984863, "logits/rejected": -2.8673479557037354, "logps/chosen": -131.6870574951172, "logps/rejected": -170.91351318359375, "loss": 0.228, "rewards/accuracies": 0.875, "rewards/chosen": -1.1256211996078491, "rewards/margins": 2.353010654449463, "rewards/rejected": -3.4786319732666016, "step": 5337 }, { "epoch": 0.62, "learning_rate": 1.154364001417267e-07, "logits/chosen": -1.7189850807189941, "logits/rejected": -1.7581462860107422, "logps/chosen": -442.4598083496094, "logps/rejected": -451.1578674316406, "loss": 1.3066, "rewards/accuracies": 0.5, "rewards/chosen": -1.3435595035552979, "rewards/margins": -0.4952393174171448, "rewards/rejected": -0.8483201265335083, "step": 5338 }, { "epoch": 0.62, "learning_rate": 1.1540096846580842e-07, "logits/chosen": -2.337881565093994, "logits/rejected": -2.572523832321167, "logps/chosen": -165.08001708984375, "logps/rejected": -170.83181762695312, "loss": 0.2496, "rewards/accuracies": 1.0, "rewards/chosen": -0.2044268697500229, "rewards/margins": 2.4736313819885254, "rewards/rejected": -2.678058385848999, "step": 5339 }, { "epoch": 0.62, "learning_rate": 1.1536553678989015e-07, "logits/chosen": -2.501457929611206, "logits/rejected": -2.423488140106201, "logps/chosen": -285.71319580078125, "logps/rejected": -286.56060791015625, "loss": 0.2485, "rewards/accuracies": 0.875, "rewards/chosen": -0.6228606700897217, "rewards/margins": 3.6428256034851074, "rewards/rejected": -4.26568603515625, "step": 5340 }, { "epoch": 0.62, "learning_rate": 1.153301051139719e-07, "logits/chosen": -2.052064895629883, "logits/rejected": -2.06030011177063, "logps/chosen": -326.6309814453125, "logps/rejected": -293.8786315917969, "loss": 0.2024, "rewards/accuracies": 1.0, "rewards/chosen": -0.5786959528923035, "rewards/margins": 2.1206724643707275, "rewards/rejected": -2.699368715286255, "step": 5341 }, { "epoch": 0.62, "learning_rate": 1.1529467343805362e-07, "logits/chosen": -2.396091938018799, "logits/rejected": -2.5877866744995117, "logps/chosen": -362.53765869140625, "logps/rejected": -386.9617004394531, "loss": 0.3246, "rewards/accuracies": 0.75, "rewards/chosen": -0.3591059446334839, "rewards/margins": 2.194791316986084, "rewards/rejected": -2.5538971424102783, "step": 5342 }, { "epoch": 0.62, "learning_rate": 1.1525924176213535e-07, "logits/chosen": -2.252765655517578, "logits/rejected": -2.008424758911133, "logps/chosen": -246.1446075439453, "logps/rejected": -279.3450012207031, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": -0.4225243031978607, "rewards/margins": 2.2018089294433594, "rewards/rejected": -2.624333143234253, "step": 5343 }, { "epoch": 0.62, "learning_rate": 1.1522381008621707e-07, "logits/chosen": -2.8630661964416504, "logits/rejected": -2.850386142730713, "logps/chosen": -246.35342407226562, "logps/rejected": -225.23934936523438, "loss": 0.1342, "rewards/accuracies": 1.0, "rewards/chosen": -0.43753331899642944, "rewards/margins": 2.728153705596924, "rewards/rejected": -3.165687084197998, "step": 5344 }, { "epoch": 0.62, "learning_rate": 1.151883784102988e-07, "logits/chosen": -2.513031482696533, "logits/rejected": -2.0251822471618652, "logps/chosen": -69.95834350585938, "logps/rejected": -243.9511260986328, "loss": 0.3549, "rewards/accuracies": 0.75, "rewards/chosen": -0.5751447081565857, "rewards/margins": 2.300048351287842, "rewards/rejected": -2.8751931190490723, "step": 5345 }, { "epoch": 0.62, "learning_rate": 1.1515294673438052e-07, "logits/chosen": -2.6216540336608887, "logits/rejected": -2.7086193561553955, "logps/chosen": -268.7843017578125, "logps/rejected": -293.68548583984375, "loss": 0.281, "rewards/accuracies": 0.875, "rewards/chosen": 0.14242079854011536, "rewards/margins": 2.886920690536499, "rewards/rejected": -2.744499683380127, "step": 5346 }, { "epoch": 0.62, "learning_rate": 1.1511751505846227e-07, "logits/chosen": -2.2427892684936523, "logits/rejected": -2.435361385345459, "logps/chosen": -285.2143249511719, "logps/rejected": -184.26779174804688, "loss": 0.553, "rewards/accuracies": 0.625, "rewards/chosen": -1.1017125844955444, "rewards/margins": 1.1716386079788208, "rewards/rejected": -2.2733511924743652, "step": 5347 }, { "epoch": 0.62, "learning_rate": 1.1508208338254399e-07, "logits/chosen": -2.0735509395599365, "logits/rejected": -2.0336225032806396, "logps/chosen": -255.41664123535156, "logps/rejected": -267.1845397949219, "loss": 0.2048, "rewards/accuracies": 0.875, "rewards/chosen": -0.734046459197998, "rewards/margins": 2.8476364612579346, "rewards/rejected": -3.5816829204559326, "step": 5348 }, { "epoch": 0.62, "learning_rate": 1.1504665170662572e-07, "logits/chosen": -1.7602059841156006, "logits/rejected": -2.1081738471984863, "logps/chosen": -286.80865478515625, "logps/rejected": -250.80068969726562, "loss": 0.4324, "rewards/accuracies": 0.75, "rewards/chosen": -1.24528169631958, "rewards/margins": 2.0437862873077393, "rewards/rejected": -3.2890677452087402, "step": 5349 }, { "epoch": 0.62, "learning_rate": 1.1501122003070745e-07, "logits/chosen": -2.7501590251922607, "logits/rejected": -2.717268228530884, "logps/chosen": -131.63436889648438, "logps/rejected": -259.3874206542969, "loss": 0.4652, "rewards/accuracies": 0.625, "rewards/chosen": -0.49505820870399475, "rewards/margins": 1.6246461868286133, "rewards/rejected": -2.119704484939575, "step": 5350 }, { "epoch": 0.62, "learning_rate": 1.1497578835478917e-07, "logits/chosen": -2.369781494140625, "logits/rejected": -2.263546943664551, "logps/chosen": -168.73956298828125, "logps/rejected": -151.7049102783203, "loss": 0.351, "rewards/accuracies": 0.75, "rewards/chosen": -0.6330385208129883, "rewards/margins": 2.147521495819092, "rewards/rejected": -2.7805604934692383, "step": 5351 }, { "epoch": 0.62, "learning_rate": 1.149403566788709e-07, "logits/chosen": -2.0895814895629883, "logits/rejected": -2.2708892822265625, "logps/chosen": -183.67526245117188, "logps/rejected": -213.24270629882812, "loss": 1.1146, "rewards/accuracies": 0.625, "rewards/chosen": -2.0220611095428467, "rewards/margins": 1.0762057304382324, "rewards/rejected": -3.0982666015625, "step": 5352 }, { "epoch": 0.62, "learning_rate": 1.1490492500295264e-07, "logits/chosen": -1.9805409908294678, "logits/rejected": -2.4532768726348877, "logps/chosen": -137.63040161132812, "logps/rejected": -232.8541259765625, "loss": 0.1846, "rewards/accuracies": 0.875, "rewards/chosen": -0.9267908334732056, "rewards/margins": 3.1285951137542725, "rewards/rejected": -4.055385589599609, "step": 5353 }, { "epoch": 0.62, "learning_rate": 1.1486949332703436e-07, "logits/chosen": -2.632026433944702, "logits/rejected": -2.706714630126953, "logps/chosen": -106.12554931640625, "logps/rejected": -150.34532165527344, "loss": 0.2706, "rewards/accuracies": 0.875, "rewards/chosen": -0.8451471328735352, "rewards/margins": 2.107205390930176, "rewards/rejected": -2.952352285385132, "step": 5354 }, { "epoch": 0.62, "learning_rate": 1.148340616511161e-07, "logits/chosen": -2.433797597885132, "logits/rejected": -2.5247974395751953, "logps/chosen": -215.24429321289062, "logps/rejected": -258.6317138671875, "loss": 0.6516, "rewards/accuracies": 0.625, "rewards/chosen": -0.993907630443573, "rewards/margins": 1.2607725858688354, "rewards/rejected": -2.2546801567077637, "step": 5355 }, { "epoch": 0.62, "learning_rate": 1.1479862997519782e-07, "logits/chosen": -2.0873520374298096, "logits/rejected": -2.054530620574951, "logps/chosen": -311.9149475097656, "logps/rejected": -334.8275146484375, "loss": 1.0901, "rewards/accuracies": 0.5, "rewards/chosen": -1.218367099761963, "rewards/margins": 0.8420179486274719, "rewards/rejected": -2.06038498878479, "step": 5356 }, { "epoch": 0.62, "learning_rate": 1.1476319829927954e-07, "logits/chosen": -2.120436429977417, "logits/rejected": -2.16455078125, "logps/chosen": -395.1813049316406, "logps/rejected": -379.5257568359375, "loss": 0.2807, "rewards/accuracies": 0.875, "rewards/chosen": -0.1493827849626541, "rewards/margins": 2.4019041061401367, "rewards/rejected": -2.5512866973876953, "step": 5357 }, { "epoch": 0.62, "learning_rate": 1.1472776662336128e-07, "logits/chosen": -2.323474884033203, "logits/rejected": -2.125051498413086, "logps/chosen": -395.9935607910156, "logps/rejected": -270.6297302246094, "loss": 0.2705, "rewards/accuracies": 0.875, "rewards/chosen": -0.8298876285552979, "rewards/margins": 2.7952215671539307, "rewards/rejected": -3.6251096725463867, "step": 5358 }, { "epoch": 0.62, "learning_rate": 1.1469233494744301e-07, "logits/chosen": -2.578296184539795, "logits/rejected": -2.6547253131866455, "logps/chosen": -379.9794921875, "logps/rejected": -297.4342956542969, "loss": 0.2716, "rewards/accuracies": 0.875, "rewards/chosen": -0.788794755935669, "rewards/margins": 2.169572353363037, "rewards/rejected": -2.958367109298706, "step": 5359 }, { "epoch": 0.62, "learning_rate": 1.1465690327152475e-07, "logits/chosen": -2.479835033416748, "logits/rejected": -2.5383660793304443, "logps/chosen": -324.4447937011719, "logps/rejected": -273.3553771972656, "loss": 0.1848, "rewards/accuracies": 0.875, "rewards/chosen": -0.2654675245285034, "rewards/margins": 2.719896078109741, "rewards/rejected": -2.985363483428955, "step": 5360 }, { "epoch": 0.62, "learning_rate": 1.1462147159560647e-07, "logits/chosen": -2.1093058586120605, "logits/rejected": -2.2043099403381348, "logps/chosen": -362.1557922363281, "logps/rejected": -237.79739379882812, "loss": 0.9247, "rewards/accuracies": 0.75, "rewards/chosen": -1.7323381900787354, "rewards/margins": 1.5911972522735596, "rewards/rejected": -3.323535442352295, "step": 5361 }, { "epoch": 0.62, "learning_rate": 1.1458603991968819e-07, "logits/chosen": -2.0444445610046387, "logits/rejected": -2.354599952697754, "logps/chosen": -287.296630859375, "logps/rejected": -200.69656372070312, "loss": 0.4084, "rewards/accuracies": 0.875, "rewards/chosen": 0.05367937684059143, "rewards/margins": 1.4882746934890747, "rewards/rejected": -1.4345953464508057, "step": 5362 }, { "epoch": 0.62, "learning_rate": 1.1455060824376991e-07, "logits/chosen": -2.993025779724121, "logits/rejected": -2.957380771636963, "logps/chosen": -231.00228881835938, "logps/rejected": -284.85540771484375, "loss": 0.2742, "rewards/accuracies": 1.0, "rewards/chosen": -0.844107449054718, "rewards/margins": 2.6686673164367676, "rewards/rejected": -3.5127744674682617, "step": 5363 }, { "epoch": 0.62, "learning_rate": 1.1451517656785166e-07, "logits/chosen": -2.4452755451202393, "logits/rejected": -2.360866069793701, "logps/chosen": -216.28919982910156, "logps/rejected": -228.65545654296875, "loss": 0.2529, "rewards/accuracies": 0.875, "rewards/chosen": -0.7654356360435486, "rewards/margins": 1.808443307876587, "rewards/rejected": -2.5738790035247803, "step": 5364 }, { "epoch": 0.62, "learning_rate": 1.1447974489193338e-07, "logits/chosen": -2.5250790119171143, "logits/rejected": -2.3355093002319336, "logps/chosen": -147.6964569091797, "logps/rejected": -205.58584594726562, "loss": 0.5226, "rewards/accuracies": 0.625, "rewards/chosen": -2.055158853530884, "rewards/margins": 1.6498078107833862, "rewards/rejected": -3.7049665451049805, "step": 5365 }, { "epoch": 0.62, "learning_rate": 1.1444431321601512e-07, "logits/chosen": -2.4814791679382324, "logits/rejected": -2.82623028755188, "logps/chosen": -230.5892333984375, "logps/rejected": -118.79801940917969, "loss": 0.4237, "rewards/accuracies": 0.625, "rewards/chosen": -0.5208330154418945, "rewards/margins": 1.330108642578125, "rewards/rejected": -1.850941777229309, "step": 5366 }, { "epoch": 0.62, "learning_rate": 1.1440888154009684e-07, "logits/chosen": -1.7854995727539062, "logits/rejected": -2.1558923721313477, "logps/chosen": -419.88360595703125, "logps/rejected": -308.0103454589844, "loss": 0.7217, "rewards/accuracies": 0.375, "rewards/chosen": -0.8216978907585144, "rewards/margins": 0.766961932182312, "rewards/rejected": -1.5886598825454712, "step": 5367 }, { "epoch": 0.62, "learning_rate": 1.1437344986417856e-07, "logits/chosen": -2.8295204639434814, "logits/rejected": -2.5207693576812744, "logps/chosen": -311.2629089355469, "logps/rejected": -398.0264892578125, "loss": 0.2207, "rewards/accuracies": 1.0, "rewards/chosen": -1.0010223388671875, "rewards/margins": 2.2625441551208496, "rewards/rejected": -3.2635669708251953, "step": 5368 }, { "epoch": 0.62, "learning_rate": 1.143380181882603e-07, "logits/chosen": -2.4289450645446777, "logits/rejected": -2.4903650283813477, "logps/chosen": -229.8724822998047, "logps/rejected": -191.74612426757812, "loss": 0.5757, "rewards/accuracies": 0.75, "rewards/chosen": -2.333589553833008, "rewards/margins": 1.4477872848510742, "rewards/rejected": -3.7813773155212402, "step": 5369 }, { "epoch": 0.62, "learning_rate": 1.1430258651234203e-07, "logits/chosen": -2.1118698120117188, "logits/rejected": -2.22216796875, "logps/chosen": -283.0202331542969, "logps/rejected": -259.7262878417969, "loss": 0.4346, "rewards/accuracies": 0.75, "rewards/chosen": -0.9116078615188599, "rewards/margins": 2.2685108184814453, "rewards/rejected": -3.1801185607910156, "step": 5370 }, { "epoch": 0.62, "learning_rate": 1.1426715483642376e-07, "logits/chosen": -2.340806245803833, "logits/rejected": -2.7342162132263184, "logps/chosen": -266.09814453125, "logps/rejected": -190.0380859375, "loss": 0.4641, "rewards/accuracies": 0.75, "rewards/chosen": -0.6011068224906921, "rewards/margins": 0.9216456413269043, "rewards/rejected": -1.5227525234222412, "step": 5371 }, { "epoch": 0.62, "learning_rate": 1.1423172316050549e-07, "logits/chosen": -1.6209710836410522, "logits/rejected": -1.8452750444412231, "logps/chosen": -385.224853515625, "logps/rejected": -331.89556884765625, "loss": 0.4362, "rewards/accuracies": 0.875, "rewards/chosen": -0.883021354675293, "rewards/margins": 1.509251356124878, "rewards/rejected": -2.392272472381592, "step": 5372 }, { "epoch": 0.63, "learning_rate": 1.1419629148458721e-07, "logits/chosen": -2.1133692264556885, "logits/rejected": -2.2323427200317383, "logps/chosen": -227.40985107421875, "logps/rejected": -319.9500732421875, "loss": 0.316, "rewards/accuracies": 0.875, "rewards/chosen": -0.29349297285079956, "rewards/margins": 2.6873676776885986, "rewards/rejected": -2.980860710144043, "step": 5373 }, { "epoch": 0.63, "learning_rate": 1.1416085980866894e-07, "logits/chosen": -2.0857796669006348, "logits/rejected": -2.2653074264526367, "logps/chosen": -398.2340393066406, "logps/rejected": -244.95411682128906, "loss": 0.1929, "rewards/accuracies": 0.875, "rewards/chosen": -0.8428530693054199, "rewards/margins": 2.702859878540039, "rewards/rejected": -3.545713424682617, "step": 5374 }, { "epoch": 0.63, "learning_rate": 1.1412542813275067e-07, "logits/chosen": -2.481411933898926, "logits/rejected": -2.4078009128570557, "logps/chosen": -358.3394775390625, "logps/rejected": -267.542724609375, "loss": 0.2931, "rewards/accuracies": 0.875, "rewards/chosen": -0.43920016288757324, "rewards/margins": 1.5420889854431152, "rewards/rejected": -1.981289267539978, "step": 5375 }, { "epoch": 0.63, "learning_rate": 1.1408999645683241e-07, "logits/chosen": -2.0551159381866455, "logits/rejected": -1.7485780715942383, "logps/chosen": -185.59776306152344, "logps/rejected": -353.1546630859375, "loss": 0.2641, "rewards/accuracies": 0.875, "rewards/chosen": -0.36728566884994507, "rewards/margins": 3.1150283813476562, "rewards/rejected": -3.482314109802246, "step": 5376 }, { "epoch": 0.63, "learning_rate": 1.1405456478091414e-07, "logits/chosen": -2.3738317489624023, "logits/rejected": -2.4083411693573, "logps/chosen": -182.7656707763672, "logps/rejected": -162.0426788330078, "loss": 0.3529, "rewards/accuracies": 0.75, "rewards/chosen": -0.7681323885917664, "rewards/margins": 1.5419130325317383, "rewards/rejected": -2.3100454807281494, "step": 5377 }, { "epoch": 0.63, "learning_rate": 1.1401913310499586e-07, "logits/chosen": -1.8814460039138794, "logits/rejected": -2.2363131046295166, "logps/chosen": -481.83294677734375, "logps/rejected": -380.61981201171875, "loss": 0.3595, "rewards/accuracies": 0.875, "rewards/chosen": -0.4333607852458954, "rewards/margins": 2.334918260574341, "rewards/rejected": -2.7682790756225586, "step": 5378 }, { "epoch": 0.63, "learning_rate": 1.1398370142907759e-07, "logits/chosen": -2.8467555046081543, "logits/rejected": -2.5749404430389404, "logps/chosen": -247.470703125, "logps/rejected": -368.93798828125, "loss": 0.7379, "rewards/accuracies": 0.75, "rewards/chosen": -1.8185607194900513, "rewards/margins": 1.828495740890503, "rewards/rejected": -3.6470565795898438, "step": 5379 }, { "epoch": 0.63, "learning_rate": 1.1394826975315931e-07, "logits/chosen": -2.614248514175415, "logits/rejected": -2.2359120845794678, "logps/chosen": -119.18404388427734, "logps/rejected": -197.70083618164062, "loss": 0.2712, "rewards/accuracies": 0.75, "rewards/chosen": -1.0320305824279785, "rewards/margins": 1.892529845237732, "rewards/rejected": -2.924560546875, "step": 5380 }, { "epoch": 0.63, "learning_rate": 1.1391283807724104e-07, "logits/chosen": -2.738949775695801, "logits/rejected": -2.7817113399505615, "logps/chosen": -139.37057495117188, "logps/rejected": -153.03518676757812, "loss": 0.1708, "rewards/accuracies": 0.875, "rewards/chosen": -0.8323532342910767, "rewards/margins": 2.9041097164154053, "rewards/rejected": -3.7364625930786133, "step": 5381 }, { "epoch": 0.63, "learning_rate": 1.1387740640132278e-07, "logits/chosen": -2.021416187286377, "logits/rejected": -2.3910136222839355, "logps/chosen": -201.4613800048828, "logps/rejected": -187.7165069580078, "loss": 0.2727, "rewards/accuracies": 1.0, "rewards/chosen": -0.7954983115196228, "rewards/margins": 1.529793620109558, "rewards/rejected": -2.3252921104431152, "step": 5382 }, { "epoch": 0.63, "learning_rate": 1.1384197472540451e-07, "logits/chosen": -1.8371105194091797, "logits/rejected": -1.6156127452850342, "logps/chosen": -361.0088806152344, "logps/rejected": -434.0414733886719, "loss": 0.6038, "rewards/accuracies": 0.625, "rewards/chosen": -1.4817534685134888, "rewards/margins": 1.3600858449935913, "rewards/rejected": -2.84183931350708, "step": 5383 }, { "epoch": 0.63, "learning_rate": 1.1380654304948624e-07, "logits/chosen": -2.0803580284118652, "logits/rejected": -2.4836301803588867, "logps/chosen": -219.02294921875, "logps/rejected": -174.16793823242188, "loss": 1.8926, "rewards/accuracies": 0.625, "rewards/chosen": -2.1486542224884033, "rewards/margins": 0.286805123090744, "rewards/rejected": -2.4354593753814697, "step": 5384 }, { "epoch": 0.63, "learning_rate": 1.1377111137356796e-07, "logits/chosen": -2.406153440475464, "logits/rejected": -2.433340072631836, "logps/chosen": -366.7221984863281, "logps/rejected": -491.34649658203125, "loss": 0.4119, "rewards/accuracies": 0.75, "rewards/chosen": -0.5996214151382446, "rewards/margins": 2.9444174766540527, "rewards/rejected": -3.544039011001587, "step": 5385 }, { "epoch": 0.63, "learning_rate": 1.137356796976497e-07, "logits/chosen": -1.9369298219680786, "logits/rejected": -2.0869154930114746, "logps/chosen": -465.50823974609375, "logps/rejected": -350.197509765625, "loss": 0.2196, "rewards/accuracies": 1.0, "rewards/chosen": -0.16899700462818146, "rewards/margins": 1.9959421157836914, "rewards/rejected": -2.1649391651153564, "step": 5386 }, { "epoch": 0.63, "learning_rate": 1.1370024802173142e-07, "logits/chosen": -2.493149757385254, "logits/rejected": -2.368044853210449, "logps/chosen": -350.370849609375, "logps/rejected": -368.33697509765625, "loss": 0.2441, "rewards/accuracies": 0.875, "rewards/chosen": -0.27342501282691956, "rewards/margins": 3.7940433025360107, "rewards/rejected": -4.067468643188477, "step": 5387 }, { "epoch": 0.63, "learning_rate": 1.1366481634581315e-07, "logits/chosen": -2.1739561557769775, "logits/rejected": -2.380276918411255, "logps/chosen": -296.95556640625, "logps/rejected": -224.952880859375, "loss": 0.2964, "rewards/accuracies": 1.0, "rewards/chosen": -0.494283527135849, "rewards/margins": 1.690805435180664, "rewards/rejected": -2.185089111328125, "step": 5388 }, { "epoch": 0.63, "learning_rate": 1.1362938466989489e-07, "logits/chosen": -2.580578088760376, "logits/rejected": -2.5316686630249023, "logps/chosen": -344.95489501953125, "logps/rejected": -243.54953002929688, "loss": 0.1035, "rewards/accuracies": 1.0, "rewards/chosen": 0.030601661652326584, "rewards/margins": 2.894486427307129, "rewards/rejected": -2.863884925842285, "step": 5389 }, { "epoch": 0.63, "learning_rate": 1.1359395299397661e-07, "logits/chosen": -2.0239391326904297, "logits/rejected": -2.249756097793579, "logps/chosen": -359.32037353515625, "logps/rejected": -329.4820861816406, "loss": 0.2823, "rewards/accuracies": 0.875, "rewards/chosen": -0.6357448101043701, "rewards/margins": 3.2584493160247803, "rewards/rejected": -3.8941946029663086, "step": 5390 }, { "epoch": 0.63, "learning_rate": 1.1355852131805833e-07, "logits/chosen": -2.79663348197937, "logits/rejected": -2.8424994945526123, "logps/chosen": -147.08351135253906, "logps/rejected": -204.67236328125, "loss": 0.0668, "rewards/accuracies": 1.0, "rewards/chosen": 0.04837636649608612, "rewards/margins": 3.8691134452819824, "rewards/rejected": -3.8207366466522217, "step": 5391 }, { "epoch": 0.63, "learning_rate": 1.1352308964214007e-07, "logits/chosen": -2.0020720958709717, "logits/rejected": -2.313211441040039, "logps/chosen": -362.942626953125, "logps/rejected": -179.58518981933594, "loss": 0.4616, "rewards/accuracies": 0.75, "rewards/chosen": -0.9049264788627625, "rewards/margins": 2.499267101287842, "rewards/rejected": -3.404193878173828, "step": 5392 }, { "epoch": 0.63, "learning_rate": 1.1348765796622179e-07, "logits/chosen": -2.142202615737915, "logits/rejected": -2.2349166870117188, "logps/chosen": -232.10662841796875, "logps/rejected": -249.53184509277344, "loss": 0.3677, "rewards/accuracies": 0.75, "rewards/chosen": -0.8841705322265625, "rewards/margins": 1.855550765991211, "rewards/rejected": -2.7397212982177734, "step": 5393 }, { "epoch": 0.63, "learning_rate": 1.1345222629030354e-07, "logits/chosen": -2.0515010356903076, "logits/rejected": -2.346853256225586, "logps/chosen": -235.42884826660156, "logps/rejected": -293.098876953125, "loss": 0.2191, "rewards/accuracies": 0.875, "rewards/chosen": -1.009724497795105, "rewards/margins": 3.818309783935547, "rewards/rejected": -4.828034400939941, "step": 5394 }, { "epoch": 0.63, "learning_rate": 1.1341679461438526e-07, "logits/chosen": -2.0894134044647217, "logits/rejected": -2.0225439071655273, "logps/chosen": -408.7860107421875, "logps/rejected": -418.6668395996094, "loss": 0.1276, "rewards/accuracies": 1.0, "rewards/chosen": -0.567647397518158, "rewards/margins": 2.8327972888946533, "rewards/rejected": -3.400444507598877, "step": 5395 }, { "epoch": 0.63, "learning_rate": 1.1338136293846698e-07, "logits/chosen": -2.6005935668945312, "logits/rejected": -2.298593282699585, "logps/chosen": -203.59519958496094, "logps/rejected": -249.62623596191406, "loss": 0.1183, "rewards/accuracies": 1.0, "rewards/chosen": -0.8998126983642578, "rewards/margins": 3.222750186920166, "rewards/rejected": -4.122562885284424, "step": 5396 }, { "epoch": 0.63, "learning_rate": 1.1334593126254872e-07, "logits/chosen": -2.515056848526001, "logits/rejected": -2.775061845779419, "logps/chosen": -360.76275634765625, "logps/rejected": -348.619384765625, "loss": 0.3427, "rewards/accuracies": 0.75, "rewards/chosen": -1.2704813480377197, "rewards/margins": 2.6100900173187256, "rewards/rejected": -3.880571126937866, "step": 5397 }, { "epoch": 0.63, "learning_rate": 1.1331049958663044e-07, "logits/chosen": -1.8495633602142334, "logits/rejected": -2.4554667472839355, "logps/chosen": -394.687255859375, "logps/rejected": -199.52792358398438, "loss": 0.5357, "rewards/accuracies": 0.625, "rewards/chosen": -0.9220387935638428, "rewards/margins": 1.2433502674102783, "rewards/rejected": -2.165389060974121, "step": 5398 }, { "epoch": 0.63, "learning_rate": 1.1327506791071217e-07, "logits/chosen": -2.276494026184082, "logits/rejected": -2.0302937030792236, "logps/chosen": -332.1329650878906, "logps/rejected": -373.7216796875, "loss": 0.6003, "rewards/accuracies": 0.625, "rewards/chosen": -1.4945908784866333, "rewards/margins": 1.605769395828247, "rewards/rejected": -3.10036039352417, "step": 5399 }, { "epoch": 0.63, "learning_rate": 1.1323963623479391e-07, "logits/chosen": -2.040966272354126, "logits/rejected": -2.1161627769470215, "logps/chosen": -235.60594177246094, "logps/rejected": -411.2179870605469, "loss": 0.3609, "rewards/accuracies": 0.75, "rewards/chosen": -1.0819263458251953, "rewards/margins": 4.6390862464904785, "rewards/rejected": -5.721012115478516, "step": 5400 }, { "epoch": 0.63, "learning_rate": 1.1320420455887563e-07, "logits/chosen": -2.1681926250457764, "logits/rejected": -2.3577537536621094, "logps/chosen": -336.5526123046875, "logps/rejected": -242.6056671142578, "loss": 0.4116, "rewards/accuracies": 0.875, "rewards/chosen": -0.7304649353027344, "rewards/margins": 1.41405189037323, "rewards/rejected": -2.144516944885254, "step": 5401 }, { "epoch": 0.63, "learning_rate": 1.1316877288295735e-07, "logits/chosen": -2.0901708602905273, "logits/rejected": -2.241230010986328, "logps/chosen": -237.8018798828125, "logps/rejected": -264.5483093261719, "loss": 0.6419, "rewards/accuracies": 0.875, "rewards/chosen": -0.7429393529891968, "rewards/margins": 1.4400684833526611, "rewards/rejected": -2.1830077171325684, "step": 5402 }, { "epoch": 0.63, "learning_rate": 1.1313334120703909e-07, "logits/chosen": -2.0109751224517822, "logits/rejected": -1.8507510423660278, "logps/chosen": -588.3833618164062, "logps/rejected": -488.09747314453125, "loss": 0.2836, "rewards/accuracies": 0.875, "rewards/chosen": -0.5851837396621704, "rewards/margins": 2.9185261726379395, "rewards/rejected": -3.5037097930908203, "step": 5403 }, { "epoch": 0.63, "learning_rate": 1.1309790953112081e-07, "logits/chosen": -1.6815078258514404, "logits/rejected": -1.6843570470809937, "logps/chosen": -291.677490234375, "logps/rejected": -315.4087829589844, "loss": 0.3521, "rewards/accuracies": 0.875, "rewards/chosen": -0.5687878727912903, "rewards/margins": 3.07369065284729, "rewards/rejected": -3.6424784660339355, "step": 5404 }, { "epoch": 0.63, "learning_rate": 1.1306247785520256e-07, "logits/chosen": -2.3073229789733887, "logits/rejected": -2.3163983821868896, "logps/chosen": -257.4685363769531, "logps/rejected": -362.3294677734375, "loss": 0.3787, "rewards/accuracies": 0.875, "rewards/chosen": -1.429797887802124, "rewards/margins": 3.7340869903564453, "rewards/rejected": -5.163885116577148, "step": 5405 }, { "epoch": 0.63, "learning_rate": 1.1302704617928428e-07, "logits/chosen": -2.373509407043457, "logits/rejected": -2.4976487159729004, "logps/chosen": -139.86788940429688, "logps/rejected": -168.26275634765625, "loss": 0.3271, "rewards/accuracies": 1.0, "rewards/chosen": -0.5603963136672974, "rewards/margins": 1.969430923461914, "rewards/rejected": -2.529827117919922, "step": 5406 }, { "epoch": 0.63, "learning_rate": 1.12991614503366e-07, "logits/chosen": -2.0953168869018555, "logits/rejected": -2.235417366027832, "logps/chosen": -419.89447021484375, "logps/rejected": -318.45135498046875, "loss": 0.291, "rewards/accuracies": 0.75, "rewards/chosen": -1.5837041139602661, "rewards/margins": 2.368928909301758, "rewards/rejected": -3.9526329040527344, "step": 5407 }, { "epoch": 0.63, "learning_rate": 1.1295618282744773e-07, "logits/chosen": -1.8292272090911865, "logits/rejected": -1.7883622646331787, "logps/chosen": -264.42822265625, "logps/rejected": -367.25616455078125, "loss": 0.3194, "rewards/accuracies": 0.875, "rewards/chosen": -1.0295547246932983, "rewards/margins": 2.5533621311187744, "rewards/rejected": -3.582916736602783, "step": 5408 }, { "epoch": 0.63, "learning_rate": 1.1292075115152946e-07, "logits/chosen": -2.1756949424743652, "logits/rejected": -2.1853675842285156, "logps/chosen": -153.75531005859375, "logps/rejected": -197.07398986816406, "loss": 0.1391, "rewards/accuracies": 1.0, "rewards/chosen": -0.16343599557876587, "rewards/margins": 3.074493408203125, "rewards/rejected": -3.237929344177246, "step": 5409 }, { "epoch": 0.63, "learning_rate": 1.1288531947561118e-07, "logits/chosen": -1.6149758100509644, "logits/rejected": -1.8730405569076538, "logps/chosen": -320.65069580078125, "logps/rejected": -290.70599365234375, "loss": 0.2609, "rewards/accuracies": 0.875, "rewards/chosen": -0.4795359969139099, "rewards/margins": 2.247673511505127, "rewards/rejected": -2.7272095680236816, "step": 5410 }, { "epoch": 0.63, "learning_rate": 1.1284988779969293e-07, "logits/chosen": -2.6699986457824707, "logits/rejected": -2.7237260341644287, "logps/chosen": -154.31924438476562, "logps/rejected": -239.70318603515625, "loss": 0.6771, "rewards/accuracies": 0.75, "rewards/chosen": -0.8081600069999695, "rewards/margins": 1.3225332498550415, "rewards/rejected": -2.1306934356689453, "step": 5411 }, { "epoch": 0.63, "learning_rate": 1.1281445612377465e-07, "logits/chosen": -2.8297481536865234, "logits/rejected": -2.570286273956299, "logps/chosen": -394.79156494140625, "logps/rejected": -231.51202392578125, "loss": 0.3715, "rewards/accuracies": 0.75, "rewards/chosen": -0.9974328279495239, "rewards/margins": 2.729815721511841, "rewards/rejected": -3.7272486686706543, "step": 5412 }, { "epoch": 0.63, "learning_rate": 1.1277902444785638e-07, "logits/chosen": -2.1524434089660645, "logits/rejected": -2.500807762145996, "logps/chosen": -462.39031982421875, "logps/rejected": -333.8258972167969, "loss": 0.1007, "rewards/accuracies": 1.0, "rewards/chosen": 0.07131797075271606, "rewards/margins": 3.22157883644104, "rewards/rejected": -3.150261163711548, "step": 5413 }, { "epoch": 0.63, "learning_rate": 1.1274359277193811e-07, "logits/chosen": -2.469902276992798, "logits/rejected": -2.6426360607147217, "logps/chosen": -257.4925842285156, "logps/rejected": -265.8251647949219, "loss": 0.1432, "rewards/accuracies": 1.0, "rewards/chosen": -0.9830567240715027, "rewards/margins": 3.1951873302459717, "rewards/rejected": -4.178244113922119, "step": 5414 }, { "epoch": 0.63, "learning_rate": 1.1270816109601983e-07, "logits/chosen": -2.209501266479492, "logits/rejected": -2.2544641494750977, "logps/chosen": -356.9164733886719, "logps/rejected": -228.82635498046875, "loss": 0.4904, "rewards/accuracies": 0.625, "rewards/chosen": -1.6333261728286743, "rewards/margins": 1.3822753429412842, "rewards/rejected": -3.015601396560669, "step": 5415 }, { "epoch": 0.63, "learning_rate": 1.1267272942010156e-07, "logits/chosen": -2.226491689682007, "logits/rejected": -2.109182596206665, "logps/chosen": -495.8923034667969, "logps/rejected": -380.00665283203125, "loss": 0.1993, "rewards/accuracies": 1.0, "rewards/chosen": -1.036454200744629, "rewards/margins": 2.5797996520996094, "rewards/rejected": -3.6162538528442383, "step": 5416 }, { "epoch": 0.63, "learning_rate": 1.126372977441833e-07, "logits/chosen": -2.778865098953247, "logits/rejected": -2.6349055767059326, "logps/chosen": -369.8677978515625, "logps/rejected": -348.53680419921875, "loss": 0.2531, "rewards/accuracies": 0.875, "rewards/chosen": 0.1774243712425232, "rewards/margins": 2.1286141872406006, "rewards/rejected": -1.9511901140213013, "step": 5417 }, { "epoch": 0.63, "learning_rate": 1.1260186606826503e-07, "logits/chosen": -2.0492913722991943, "logits/rejected": -2.1439034938812256, "logps/chosen": -493.8384094238281, "logps/rejected": -403.1107177734375, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": -0.3148505985736847, "rewards/margins": 3.4577407836914062, "rewards/rejected": -3.7725913524627686, "step": 5418 }, { "epoch": 0.63, "learning_rate": 1.1256643439234675e-07, "logits/chosen": -2.8936779499053955, "logits/rejected": -2.8470215797424316, "logps/chosen": -130.54893493652344, "logps/rejected": -114.22411346435547, "loss": 0.087, "rewards/accuracies": 1.0, "rewards/chosen": -0.03555231913924217, "rewards/margins": 3.0255603790283203, "rewards/rejected": -3.061112403869629, "step": 5419 }, { "epoch": 0.63, "learning_rate": 1.1253100271642848e-07, "logits/chosen": -2.0061471462249756, "logits/rejected": -2.160956859588623, "logps/chosen": -256.5695495605469, "logps/rejected": -172.8482666015625, "loss": 0.7687, "rewards/accuracies": 0.625, "rewards/chosen": -0.8914706110954285, "rewards/margins": 0.7966321706771851, "rewards/rejected": -1.6881027221679688, "step": 5420 }, { "epoch": 0.63, "learning_rate": 1.1249557104051021e-07, "logits/chosen": -2.3844470977783203, "logits/rejected": -2.846339225769043, "logps/chosen": -342.1619567871094, "logps/rejected": -225.065185546875, "loss": 0.3459, "rewards/accuracies": 0.875, "rewards/chosen": -0.359169065952301, "rewards/margins": 2.478038787841797, "rewards/rejected": -2.837207794189453, "step": 5421 }, { "epoch": 0.63, "learning_rate": 1.1246013936459193e-07, "logits/chosen": -2.83151912689209, "logits/rejected": -2.6686532497406006, "logps/chosen": -191.8907928466797, "logps/rejected": -158.0035400390625, "loss": 0.6158, "rewards/accuracies": 0.875, "rewards/chosen": -0.5885133147239685, "rewards/margins": 1.7286145687103271, "rewards/rejected": -2.3171279430389404, "step": 5422 }, { "epoch": 0.63, "learning_rate": 1.1242470768867368e-07, "logits/chosen": -2.2413179874420166, "logits/rejected": -2.761350393295288, "logps/chosen": -342.164306640625, "logps/rejected": -247.7933349609375, "loss": 0.6766, "rewards/accuracies": 0.625, "rewards/chosen": -1.2648792266845703, "rewards/margins": 1.9464848041534424, "rewards/rejected": -3.2113640308380127, "step": 5423 }, { "epoch": 0.63, "learning_rate": 1.123892760127554e-07, "logits/chosen": -1.7190141677856445, "logits/rejected": -1.9828202724456787, "logps/chosen": -363.981201171875, "logps/rejected": -188.7792510986328, "loss": 0.5835, "rewards/accuracies": 0.75, "rewards/chosen": -1.7304027080535889, "rewards/margins": 0.7001680135726929, "rewards/rejected": -2.430570602416992, "step": 5424 }, { "epoch": 0.63, "learning_rate": 1.1235384433683712e-07, "logits/chosen": -1.874596357345581, "logits/rejected": -2.2107901573181152, "logps/chosen": -298.2101745605469, "logps/rejected": -197.6917724609375, "loss": 0.3815, "rewards/accuracies": 0.875, "rewards/chosen": -0.7896029353141785, "rewards/margins": 1.0445470809936523, "rewards/rejected": -1.8341500759124756, "step": 5425 }, { "epoch": 0.63, "learning_rate": 1.1231841266091886e-07, "logits/chosen": -2.7808456420898438, "logits/rejected": -2.778216600418091, "logps/chosen": -176.84768676757812, "logps/rejected": -347.4643249511719, "loss": 0.3397, "rewards/accuracies": 0.875, "rewards/chosen": -1.0814919471740723, "rewards/margins": 2.9159164428710938, "rewards/rejected": -3.997408390045166, "step": 5426 }, { "epoch": 0.63, "learning_rate": 1.1228298098500058e-07, "logits/chosen": -2.180626392364502, "logits/rejected": -2.1392223834991455, "logps/chosen": -284.5223083496094, "logps/rejected": -383.3424987792969, "loss": 0.5376, "rewards/accuracies": 0.75, "rewards/chosen": -0.9218234419822693, "rewards/margins": 1.339971899986267, "rewards/rejected": -2.2617955207824707, "step": 5427 }, { "epoch": 0.63, "learning_rate": 1.122475493090823e-07, "logits/chosen": -2.175630569458008, "logits/rejected": -2.191176652908325, "logps/chosen": -627.421142578125, "logps/rejected": -517.9371948242188, "loss": 0.5195, "rewards/accuracies": 0.875, "rewards/chosen": -0.9638887643814087, "rewards/margins": 0.7159367799758911, "rewards/rejected": -1.6798255443572998, "step": 5428 }, { "epoch": 0.63, "learning_rate": 1.1221211763316405e-07, "logits/chosen": -2.5000226497650146, "logits/rejected": -2.5190014839172363, "logps/chosen": -336.8921813964844, "logps/rejected": -275.81488037109375, "loss": 0.6511, "rewards/accuracies": 0.625, "rewards/chosen": -1.1737866401672363, "rewards/margins": 0.8599332571029663, "rewards/rejected": -2.033719778060913, "step": 5429 }, { "epoch": 0.63, "learning_rate": 1.1217668595724577e-07, "logits/chosen": -1.9417145252227783, "logits/rejected": -2.311255931854248, "logps/chosen": -347.3506774902344, "logps/rejected": -291.76861572265625, "loss": 0.9648, "rewards/accuracies": 0.5, "rewards/chosen": -1.4313206672668457, "rewards/margins": 0.6953222751617432, "rewards/rejected": -2.126643180847168, "step": 5430 }, { "epoch": 0.63, "learning_rate": 1.1214125428132751e-07, "logits/chosen": -2.7482590675354004, "logits/rejected": -2.6745455265045166, "logps/chosen": -244.3821258544922, "logps/rejected": -247.88552856445312, "loss": 0.7947, "rewards/accuracies": 0.5, "rewards/chosen": -2.0783932209014893, "rewards/margins": 0.59649258852005, "rewards/rejected": -2.6748857498168945, "step": 5431 }, { "epoch": 0.63, "learning_rate": 1.1210582260540923e-07, "logits/chosen": -2.320410966873169, "logits/rejected": -2.2884135246276855, "logps/chosen": -286.5833740234375, "logps/rejected": -294.120849609375, "loss": 0.1207, "rewards/accuracies": 1.0, "rewards/chosen": -0.4949522614479065, "rewards/margins": 3.1444108486175537, "rewards/rejected": -3.6393628120422363, "step": 5432 }, { "epoch": 0.63, "learning_rate": 1.1207039092949095e-07, "logits/chosen": -1.7404357194900513, "logits/rejected": -2.2082605361938477, "logps/chosen": -524.9677734375, "logps/rejected": -310.6058349609375, "loss": 0.3667, "rewards/accuracies": 0.875, "rewards/chosen": -0.4941253066062927, "rewards/margins": 1.6549936532974243, "rewards/rejected": -2.1491191387176514, "step": 5433 }, { "epoch": 0.63, "learning_rate": 1.1203495925357269e-07, "logits/chosen": -2.3483726978302, "logits/rejected": -2.4620578289031982, "logps/chosen": -321.5094909667969, "logps/rejected": -308.6068420410156, "loss": 0.4426, "rewards/accuracies": 0.75, "rewards/chosen": -1.0267605781555176, "rewards/margins": 1.8645309209823608, "rewards/rejected": -2.891291379928589, "step": 5434 }, { "epoch": 0.63, "learning_rate": 1.1199952757765442e-07, "logits/chosen": -2.545304536819458, "logits/rejected": -2.7126946449279785, "logps/chosen": -222.67269897460938, "logps/rejected": -187.9613800048828, "loss": 0.4663, "rewards/accuracies": 0.875, "rewards/chosen": -1.3508564233779907, "rewards/margins": 1.007794737815857, "rewards/rejected": -2.3586511611938477, "step": 5435 }, { "epoch": 0.63, "learning_rate": 1.1196409590173614e-07, "logits/chosen": -2.379878520965576, "logits/rejected": -2.296349048614502, "logps/chosen": -228.67666625976562, "logps/rejected": -329.12542724609375, "loss": 0.3111, "rewards/accuracies": 0.875, "rewards/chosen": -0.35086387395858765, "rewards/margins": 2.5683939456939697, "rewards/rejected": -2.919257640838623, "step": 5436 }, { "epoch": 0.63, "learning_rate": 1.1192866422581788e-07, "logits/chosen": -2.834955930709839, "logits/rejected": -2.7682485580444336, "logps/chosen": -217.00167846679688, "logps/rejected": -240.8406219482422, "loss": 0.2707, "rewards/accuracies": 0.875, "rewards/chosen": -0.47074994444847107, "rewards/margins": 2.787675380706787, "rewards/rejected": -3.258425235748291, "step": 5437 }, { "epoch": 0.63, "learning_rate": 1.118932325498996e-07, "logits/chosen": -2.4867069721221924, "logits/rejected": -2.7169923782348633, "logps/chosen": -194.88238525390625, "logps/rejected": -220.4710693359375, "loss": 0.3502, "rewards/accuracies": 0.75, "rewards/chosen": -0.4139651358127594, "rewards/margins": 2.512171983718872, "rewards/rejected": -2.9261372089385986, "step": 5438 }, { "epoch": 0.63, "learning_rate": 1.1185780087398132e-07, "logits/chosen": -2.7698581218719482, "logits/rejected": -2.8102030754089355, "logps/chosen": -82.76570129394531, "logps/rejected": -112.25534057617188, "loss": 0.4989, "rewards/accuracies": 0.625, "rewards/chosen": -1.0793108940124512, "rewards/margins": 1.9017988443374634, "rewards/rejected": -2.981109857559204, "step": 5439 }, { "epoch": 0.63, "learning_rate": 1.1182236919806307e-07, "logits/chosen": -1.9212427139282227, "logits/rejected": -2.1138436794281006, "logps/chosen": -346.54022216796875, "logps/rejected": -307.0097351074219, "loss": 0.2654, "rewards/accuracies": 0.875, "rewards/chosen": -0.5529037714004517, "rewards/margins": 3.094998359680176, "rewards/rejected": -3.647902011871338, "step": 5440 }, { "epoch": 0.63, "learning_rate": 1.117869375221448e-07, "logits/chosen": -2.2420120239257812, "logits/rejected": -2.269819974899292, "logps/chosen": -265.33056640625, "logps/rejected": -277.6573486328125, "loss": 0.3977, "rewards/accuracies": 0.875, "rewards/chosen": -0.8846417665481567, "rewards/margins": 1.7760423421859741, "rewards/rejected": -2.660684108734131, "step": 5441 }, { "epoch": 0.63, "learning_rate": 1.1175150584622652e-07, "logits/chosen": -2.556025743484497, "logits/rejected": -2.637547016143799, "logps/chosen": -253.7698516845703, "logps/rejected": -300.7780456542969, "loss": 0.2951, "rewards/accuracies": 0.75, "rewards/chosen": -0.7236975431442261, "rewards/margins": 2.6017839908599854, "rewards/rejected": -3.325481414794922, "step": 5442 }, { "epoch": 0.63, "learning_rate": 1.1171607417030825e-07, "logits/chosen": -2.074341058731079, "logits/rejected": -2.234650135040283, "logps/chosen": -247.71060180664062, "logps/rejected": -355.0132751464844, "loss": 0.6224, "rewards/accuracies": 0.875, "rewards/chosen": -1.0118634700775146, "rewards/margins": 0.9492691159248352, "rewards/rejected": -1.9611326456069946, "step": 5443 }, { "epoch": 0.63, "learning_rate": 1.1168064249438997e-07, "logits/chosen": -2.4312615394592285, "logits/rejected": -2.1841607093811035, "logps/chosen": -279.50384521484375, "logps/rejected": -262.96697998046875, "loss": 0.5533, "rewards/accuracies": 0.75, "rewards/chosen": -1.513006329536438, "rewards/margins": 2.134632110595703, "rewards/rejected": -3.6476383209228516, "step": 5444 }, { "epoch": 0.63, "learning_rate": 1.116452108184717e-07, "logits/chosen": -1.9913761615753174, "logits/rejected": -1.974186897277832, "logps/chosen": -248.65628051757812, "logps/rejected": -302.9364318847656, "loss": 0.6167, "rewards/accuracies": 0.625, "rewards/chosen": -0.9924798011779785, "rewards/margins": 0.5923253893852234, "rewards/rejected": -1.5848052501678467, "step": 5445 }, { "epoch": 0.63, "learning_rate": 1.1160977914255345e-07, "logits/chosen": -2.268012046813965, "logits/rejected": -2.5050535202026367, "logps/chosen": -206.32081604003906, "logps/rejected": -157.01922607421875, "loss": 2.6615, "rewards/accuracies": 0.625, "rewards/chosen": -2.460832118988037, "rewards/margins": -0.7061963081359863, "rewards/rejected": -1.7546356916427612, "step": 5446 }, { "epoch": 0.63, "learning_rate": 1.1157434746663517e-07, "logits/chosen": -2.5190646648406982, "logits/rejected": -2.529682159423828, "logps/chosen": -272.3499450683594, "logps/rejected": -266.38677978515625, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -0.6423850655555725, "rewards/margins": 3.420186996459961, "rewards/rejected": -4.062572479248047, "step": 5447 }, { "epoch": 0.63, "learning_rate": 1.115389157907169e-07, "logits/chosen": -2.2268686294555664, "logits/rejected": -2.3013153076171875, "logps/chosen": -111.85850524902344, "logps/rejected": -167.75538635253906, "loss": 0.2708, "rewards/accuracies": 0.875, "rewards/chosen": -0.83359295129776, "rewards/margins": 2.4300522804260254, "rewards/rejected": -3.2636451721191406, "step": 5448 }, { "epoch": 0.63, "learning_rate": 1.1150348411479862e-07, "logits/chosen": -2.4390482902526855, "logits/rejected": -2.3662052154541016, "logps/chosen": -425.13458251953125, "logps/rejected": -272.8174743652344, "loss": 0.6509, "rewards/accuracies": 0.875, "rewards/chosen": -1.631458044052124, "rewards/margins": 1.2138640880584717, "rewards/rejected": -2.8453221321105957, "step": 5449 }, { "epoch": 0.63, "learning_rate": 1.1146805243888035e-07, "logits/chosen": -2.485001802444458, "logits/rejected": -2.166154623031616, "logps/chosen": -258.07470703125, "logps/rejected": -368.3575744628906, "loss": 0.4991, "rewards/accuracies": 0.75, "rewards/chosen": -1.0456717014312744, "rewards/margins": 1.4731897115707397, "rewards/rejected": -2.5188612937927246, "step": 5450 }, { "epoch": 0.63, "learning_rate": 1.1143262076296208e-07, "logits/chosen": -2.7795093059539795, "logits/rejected": -2.6864068508148193, "logps/chosen": -230.74053955078125, "logps/rejected": -125.32540893554688, "loss": 0.293, "rewards/accuracies": 1.0, "rewards/chosen": -0.7749330997467041, "rewards/margins": 1.314659595489502, "rewards/rejected": -2.089592695236206, "step": 5451 }, { "epoch": 0.63, "learning_rate": 1.1139718908704382e-07, "logits/chosen": -2.4311282634735107, "logits/rejected": -2.4431800842285156, "logps/chosen": -249.26010131835938, "logps/rejected": -240.43768310546875, "loss": 0.4141, "rewards/accuracies": 0.75, "rewards/chosen": -0.9348114132881165, "rewards/margins": 3.031147003173828, "rewards/rejected": -3.9659583568573, "step": 5452 }, { "epoch": 0.63, "learning_rate": 1.1136175741112554e-07, "logits/chosen": -1.9503041505813599, "logits/rejected": -1.8971867561340332, "logps/chosen": -461.7470703125, "logps/rejected": -529.012451171875, "loss": 0.4095, "rewards/accuracies": 0.75, "rewards/chosen": -1.211175560951233, "rewards/margins": 2.6800379753112793, "rewards/rejected": -3.8912134170532227, "step": 5453 }, { "epoch": 0.63, "learning_rate": 1.1132632573520728e-07, "logits/chosen": -1.9389369487762451, "logits/rejected": -2.486764430999756, "logps/chosen": -748.1295166015625, "logps/rejected": -273.95941162109375, "loss": 0.3546, "rewards/accuracies": 0.875, "rewards/chosen": -0.7899062633514404, "rewards/margins": 1.5206000804901123, "rewards/rejected": -2.3105063438415527, "step": 5454 }, { "epoch": 0.63, "learning_rate": 1.11290894059289e-07, "logits/chosen": -2.1409292221069336, "logits/rejected": -2.108003616333008, "logps/chosen": -159.531494140625, "logps/rejected": -155.24563598632812, "loss": 0.1981, "rewards/accuracies": 0.875, "rewards/chosen": -0.7198622226715088, "rewards/margins": 2.275578737258911, "rewards/rejected": -2.99544095993042, "step": 5455 }, { "epoch": 0.63, "learning_rate": 1.1125546238337072e-07, "logits/chosen": -2.3768603801727295, "logits/rejected": -2.7208669185638428, "logps/chosen": -353.31585693359375, "logps/rejected": -333.7634582519531, "loss": 0.6581, "rewards/accuracies": 0.75, "rewards/chosen": -1.1247022151947021, "rewards/margins": 2.156001091003418, "rewards/rejected": -3.280703544616699, "step": 5456 }, { "epoch": 0.63, "learning_rate": 1.1122003070745245e-07, "logits/chosen": -2.0702431201934814, "logits/rejected": -1.9071733951568604, "logps/chosen": -247.37591552734375, "logps/rejected": -248.1339111328125, "loss": 0.1469, "rewards/accuracies": 1.0, "rewards/chosen": -0.3598642647266388, "rewards/margins": 2.4316556453704834, "rewards/rejected": -2.7915198802948, "step": 5457 }, { "epoch": 0.63, "learning_rate": 1.1118459903153419e-07, "logits/chosen": -2.339940071105957, "logits/rejected": -2.6672112941741943, "logps/chosen": -303.1612548828125, "logps/rejected": -202.21490478515625, "loss": 0.397, "rewards/accuracies": 0.875, "rewards/chosen": -1.3011424541473389, "rewards/margins": 1.1551721096038818, "rewards/rejected": -2.4563148021698, "step": 5458 }, { "epoch": 0.64, "learning_rate": 1.1114916735561593e-07, "logits/chosen": -2.4008448123931885, "logits/rejected": -2.3614091873168945, "logps/chosen": -253.9644775390625, "logps/rejected": -332.78753662109375, "loss": 1.6274, "rewards/accuracies": 0.75, "rewards/chosen": -2.65031361579895, "rewards/margins": 0.7148020267486572, "rewards/rejected": -3.365115165710449, "step": 5459 }, { "epoch": 0.64, "learning_rate": 1.1111373567969765e-07, "logits/chosen": -2.3067145347595215, "logits/rejected": -2.3615288734436035, "logps/chosen": -308.28607177734375, "logps/rejected": -289.49456787109375, "loss": 0.8091, "rewards/accuracies": 0.625, "rewards/chosen": -0.5115925073623657, "rewards/margins": 1.215430736541748, "rewards/rejected": -1.7270233631134033, "step": 5460 }, { "epoch": 0.64, "learning_rate": 1.1107830400377937e-07, "logits/chosen": -2.474803924560547, "logits/rejected": -2.5771422386169434, "logps/chosen": -356.1029357910156, "logps/rejected": -362.7196350097656, "loss": 0.44, "rewards/accuracies": 0.75, "rewards/chosen": -1.3983619213104248, "rewards/margins": 1.8603885173797607, "rewards/rejected": -3.2587504386901855, "step": 5461 }, { "epoch": 0.64, "learning_rate": 1.1104287232786109e-07, "logits/chosen": -1.9505131244659424, "logits/rejected": -2.1550159454345703, "logps/chosen": -256.8675231933594, "logps/rejected": -248.46829223632812, "loss": 0.4941, "rewards/accuracies": 0.625, "rewards/chosen": -0.4862803518772125, "rewards/margins": 1.7781143188476562, "rewards/rejected": -2.264394760131836, "step": 5462 }, { "epoch": 0.64, "learning_rate": 1.1100744065194283e-07, "logits/chosen": -2.3333632946014404, "logits/rejected": -2.093876361846924, "logps/chosen": -229.13970947265625, "logps/rejected": -271.5419006347656, "loss": 0.5177, "rewards/accuracies": 0.875, "rewards/chosen": -0.908474862575531, "rewards/margins": 1.9225562810897827, "rewards/rejected": -2.831031084060669, "step": 5463 }, { "epoch": 0.64, "learning_rate": 1.1097200897602456e-07, "logits/chosen": -2.348353624343872, "logits/rejected": -2.4400272369384766, "logps/chosen": -373.0672912597656, "logps/rejected": -272.0811462402344, "loss": 0.213, "rewards/accuracies": 0.875, "rewards/chosen": -0.6249352693557739, "rewards/margins": 2.5510406494140625, "rewards/rejected": -3.175976037979126, "step": 5464 }, { "epoch": 0.64, "learning_rate": 1.109365773001063e-07, "logits/chosen": -2.973336696624756, "logits/rejected": -2.977463483810425, "logps/chosen": -178.27024841308594, "logps/rejected": -267.38201904296875, "loss": 0.2124, "rewards/accuracies": 0.875, "rewards/chosen": -0.5851482152938843, "rewards/margins": 3.308651924133301, "rewards/rejected": -3.8938000202178955, "step": 5465 }, { "epoch": 0.64, "learning_rate": 1.1090114562418802e-07, "logits/chosen": -2.819924831390381, "logits/rejected": -2.825700283050537, "logps/chosen": -264.49395751953125, "logps/rejected": -249.4871368408203, "loss": 0.4278, "rewards/accuracies": 0.75, "rewards/chosen": -0.6320321559906006, "rewards/margins": 1.8009101152420044, "rewards/rejected": -2.4329423904418945, "step": 5466 }, { "epoch": 0.64, "learning_rate": 1.1086571394826974e-07, "logits/chosen": -2.5752809047698975, "logits/rejected": -2.620699882507324, "logps/chosen": -195.54476928710938, "logps/rejected": -200.12403869628906, "loss": 0.209, "rewards/accuracies": 1.0, "rewards/chosen": -0.6223514676094055, "rewards/margins": 2.6813302040100098, "rewards/rejected": -3.3036813735961914, "step": 5467 }, { "epoch": 0.64, "learning_rate": 1.1083028227235148e-07, "logits/chosen": -2.450965642929077, "logits/rejected": -2.538991928100586, "logps/chosen": -354.7512512207031, "logps/rejected": -319.60125732421875, "loss": 0.16, "rewards/accuracies": 0.875, "rewards/chosen": -0.3484196066856384, "rewards/margins": 3.0538203716278076, "rewards/rejected": -3.4022397994995117, "step": 5468 }, { "epoch": 0.64, "learning_rate": 1.107948505964332e-07, "logits/chosen": -2.658229351043701, "logits/rejected": -2.448155641555786, "logps/chosen": -263.8769226074219, "logps/rejected": -345.824462890625, "loss": 0.3302, "rewards/accuracies": 0.875, "rewards/chosen": -0.6639606952667236, "rewards/margins": 2.4960074424743652, "rewards/rejected": -3.159968376159668, "step": 5469 }, { "epoch": 0.64, "learning_rate": 1.1075941892051494e-07, "logits/chosen": -1.962981104850769, "logits/rejected": -2.20139479637146, "logps/chosen": -307.5494384765625, "logps/rejected": -256.3520812988281, "loss": 0.2275, "rewards/accuracies": 0.875, "rewards/chosen": -0.2471906840801239, "rewards/margins": 2.6274847984313965, "rewards/rejected": -2.8746752738952637, "step": 5470 }, { "epoch": 0.64, "learning_rate": 1.1072398724459667e-07, "logits/chosen": -1.9393223524093628, "logits/rejected": -1.95176100730896, "logps/chosen": -171.08279418945312, "logps/rejected": -223.8411865234375, "loss": 0.9538, "rewards/accuracies": 0.75, "rewards/chosen": -1.7351804971694946, "rewards/margins": 0.5884295701980591, "rewards/rejected": -2.323610305786133, "step": 5471 }, { "epoch": 0.64, "learning_rate": 1.1068855556867839e-07, "logits/chosen": -2.6580963134765625, "logits/rejected": -2.387050151824951, "logps/chosen": -153.36741638183594, "logps/rejected": -328.6927490234375, "loss": 0.0858, "rewards/accuracies": 1.0, "rewards/chosen": -0.006263114511966705, "rewards/margins": 4.561356544494629, "rewards/rejected": -4.567620277404785, "step": 5472 }, { "epoch": 0.64, "learning_rate": 1.1065312389276011e-07, "logits/chosen": -2.169116497039795, "logits/rejected": -2.5653042793273926, "logps/chosen": -217.5720977783203, "logps/rejected": -170.7113037109375, "loss": 0.335, "rewards/accuracies": 0.875, "rewards/chosen": -0.6674585938453674, "rewards/margins": 1.918187141418457, "rewards/rejected": -2.5856456756591797, "step": 5473 }, { "epoch": 0.64, "learning_rate": 1.1061769221684185e-07, "logits/chosen": -2.4139952659606934, "logits/rejected": -2.8692803382873535, "logps/chosen": -392.4227294921875, "logps/rejected": -234.67332458496094, "loss": 0.3434, "rewards/accuracies": 0.75, "rewards/chosen": -0.9784570336341858, "rewards/margins": 2.009451389312744, "rewards/rejected": -2.987908363342285, "step": 5474 }, { "epoch": 0.64, "learning_rate": 1.1058226054092359e-07, "logits/chosen": -2.2656748294830322, "logits/rejected": -2.707684278488159, "logps/chosen": -326.50738525390625, "logps/rejected": -197.89549255371094, "loss": 0.2934, "rewards/accuracies": 0.875, "rewards/chosen": -1.415968656539917, "rewards/margins": 1.9923152923583984, "rewards/rejected": -3.4082837104797363, "step": 5475 }, { "epoch": 0.64, "learning_rate": 1.1054682886500532e-07, "logits/chosen": -2.1116418838500977, "logits/rejected": -2.0619378089904785, "logps/chosen": -311.6429443359375, "logps/rejected": -302.07000732421875, "loss": 0.3852, "rewards/accuracies": 0.75, "rewards/chosen": -0.8603106141090393, "rewards/margins": 2.037646532058716, "rewards/rejected": -2.8979570865631104, "step": 5476 }, { "epoch": 0.64, "learning_rate": 1.1051139718908704e-07, "logits/chosen": -2.168062210083008, "logits/rejected": -2.0354886054992676, "logps/chosen": -330.4161682128906, "logps/rejected": -407.875, "loss": 0.7688, "rewards/accuracies": 0.75, "rewards/chosen": -0.16985741257667542, "rewards/margins": 1.2124873399734497, "rewards/rejected": -1.3823448419570923, "step": 5477 }, { "epoch": 0.64, "learning_rate": 1.1047596551316876e-07, "logits/chosen": -2.7135202884674072, "logits/rejected": -2.845043659210205, "logps/chosen": -367.636474609375, "logps/rejected": -231.40121459960938, "loss": 0.2022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6420729160308838, "rewards/margins": 2.128307819366455, "rewards/rejected": -2.770380973815918, "step": 5478 }, { "epoch": 0.64, "learning_rate": 1.1044053383725049e-07, "logits/chosen": -2.1597025394439697, "logits/rejected": -2.4846441745758057, "logps/chosen": -360.82147216796875, "logps/rejected": -180.10342407226562, "loss": 0.3401, "rewards/accuracies": 0.875, "rewards/chosen": -0.4870985150337219, "rewards/margins": 2.516366720199585, "rewards/rejected": -3.003465175628662, "step": 5479 }, { "epoch": 0.64, "learning_rate": 1.1040510216133222e-07, "logits/chosen": -2.197957992553711, "logits/rejected": -2.111999273300171, "logps/chosen": -239.9806671142578, "logps/rejected": -414.5565490722656, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/chosen": -0.01108504831790924, "rewards/margins": 3.659945487976074, "rewards/rejected": -3.6710305213928223, "step": 5480 }, { "epoch": 0.64, "learning_rate": 1.1036967048541396e-07, "logits/chosen": -2.006141185760498, "logits/rejected": -2.2732014656066895, "logps/chosen": -325.8533935546875, "logps/rejected": -286.2337341308594, "loss": 0.5854, "rewards/accuracies": 0.875, "rewards/chosen": -1.1232013702392578, "rewards/margins": 2.184298038482666, "rewards/rejected": -3.307499408721924, "step": 5481 }, { "epoch": 0.64, "learning_rate": 1.1033423880949569e-07, "logits/chosen": -1.9891605377197266, "logits/rejected": -2.2760303020477295, "logps/chosen": -313.83575439453125, "logps/rejected": -288.521484375, "loss": 0.7758, "rewards/accuracies": 0.5, "rewards/chosen": -1.297821283340454, "rewards/margins": 1.2551591396331787, "rewards/rejected": -2.552980422973633, "step": 5482 }, { "epoch": 0.64, "learning_rate": 1.1029880713357742e-07, "logits/chosen": -2.361585855484009, "logits/rejected": -2.5484514236450195, "logps/chosen": -354.91094970703125, "logps/rejected": -303.721435546875, "loss": 0.1664, "rewards/accuracies": 0.875, "rewards/chosen": -1.5444213151931763, "rewards/margins": 2.629948377609253, "rewards/rejected": -4.174369812011719, "step": 5483 }, { "epoch": 0.64, "learning_rate": 1.1026337545765914e-07, "logits/chosen": -2.4451351165771484, "logits/rejected": -2.2414422035217285, "logps/chosen": -248.25531005859375, "logps/rejected": -261.5409851074219, "loss": 0.2349, "rewards/accuracies": 1.0, "rewards/chosen": -0.44500476121902466, "rewards/margins": 2.1578893661499023, "rewards/rejected": -2.6028940677642822, "step": 5484 }, { "epoch": 0.64, "learning_rate": 1.1022794378174087e-07, "logits/chosen": -2.6161246299743652, "logits/rejected": -2.595123291015625, "logps/chosen": -253.56582641601562, "logps/rejected": -354.81890869140625, "loss": 0.2084, "rewards/accuracies": 0.875, "rewards/chosen": -0.38251179456710815, "rewards/margins": 3.4004383087158203, "rewards/rejected": -3.782949924468994, "step": 5485 }, { "epoch": 0.64, "learning_rate": 1.101925121058226e-07, "logits/chosen": -2.165921449661255, "logits/rejected": -2.4346415996551514, "logps/chosen": -241.74301147460938, "logps/rejected": -145.16175842285156, "loss": 0.6967, "rewards/accuracies": 0.625, "rewards/chosen": -1.4942351579666138, "rewards/margins": 1.2649438381195068, "rewards/rejected": -2.75917911529541, "step": 5486 }, { "epoch": 0.64, "learning_rate": 1.1015708042990433e-07, "logits/chosen": -2.009830951690674, "logits/rejected": -2.1575984954833984, "logps/chosen": -333.8755187988281, "logps/rejected": -221.09906005859375, "loss": 0.6292, "rewards/accuracies": 0.75, "rewards/chosen": -1.1169664859771729, "rewards/margins": 1.103528618812561, "rewards/rejected": -2.2204952239990234, "step": 5487 }, { "epoch": 0.64, "learning_rate": 1.1012164875398607e-07, "logits/chosen": -2.66276216506958, "logits/rejected": -2.749074697494507, "logps/chosen": -345.73553466796875, "logps/rejected": -308.97259521484375, "loss": 0.2267, "rewards/accuracies": 0.875, "rewards/chosen": -0.7621593475341797, "rewards/margins": 3.758425235748291, "rewards/rejected": -4.520584583282471, "step": 5488 }, { "epoch": 0.64, "learning_rate": 1.1008621707806779e-07, "logits/chosen": -1.7442905902862549, "logits/rejected": -1.8624660968780518, "logps/chosen": -242.27590942382812, "logps/rejected": -298.80938720703125, "loss": 0.2224, "rewards/accuracies": 0.875, "rewards/chosen": -0.4078303575515747, "rewards/margins": 2.355605363845825, "rewards/rejected": -2.7634356021881104, "step": 5489 }, { "epoch": 0.64, "learning_rate": 1.1005078540214951e-07, "logits/chosen": -2.168128728866577, "logits/rejected": -1.9911104440689087, "logps/chosen": -169.77000427246094, "logps/rejected": -207.755615234375, "loss": 0.3999, "rewards/accuracies": 0.75, "rewards/chosen": -0.6974061131477356, "rewards/margins": 1.2795287370681763, "rewards/rejected": -1.976934790611267, "step": 5490 }, { "epoch": 0.64, "learning_rate": 1.1001535372623125e-07, "logits/chosen": -2.4600863456726074, "logits/rejected": -2.760425329208374, "logps/chosen": -251.51736450195312, "logps/rejected": -187.92611694335938, "loss": 0.4639, "rewards/accuracies": 0.875, "rewards/chosen": -1.474007487297058, "rewards/margins": 2.3428795337677, "rewards/rejected": -3.8168869018554688, "step": 5491 }, { "epoch": 0.64, "learning_rate": 1.0997992205031297e-07, "logits/chosen": -2.4894893169403076, "logits/rejected": -2.423626661300659, "logps/chosen": -302.7921447753906, "logps/rejected": -195.44076538085938, "loss": 0.3864, "rewards/accuracies": 0.875, "rewards/chosen": -0.5127599835395813, "rewards/margins": 1.6179898977279663, "rewards/rejected": -2.1307497024536133, "step": 5492 }, { "epoch": 0.64, "learning_rate": 1.0994449037439472e-07, "logits/chosen": -2.753323793411255, "logits/rejected": -2.7221145629882812, "logps/chosen": -224.5933837890625, "logps/rejected": -203.54347229003906, "loss": 0.1975, "rewards/accuracies": 0.875, "rewards/chosen": -0.3556295931339264, "rewards/margins": 2.7632105350494385, "rewards/rejected": -3.118839740753174, "step": 5493 }, { "epoch": 0.64, "learning_rate": 1.0990905869847644e-07, "logits/chosen": -1.559370756149292, "logits/rejected": -1.891071081161499, "logps/chosen": -416.7593994140625, "logps/rejected": -322.3216552734375, "loss": 1.4343, "rewards/accuracies": 0.625, "rewards/chosen": -2.286785125732422, "rewards/margins": 0.3305429220199585, "rewards/rejected": -2.61732816696167, "step": 5494 }, { "epoch": 0.64, "learning_rate": 1.0987362702255816e-07, "logits/chosen": -2.3185677528381348, "logits/rejected": -2.676105499267578, "logps/chosen": -423.58221435546875, "logps/rejected": -236.094482421875, "loss": 0.1265, "rewards/accuracies": 1.0, "rewards/chosen": -0.4550972580909729, "rewards/margins": 3.681673049926758, "rewards/rejected": -4.136770248413086, "step": 5495 }, { "epoch": 0.64, "learning_rate": 1.098381953466399e-07, "logits/chosen": -1.8339810371398926, "logits/rejected": -1.78395676612854, "logps/chosen": -229.42747497558594, "logps/rejected": -330.4653015136719, "loss": 0.173, "rewards/accuracies": 1.0, "rewards/chosen": -0.8713713884353638, "rewards/margins": 2.4712955951690674, "rewards/rejected": -3.3426666259765625, "step": 5496 }, { "epoch": 0.64, "learning_rate": 1.0980276367072162e-07, "logits/chosen": -2.520328998565674, "logits/rejected": -2.4316155910491943, "logps/chosen": -264.7857971191406, "logps/rejected": -253.3126220703125, "loss": 0.567, "rewards/accuracies": 0.75, "rewards/chosen": -1.3359676599502563, "rewards/margins": 0.8190702199935913, "rewards/rejected": -2.1550378799438477, "step": 5497 }, { "epoch": 0.64, "learning_rate": 1.0976733199480334e-07, "logits/chosen": -2.2747983932495117, "logits/rejected": -2.131091594696045, "logps/chosen": -279.14404296875, "logps/rejected": -353.19366455078125, "loss": 0.3465, "rewards/accuracies": 0.875, "rewards/chosen": -0.8522354364395142, "rewards/margins": 1.7580828666687012, "rewards/rejected": -2.610318422317505, "step": 5498 }, { "epoch": 0.64, "learning_rate": 1.0973190031888509e-07, "logits/chosen": -2.477996349334717, "logits/rejected": -2.664045810699463, "logps/chosen": -260.69122314453125, "logps/rejected": -285.224853515625, "loss": 0.3029, "rewards/accuracies": 0.875, "rewards/chosen": -0.8915267586708069, "rewards/margins": 1.9391167163848877, "rewards/rejected": -2.830643653869629, "step": 5499 }, { "epoch": 0.64, "learning_rate": 1.0969646864296681e-07, "logits/chosen": -1.5811165571212769, "logits/rejected": -1.627776026725769, "logps/chosen": -369.02520751953125, "logps/rejected": -388.3089599609375, "loss": 0.5241, "rewards/accuracies": 0.75, "rewards/chosen": -0.533074676990509, "rewards/margins": 1.9415935277938843, "rewards/rejected": -2.474668264389038, "step": 5500 }, { "epoch": 0.64, "learning_rate": 1.0966103696704853e-07, "logits/chosen": -1.6619644165039062, "logits/rejected": -1.5948665142059326, "logps/chosen": -167.68292236328125, "logps/rejected": -220.64105224609375, "loss": 0.2343, "rewards/accuracies": 1.0, "rewards/chosen": -0.6588548421859741, "rewards/margins": 2.0013203620910645, "rewards/rejected": -2.660175085067749, "step": 5501 }, { "epoch": 0.64, "learning_rate": 1.0962560529113027e-07, "logits/chosen": -2.329218864440918, "logits/rejected": -2.5369369983673096, "logps/chosen": -324.39654541015625, "logps/rejected": -259.3008728027344, "loss": 0.1806, "rewards/accuracies": 1.0, "rewards/chosen": -1.3681390285491943, "rewards/margins": 2.939256191253662, "rewards/rejected": -4.307394981384277, "step": 5502 }, { "epoch": 0.64, "learning_rate": 1.0959017361521199e-07, "logits/chosen": -2.079803466796875, "logits/rejected": -2.160726308822632, "logps/chosen": -307.9873352050781, "logps/rejected": -310.6032409667969, "loss": 0.3676, "rewards/accuracies": 0.875, "rewards/chosen": -1.1198577880859375, "rewards/margins": 2.0183143615722656, "rewards/rejected": -3.138171911239624, "step": 5503 }, { "epoch": 0.64, "learning_rate": 1.0955474193929371e-07, "logits/chosen": -2.4428975582122803, "logits/rejected": -1.9963269233703613, "logps/chosen": -289.301025390625, "logps/rejected": -459.22137451171875, "loss": 0.8773, "rewards/accuracies": 0.625, "rewards/chosen": -0.7280832529067993, "rewards/margins": 1.1040599346160889, "rewards/rejected": -1.8321431875228882, "step": 5504 }, { "epoch": 0.64, "learning_rate": 1.0951931026337546e-07, "logits/chosen": -2.336620569229126, "logits/rejected": -2.431709051132202, "logps/chosen": -169.52076721191406, "logps/rejected": -162.01190185546875, "loss": 0.4808, "rewards/accuracies": 0.75, "rewards/chosen": -1.5360898971557617, "rewards/margins": 2.1919946670532227, "rewards/rejected": -3.7280845642089844, "step": 5505 }, { "epoch": 0.64, "learning_rate": 1.0948387858745718e-07, "logits/chosen": -2.765146255493164, "logits/rejected": -2.6556320190429688, "logps/chosen": -176.542724609375, "logps/rejected": -241.50125122070312, "loss": 0.4591, "rewards/accuracies": 0.75, "rewards/chosen": -0.8933104872703552, "rewards/margins": 1.1431083679199219, "rewards/rejected": -2.036418914794922, "step": 5506 }, { "epoch": 0.64, "learning_rate": 1.094484469115389e-07, "logits/chosen": -2.508908987045288, "logits/rejected": -2.623159885406494, "logps/chosen": -191.87985229492188, "logps/rejected": -210.1908416748047, "loss": 0.2431, "rewards/accuracies": 1.0, "rewards/chosen": -0.5041604042053223, "rewards/margins": 2.367405891418457, "rewards/rejected": -2.8715660572052, "step": 5507 }, { "epoch": 0.64, "learning_rate": 1.0941301523562064e-07, "logits/chosen": -2.685666084289551, "logits/rejected": -2.656187057495117, "logps/chosen": -199.7700958251953, "logps/rejected": -311.2677001953125, "loss": 0.4329, "rewards/accuracies": 0.75, "rewards/chosen": -0.6700700521469116, "rewards/margins": 1.7948205471038818, "rewards/rejected": -2.464890718460083, "step": 5508 }, { "epoch": 0.64, "learning_rate": 1.0937758355970236e-07, "logits/chosen": -2.3454654216766357, "logits/rejected": -2.5005979537963867, "logps/chosen": -351.37628173828125, "logps/rejected": -314.9337463378906, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": -1.1744897365570068, "rewards/margins": 3.7373228073120117, "rewards/rejected": -4.911812782287598, "step": 5509 }, { "epoch": 0.64, "learning_rate": 1.0934215188378411e-07, "logits/chosen": -2.7421669960021973, "logits/rejected": -2.9442861080169678, "logps/chosen": -161.73841857910156, "logps/rejected": -175.9002227783203, "loss": 0.4284, "rewards/accuracies": 0.75, "rewards/chosen": -0.32011324167251587, "rewards/margins": 2.252490282058716, "rewards/rejected": -2.572603225708008, "step": 5510 }, { "epoch": 0.64, "learning_rate": 1.0930672020786583e-07, "logits/chosen": -2.6186201572418213, "logits/rejected": -2.59755277633667, "logps/chosen": -214.5706787109375, "logps/rejected": -286.5604248046875, "loss": 0.3111, "rewards/accuracies": 0.75, "rewards/chosen": -0.6353821754455566, "rewards/margins": 3.4341964721679688, "rewards/rejected": -4.069578647613525, "step": 5511 }, { "epoch": 0.64, "learning_rate": 1.0927128853194756e-07, "logits/chosen": -2.8597218990325928, "logits/rejected": -2.8215253353118896, "logps/chosen": -176.03683471679688, "logps/rejected": -236.8227996826172, "loss": 0.7058, "rewards/accuracies": 0.625, "rewards/chosen": -1.8339489698410034, "rewards/margins": 0.6430934071540833, "rewards/rejected": -2.4770424365997314, "step": 5512 }, { "epoch": 0.64, "learning_rate": 1.0923585685602929e-07, "logits/chosen": -1.7799270153045654, "logits/rejected": -1.611576795578003, "logps/chosen": -309.53271484375, "logps/rejected": -343.6754150390625, "loss": 0.3284, "rewards/accuracies": 0.875, "rewards/chosen": -1.0011677742004395, "rewards/margins": 1.8267860412597656, "rewards/rejected": -2.827953815460205, "step": 5513 }, { "epoch": 0.64, "learning_rate": 1.0920042518011101e-07, "logits/chosen": -2.5283117294311523, "logits/rejected": -2.3851239681243896, "logps/chosen": -298.6266784667969, "logps/rejected": -329.6625671386719, "loss": 0.0663, "rewards/accuracies": 1.0, "rewards/chosen": -0.7362421751022339, "rewards/margins": 3.9701457023620605, "rewards/rejected": -4.706387519836426, "step": 5514 }, { "epoch": 0.64, "learning_rate": 1.0916499350419274e-07, "logits/chosen": -2.4814083576202393, "logits/rejected": -2.399545431137085, "logps/chosen": -168.21652221679688, "logps/rejected": -174.70706176757812, "loss": 0.3698, "rewards/accuracies": 0.75, "rewards/chosen": -0.8276681900024414, "rewards/margins": 2.075510025024414, "rewards/rejected": -2.9031782150268555, "step": 5515 }, { "epoch": 0.64, "learning_rate": 1.0912956182827448e-07, "logits/chosen": -2.420590400695801, "logits/rejected": -2.348771572113037, "logps/chosen": -312.38885498046875, "logps/rejected": -185.67625427246094, "loss": 0.3957, "rewards/accuracies": 0.75, "rewards/chosen": -0.6957936882972717, "rewards/margins": 1.8521244525909424, "rewards/rejected": -2.5479180812835693, "step": 5516 }, { "epoch": 0.64, "learning_rate": 1.090941301523562e-07, "logits/chosen": -2.720383644104004, "logits/rejected": -2.557812213897705, "logps/chosen": -128.51434326171875, "logps/rejected": -217.8095245361328, "loss": 0.2455, "rewards/accuracies": 0.875, "rewards/chosen": -0.8978371024131775, "rewards/margins": 1.972967505455017, "rewards/rejected": -2.87080454826355, "step": 5517 }, { "epoch": 0.64, "learning_rate": 1.0905869847643793e-07, "logits/chosen": -2.7188265323638916, "logits/rejected": -2.6899116039276123, "logps/chosen": -129.9900360107422, "logps/rejected": -234.42803955078125, "loss": 0.652, "rewards/accuracies": 0.625, "rewards/chosen": -0.47055068612098694, "rewards/margins": 2.3679323196411133, "rewards/rejected": -2.8384828567504883, "step": 5518 }, { "epoch": 0.64, "learning_rate": 1.0902326680051966e-07, "logits/chosen": -2.5070927143096924, "logits/rejected": -2.381716251373291, "logps/chosen": -258.263671875, "logps/rejected": -278.0553283691406, "loss": 0.1947, "rewards/accuracies": 1.0, "rewards/chosen": -0.5146371722221375, "rewards/margins": 3.9579153060913086, "rewards/rejected": -4.47255277633667, "step": 5519 }, { "epoch": 0.64, "learning_rate": 1.0898783512460139e-07, "logits/chosen": -1.9693636894226074, "logits/rejected": -1.9643385410308838, "logps/chosen": -245.49362182617188, "logps/rejected": -286.5799560546875, "loss": 0.5093, "rewards/accuracies": 0.875, "rewards/chosen": -0.8815093040466309, "rewards/margins": 1.5759446620941162, "rewards/rejected": -2.457453966140747, "step": 5520 }, { "epoch": 0.64, "learning_rate": 1.0895240344868311e-07, "logits/chosen": -2.1263818740844727, "logits/rejected": -2.4805684089660645, "logps/chosen": -274.5559387207031, "logps/rejected": -207.94520568847656, "loss": 0.4198, "rewards/accuracies": 0.75, "rewards/chosen": -1.1102595329284668, "rewards/margins": 1.5282881259918213, "rewards/rejected": -2.638547658920288, "step": 5521 }, { "epoch": 0.64, "learning_rate": 1.0891697177276486e-07, "logits/chosen": -2.340707302093506, "logits/rejected": -2.7081639766693115, "logps/chosen": -265.44415283203125, "logps/rejected": -231.94821166992188, "loss": 0.501, "rewards/accuracies": 0.875, "rewards/chosen": -0.8569229245185852, "rewards/margins": 0.7735512852668762, "rewards/rejected": -1.6304740905761719, "step": 5522 }, { "epoch": 0.64, "learning_rate": 1.0888154009684658e-07, "logits/chosen": -2.3438587188720703, "logits/rejected": -2.70676851272583, "logps/chosen": -479.60821533203125, "logps/rejected": -187.53616333007812, "loss": 1.1443, "rewards/accuracies": 0.625, "rewards/chosen": -1.6542689800262451, "rewards/margins": 0.4966229796409607, "rewards/rejected": -2.1508922576904297, "step": 5523 }, { "epoch": 0.64, "learning_rate": 1.088461084209283e-07, "logits/chosen": -2.5530643463134766, "logits/rejected": -2.318917989730835, "logps/chosen": -314.6600646972656, "logps/rejected": -309.53070068359375, "loss": 0.1667, "rewards/accuracies": 1.0, "rewards/chosen": -0.13424380123615265, "rewards/margins": 2.321531295776367, "rewards/rejected": -2.455775022506714, "step": 5524 }, { "epoch": 0.64, "learning_rate": 1.0881067674501004e-07, "logits/chosen": -2.5879530906677246, "logits/rejected": -2.6974809169769287, "logps/chosen": -168.13638305664062, "logps/rejected": -320.28570556640625, "loss": 0.2282, "rewards/accuracies": 1.0, "rewards/chosen": -0.3991885483264923, "rewards/margins": 2.07222318649292, "rewards/rejected": -2.47141170501709, "step": 5525 }, { "epoch": 0.64, "learning_rate": 1.0877524506909176e-07, "logits/chosen": -1.8821018934249878, "logits/rejected": -2.1345057487487793, "logps/chosen": -296.1561279296875, "logps/rejected": -163.584716796875, "loss": 0.2191, "rewards/accuracies": 1.0, "rewards/chosen": -0.03734889253973961, "rewards/margins": 2.1962034702301025, "rewards/rejected": -2.2335524559020996, "step": 5526 }, { "epoch": 0.64, "learning_rate": 1.0873981339317348e-07, "logits/chosen": -2.4681999683380127, "logits/rejected": -2.197720527648926, "logps/chosen": -231.4757080078125, "logps/rejected": -347.0568542480469, "loss": 0.2371, "rewards/accuracies": 1.0, "rewards/chosen": -0.7732428908348083, "rewards/margins": 2.0871329307556152, "rewards/rejected": -2.8603758811950684, "step": 5527 }, { "epoch": 0.64, "learning_rate": 1.0870438171725523e-07, "logits/chosen": -2.2544362545013428, "logits/rejected": -2.145930767059326, "logps/chosen": -275.9822082519531, "logps/rejected": -275.5411071777344, "loss": 0.3039, "rewards/accuracies": 0.875, "rewards/chosen": -0.9949970841407776, "rewards/margins": 2.696363687515259, "rewards/rejected": -3.6913604736328125, "step": 5528 }, { "epoch": 0.64, "learning_rate": 1.0866895004133695e-07, "logits/chosen": -2.8618671894073486, "logits/rejected": -2.79007625579834, "logps/chosen": -255.21202087402344, "logps/rejected": -317.13995361328125, "loss": 0.2349, "rewards/accuracies": 0.875, "rewards/chosen": -0.9057572484016418, "rewards/margins": 2.5286638736724854, "rewards/rejected": -3.4344210624694824, "step": 5529 }, { "epoch": 0.64, "learning_rate": 1.0863351836541869e-07, "logits/chosen": -2.897818088531494, "logits/rejected": -2.580827474594116, "logps/chosen": -371.13116455078125, "logps/rejected": -256.207275390625, "loss": 0.294, "rewards/accuracies": 0.875, "rewards/chosen": -1.7634882926940918, "rewards/margins": 2.5742335319519043, "rewards/rejected": -4.337721824645996, "step": 5530 }, { "epoch": 0.64, "learning_rate": 1.0859808668950041e-07, "logits/chosen": -1.9573509693145752, "logits/rejected": -2.30708646774292, "logps/chosen": -410.7255554199219, "logps/rejected": -444.55596923828125, "loss": 0.1752, "rewards/accuracies": 1.0, "rewards/chosen": -0.7326204776763916, "rewards/margins": 2.6319923400878906, "rewards/rejected": -3.364612579345703, "step": 5531 }, { "epoch": 0.64, "learning_rate": 1.0856265501358213e-07, "logits/chosen": -2.579740524291992, "logits/rejected": -2.6268200874328613, "logps/chosen": -387.17431640625, "logps/rejected": -264.77313232421875, "loss": 0.2798, "rewards/accuracies": 0.875, "rewards/chosen": -0.6288034915924072, "rewards/margins": 1.4032468795776367, "rewards/rejected": -2.032050371170044, "step": 5532 }, { "epoch": 0.64, "learning_rate": 1.0852722333766387e-07, "logits/chosen": -2.310770273208618, "logits/rejected": -2.297313690185547, "logps/chosen": -239.90469360351562, "logps/rejected": -194.91561889648438, "loss": 0.4838, "rewards/accuracies": 0.625, "rewards/chosen": -0.8780336380004883, "rewards/margins": 1.2692618370056152, "rewards/rejected": -2.1472954750061035, "step": 5533 }, { "epoch": 0.64, "learning_rate": 1.084917916617456e-07, "logits/chosen": -2.4069600105285645, "logits/rejected": -2.3253746032714844, "logps/chosen": -228.01458740234375, "logps/rejected": -237.7639923095703, "loss": 0.2783, "rewards/accuracies": 0.875, "rewards/chosen": -0.6791322231292725, "rewards/margins": 2.6979074478149414, "rewards/rejected": -3.377039670944214, "step": 5534 }, { "epoch": 0.64, "learning_rate": 1.0845635998582732e-07, "logits/chosen": -2.0040555000305176, "logits/rejected": -2.3793389797210693, "logps/chosen": -536.1353759765625, "logps/rejected": -227.88751220703125, "loss": 0.3706, "rewards/accuracies": 0.875, "rewards/chosen": -0.7483206987380981, "rewards/margins": 1.6440987586975098, "rewards/rejected": -2.3924198150634766, "step": 5535 }, { "epoch": 0.64, "learning_rate": 1.0842092830990906e-07, "logits/chosen": -2.166121006011963, "logits/rejected": -2.568272590637207, "logps/chosen": -304.8279724121094, "logps/rejected": -256.71380615234375, "loss": 0.6612, "rewards/accuracies": 0.75, "rewards/chosen": -0.9218889474868774, "rewards/margins": 3.310692310333252, "rewards/rejected": -4.23258113861084, "step": 5536 }, { "epoch": 0.64, "learning_rate": 1.0838549663399078e-07, "logits/chosen": -1.910620093345642, "logits/rejected": -1.955139398574829, "logps/chosen": -277.6504821777344, "logps/rejected": -232.3689727783203, "loss": 0.9325, "rewards/accuracies": 0.625, "rewards/chosen": -2.4458885192871094, "rewards/margins": 0.35922497510910034, "rewards/rejected": -2.8051135540008545, "step": 5537 }, { "epoch": 0.64, "learning_rate": 1.083500649580725e-07, "logits/chosen": -2.5261237621307373, "logits/rejected": -2.7145235538482666, "logps/chosen": -290.4201354980469, "logps/rejected": -190.94093322753906, "loss": 0.3771, "rewards/accuracies": 0.875, "rewards/chosen": -1.00648832321167, "rewards/margins": 1.67669677734375, "rewards/rejected": -2.68318510055542, "step": 5538 }, { "epoch": 0.64, "learning_rate": 1.0831463328215424e-07, "logits/chosen": -1.835139274597168, "logits/rejected": -1.7962489128112793, "logps/chosen": -272.0028076171875, "logps/rejected": -284.15087890625, "loss": 0.7982, "rewards/accuracies": 0.375, "rewards/chosen": -1.8461220264434814, "rewards/margins": 0.16179919242858887, "rewards/rejected": -2.0079212188720703, "step": 5539 }, { "epoch": 0.64, "learning_rate": 1.0827920160623597e-07, "logits/chosen": -2.3012776374816895, "logits/rejected": -2.394357204437256, "logps/chosen": -134.8734893798828, "logps/rejected": -180.5397186279297, "loss": 0.6008, "rewards/accuracies": 0.5, "rewards/chosen": -1.568005084991455, "rewards/margins": 2.0915956497192383, "rewards/rejected": -3.6596007347106934, "step": 5540 }, { "epoch": 0.64, "learning_rate": 1.082437699303177e-07, "logits/chosen": -2.5282986164093018, "logits/rejected": -2.422687292098999, "logps/chosen": -277.6493835449219, "logps/rejected": -269.4349365234375, "loss": 0.4091, "rewards/accuracies": 0.75, "rewards/chosen": -1.3406157493591309, "rewards/margins": 1.5547951459884644, "rewards/rejected": -2.8954107761383057, "step": 5541 }, { "epoch": 0.64, "learning_rate": 1.0820833825439943e-07, "logits/chosen": -2.6439990997314453, "logits/rejected": -2.7426793575286865, "logps/chosen": -372.94256591796875, "logps/rejected": -306.2283630371094, "loss": 0.1497, "rewards/accuracies": 1.0, "rewards/chosen": -0.3622992932796478, "rewards/margins": 3.6952764987945557, "rewards/rejected": -4.057575702667236, "step": 5542 }, { "epoch": 0.64, "learning_rate": 1.0817290657848115e-07, "logits/chosen": -2.5075714588165283, "logits/rejected": -2.7141528129577637, "logps/chosen": -239.81329345703125, "logps/rejected": -314.10968017578125, "loss": 0.1632, "rewards/accuracies": 1.0, "rewards/chosen": 0.1700737476348877, "rewards/margins": 3.3333075046539307, "rewards/rejected": -3.163233757019043, "step": 5543 }, { "epoch": 0.64, "learning_rate": 1.0813747490256288e-07, "logits/chosen": -2.215956211090088, "logits/rejected": -2.1957297325134277, "logps/chosen": -236.16946411132812, "logps/rejected": -267.1487731933594, "loss": 0.3835, "rewards/accuracies": 0.875, "rewards/chosen": -0.9504519701004028, "rewards/margins": 2.6554319858551025, "rewards/rejected": -3.605884075164795, "step": 5544 }, { "epoch": 0.65, "learning_rate": 1.0810204322664462e-07, "logits/chosen": -2.5025620460510254, "logits/rejected": -2.482163667678833, "logps/chosen": -315.2698669433594, "logps/rejected": -324.51513671875, "loss": 0.3941, "rewards/accuracies": 0.875, "rewards/chosen": -0.48086681962013245, "rewards/margins": 2.882641553878784, "rewards/rejected": -3.363508462905884, "step": 5545 }, { "epoch": 0.65, "learning_rate": 1.0806661155072635e-07, "logits/chosen": -2.67179536819458, "logits/rejected": -2.3595356941223145, "logps/chosen": -123.38778686523438, "logps/rejected": -217.46038818359375, "loss": 0.2459, "rewards/accuracies": 1.0, "rewards/chosen": -0.40293756127357483, "rewards/margins": 2.5373082160949707, "rewards/rejected": -2.9402456283569336, "step": 5546 }, { "epoch": 0.65, "learning_rate": 1.0803117987480808e-07, "logits/chosen": -2.1732001304626465, "logits/rejected": -2.1965274810791016, "logps/chosen": -283.5172424316406, "logps/rejected": -219.78060913085938, "loss": 0.2575, "rewards/accuracies": 0.875, "rewards/chosen": -1.1102187633514404, "rewards/margins": 2.165741205215454, "rewards/rejected": -3.2759599685668945, "step": 5547 }, { "epoch": 0.65, "learning_rate": 1.079957481988898e-07, "logits/chosen": -2.053701639175415, "logits/rejected": -2.184518814086914, "logps/chosen": -302.4637756347656, "logps/rejected": -248.36785888671875, "loss": 0.1723, "rewards/accuracies": 1.0, "rewards/chosen": -0.4212523102760315, "rewards/margins": 2.355085849761963, "rewards/rejected": -2.7763381004333496, "step": 5548 }, { "epoch": 0.65, "learning_rate": 1.0796031652297153e-07, "logits/chosen": -2.3230438232421875, "logits/rejected": -2.2971608638763428, "logps/chosen": -327.668212890625, "logps/rejected": -267.6793518066406, "loss": 0.4205, "rewards/accuracies": 0.75, "rewards/chosen": -1.112288236618042, "rewards/margins": 1.5340354442596436, "rewards/rejected": -2.6463236808776855, "step": 5549 }, { "epoch": 0.65, "learning_rate": 1.0792488484705326e-07, "logits/chosen": -1.7293477058410645, "logits/rejected": -1.8442070484161377, "logps/chosen": -251.14048767089844, "logps/rejected": -240.60621643066406, "loss": 0.3003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6201245188713074, "rewards/margins": 1.137850284576416, "rewards/rejected": -1.7579747438430786, "step": 5550 }, { "epoch": 0.65, "learning_rate": 1.07889453171135e-07, "logits/chosen": -2.431334972381592, "logits/rejected": -2.274172782897949, "logps/chosen": -257.6517028808594, "logps/rejected": -229.78927612304688, "loss": 0.3421, "rewards/accuracies": 0.875, "rewards/chosen": -0.828568696975708, "rewards/margins": 2.026825189590454, "rewards/rejected": -2.855393886566162, "step": 5551 }, { "epoch": 0.65, "learning_rate": 1.0785402149521672e-07, "logits/chosen": -2.6414785385131836, "logits/rejected": -2.2931535243988037, "logps/chosen": -279.06451416015625, "logps/rejected": -305.9254150390625, "loss": 0.1724, "rewards/accuracies": 1.0, "rewards/chosen": -0.16477230191230774, "rewards/margins": 2.779637336730957, "rewards/rejected": -2.9444096088409424, "step": 5552 }, { "epoch": 0.65, "learning_rate": 1.0781858981929845e-07, "logits/chosen": -2.275362253189087, "logits/rejected": -2.1211600303649902, "logps/chosen": -176.54620361328125, "logps/rejected": -334.2018737792969, "loss": 0.1474, "rewards/accuracies": 1.0, "rewards/chosen": -0.8799606561660767, "rewards/margins": 3.5989603996276855, "rewards/rejected": -4.478921413421631, "step": 5553 }, { "epoch": 0.65, "learning_rate": 1.0778315814338018e-07, "logits/chosen": -1.9199397563934326, "logits/rejected": -2.176997184753418, "logps/chosen": -308.8395080566406, "logps/rejected": -233.494140625, "loss": 0.1672, "rewards/accuracies": 0.875, "rewards/chosen": 0.05099180340766907, "rewards/margins": 3.0103812217712402, "rewards/rejected": -2.9593894481658936, "step": 5554 }, { "epoch": 0.65, "learning_rate": 1.077477264674619e-07, "logits/chosen": -2.470576047897339, "logits/rejected": -2.451507091522217, "logps/chosen": -290.0706481933594, "logps/rejected": -305.48089599609375, "loss": 0.1315, "rewards/accuracies": 1.0, "rewards/chosen": -0.9675343036651611, "rewards/margins": 3.3533926010131836, "rewards/rejected": -4.320926666259766, "step": 5555 }, { "epoch": 0.65, "learning_rate": 1.0771229479154363e-07, "logits/chosen": -2.6342782974243164, "logits/rejected": -2.3942174911499023, "logps/chosen": -346.26416015625, "logps/rejected": -386.4808654785156, "loss": 0.3061, "rewards/accuracies": 0.875, "rewards/chosen": -0.6556344628334045, "rewards/margins": 3.6348209381103516, "rewards/rejected": -4.2904558181762695, "step": 5556 }, { "epoch": 0.65, "learning_rate": 1.0767686311562537e-07, "logits/chosen": -2.1717352867126465, "logits/rejected": -2.5242092609405518, "logps/chosen": -250.3701934814453, "logps/rejected": -202.9127197265625, "loss": 0.3293, "rewards/accuracies": 0.875, "rewards/chosen": -1.1744482517242432, "rewards/margins": 1.5571682453155518, "rewards/rejected": -2.731616497039795, "step": 5557 }, { "epoch": 0.65, "learning_rate": 1.076414314397071e-07, "logits/chosen": -2.5993642807006836, "logits/rejected": -2.5876288414001465, "logps/chosen": -411.9694519042969, "logps/rejected": -349.6552429199219, "loss": 0.2406, "rewards/accuracies": 0.875, "rewards/chosen": -1.0939135551452637, "rewards/margins": 2.4891908168792725, "rewards/rejected": -3.583104133605957, "step": 5558 }, { "epoch": 0.65, "learning_rate": 1.0760599976378883e-07, "logits/chosen": -2.6959004402160645, "logits/rejected": -2.577112913131714, "logps/chosen": -457.5589599609375, "logps/rejected": -270.0552673339844, "loss": 0.273, "rewards/accuracies": 0.875, "rewards/chosen": -0.7154108285903931, "rewards/margins": 3.0198984146118164, "rewards/rejected": -3.735309362411499, "step": 5559 }, { "epoch": 0.65, "learning_rate": 1.0757056808787055e-07, "logits/chosen": -2.3742010593414307, "logits/rejected": -2.4471824169158936, "logps/chosen": -218.62503051757812, "logps/rejected": -210.44580078125, "loss": 0.4101, "rewards/accuracies": 0.875, "rewards/chosen": -0.8074192404747009, "rewards/margins": 2.1079394817352295, "rewards/rejected": -2.915358781814575, "step": 5560 }, { "epoch": 0.65, "learning_rate": 1.0753513641195227e-07, "logits/chosen": -2.5182600021362305, "logits/rejected": -2.4238173961639404, "logps/chosen": -301.5669250488281, "logps/rejected": -195.61203002929688, "loss": 0.6395, "rewards/accuracies": 0.625, "rewards/chosen": -1.1553606986999512, "rewards/margins": 1.823959469795227, "rewards/rejected": -2.9793200492858887, "step": 5561 }, { "epoch": 0.65, "learning_rate": 1.07499704736034e-07, "logits/chosen": -2.5983095169067383, "logits/rejected": -2.600515127182007, "logps/chosen": -211.55308532714844, "logps/rejected": -214.12158203125, "loss": 0.2764, "rewards/accuracies": 0.875, "rewards/chosen": -0.6153090000152588, "rewards/margins": 1.7521321773529053, "rewards/rejected": -2.367441177368164, "step": 5562 }, { "epoch": 0.65, "learning_rate": 1.0746427306011574e-07, "logits/chosen": -1.9259390830993652, "logits/rejected": -2.2441442012786865, "logps/chosen": -401.49371337890625, "logps/rejected": -383.50909423828125, "loss": 0.689, "rewards/accuracies": 0.75, "rewards/chosen": -0.8266350626945496, "rewards/margins": 1.732300043106079, "rewards/rejected": -2.5589349269866943, "step": 5563 }, { "epoch": 0.65, "learning_rate": 1.0742884138419748e-07, "logits/chosen": -1.953871488571167, "logits/rejected": -2.317373275756836, "logps/chosen": -396.308349609375, "logps/rejected": -194.0185546875, "loss": 0.2293, "rewards/accuracies": 1.0, "rewards/chosen": -0.5964360237121582, "rewards/margins": 2.008596897125244, "rewards/rejected": -2.6050329208374023, "step": 5564 }, { "epoch": 0.65, "learning_rate": 1.073934097082792e-07, "logits/chosen": -2.0684657096862793, "logits/rejected": -2.177018642425537, "logps/chosen": -434.3354187011719, "logps/rejected": -389.63226318359375, "loss": 0.6438, "rewards/accuracies": 0.625, "rewards/chosen": -0.5199898481369019, "rewards/margins": 0.9814404249191284, "rewards/rejected": -1.5014302730560303, "step": 5565 }, { "epoch": 0.65, "learning_rate": 1.0735797803236092e-07, "logits/chosen": -1.5296730995178223, "logits/rejected": -2.0231168270111084, "logps/chosen": -394.19659423828125, "logps/rejected": -184.48046875, "loss": 0.7009, "rewards/accuracies": 0.625, "rewards/chosen": -1.4306572675704956, "rewards/margins": 1.125707983970642, "rewards/rejected": -2.556365489959717, "step": 5566 }, { "epoch": 0.65, "learning_rate": 1.0732254635644266e-07, "logits/chosen": -2.3657724857330322, "logits/rejected": -2.442661762237549, "logps/chosen": -139.80911254882812, "logps/rejected": -130.12615966796875, "loss": 0.7822, "rewards/accuracies": 0.5, "rewards/chosen": -1.8504955768585205, "rewards/margins": 0.10454201698303223, "rewards/rejected": -1.9550375938415527, "step": 5567 }, { "epoch": 0.65, "learning_rate": 1.0728711468052438e-07, "logits/chosen": -2.454427480697632, "logits/rejected": -2.3742730617523193, "logps/chosen": -180.02435302734375, "logps/rejected": -240.14312744140625, "loss": 0.3357, "rewards/accuracies": 0.75, "rewards/chosen": -0.17115499079227448, "rewards/margins": 2.524588108062744, "rewards/rejected": -2.6957430839538574, "step": 5568 }, { "epoch": 0.65, "learning_rate": 1.0725168300460611e-07, "logits/chosen": -1.8623799085617065, "logits/rejected": -2.0532166957855225, "logps/chosen": -504.10614013671875, "logps/rejected": -468.05377197265625, "loss": 0.3834, "rewards/accuracies": 0.875, "rewards/chosen": 0.09231072664260864, "rewards/margins": 1.7501569986343384, "rewards/rejected": -1.6578463315963745, "step": 5569 }, { "epoch": 0.65, "learning_rate": 1.0721625132868785e-07, "logits/chosen": -1.8625051975250244, "logits/rejected": -1.8379905223846436, "logps/chosen": -299.1378479003906, "logps/rejected": -348.2354736328125, "loss": 0.3557, "rewards/accuracies": 0.875, "rewards/chosen": -0.7667273879051208, "rewards/margins": 1.4400935173034668, "rewards/rejected": -2.2068209648132324, "step": 5570 }, { "epoch": 0.65, "learning_rate": 1.0718081965276957e-07, "logits/chosen": -1.5901904106140137, "logits/rejected": -1.7126057147979736, "logps/chosen": -490.5355224609375, "logps/rejected": -422.37628173828125, "loss": 0.4582, "rewards/accuracies": 0.875, "rewards/chosen": -1.3320876359939575, "rewards/margins": 1.642423152923584, "rewards/rejected": -2.974510908126831, "step": 5571 }, { "epoch": 0.65, "learning_rate": 1.0714538797685129e-07, "logits/chosen": -2.412890672683716, "logits/rejected": -2.2661798000335693, "logps/chosen": -331.3213806152344, "logps/rejected": -352.2065734863281, "loss": 0.2747, "rewards/accuracies": 0.875, "rewards/chosen": -0.0036644116044044495, "rewards/margins": 1.591478943824768, "rewards/rejected": -1.5951433181762695, "step": 5572 }, { "epoch": 0.65, "learning_rate": 1.0710995630093303e-07, "logits/chosen": -2.6523828506469727, "logits/rejected": -2.6323821544647217, "logps/chosen": -191.44619750976562, "logps/rejected": -283.21112060546875, "loss": 0.6983, "rewards/accuracies": 0.75, "rewards/chosen": -1.2590867280960083, "rewards/margins": 2.5970253944396973, "rewards/rejected": -3.856112003326416, "step": 5573 }, { "epoch": 0.65, "learning_rate": 1.0707452462501475e-07, "logits/chosen": -2.4959301948547363, "logits/rejected": -2.537555694580078, "logps/chosen": -231.56277465820312, "logps/rejected": -269.70928955078125, "loss": 0.6447, "rewards/accuracies": 0.75, "rewards/chosen": -0.9521153569221497, "rewards/margins": 1.214499831199646, "rewards/rejected": -2.1666150093078613, "step": 5574 }, { "epoch": 0.65, "learning_rate": 1.070390929490965e-07, "logits/chosen": -2.831625461578369, "logits/rejected": -2.7964558601379395, "logps/chosen": -126.67218780517578, "logps/rejected": -111.9854965209961, "loss": 0.3724, "rewards/accuracies": 0.75, "rewards/chosen": -0.6167360544204712, "rewards/margins": 1.2312123775482178, "rewards/rejected": -1.8479483127593994, "step": 5575 }, { "epoch": 0.65, "learning_rate": 1.0700366127317822e-07, "logits/chosen": -2.536048412322998, "logits/rejected": -2.7987852096557617, "logps/chosen": -471.2641906738281, "logps/rejected": -311.6507568359375, "loss": 0.6253, "rewards/accuracies": 0.75, "rewards/chosen": -1.280011773109436, "rewards/margins": 1.086187720298767, "rewards/rejected": -2.366199493408203, "step": 5576 }, { "epoch": 0.65, "learning_rate": 1.0696822959725994e-07, "logits/chosen": -2.625699043273926, "logits/rejected": -2.8159613609313965, "logps/chosen": -230.58999633789062, "logps/rejected": -162.9471435546875, "loss": 0.287, "rewards/accuracies": 1.0, "rewards/chosen": -0.37150681018829346, "rewards/margins": 1.4768213033676147, "rewards/rejected": -1.8483282327651978, "step": 5577 }, { "epoch": 0.65, "learning_rate": 1.0693279792134167e-07, "logits/chosen": -2.564779043197632, "logits/rejected": -2.591029167175293, "logps/chosen": -234.09881591796875, "logps/rejected": -225.9639434814453, "loss": 0.248, "rewards/accuracies": 1.0, "rewards/chosen": -0.6889725923538208, "rewards/margins": 2.4834468364715576, "rewards/rejected": -3.172419548034668, "step": 5578 }, { "epoch": 0.65, "learning_rate": 1.068973662454234e-07, "logits/chosen": -2.2685186862945557, "logits/rejected": -2.6765265464782715, "logps/chosen": -451.13800048828125, "logps/rejected": -267.298828125, "loss": 0.262, "rewards/accuracies": 0.875, "rewards/chosen": -0.47293537855148315, "rewards/margins": 1.9983187913894653, "rewards/rejected": -2.4712541103363037, "step": 5579 }, { "epoch": 0.65, "learning_rate": 1.0686193456950514e-07, "logits/chosen": -2.419560432434082, "logits/rejected": -2.7063004970550537, "logps/chosen": -326.1977233886719, "logps/rejected": -292.1570129394531, "loss": 0.3343, "rewards/accuracies": 0.75, "rewards/chosen": -0.3205970227718353, "rewards/margins": 1.8505513668060303, "rewards/rejected": -2.1711483001708984, "step": 5580 }, { "epoch": 0.65, "learning_rate": 1.0682650289358687e-07, "logits/chosen": -2.2289986610412598, "logits/rejected": -2.3114376068115234, "logps/chosen": -191.40354919433594, "logps/rejected": -264.1790466308594, "loss": 0.3125, "rewards/accuracies": 0.875, "rewards/chosen": -0.6331544518470764, "rewards/margins": 3.2315258979797363, "rewards/rejected": -3.864680290222168, "step": 5581 }, { "epoch": 0.65, "learning_rate": 1.067910712176686e-07, "logits/chosen": -2.594210624694824, "logits/rejected": -2.479752540588379, "logps/chosen": -238.67538452148438, "logps/rejected": -277.4839782714844, "loss": 0.342, "rewards/accuracies": 0.875, "rewards/chosen": -0.9305959939956665, "rewards/margins": 2.8977179527282715, "rewards/rejected": -3.8283140659332275, "step": 5582 }, { "epoch": 0.65, "learning_rate": 1.0675563954175032e-07, "logits/chosen": -2.764878273010254, "logits/rejected": -2.8600916862487793, "logps/chosen": -383.59796142578125, "logps/rejected": -231.89883422851562, "loss": 0.2092, "rewards/accuracies": 0.875, "rewards/chosen": -0.27205565571784973, "rewards/margins": 3.286550998687744, "rewards/rejected": -3.5586066246032715, "step": 5583 }, { "epoch": 0.65, "learning_rate": 1.0672020786583205e-07, "logits/chosen": -2.5947067737579346, "logits/rejected": -2.7330265045166016, "logps/chosen": -274.2263488769531, "logps/rejected": -209.89559936523438, "loss": 1.1811, "rewards/accuracies": 0.625, "rewards/chosen": -1.778704047203064, "rewards/margins": 0.6674702167510986, "rewards/rejected": -2.446174144744873, "step": 5584 }, { "epoch": 0.65, "learning_rate": 1.0668477618991377e-07, "logits/chosen": -2.385666608810425, "logits/rejected": -2.571998119354248, "logps/chosen": -392.3490905761719, "logps/rejected": -242.2446746826172, "loss": 0.4291, "rewards/accuracies": 0.875, "rewards/chosen": -0.6309361457824707, "rewards/margins": 1.5469849109649658, "rewards/rejected": -2.1779210567474365, "step": 5585 }, { "epoch": 0.65, "learning_rate": 1.0664934451399551e-07, "logits/chosen": -2.7479805946350098, "logits/rejected": -2.655322551727295, "logps/chosen": -341.26025390625, "logps/rejected": -318.4857177734375, "loss": 0.6615, "rewards/accuracies": 0.625, "rewards/chosen": -1.2927355766296387, "rewards/margins": 1.186115026473999, "rewards/rejected": -2.478850841522217, "step": 5586 }, { "epoch": 0.65, "learning_rate": 1.0661391283807724e-07, "logits/chosen": -2.2025818824768066, "logits/rejected": -1.968019723892212, "logps/chosen": -354.1976318359375, "logps/rejected": -539.114990234375, "loss": 0.3948, "rewards/accuracies": 0.75, "rewards/chosen": -1.2670934200286865, "rewards/margins": 2.383455753326416, "rewards/rejected": -3.6505491733551025, "step": 5587 }, { "epoch": 0.65, "learning_rate": 1.0657848116215897e-07, "logits/chosen": -2.154465675354004, "logits/rejected": -2.0366625785827637, "logps/chosen": -259.64947509765625, "logps/rejected": -261.9423522949219, "loss": 0.3547, "rewards/accuracies": 0.75, "rewards/chosen": -0.503532886505127, "rewards/margins": 1.807664155960083, "rewards/rejected": -2.311196804046631, "step": 5588 }, { "epoch": 0.65, "learning_rate": 1.0654304948624069e-07, "logits/chosen": -1.9179420471191406, "logits/rejected": -1.9974387884140015, "logps/chosen": -195.2952117919922, "logps/rejected": -333.1743469238281, "loss": 0.4323, "rewards/accuracies": 0.75, "rewards/chosen": -0.5956999659538269, "rewards/margins": 2.8740077018737793, "rewards/rejected": -3.469707489013672, "step": 5589 }, { "epoch": 0.65, "learning_rate": 1.0650761781032242e-07, "logits/chosen": -2.402742624282837, "logits/rejected": -2.2347519397735596, "logps/chosen": -182.13858032226562, "logps/rejected": -225.77944946289062, "loss": 0.2855, "rewards/accuracies": 1.0, "rewards/chosen": -0.5126633644104004, "rewards/margins": 1.6866238117218018, "rewards/rejected": -2.199287176132202, "step": 5590 }, { "epoch": 0.65, "learning_rate": 1.0647218613440415e-07, "logits/chosen": -2.3169713020324707, "logits/rejected": -2.43715500831604, "logps/chosen": -335.8240661621094, "logps/rejected": -298.69091796875, "loss": 0.1182, "rewards/accuracies": 1.0, "rewards/chosen": -0.8508182764053345, "rewards/margins": 3.319321632385254, "rewards/rejected": -4.170139789581299, "step": 5591 }, { "epoch": 0.65, "learning_rate": 1.064367544584859e-07, "logits/chosen": -1.9033262729644775, "logits/rejected": -1.9991397857666016, "logps/chosen": -342.1416015625, "logps/rejected": -290.2689514160156, "loss": 0.3406, "rewards/accuracies": 0.875, "rewards/chosen": -0.9085351228713989, "rewards/margins": 1.2899129390716553, "rewards/rejected": -2.1984481811523438, "step": 5592 }, { "epoch": 0.65, "learning_rate": 1.0640132278256762e-07, "logits/chosen": -2.0126028060913086, "logits/rejected": -2.030104160308838, "logps/chosen": -211.09231567382812, "logps/rejected": -202.5311279296875, "loss": 0.3608, "rewards/accuracies": 0.75, "rewards/chosen": -0.8773146271705627, "rewards/margins": 1.7375333309173584, "rewards/rejected": -2.6148478984832764, "step": 5593 }, { "epoch": 0.65, "learning_rate": 1.0636589110664934e-07, "logits/chosen": -2.6777124404907227, "logits/rejected": -2.897007465362549, "logps/chosen": -420.89666748046875, "logps/rejected": -269.5523376464844, "loss": 0.077, "rewards/accuracies": 1.0, "rewards/chosen": -0.6979645490646362, "rewards/margins": 3.9179677963256836, "rewards/rejected": -4.615932464599609, "step": 5594 }, { "epoch": 0.65, "learning_rate": 1.0633045943073107e-07, "logits/chosen": -2.474107265472412, "logits/rejected": -2.3461694717407227, "logps/chosen": -309.263671875, "logps/rejected": -218.72491455078125, "loss": 0.3595, "rewards/accuracies": 0.875, "rewards/chosen": -0.6593648195266724, "rewards/margins": 1.8954733610153198, "rewards/rejected": -2.554838180541992, "step": 5595 }, { "epoch": 0.65, "learning_rate": 1.062950277548128e-07, "logits/chosen": -2.1780173778533936, "logits/rejected": -2.4122493267059326, "logps/chosen": -496.2576904296875, "logps/rejected": -241.11349487304688, "loss": 0.1731, "rewards/accuracies": 1.0, "rewards/chosen": -0.5786592364311218, "rewards/margins": 2.5009305477142334, "rewards/rejected": -3.079590082168579, "step": 5596 }, { "epoch": 0.65, "learning_rate": 1.0625959607889452e-07, "logits/chosen": -1.707923173904419, "logits/rejected": -1.775159239768982, "logps/chosen": -407.81304931640625, "logps/rejected": -318.01422119140625, "loss": 0.6141, "rewards/accuracies": 0.625, "rewards/chosen": -0.48332005739212036, "rewards/margins": 1.3423819541931152, "rewards/rejected": -1.82570219039917, "step": 5597 }, { "epoch": 0.65, "learning_rate": 1.0622416440297627e-07, "logits/chosen": -2.690600633621216, "logits/rejected": -2.5701708793640137, "logps/chosen": -188.03851318359375, "logps/rejected": -390.8686218261719, "loss": 0.3359, "rewards/accuracies": 0.875, "rewards/chosen": -0.7250154614448547, "rewards/margins": 2.055575370788574, "rewards/rejected": -2.780590772628784, "step": 5598 }, { "epoch": 0.65, "learning_rate": 1.0618873272705799e-07, "logits/chosen": -2.3172032833099365, "logits/rejected": -1.9886953830718994, "logps/chosen": -180.4352264404297, "logps/rejected": -338.8710021972656, "loss": 0.0825, "rewards/accuracies": 1.0, "rewards/chosen": -0.0007446594536304474, "rewards/margins": 4.374330997467041, "rewards/rejected": -4.375075340270996, "step": 5599 }, { "epoch": 0.65, "learning_rate": 1.0615330105113971e-07, "logits/chosen": -2.54811954498291, "logits/rejected": -2.342271566390991, "logps/chosen": -231.7083282470703, "logps/rejected": -307.0648193359375, "loss": 0.4313, "rewards/accuracies": 0.625, "rewards/chosen": -1.7863280773162842, "rewards/margins": 2.6879913806915283, "rewards/rejected": -4.4743194580078125, "step": 5600 }, { "epoch": 0.65, "learning_rate": 1.0611786937522145e-07, "logits/chosen": -2.431410312652588, "logits/rejected": -2.5768730640411377, "logps/chosen": -338.81793212890625, "logps/rejected": -344.4649658203125, "loss": 0.422, "rewards/accuracies": 0.75, "rewards/chosen": -0.1319657415151596, "rewards/margins": 2.1297731399536133, "rewards/rejected": -2.2617387771606445, "step": 5601 }, { "epoch": 0.65, "learning_rate": 1.0608243769930317e-07, "logits/chosen": -1.9816032648086548, "logits/rejected": -2.0000553131103516, "logps/chosen": -209.99826049804688, "logps/rejected": -163.0596466064453, "loss": 0.5682, "rewards/accuracies": 0.75, "rewards/chosen": -0.7610423564910889, "rewards/margins": 1.4955402612686157, "rewards/rejected": -2.256582498550415, "step": 5602 }, { "epoch": 0.65, "learning_rate": 1.0604700602338489e-07, "logits/chosen": -2.6323509216308594, "logits/rejected": -2.7565560340881348, "logps/chosen": -241.28768920898438, "logps/rejected": -217.859130859375, "loss": 0.3784, "rewards/accuracies": 0.75, "rewards/chosen": -0.6914969682693481, "rewards/margins": 2.390824794769287, "rewards/rejected": -3.0823216438293457, "step": 5603 }, { "epoch": 0.65, "learning_rate": 1.0601157434746664e-07, "logits/chosen": -2.4694392681121826, "logits/rejected": -2.491980791091919, "logps/chosen": -191.0977020263672, "logps/rejected": -231.89402770996094, "loss": 0.4856, "rewards/accuracies": 0.75, "rewards/chosen": -0.7997926473617554, "rewards/margins": 1.3519394397735596, "rewards/rejected": -2.1517322063446045, "step": 5604 }, { "epoch": 0.65, "learning_rate": 1.0597614267154836e-07, "logits/chosen": -2.2028019428253174, "logits/rejected": -2.2042770385742188, "logps/chosen": -189.22779846191406, "logps/rejected": -201.02769470214844, "loss": 0.9092, "rewards/accuracies": 0.625, "rewards/chosen": -1.6107237339019775, "rewards/margins": 1.2019548416137695, "rewards/rejected": -2.812678337097168, "step": 5605 }, { "epoch": 0.65, "learning_rate": 1.0594071099563008e-07, "logits/chosen": -2.0499954223632812, "logits/rejected": -2.1228108406066895, "logps/chosen": -295.46307373046875, "logps/rejected": -179.30352783203125, "loss": 0.5153, "rewards/accuracies": 0.625, "rewards/chosen": -1.1482831239700317, "rewards/margins": 1.5624401569366455, "rewards/rejected": -2.710723400115967, "step": 5606 }, { "epoch": 0.65, "learning_rate": 1.0590527931971182e-07, "logits/chosen": -2.259133815765381, "logits/rejected": -2.3870928287506104, "logps/chosen": -478.9671630859375, "logps/rejected": -319.3724365234375, "loss": 0.3526, "rewards/accuracies": 0.75, "rewards/chosen": -0.5126805305480957, "rewards/margins": 1.7548809051513672, "rewards/rejected": -2.267561435699463, "step": 5607 }, { "epoch": 0.65, "learning_rate": 1.0586984764379354e-07, "logits/chosen": -1.7448177337646484, "logits/rejected": -2.165412425994873, "logps/chosen": -486.16912841796875, "logps/rejected": -395.8465270996094, "loss": 0.0629, "rewards/accuracies": 1.0, "rewards/chosen": -0.65990149974823, "rewards/margins": 3.792248249053955, "rewards/rejected": -4.452149391174316, "step": 5608 }, { "epoch": 0.65, "learning_rate": 1.0583441596787526e-07, "logits/chosen": -2.1054742336273193, "logits/rejected": -2.488062858581543, "logps/chosen": -508.16192626953125, "logps/rejected": -388.604248046875, "loss": 0.2532, "rewards/accuracies": 0.75, "rewards/chosen": -0.9517425298690796, "rewards/margins": 2.2195377349853516, "rewards/rejected": -3.1712803840637207, "step": 5609 }, { "epoch": 0.65, "learning_rate": 1.0579898429195701e-07, "logits/chosen": -2.430575370788574, "logits/rejected": -2.5163168907165527, "logps/chosen": -143.40626525878906, "logps/rejected": -150.57357788085938, "loss": 0.1891, "rewards/accuracies": 1.0, "rewards/chosen": -0.7942186594009399, "rewards/margins": 1.9873102903366089, "rewards/rejected": -2.781528949737549, "step": 5610 }, { "epoch": 0.65, "learning_rate": 1.0576355261603873e-07, "logits/chosen": -2.0882678031921387, "logits/rejected": -2.592775821685791, "logps/chosen": -430.4734802246094, "logps/rejected": -193.49856567382812, "loss": 0.4517, "rewards/accuracies": 0.875, "rewards/chosen": -0.3439328074455261, "rewards/margins": 2.3839666843414307, "rewards/rejected": -2.7278993129730225, "step": 5611 }, { "epoch": 0.65, "learning_rate": 1.0572812094012047e-07, "logits/chosen": -2.150719165802002, "logits/rejected": -1.9896302223205566, "logps/chosen": -163.9005126953125, "logps/rejected": -290.6529541015625, "loss": 0.234, "rewards/accuracies": 1.0, "rewards/chosen": -0.23867546021938324, "rewards/margins": 2.4266653060913086, "rewards/rejected": -2.6653409004211426, "step": 5612 }, { "epoch": 0.65, "learning_rate": 1.0569268926420219e-07, "logits/chosen": -2.1129679679870605, "logits/rejected": -2.525690793991089, "logps/chosen": -616.1929931640625, "logps/rejected": -230.37689208984375, "loss": 0.4099, "rewards/accuracies": 0.875, "rewards/chosen": -0.8796554207801819, "rewards/margins": 1.2876133918762207, "rewards/rejected": -2.167268753051758, "step": 5613 }, { "epoch": 0.65, "learning_rate": 1.0565725758828391e-07, "logits/chosen": -2.0620594024658203, "logits/rejected": -1.7740558385849, "logps/chosen": -203.27467346191406, "logps/rejected": -322.9368896484375, "loss": 0.4968, "rewards/accuracies": 0.625, "rewards/chosen": -0.27982550859451294, "rewards/margins": 1.2123559713363647, "rewards/rejected": -1.4921815395355225, "step": 5614 }, { "epoch": 0.65, "learning_rate": 1.0562182591236564e-07, "logits/chosen": -2.437512159347534, "logits/rejected": -2.418700933456421, "logps/chosen": -265.8520812988281, "logps/rejected": -285.62042236328125, "loss": 0.5924, "rewards/accuracies": 0.625, "rewards/chosen": -1.1879808902740479, "rewards/margins": 1.4863914251327515, "rewards/rejected": -2.674372673034668, "step": 5615 }, { "epoch": 0.65, "learning_rate": 1.0558639423644738e-07, "logits/chosen": -1.913207769393921, "logits/rejected": -2.1958112716674805, "logps/chosen": -327.0488586425781, "logps/rejected": -226.95755004882812, "loss": 0.4423, "rewards/accuracies": 0.75, "rewards/chosen": -0.5466669797897339, "rewards/margins": 2.5042710304260254, "rewards/rejected": -3.0509376525878906, "step": 5616 }, { "epoch": 0.65, "learning_rate": 1.055509625605291e-07, "logits/chosen": -2.89825177192688, "logits/rejected": -2.960782051086426, "logps/chosen": -140.8758087158203, "logps/rejected": -187.6630859375, "loss": 0.4172, "rewards/accuracies": 0.875, "rewards/chosen": -0.5988028645515442, "rewards/margins": 3.142946481704712, "rewards/rejected": -3.7417495250701904, "step": 5617 }, { "epoch": 0.65, "learning_rate": 1.0551553088461084e-07, "logits/chosen": -2.6265978813171387, "logits/rejected": -2.6625616550445557, "logps/chosen": -342.4358215332031, "logps/rejected": -273.6228942871094, "loss": 0.3229, "rewards/accuracies": 0.75, "rewards/chosen": -0.9187076687812805, "rewards/margins": 1.7407310009002686, "rewards/rejected": -2.6594386100769043, "step": 5618 }, { "epoch": 0.65, "learning_rate": 1.0548009920869256e-07, "logits/chosen": -2.0639142990112305, "logits/rejected": -2.0048723220825195, "logps/chosen": -192.9703826904297, "logps/rejected": -257.6492919921875, "loss": 0.2617, "rewards/accuracies": 0.875, "rewards/chosen": -0.951195240020752, "rewards/margins": 2.1815037727355957, "rewards/rejected": -3.1326990127563477, "step": 5619 }, { "epoch": 0.65, "learning_rate": 1.0544466753277429e-07, "logits/chosen": -2.1003029346466064, "logits/rejected": -2.1251637935638428, "logps/chosen": -264.3726501464844, "logps/rejected": -288.12408447265625, "loss": 0.2463, "rewards/accuracies": 0.875, "rewards/chosen": -0.22717618942260742, "rewards/margins": 2.2846388816833496, "rewards/rejected": -2.511815071105957, "step": 5620 }, { "epoch": 0.65, "learning_rate": 1.0540923585685603e-07, "logits/chosen": -2.468763589859009, "logits/rejected": -2.245225191116333, "logps/chosen": -267.2696533203125, "logps/rejected": -432.46551513671875, "loss": 0.2394, "rewards/accuracies": 0.875, "rewards/chosen": -0.3838314116001129, "rewards/margins": 3.3134491443634033, "rewards/rejected": -3.6972804069519043, "step": 5621 }, { "epoch": 0.65, "learning_rate": 1.0537380418093776e-07, "logits/chosen": -3.089451789855957, "logits/rejected": -2.968903064727783, "logps/chosen": -270.4367370605469, "logps/rejected": -271.61016845703125, "loss": 0.2842, "rewards/accuracies": 0.875, "rewards/chosen": -1.5795798301696777, "rewards/margins": 3.185483694076538, "rewards/rejected": -4.765063285827637, "step": 5622 }, { "epoch": 0.65, "learning_rate": 1.0533837250501948e-07, "logits/chosen": -2.679521322250366, "logits/rejected": -2.5554208755493164, "logps/chosen": -199.07595825195312, "logps/rejected": -191.9415283203125, "loss": 0.3335, "rewards/accuracies": 0.875, "rewards/chosen": -0.8686326146125793, "rewards/margins": 1.4549545049667358, "rewards/rejected": -2.32358717918396, "step": 5623 }, { "epoch": 0.65, "learning_rate": 1.0530294082910121e-07, "logits/chosen": -2.399594306945801, "logits/rejected": -2.4834470748901367, "logps/chosen": -145.66175842285156, "logps/rejected": -222.60934448242188, "loss": 0.2773, "rewards/accuracies": 0.875, "rewards/chosen": -0.17375105619430542, "rewards/margins": 2.1814072132110596, "rewards/rejected": -2.3551583290100098, "step": 5624 }, { "epoch": 0.65, "learning_rate": 1.0526750915318294e-07, "logits/chosen": -2.6118690967559814, "logits/rejected": -2.5002565383911133, "logps/chosen": -188.09170532226562, "logps/rejected": -255.79156494140625, "loss": 0.2816, "rewards/accuracies": 0.875, "rewards/chosen": -0.9079570770263672, "rewards/margins": 2.4687705039978027, "rewards/rejected": -3.37672758102417, "step": 5625 }, { "epoch": 0.65, "learning_rate": 1.0523207747726466e-07, "logits/chosen": -2.489267587661743, "logits/rejected": -2.6335031986236572, "logps/chosen": -365.405517578125, "logps/rejected": -300.27838134765625, "loss": 0.2642, "rewards/accuracies": 0.75, "rewards/chosen": -0.8886477947235107, "rewards/margins": 2.6124279499053955, "rewards/rejected": -3.501075506210327, "step": 5626 }, { "epoch": 0.65, "learning_rate": 1.0519664580134641e-07, "logits/chosen": -2.9813945293426514, "logits/rejected": -3.0162110328674316, "logps/chosen": -174.27999877929688, "logps/rejected": -214.72152709960938, "loss": 0.4907, "rewards/accuracies": 0.75, "rewards/chosen": -0.664664089679718, "rewards/margins": 1.9335359334945679, "rewards/rejected": -2.5981998443603516, "step": 5627 }, { "epoch": 0.65, "learning_rate": 1.0516121412542813e-07, "logits/chosen": -1.7221534252166748, "logits/rejected": -1.962601661682129, "logps/chosen": -340.79742431640625, "logps/rejected": -297.7884521484375, "loss": 0.5498, "rewards/accuracies": 0.75, "rewards/chosen": -0.9804275035858154, "rewards/margins": 1.3806710243225098, "rewards/rejected": -2.361098527908325, "step": 5628 }, { "epoch": 0.65, "learning_rate": 1.0512578244950986e-07, "logits/chosen": -1.6051430702209473, "logits/rejected": -1.6317306756973267, "logps/chosen": -271.6248474121094, "logps/rejected": -261.099365234375, "loss": 0.7323, "rewards/accuracies": 0.75, "rewards/chosen": -2.1928234100341797, "rewards/margins": 1.8139066696166992, "rewards/rejected": -4.006730556488037, "step": 5629 }, { "epoch": 0.65, "learning_rate": 1.0509035077359159e-07, "logits/chosen": -2.4578018188476562, "logits/rejected": -2.5516843795776367, "logps/chosen": -192.4944610595703, "logps/rejected": -159.92626953125, "loss": 0.5072, "rewards/accuracies": 0.75, "rewards/chosen": -0.8193400502204895, "rewards/margins": 1.9240024089813232, "rewards/rejected": -2.743342399597168, "step": 5630 }, { "epoch": 0.66, "learning_rate": 1.0505491909767331e-07, "logits/chosen": -2.8046793937683105, "logits/rejected": -2.7845048904418945, "logps/chosen": -327.2584228515625, "logps/rejected": -177.72872924804688, "loss": 0.1236, "rewards/accuracies": 1.0, "rewards/chosen": 0.28252071142196655, "rewards/margins": 2.890591621398926, "rewards/rejected": -2.6080708503723145, "step": 5631 }, { "epoch": 0.66, "learning_rate": 1.0501948742175504e-07, "logits/chosen": -2.2264182567596436, "logits/rejected": -2.323803663253784, "logps/chosen": -288.475830078125, "logps/rejected": -424.555419921875, "loss": 0.1292, "rewards/accuracies": 1.0, "rewards/chosen": -0.25940507650375366, "rewards/margins": 3.774869918823242, "rewards/rejected": -4.034275054931641, "step": 5632 }, { "epoch": 0.66, "learning_rate": 1.0498405574583678e-07, "logits/chosen": -2.7708659172058105, "logits/rejected": -2.708275318145752, "logps/chosen": -217.19029235839844, "logps/rejected": -280.57373046875, "loss": 0.3191, "rewards/accuracies": 0.875, "rewards/chosen": -0.47454315423965454, "rewards/margins": 3.2973673343658447, "rewards/rejected": -3.7719106674194336, "step": 5633 }, { "epoch": 0.66, "learning_rate": 1.049486240699185e-07, "logits/chosen": -2.003082275390625, "logits/rejected": -2.1110317707061768, "logps/chosen": -338.17498779296875, "logps/rejected": -311.8550720214844, "loss": 0.3596, "rewards/accuracies": 0.75, "rewards/chosen": -1.0051870346069336, "rewards/margins": 1.9960434436798096, "rewards/rejected": -3.0012307167053223, "step": 5634 }, { "epoch": 0.66, "learning_rate": 1.0491319239400024e-07, "logits/chosen": -2.283311128616333, "logits/rejected": -2.5141074657440186, "logps/chosen": -295.2777099609375, "logps/rejected": -213.91244506835938, "loss": 0.1806, "rewards/accuracies": 1.0, "rewards/chosen": -0.31349262595176697, "rewards/margins": 1.8165096044540405, "rewards/rejected": -2.13000226020813, "step": 5635 }, { "epoch": 0.66, "learning_rate": 1.0487776071808196e-07, "logits/chosen": -2.211575984954834, "logits/rejected": -2.212348699569702, "logps/chosen": -295.5931091308594, "logps/rejected": -309.7364501953125, "loss": 0.1625, "rewards/accuracies": 1.0, "rewards/chosen": 0.3619875907897949, "rewards/margins": 2.20029878616333, "rewards/rejected": -1.8383111953735352, "step": 5636 }, { "epoch": 0.66, "learning_rate": 1.0484232904216368e-07, "logits/chosen": -2.047206401824951, "logits/rejected": -2.2447280883789062, "logps/chosen": -446.1842041015625, "logps/rejected": -384.1418762207031, "loss": 0.3096, "rewards/accuracies": 0.875, "rewards/chosen": -0.926439642906189, "rewards/margins": 2.4248485565185547, "rewards/rejected": -3.351288318634033, "step": 5637 }, { "epoch": 0.66, "learning_rate": 1.0480689736624542e-07, "logits/chosen": -1.9269683361053467, "logits/rejected": -2.0173122882843018, "logps/chosen": -162.9124755859375, "logps/rejected": -205.02239990234375, "loss": 0.2426, "rewards/accuracies": 1.0, "rewards/chosen": -0.784050464630127, "rewards/margins": 2.356318950653076, "rewards/rejected": -3.140369176864624, "step": 5638 }, { "epoch": 0.66, "learning_rate": 1.0477146569032715e-07, "logits/chosen": -2.680712938308716, "logits/rejected": -2.7194597721099854, "logps/chosen": -208.26791381835938, "logps/rejected": -129.04966735839844, "loss": 0.3192, "rewards/accuracies": 1.0, "rewards/chosen": -0.540419340133667, "rewards/margins": 1.8171100616455078, "rewards/rejected": -2.357529401779175, "step": 5639 }, { "epoch": 0.66, "learning_rate": 1.0473603401440887e-07, "logits/chosen": -2.1089446544647217, "logits/rejected": -2.269528865814209, "logps/chosen": -195.4993133544922, "logps/rejected": -254.5201416015625, "loss": 0.4793, "rewards/accuracies": 0.75, "rewards/chosen": -1.7801486253738403, "rewards/margins": 0.9757960438728333, "rewards/rejected": -2.7559447288513184, "step": 5640 }, { "epoch": 0.66, "learning_rate": 1.0470060233849061e-07, "logits/chosen": -1.9523968696594238, "logits/rejected": -1.7849524021148682, "logps/chosen": -306.89752197265625, "logps/rejected": -409.33428955078125, "loss": 0.1804, "rewards/accuracies": 1.0, "rewards/chosen": -0.7026433944702148, "rewards/margins": 3.624249219894409, "rewards/rejected": -4.326892852783203, "step": 5641 }, { "epoch": 0.66, "learning_rate": 1.0466517066257233e-07, "logits/chosen": -2.055495023727417, "logits/rejected": -1.9896587133407593, "logps/chosen": -207.96058654785156, "logps/rejected": -214.98959350585938, "loss": 0.2722, "rewards/accuracies": 1.0, "rewards/chosen": -0.7799131870269775, "rewards/margins": 2.2086238861083984, "rewards/rejected": -2.988537073135376, "step": 5642 }, { "epoch": 0.66, "learning_rate": 1.0462973898665405e-07, "logits/chosen": -2.0959582328796387, "logits/rejected": -2.104794979095459, "logps/chosen": -351.9902038574219, "logps/rejected": -312.3078918457031, "loss": 1.2059, "rewards/accuracies": 0.375, "rewards/chosen": -2.446608304977417, "rewards/margins": 0.060374438762664795, "rewards/rejected": -2.5069830417633057, "step": 5643 }, { "epoch": 0.66, "learning_rate": 1.0459430731073579e-07, "logits/chosen": -2.362825393676758, "logits/rejected": -2.298342704772949, "logps/chosen": -273.7911376953125, "logps/rejected": -340.9767761230469, "loss": 0.3405, "rewards/accuracies": 0.875, "rewards/chosen": -1.240666151046753, "rewards/margins": 1.6644459962844849, "rewards/rejected": -2.9051120281219482, "step": 5644 }, { "epoch": 0.66, "learning_rate": 1.0455887563481752e-07, "logits/chosen": -1.8483754396438599, "logits/rejected": -2.042412757873535, "logps/chosen": -438.0550537109375, "logps/rejected": -469.2054138183594, "loss": 0.2888, "rewards/accuracies": 0.875, "rewards/chosen": -0.5498804450035095, "rewards/margins": 3.355186700820923, "rewards/rejected": -3.905067205429077, "step": 5645 }, { "epoch": 0.66, "learning_rate": 1.0452344395889926e-07, "logits/chosen": -2.627197742462158, "logits/rejected": -2.6115307807922363, "logps/chosen": -310.6881408691406, "logps/rejected": -317.25439453125, "loss": 0.2185, "rewards/accuracies": 0.875, "rewards/chosen": -0.7008978128433228, "rewards/margins": 2.742345094680786, "rewards/rejected": -3.4432430267333984, "step": 5646 }, { "epoch": 0.66, "learning_rate": 1.0448801228298098e-07, "logits/chosen": -2.601165294647217, "logits/rejected": -2.4152183532714844, "logps/chosen": -252.05630493164062, "logps/rejected": -319.3289794921875, "loss": 0.1687, "rewards/accuracies": 1.0, "rewards/chosen": -0.5914105176925659, "rewards/margins": 4.035156726837158, "rewards/rejected": -4.6265668869018555, "step": 5647 }, { "epoch": 0.66, "learning_rate": 1.044525806070627e-07, "logits/chosen": -2.5110795497894287, "logits/rejected": -2.6164026260375977, "logps/chosen": -148.21534729003906, "logps/rejected": -300.07098388671875, "loss": 0.4467, "rewards/accuracies": 0.75, "rewards/chosen": -1.7840666770935059, "rewards/margins": 3.925251007080078, "rewards/rejected": -5.709317684173584, "step": 5648 }, { "epoch": 0.66, "learning_rate": 1.0441714893114444e-07, "logits/chosen": -1.8799796104431152, "logits/rejected": -1.8996953964233398, "logps/chosen": -245.6571502685547, "logps/rejected": -262.1639404296875, "loss": 0.5568, "rewards/accuracies": 0.625, "rewards/chosen": -0.7814924716949463, "rewards/margins": 1.6691805124282837, "rewards/rejected": -2.4506731033325195, "step": 5649 }, { "epoch": 0.66, "learning_rate": 1.0438171725522616e-07, "logits/chosen": -2.1249635219573975, "logits/rejected": -2.4611122608184814, "logps/chosen": -295.913818359375, "logps/rejected": -288.91845703125, "loss": 0.8687, "rewards/accuracies": 0.625, "rewards/chosen": -1.436316967010498, "rewards/margins": 1.2291241884231567, "rewards/rejected": -2.6654410362243652, "step": 5650 }, { "epoch": 0.66, "learning_rate": 1.043462855793079e-07, "logits/chosen": -1.8214809894561768, "logits/rejected": -2.228898286819458, "logps/chosen": -271.5148620605469, "logps/rejected": -163.38751220703125, "loss": 0.3916, "rewards/accuracies": 0.875, "rewards/chosen": -0.882966160774231, "rewards/margins": 1.46793794631958, "rewards/rejected": -2.3509039878845215, "step": 5651 }, { "epoch": 0.66, "learning_rate": 1.0431085390338963e-07, "logits/chosen": -2.4616811275482178, "logits/rejected": -2.5256142616271973, "logps/chosen": -326.0119323730469, "logps/rejected": -386.1529846191406, "loss": 0.3041, "rewards/accuracies": 0.875, "rewards/chosen": -1.5476267337799072, "rewards/margins": 2.62630558013916, "rewards/rejected": -4.1739325523376465, "step": 5652 }, { "epoch": 0.66, "learning_rate": 1.0427542222747135e-07, "logits/chosen": -2.090790271759033, "logits/rejected": -2.1614456176757812, "logps/chosen": -201.30613708496094, "logps/rejected": -179.7163848876953, "loss": 0.8695, "rewards/accuracies": 0.75, "rewards/chosen": -1.0840425491333008, "rewards/margins": 1.5900282859802246, "rewards/rejected": -2.6740708351135254, "step": 5653 }, { "epoch": 0.66, "learning_rate": 1.0423999055155308e-07, "logits/chosen": -2.6787147521972656, "logits/rejected": -2.8297760486602783, "logps/chosen": -311.9983825683594, "logps/rejected": -309.57916259765625, "loss": 0.4029, "rewards/accuracies": 0.75, "rewards/chosen": -0.8317017555236816, "rewards/margins": 3.0495874881744385, "rewards/rejected": -3.881289482116699, "step": 5654 }, { "epoch": 0.66, "learning_rate": 1.0420455887563481e-07, "logits/chosen": -1.9957685470581055, "logits/rejected": -2.167116165161133, "logps/chosen": -372.3825378417969, "logps/rejected": -334.19384765625, "loss": 0.3331, "rewards/accuracies": 0.75, "rewards/chosen": -0.8929535150527954, "rewards/margins": 2.9981539249420166, "rewards/rejected": -3.8911073207855225, "step": 5655 }, { "epoch": 0.66, "learning_rate": 1.0416912719971655e-07, "logits/chosen": -1.8710715770721436, "logits/rejected": -1.8476738929748535, "logps/chosen": -380.03253173828125, "logps/rejected": -436.19329833984375, "loss": 0.2688, "rewards/accuracies": 0.875, "rewards/chosen": -1.3017499446868896, "rewards/margins": 2.741666078567505, "rewards/rejected": -4.0434160232543945, "step": 5656 }, { "epoch": 0.66, "learning_rate": 1.0413369552379828e-07, "logits/chosen": -2.547333002090454, "logits/rejected": -2.612338066101074, "logps/chosen": -148.70590209960938, "logps/rejected": -105.24624633789062, "loss": 0.6887, "rewards/accuracies": 0.625, "rewards/chosen": -1.328561544418335, "rewards/margins": 0.8790522813796997, "rewards/rejected": -2.207613945007324, "step": 5657 }, { "epoch": 0.66, "learning_rate": 1.0409826384788e-07, "logits/chosen": -2.8819124698638916, "logits/rejected": -2.959810733795166, "logps/chosen": -348.1834716796875, "logps/rejected": -229.48875427246094, "loss": 0.9472, "rewards/accuracies": 0.5, "rewards/chosen": -0.7797030210494995, "rewards/margins": 1.0191231966018677, "rewards/rejected": -1.7988262176513672, "step": 5658 }, { "epoch": 0.66, "learning_rate": 1.0406283217196173e-07, "logits/chosen": -2.276402473449707, "logits/rejected": -2.19966721534729, "logps/chosen": -396.5199279785156, "logps/rejected": -417.11883544921875, "loss": 0.4427, "rewards/accuracies": 0.75, "rewards/chosen": -1.9029924869537354, "rewards/margins": 2.0527846813201904, "rewards/rejected": -3.955777168273926, "step": 5659 }, { "epoch": 0.66, "learning_rate": 1.0402740049604345e-07, "logits/chosen": -2.93898344039917, "logits/rejected": -2.9107820987701416, "logps/chosen": -296.6107177734375, "logps/rejected": -234.84799194335938, "loss": 0.3557, "rewards/accuracies": 0.875, "rewards/chosen": -0.46664807200431824, "rewards/margins": 2.44972562789917, "rewards/rejected": -2.9163737297058105, "step": 5660 }, { "epoch": 0.66, "learning_rate": 1.0399196882012518e-07, "logits/chosen": -2.2126054763793945, "logits/rejected": -2.143376350402832, "logps/chosen": -266.48681640625, "logps/rejected": -386.7888488769531, "loss": 0.3071, "rewards/accuracies": 0.875, "rewards/chosen": -0.40486711263656616, "rewards/margins": 2.5140881538391113, "rewards/rejected": -2.918955087661743, "step": 5661 }, { "epoch": 0.66, "learning_rate": 1.0395653714420692e-07, "logits/chosen": -2.3556840419769287, "logits/rejected": -2.47299861907959, "logps/chosen": -322.6022033691406, "logps/rejected": -288.6607666015625, "loss": 0.2853, "rewards/accuracies": 0.875, "rewards/chosen": -0.7269226312637329, "rewards/margins": 2.170443058013916, "rewards/rejected": -2.8973653316497803, "step": 5662 }, { "epoch": 0.66, "learning_rate": 1.0392110546828865e-07, "logits/chosen": -2.42887544631958, "logits/rejected": -2.5546767711639404, "logps/chosen": -206.69461059570312, "logps/rejected": -384.1551818847656, "loss": 0.3659, "rewards/accuracies": 0.875, "rewards/chosen": -1.2105356454849243, "rewards/margins": 2.2735595703125, "rewards/rejected": -3.484095335006714, "step": 5663 }, { "epoch": 0.66, "learning_rate": 1.0388567379237038e-07, "logits/chosen": -2.636662006378174, "logits/rejected": -2.8372044563293457, "logps/chosen": -156.79464721679688, "logps/rejected": -190.98333740234375, "loss": 0.3509, "rewards/accuracies": 0.75, "rewards/chosen": -0.5529892444610596, "rewards/margins": 3.260711431503296, "rewards/rejected": -3.8137006759643555, "step": 5664 }, { "epoch": 0.66, "learning_rate": 1.038502421164521e-07, "logits/chosen": -2.517970323562622, "logits/rejected": -2.7369227409362793, "logps/chosen": -684.9908447265625, "logps/rejected": -559.7181396484375, "loss": 0.0727, "rewards/accuracies": 1.0, "rewards/chosen": -0.7034969329833984, "rewards/margins": 4.921209812164307, "rewards/rejected": -5.624707221984863, "step": 5665 }, { "epoch": 0.66, "learning_rate": 1.0381481044053383e-07, "logits/chosen": -1.8254623413085938, "logits/rejected": -2.3381237983703613, "logps/chosen": -308.38671875, "logps/rejected": -208.96261596679688, "loss": 0.4206, "rewards/accuracies": 0.75, "rewards/chosen": -0.794208824634552, "rewards/margins": 2.7997324466705322, "rewards/rejected": -3.5939412117004395, "step": 5666 }, { "epoch": 0.66, "learning_rate": 1.0377937876461556e-07, "logits/chosen": -2.396946907043457, "logits/rejected": -2.1706907749176025, "logps/chosen": -224.25955200195312, "logps/rejected": -310.0357971191406, "loss": 0.6437, "rewards/accuracies": 0.75, "rewards/chosen": -0.8369054794311523, "rewards/margins": 1.8134957551956177, "rewards/rejected": -2.6504011154174805, "step": 5667 }, { "epoch": 0.66, "learning_rate": 1.0374394708869729e-07, "logits/chosen": -2.5202484130859375, "logits/rejected": -2.5895838737487793, "logps/chosen": -238.8297119140625, "logps/rejected": -225.16664123535156, "loss": 0.3936, "rewards/accuracies": 0.75, "rewards/chosen": -1.3433902263641357, "rewards/margins": 3.520153522491455, "rewards/rejected": -4.86354398727417, "step": 5668 }, { "epoch": 0.66, "learning_rate": 1.0370851541277903e-07, "logits/chosen": -1.8457984924316406, "logits/rejected": -1.8705971240997314, "logps/chosen": -461.7740173339844, "logps/rejected": -374.1877746582031, "loss": 0.4043, "rewards/accuracies": 0.625, "rewards/chosen": -0.3605010509490967, "rewards/margins": 2.196782350540161, "rewards/rejected": -2.557283401489258, "step": 5669 }, { "epoch": 0.66, "learning_rate": 1.0367308373686075e-07, "logits/chosen": -2.7027058601379395, "logits/rejected": -2.317307472229004, "logps/chosen": -213.99588012695312, "logps/rejected": -242.6956329345703, "loss": 0.1211, "rewards/accuracies": 1.0, "rewards/chosen": -0.4589482247829437, "rewards/margins": 3.606396198272705, "rewards/rejected": -4.065344333648682, "step": 5670 }, { "epoch": 0.66, "learning_rate": 1.0363765206094247e-07, "logits/chosen": -2.751953363418579, "logits/rejected": -2.712613582611084, "logps/chosen": -228.58518981933594, "logps/rejected": -240.92710876464844, "loss": 0.9828, "rewards/accuracies": 0.625, "rewards/chosen": -1.344312071800232, "rewards/margins": 0.4630223214626312, "rewards/rejected": -1.807334303855896, "step": 5671 }, { "epoch": 0.66, "learning_rate": 1.0360222038502421e-07, "logits/chosen": -1.7498023509979248, "logits/rejected": -1.9027984142303467, "logps/chosen": -487.6247863769531, "logps/rejected": -404.3485412597656, "loss": 0.2184, "rewards/accuracies": 1.0, "rewards/chosen": 0.006755739450454712, "rewards/margins": 1.8866910934448242, "rewards/rejected": -1.8799352645874023, "step": 5672 }, { "epoch": 0.66, "learning_rate": 1.0356678870910593e-07, "logits/chosen": -2.5228075981140137, "logits/rejected": -2.5785574913024902, "logps/chosen": -209.76174926757812, "logps/rejected": -208.15707397460938, "loss": 0.4904, "rewards/accuracies": 0.875, "rewards/chosen": -1.2170389890670776, "rewards/margins": 2.4760961532592773, "rewards/rejected": -3.6931354999542236, "step": 5673 }, { "epoch": 0.66, "learning_rate": 1.0353135703318768e-07, "logits/chosen": -2.896099090576172, "logits/rejected": -2.8547146320343018, "logps/chosen": -195.39804077148438, "logps/rejected": -324.66510009765625, "loss": 0.2585, "rewards/accuracies": 1.0, "rewards/chosen": -0.05571621656417847, "rewards/margins": 2.2246785163879395, "rewards/rejected": -2.2803945541381836, "step": 5674 }, { "epoch": 0.66, "learning_rate": 1.034959253572694e-07, "logits/chosen": -1.964308261871338, "logits/rejected": -2.215885877609253, "logps/chosen": -370.36663818359375, "logps/rejected": -287.57861328125, "loss": 0.1656, "rewards/accuracies": 0.875, "rewards/chosen": 0.24088440835475922, "rewards/margins": 2.769105911254883, "rewards/rejected": -2.528221368789673, "step": 5675 }, { "epoch": 0.66, "learning_rate": 1.0346049368135112e-07, "logits/chosen": -2.484022855758667, "logits/rejected": -2.3083415031433105, "logps/chosen": -402.21075439453125, "logps/rejected": -297.1026306152344, "loss": 0.3879, "rewards/accuracies": 0.875, "rewards/chosen": -0.8095904588699341, "rewards/margins": 1.2451809644699097, "rewards/rejected": -2.0547714233398438, "step": 5676 }, { "epoch": 0.66, "learning_rate": 1.0342506200543284e-07, "logits/chosen": -2.4121782779693604, "logits/rejected": -2.3747212886810303, "logps/chosen": -244.59390258789062, "logps/rejected": -224.47940063476562, "loss": 0.878, "rewards/accuracies": 0.5, "rewards/chosen": -1.6101865768432617, "rewards/margins": 0.684607744216919, "rewards/rejected": -2.2947943210601807, "step": 5677 }, { "epoch": 0.66, "learning_rate": 1.0338963032951458e-07, "logits/chosen": -2.1911585330963135, "logits/rejected": -2.1446990966796875, "logps/chosen": -333.9509582519531, "logps/rejected": -365.362060546875, "loss": 0.2902, "rewards/accuracies": 0.875, "rewards/chosen": -0.9364689588546753, "rewards/margins": 2.8088223934173584, "rewards/rejected": -3.7452917098999023, "step": 5678 }, { "epoch": 0.66, "learning_rate": 1.033541986535963e-07, "logits/chosen": -2.4206061363220215, "logits/rejected": -2.5407021045684814, "logps/chosen": -169.20144653320312, "logps/rejected": -144.4083709716797, "loss": 0.2362, "rewards/accuracies": 1.0, "rewards/chosen": -0.4208838641643524, "rewards/margins": 2.7999186515808105, "rewards/rejected": -3.2208023071289062, "step": 5679 }, { "epoch": 0.66, "learning_rate": 1.0331876697767805e-07, "logits/chosen": -1.7812180519104004, "logits/rejected": -1.8153513669967651, "logps/chosen": -235.05714416503906, "logps/rejected": -280.4712829589844, "loss": 0.5536, "rewards/accuracies": 0.5, "rewards/chosen": -1.0953407287597656, "rewards/margins": 1.6475452184677124, "rewards/rejected": -2.7428860664367676, "step": 5680 }, { "epoch": 0.66, "learning_rate": 1.0328333530175977e-07, "logits/chosen": -2.259068012237549, "logits/rejected": -2.455493927001953, "logps/chosen": -155.1253662109375, "logps/rejected": -135.94676208496094, "loss": 0.9605, "rewards/accuracies": 0.625, "rewards/chosen": -1.9609016180038452, "rewards/margins": 0.9500251412391663, "rewards/rejected": -2.9109268188476562, "step": 5681 }, { "epoch": 0.66, "learning_rate": 1.032479036258415e-07, "logits/chosen": -2.591142177581787, "logits/rejected": -2.2102584838867188, "logps/chosen": -309.0902404785156, "logps/rejected": -543.540771484375, "loss": 0.4866, "rewards/accuracies": 0.625, "rewards/chosen": -0.9471275210380554, "rewards/margins": 2.0250461101531982, "rewards/rejected": -2.9721734523773193, "step": 5682 }, { "epoch": 0.66, "learning_rate": 1.0321247194992323e-07, "logits/chosen": -2.267996311187744, "logits/rejected": -1.9789023399353027, "logps/chosen": -165.4520263671875, "logps/rejected": -317.72698974609375, "loss": 0.0981, "rewards/accuracies": 1.0, "rewards/chosen": -1.0607959032058716, "rewards/margins": 3.8675308227539062, "rewards/rejected": -4.928326606750488, "step": 5683 }, { "epoch": 0.66, "learning_rate": 1.0317704027400495e-07, "logits/chosen": -1.8548684120178223, "logits/rejected": -1.9856256246566772, "logps/chosen": -297.42047119140625, "logps/rejected": -306.18170166015625, "loss": 0.3967, "rewards/accuracies": 0.875, "rewards/chosen": -1.106410264968872, "rewards/margins": 1.5973560810089111, "rewards/rejected": -2.703766345977783, "step": 5684 }, { "epoch": 0.66, "learning_rate": 1.0314160859808667e-07, "logits/chosen": -1.9887953996658325, "logits/rejected": -2.2334275245666504, "logps/chosen": -307.009033203125, "logps/rejected": -325.21746826171875, "loss": 0.3754, "rewards/accuracies": 0.75, "rewards/chosen": -0.5989624261856079, "rewards/margins": 1.8741008043289185, "rewards/rejected": -2.4730634689331055, "step": 5685 }, { "epoch": 0.66, "learning_rate": 1.0310617692216842e-07, "logits/chosen": -2.2823076248168945, "logits/rejected": -2.152216672897339, "logps/chosen": -306.51507568359375, "logps/rejected": -272.28997802734375, "loss": 0.1843, "rewards/accuracies": 1.0, "rewards/chosen": -0.537173867225647, "rewards/margins": 2.929534435272217, "rewards/rejected": -3.466708183288574, "step": 5686 }, { "epoch": 0.66, "learning_rate": 1.0307074524625014e-07, "logits/chosen": -2.3854639530181885, "logits/rejected": -2.104041814804077, "logps/chosen": -310.034423828125, "logps/rejected": -290.28680419921875, "loss": 0.4197, "rewards/accuracies": 0.875, "rewards/chosen": -1.3259224891662598, "rewards/margins": 1.8734439611434937, "rewards/rejected": -3.1993660926818848, "step": 5687 }, { "epoch": 0.66, "learning_rate": 1.0303531357033187e-07, "logits/chosen": -3.0074820518493652, "logits/rejected": -3.0343217849731445, "logps/chosen": -171.9145050048828, "logps/rejected": -221.28074645996094, "loss": 0.1307, "rewards/accuracies": 1.0, "rewards/chosen": -0.1393604725599289, "rewards/margins": 2.58916974067688, "rewards/rejected": -2.728530168533325, "step": 5688 }, { "epoch": 0.66, "learning_rate": 1.029998818944136e-07, "logits/chosen": -2.7404379844665527, "logits/rejected": -2.3216776847839355, "logps/chosen": -268.2242431640625, "logps/rejected": -407.40185546875, "loss": 0.2884, "rewards/accuracies": 0.75, "rewards/chosen": -0.9922752976417542, "rewards/margins": 1.7920259237289429, "rewards/rejected": -2.784301280975342, "step": 5689 }, { "epoch": 0.66, "learning_rate": 1.0296445021849532e-07, "logits/chosen": -1.9953800439834595, "logits/rejected": -2.258453845977783, "logps/chosen": -332.80487060546875, "logps/rejected": -301.6964111328125, "loss": 0.22, "rewards/accuracies": 1.0, "rewards/chosen": -0.7347323894500732, "rewards/margins": 2.3722028732299805, "rewards/rejected": -3.1069352626800537, "step": 5690 }, { "epoch": 0.66, "learning_rate": 1.0292901854257707e-07, "logits/chosen": -2.3219385147094727, "logits/rejected": -2.3723931312561035, "logps/chosen": -460.2960205078125, "logps/rejected": -335.94158935546875, "loss": 0.1729, "rewards/accuracies": 1.0, "rewards/chosen": -0.3464643061161041, "rewards/margins": 2.774585485458374, "rewards/rejected": -3.1210501194000244, "step": 5691 }, { "epoch": 0.66, "learning_rate": 1.028935868666588e-07, "logits/chosen": -2.622586250305176, "logits/rejected": -2.823594808578491, "logps/chosen": -307.8970947265625, "logps/rejected": -544.295654296875, "loss": 0.1252, "rewards/accuracies": 1.0, "rewards/chosen": -0.4733589291572571, "rewards/margins": 3.605354070663452, "rewards/rejected": -4.0787129402160645, "step": 5692 }, { "epoch": 0.66, "learning_rate": 1.0285815519074052e-07, "logits/chosen": -1.9538044929504395, "logits/rejected": -2.387094020843506, "logps/chosen": -571.9424438476562, "logps/rejected": -269.4619445800781, "loss": 0.4342, "rewards/accuracies": 0.75, "rewards/chosen": -0.7312449216842651, "rewards/margins": 2.3624212741851807, "rewards/rejected": -3.0936663150787354, "step": 5693 }, { "epoch": 0.66, "learning_rate": 1.0282272351482225e-07, "logits/chosen": -2.7999067306518555, "logits/rejected": -2.830901622772217, "logps/chosen": -224.41917419433594, "logps/rejected": -262.5125732421875, "loss": 0.2892, "rewards/accuracies": 0.875, "rewards/chosen": -1.1468156576156616, "rewards/margins": 1.7163158655166626, "rewards/rejected": -2.863131523132324, "step": 5694 }, { "epoch": 0.66, "learning_rate": 1.0278729183890397e-07, "logits/chosen": -2.0202271938323975, "logits/rejected": -2.181016206741333, "logps/chosen": -333.349853515625, "logps/rejected": -324.0946044921875, "loss": 0.4828, "rewards/accuracies": 0.875, "rewards/chosen": -0.9875384569168091, "rewards/margins": 1.2193856239318848, "rewards/rejected": -2.2069242000579834, "step": 5695 }, { "epoch": 0.66, "learning_rate": 1.027518601629857e-07, "logits/chosen": -2.4934492111206055, "logits/rejected": -2.474184989929199, "logps/chosen": -348.32623291015625, "logps/rejected": -319.3465576171875, "loss": 0.0983, "rewards/accuracies": 1.0, "rewards/chosen": -0.23315924406051636, "rewards/margins": 3.348158359527588, "rewards/rejected": -3.58131742477417, "step": 5696 }, { "epoch": 0.66, "learning_rate": 1.0271642848706745e-07, "logits/chosen": -2.2634634971618652, "logits/rejected": -2.402512550354004, "logps/chosen": -437.80609130859375, "logps/rejected": -332.95428466796875, "loss": 0.1938, "rewards/accuracies": 0.875, "rewards/chosen": -0.9240847826004028, "rewards/margins": 3.324362277984619, "rewards/rejected": -4.248447418212891, "step": 5697 }, { "epoch": 0.66, "learning_rate": 1.0268099681114917e-07, "logits/chosen": -2.6575422286987305, "logits/rejected": -2.4704792499542236, "logps/chosen": -222.37686157226562, "logps/rejected": -295.86529541015625, "loss": 0.306, "rewards/accuracies": 0.75, "rewards/chosen": -1.2494723796844482, "rewards/margins": 2.337425947189331, "rewards/rejected": -3.5868983268737793, "step": 5698 }, { "epoch": 0.66, "learning_rate": 1.0264556513523089e-07, "logits/chosen": -1.5050008296966553, "logits/rejected": -2.2734344005584717, "logps/chosen": -560.3615112304688, "logps/rejected": -187.5242919921875, "loss": 0.1381, "rewards/accuracies": 0.875, "rewards/chosen": -0.27771225571632385, "rewards/margins": 3.4961953163146973, "rewards/rejected": -3.773907423019409, "step": 5699 }, { "epoch": 0.66, "learning_rate": 1.0261013345931263e-07, "logits/chosen": -2.540419816970825, "logits/rejected": -2.593881607055664, "logps/chosen": -168.9939727783203, "logps/rejected": -243.46568298339844, "loss": 0.1095, "rewards/accuracies": 1.0, "rewards/chosen": 0.19797229766845703, "rewards/margins": 3.935375928878784, "rewards/rejected": -3.737403392791748, "step": 5700 }, { "epoch": 0.66, "learning_rate": 1.0257470178339435e-07, "logits/chosen": -1.9824333190917969, "logits/rejected": -1.903713583946228, "logps/chosen": -235.05239868164062, "logps/rejected": -265.193115234375, "loss": 0.6506, "rewards/accuracies": 0.375, "rewards/chosen": -1.698386549949646, "rewards/margins": 1.94468092918396, "rewards/rejected": -3.6430673599243164, "step": 5701 }, { "epoch": 0.66, "learning_rate": 1.0253927010747607e-07, "logits/chosen": -2.522278070449829, "logits/rejected": -2.6037659645080566, "logps/chosen": -158.93414306640625, "logps/rejected": -193.7322998046875, "loss": 1.6273, "rewards/accuracies": 0.625, "rewards/chosen": -2.752061367034912, "rewards/margins": 0.6176124215126038, "rewards/rejected": -3.369673490524292, "step": 5702 }, { "epoch": 0.66, "learning_rate": 1.0250383843155782e-07, "logits/chosen": -2.468505859375, "logits/rejected": -2.437094211578369, "logps/chosen": -271.2071228027344, "logps/rejected": -220.4228515625, "loss": 0.7156, "rewards/accuracies": 0.75, "rewards/chosen": -1.198683500289917, "rewards/margins": 3.157135248184204, "rewards/rejected": -4.355818748474121, "step": 5703 }, { "epoch": 0.66, "learning_rate": 1.0246840675563954e-07, "logits/chosen": -1.9327147006988525, "logits/rejected": -1.8885383605957031, "logps/chosen": -220.66006469726562, "logps/rejected": -308.0321350097656, "loss": 0.4349, "rewards/accuracies": 0.875, "rewards/chosen": -0.9108067750930786, "rewards/margins": 2.7351884841918945, "rewards/rejected": -3.6459949016571045, "step": 5704 }, { "epoch": 0.66, "learning_rate": 1.0243297507972126e-07, "logits/chosen": -1.7174080610275269, "logits/rejected": -1.9539053440093994, "logps/chosen": -332.41943359375, "logps/rejected": -347.6141052246094, "loss": 0.3344, "rewards/accuracies": 0.875, "rewards/chosen": -1.293500304222107, "rewards/margins": 1.8750184774398804, "rewards/rejected": -3.168518543243408, "step": 5705 }, { "epoch": 0.66, "learning_rate": 1.02397543403803e-07, "logits/chosen": -2.7614777088165283, "logits/rejected": -2.3465769290924072, "logps/chosen": -201.98431396484375, "logps/rejected": -291.85626220703125, "loss": 0.2932, "rewards/accuracies": 0.75, "rewards/chosen": -0.49604740738868713, "rewards/margins": 2.3633601665496826, "rewards/rejected": -2.859407663345337, "step": 5706 }, { "epoch": 0.66, "learning_rate": 1.0236211172788472e-07, "logits/chosen": -2.2498862743377686, "logits/rejected": -2.203916311264038, "logps/chosen": -307.3746032714844, "logps/rejected": -279.220947265625, "loss": 0.5872, "rewards/accuracies": 0.75, "rewards/chosen": -0.9476866126060486, "rewards/margins": 2.216930866241455, "rewards/rejected": -3.1646173000335693, "step": 5707 }, { "epoch": 0.66, "learning_rate": 1.0232668005196644e-07, "logits/chosen": -1.9243485927581787, "logits/rejected": -2.198871612548828, "logps/chosen": -439.82379150390625, "logps/rejected": -343.59521484375, "loss": 0.2202, "rewards/accuracies": 0.875, "rewards/chosen": -1.5185532569885254, "rewards/margins": 2.700611114501953, "rewards/rejected": -4.2191643714904785, "step": 5708 }, { "epoch": 0.66, "learning_rate": 1.0229124837604819e-07, "logits/chosen": -1.93864905834198, "logits/rejected": -1.6575130224227905, "logps/chosen": -208.60574340820312, "logps/rejected": -286.4598388671875, "loss": 0.3951, "rewards/accuracies": 0.875, "rewards/chosen": -1.2352862358093262, "rewards/margins": 1.8691368103027344, "rewards/rejected": -3.1044230461120605, "step": 5709 }, { "epoch": 0.66, "learning_rate": 1.0225581670012991e-07, "logits/chosen": -2.609442949295044, "logits/rejected": -2.318737030029297, "logps/chosen": -230.5745849609375, "logps/rejected": -729.001708984375, "loss": 0.0625, "rewards/accuracies": 1.0, "rewards/chosen": -1.271448016166687, "rewards/margins": 4.682802200317383, "rewards/rejected": -5.954249858856201, "step": 5710 }, { "epoch": 0.66, "learning_rate": 1.0222038502421165e-07, "logits/chosen": -2.660043954849243, "logits/rejected": -2.6118857860565186, "logps/chosen": -259.57904052734375, "logps/rejected": -221.65069580078125, "loss": 0.6422, "rewards/accuracies": 0.625, "rewards/chosen": -1.1934735774993896, "rewards/margins": 0.8994506001472473, "rewards/rejected": -2.092924118041992, "step": 5711 }, { "epoch": 0.66, "learning_rate": 1.0218495334829337e-07, "logits/chosen": -2.278362274169922, "logits/rejected": -2.3269429206848145, "logps/chosen": -309.8248291015625, "logps/rejected": -310.9016418457031, "loss": 0.1855, "rewards/accuracies": 1.0, "rewards/chosen": -0.7453591227531433, "rewards/margins": 2.532506227493286, "rewards/rejected": -3.277865409851074, "step": 5712 }, { "epoch": 0.66, "learning_rate": 1.0214952167237509e-07, "logits/chosen": -1.9521524906158447, "logits/rejected": -2.203430652618408, "logps/chosen": -290.2025451660156, "logps/rejected": -286.20867919921875, "loss": 0.1967, "rewards/accuracies": 1.0, "rewards/chosen": -0.10247907787561417, "rewards/margins": 1.7325737476348877, "rewards/rejected": -1.8350528478622437, "step": 5713 }, { "epoch": 0.66, "learning_rate": 1.0211408999645681e-07, "logits/chosen": -1.9695463180541992, "logits/rejected": -2.184995174407959, "logps/chosen": -187.3121337890625, "logps/rejected": -159.37315368652344, "loss": 0.5797, "rewards/accuracies": 0.625, "rewards/chosen": -1.1828334331512451, "rewards/margins": 0.5078913569450378, "rewards/rejected": -1.6907248497009277, "step": 5714 }, { "epoch": 0.66, "learning_rate": 1.0207865832053856e-07, "logits/chosen": -1.908722162246704, "logits/rejected": -2.211146831512451, "logps/chosen": -337.79974365234375, "logps/rejected": -252.11666870117188, "loss": 0.2886, "rewards/accuracies": 0.875, "rewards/chosen": -0.5847647786140442, "rewards/margins": 2.1655218601226807, "rewards/rejected": -2.75028657913208, "step": 5715 }, { "epoch": 0.66, "learning_rate": 1.0204322664462028e-07, "logits/chosen": -1.8768537044525146, "logits/rejected": -2.3056490421295166, "logps/chosen": -638.035888671875, "logps/rejected": -350.0102233886719, "loss": 0.9439, "rewards/accuracies": 0.75, "rewards/chosen": -1.2478303909301758, "rewards/margins": 0.12308266758918762, "rewards/rejected": -1.370913028717041, "step": 5716 }, { "epoch": 0.67, "learning_rate": 1.0200779496870202e-07, "logits/chosen": -1.6574689149856567, "logits/rejected": -1.5764528512954712, "logps/chosen": -304.44708251953125, "logps/rejected": -252.310302734375, "loss": 0.3659, "rewards/accuracies": 0.75, "rewards/chosen": -0.7468189001083374, "rewards/margins": 1.9753895998001099, "rewards/rejected": -2.7222084999084473, "step": 5717 }, { "epoch": 0.67, "learning_rate": 1.0197236329278374e-07, "logits/chosen": -2.071197509765625, "logits/rejected": -2.26833176612854, "logps/chosen": -207.89273071289062, "logps/rejected": -231.1511688232422, "loss": 0.3212, "rewards/accuracies": 1.0, "rewards/chosen": -0.3369930386543274, "rewards/margins": 1.5215131044387817, "rewards/rejected": -1.858506202697754, "step": 5718 }, { "epoch": 0.67, "learning_rate": 1.0193693161686546e-07, "logits/chosen": -2.6907482147216797, "logits/rejected": -2.679058313369751, "logps/chosen": -139.7802276611328, "logps/rejected": -150.3061981201172, "loss": 0.3158, "rewards/accuracies": 0.75, "rewards/chosen": -0.4210570156574249, "rewards/margins": 2.383976936340332, "rewards/rejected": -2.8050339221954346, "step": 5719 }, { "epoch": 0.67, "learning_rate": 1.019014999409472e-07, "logits/chosen": -2.0877857208251953, "logits/rejected": -2.005148410797119, "logps/chosen": -295.5303955078125, "logps/rejected": -239.89822387695312, "loss": 0.5591, "rewards/accuracies": 0.625, "rewards/chosen": -0.8500014543533325, "rewards/margins": 1.6384848356246948, "rewards/rejected": -2.4884862899780273, "step": 5720 }, { "epoch": 0.67, "learning_rate": 1.0186606826502894e-07, "logits/chosen": -2.07388973236084, "logits/rejected": -1.7710657119750977, "logps/chosen": -178.97964477539062, "logps/rejected": -222.16748046875, "loss": 0.5951, "rewards/accuracies": 0.625, "rewards/chosen": -1.2213088274002075, "rewards/margins": 1.7739155292510986, "rewards/rejected": -2.9952239990234375, "step": 5721 }, { "epoch": 0.67, "learning_rate": 1.0183063658911066e-07, "logits/chosen": -2.58060884475708, "logits/rejected": -2.418449878692627, "logps/chosen": -396.7721862792969, "logps/rejected": -360.379150390625, "loss": 0.2855, "rewards/accuracies": 0.875, "rewards/chosen": -1.1663193702697754, "rewards/margins": 3.6210176944732666, "rewards/rejected": -4.787336826324463, "step": 5722 }, { "epoch": 0.67, "learning_rate": 1.0179520491319239e-07, "logits/chosen": -2.209815263748169, "logits/rejected": -2.524604558944702, "logps/chosen": -380.9732971191406, "logps/rejected": -321.17657470703125, "loss": 0.2435, "rewards/accuracies": 0.875, "rewards/chosen": -0.9208568334579468, "rewards/margins": 2.3090951442718506, "rewards/rejected": -3.2299516201019287, "step": 5723 }, { "epoch": 0.67, "learning_rate": 1.0175977323727411e-07, "logits/chosen": -1.5827622413635254, "logits/rejected": -1.6703882217407227, "logps/chosen": -170.38226318359375, "logps/rejected": -278.2833251953125, "loss": 0.7052, "rewards/accuracies": 0.625, "rewards/chosen": -1.058730959892273, "rewards/margins": 0.6451176404953003, "rewards/rejected": -1.7038486003875732, "step": 5724 }, { "epoch": 0.67, "learning_rate": 1.0172434156135584e-07, "logits/chosen": -2.3528287410736084, "logits/rejected": -2.4916741847991943, "logps/chosen": -225.21652221679688, "logps/rejected": -267.46240234375, "loss": 0.6027, "rewards/accuracies": 0.5, "rewards/chosen": -1.6623985767364502, "rewards/margins": 1.5840626955032349, "rewards/rejected": -3.2464609146118164, "step": 5725 }, { "epoch": 0.67, "learning_rate": 1.0168890988543759e-07, "logits/chosen": -1.949461817741394, "logits/rejected": -1.838788628578186, "logps/chosen": -380.763671875, "logps/rejected": -406.9295654296875, "loss": 0.4554, "rewards/accuracies": 0.75, "rewards/chosen": -0.5501372814178467, "rewards/margins": 1.1464478969573975, "rewards/rejected": -1.6965851783752441, "step": 5726 }, { "epoch": 0.67, "learning_rate": 1.0165347820951931e-07, "logits/chosen": -1.7326855659484863, "logits/rejected": -1.9960318803787231, "logps/chosen": -310.2761535644531, "logps/rejected": -343.4034118652344, "loss": 0.7927, "rewards/accuracies": 0.5, "rewards/chosen": -1.6247073411941528, "rewards/margins": 1.178387999534607, "rewards/rejected": -2.8030953407287598, "step": 5727 }, { "epoch": 0.67, "learning_rate": 1.0161804653360104e-07, "logits/chosen": -2.3971526622772217, "logits/rejected": -2.3676724433898926, "logps/chosen": -293.67315673828125, "logps/rejected": -314.10626220703125, "loss": 0.7996, "rewards/accuracies": 0.5, "rewards/chosen": -1.5329173803329468, "rewards/margins": 2.017937660217285, "rewards/rejected": -3.5508551597595215, "step": 5728 }, { "epoch": 0.67, "learning_rate": 1.0158261485768277e-07, "logits/chosen": -2.242605209350586, "logits/rejected": -2.456092357635498, "logps/chosen": -219.0775146484375, "logps/rejected": -238.84564208984375, "loss": 0.4803, "rewards/accuracies": 0.75, "rewards/chosen": -1.7128984928131104, "rewards/margins": 2.0546960830688477, "rewards/rejected": -3.767594337463379, "step": 5729 }, { "epoch": 0.67, "learning_rate": 1.0154718318176449e-07, "logits/chosen": -2.6814515590667725, "logits/rejected": -2.6922030448913574, "logps/chosen": -206.15452575683594, "logps/rejected": -221.15023803710938, "loss": 0.4529, "rewards/accuracies": 0.75, "rewards/chosen": -0.9341166615486145, "rewards/margins": 2.8035199642181396, "rewards/rejected": -3.7376368045806885, "step": 5730 }, { "epoch": 0.67, "learning_rate": 1.0151175150584621e-07, "logits/chosen": -2.334106922149658, "logits/rejected": -2.215198040008545, "logps/chosen": -172.55801391601562, "logps/rejected": -215.14901733398438, "loss": 0.8135, "rewards/accuracies": 0.5, "rewards/chosen": -1.7230803966522217, "rewards/margins": 1.0622026920318604, "rewards/rejected": -2.785283088684082, "step": 5731 }, { "epoch": 0.67, "learning_rate": 1.0147631982992796e-07, "logits/chosen": -2.4482102394104004, "logits/rejected": -2.4757003784179688, "logps/chosen": -472.3151550292969, "logps/rejected": -328.2801513671875, "loss": 0.4107, "rewards/accuracies": 0.75, "rewards/chosen": -1.1007823944091797, "rewards/margins": 1.7438466548919678, "rewards/rejected": -2.8446290493011475, "step": 5732 }, { "epoch": 0.67, "learning_rate": 1.0144088815400968e-07, "logits/chosen": -1.73236083984375, "logits/rejected": -2.256962299346924, "logps/chosen": -332.712158203125, "logps/rejected": -170.97012329101562, "loss": 0.5033, "rewards/accuracies": 0.75, "rewards/chosen": -0.6615587472915649, "rewards/margins": 0.9288749694824219, "rewards/rejected": -1.5904337167739868, "step": 5733 }, { "epoch": 0.67, "learning_rate": 1.0140545647809142e-07, "logits/chosen": -2.6494405269622803, "logits/rejected": -2.6059999465942383, "logps/chosen": -133.99842834472656, "logps/rejected": -172.59774780273438, "loss": 0.4521, "rewards/accuracies": 0.625, "rewards/chosen": -1.001673698425293, "rewards/margins": 1.8003664016723633, "rewards/rejected": -2.8020401000976562, "step": 5734 }, { "epoch": 0.67, "learning_rate": 1.0137002480217314e-07, "logits/chosen": -2.567019462585449, "logits/rejected": -2.537938117980957, "logps/chosen": -286.87249755859375, "logps/rejected": -281.37078857421875, "loss": 0.7006, "rewards/accuracies": 0.625, "rewards/chosen": -0.6186783313751221, "rewards/margins": 1.7746076583862305, "rewards/rejected": -2.3932859897613525, "step": 5735 }, { "epoch": 0.67, "learning_rate": 1.0133459312625486e-07, "logits/chosen": -1.5154378414154053, "logits/rejected": -2.0747733116149902, "logps/chosen": -485.62481689453125, "logps/rejected": -338.37677001953125, "loss": 0.6459, "rewards/accuracies": 0.625, "rewards/chosen": -0.8394079804420471, "rewards/margins": 1.0331939458847046, "rewards/rejected": -1.8726019859313965, "step": 5736 }, { "epoch": 0.67, "learning_rate": 1.012991614503366e-07, "logits/chosen": -2.4816856384277344, "logits/rejected": -2.587556838989258, "logps/chosen": -359.301513671875, "logps/rejected": -298.4873352050781, "loss": 0.1836, "rewards/accuracies": 0.875, "rewards/chosen": -0.29759326577186584, "rewards/margins": 3.6978468894958496, "rewards/rejected": -3.9954400062561035, "step": 5737 }, { "epoch": 0.67, "learning_rate": 1.0126372977441833e-07, "logits/chosen": -2.429328203201294, "logits/rejected": -2.568686008453369, "logps/chosen": -238.32521057128906, "logps/rejected": -162.77139282226562, "loss": 0.5633, "rewards/accuracies": 0.625, "rewards/chosen": -0.451534241437912, "rewards/margins": 1.2890466451644897, "rewards/rejected": -1.7405807971954346, "step": 5738 }, { "epoch": 0.67, "learning_rate": 1.0122829809850005e-07, "logits/chosen": -2.701542854309082, "logits/rejected": -2.756730794906616, "logps/chosen": -244.95867919921875, "logps/rejected": -237.81527709960938, "loss": 0.2815, "rewards/accuracies": 0.875, "rewards/chosen": -0.7419660091400146, "rewards/margins": 2.8073973655700684, "rewards/rejected": -3.549363136291504, "step": 5739 }, { "epoch": 0.67, "learning_rate": 1.0119286642258179e-07, "logits/chosen": -2.395193338394165, "logits/rejected": -2.1522490978240967, "logps/chosen": -316.0656433105469, "logps/rejected": -254.7091827392578, "loss": 0.3435, "rewards/accuracies": 0.875, "rewards/chosen": -0.9142875671386719, "rewards/margins": 1.70399808883667, "rewards/rejected": -2.618285655975342, "step": 5740 }, { "epoch": 0.67, "learning_rate": 1.0115743474666351e-07, "logits/chosen": -2.492504596710205, "logits/rejected": -2.5207035541534424, "logps/chosen": -312.96014404296875, "logps/rejected": -250.63743591308594, "loss": 0.3079, "rewards/accuracies": 0.875, "rewards/chosen": -0.8065988421440125, "rewards/margins": 1.9257049560546875, "rewards/rejected": -2.7323038578033447, "step": 5741 }, { "epoch": 0.67, "learning_rate": 1.0112200307074523e-07, "logits/chosen": -2.487619400024414, "logits/rejected": -2.586833953857422, "logps/chosen": -230.24822998046875, "logps/rejected": -269.104736328125, "loss": 0.2565, "rewards/accuracies": 0.875, "rewards/chosen": -0.7277302742004395, "rewards/margins": 2.7000045776367188, "rewards/rejected": -3.427734613418579, "step": 5742 }, { "epoch": 0.67, "learning_rate": 1.0108657139482697e-07, "logits/chosen": -2.698556423187256, "logits/rejected": -2.8122189044952393, "logps/chosen": -125.97235107421875, "logps/rejected": -179.52749633789062, "loss": 0.5832, "rewards/accuracies": 0.75, "rewards/chosen": -0.9974445104598999, "rewards/margins": 1.5923588275909424, "rewards/rejected": -2.589803457260132, "step": 5743 }, { "epoch": 0.67, "learning_rate": 1.010511397189087e-07, "logits/chosen": -2.3821825981140137, "logits/rejected": -2.4445407390594482, "logps/chosen": -338.0587463378906, "logps/rejected": -235.01148986816406, "loss": 0.4868, "rewards/accuracies": 0.875, "rewards/chosen": -1.3402934074401855, "rewards/margins": 1.1800202131271362, "rewards/rejected": -2.5203137397766113, "step": 5744 }, { "epoch": 0.67, "learning_rate": 1.0101570804299044e-07, "logits/chosen": -2.66007399559021, "logits/rejected": -2.729832649230957, "logps/chosen": -365.5499267578125, "logps/rejected": -265.78448486328125, "loss": 0.7894, "rewards/accuracies": 0.625, "rewards/chosen": -1.3333251476287842, "rewards/margins": 0.9008133411407471, "rewards/rejected": -2.2341384887695312, "step": 5745 }, { "epoch": 0.67, "learning_rate": 1.0098027636707216e-07, "logits/chosen": -1.9044493436813354, "logits/rejected": -2.294090509414673, "logps/chosen": -373.70977783203125, "logps/rejected": -216.33627319335938, "loss": 0.5365, "rewards/accuracies": 0.75, "rewards/chosen": -0.7308860421180725, "rewards/margins": 0.5618414878845215, "rewards/rejected": -1.2927274703979492, "step": 5746 }, { "epoch": 0.67, "learning_rate": 1.0094484469115388e-07, "logits/chosen": -2.390343427658081, "logits/rejected": -2.6076478958129883, "logps/chosen": -182.3687286376953, "logps/rejected": -212.65625, "loss": 0.8951, "rewards/accuracies": 0.625, "rewards/chosen": -2.0419673919677734, "rewards/margins": 0.9703493118286133, "rewards/rejected": -3.0123167037963867, "step": 5747 }, { "epoch": 0.67, "learning_rate": 1.0090941301523562e-07, "logits/chosen": -1.786605715751648, "logits/rejected": -2.3089120388031006, "logps/chosen": -510.920166015625, "logps/rejected": -332.3559265136719, "loss": 0.165, "rewards/accuracies": 1.0, "rewards/chosen": -0.5459075570106506, "rewards/margins": 2.918745517730713, "rewards/rejected": -3.4646530151367188, "step": 5748 }, { "epoch": 0.67, "learning_rate": 1.0087398133931734e-07, "logits/chosen": -1.976226806640625, "logits/rejected": -2.018033742904663, "logps/chosen": -337.2959899902344, "logps/rejected": -370.3382873535156, "loss": 0.3711, "rewards/accuracies": 0.75, "rewards/chosen": -1.8997442722320557, "rewards/margins": 1.231888771057129, "rewards/rejected": -3.1316330432891846, "step": 5749 }, { "epoch": 0.67, "learning_rate": 1.0083854966339908e-07, "logits/chosen": -2.8591208457946777, "logits/rejected": -2.591240167617798, "logps/chosen": -216.6610870361328, "logps/rejected": -292.6071472167969, "loss": 0.2974, "rewards/accuracies": 0.875, "rewards/chosen": -1.9944829940795898, "rewards/margins": 3.8813953399658203, "rewards/rejected": -5.87587833404541, "step": 5750 }, { "epoch": 0.67, "learning_rate": 1.0080311798748081e-07, "logits/chosen": -1.9769082069396973, "logits/rejected": -2.2968480587005615, "logps/chosen": -279.29754638671875, "logps/rejected": -143.7392578125, "loss": 0.6944, "rewards/accuracies": 0.625, "rewards/chosen": -0.9152938723564148, "rewards/margins": 0.4035572409629822, "rewards/rejected": -1.318851113319397, "step": 5751 }, { "epoch": 0.67, "learning_rate": 1.0076768631156253e-07, "logits/chosen": -2.217264175415039, "logits/rejected": -2.099442481994629, "logps/chosen": -249.83682250976562, "logps/rejected": -359.16790771484375, "loss": 0.7673, "rewards/accuracies": 0.625, "rewards/chosen": -1.2593172788619995, "rewards/margins": 1.5421158075332642, "rewards/rejected": -2.8014328479766846, "step": 5752 }, { "epoch": 0.67, "learning_rate": 1.0073225463564425e-07, "logits/chosen": -1.978755235671997, "logits/rejected": -2.0711700916290283, "logps/chosen": -390.3529968261719, "logps/rejected": -287.80615234375, "loss": 0.6873, "rewards/accuracies": 0.625, "rewards/chosen": -1.218348503112793, "rewards/margins": 1.8363316059112549, "rewards/rejected": -3.054680347442627, "step": 5753 }, { "epoch": 0.67, "learning_rate": 1.0069682295972599e-07, "logits/chosen": -2.6326143741607666, "logits/rejected": -2.525367021560669, "logps/chosen": -187.7223663330078, "logps/rejected": -285.8730773925781, "loss": 0.4134, "rewards/accuracies": 0.75, "rewards/chosen": -0.7321880459785461, "rewards/margins": 2.054845094680786, "rewards/rejected": -2.7870330810546875, "step": 5754 }, { "epoch": 0.67, "learning_rate": 1.0066139128380771e-07, "logits/chosen": -2.011235237121582, "logits/rejected": -2.0516176223754883, "logps/chosen": -293.41400146484375, "logps/rejected": -198.67410278320312, "loss": 0.5663, "rewards/accuracies": 0.625, "rewards/chosen": -0.5634838342666626, "rewards/margins": 0.6417245268821716, "rewards/rejected": -1.2052083015441895, "step": 5755 }, { "epoch": 0.67, "learning_rate": 1.0062595960788946e-07, "logits/chosen": -2.6013669967651367, "logits/rejected": -2.6785786151885986, "logps/chosen": -449.7982482910156, "logps/rejected": -398.6786804199219, "loss": 0.2954, "rewards/accuracies": 0.875, "rewards/chosen": -0.9055079221725464, "rewards/margins": 2.065767288208008, "rewards/rejected": -2.9712753295898438, "step": 5756 }, { "epoch": 0.67, "learning_rate": 1.0059052793197118e-07, "logits/chosen": -1.6158208847045898, "logits/rejected": -1.562638521194458, "logps/chosen": -198.82675170898438, "logps/rejected": -345.23828125, "loss": 0.192, "rewards/accuracies": 1.0, "rewards/chosen": -0.6444261074066162, "rewards/margins": 2.9441826343536377, "rewards/rejected": -3.588608741760254, "step": 5757 }, { "epoch": 0.67, "learning_rate": 1.005550962560529e-07, "logits/chosen": -2.209902763366699, "logits/rejected": -2.3361053466796875, "logps/chosen": -370.64654541015625, "logps/rejected": -391.99383544921875, "loss": 0.4246, "rewards/accuracies": 0.75, "rewards/chosen": -1.4168072938919067, "rewards/margins": 1.4995262622833252, "rewards/rejected": -2.9163334369659424, "step": 5758 }, { "epoch": 0.67, "learning_rate": 1.0051966458013463e-07, "logits/chosen": -2.3736722469329834, "logits/rejected": -2.490983724594116, "logps/chosen": -403.9136962890625, "logps/rejected": -280.2528076171875, "loss": 0.2755, "rewards/accuracies": 0.875, "rewards/chosen": 0.048612385988235474, "rewards/margins": 2.9712984561920166, "rewards/rejected": -2.9226863384246826, "step": 5759 }, { "epoch": 0.67, "learning_rate": 1.0048423290421636e-07, "logits/chosen": -2.0639231204986572, "logits/rejected": -2.1353981494903564, "logps/chosen": -299.6881103515625, "logps/rejected": -332.575439453125, "loss": 0.1254, "rewards/accuracies": 1.0, "rewards/chosen": -1.0410830974578857, "rewards/margins": 3.5966620445251465, "rewards/rejected": -4.637744903564453, "step": 5760 }, { "epoch": 0.67, "learning_rate": 1.0044880122829808e-07, "logits/chosen": -1.9740675687789917, "logits/rejected": -2.116307020187378, "logps/chosen": -459.16058349609375, "logps/rejected": -232.26039123535156, "loss": 0.3425, "rewards/accuracies": 0.875, "rewards/chosen": -1.1385475397109985, "rewards/margins": 2.828944444656372, "rewards/rejected": -3.967491865158081, "step": 5761 }, { "epoch": 0.67, "learning_rate": 1.0041336955237983e-07, "logits/chosen": -1.720078945159912, "logits/rejected": -1.577985405921936, "logps/chosen": -368.5813293457031, "logps/rejected": -349.7297668457031, "loss": 0.5375, "rewards/accuracies": 0.75, "rewards/chosen": -0.9944068193435669, "rewards/margins": 2.064049482345581, "rewards/rejected": -3.0584561824798584, "step": 5762 }, { "epoch": 0.67, "learning_rate": 1.0037793787646156e-07, "logits/chosen": -2.807548761367798, "logits/rejected": -2.775440216064453, "logps/chosen": -306.3334045410156, "logps/rejected": -274.8289489746094, "loss": 0.5316, "rewards/accuracies": 0.875, "rewards/chosen": -1.5152502059936523, "rewards/margins": 1.0386261940002441, "rewards/rejected": -2.5538761615753174, "step": 5763 }, { "epoch": 0.67, "learning_rate": 1.0034250620054328e-07, "logits/chosen": -2.4948530197143555, "logits/rejected": -2.416546106338501, "logps/chosen": -340.4671325683594, "logps/rejected": -261.9438171386719, "loss": 0.3535, "rewards/accuracies": 0.875, "rewards/chosen": -1.6160755157470703, "rewards/margins": 1.214086890220642, "rewards/rejected": -2.830162286758423, "step": 5764 }, { "epoch": 0.67, "learning_rate": 1.0030707452462501e-07, "logits/chosen": -2.817288398742676, "logits/rejected": -2.9314796924591064, "logps/chosen": -472.5043640136719, "logps/rejected": -245.44647216796875, "loss": 0.3663, "rewards/accuracies": 0.875, "rewards/chosen": -0.5141590237617493, "rewards/margins": 1.9853473901748657, "rewards/rejected": -2.4995064735412598, "step": 5765 }, { "epoch": 0.67, "learning_rate": 1.0027164284870674e-07, "logits/chosen": -2.557020902633667, "logits/rejected": -2.5298523902893066, "logps/chosen": -169.0706787109375, "logps/rejected": -173.798095703125, "loss": 0.4056, "rewards/accuracies": 0.75, "rewards/chosen": -0.4015989899635315, "rewards/margins": 2.3247835636138916, "rewards/rejected": -2.7263827323913574, "step": 5766 }, { "epoch": 0.67, "learning_rate": 1.0023621117278847e-07, "logits/chosen": -1.7525663375854492, "logits/rejected": -1.9515842199325562, "logps/chosen": -408.82598876953125, "logps/rejected": -384.0552978515625, "loss": 0.095, "rewards/accuracies": 1.0, "rewards/chosen": -0.2872649133205414, "rewards/margins": 3.3191471099853516, "rewards/rejected": -3.606411933898926, "step": 5767 }, { "epoch": 0.67, "learning_rate": 1.002007794968702e-07, "logits/chosen": -2.5796375274658203, "logits/rejected": -2.450972318649292, "logps/chosen": -204.9462127685547, "logps/rejected": -226.3411865234375, "loss": 0.193, "rewards/accuracies": 1.0, "rewards/chosen": -1.0458745956420898, "rewards/margins": 3.495532751083374, "rewards/rejected": -4.541407108306885, "step": 5768 }, { "epoch": 0.67, "learning_rate": 1.0016534782095193e-07, "logits/chosen": -1.6434109210968018, "logits/rejected": -2.025432825088501, "logps/chosen": -314.3719787597656, "logps/rejected": -228.85870361328125, "loss": 0.7679, "rewards/accuracies": 0.75, "rewards/chosen": -1.1968960762023926, "rewards/margins": 0.8928781747817993, "rewards/rejected": -2.0897743701934814, "step": 5769 }, { "epoch": 0.67, "learning_rate": 1.0012991614503365e-07, "logits/chosen": -2.8037776947021484, "logits/rejected": -2.761701822280884, "logps/chosen": -191.1158447265625, "logps/rejected": -156.84133911132812, "loss": 0.4617, "rewards/accuracies": 0.75, "rewards/chosen": -0.7867107391357422, "rewards/margins": 1.4570469856262207, "rewards/rejected": -2.243757724761963, "step": 5770 }, { "epoch": 0.67, "learning_rate": 1.0009448446911539e-07, "logits/chosen": -2.1562514305114746, "logits/rejected": -2.417405366897583, "logps/chosen": -262.6151123046875, "logps/rejected": -347.05633544921875, "loss": 0.3718, "rewards/accuracies": 0.75, "rewards/chosen": -0.26289331912994385, "rewards/margins": 2.7212533950805664, "rewards/rejected": -2.9841468334198, "step": 5771 }, { "epoch": 0.67, "learning_rate": 1.0005905279319711e-07, "logits/chosen": -2.2447824478149414, "logits/rejected": -2.1203253269195557, "logps/chosen": -518.9264526367188, "logps/rejected": -492.086181640625, "loss": 0.8053, "rewards/accuracies": 0.75, "rewards/chosen": -0.989099383354187, "rewards/margins": 1.368168830871582, "rewards/rejected": -2.3572683334350586, "step": 5772 }, { "epoch": 0.67, "learning_rate": 1.0002362111727886e-07, "logits/chosen": -1.9306023120880127, "logits/rejected": -1.8628727197647095, "logps/chosen": -162.7547149658203, "logps/rejected": -307.2591857910156, "loss": 0.7782, "rewards/accuracies": 0.5, "rewards/chosen": -1.550424575805664, "rewards/margins": 1.218955636024475, "rewards/rejected": -2.7693800926208496, "step": 5773 }, { "epoch": 0.67, "learning_rate": 9.998818944136058e-08, "logits/chosen": -2.6193060874938965, "logits/rejected": -2.563824415206909, "logps/chosen": -188.22842407226562, "logps/rejected": -267.38323974609375, "loss": 0.3796, "rewards/accuracies": 0.875, "rewards/chosen": -1.4857118129730225, "rewards/margins": 2.1975202560424805, "rewards/rejected": -3.683232307434082, "step": 5774 }, { "epoch": 0.67, "learning_rate": 9.99527577654423e-08, "logits/chosen": -1.850621223449707, "logits/rejected": -1.6796479225158691, "logps/chosen": -398.70037841796875, "logps/rejected": -448.2723388671875, "loss": 0.308, "rewards/accuracies": 0.875, "rewards/chosen": -0.884171187877655, "rewards/margins": 3.823141574859619, "rewards/rejected": -4.707313060760498, "step": 5775 }, { "epoch": 0.67, "learning_rate": 9.991732608952402e-08, "logits/chosen": -2.561994791030884, "logits/rejected": -2.3181967735290527, "logps/chosen": -159.31642150878906, "logps/rejected": -219.54165649414062, "loss": 0.2016, "rewards/accuracies": 1.0, "rewards/chosen": -0.8138719201087952, "rewards/margins": 2.1652028560638428, "rewards/rejected": -2.9790749549865723, "step": 5776 }, { "epoch": 0.67, "learning_rate": 9.988189441360576e-08, "logits/chosen": -2.920694351196289, "logits/rejected": -2.889153003692627, "logps/chosen": -316.88543701171875, "logps/rejected": -282.6190490722656, "loss": 0.1648, "rewards/accuracies": 1.0, "rewards/chosen": -0.8634724617004395, "rewards/margins": 2.3018622398376465, "rewards/rejected": -3.165334939956665, "step": 5777 }, { "epoch": 0.67, "learning_rate": 9.984646273768748e-08, "logits/chosen": -2.0603859424591064, "logits/rejected": -2.454493999481201, "logps/chosen": -207.4029998779297, "logps/rejected": -215.81536865234375, "loss": 0.2656, "rewards/accuracies": 0.75, "rewards/chosen": -0.5988070964813232, "rewards/margins": 3.0167288780212402, "rewards/rejected": -3.6155362129211426, "step": 5778 }, { "epoch": 0.67, "learning_rate": 9.981103106176923e-08, "logits/chosen": -1.7181212902069092, "logits/rejected": -2.1415064334869385, "logps/chosen": -410.1514892578125, "logps/rejected": -361.6053466796875, "loss": 0.2006, "rewards/accuracies": 1.0, "rewards/chosen": 0.014646857976913452, "rewards/margins": 2.9200191497802734, "rewards/rejected": -2.9053726196289062, "step": 5779 }, { "epoch": 0.67, "learning_rate": 9.977559938585095e-08, "logits/chosen": -2.12811017036438, "logits/rejected": -2.302107810974121, "logps/chosen": -368.584228515625, "logps/rejected": -360.9226379394531, "loss": 0.6387, "rewards/accuracies": 0.5, "rewards/chosen": -1.7874436378479004, "rewards/margins": 1.0331153869628906, "rewards/rejected": -2.820558786392212, "step": 5780 }, { "epoch": 0.67, "learning_rate": 9.974016770993267e-08, "logits/chosen": -2.2165122032165527, "logits/rejected": -2.307837963104248, "logps/chosen": -227.49978637695312, "logps/rejected": -218.623779296875, "loss": 0.2024, "rewards/accuracies": 0.875, "rewards/chosen": -1.4706130027770996, "rewards/margins": 2.2196743488311768, "rewards/rejected": -3.6902875900268555, "step": 5781 }, { "epoch": 0.67, "learning_rate": 9.970473603401441e-08, "logits/chosen": -2.6064822673797607, "logits/rejected": -2.682154893875122, "logps/chosen": -375.7645263671875, "logps/rejected": -335.4135437011719, "loss": 0.3377, "rewards/accuracies": 0.75, "rewards/chosen": -1.144835352897644, "rewards/margins": 2.792149066925049, "rewards/rejected": -3.9369847774505615, "step": 5782 }, { "epoch": 0.67, "learning_rate": 9.966930435809613e-08, "logits/chosen": -2.0059328079223633, "logits/rejected": -1.9022154808044434, "logps/chosen": -426.1322937011719, "logps/rejected": -396.5504150390625, "loss": 0.4509, "rewards/accuracies": 0.75, "rewards/chosen": -0.6676013469696045, "rewards/margins": 1.9990286827087402, "rewards/rejected": -2.6666300296783447, "step": 5783 }, { "epoch": 0.67, "learning_rate": 9.963387268217785e-08, "logits/chosen": -2.419198989868164, "logits/rejected": -2.217604637145996, "logps/chosen": -271.5042724609375, "logps/rejected": -337.99688720703125, "loss": 0.4504, "rewards/accuracies": 0.75, "rewards/chosen": -0.7376140356063843, "rewards/margins": 1.9669110774993896, "rewards/rejected": -2.7045249938964844, "step": 5784 }, { "epoch": 0.67, "learning_rate": 9.95984410062596e-08, "logits/chosen": -2.3259098529815674, "logits/rejected": -2.6586060523986816, "logps/chosen": -222.43161010742188, "logps/rejected": -177.86752319335938, "loss": 0.5859, "rewards/accuracies": 0.75, "rewards/chosen": -1.209706425666809, "rewards/margins": 1.0650098323822021, "rewards/rejected": -2.2747161388397217, "step": 5785 }, { "epoch": 0.67, "learning_rate": 9.956300933034132e-08, "logits/chosen": -2.0341129302978516, "logits/rejected": -2.269469738006592, "logps/chosen": -163.59140014648438, "logps/rejected": -140.89967346191406, "loss": 0.6506, "rewards/accuracies": 0.5, "rewards/chosen": -0.6979331374168396, "rewards/margins": 1.7667651176452637, "rewards/rejected": -2.464698314666748, "step": 5786 }, { "epoch": 0.67, "learning_rate": 9.952757765442305e-08, "logits/chosen": -1.7250542640686035, "logits/rejected": -1.9845881462097168, "logps/chosen": -447.96728515625, "logps/rejected": -397.0347900390625, "loss": 0.4518, "rewards/accuracies": 0.875, "rewards/chosen": -0.6019169092178345, "rewards/margins": 2.493518352508545, "rewards/rejected": -3.095435380935669, "step": 5787 }, { "epoch": 0.67, "learning_rate": 9.949214597850478e-08, "logits/chosen": -2.4451355934143066, "logits/rejected": -2.253032922744751, "logps/chosen": -268.4742431640625, "logps/rejected": -321.53265380859375, "loss": 0.3656, "rewards/accuracies": 0.75, "rewards/chosen": -0.9998315572738647, "rewards/margins": 1.940568208694458, "rewards/rejected": -2.940399646759033, "step": 5788 }, { "epoch": 0.67, "learning_rate": 9.94567143025865e-08, "logits/chosen": -2.178494453430176, "logits/rejected": -2.2212491035461426, "logps/chosen": -216.44918823242188, "logps/rejected": -295.6053161621094, "loss": 0.3469, "rewards/accuracies": 0.75, "rewards/chosen": -1.4273236989974976, "rewards/margins": 2.5931284427642822, "rewards/rejected": -4.02045202255249, "step": 5789 }, { "epoch": 0.67, "learning_rate": 9.942128262666822e-08, "logits/chosen": -2.5102224349975586, "logits/rejected": -2.7717175483703613, "logps/chosen": -282.730712890625, "logps/rejected": -153.065673828125, "loss": 0.3454, "rewards/accuracies": 0.875, "rewards/chosen": -0.10285449028015137, "rewards/margins": 2.8701112270355225, "rewards/rejected": -2.972965717315674, "step": 5790 }, { "epoch": 0.67, "learning_rate": 9.938585095074997e-08, "logits/chosen": -2.7636642456054688, "logits/rejected": -2.610619068145752, "logps/chosen": -145.7222900390625, "logps/rejected": -296.0362854003906, "loss": 0.1342, "rewards/accuracies": 1.0, "rewards/chosen": -0.7835463881492615, "rewards/margins": 3.0779871940612793, "rewards/rejected": -3.8615336418151855, "step": 5791 }, { "epoch": 0.67, "learning_rate": 9.93504192748317e-08, "logits/chosen": -2.711123466491699, "logits/rejected": -2.680260181427002, "logps/chosen": -139.6595458984375, "logps/rejected": -186.71826171875, "loss": 0.5611, "rewards/accuracies": 0.75, "rewards/chosen": -0.4655061960220337, "rewards/margins": 1.5441739559173584, "rewards/rejected": -2.0096802711486816, "step": 5792 }, { "epoch": 0.67, "learning_rate": 9.931498759891343e-08, "logits/chosen": -2.421576738357544, "logits/rejected": -2.2816014289855957, "logps/chosen": -376.7750244140625, "logps/rejected": -383.32293701171875, "loss": 0.5136, "rewards/accuracies": 0.75, "rewards/chosen": -1.2615442276000977, "rewards/margins": 2.4751136302948, "rewards/rejected": -3.7366580963134766, "step": 5793 }, { "epoch": 0.67, "learning_rate": 9.927955592299515e-08, "logits/chosen": -2.3425345420837402, "logits/rejected": -2.477050542831421, "logps/chosen": -213.98519897460938, "logps/rejected": -254.2338104248047, "loss": 0.3272, "rewards/accuracies": 0.75, "rewards/chosen": -0.7614902257919312, "rewards/margins": 3.1809847354888916, "rewards/rejected": -3.9424750804901123, "step": 5794 }, { "epoch": 0.67, "learning_rate": 9.924412424707688e-08, "logits/chosen": -2.056183099746704, "logits/rejected": -2.490325689315796, "logps/chosen": -251.58493041992188, "logps/rejected": -152.64537048339844, "loss": 0.6667, "rewards/accuracies": 0.5, "rewards/chosen": -1.2129579782485962, "rewards/margins": 1.0743306875228882, "rewards/rejected": -2.2872886657714844, "step": 5795 }, { "epoch": 0.67, "learning_rate": 9.92086925711586e-08, "logits/chosen": -1.7949477434158325, "logits/rejected": -2.2164885997772217, "logps/chosen": -382.417236328125, "logps/rejected": -320.01715087890625, "loss": 0.1988, "rewards/accuracies": 1.0, "rewards/chosen": -0.8480574488639832, "rewards/margins": 2.1306028366088867, "rewards/rejected": -2.9786601066589355, "step": 5796 }, { "epoch": 0.67, "learning_rate": 9.917326089524035e-08, "logits/chosen": -2.073305368423462, "logits/rejected": -2.0590038299560547, "logps/chosen": -347.04840087890625, "logps/rejected": -326.3638916015625, "loss": 0.7199, "rewards/accuracies": 0.375, "rewards/chosen": -1.2119827270507812, "rewards/margins": 0.22591352462768555, "rewards/rejected": -1.4378962516784668, "step": 5797 }, { "epoch": 0.67, "learning_rate": 9.913782921932207e-08, "logits/chosen": -2.15633487701416, "logits/rejected": -2.24438738822937, "logps/chosen": -304.1710510253906, "logps/rejected": -214.03878784179688, "loss": 0.2889, "rewards/accuracies": 0.875, "rewards/chosen": -0.5794135332107544, "rewards/margins": 1.646407127380371, "rewards/rejected": -2.225820541381836, "step": 5798 }, { "epoch": 0.67, "learning_rate": 9.91023975434038e-08, "logits/chosen": -2.176957845687866, "logits/rejected": -2.390069007873535, "logps/chosen": -184.3973388671875, "logps/rejected": -201.41078186035156, "loss": 0.7947, "rewards/accuracies": 0.75, "rewards/chosen": -0.6155750751495361, "rewards/margins": 1.525986909866333, "rewards/rejected": -2.141561985015869, "step": 5799 }, { "epoch": 0.67, "learning_rate": 9.906696586748553e-08, "logits/chosen": -2.652984142303467, "logits/rejected": -2.677906036376953, "logps/chosen": -155.05079650878906, "logps/rejected": -227.96412658691406, "loss": 0.1973, "rewards/accuracies": 1.0, "rewards/chosen": -0.18932411074638367, "rewards/margins": 2.6940526962280273, "rewards/rejected": -2.8833768367767334, "step": 5800 }, { "epoch": 0.67, "learning_rate": 9.903153419156725e-08, "logits/chosen": -2.830456018447876, "logits/rejected": -2.7919225692749023, "logps/chosen": -179.61117553710938, "logps/rejected": -232.68484497070312, "loss": 0.3483, "rewards/accuracies": 0.75, "rewards/chosen": -0.5737810730934143, "rewards/margins": 2.6185367107391357, "rewards/rejected": -3.1923179626464844, "step": 5801 }, { "epoch": 0.67, "learning_rate": 9.8996102515649e-08, "logits/chosen": -1.5814012289047241, "logits/rejected": -2.0337724685668945, "logps/chosen": -491.41424560546875, "logps/rejected": -342.4465026855469, "loss": 0.5137, "rewards/accuracies": 0.75, "rewards/chosen": -0.913167417049408, "rewards/margins": 1.7688361406326294, "rewards/rejected": -2.6820034980773926, "step": 5802 }, { "epoch": 0.68, "learning_rate": 9.896067083973072e-08, "logits/chosen": -2.9493532180786133, "logits/rejected": -2.9293782711029053, "logps/chosen": -300.3930969238281, "logps/rejected": -366.511474609375, "loss": 0.2287, "rewards/accuracies": 0.875, "rewards/chosen": -0.3780864477157593, "rewards/margins": 2.9899775981903076, "rewards/rejected": -3.3680639266967773, "step": 5803 }, { "epoch": 0.68, "learning_rate": 9.892523916381244e-08, "logits/chosen": -2.2271621227264404, "logits/rejected": -2.051503896713257, "logps/chosen": -367.313232421875, "logps/rejected": -414.24249267578125, "loss": 0.5951, "rewards/accuracies": 0.625, "rewards/chosen": -1.2073097229003906, "rewards/margins": 1.679544448852539, "rewards/rejected": -2.8868541717529297, "step": 5804 }, { "epoch": 0.68, "learning_rate": 9.888980748789418e-08, "logits/chosen": -2.330772876739502, "logits/rejected": -2.329786777496338, "logps/chosen": -168.54380798339844, "logps/rejected": -269.3568115234375, "loss": 0.7435, "rewards/accuracies": 0.75, "rewards/chosen": -1.604803204536438, "rewards/margins": 0.6879359483718872, "rewards/rejected": -2.292739152908325, "step": 5805 }, { "epoch": 0.68, "learning_rate": 9.88543758119759e-08, "logits/chosen": -2.379081964492798, "logits/rejected": -2.16621470451355, "logps/chosen": -275.62774658203125, "logps/rejected": -277.1600036621094, "loss": 0.3908, "rewards/accuracies": 0.625, "rewards/chosen": -0.4698517620563507, "rewards/margins": 1.8437947034835815, "rewards/rejected": -2.3136463165283203, "step": 5806 }, { "epoch": 0.68, "learning_rate": 9.881894413605762e-08, "logits/chosen": -2.249687671661377, "logits/rejected": -2.333430290222168, "logps/chosen": -138.70065307617188, "logps/rejected": -202.5987548828125, "loss": 0.595, "rewards/accuracies": 0.5, "rewards/chosen": -1.474763035774231, "rewards/margins": 1.8993209600448608, "rewards/rejected": -3.374083995819092, "step": 5807 }, { "epoch": 0.68, "learning_rate": 9.878351246013937e-08, "logits/chosen": -2.2325475215911865, "logits/rejected": -2.400157928466797, "logps/chosen": -258.97601318359375, "logps/rejected": -214.21539306640625, "loss": 0.4433, "rewards/accuracies": 0.875, "rewards/chosen": -1.4454786777496338, "rewards/margins": 1.8724851608276367, "rewards/rejected": -3.3179638385772705, "step": 5808 }, { "epoch": 0.68, "learning_rate": 9.874808078422109e-08, "logits/chosen": -1.9814982414245605, "logits/rejected": -2.259554624557495, "logps/chosen": -370.0982666015625, "logps/rejected": -289.6712951660156, "loss": 0.3645, "rewards/accuracies": 0.75, "rewards/chosen": -0.7674177289009094, "rewards/margins": 1.3704586029052734, "rewards/rejected": -2.137876510620117, "step": 5809 }, { "epoch": 0.68, "learning_rate": 9.871264910830283e-08, "logits/chosen": -3.021476984024048, "logits/rejected": -3.0162901878356934, "logps/chosen": -148.9092559814453, "logps/rejected": -162.7642822265625, "loss": 0.2438, "rewards/accuracies": 1.0, "rewards/chosen": -0.22747698426246643, "rewards/margins": 1.9398497343063354, "rewards/rejected": -2.1673266887664795, "step": 5810 }, { "epoch": 0.68, "learning_rate": 9.867721743238455e-08, "logits/chosen": -2.288134813308716, "logits/rejected": -2.5542681217193604, "logps/chosen": -321.8184814453125, "logps/rejected": -230.3305206298828, "loss": 0.3836, "rewards/accuracies": 0.875, "rewards/chosen": -0.03234012424945831, "rewards/margins": 2.430572509765625, "rewards/rejected": -2.4629125595092773, "step": 5811 }, { "epoch": 0.68, "learning_rate": 9.864178575646627e-08, "logits/chosen": -2.059936046600342, "logits/rejected": -2.1046040058135986, "logps/chosen": -271.4300537109375, "logps/rejected": -309.09722900390625, "loss": 0.3622, "rewards/accuracies": 0.75, "rewards/chosen": -0.1504054218530655, "rewards/margins": 1.479260802268982, "rewards/rejected": -1.6296662092208862, "step": 5812 }, { "epoch": 0.68, "learning_rate": 9.860635408054799e-08, "logits/chosen": -2.626279354095459, "logits/rejected": -2.7447381019592285, "logps/chosen": -355.27130126953125, "logps/rejected": -273.3246154785156, "loss": 0.2139, "rewards/accuracies": 0.875, "rewards/chosen": -1.2988759279251099, "rewards/margins": 2.518878221511841, "rewards/rejected": -3.8177542686462402, "step": 5813 }, { "epoch": 0.68, "learning_rate": 9.857092240462974e-08, "logits/chosen": -2.65175724029541, "logits/rejected": -2.5101280212402344, "logps/chosen": -173.26950073242188, "logps/rejected": -215.5498046875, "loss": 0.479, "rewards/accuracies": 0.75, "rewards/chosen": -0.7574704885482788, "rewards/margins": 2.199469804763794, "rewards/rejected": -2.956940174102783, "step": 5814 }, { "epoch": 0.68, "learning_rate": 9.853549072871146e-08, "logits/chosen": -2.599482536315918, "logits/rejected": -2.360781192779541, "logps/chosen": -189.71966552734375, "logps/rejected": -329.5677185058594, "loss": 0.3286, "rewards/accuracies": 0.875, "rewards/chosen": -1.2199496030807495, "rewards/margins": 2.6072840690612793, "rewards/rejected": -3.8272337913513184, "step": 5815 }, { "epoch": 0.68, "learning_rate": 9.85000590527932e-08, "logits/chosen": -2.5412893295288086, "logits/rejected": -2.3722496032714844, "logps/chosen": -333.02581787109375, "logps/rejected": -285.10479736328125, "loss": 0.6744, "rewards/accuracies": 0.875, "rewards/chosen": -1.001763939857483, "rewards/margins": 1.6396164894104004, "rewards/rejected": -2.641380548477173, "step": 5816 }, { "epoch": 0.68, "learning_rate": 9.846462737687492e-08, "logits/chosen": -2.595106840133667, "logits/rejected": -2.3334808349609375, "logps/chosen": -210.63128662109375, "logps/rejected": -287.3044128417969, "loss": 0.1733, "rewards/accuracies": 0.875, "rewards/chosen": -0.28211739659309387, "rewards/margins": 5.175088405609131, "rewards/rejected": -5.457205772399902, "step": 5817 }, { "epoch": 0.68, "learning_rate": 9.842919570095664e-08, "logits/chosen": -2.527214527130127, "logits/rejected": -2.697998046875, "logps/chosen": -262.33013916015625, "logps/rejected": -287.5573425292969, "loss": 0.2721, "rewards/accuracies": 0.875, "rewards/chosen": -0.053454361855983734, "rewards/margins": 3.3497228622436523, "rewards/rejected": -3.403177261352539, "step": 5818 }, { "epoch": 0.68, "learning_rate": 9.839376402503838e-08, "logits/chosen": -2.8611392974853516, "logits/rejected": -2.7315587997436523, "logps/chosen": -440.97021484375, "logps/rejected": -381.95416259765625, "loss": 0.1419, "rewards/accuracies": 1.0, "rewards/chosen": -0.7846784591674805, "rewards/margins": 2.507371425628662, "rewards/rejected": -3.2920498847961426, "step": 5819 }, { "epoch": 0.68, "learning_rate": 9.835833234912011e-08, "logits/chosen": -2.6056699752807617, "logits/rejected": -2.5562195777893066, "logps/chosen": -331.08892822265625, "logps/rejected": -412.41973876953125, "loss": 0.2504, "rewards/accuracies": 0.875, "rewards/chosen": -0.5552920699119568, "rewards/margins": 4.398438453674316, "rewards/rejected": -4.95373010635376, "step": 5820 }, { "epoch": 0.68, "learning_rate": 9.832290067320184e-08, "logits/chosen": -1.926238775253296, "logits/rejected": -2.1735785007476807, "logps/chosen": -274.7484436035156, "logps/rejected": -201.3153076171875, "loss": 0.2772, "rewards/accuracies": 0.875, "rewards/chosen": -0.7782412767410278, "rewards/margins": 1.5303986072540283, "rewards/rejected": -2.3086400032043457, "step": 5821 }, { "epoch": 0.68, "learning_rate": 9.828746899728357e-08, "logits/chosen": -2.6639742851257324, "logits/rejected": -2.636319398880005, "logps/chosen": -130.09457397460938, "logps/rejected": -183.01690673828125, "loss": 0.5483, "rewards/accuracies": 0.75, "rewards/chosen": -1.3506728410720825, "rewards/margins": 0.4640253186225891, "rewards/rejected": -1.8146979808807373, "step": 5822 }, { "epoch": 0.68, "learning_rate": 9.825203732136529e-08, "logits/chosen": -2.547755241394043, "logits/rejected": -2.6127548217773438, "logps/chosen": -281.2243347167969, "logps/rejected": -359.19256591796875, "loss": 0.2907, "rewards/accuracies": 0.875, "rewards/chosen": -0.8423844575881958, "rewards/margins": 2.339141368865967, "rewards/rejected": -3.181525707244873, "step": 5823 }, { "epoch": 0.68, "learning_rate": 9.821660564544702e-08, "logits/chosen": -1.5287011861801147, "logits/rejected": -1.8484904766082764, "logps/chosen": -386.1397705078125, "logps/rejected": -261.4129638671875, "loss": 0.4082, "rewards/accuracies": 0.75, "rewards/chosen": -0.38434702157974243, "rewards/margins": 1.3657457828521729, "rewards/rejected": -1.7500927448272705, "step": 5824 }, { "epoch": 0.68, "learning_rate": 9.818117396952875e-08, "logits/chosen": -2.249058723449707, "logits/rejected": -2.184455633163452, "logps/chosen": -120.81012725830078, "logps/rejected": -257.14715576171875, "loss": 0.3532, "rewards/accuracies": 0.75, "rewards/chosen": -1.1911541223526, "rewards/margins": 2.8143506050109863, "rewards/rejected": -4.005504131317139, "step": 5825 }, { "epoch": 0.68, "learning_rate": 9.814574229361049e-08, "logits/chosen": -2.513272285461426, "logits/rejected": -2.444352149963379, "logps/chosen": -151.2674102783203, "logps/rejected": -227.27883911132812, "loss": 0.2233, "rewards/accuracies": 0.75, "rewards/chosen": 0.2101948857307434, "rewards/margins": 3.5450193881988525, "rewards/rejected": -3.334824323654175, "step": 5826 }, { "epoch": 0.68, "learning_rate": 9.811031061769222e-08, "logits/chosen": -2.1268792152404785, "logits/rejected": -2.2992568016052246, "logps/chosen": -470.65948486328125, "logps/rejected": -509.82257080078125, "loss": 0.2224, "rewards/accuracies": 0.875, "rewards/chosen": -0.7836727499961853, "rewards/margins": 2.6066439151763916, "rewards/rejected": -3.3903164863586426, "step": 5827 }, { "epoch": 0.68, "learning_rate": 9.807487894177394e-08, "logits/chosen": -2.2792792320251465, "logits/rejected": -2.6962006092071533, "logps/chosen": -259.50274658203125, "logps/rejected": -131.101806640625, "loss": 0.6768, "rewards/accuracies": 0.625, "rewards/chosen": -1.4020997285842896, "rewards/margins": 0.7297334671020508, "rewards/rejected": -2.131833076477051, "step": 5828 }, { "epoch": 0.68, "learning_rate": 9.803944726585567e-08, "logits/chosen": -1.9343293905258179, "logits/rejected": -2.0016207695007324, "logps/chosen": -589.5328979492188, "logps/rejected": -499.66583251953125, "loss": 0.4025, "rewards/accuracies": 0.75, "rewards/chosen": -1.1232115030288696, "rewards/margins": 2.042865514755249, "rewards/rejected": -3.166076898574829, "step": 5829 }, { "epoch": 0.68, "learning_rate": 9.800401558993739e-08, "logits/chosen": -2.8486547470092773, "logits/rejected": -2.7257375717163086, "logps/chosen": -163.724853515625, "logps/rejected": -219.4993438720703, "loss": 0.2646, "rewards/accuracies": 0.875, "rewards/chosen": -1.393648386001587, "rewards/margins": 2.3560056686401367, "rewards/rejected": -3.7496540546417236, "step": 5830 }, { "epoch": 0.68, "learning_rate": 9.796858391401912e-08, "logits/chosen": -1.8759996891021729, "logits/rejected": -1.860798716545105, "logps/chosen": -254.4556884765625, "logps/rejected": -210.89381408691406, "loss": 0.4051, "rewards/accuracies": 0.75, "rewards/chosen": -1.1211622953414917, "rewards/margins": 1.085024356842041, "rewards/rejected": -2.2061867713928223, "step": 5831 }, { "epoch": 0.68, "learning_rate": 9.793315223810086e-08, "logits/chosen": -2.4924604892730713, "logits/rejected": -2.5159144401550293, "logps/chosen": -287.80181884765625, "logps/rejected": -158.75582885742188, "loss": 0.3922, "rewards/accuracies": 0.875, "rewards/chosen": -0.4019641876220703, "rewards/margins": 1.3297817707061768, "rewards/rejected": -1.731745958328247, "step": 5832 }, { "epoch": 0.68, "learning_rate": 9.78977205621826e-08, "logits/chosen": -2.306197166442871, "logits/rejected": -2.6921420097351074, "logps/chosen": -363.8436584472656, "logps/rejected": -369.6960754394531, "loss": 0.3074, "rewards/accuracies": 1.0, "rewards/chosen": -2.1744704246520996, "rewards/margins": 1.3661524057388306, "rewards/rejected": -3.5406227111816406, "step": 5833 }, { "epoch": 0.68, "learning_rate": 9.786228888626432e-08, "logits/chosen": -2.684108257293701, "logits/rejected": -2.8449478149414062, "logps/chosen": -187.42185974121094, "logps/rejected": -179.3641357421875, "loss": 0.2168, "rewards/accuracies": 1.0, "rewards/chosen": -0.9326674342155457, "rewards/margins": 1.5117851495742798, "rewards/rejected": -2.4444527626037598, "step": 5834 }, { "epoch": 0.68, "learning_rate": 9.782685721034604e-08, "logits/chosen": -2.4081640243530273, "logits/rejected": -2.5438756942749023, "logps/chosen": -221.11756896972656, "logps/rejected": -247.10877990722656, "loss": 0.5358, "rewards/accuracies": 0.625, "rewards/chosen": -0.8151350617408752, "rewards/margins": 2.8048768043518066, "rewards/rejected": -3.620011568069458, "step": 5835 }, { "epoch": 0.68, "learning_rate": 9.779142553442777e-08, "logits/chosen": -2.777893304824829, "logits/rejected": -2.6634902954101562, "logps/chosen": -191.167236328125, "logps/rejected": -323.3246154785156, "loss": 0.4273, "rewards/accuracies": 0.75, "rewards/chosen": -1.5514953136444092, "rewards/margins": 2.842097043991089, "rewards/rejected": -4.393592357635498, "step": 5836 }, { "epoch": 0.68, "learning_rate": 9.775599385850951e-08, "logits/chosen": -2.401395559310913, "logits/rejected": -2.4684596061706543, "logps/chosen": -340.7705383300781, "logps/rejected": -289.1254577636719, "loss": 0.5287, "rewards/accuracies": 0.75, "rewards/chosen": -1.0362420082092285, "rewards/margins": 1.4623596668243408, "rewards/rejected": -2.4986016750335693, "step": 5837 }, { "epoch": 0.68, "learning_rate": 9.772056218259123e-08, "logits/chosen": -2.106703758239746, "logits/rejected": -1.8573567867279053, "logps/chosen": -280.6707763671875, "logps/rejected": -290.6792297363281, "loss": 0.6123, "rewards/accuracies": 0.75, "rewards/chosen": -1.0737457275390625, "rewards/margins": 2.191828966140747, "rewards/rejected": -3.2655749320983887, "step": 5838 }, { "epoch": 0.68, "learning_rate": 9.768513050667297e-08, "logits/chosen": -2.8857665061950684, "logits/rejected": -2.9276299476623535, "logps/chosen": -145.70106506347656, "logps/rejected": -161.30609130859375, "loss": 0.3633, "rewards/accuracies": 0.75, "rewards/chosen": -0.28052622079849243, "rewards/margins": 1.4684369564056396, "rewards/rejected": -1.7489631175994873, "step": 5839 }, { "epoch": 0.68, "learning_rate": 9.764969883075469e-08, "logits/chosen": -2.5134902000427246, "logits/rejected": -2.482090950012207, "logps/chosen": -205.07733154296875, "logps/rejected": -253.4620819091797, "loss": 0.0993, "rewards/accuracies": 1.0, "rewards/chosen": -0.3249451518058777, "rewards/margins": 3.211097002029419, "rewards/rejected": -3.5360419750213623, "step": 5840 }, { "epoch": 0.68, "learning_rate": 9.761426715483641e-08, "logits/chosen": -2.4590957164764404, "logits/rejected": -2.310288429260254, "logps/chosen": -114.83355712890625, "logps/rejected": -320.64031982421875, "loss": 0.1961, "rewards/accuracies": 1.0, "rewards/chosen": -0.1090492382645607, "rewards/margins": 2.796499729156494, "rewards/rejected": -2.9055488109588623, "step": 5841 }, { "epoch": 0.68, "learning_rate": 9.757883547891815e-08, "logits/chosen": -2.533212900161743, "logits/rejected": -2.3988802433013916, "logps/chosen": -417.77557373046875, "logps/rejected": -426.0582580566406, "loss": 0.2546, "rewards/accuracies": 1.0, "rewards/chosen": -1.031105399131775, "rewards/margins": 2.0796828269958496, "rewards/rejected": -3.110788345336914, "step": 5842 }, { "epoch": 0.68, "learning_rate": 9.754340380299988e-08, "logits/chosen": -2.1905882358551025, "logits/rejected": -2.430752754211426, "logps/chosen": -194.02206420898438, "logps/rejected": -177.1234893798828, "loss": 0.3432, "rewards/accuracies": 0.875, "rewards/chosen": -1.6669241189956665, "rewards/margins": 1.2506080865859985, "rewards/rejected": -2.917532205581665, "step": 5843 }, { "epoch": 0.68, "learning_rate": 9.750797212708162e-08, "logits/chosen": -2.768632411956787, "logits/rejected": -2.81009578704834, "logps/chosen": -164.57818603515625, "logps/rejected": -211.46417236328125, "loss": 0.2166, "rewards/accuracies": 1.0, "rewards/chosen": -0.9066532850265503, "rewards/margins": 2.587496757507324, "rewards/rejected": -3.494150161743164, "step": 5844 }, { "epoch": 0.68, "learning_rate": 9.747254045116334e-08, "logits/chosen": -2.7227959632873535, "logits/rejected": -2.661351203918457, "logps/chosen": -242.81210327148438, "logps/rejected": -290.8025207519531, "loss": 0.2019, "rewards/accuracies": 1.0, "rewards/chosen": -1.4141229391098022, "rewards/margins": 3.292353630065918, "rewards/rejected": -4.706476211547852, "step": 5845 }, { "epoch": 0.68, "learning_rate": 9.743710877524506e-08, "logits/chosen": -2.929567575454712, "logits/rejected": -2.975761890411377, "logps/chosen": -123.76954650878906, "logps/rejected": -234.66558837890625, "loss": 0.1309, "rewards/accuracies": 1.0, "rewards/chosen": -0.06425062566995621, "rewards/margins": 3.860234260559082, "rewards/rejected": -3.9244847297668457, "step": 5846 }, { "epoch": 0.68, "learning_rate": 9.74016770993268e-08, "logits/chosen": -2.9565682411193848, "logits/rejected": -2.8933205604553223, "logps/chosen": -232.4525146484375, "logps/rejected": -238.96649169921875, "loss": 0.3053, "rewards/accuracies": 0.875, "rewards/chosen": -1.1003684997558594, "rewards/margins": 2.5249862670898438, "rewards/rejected": -3.625354528427124, "step": 5847 }, { "epoch": 0.68, "learning_rate": 9.736624542340852e-08, "logits/chosen": -2.7018609046936035, "logits/rejected": -2.519109010696411, "logps/chosen": -264.9702453613281, "logps/rejected": -498.9739990234375, "loss": 0.1762, "rewards/accuracies": 1.0, "rewards/chosen": -0.8539862036705017, "rewards/margins": 4.591601848602295, "rewards/rejected": -5.445588111877441, "step": 5848 }, { "epoch": 0.68, "learning_rate": 9.733081374749025e-08, "logits/chosen": -3.0254151821136475, "logits/rejected": -3.0488219261169434, "logps/chosen": -174.10247802734375, "logps/rejected": -171.7904052734375, "loss": 0.4273, "rewards/accuracies": 0.875, "rewards/chosen": -1.194596767425537, "rewards/margins": 1.5572588443756104, "rewards/rejected": -2.7518558502197266, "step": 5849 }, { "epoch": 0.68, "learning_rate": 9.729538207157199e-08, "logits/chosen": -2.719857692718506, "logits/rejected": -2.6141107082366943, "logps/chosen": -447.00201416015625, "logps/rejected": -362.6184387207031, "loss": 0.1189, "rewards/accuracies": 1.0, "rewards/chosen": -0.7481001615524292, "rewards/margins": 2.6477272510528564, "rewards/rejected": -3.395827531814575, "step": 5850 }, { "epoch": 0.68, "learning_rate": 9.725995039565371e-08, "logits/chosen": -2.04660964012146, "logits/rejected": -1.894899845123291, "logps/chosen": -459.4366149902344, "logps/rejected": -459.6244812011719, "loss": 0.4305, "rewards/accuracies": 0.75, "rewards/chosen": -1.3228460550308228, "rewards/margins": 1.7490999698638916, "rewards/rejected": -3.071945905685425, "step": 5851 }, { "epoch": 0.68, "learning_rate": 9.722451871973543e-08, "logits/chosen": -2.043168544769287, "logits/rejected": -2.140120506286621, "logps/chosen": -442.5826110839844, "logps/rejected": -426.6492919921875, "loss": 0.5071, "rewards/accuracies": 0.75, "rewards/chosen": -0.3536932170391083, "rewards/margins": 1.730751633644104, "rewards/rejected": -2.084444999694824, "step": 5852 }, { "epoch": 0.68, "learning_rate": 9.718908704381717e-08, "logits/chosen": -2.7855868339538574, "logits/rejected": -2.6384315490722656, "logps/chosen": -197.64749145507812, "logps/rejected": -299.1425476074219, "loss": 0.2763, "rewards/accuracies": 0.875, "rewards/chosen": -0.3158573508262634, "rewards/margins": 3.7448999881744385, "rewards/rejected": -4.060757160186768, "step": 5853 }, { "epoch": 0.68, "learning_rate": 9.715365536789889e-08, "logits/chosen": -2.4416534900665283, "logits/rejected": -2.211107015609741, "logps/chosen": -187.8308868408203, "logps/rejected": -230.53701782226562, "loss": 0.3152, "rewards/accuracies": 0.875, "rewards/chosen": -0.6655954718589783, "rewards/margins": 1.5582704544067383, "rewards/rejected": -2.2238659858703613, "step": 5854 }, { "epoch": 0.68, "learning_rate": 9.711822369198064e-08, "logits/chosen": -2.4269604682922363, "logits/rejected": -2.6616358757019043, "logps/chosen": -230.49569702148438, "logps/rejected": -145.82594299316406, "loss": 1.7174, "rewards/accuracies": 0.75, "rewards/chosen": -3.284794569015503, "rewards/margins": 0.28378283977508545, "rewards/rejected": -3.568577289581299, "step": 5855 }, { "epoch": 0.68, "learning_rate": 9.708279201606236e-08, "logits/chosen": -2.864290952682495, "logits/rejected": -2.592101812362671, "logps/chosen": -170.21739196777344, "logps/rejected": -339.693603515625, "loss": 0.5066, "rewards/accuracies": 0.75, "rewards/chosen": -1.3716148138046265, "rewards/margins": 1.6630580425262451, "rewards/rejected": -3.034672737121582, "step": 5856 }, { "epoch": 0.68, "learning_rate": 9.704736034014408e-08, "logits/chosen": -2.1063737869262695, "logits/rejected": -2.0136847496032715, "logps/chosen": -264.4392395019531, "logps/rejected": -284.8487548828125, "loss": 0.0924, "rewards/accuracies": 1.0, "rewards/chosen": -0.652680516242981, "rewards/margins": 2.955890417098999, "rewards/rejected": -3.6085710525512695, "step": 5857 }, { "epoch": 0.68, "learning_rate": 9.70119286642258e-08, "logits/chosen": -2.2321510314941406, "logits/rejected": -2.1191959381103516, "logps/chosen": -237.4574432373047, "logps/rejected": -234.283447265625, "loss": 0.2902, "rewards/accuracies": 0.875, "rewards/chosen": -2.017428398132324, "rewards/margins": 2.4314522743225098, "rewards/rejected": -4.448881149291992, "step": 5858 }, { "epoch": 0.68, "learning_rate": 9.697649698830754e-08, "logits/chosen": -2.7944183349609375, "logits/rejected": -2.9035797119140625, "logps/chosen": -209.94839477539062, "logps/rejected": -290.82415771484375, "loss": 0.2371, "rewards/accuracies": 0.875, "rewards/chosen": -0.4654172658920288, "rewards/margins": 3.6061437129974365, "rewards/rejected": -4.071560859680176, "step": 5859 }, { "epoch": 0.68, "learning_rate": 9.694106531238926e-08, "logits/chosen": -2.2708511352539062, "logits/rejected": -2.408926486968994, "logps/chosen": -391.6497497558594, "logps/rejected": -448.88189697265625, "loss": 0.1027, "rewards/accuracies": 1.0, "rewards/chosen": -0.2796616554260254, "rewards/margins": 4.741923809051514, "rewards/rejected": -5.021585464477539, "step": 5860 }, { "epoch": 0.68, "learning_rate": 9.690563363647101e-08, "logits/chosen": -2.438347339630127, "logits/rejected": -2.4875471591949463, "logps/chosen": -251.97291564941406, "logps/rejected": -221.66908264160156, "loss": 0.3331, "rewards/accuracies": 0.875, "rewards/chosen": -1.5308715105056763, "rewards/margins": 2.1991844177246094, "rewards/rejected": -3.7300562858581543, "step": 5861 }, { "epoch": 0.68, "learning_rate": 9.687020196055273e-08, "logits/chosen": -1.7928955554962158, "logits/rejected": -2.323960065841675, "logps/chosen": -301.6091003417969, "logps/rejected": -252.599853515625, "loss": 0.6005, "rewards/accuracies": 0.625, "rewards/chosen": -1.0585882663726807, "rewards/margins": 2.8673648834228516, "rewards/rejected": -3.9259533882141113, "step": 5862 }, { "epoch": 0.68, "learning_rate": 9.683477028463446e-08, "logits/chosen": -2.190072536468506, "logits/rejected": -2.344536304473877, "logps/chosen": -281.3901062011719, "logps/rejected": -325.34735107421875, "loss": 0.2686, "rewards/accuracies": 1.0, "rewards/chosen": -0.79584801197052, "rewards/margins": 1.9217419624328613, "rewards/rejected": -2.717589855194092, "step": 5863 }, { "epoch": 0.68, "learning_rate": 9.679933860871619e-08, "logits/chosen": -1.975340485572815, "logits/rejected": -2.309140682220459, "logps/chosen": -320.14263916015625, "logps/rejected": -200.86529541015625, "loss": 0.3032, "rewards/accuracies": 0.75, "rewards/chosen": 0.07751323282718658, "rewards/margins": 2.256333589553833, "rewards/rejected": -2.1788203716278076, "step": 5864 }, { "epoch": 0.68, "learning_rate": 9.676390693279791e-08, "logits/chosen": -2.949552536010742, "logits/rejected": -3.009413957595825, "logps/chosen": -396.7818298339844, "logps/rejected": -366.3778076171875, "loss": 0.2477, "rewards/accuracies": 0.875, "rewards/chosen": -1.0566915273666382, "rewards/margins": 4.445048809051514, "rewards/rejected": -5.501739978790283, "step": 5865 }, { "epoch": 0.68, "learning_rate": 9.672847525687964e-08, "logits/chosen": -2.220644474029541, "logits/rejected": -2.4218220710754395, "logps/chosen": -361.0992736816406, "logps/rejected": -247.60507202148438, "loss": 0.4803, "rewards/accuracies": 0.75, "rewards/chosen": -0.7737730741500854, "rewards/margins": 0.7254037261009216, "rewards/rejected": -1.4991768598556519, "step": 5866 }, { "epoch": 0.68, "learning_rate": 9.669304358096138e-08, "logits/chosen": -2.1382663249969482, "logits/rejected": -2.238375186920166, "logps/chosen": -284.5999755859375, "logps/rejected": -163.6244659423828, "loss": 0.4937, "rewards/accuracies": 0.625, "rewards/chosen": -0.7055432796478271, "rewards/margins": 0.7947183847427368, "rewards/rejected": -1.5002617835998535, "step": 5867 }, { "epoch": 0.68, "learning_rate": 9.66576119050431e-08, "logits/chosen": -2.100579261779785, "logits/rejected": -1.805131435394287, "logps/chosen": -390.3517150878906, "logps/rejected": -337.4720764160156, "loss": 1.2853, "rewards/accuracies": 0.5, "rewards/chosen": -1.5684128999710083, "rewards/margins": 0.687651515007019, "rewards/rejected": -2.2560644149780273, "step": 5868 }, { "epoch": 0.68, "learning_rate": 9.662218022912483e-08, "logits/chosen": -2.3235695362091064, "logits/rejected": -2.326822280883789, "logps/chosen": -340.28302001953125, "logps/rejected": -259.8599853515625, "loss": 0.3346, "rewards/accuracies": 0.875, "rewards/chosen": -1.9328219890594482, "rewards/margins": 1.7996058464050293, "rewards/rejected": -3.7324278354644775, "step": 5869 }, { "epoch": 0.68, "learning_rate": 9.658674855320656e-08, "logits/chosen": -2.0841453075408936, "logits/rejected": -1.9630587100982666, "logps/chosen": -426.2296447753906, "logps/rejected": -445.25994873046875, "loss": 0.1472, "rewards/accuracies": 1.0, "rewards/chosen": -0.4540475010871887, "rewards/margins": 2.92598295211792, "rewards/rejected": -3.380030393600464, "step": 5870 }, { "epoch": 0.68, "learning_rate": 9.655131687728829e-08, "logits/chosen": -2.6098687648773193, "logits/rejected": -2.7706615924835205, "logps/chosen": -305.1506652832031, "logps/rejected": -256.80133056640625, "loss": 0.1673, "rewards/accuracies": 0.875, "rewards/chosen": -0.20819798111915588, "rewards/margins": 2.841977119445801, "rewards/rejected": -3.0501749515533447, "step": 5871 }, { "epoch": 0.68, "learning_rate": 9.651588520137003e-08, "logits/chosen": -2.7098987102508545, "logits/rejected": -2.765056610107422, "logps/chosen": -206.22410583496094, "logps/rejected": -188.06324768066406, "loss": 0.2634, "rewards/accuracies": 0.875, "rewards/chosen": -0.9149890542030334, "rewards/margins": 1.8388144969940186, "rewards/rejected": -2.7538037300109863, "step": 5872 }, { "epoch": 0.68, "learning_rate": 9.648045352545176e-08, "logits/chosen": -2.5029172897338867, "logits/rejected": -1.9610893726348877, "logps/chosen": -128.87857055664062, "logps/rejected": -374.7557373046875, "loss": 0.2696, "rewards/accuracies": 0.875, "rewards/chosen": -0.09530672430992126, "rewards/margins": 1.943038821220398, "rewards/rejected": -2.0383455753326416, "step": 5873 }, { "epoch": 0.68, "learning_rate": 9.644502184953348e-08, "logits/chosen": -2.2349438667297363, "logits/rejected": -2.4086241722106934, "logps/chosen": -264.426513671875, "logps/rejected": -361.1946716308594, "loss": 0.3745, "rewards/accuracies": 0.75, "rewards/chosen": -1.366037368774414, "rewards/margins": 3.4394259452819824, "rewards/rejected": -4.8054633140563965, "step": 5874 }, { "epoch": 0.68, "learning_rate": 9.64095901736152e-08, "logits/chosen": -2.061450958251953, "logits/rejected": -2.1520233154296875, "logps/chosen": -493.27294921875, "logps/rejected": -373.57012939453125, "loss": 0.3093, "rewards/accuracies": 0.875, "rewards/chosen": -1.632220983505249, "rewards/margins": 1.282753825187683, "rewards/rejected": -2.9149746894836426, "step": 5875 }, { "epoch": 0.68, "learning_rate": 9.637415849769694e-08, "logits/chosen": -2.633035182952881, "logits/rejected": -2.62864089012146, "logps/chosen": -196.40182495117188, "logps/rejected": -186.197998046875, "loss": 0.2644, "rewards/accuracies": 0.875, "rewards/chosen": -0.19892184436321259, "rewards/margins": 3.5853686332702637, "rewards/rejected": -3.784290313720703, "step": 5876 }, { "epoch": 0.68, "learning_rate": 9.633872682177866e-08, "logits/chosen": -2.3477790355682373, "logits/rejected": -2.3270368576049805, "logps/chosen": -262.3770446777344, "logps/rejected": -302.68658447265625, "loss": 0.3899, "rewards/accuracies": 0.75, "rewards/chosen": -1.1917483806610107, "rewards/margins": 1.4134128093719482, "rewards/rejected": -2.605161190032959, "step": 5877 }, { "epoch": 0.68, "learning_rate": 9.630329514586041e-08, "logits/chosen": -2.0783581733703613, "logits/rejected": -1.8413623571395874, "logps/chosen": -277.5154113769531, "logps/rejected": -466.63800048828125, "loss": 0.265, "rewards/accuracies": 0.875, "rewards/chosen": 0.014033585786819458, "rewards/margins": 1.8185244798660278, "rewards/rejected": -1.8044910430908203, "step": 5878 }, { "epoch": 0.68, "learning_rate": 9.626786346994213e-08, "logits/chosen": -2.5362746715545654, "logits/rejected": -2.6752707958221436, "logps/chosen": -254.61607360839844, "logps/rejected": -238.74249267578125, "loss": 0.2132, "rewards/accuracies": 1.0, "rewards/chosen": -0.6979035139083862, "rewards/margins": 1.9780018329620361, "rewards/rejected": -2.675905227661133, "step": 5879 }, { "epoch": 0.68, "learning_rate": 9.623243179402385e-08, "logits/chosen": -2.684633731842041, "logits/rejected": -2.559316635131836, "logps/chosen": -255.76148986816406, "logps/rejected": -320.2872314453125, "loss": 0.5279, "rewards/accuracies": 0.75, "rewards/chosen": -1.3142439126968384, "rewards/margins": 2.8019003868103027, "rewards/rejected": -4.116144180297852, "step": 5880 }, { "epoch": 0.68, "learning_rate": 9.619700011810559e-08, "logits/chosen": -2.267869710922241, "logits/rejected": -2.0087978839874268, "logps/chosen": -428.28167724609375, "logps/rejected": -306.6273498535156, "loss": 0.2731, "rewards/accuracies": 0.875, "rewards/chosen": -0.4562082290649414, "rewards/margins": 3.063324451446533, "rewards/rejected": -3.5195324420928955, "step": 5881 }, { "epoch": 0.68, "learning_rate": 9.616156844218731e-08, "logits/chosen": -2.565192699432373, "logits/rejected": -2.395318031311035, "logps/chosen": -183.47491455078125, "logps/rejected": -210.11965942382812, "loss": 0.132, "rewards/accuracies": 1.0, "rewards/chosen": -0.7746830582618713, "rewards/margins": 4.530812740325928, "rewards/rejected": -5.305495738983154, "step": 5882 }, { "epoch": 0.68, "learning_rate": 9.612613676626903e-08, "logits/chosen": -2.3027725219726562, "logits/rejected": -2.328664779663086, "logps/chosen": -472.92279052734375, "logps/rejected": -313.06787109375, "loss": 0.4451, "rewards/accuracies": 0.75, "rewards/chosen": -1.1310348510742188, "rewards/margins": 1.2796835899353027, "rewards/rejected": -2.4107184410095215, "step": 5883 }, { "epoch": 0.68, "learning_rate": 9.609070509035078e-08, "logits/chosen": -2.7532668113708496, "logits/rejected": -2.848609447479248, "logps/chosen": -343.0406799316406, "logps/rejected": -316.49688720703125, "loss": 0.2909, "rewards/accuracies": 0.875, "rewards/chosen": 0.06904959678649902, "rewards/margins": 2.565916061401367, "rewards/rejected": -2.496866464614868, "step": 5884 }, { "epoch": 0.68, "learning_rate": 9.60552734144325e-08, "logits/chosen": -2.546437978744507, "logits/rejected": -2.877047300338745, "logps/chosen": -287.32904052734375, "logps/rejected": -207.63685607910156, "loss": 0.1494, "rewards/accuracies": 0.875, "rewards/chosen": -0.6403865218162537, "rewards/margins": 4.052772045135498, "rewards/rejected": -4.693158149719238, "step": 5885 }, { "epoch": 0.68, "learning_rate": 9.601984173851422e-08, "logits/chosen": -1.9319920539855957, "logits/rejected": -1.8634850978851318, "logps/chosen": -294.12646484375, "logps/rejected": -380.89697265625, "loss": 0.8134, "rewards/accuracies": 0.625, "rewards/chosen": -1.4304003715515137, "rewards/margins": 0.44577091932296753, "rewards/rejected": -1.876171350479126, "step": 5886 }, { "epoch": 0.68, "learning_rate": 9.598441006259596e-08, "logits/chosen": -2.1247053146362305, "logits/rejected": -2.233093023300171, "logps/chosen": -370.36456298828125, "logps/rejected": -374.5145263671875, "loss": 0.1664, "rewards/accuracies": 0.875, "rewards/chosen": -0.43138623237609863, "rewards/margins": 2.675057888031006, "rewards/rejected": -3.1064443588256836, "step": 5887 }, { "epoch": 0.68, "learning_rate": 9.594897838667768e-08, "logits/chosen": -2.5947442054748535, "logits/rejected": -2.422447919845581, "logps/chosen": -293.9527893066406, "logps/rejected": -351.14892578125, "loss": 0.2396, "rewards/accuracies": 1.0, "rewards/chosen": -0.30337369441986084, "rewards/margins": 2.2513818740844727, "rewards/rejected": -2.554755687713623, "step": 5888 }, { "epoch": 0.69, "learning_rate": 9.59135467107594e-08, "logits/chosen": -2.32375431060791, "logits/rejected": -2.4762425422668457, "logps/chosen": -216.7289276123047, "logps/rejected": -221.71517944335938, "loss": 0.6269, "rewards/accuracies": 0.5, "rewards/chosen": -2.4433209896087646, "rewards/margins": 0.9587020874023438, "rewards/rejected": -3.4020230770111084, "step": 5889 }, { "epoch": 0.69, "learning_rate": 9.587811503484115e-08, "logits/chosen": -1.8865809440612793, "logits/rejected": -1.92498779296875, "logps/chosen": -292.66668701171875, "logps/rejected": -256.88104248046875, "loss": 1.0313, "rewards/accuracies": 0.625, "rewards/chosen": -2.3378713130950928, "rewards/margins": 1.1495935916900635, "rewards/rejected": -3.4874649047851562, "step": 5890 }, { "epoch": 0.69, "learning_rate": 9.584268335892287e-08, "logits/chosen": -1.949636697769165, "logits/rejected": -2.1250860691070557, "logps/chosen": -355.9758605957031, "logps/rejected": -281.478515625, "loss": 0.5243, "rewards/accuracies": 0.75, "rewards/chosen": -1.42768132686615, "rewards/margins": 1.5919275283813477, "rewards/rejected": -3.019608736038208, "step": 5891 }, { "epoch": 0.69, "learning_rate": 9.580725168300461e-08, "logits/chosen": -2.072981119155884, "logits/rejected": -2.3087215423583984, "logps/chosen": -335.58642578125, "logps/rejected": -303.81231689453125, "loss": 0.3614, "rewards/accuracies": 0.75, "rewards/chosen": -0.31743279099464417, "rewards/margins": 1.7269761562347412, "rewards/rejected": -2.0444087982177734, "step": 5892 }, { "epoch": 0.69, "learning_rate": 9.577182000708633e-08, "logits/chosen": -2.3352630138397217, "logits/rejected": -2.3733298778533936, "logps/chosen": -369.0168762207031, "logps/rejected": -347.66455078125, "loss": 0.2708, "rewards/accuracies": 1.0, "rewards/chosen": -0.41420289874076843, "rewards/margins": 1.910370111465454, "rewards/rejected": -2.324573040008545, "step": 5893 }, { "epoch": 0.69, "learning_rate": 9.573638833116805e-08, "logits/chosen": -2.337742805480957, "logits/rejected": -2.1794371604919434, "logps/chosen": -359.23370361328125, "logps/rejected": -289.09222412109375, "loss": 0.3418, "rewards/accuracies": 0.875, "rewards/chosen": -0.6389098167419434, "rewards/margins": 2.439901351928711, "rewards/rejected": -3.0788111686706543, "step": 5894 }, { "epoch": 0.69, "learning_rate": 9.570095665524978e-08, "logits/chosen": -1.9090752601623535, "logits/rejected": -2.084043025970459, "logps/chosen": -284.69732666015625, "logps/rejected": -306.3131103515625, "loss": 0.4023, "rewards/accuracies": 0.875, "rewards/chosen": -0.7799218893051147, "rewards/margins": 2.544426918029785, "rewards/rejected": -3.3243486881256104, "step": 5895 }, { "epoch": 0.69, "learning_rate": 9.566552497933152e-08, "logits/chosen": -1.9165470600128174, "logits/rejected": -1.620482325553894, "logps/chosen": -238.80709838867188, "logps/rejected": -334.2662353515625, "loss": 0.5079, "rewards/accuracies": 0.875, "rewards/chosen": -1.262253999710083, "rewards/margins": 1.624863862991333, "rewards/rejected": -2.887117862701416, "step": 5896 }, { "epoch": 0.69, "learning_rate": 9.563009330341325e-08, "logits/chosen": -1.9619412422180176, "logits/rejected": -2.1274194717407227, "logps/chosen": -400.50946044921875, "logps/rejected": -322.94012451171875, "loss": 0.4957, "rewards/accuracies": 0.625, "rewards/chosen": -0.848469614982605, "rewards/margins": 1.2204712629318237, "rewards/rejected": -2.068941116333008, "step": 5897 }, { "epoch": 0.69, "learning_rate": 9.559466162749498e-08, "logits/chosen": -2.6760141849517822, "logits/rejected": -2.6767990589141846, "logps/chosen": -319.7278137207031, "logps/rejected": -177.6492919921875, "loss": 0.2742, "rewards/accuracies": 0.875, "rewards/chosen": -0.6762881278991699, "rewards/margins": 2.2938010692596436, "rewards/rejected": -2.9700894355773926, "step": 5898 }, { "epoch": 0.69, "learning_rate": 9.55592299515767e-08, "logits/chosen": -2.734656810760498, "logits/rejected": -2.700079917907715, "logps/chosen": -98.83802032470703, "logps/rejected": -183.96836853027344, "loss": 0.4978, "rewards/accuracies": 0.75, "rewards/chosen": -0.6390364170074463, "rewards/margins": 2.0303823947906494, "rewards/rejected": -2.6694188117980957, "step": 5899 }, { "epoch": 0.69, "learning_rate": 9.552379827565843e-08, "logits/chosen": -2.077681064605713, "logits/rejected": -1.7779815196990967, "logps/chosen": -222.34445190429688, "logps/rejected": -264.60589599609375, "loss": 0.188, "rewards/accuracies": 1.0, "rewards/chosen": -0.5411868095397949, "rewards/margins": 2.003406047821045, "rewards/rejected": -2.54459285736084, "step": 5900 }, { "epoch": 0.69, "learning_rate": 9.548836659974016e-08, "logits/chosen": -2.422165870666504, "logits/rejected": -2.2710418701171875, "logps/chosen": -190.32870483398438, "logps/rejected": -237.65179443359375, "loss": 0.3044, "rewards/accuracies": 0.875, "rewards/chosen": -0.5092857480049133, "rewards/margins": 1.8819029331207275, "rewards/rejected": -2.391188621520996, "step": 5901 }, { "epoch": 0.69, "learning_rate": 9.54529349238219e-08, "logits/chosen": -2.528597831726074, "logits/rejected": -2.565235137939453, "logps/chosen": -314.50408935546875, "logps/rejected": -234.3599395751953, "loss": 0.3795, "rewards/accuracies": 0.875, "rewards/chosen": -0.5634748339653015, "rewards/margins": 1.376401662826538, "rewards/rejected": -1.9398764371871948, "step": 5902 }, { "epoch": 0.69, "learning_rate": 9.541750324790362e-08, "logits/chosen": -2.3112144470214844, "logits/rejected": -2.413743019104004, "logps/chosen": -217.50584411621094, "logps/rejected": -243.25726318359375, "loss": 0.1812, "rewards/accuracies": 1.0, "rewards/chosen": -0.17611321806907654, "rewards/margins": 2.6496267318725586, "rewards/rejected": -2.825739622116089, "step": 5903 }, { "epoch": 0.69, "learning_rate": 9.538207157198535e-08, "logits/chosen": -1.55003821849823, "logits/rejected": -1.8767004013061523, "logps/chosen": -439.70892333984375, "logps/rejected": -420.8730163574219, "loss": 0.57, "rewards/accuracies": 0.75, "rewards/chosen": -1.4753055572509766, "rewards/margins": 0.8566839694976807, "rewards/rejected": -2.3319895267486572, "step": 5904 }, { "epoch": 0.69, "learning_rate": 9.534663989606708e-08, "logits/chosen": -1.8701086044311523, "logits/rejected": -2.1864428520202637, "logps/chosen": -237.4652099609375, "logps/rejected": -232.73223876953125, "loss": 0.5811, "rewards/accuracies": 0.875, "rewards/chosen": -0.8477592468261719, "rewards/margins": 2.560506820678711, "rewards/rejected": -3.408266067504883, "step": 5905 }, { "epoch": 0.69, "learning_rate": 9.53112082201488e-08, "logits/chosen": -1.9162864685058594, "logits/rejected": -2.178011417388916, "logps/chosen": -410.37445068359375, "logps/rejected": -260.7994079589844, "loss": 0.7261, "rewards/accuracies": 0.75, "rewards/chosen": -0.6745463609695435, "rewards/margins": 1.3188854455947876, "rewards/rejected": -1.9934319257736206, "step": 5906 }, { "epoch": 0.69, "learning_rate": 9.527577654423055e-08, "logits/chosen": -2.1639902591705322, "logits/rejected": -2.3871755599975586, "logps/chosen": -133.58465576171875, "logps/rejected": -126.93550109863281, "loss": 0.4765, "rewards/accuracies": 0.625, "rewards/chosen": -0.9120819568634033, "rewards/margins": 1.528993844985962, "rewards/rejected": -2.4410760402679443, "step": 5907 }, { "epoch": 0.69, "learning_rate": 9.524034486831227e-08, "logits/chosen": -2.173604726791382, "logits/rejected": -1.8360134363174438, "logps/chosen": -99.08837890625, "logps/rejected": -266.2565002441406, "loss": 0.4014, "rewards/accuracies": 0.625, "rewards/chosen": -1.5634245872497559, "rewards/margins": 2.742495059967041, "rewards/rejected": -4.305919170379639, "step": 5908 }, { "epoch": 0.69, "learning_rate": 9.5204913192394e-08, "logits/chosen": -2.8517205715179443, "logits/rejected": -2.60160493850708, "logps/chosen": -89.30880737304688, "logps/rejected": -179.57623291015625, "loss": 0.5477, "rewards/accuracies": 0.625, "rewards/chosen": -1.708245038986206, "rewards/margins": 0.9475343227386475, "rewards/rejected": -2.6557793617248535, "step": 5909 }, { "epoch": 0.69, "learning_rate": 9.516948151647573e-08, "logits/chosen": -2.0299699306488037, "logits/rejected": -2.201125383377075, "logps/chosen": -273.859619140625, "logps/rejected": -299.3193359375, "loss": 0.4466, "rewards/accuracies": 0.75, "rewards/chosen": -0.12934082746505737, "rewards/margins": 1.8654190301895142, "rewards/rejected": -1.9947597980499268, "step": 5910 }, { "epoch": 0.69, "learning_rate": 9.513404984055745e-08, "logits/chosen": -2.2381529808044434, "logits/rejected": -2.412238597869873, "logps/chosen": -181.9948272705078, "logps/rejected": -161.71131896972656, "loss": 0.2716, "rewards/accuracies": 0.875, "rewards/chosen": -1.194922685623169, "rewards/margins": 1.8007724285125732, "rewards/rejected": -2.995695114135742, "step": 5911 }, { "epoch": 0.69, "learning_rate": 9.509861816463917e-08, "logits/chosen": -2.2182302474975586, "logits/rejected": -2.3002400398254395, "logps/chosen": -222.30081176757812, "logps/rejected": -221.53790283203125, "loss": 1.4641, "rewards/accuracies": 0.5, "rewards/chosen": -1.611617088317871, "rewards/margins": 0.47977834939956665, "rewards/rejected": -2.091395378112793, "step": 5912 }, { "epoch": 0.69, "learning_rate": 9.506318648872092e-08, "logits/chosen": -2.3533477783203125, "logits/rejected": -2.264291763305664, "logps/chosen": -256.43707275390625, "logps/rejected": -245.60302734375, "loss": 0.3625, "rewards/accuracies": 0.875, "rewards/chosen": -0.4704910218715668, "rewards/margins": 2.043818712234497, "rewards/rejected": -2.514309883117676, "step": 5913 }, { "epoch": 0.69, "learning_rate": 9.502775481280264e-08, "logits/chosen": -2.201035976409912, "logits/rejected": -2.378178596496582, "logps/chosen": -314.2226257324219, "logps/rejected": -242.24322509765625, "loss": 0.6851, "rewards/accuracies": 0.75, "rewards/chosen": -1.4087984561920166, "rewards/margins": 1.5760602951049805, "rewards/rejected": -2.984858751296997, "step": 5914 }, { "epoch": 0.69, "learning_rate": 9.499232313688438e-08, "logits/chosen": -2.6358203887939453, "logits/rejected": -2.5250744819641113, "logps/chosen": -255.57733154296875, "logps/rejected": -356.587158203125, "loss": 0.5078, "rewards/accuracies": 0.625, "rewards/chosen": -1.2061302661895752, "rewards/margins": 1.6427199840545654, "rewards/rejected": -2.8488500118255615, "step": 5915 }, { "epoch": 0.69, "learning_rate": 9.49568914609661e-08, "logits/chosen": -1.6475694179534912, "logits/rejected": -1.6061614751815796, "logps/chosen": -402.5577087402344, "logps/rejected": -407.996337890625, "loss": 0.9326, "rewards/accuracies": 0.75, "rewards/chosen": -1.743146538734436, "rewards/margins": 0.970516562461853, "rewards/rejected": -2.713663101196289, "step": 5916 }, { "epoch": 0.69, "learning_rate": 9.492145978504782e-08, "logits/chosen": -2.179629325866699, "logits/rejected": -2.039102554321289, "logps/chosen": -301.76422119140625, "logps/rejected": -389.2252197265625, "loss": 0.271, "rewards/accuracies": 0.875, "rewards/chosen": -0.1813288927078247, "rewards/margins": 2.287900447845459, "rewards/rejected": -2.4692294597625732, "step": 5917 }, { "epoch": 0.69, "learning_rate": 9.488602810912956e-08, "logits/chosen": -2.7107768058776855, "logits/rejected": -2.750114679336548, "logps/chosen": -294.894287109375, "logps/rejected": -280.7994689941406, "loss": 0.4793, "rewards/accuracies": 0.75, "rewards/chosen": -1.198390245437622, "rewards/margins": 3.3883914947509766, "rewards/rejected": -4.5867815017700195, "step": 5918 }, { "epoch": 0.69, "learning_rate": 9.485059643321129e-08, "logits/chosen": -2.4870729446411133, "logits/rejected": -2.305002212524414, "logps/chosen": -389.4294128417969, "logps/rejected": -433.03057861328125, "loss": 0.2701, "rewards/accuracies": 0.75, "rewards/chosen": -0.527874767780304, "rewards/margins": 2.127601385116577, "rewards/rejected": -2.6554760932922363, "step": 5919 }, { "epoch": 0.69, "learning_rate": 9.481516475729301e-08, "logits/chosen": -2.9721598625183105, "logits/rejected": -3.0318779945373535, "logps/chosen": -175.3630828857422, "logps/rejected": -223.1979522705078, "loss": 0.2474, "rewards/accuracies": 0.875, "rewards/chosen": -0.6491037607192993, "rewards/margins": 2.826045274734497, "rewards/rejected": -3.475148916244507, "step": 5920 }, { "epoch": 0.69, "learning_rate": 9.477973308137475e-08, "logits/chosen": -1.7583205699920654, "logits/rejected": -1.9558253288269043, "logps/chosen": -586.5169677734375, "logps/rejected": -455.55072021484375, "loss": 0.3244, "rewards/accuracies": 0.875, "rewards/chosen": -1.0194284915924072, "rewards/margins": 2.181220531463623, "rewards/rejected": -3.200648784637451, "step": 5921 }, { "epoch": 0.69, "learning_rate": 9.474430140545647e-08, "logits/chosen": -2.657351493835449, "logits/rejected": -2.9368834495544434, "logps/chosen": -863.09912109375, "logps/rejected": -168.06103515625, "loss": 0.5833, "rewards/accuracies": 0.625, "rewards/chosen": -1.3771238327026367, "rewards/margins": 1.4492801427841187, "rewards/rejected": -2.826404094696045, "step": 5922 }, { "epoch": 0.69, "learning_rate": 9.47088697295382e-08, "logits/chosen": -2.249117851257324, "logits/rejected": -2.3774282932281494, "logps/chosen": -428.8744812011719, "logps/rejected": -438.18658447265625, "loss": 0.647, "rewards/accuracies": 0.75, "rewards/chosen": -0.8838959336280823, "rewards/margins": 1.1071912050247192, "rewards/rejected": -1.9910871982574463, "step": 5923 }, { "epoch": 0.69, "learning_rate": 9.467343805361993e-08, "logits/chosen": -2.333578586578369, "logits/rejected": -2.4373106956481934, "logps/chosen": -289.1213073730469, "logps/rejected": -202.0303497314453, "loss": 0.3034, "rewards/accuracies": 0.875, "rewards/chosen": -0.43871447443962097, "rewards/margins": 2.020746946334839, "rewards/rejected": -2.4594614505767822, "step": 5924 }, { "epoch": 0.69, "learning_rate": 9.463800637770166e-08, "logits/chosen": -2.077951192855835, "logits/rejected": -1.8953227996826172, "logps/chosen": -217.91490173339844, "logps/rejected": -287.54241943359375, "loss": 0.2732, "rewards/accuracies": 0.875, "rewards/chosen": -0.4839100241661072, "rewards/margins": 2.426374912261963, "rewards/rejected": -2.9102847576141357, "step": 5925 }, { "epoch": 0.69, "learning_rate": 9.46025747017834e-08, "logits/chosen": -1.8402951955795288, "logits/rejected": -1.91623854637146, "logps/chosen": -505.0852355957031, "logps/rejected": -452.1322326660156, "loss": 0.4046, "rewards/accuracies": 0.875, "rewards/chosen": -0.7689120173454285, "rewards/margins": 1.7076504230499268, "rewards/rejected": -2.4765625, "step": 5926 }, { "epoch": 0.69, "learning_rate": 9.456714302586512e-08, "logits/chosen": -1.6595100164413452, "logits/rejected": -1.982163667678833, "logps/chosen": -293.1182861328125, "logps/rejected": -265.4901123046875, "loss": 0.2295, "rewards/accuracies": 0.875, "rewards/chosen": -1.2739243507385254, "rewards/margins": 2.196908473968506, "rewards/rejected": -3.4708328247070312, "step": 5927 }, { "epoch": 0.69, "learning_rate": 9.453171134994684e-08, "logits/chosen": -2.3765406608581543, "logits/rejected": -2.3280892372131348, "logps/chosen": -230.54444885253906, "logps/rejected": -198.10858154296875, "loss": 0.3386, "rewards/accuracies": 0.875, "rewards/chosen": -0.16764776408672333, "rewards/margins": 1.4049782752990723, "rewards/rejected": -1.5726261138916016, "step": 5928 }, { "epoch": 0.69, "learning_rate": 9.449627967402857e-08, "logits/chosen": -2.28157901763916, "logits/rejected": -2.579763650894165, "logps/chosen": -138.90377807617188, "logps/rejected": -216.56285095214844, "loss": 0.2334, "rewards/accuracies": 0.875, "rewards/chosen": -0.683760404586792, "rewards/margins": 3.10937762260437, "rewards/rejected": -3.793138027191162, "step": 5929 }, { "epoch": 0.69, "learning_rate": 9.44608479981103e-08, "logits/chosen": -2.830928087234497, "logits/rejected": -2.681988000869751, "logps/chosen": -670.5145874023438, "logps/rejected": -298.57373046875, "loss": 0.3143, "rewards/accuracies": 0.875, "rewards/chosen": -1.9294040203094482, "rewards/margins": 1.8117976188659668, "rewards/rejected": -3.741201639175415, "step": 5930 }, { "epoch": 0.69, "learning_rate": 9.442541632219204e-08, "logits/chosen": -2.1685876846313477, "logits/rejected": -2.0510852336883545, "logps/chosen": -136.60537719726562, "logps/rejected": -192.06222534179688, "loss": 0.2329, "rewards/accuracies": 1.0, "rewards/chosen": -0.8772600293159485, "rewards/margins": 2.285426139831543, "rewards/rejected": -3.1626861095428467, "step": 5931 }, { "epoch": 0.69, "learning_rate": 9.438998464627377e-08, "logits/chosen": -2.0891244411468506, "logits/rejected": -2.65095853805542, "logps/chosen": -462.6180419921875, "logps/rejected": -302.8314208984375, "loss": 0.2518, "rewards/accuracies": 0.875, "rewards/chosen": -0.9913123846054077, "rewards/margins": 2.433444023132324, "rewards/rejected": -3.4247565269470215, "step": 5932 }, { "epoch": 0.69, "learning_rate": 9.43545529703555e-08, "logits/chosen": -1.6498281955718994, "logits/rejected": -1.9068920612335205, "logps/chosen": -588.326904296875, "logps/rejected": -409.0146179199219, "loss": 1.189, "rewards/accuracies": 0.5, "rewards/chosen": -1.684700846672058, "rewards/margins": 0.0992247462272644, "rewards/rejected": -1.7839255332946777, "step": 5933 }, { "epoch": 0.69, "learning_rate": 9.431912129443722e-08, "logits/chosen": -2.5157723426818848, "logits/rejected": -2.7526533603668213, "logps/chosen": -173.66766357421875, "logps/rejected": -119.85406494140625, "loss": 0.3312, "rewards/accuracies": 0.875, "rewards/chosen": -0.6468674540519714, "rewards/margins": 1.2184251546859741, "rewards/rejected": -1.8652925491333008, "step": 5934 }, { "epoch": 0.69, "learning_rate": 9.428368961851895e-08, "logits/chosen": -2.8032970428466797, "logits/rejected": -2.7056663036346436, "logps/chosen": -264.3237609863281, "logps/rejected": -271.6268310546875, "loss": 0.2401, "rewards/accuracies": 0.875, "rewards/chosen": -0.9757487177848816, "rewards/margins": 2.0539379119873047, "rewards/rejected": -3.029686689376831, "step": 5935 }, { "epoch": 0.69, "learning_rate": 9.424825794260067e-08, "logits/chosen": -1.95194411277771, "logits/rejected": -2.1157774925231934, "logps/chosen": -157.1025390625, "logps/rejected": -210.97959899902344, "loss": 1.1956, "rewards/accuracies": 0.625, "rewards/chosen": -2.0827066898345947, "rewards/margins": 1.262181043624878, "rewards/rejected": -3.3448877334594727, "step": 5936 }, { "epoch": 0.69, "learning_rate": 9.421282626668241e-08, "logits/chosen": -2.865729808807373, "logits/rejected": -2.614572048187256, "logps/chosen": -99.10787963867188, "logps/rejected": -151.16262817382812, "loss": 0.268, "rewards/accuracies": 1.0, "rewards/chosen": -0.41255316138267517, "rewards/margins": 1.4948984384536743, "rewards/rejected": -1.9074515104293823, "step": 5937 }, { "epoch": 0.69, "learning_rate": 9.417739459076414e-08, "logits/chosen": -2.282487392425537, "logits/rejected": -2.684597969055176, "logps/chosen": -341.38702392578125, "logps/rejected": -275.035400390625, "loss": 0.2266, "rewards/accuracies": 0.875, "rewards/chosen": -1.064897060394287, "rewards/margins": 3.2950215339660645, "rewards/rejected": -4.359918594360352, "step": 5938 }, { "epoch": 0.69, "learning_rate": 9.414196291484587e-08, "logits/chosen": -2.266268730163574, "logits/rejected": -2.3414816856384277, "logps/chosen": -443.3470458984375, "logps/rejected": -358.99951171875, "loss": 0.3121, "rewards/accuracies": 0.875, "rewards/chosen": -1.2478749752044678, "rewards/margins": 1.345739483833313, "rewards/rejected": -2.5936145782470703, "step": 5939 }, { "epoch": 0.69, "learning_rate": 9.410653123892759e-08, "logits/chosen": -2.692594289779663, "logits/rejected": -2.5957717895507812, "logps/chosen": -113.17381286621094, "logps/rejected": -198.43589782714844, "loss": 0.3994, "rewards/accuracies": 0.75, "rewards/chosen": -0.35381466150283813, "rewards/margins": 2.577329158782959, "rewards/rejected": -2.9311442375183105, "step": 5940 }, { "epoch": 0.69, "learning_rate": 9.407109956300932e-08, "logits/chosen": -2.719303846359253, "logits/rejected": -2.3532893657684326, "logps/chosen": -215.58489990234375, "logps/rejected": -243.45327758789062, "loss": 0.2119, "rewards/accuracies": 1.0, "rewards/chosen": -1.0949797630310059, "rewards/margins": 2.5842397212982178, "rewards/rejected": -3.6792197227478027, "step": 5941 }, { "epoch": 0.69, "learning_rate": 9.403566788709105e-08, "logits/chosen": -2.4591453075408936, "logits/rejected": -2.3354687690734863, "logps/chosen": -227.62564086914062, "logps/rejected": -327.6260681152344, "loss": 0.1955, "rewards/accuracies": 1.0, "rewards/chosen": -1.2488445043563843, "rewards/margins": 2.28767728805542, "rewards/rejected": -3.5365219116210938, "step": 5942 }, { "epoch": 0.69, "learning_rate": 9.40002362111728e-08, "logits/chosen": -2.248875379562378, "logits/rejected": -2.5448269844055176, "logps/chosen": -231.5139617919922, "logps/rejected": -216.9073486328125, "loss": 0.5503, "rewards/accuracies": 0.75, "rewards/chosen": -0.5718295574188232, "rewards/margins": 1.9952366352081299, "rewards/rejected": -2.567065954208374, "step": 5943 }, { "epoch": 0.69, "learning_rate": 9.396480453525452e-08, "logits/chosen": -2.1362948417663574, "logits/rejected": -2.390921115875244, "logps/chosen": -313.90802001953125, "logps/rejected": -359.3048095703125, "loss": 0.5642, "rewards/accuracies": 0.75, "rewards/chosen": -0.40796053409576416, "rewards/margins": 2.4602465629577637, "rewards/rejected": -2.8682069778442383, "step": 5944 }, { "epoch": 0.69, "learning_rate": 9.392937285933624e-08, "logits/chosen": -2.5148916244506836, "logits/rejected": -2.684272289276123, "logps/chosen": -334.59991455078125, "logps/rejected": -188.01336669921875, "loss": 0.5003, "rewards/accuracies": 0.875, "rewards/chosen": -1.575730323791504, "rewards/margins": 1.7875216007232666, "rewards/rejected": -3.3632519245147705, "step": 5945 }, { "epoch": 0.69, "learning_rate": 9.389394118341797e-08, "logits/chosen": -2.01924729347229, "logits/rejected": -2.026073932647705, "logps/chosen": -311.9098205566406, "logps/rejected": -313.757568359375, "loss": 0.2504, "rewards/accuracies": 0.875, "rewards/chosen": -0.9026853442192078, "rewards/margins": 2.4526865482330322, "rewards/rejected": -3.3553714752197266, "step": 5946 }, { "epoch": 0.69, "learning_rate": 9.38585095074997e-08, "logits/chosen": -2.741791248321533, "logits/rejected": -2.587472915649414, "logps/chosen": -362.07330322265625, "logps/rejected": -329.809814453125, "loss": 0.4363, "rewards/accuracies": 0.75, "rewards/chosen": -0.04341205954551697, "rewards/margins": 2.5221872329711914, "rewards/rejected": -2.565599203109741, "step": 5947 }, { "epoch": 0.69, "learning_rate": 9.382307783158143e-08, "logits/chosen": -2.2487430572509766, "logits/rejected": -2.1011388301849365, "logps/chosen": -328.9737548828125, "logps/rejected": -481.4882507324219, "loss": 0.5682, "rewards/accuracies": 0.5, "rewards/chosen": -0.7789228558540344, "rewards/margins": 1.8237905502319336, "rewards/rejected": -2.6027135848999023, "step": 5948 }, { "epoch": 0.69, "learning_rate": 9.378764615566317e-08, "logits/chosen": -2.1852614879608154, "logits/rejected": -2.221254825592041, "logps/chosen": -403.380126953125, "logps/rejected": -393.04730224609375, "loss": 0.4, "rewards/accuracies": 0.75, "rewards/chosen": -0.6940209269523621, "rewards/margins": 2.767766237258911, "rewards/rejected": -3.461787223815918, "step": 5949 }, { "epoch": 0.69, "learning_rate": 9.375221447974489e-08, "logits/chosen": -1.5681110620498657, "logits/rejected": -1.8948683738708496, "logps/chosen": -428.328857421875, "logps/rejected": -341.12884521484375, "loss": 0.2291, "rewards/accuracies": 1.0, "rewards/chosen": -0.324679970741272, "rewards/margins": 1.9649741649627686, "rewards/rejected": -2.28965425491333, "step": 5950 }, { "epoch": 0.69, "learning_rate": 9.371678280382661e-08, "logits/chosen": -2.9862968921661377, "logits/rejected": -3.0504016876220703, "logps/chosen": -140.73727416992188, "logps/rejected": -198.33126831054688, "loss": 0.39, "rewards/accuracies": 0.75, "rewards/chosen": -0.8614821434020996, "rewards/margins": 2.6209652423858643, "rewards/rejected": -3.482447624206543, "step": 5951 }, { "epoch": 0.69, "learning_rate": 9.368135112790835e-08, "logits/chosen": -1.6383451223373413, "logits/rejected": -1.7701480388641357, "logps/chosen": -593.3883056640625, "logps/rejected": -452.3918762207031, "loss": 1.2717, "rewards/accuracies": 0.25, "rewards/chosen": -1.3469135761260986, "rewards/margins": -0.1748369336128235, "rewards/rejected": -1.1720765829086304, "step": 5952 }, { "epoch": 0.69, "learning_rate": 9.364591945199007e-08, "logits/chosen": -2.013572931289673, "logits/rejected": -1.9564919471740723, "logps/chosen": -215.31222534179688, "logps/rejected": -214.2820587158203, "loss": 0.8383, "rewards/accuracies": 0.625, "rewards/chosen": -2.1268084049224854, "rewards/margins": 0.5475807189941406, "rewards/rejected": -2.674389123916626, "step": 5953 }, { "epoch": 0.69, "learning_rate": 9.361048777607182e-08, "logits/chosen": -1.7631113529205322, "logits/rejected": -1.8286938667297363, "logps/chosen": -398.39862060546875, "logps/rejected": -356.8348693847656, "loss": 0.2042, "rewards/accuracies": 0.875, "rewards/chosen": -0.5517328381538391, "rewards/margins": 2.7781646251678467, "rewards/rejected": -3.329897403717041, "step": 5954 }, { "epoch": 0.69, "learning_rate": 9.357505610015354e-08, "logits/chosen": -2.437823534011841, "logits/rejected": -2.403437376022339, "logps/chosen": -167.44659423828125, "logps/rejected": -213.57162475585938, "loss": 0.5455, "rewards/accuracies": 0.625, "rewards/chosen": -1.5479899644851685, "rewards/margins": 1.2420594692230225, "rewards/rejected": -2.7900495529174805, "step": 5955 }, { "epoch": 0.69, "learning_rate": 9.353962442423526e-08, "logits/chosen": -2.013030529022217, "logits/rejected": -2.059264898300171, "logps/chosen": -263.68426513671875, "logps/rejected": -253.4541015625, "loss": 0.148, "rewards/accuracies": 1.0, "rewards/chosen": -0.4490455389022827, "rewards/margins": 2.705362319946289, "rewards/rejected": -3.1544079780578613, "step": 5956 }, { "epoch": 0.69, "learning_rate": 9.350419274831698e-08, "logits/chosen": -2.433872699737549, "logits/rejected": -2.173637628555298, "logps/chosen": -168.36306762695312, "logps/rejected": -218.60040283203125, "loss": 0.4971, "rewards/accuracies": 0.875, "rewards/chosen": -1.2994093894958496, "rewards/margins": 1.480369210243225, "rewards/rejected": -2.779778480529785, "step": 5957 }, { "epoch": 0.69, "learning_rate": 9.346876107239872e-08, "logits/chosen": -2.1994478702545166, "logits/rejected": -2.126127004623413, "logps/chosen": -261.6136779785156, "logps/rejected": -419.21502685546875, "loss": 0.2662, "rewards/accuracies": 0.875, "rewards/chosen": -0.7895863056182861, "rewards/margins": 4.078079700469971, "rewards/rejected": -4.867666244506836, "step": 5958 }, { "epoch": 0.69, "learning_rate": 9.343332939648044e-08, "logits/chosen": -1.7941107749938965, "logits/rejected": -1.8441176414489746, "logps/chosen": -307.96466064453125, "logps/rejected": -352.8614501953125, "loss": 0.7728, "rewards/accuracies": 0.75, "rewards/chosen": -1.5004740953445435, "rewards/margins": 1.545682668685913, "rewards/rejected": -3.046156883239746, "step": 5959 }, { "epoch": 0.69, "learning_rate": 9.339789772056219e-08, "logits/chosen": -2.652026653289795, "logits/rejected": -2.6799988746643066, "logps/chosen": -278.9390869140625, "logps/rejected": -332.3306579589844, "loss": 0.518, "rewards/accuracies": 0.75, "rewards/chosen": -1.1404473781585693, "rewards/margins": 2.0784149169921875, "rewards/rejected": -3.218862295150757, "step": 5960 }, { "epoch": 0.69, "learning_rate": 9.336246604464391e-08, "logits/chosen": -2.4938302040100098, "logits/rejected": -2.380173683166504, "logps/chosen": -304.5406494140625, "logps/rejected": -317.55615234375, "loss": 0.2866, "rewards/accuracies": 0.875, "rewards/chosen": -0.6309347152709961, "rewards/margins": 1.4928696155548096, "rewards/rejected": -2.1238043308258057, "step": 5961 }, { "epoch": 0.69, "learning_rate": 9.332703436872563e-08, "logits/chosen": -2.2930479049682617, "logits/rejected": -2.491283416748047, "logps/chosen": -245.7716827392578, "logps/rejected": -301.66522216796875, "loss": 0.1723, "rewards/accuracies": 1.0, "rewards/chosen": -1.13448166847229, "rewards/margins": 3.241150140762329, "rewards/rejected": -4.375631809234619, "step": 5962 }, { "epoch": 0.69, "learning_rate": 9.329160269280737e-08, "logits/chosen": -2.1727917194366455, "logits/rejected": -2.5466363430023193, "logps/chosen": -453.0307922363281, "logps/rejected": -359.4427185058594, "loss": 0.2969, "rewards/accuracies": 0.875, "rewards/chosen": -1.494231104850769, "rewards/margins": 2.26005220413208, "rewards/rejected": -3.7542834281921387, "step": 5963 }, { "epoch": 0.69, "learning_rate": 9.325617101688909e-08, "logits/chosen": -2.8971292972564697, "logits/rejected": -2.8799123764038086, "logps/chosen": -292.66351318359375, "logps/rejected": -248.83889770507812, "loss": 0.1071, "rewards/accuracies": 1.0, "rewards/chosen": -1.54169499874115, "rewards/margins": 3.625748634338379, "rewards/rejected": -5.16744327545166, "step": 5964 }, { "epoch": 0.69, "learning_rate": 9.322073934097081e-08, "logits/chosen": -2.1410813331604004, "logits/rejected": -1.830519437789917, "logps/chosen": -168.0423126220703, "logps/rejected": -266.092041015625, "loss": 0.4915, "rewards/accuracies": 0.625, "rewards/chosen": -1.8001751899719238, "rewards/margins": 2.052030563354492, "rewards/rejected": -3.852205753326416, "step": 5965 }, { "epoch": 0.69, "learning_rate": 9.318530766505256e-08, "logits/chosen": -2.5409159660339355, "logits/rejected": -2.723055362701416, "logps/chosen": -330.30291748046875, "logps/rejected": -324.2977294921875, "loss": 0.2499, "rewards/accuracies": 0.875, "rewards/chosen": -0.3429834246635437, "rewards/margins": 3.1278910636901855, "rewards/rejected": -3.470874309539795, "step": 5966 }, { "epoch": 0.69, "learning_rate": 9.314987598913429e-08, "logits/chosen": -2.169447898864746, "logits/rejected": -1.918920874595642, "logps/chosen": -296.6964416503906, "logps/rejected": -553.6142578125, "loss": 0.349, "rewards/accuracies": 0.75, "rewards/chosen": -1.3497463464736938, "rewards/margins": 1.7751201391220093, "rewards/rejected": -3.124866485595703, "step": 5967 }, { "epoch": 0.69, "learning_rate": 9.311444431321601e-08, "logits/chosen": -2.9531173706054688, "logits/rejected": -2.958043336868286, "logps/chosen": -68.79165649414062, "logps/rejected": -177.22085571289062, "loss": 0.4329, "rewards/accuracies": 0.875, "rewards/chosen": -0.6737321019172668, "rewards/margins": 3.1215827465057373, "rewards/rejected": -3.7953150272369385, "step": 5968 }, { "epoch": 0.69, "learning_rate": 9.307901263729774e-08, "logits/chosen": -2.692622184753418, "logits/rejected": -2.624939441680908, "logps/chosen": -223.71083068847656, "logps/rejected": -190.51426696777344, "loss": 0.7559, "rewards/accuracies": 0.625, "rewards/chosen": -1.3284344673156738, "rewards/margins": 0.8538775444030762, "rewards/rejected": -2.18231201171875, "step": 5969 }, { "epoch": 0.69, "learning_rate": 9.304358096137946e-08, "logits/chosen": -2.783207893371582, "logits/rejected": -2.6350581645965576, "logps/chosen": -189.0893096923828, "logps/rejected": -171.85400390625, "loss": 0.214, "rewards/accuracies": 0.875, "rewards/chosen": -1.1891270875930786, "rewards/margins": 3.6914901733398438, "rewards/rejected": -4.880617618560791, "step": 5970 }, { "epoch": 0.69, "learning_rate": 9.300814928546119e-08, "logits/chosen": -2.4099061489105225, "logits/rejected": -2.021446466445923, "logps/chosen": -198.8851776123047, "logps/rejected": -319.7222900390625, "loss": 0.2556, "rewards/accuracies": 0.875, "rewards/chosen": -0.7842992544174194, "rewards/margins": 2.9663493633270264, "rewards/rejected": -3.7506484985351562, "step": 5971 }, { "epoch": 0.69, "learning_rate": 9.297271760954294e-08, "logits/chosen": -2.1567671298980713, "logits/rejected": -2.4220125675201416, "logps/chosen": -390.8017272949219, "logps/rejected": -307.2712097167969, "loss": 0.3252, "rewards/accuracies": 0.875, "rewards/chosen": -0.9562134742736816, "rewards/margins": 2.350801944732666, "rewards/rejected": -3.3070154190063477, "step": 5972 }, { "epoch": 0.69, "learning_rate": 9.293728593362466e-08, "logits/chosen": -1.9482084512710571, "logits/rejected": -1.7152926921844482, "logps/chosen": -129.21717834472656, "logps/rejected": -179.1575469970703, "loss": 0.4035, "rewards/accuracies": 0.75, "rewards/chosen": -0.48120176792144775, "rewards/margins": 1.815718412399292, "rewards/rejected": -2.2969202995300293, "step": 5973 }, { "epoch": 0.69, "learning_rate": 9.290185425770638e-08, "logits/chosen": -2.2032294273376465, "logits/rejected": -2.5073676109313965, "logps/chosen": -404.4399719238281, "logps/rejected": -237.85464477539062, "loss": 0.3043, "rewards/accuracies": 0.875, "rewards/chosen": -0.022510483860969543, "rewards/margins": 2.283503293991089, "rewards/rejected": -2.306013822555542, "step": 5974 }, { "epoch": 0.7, "learning_rate": 9.286642258178811e-08, "logits/chosen": -2.5684890747070312, "logits/rejected": -2.4349396228790283, "logps/chosen": -218.24407958984375, "logps/rejected": -287.7623596191406, "loss": 0.4725, "rewards/accuracies": 0.75, "rewards/chosen": -1.193178653717041, "rewards/margins": 1.247343897819519, "rewards/rejected": -2.4405226707458496, "step": 5975 }, { "epoch": 0.7, "learning_rate": 9.283099090586984e-08, "logits/chosen": -2.634812116622925, "logits/rejected": -2.8056836128234863, "logps/chosen": -242.88885498046875, "logps/rejected": -231.482666015625, "loss": 0.4609, "rewards/accuracies": 0.75, "rewards/chosen": -1.384013295173645, "rewards/margins": 1.9812326431274414, "rewards/rejected": -3.365246057510376, "step": 5976 }, { "epoch": 0.7, "learning_rate": 9.279555922995156e-08, "logits/chosen": -2.0458903312683105, "logits/rejected": -2.479682207107544, "logps/chosen": -576.851806640625, "logps/rejected": -334.5779113769531, "loss": 0.2845, "rewards/accuracies": 0.875, "rewards/chosen": -1.0324729681015015, "rewards/margins": 2.067734479904175, "rewards/rejected": -3.1002073287963867, "step": 5977 }, { "epoch": 0.7, "learning_rate": 9.276012755403331e-08, "logits/chosen": -2.3929603099823, "logits/rejected": -2.2969861030578613, "logps/chosen": -108.85712432861328, "logps/rejected": -186.12872314453125, "loss": 0.7329, "rewards/accuracies": 0.625, "rewards/chosen": -1.1857686042785645, "rewards/margins": 1.537548303604126, "rewards/rejected": -2.7233171463012695, "step": 5978 }, { "epoch": 0.7, "learning_rate": 9.272469587811503e-08, "logits/chosen": -1.8463034629821777, "logits/rejected": -1.9940028190612793, "logps/chosen": -336.3285827636719, "logps/rejected": -306.70233154296875, "loss": 0.6161, "rewards/accuracies": 0.625, "rewards/chosen": -0.8039653301239014, "rewards/margins": 0.31288450956344604, "rewards/rejected": -1.1168498992919922, "step": 5979 }, { "epoch": 0.7, "learning_rate": 9.268926420219677e-08, "logits/chosen": -2.8811416625976562, "logits/rejected": -2.775979518890381, "logps/chosen": -254.50343322753906, "logps/rejected": -246.05128479003906, "loss": 0.2994, "rewards/accuracies": 0.875, "rewards/chosen": -0.9869256019592285, "rewards/margins": 1.5724821090698242, "rewards/rejected": -2.5594077110290527, "step": 5980 }, { "epoch": 0.7, "learning_rate": 9.265383252627849e-08, "logits/chosen": -1.7466903924942017, "logits/rejected": -1.8183021545410156, "logps/chosen": -331.76348876953125, "logps/rejected": -286.8199462890625, "loss": 0.2365, "rewards/accuracies": 0.875, "rewards/chosen": 0.07527542859315872, "rewards/margins": 3.1024773120880127, "rewards/rejected": -3.0272016525268555, "step": 5981 }, { "epoch": 0.7, "learning_rate": 9.261840085036021e-08, "logits/chosen": -3.00887393951416, "logits/rejected": -2.956235885620117, "logps/chosen": -231.02664184570312, "logps/rejected": -208.3655242919922, "loss": 0.3936, "rewards/accuracies": 0.75, "rewards/chosen": -0.820006251335144, "rewards/margins": 1.6724148988723755, "rewards/rejected": -2.4924211502075195, "step": 5982 }, { "epoch": 0.7, "learning_rate": 9.258296917444196e-08, "logits/chosen": -2.571348190307617, "logits/rejected": -2.55328106880188, "logps/chosen": -185.45504760742188, "logps/rejected": -300.9732666015625, "loss": 0.2959, "rewards/accuracies": 0.875, "rewards/chosen": -1.1201887130737305, "rewards/margins": 2.3118059635162354, "rewards/rejected": -3.431994676589966, "step": 5983 }, { "epoch": 0.7, "learning_rate": 9.254753749852368e-08, "logits/chosen": -2.0843253135681152, "logits/rejected": -1.8757576942443848, "logps/chosen": -263.79571533203125, "logps/rejected": -417.78656005859375, "loss": 0.1634, "rewards/accuracies": 1.0, "rewards/chosen": 0.22406062483787537, "rewards/margins": 3.275120973587036, "rewards/rejected": -3.051060438156128, "step": 5984 }, { "epoch": 0.7, "learning_rate": 9.25121058226054e-08, "logits/chosen": -2.97792911529541, "logits/rejected": -2.8449926376342773, "logps/chosen": -194.62420654296875, "logps/rejected": -265.5486145019531, "loss": 0.1545, "rewards/accuracies": 1.0, "rewards/chosen": -0.428298681974411, "rewards/margins": 4.042585849761963, "rewards/rejected": -4.470884323120117, "step": 5985 }, { "epoch": 0.7, "learning_rate": 9.247667414668714e-08, "logits/chosen": -2.135791063308716, "logits/rejected": -2.1938891410827637, "logps/chosen": -253.03616333007812, "logps/rejected": -191.9647979736328, "loss": 0.7574, "rewards/accuracies": 0.625, "rewards/chosen": -0.7978093028068542, "rewards/margins": 0.5191839933395386, "rewards/rejected": -1.316993236541748, "step": 5986 }, { "epoch": 0.7, "learning_rate": 9.244124247076886e-08, "logits/chosen": -2.6979825496673584, "logits/rejected": -2.681398391723633, "logps/chosen": -264.9971008300781, "logps/rejected": -319.8002014160156, "loss": 0.3114, "rewards/accuracies": 0.875, "rewards/chosen": -1.3239859342575073, "rewards/margins": 1.5056757926940918, "rewards/rejected": -2.8296616077423096, "step": 5987 }, { "epoch": 0.7, "learning_rate": 9.240581079485058e-08, "logits/chosen": -2.4685072898864746, "logits/rejected": -2.56435489654541, "logps/chosen": -183.2127227783203, "logps/rejected": -210.27597045898438, "loss": 0.3203, "rewards/accuracies": 0.75, "rewards/chosen": -0.9452823996543884, "rewards/margins": 2.252321243286133, "rewards/rejected": -3.197603702545166, "step": 5988 }, { "epoch": 0.7, "learning_rate": 9.237037911893233e-08, "logits/chosen": -1.550560712814331, "logits/rejected": -1.153158187866211, "logps/chosen": -280.5323486328125, "logps/rejected": -421.38482666015625, "loss": 1.1279, "rewards/accuracies": 0.625, "rewards/chosen": -2.3674099445343018, "rewards/margins": 1.1257030963897705, "rewards/rejected": -3.4931132793426514, "step": 5989 }, { "epoch": 0.7, "learning_rate": 9.233494744301405e-08, "logits/chosen": -2.6287765502929688, "logits/rejected": -2.4114041328430176, "logps/chosen": -181.55838012695312, "logps/rejected": -227.42025756835938, "loss": 0.5254, "rewards/accuracies": 0.75, "rewards/chosen": -0.9875730276107788, "rewards/margins": 2.732818365097046, "rewards/rejected": -3.720391273498535, "step": 5990 }, { "epoch": 0.7, "learning_rate": 9.229951576709577e-08, "logits/chosen": -2.578174114227295, "logits/rejected": -2.420846462249756, "logps/chosen": -425.3371276855469, "logps/rejected": -371.03643798828125, "loss": 0.3028, "rewards/accuracies": 0.75, "rewards/chosen": -0.9448429346084595, "rewards/margins": 2.1427884101867676, "rewards/rejected": -3.0876314640045166, "step": 5991 }, { "epoch": 0.7, "learning_rate": 9.226408409117751e-08, "logits/chosen": -2.466123104095459, "logits/rejected": -2.5625219345092773, "logps/chosen": -289.1417236328125, "logps/rejected": -276.5509948730469, "loss": 0.1494, "rewards/accuracies": 1.0, "rewards/chosen": -0.6761846542358398, "rewards/margins": 3.657160758972168, "rewards/rejected": -4.333345413208008, "step": 5992 }, { "epoch": 0.7, "learning_rate": 9.222865241525923e-08, "logits/chosen": -2.333064556121826, "logits/rejected": -2.7006757259368896, "logps/chosen": -232.5068359375, "logps/rejected": -265.67822265625, "loss": 0.8478, "rewards/accuracies": 0.625, "rewards/chosen": -1.1403058767318726, "rewards/margins": 2.566277265548706, "rewards/rejected": -3.706583023071289, "step": 5993 }, { "epoch": 0.7, "learning_rate": 9.219322073934095e-08, "logits/chosen": -1.8091753721237183, "logits/rejected": -1.693538784980774, "logps/chosen": -264.4583740234375, "logps/rejected": -322.2759704589844, "loss": 0.2388, "rewards/accuracies": 1.0, "rewards/chosen": -0.17475838959217072, "rewards/margins": 2.284785270690918, "rewards/rejected": -2.4595437049865723, "step": 5994 }, { "epoch": 0.7, "learning_rate": 9.21577890634227e-08, "logits/chosen": -2.579787492752075, "logits/rejected": -2.596958875656128, "logps/chosen": -223.7586212158203, "logps/rejected": -224.19454956054688, "loss": 0.5459, "rewards/accuracies": 0.625, "rewards/chosen": -1.28402841091156, "rewards/margins": 1.1929841041564941, "rewards/rejected": -2.4770123958587646, "step": 5995 }, { "epoch": 0.7, "learning_rate": 9.212235738750443e-08, "logits/chosen": -2.2675952911376953, "logits/rejected": -2.266467809677124, "logps/chosen": -250.20394897460938, "logps/rejected": -327.83441162109375, "loss": 0.2396, "rewards/accuracies": 0.875, "rewards/chosen": -1.4304215908050537, "rewards/margins": 2.713576316833496, "rewards/rejected": -4.143998146057129, "step": 5996 }, { "epoch": 0.7, "learning_rate": 9.208692571158616e-08, "logits/chosen": -2.6624648571014404, "logits/rejected": -2.6537322998046875, "logps/chosen": -358.8682861328125, "logps/rejected": -236.94256591796875, "loss": 0.2816, "rewards/accuracies": 0.875, "rewards/chosen": -0.36517655849456787, "rewards/margins": 1.998573660850525, "rewards/rejected": -2.3637502193450928, "step": 5997 }, { "epoch": 0.7, "learning_rate": 9.205149403566788e-08, "logits/chosen": -1.9768404960632324, "logits/rejected": -2.039572238922119, "logps/chosen": -341.776123046875, "logps/rejected": -325.33099365234375, "loss": 0.4145, "rewards/accuracies": 0.75, "rewards/chosen": -1.3710134029388428, "rewards/margins": 2.7524940967559814, "rewards/rejected": -4.123507499694824, "step": 5998 }, { "epoch": 0.7, "learning_rate": 9.20160623597496e-08, "logits/chosen": -2.439814567565918, "logits/rejected": -2.283468723297119, "logps/chosen": -327.0221252441406, "logps/rejected": -299.84490966796875, "loss": 0.3501, "rewards/accuracies": 1.0, "rewards/chosen": -0.35666030645370483, "rewards/margins": 1.7146227359771729, "rewards/rejected": -2.0712828636169434, "step": 5999 }, { "epoch": 0.7, "learning_rate": 9.198063068383134e-08, "logits/chosen": -2.2308316230773926, "logits/rejected": -2.031500816345215, "logps/chosen": -299.0888671875, "logps/rejected": -379.53070068359375, "loss": 0.1674, "rewards/accuracies": 1.0, "rewards/chosen": -0.6453994512557983, "rewards/margins": 3.115950107574463, "rewards/rejected": -3.7613492012023926, "step": 6000 }, { "epoch": 0.7, "eval_logits/chosen": -1.7527562379837036, "eval_logits/rejected": -1.7537353038787842, "eval_logps/chosen": -278.6492614746094, "eval_logps/rejected": -279.0399475097656, "eval_loss": 0.36728817224502563, "eval_rewards/accuracies": 0.8548850417137146, "eval_rewards/chosen": -0.648383617401123, "eval_rewards/margins": 2.1549606323242188, "eval_rewards/rejected": -2.803344249725342, "eval_runtime": 238.5977, "eval_samples_per_second": 2.913, "eval_steps_per_second": 1.459, "step": 6000 }, { "epoch": 0.7, "learning_rate": 9.194519900791308e-08, "logits/chosen": -2.1462483406066895, "logits/rejected": -2.2073240280151367, "logps/chosen": -324.7988586425781, "logps/rejected": -418.3434753417969, "loss": 0.7451, "rewards/accuracies": 0.625, "rewards/chosen": -0.6907975673675537, "rewards/margins": 0.7781219482421875, "rewards/rejected": -1.4689193964004517, "step": 6001 }, { "epoch": 0.7, "learning_rate": 9.19097673319948e-08, "logits/chosen": -2.4827237129211426, "logits/rejected": -2.5902347564697266, "logps/chosen": -213.63230895996094, "logps/rejected": -145.9033660888672, "loss": 1.4522, "rewards/accuracies": 0.5, "rewards/chosen": -2.1601834297180176, "rewards/margins": 1.175827145576477, "rewards/rejected": -3.3360109329223633, "step": 6002 }, { "epoch": 0.7, "learning_rate": 9.187433565607653e-08, "logits/chosen": -1.9801803827285767, "logits/rejected": -2.0193517208099365, "logps/chosen": -399.0745849609375, "logps/rejected": -300.7531433105469, "loss": 0.5171, "rewards/accuracies": 0.875, "rewards/chosen": -1.1907086372375488, "rewards/margins": 1.2503740787506104, "rewards/rejected": -2.441082715988159, "step": 6003 }, { "epoch": 0.7, "learning_rate": 9.183890398015826e-08, "logits/chosen": -2.383993148803711, "logits/rejected": -2.341671943664551, "logps/chosen": -277.90240478515625, "logps/rejected": -268.15765380859375, "loss": 0.2204, "rewards/accuracies": 0.875, "rewards/chosen": -0.7308348417282104, "rewards/margins": 3.4386465549468994, "rewards/rejected": -4.16948127746582, "step": 6004 }, { "epoch": 0.7, "learning_rate": 9.180347230423998e-08, "logits/chosen": -2.455585479736328, "logits/rejected": -2.4666290283203125, "logps/chosen": -284.3740539550781, "logps/rejected": -356.8467102050781, "loss": 0.0939, "rewards/accuracies": 1.0, "rewards/chosen": -0.08365730196237564, "rewards/margins": 3.768669843673706, "rewards/rejected": -3.8523268699645996, "step": 6005 }, { "epoch": 0.7, "learning_rate": 9.176804062832171e-08, "logits/chosen": -2.163043260574341, "logits/rejected": -2.4181694984436035, "logps/chosen": -347.5899658203125, "logps/rejected": -377.31134033203125, "loss": 0.5187, "rewards/accuracies": 0.75, "rewards/chosen": -0.9709057807922363, "rewards/margins": 2.2633957862854004, "rewards/rejected": -3.2343015670776367, "step": 6006 }, { "epoch": 0.7, "learning_rate": 9.173260895240345e-08, "logits/chosen": -2.540018081665039, "logits/rejected": -2.403459310531616, "logps/chosen": -498.76702880859375, "logps/rejected": -323.305908203125, "loss": 0.3377, "rewards/accuracies": 0.75, "rewards/chosen": -0.5676903128623962, "rewards/margins": 2.13607120513916, "rewards/rejected": -2.703761339187622, "step": 6007 }, { "epoch": 0.7, "learning_rate": 9.169717727648518e-08, "logits/chosen": -2.448683738708496, "logits/rejected": -2.412686824798584, "logps/chosen": -238.2685546875, "logps/rejected": -173.99188232421875, "loss": 0.2481, "rewards/accuracies": 1.0, "rewards/chosen": -0.8431081771850586, "rewards/margins": 2.941160202026367, "rewards/rejected": -3.784268379211426, "step": 6008 }, { "epoch": 0.7, "learning_rate": 9.16617456005669e-08, "logits/chosen": -2.6664700508117676, "logits/rejected": -2.6652727127075195, "logps/chosen": -205.84657287597656, "logps/rejected": -233.36599731445312, "loss": 0.183, "rewards/accuracies": 0.875, "rewards/chosen": -0.9583196043968201, "rewards/margins": 4.038956642150879, "rewards/rejected": -4.997276782989502, "step": 6009 }, { "epoch": 0.7, "learning_rate": 9.162631392464863e-08, "logits/chosen": -2.2818922996520996, "logits/rejected": -2.285827159881592, "logps/chosen": -332.89154052734375, "logps/rejected": -366.5816345214844, "loss": 0.2169, "rewards/accuracies": 1.0, "rewards/chosen": 0.040971674025058746, "rewards/margins": 2.3611698150634766, "rewards/rejected": -2.3201980590820312, "step": 6010 }, { "epoch": 0.7, "learning_rate": 9.159088224873035e-08, "logits/chosen": -2.167664051055908, "logits/rejected": -2.3829503059387207, "logps/chosen": -369.7292785644531, "logps/rejected": -319.5907897949219, "loss": 0.2052, "rewards/accuracies": 1.0, "rewards/chosen": -0.20869721472263336, "rewards/margins": 1.6884161233901978, "rewards/rejected": -1.89711332321167, "step": 6011 }, { "epoch": 0.7, "learning_rate": 9.155545057281208e-08, "logits/chosen": -2.605367422103882, "logits/rejected": -2.6658053398132324, "logps/chosen": -186.47979736328125, "logps/rejected": -181.69515991210938, "loss": 0.2283, "rewards/accuracies": 0.875, "rewards/chosen": -0.8381533622741699, "rewards/margins": 2.5030322074890137, "rewards/rejected": -3.341186046600342, "step": 6012 }, { "epoch": 0.7, "learning_rate": 9.152001889689382e-08, "logits/chosen": -2.70961332321167, "logits/rejected": -2.7942330837249756, "logps/chosen": -174.91329956054688, "logps/rejected": -221.78334045410156, "loss": 0.2489, "rewards/accuracies": 1.0, "rewards/chosen": -1.3838317394256592, "rewards/margins": 2.8249096870422363, "rewards/rejected": -4.208741664886475, "step": 6013 }, { "epoch": 0.7, "learning_rate": 9.148458722097556e-08, "logits/chosen": -2.072396755218506, "logits/rejected": -2.619096517562866, "logps/chosen": -287.3940124511719, "logps/rejected": -149.68157958984375, "loss": 0.3905, "rewards/accuracies": 0.875, "rewards/chosen": -0.5017650723457336, "rewards/margins": 1.3530287742614746, "rewards/rejected": -1.8547937870025635, "step": 6014 }, { "epoch": 0.7, "learning_rate": 9.144915554505728e-08, "logits/chosen": -2.825576066970825, "logits/rejected": -2.6692593097686768, "logps/chosen": -268.5045166015625, "logps/rejected": -292.56048583984375, "loss": 0.3052, "rewards/accuracies": 0.875, "rewards/chosen": -1.9862189292907715, "rewards/margins": 2.3398356437683105, "rewards/rejected": -4.326054573059082, "step": 6015 }, { "epoch": 0.7, "learning_rate": 9.1413723869139e-08, "logits/chosen": -2.2917284965515137, "logits/rejected": -2.1904425621032715, "logps/chosen": -319.18792724609375, "logps/rejected": -441.0121154785156, "loss": 0.2303, "rewards/accuracies": 0.875, "rewards/chosen": -1.0515291690826416, "rewards/margins": 3.179701805114746, "rewards/rejected": -4.231231212615967, "step": 6016 }, { "epoch": 0.7, "learning_rate": 9.137829219322074e-08, "logits/chosen": -1.898000955581665, "logits/rejected": -2.12349271774292, "logps/chosen": -449.4615173339844, "logps/rejected": -338.26629638671875, "loss": 0.1392, "rewards/accuracies": 1.0, "rewards/chosen": 0.37480705976486206, "rewards/margins": 2.8934261798858643, "rewards/rejected": -2.5186190605163574, "step": 6017 }, { "epoch": 0.7, "learning_rate": 9.134286051730247e-08, "logits/chosen": -2.3604321479797363, "logits/rejected": -2.4165232181549072, "logps/chosen": -374.125732421875, "logps/rejected": -355.0762023925781, "loss": 0.3898, "rewards/accuracies": 0.75, "rewards/chosen": -0.7255774140357971, "rewards/margins": 1.6422920227050781, "rewards/rejected": -2.3678693771362305, "step": 6018 }, { "epoch": 0.7, "learning_rate": 9.130742884138419e-08, "logits/chosen": -2.3641490936279297, "logits/rejected": -2.2291553020477295, "logps/chosen": -352.9361267089844, "logps/rejected": -300.0913391113281, "loss": 0.6935, "rewards/accuracies": 0.625, "rewards/chosen": -1.7091320753097534, "rewards/margins": 0.45051148533821106, "rewards/rejected": -2.1596436500549316, "step": 6019 }, { "epoch": 0.7, "learning_rate": 9.127199716546593e-08, "logits/chosen": -2.709162473678589, "logits/rejected": -2.6785128116607666, "logps/chosen": -249.75723266601562, "logps/rejected": -233.406982421875, "loss": 0.3172, "rewards/accuracies": 0.875, "rewards/chosen": -1.3355178833007812, "rewards/margins": 1.3936246633529663, "rewards/rejected": -2.729142427444458, "step": 6020 }, { "epoch": 0.7, "learning_rate": 9.123656548954765e-08, "logits/chosen": -2.5697898864746094, "logits/rejected": -2.4685654640197754, "logps/chosen": -329.4343566894531, "logps/rejected": -222.78826904296875, "loss": 0.348, "rewards/accuracies": 0.75, "rewards/chosen": -1.1228890419006348, "rewards/margins": 2.361448049545288, "rewards/rejected": -3.4843368530273438, "step": 6021 }, { "epoch": 0.7, "learning_rate": 9.120113381362937e-08, "logits/chosen": -1.3579021692276, "logits/rejected": -1.5742639303207397, "logps/chosen": -807.8760375976562, "logps/rejected": -562.3926391601562, "loss": 0.858, "rewards/accuracies": 0.5, "rewards/chosen": -0.9015644192695618, "rewards/margins": 0.15051400661468506, "rewards/rejected": -1.0520784854888916, "step": 6022 }, { "epoch": 0.7, "learning_rate": 9.116570213771111e-08, "logits/chosen": -2.6098814010620117, "logits/rejected": -2.5496938228607178, "logps/chosen": -203.02767944335938, "logps/rejected": -192.68238830566406, "loss": 1.0241, "rewards/accuracies": 0.5, "rewards/chosen": -1.018750786781311, "rewards/margins": 1.342000961303711, "rewards/rejected": -2.3607516288757324, "step": 6023 }, { "epoch": 0.7, "learning_rate": 9.113027046179284e-08, "logits/chosen": -2.1932501792907715, "logits/rejected": -2.2903051376342773, "logps/chosen": -259.55120849609375, "logps/rejected": -271.6265869140625, "loss": 0.3543, "rewards/accuracies": 0.875, "rewards/chosen": -0.08828525245189667, "rewards/margins": 1.9427448511123657, "rewards/rejected": -2.0310299396514893, "step": 6024 }, { "epoch": 0.7, "learning_rate": 9.109483878587458e-08, "logits/chosen": -2.216691255569458, "logits/rejected": -2.299765110015869, "logps/chosen": -249.56524658203125, "logps/rejected": -267.18670654296875, "loss": 0.2552, "rewards/accuracies": 1.0, "rewards/chosen": -0.7095420360565186, "rewards/margins": 3.237900733947754, "rewards/rejected": -3.9474427700042725, "step": 6025 }, { "epoch": 0.7, "learning_rate": 9.10594071099563e-08, "logits/chosen": -2.3363699913024902, "logits/rejected": -2.805180549621582, "logps/chosen": -354.978759765625, "logps/rejected": -277.37188720703125, "loss": 0.2015, "rewards/accuracies": 0.875, "rewards/chosen": -1.059920310974121, "rewards/margins": 2.2413792610168457, "rewards/rejected": -3.301299571990967, "step": 6026 }, { "epoch": 0.7, "learning_rate": 9.102397543403802e-08, "logits/chosen": -2.4577157497406006, "logits/rejected": -2.6041719913482666, "logps/chosen": -468.6709899902344, "logps/rejected": -258.8568115234375, "loss": 0.1509, "rewards/accuracies": 1.0, "rewards/chosen": -1.142607569694519, "rewards/margins": 2.4675354957580566, "rewards/rejected": -3.6101431846618652, "step": 6027 }, { "epoch": 0.7, "learning_rate": 9.098854375811974e-08, "logits/chosen": -2.583595037460327, "logits/rejected": -2.362802743911743, "logps/chosen": -142.79420471191406, "logps/rejected": -245.39947509765625, "loss": 0.3251, "rewards/accuracies": 0.875, "rewards/chosen": -0.6801767945289612, "rewards/margins": 3.1240344047546387, "rewards/rejected": -3.804211378097534, "step": 6028 }, { "epoch": 0.7, "learning_rate": 9.095311208220148e-08, "logits/chosen": -2.2925822734832764, "logits/rejected": -2.1861793994903564, "logps/chosen": -162.0034637451172, "logps/rejected": -181.0818328857422, "loss": 0.1665, "rewards/accuracies": 0.875, "rewards/chosen": -0.36120232939720154, "rewards/margins": 2.606855869293213, "rewards/rejected": -2.9680581092834473, "step": 6029 }, { "epoch": 0.7, "learning_rate": 9.091768040628322e-08, "logits/chosen": -1.6989262104034424, "logits/rejected": -1.9809398651123047, "logps/chosen": -328.41827392578125, "logps/rejected": -242.74935913085938, "loss": 0.4249, "rewards/accuracies": 0.875, "rewards/chosen": -2.034381628036499, "rewards/margins": 1.1511019468307495, "rewards/rejected": -3.185483455657959, "step": 6030 }, { "epoch": 0.7, "learning_rate": 9.088224873036495e-08, "logits/chosen": -2.7312920093536377, "logits/rejected": -2.3301899433135986, "logps/chosen": -133.37928771972656, "logps/rejected": -434.45391845703125, "loss": 0.2294, "rewards/accuracies": 1.0, "rewards/chosen": -0.7052136659622192, "rewards/margins": 2.406933307647705, "rewards/rejected": -3.1121466159820557, "step": 6031 }, { "epoch": 0.7, "learning_rate": 9.084681705444667e-08, "logits/chosen": -2.7333908081054688, "logits/rejected": -2.348658561706543, "logps/chosen": -242.15560913085938, "logps/rejected": -284.3896789550781, "loss": 0.087, "rewards/accuracies": 1.0, "rewards/chosen": -1.1187427043914795, "rewards/margins": 2.5835251808166504, "rewards/rejected": -3.70226788520813, "step": 6032 }, { "epoch": 0.7, "learning_rate": 9.08113853785284e-08, "logits/chosen": -2.4055323600769043, "logits/rejected": -1.8480944633483887, "logps/chosen": -193.98248291015625, "logps/rejected": -300.6365051269531, "loss": 0.7382, "rewards/accuracies": 0.75, "rewards/chosen": -1.4279305934906006, "rewards/margins": 1.651532769203186, "rewards/rejected": -3.079463481903076, "step": 6033 }, { "epoch": 0.7, "learning_rate": 9.077595370261013e-08, "logits/chosen": -2.743155002593994, "logits/rejected": -2.7312674522399902, "logps/chosen": -280.6197509765625, "logps/rejected": -294.8072509765625, "loss": 0.1441, "rewards/accuracies": 1.0, "rewards/chosen": -1.0820462703704834, "rewards/margins": 3.43280029296875, "rewards/rejected": -4.5148468017578125, "step": 6034 }, { "epoch": 0.7, "learning_rate": 9.074052202669185e-08, "logits/chosen": -2.3577675819396973, "logits/rejected": -2.151329517364502, "logps/chosen": -285.47540283203125, "logps/rejected": -348.01495361328125, "loss": 0.4573, "rewards/accuracies": 0.75, "rewards/chosen": -1.9262704849243164, "rewards/margins": 2.042452096939087, "rewards/rejected": -3.9687228202819824, "step": 6035 }, { "epoch": 0.7, "learning_rate": 9.070509035077359e-08, "logits/chosen": -2.242931842803955, "logits/rejected": -2.2619190216064453, "logps/chosen": -265.83502197265625, "logps/rejected": -249.09239196777344, "loss": 0.4337, "rewards/accuracies": 0.625, "rewards/chosen": -0.5185831785202026, "rewards/margins": 1.6929024457931519, "rewards/rejected": -2.2114856243133545, "step": 6036 }, { "epoch": 0.7, "learning_rate": 9.066965867485532e-08, "logits/chosen": -2.492661476135254, "logits/rejected": -2.518251657485962, "logps/chosen": -266.2502136230469, "logps/rejected": -205.04013061523438, "loss": 0.287, "rewards/accuracies": 0.875, "rewards/chosen": -0.9849428534507751, "rewards/margins": 2.030200242996216, "rewards/rejected": -3.0151431560516357, "step": 6037 }, { "epoch": 0.7, "learning_rate": 9.063422699893705e-08, "logits/chosen": -2.359525680541992, "logits/rejected": -2.4171929359436035, "logps/chosen": -308.9859619140625, "logps/rejected": -237.25267028808594, "loss": 0.4134, "rewards/accuracies": 0.75, "rewards/chosen": -1.586553931236267, "rewards/margins": 1.5468459129333496, "rewards/rejected": -3.1333999633789062, "step": 6038 }, { "epoch": 0.7, "learning_rate": 9.059879532301877e-08, "logits/chosen": -2.2778449058532715, "logits/rejected": -2.2827181816101074, "logps/chosen": -235.49188232421875, "logps/rejected": -284.8341369628906, "loss": 0.6327, "rewards/accuracies": 0.75, "rewards/chosen": -2.257972478866577, "rewards/margins": 1.9846856594085693, "rewards/rejected": -4.242657661437988, "step": 6039 }, { "epoch": 0.7, "learning_rate": 9.05633636471005e-08, "logits/chosen": -2.323399543762207, "logits/rejected": -2.4326274394989014, "logps/chosen": -234.27146911621094, "logps/rejected": -192.68829345703125, "loss": 0.8768, "rewards/accuracies": 0.875, "rewards/chosen": -1.3022602796554565, "rewards/margins": 1.129872441291809, "rewards/rejected": -2.4321327209472656, "step": 6040 }, { "epoch": 0.7, "learning_rate": 9.052793197118223e-08, "logits/chosen": -3.095167875289917, "logits/rejected": -2.994746208190918, "logps/chosen": -405.2083740234375, "logps/rejected": -247.9383544921875, "loss": 0.1841, "rewards/accuracies": 0.875, "rewards/chosen": -0.9737396240234375, "rewards/margins": 3.586784601211548, "rewards/rejected": -4.560523986816406, "step": 6041 }, { "epoch": 0.7, "learning_rate": 9.049250029526397e-08, "logits/chosen": -2.5182836055755615, "logits/rejected": -2.4292612075805664, "logps/chosen": -236.18609619140625, "logps/rejected": -267.0619201660156, "loss": 0.1657, "rewards/accuracies": 1.0, "rewards/chosen": -0.2799082398414612, "rewards/margins": 2.2520065307617188, "rewards/rejected": -2.5319149494171143, "step": 6042 }, { "epoch": 0.7, "learning_rate": 9.04570686193457e-08, "logits/chosen": -1.8934950828552246, "logits/rejected": -1.8614875078201294, "logps/chosen": -219.97900390625, "logps/rejected": -295.099609375, "loss": 0.719, "rewards/accuracies": 0.625, "rewards/chosen": -1.0394965410232544, "rewards/margins": 1.2311452627182007, "rewards/rejected": -2.270641803741455, "step": 6043 }, { "epoch": 0.7, "learning_rate": 9.042163694342742e-08, "logits/chosen": -2.4254908561706543, "logits/rejected": -2.548405647277832, "logps/chosen": -268.73187255859375, "logps/rejected": -293.1374206542969, "loss": 0.1939, "rewards/accuracies": 0.875, "rewards/chosen": -0.6550068855285645, "rewards/margins": 2.934358596801758, "rewards/rejected": -3.589365243911743, "step": 6044 }, { "epoch": 0.7, "learning_rate": 9.038620526750915e-08, "logits/chosen": -2.0549476146698, "logits/rejected": -2.3383572101593018, "logps/chosen": -463.4259033203125, "logps/rejected": -281.25592041015625, "loss": 0.5208, "rewards/accuracies": 0.75, "rewards/chosen": -0.7854830622673035, "rewards/margins": 1.306495189666748, "rewards/rejected": -2.091978073120117, "step": 6045 }, { "epoch": 0.7, "learning_rate": 9.035077359159088e-08, "logits/chosen": -2.6422457695007324, "logits/rejected": -2.4961774349212646, "logps/chosen": -213.76400756835938, "logps/rejected": -189.88604736328125, "loss": 0.4351, "rewards/accuracies": 0.625, "rewards/chosen": -0.7342209815979004, "rewards/margins": 1.4535390138626099, "rewards/rejected": -2.1877598762512207, "step": 6046 }, { "epoch": 0.7, "learning_rate": 9.03153419156726e-08, "logits/chosen": -2.4081578254699707, "logits/rejected": -2.3567299842834473, "logps/chosen": -247.90664672851562, "logps/rejected": -234.61669921875, "loss": 0.6025, "rewards/accuracies": 0.875, "rewards/chosen": -0.9996240139007568, "rewards/margins": 0.7125396728515625, "rewards/rejected": -1.7121635675430298, "step": 6047 }, { "epoch": 0.7, "learning_rate": 9.027991023975435e-08, "logits/chosen": -2.8147592544555664, "logits/rejected": -2.668168544769287, "logps/chosen": -381.6708984375, "logps/rejected": -256.8616027832031, "loss": 0.2588, "rewards/accuracies": 1.0, "rewards/chosen": -1.338503360748291, "rewards/margins": 2.7136435508728027, "rewards/rejected": -4.052146911621094, "step": 6048 }, { "epoch": 0.7, "learning_rate": 9.024447856383607e-08, "logits/chosen": -2.0083932876586914, "logits/rejected": -1.8788243532180786, "logps/chosen": -272.7646484375, "logps/rejected": -366.0218505859375, "loss": 0.5632, "rewards/accuracies": 0.75, "rewards/chosen": -0.8544009923934937, "rewards/margins": 1.3425747156143188, "rewards/rejected": -2.1969757080078125, "step": 6049 }, { "epoch": 0.7, "learning_rate": 9.020904688791779e-08, "logits/chosen": -2.2220420837402344, "logits/rejected": -1.9433717727661133, "logps/chosen": -286.26007080078125, "logps/rejected": -354.78302001953125, "loss": 0.2477, "rewards/accuracies": 0.875, "rewards/chosen": -0.41965430974960327, "rewards/margins": 2.7266602516174316, "rewards/rejected": -3.1463146209716797, "step": 6050 }, { "epoch": 0.7, "learning_rate": 9.017361521199953e-08, "logits/chosen": -2.3732101917266846, "logits/rejected": -2.5975537300109863, "logps/chosen": -494.0625, "logps/rejected": -298.8734130859375, "loss": 0.5308, "rewards/accuracies": 0.625, "rewards/chosen": -0.5878989100456238, "rewards/margins": 1.2254228591918945, "rewards/rejected": -1.813321828842163, "step": 6051 }, { "epoch": 0.7, "learning_rate": 9.013818353608125e-08, "logits/chosen": -1.9585765600204468, "logits/rejected": -1.9640977382659912, "logps/chosen": -345.22027587890625, "logps/rejected": -305.5877380371094, "loss": 0.6802, "rewards/accuracies": 0.75, "rewards/chosen": -1.2118251323699951, "rewards/margins": 1.8873087167739868, "rewards/rejected": -3.0991339683532715, "step": 6052 }, { "epoch": 0.7, "learning_rate": 9.0102751860163e-08, "logits/chosen": -2.2497875690460205, "logits/rejected": -2.3619680404663086, "logps/chosen": -168.23890686035156, "logps/rejected": -155.3815460205078, "loss": 0.2327, "rewards/accuracies": 1.0, "rewards/chosen": -0.8057801723480225, "rewards/margins": 2.0978238582611084, "rewards/rejected": -2.903604030609131, "step": 6053 }, { "epoch": 0.7, "learning_rate": 9.006732018424472e-08, "logits/chosen": -2.493454933166504, "logits/rejected": -2.463866710662842, "logps/chosen": -223.10906982421875, "logps/rejected": -248.75152587890625, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": -0.5285241603851318, "rewards/margins": 3.50827693939209, "rewards/rejected": -4.036801338195801, "step": 6054 }, { "epoch": 0.7, "learning_rate": 9.003188850832644e-08, "logits/chosen": -2.026017427444458, "logits/rejected": -1.9918798208236694, "logps/chosen": -332.57305908203125, "logps/rejected": -336.7238464355469, "loss": 0.2898, "rewards/accuracies": 1.0, "rewards/chosen": -0.4716756343841553, "rewards/margins": 1.7782105207443237, "rewards/rejected": -2.2498860359191895, "step": 6055 }, { "epoch": 0.7, "learning_rate": 8.999645683240816e-08, "logits/chosen": -2.1670002937316895, "logits/rejected": -1.8779200315475464, "logps/chosen": -315.0589599609375, "logps/rejected": -293.11614990234375, "loss": 0.588, "rewards/accuracies": 0.75, "rewards/chosen": -1.3251738548278809, "rewards/margins": 1.0696611404418945, "rewards/rejected": -2.3948349952697754, "step": 6056 }, { "epoch": 0.7, "learning_rate": 8.99610251564899e-08, "logits/chosen": -2.207033634185791, "logits/rejected": -2.105794668197632, "logps/chosen": -316.33135986328125, "logps/rejected": -320.90728759765625, "loss": 0.3469, "rewards/accuracies": 0.75, "rewards/chosen": -0.7274892330169678, "rewards/margins": 2.840513229370117, "rewards/rejected": -3.568002223968506, "step": 6057 }, { "epoch": 0.7, "learning_rate": 8.992559348057162e-08, "logits/chosen": -2.7574825286865234, "logits/rejected": -2.841709613800049, "logps/chosen": -285.93109130859375, "logps/rejected": -340.4156799316406, "loss": 0.1552, "rewards/accuracies": 1.0, "rewards/chosen": -1.3708571195602417, "rewards/margins": 3.010004997253418, "rewards/rejected": -4.380862236022949, "step": 6058 }, { "epoch": 0.7, "learning_rate": 8.989016180465337e-08, "logits/chosen": -2.195568084716797, "logits/rejected": -2.2587990760803223, "logps/chosen": -172.8002471923828, "logps/rejected": -236.90951538085938, "loss": 0.347, "rewards/accuracies": 0.875, "rewards/chosen": -0.8750612735748291, "rewards/margins": 2.4218783378601074, "rewards/rejected": -3.2969393730163574, "step": 6059 }, { "epoch": 0.7, "learning_rate": 8.985473012873509e-08, "logits/chosen": -2.0671024322509766, "logits/rejected": -1.7654187679290771, "logps/chosen": -143.0915069580078, "logps/rejected": -245.78111267089844, "loss": 0.2083, "rewards/accuracies": 1.0, "rewards/chosen": -0.46524545550346375, "rewards/margins": 2.8809163570404053, "rewards/rejected": -3.3461618423461914, "step": 6060 }, { "epoch": 0.71, "learning_rate": 8.981929845281681e-08, "logits/chosen": -2.921506404876709, "logits/rejected": -3.0079240798950195, "logps/chosen": -129.94105529785156, "logps/rejected": -191.42147827148438, "loss": 0.5496, "rewards/accuracies": 0.75, "rewards/chosen": -0.5242642760276794, "rewards/margins": 3.1556496620178223, "rewards/rejected": -3.6799139976501465, "step": 6061 }, { "epoch": 0.71, "learning_rate": 8.978386677689855e-08, "logits/chosen": -2.34425950050354, "logits/rejected": -2.3116650581359863, "logps/chosen": -266.7066345214844, "logps/rejected": -307.5665283203125, "loss": 0.3734, "rewards/accuracies": 0.75, "rewards/chosen": -0.1719905585050583, "rewards/margins": 2.0604403018951416, "rewards/rejected": -2.232430934906006, "step": 6062 }, { "epoch": 0.71, "learning_rate": 8.974843510098027e-08, "logits/chosen": -2.739182949066162, "logits/rejected": -2.8692543506622314, "logps/chosen": -217.7708282470703, "logps/rejected": -260.9698791503906, "loss": 0.716, "rewards/accuracies": 0.625, "rewards/chosen": -1.037595272064209, "rewards/margins": 1.46248197555542, "rewards/rejected": -2.50007700920105, "step": 6063 }, { "epoch": 0.71, "learning_rate": 8.971300342506199e-08, "logits/chosen": -2.8602612018585205, "logits/rejected": -2.8754830360412598, "logps/chosen": -140.65780639648438, "logps/rejected": -189.33399963378906, "loss": 1.1151, "rewards/accuracies": 0.5, "rewards/chosen": -1.8129007816314697, "rewards/margins": 0.14040732383728027, "rewards/rejected": -1.95330810546875, "step": 6064 }, { "epoch": 0.71, "learning_rate": 8.967757174914374e-08, "logits/chosen": -2.6244564056396484, "logits/rejected": -2.771458864212036, "logps/chosen": -249.37396240234375, "logps/rejected": -274.462890625, "loss": 0.466, "rewards/accuracies": 0.75, "rewards/chosen": -0.3309949040412903, "rewards/margins": 1.898422122001648, "rewards/rejected": -2.229417085647583, "step": 6065 }, { "epoch": 0.71, "learning_rate": 8.964214007322546e-08, "logits/chosen": -2.621119499206543, "logits/rejected": -2.5922396183013916, "logps/chosen": -244.64468383789062, "logps/rejected": -235.27439880371094, "loss": 0.3047, "rewards/accuracies": 0.875, "rewards/chosen": -0.13181746006011963, "rewards/margins": 1.8906285762786865, "rewards/rejected": -2.0224459171295166, "step": 6066 }, { "epoch": 0.71, "learning_rate": 8.960670839730719e-08, "logits/chosen": -2.2560882568359375, "logits/rejected": -2.493377447128296, "logps/chosen": -428.2352600097656, "logps/rejected": -288.28436279296875, "loss": 0.3439, "rewards/accuracies": 1.0, "rewards/chosen": -1.4931015968322754, "rewards/margins": 1.546626091003418, "rewards/rejected": -3.0397276878356934, "step": 6067 }, { "epoch": 0.71, "learning_rate": 8.957127672138892e-08, "logits/chosen": -2.8651716709136963, "logits/rejected": -2.9749045372009277, "logps/chosen": -208.01931762695312, "logps/rejected": -183.84371948242188, "loss": 0.1544, "rewards/accuracies": 1.0, "rewards/chosen": -0.3931627571582794, "rewards/margins": 2.5370540618896484, "rewards/rejected": -2.9302167892456055, "step": 6068 }, { "epoch": 0.71, "learning_rate": 8.953584504547064e-08, "logits/chosen": -2.1059317588806152, "logits/rejected": -2.3672032356262207, "logps/chosen": -459.32586669921875, "logps/rejected": -273.21856689453125, "loss": 0.1841, "rewards/accuracies": 1.0, "rewards/chosen": 0.12873800098896027, "rewards/margins": 2.3498189449310303, "rewards/rejected": -2.221081018447876, "step": 6069 }, { "epoch": 0.71, "learning_rate": 8.950041336955237e-08, "logits/chosen": -1.9180374145507812, "logits/rejected": -2.4997997283935547, "logps/chosen": -546.3544921875, "logps/rejected": -312.7955627441406, "loss": 0.3954, "rewards/accuracies": 0.875, "rewards/chosen": -0.31952184438705444, "rewards/margins": 1.688346028327942, "rewards/rejected": -2.0078678131103516, "step": 6070 }, { "epoch": 0.71, "learning_rate": 8.946498169363411e-08, "logits/chosen": -2.54764723777771, "logits/rejected": -2.595222234725952, "logps/chosen": -252.62982177734375, "logps/rejected": -275.7747802734375, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": -0.20762206614017487, "rewards/margins": 3.547131061553955, "rewards/rejected": -3.7547531127929688, "step": 6071 }, { "epoch": 0.71, "learning_rate": 8.942955001771584e-08, "logits/chosen": -2.8214502334594727, "logits/rejected": -2.5950121879577637, "logps/chosen": -194.98770141601562, "logps/rejected": -223.40989685058594, "loss": 0.5833, "rewards/accuracies": 0.75, "rewards/chosen": -0.4574030935764313, "rewards/margins": 2.3068861961364746, "rewards/rejected": -2.764289140701294, "step": 6072 }, { "epoch": 0.71, "learning_rate": 8.939411834179756e-08, "logits/chosen": -2.3900208473205566, "logits/rejected": -2.465254068374634, "logps/chosen": -245.37562561035156, "logps/rejected": -240.39381408691406, "loss": 0.7263, "rewards/accuracies": 0.625, "rewards/chosen": -2.182644844055176, "rewards/margins": 1.126953125, "rewards/rejected": -3.3095977306365967, "step": 6073 }, { "epoch": 0.71, "learning_rate": 8.93586866658793e-08, "logits/chosen": -1.9208862781524658, "logits/rejected": -1.8747988939285278, "logps/chosen": -388.1885986328125, "logps/rejected": -359.90594482421875, "loss": 0.2965, "rewards/accuracies": 0.875, "rewards/chosen": -1.0844615697860718, "rewards/margins": 2.5975072383880615, "rewards/rejected": -3.6819686889648438, "step": 6074 }, { "epoch": 0.71, "learning_rate": 8.932325498996102e-08, "logits/chosen": -2.1524553298950195, "logits/rejected": -2.2502312660217285, "logps/chosen": -177.98049926757812, "logps/rejected": -188.83380126953125, "loss": 0.5204, "rewards/accuracies": 0.875, "rewards/chosen": -0.8347357511520386, "rewards/margins": 0.762645423412323, "rewards/rejected": -1.5973812341690063, "step": 6075 }, { "epoch": 0.71, "learning_rate": 8.928782331404274e-08, "logits/chosen": -2.355353355407715, "logits/rejected": -2.4701359272003174, "logps/chosen": -241.41578674316406, "logps/rejected": -182.42572021484375, "loss": 0.5684, "rewards/accuracies": 0.875, "rewards/chosen": -1.6510765552520752, "rewards/margins": 1.980826735496521, "rewards/rejected": -3.6319031715393066, "step": 6076 }, { "epoch": 0.71, "learning_rate": 8.925239163812449e-08, "logits/chosen": -2.297243356704712, "logits/rejected": -2.124403715133667, "logps/chosen": -240.0218048095703, "logps/rejected": -263.4620666503906, "loss": 0.2174, "rewards/accuracies": 1.0, "rewards/chosen": -0.4449262022972107, "rewards/margins": 1.9022812843322754, "rewards/rejected": -2.347207546234131, "step": 6077 }, { "epoch": 0.71, "learning_rate": 8.921695996220621e-08, "logits/chosen": -2.2461559772491455, "logits/rejected": -2.086864709854126, "logps/chosen": -291.0412292480469, "logps/rejected": -261.37286376953125, "loss": 0.314, "rewards/accuracies": 0.875, "rewards/chosen": -0.41498541831970215, "rewards/margins": 1.7279516458511353, "rewards/rejected": -2.142937183380127, "step": 6078 }, { "epoch": 0.71, "learning_rate": 8.918152828628794e-08, "logits/chosen": -2.1897013187408447, "logits/rejected": -2.363321542739868, "logps/chosen": -246.17330932617188, "logps/rejected": -174.49530029296875, "loss": 0.5452, "rewards/accuracies": 0.625, "rewards/chosen": -1.2312779426574707, "rewards/margins": 0.9113813638687134, "rewards/rejected": -2.1426591873168945, "step": 6079 }, { "epoch": 0.71, "learning_rate": 8.914609661036967e-08, "logits/chosen": -2.7428009510040283, "logits/rejected": -2.639326572418213, "logps/chosen": -428.898681640625, "logps/rejected": -215.15017700195312, "loss": 1.107, "rewards/accuracies": 0.625, "rewards/chosen": -1.7672739028930664, "rewards/margins": 1.100378394126892, "rewards/rejected": -2.867652416229248, "step": 6080 }, { "epoch": 0.71, "learning_rate": 8.911066493445139e-08, "logits/chosen": -2.4604074954986572, "logits/rejected": -2.3406734466552734, "logps/chosen": -507.4819030761719, "logps/rejected": -454.30072021484375, "loss": 0.3133, "rewards/accuracies": 0.875, "rewards/chosen": -1.2158894538879395, "rewards/margins": 1.718186378479004, "rewards/rejected": -2.9340758323669434, "step": 6081 }, { "epoch": 0.71, "learning_rate": 8.907523325853312e-08, "logits/chosen": -2.285922050476074, "logits/rejected": -2.1631877422332764, "logps/chosen": -136.53541564941406, "logps/rejected": -285.9084777832031, "loss": 0.5063, "rewards/accuracies": 0.75, "rewards/chosen": -1.0933934450149536, "rewards/margins": 2.5071191787719727, "rewards/rejected": -3.6005125045776367, "step": 6082 }, { "epoch": 0.71, "learning_rate": 8.903980158261486e-08, "logits/chosen": -2.2997090816497803, "logits/rejected": -2.5307321548461914, "logps/chosen": -303.2523193359375, "logps/rejected": -312.78546142578125, "loss": 0.1238, "rewards/accuracies": 1.0, "rewards/chosen": -1.0345184803009033, "rewards/margins": 4.063953399658203, "rewards/rejected": -5.0984721183776855, "step": 6083 }, { "epoch": 0.71, "learning_rate": 8.900436990669658e-08, "logits/chosen": -2.289158344268799, "logits/rejected": -2.4126124382019043, "logps/chosen": -256.8719787597656, "logps/rejected": -156.02981567382812, "loss": 0.856, "rewards/accuracies": 0.625, "rewards/chosen": -1.2161816358566284, "rewards/margins": 0.17590780556201935, "rewards/rejected": -1.3920893669128418, "step": 6084 }, { "epoch": 0.71, "learning_rate": 8.896893823077832e-08, "logits/chosen": -2.1791434288024902, "logits/rejected": -2.1278724670410156, "logps/chosen": -164.05587768554688, "logps/rejected": -167.4168701171875, "loss": 0.52, "rewards/accuracies": 0.75, "rewards/chosen": -2.035451889038086, "rewards/margins": 0.6230434775352478, "rewards/rejected": -2.6584951877593994, "step": 6085 }, { "epoch": 0.71, "learning_rate": 8.893350655486004e-08, "logits/chosen": -1.7802207469940186, "logits/rejected": -1.886575698852539, "logps/chosen": -358.449951171875, "logps/rejected": -364.9169921875, "loss": 0.2627, "rewards/accuracies": 0.875, "rewards/chosen": -0.9199423789978027, "rewards/margins": 3.3337976932525635, "rewards/rejected": -4.253739833831787, "step": 6086 }, { "epoch": 0.71, "learning_rate": 8.889807487894176e-08, "logits/chosen": -2.404851198196411, "logits/rejected": -2.5447511672973633, "logps/chosen": -175.25811767578125, "logps/rejected": -151.1827392578125, "loss": 1.7476, "rewards/accuracies": 0.375, "rewards/chosen": -2.184103488922119, "rewards/margins": -1.1567902565002441, "rewards/rejected": -1.027313232421875, "step": 6087 }, { "epoch": 0.71, "learning_rate": 8.88626432030235e-08, "logits/chosen": -2.3675529956817627, "logits/rejected": -2.3178658485412598, "logps/chosen": -378.8748779296875, "logps/rejected": -228.85000610351562, "loss": 0.3234, "rewards/accuracies": 0.875, "rewards/chosen": -0.03923603892326355, "rewards/margins": 2.106400489807129, "rewards/rejected": -2.145636558532715, "step": 6088 }, { "epoch": 0.71, "learning_rate": 8.882721152710523e-08, "logits/chosen": -2.497647762298584, "logits/rejected": -2.734200954437256, "logps/chosen": -308.90020751953125, "logps/rejected": -314.0935974121094, "loss": 0.5421, "rewards/accuracies": 0.5, "rewards/chosen": -1.5567786693572998, "rewards/margins": 1.806865930557251, "rewards/rejected": -3.363644599914551, "step": 6089 }, { "epoch": 0.71, "learning_rate": 8.879177985118695e-08, "logits/chosen": -1.565824270248413, "logits/rejected": -1.8275678157806396, "logps/chosen": -294.8878173828125, "logps/rejected": -282.58154296875, "loss": 0.2518, "rewards/accuracies": 0.875, "rewards/chosen": -0.15170909464359283, "rewards/margins": 2.397779941558838, "rewards/rejected": -2.5494887828826904, "step": 6090 }, { "epoch": 0.71, "learning_rate": 8.875634817526869e-08, "logits/chosen": -2.2223644256591797, "logits/rejected": -2.143782615661621, "logps/chosen": -609.1446533203125, "logps/rejected": -457.02886962890625, "loss": 0.6349, "rewards/accuracies": 0.875, "rewards/chosen": -1.375617265701294, "rewards/margins": 0.9190353155136108, "rewards/rejected": -2.2946527004241943, "step": 6091 }, { "epoch": 0.71, "learning_rate": 8.872091649935041e-08, "logits/chosen": -2.5329525470733643, "logits/rejected": -2.7066824436187744, "logps/chosen": -303.14617919921875, "logps/rejected": -273.23626708984375, "loss": 0.5213, "rewards/accuracies": 0.5, "rewards/chosen": -0.7791649699211121, "rewards/margins": 0.875169038772583, "rewards/rejected": -1.6543340682983398, "step": 6092 }, { "epoch": 0.71, "learning_rate": 8.868548482343213e-08, "logits/chosen": -2.4131641387939453, "logits/rejected": -2.2675180435180664, "logps/chosen": -162.85696411132812, "logps/rejected": -225.6729278564453, "loss": 1.0922, "rewards/accuracies": 0.625, "rewards/chosen": -1.2169970273971558, "rewards/margins": 0.748391330242157, "rewards/rejected": -1.965388298034668, "step": 6093 }, { "epoch": 0.71, "learning_rate": 8.865005314751388e-08, "logits/chosen": -1.9691004753112793, "logits/rejected": -1.7309893369674683, "logps/chosen": -291.86651611328125, "logps/rejected": -343.6380615234375, "loss": 0.3573, "rewards/accuracies": 0.875, "rewards/chosen": -1.3300131559371948, "rewards/margins": 3.140977144241333, "rewards/rejected": -4.470990180969238, "step": 6094 }, { "epoch": 0.71, "learning_rate": 8.86146214715956e-08, "logits/chosen": -2.269162893295288, "logits/rejected": -2.318301200866699, "logps/chosen": -454.9854736328125, "logps/rejected": -291.75335693359375, "loss": 0.3068, "rewards/accuracies": 0.75, "rewards/chosen": -0.36650753021240234, "rewards/margins": 1.7758901119232178, "rewards/rejected": -2.14239764213562, "step": 6095 }, { "epoch": 0.71, "learning_rate": 8.857918979567734e-08, "logits/chosen": -2.0823662281036377, "logits/rejected": -2.214926242828369, "logps/chosen": -306.0577697753906, "logps/rejected": -264.1078186035156, "loss": 0.1838, "rewards/accuracies": 1.0, "rewards/chosen": -0.8658745884895325, "rewards/margins": 3.4998414516448975, "rewards/rejected": -4.365715980529785, "step": 6096 }, { "epoch": 0.71, "learning_rate": 8.854375811975906e-08, "logits/chosen": -2.298790454864502, "logits/rejected": -2.11314058303833, "logps/chosen": -272.09844970703125, "logps/rejected": -336.40478515625, "loss": 0.3971, "rewards/accuracies": 0.75, "rewards/chosen": -0.5103957653045654, "rewards/margins": 1.598239779472351, "rewards/rejected": -2.108635425567627, "step": 6097 }, { "epoch": 0.71, "learning_rate": 8.850832644384078e-08, "logits/chosen": -2.495558500289917, "logits/rejected": -2.3835060596466064, "logps/chosen": -250.13194274902344, "logps/rejected": -206.45150756835938, "loss": 0.2864, "rewards/accuracies": 1.0, "rewards/chosen": -0.6320149898529053, "rewards/margins": 1.4325981140136719, "rewards/rejected": -2.064613103866577, "step": 6098 }, { "epoch": 0.71, "learning_rate": 8.847289476792252e-08, "logits/chosen": -2.202238082885742, "logits/rejected": -2.077327013015747, "logps/chosen": -175.7583770751953, "logps/rejected": -369.3607482910156, "loss": 0.8092, "rewards/accuracies": 0.625, "rewards/chosen": -1.522727608680725, "rewards/margins": 2.3033037185668945, "rewards/rejected": -3.82603120803833, "step": 6099 }, { "epoch": 0.71, "learning_rate": 8.843746309200425e-08, "logits/chosen": -1.6138476133346558, "logits/rejected": -2.2819066047668457, "logps/chosen": -464.3603820800781, "logps/rejected": -219.4442138671875, "loss": 0.5702, "rewards/accuracies": 0.5, "rewards/chosen": -1.0938005447387695, "rewards/margins": 0.9703003168106079, "rewards/rejected": -2.064100742340088, "step": 6100 }, { "epoch": 0.71, "learning_rate": 8.840203141608598e-08, "logits/chosen": -1.9255167245864868, "logits/rejected": -1.979909062385559, "logps/chosen": -299.1183166503906, "logps/rejected": -334.2889404296875, "loss": 0.465, "rewards/accuracies": 0.875, "rewards/chosen": -1.1435785293579102, "rewards/margins": 1.842218041419983, "rewards/rejected": -2.9857966899871826, "step": 6101 }, { "epoch": 0.71, "learning_rate": 8.836659974016771e-08, "logits/chosen": -2.456321954727173, "logits/rejected": -2.594014883041382, "logps/chosen": -136.5105438232422, "logps/rejected": -176.92172241210938, "loss": 0.3602, "rewards/accuracies": 0.875, "rewards/chosen": -1.2972975969314575, "rewards/margins": 1.7850086688995361, "rewards/rejected": -3.082306385040283, "step": 6102 }, { "epoch": 0.71, "learning_rate": 8.833116806424943e-08, "logits/chosen": -2.4109106063842773, "logits/rejected": -2.203990936279297, "logps/chosen": -233.26119995117188, "logps/rejected": -299.64349365234375, "loss": 0.1744, "rewards/accuracies": 1.0, "rewards/chosen": -0.616451621055603, "rewards/margins": 4.101806640625, "rewards/rejected": -4.718257904052734, "step": 6103 }, { "epoch": 0.71, "learning_rate": 8.829573638833116e-08, "logits/chosen": -2.1415700912475586, "logits/rejected": -1.9274588823318481, "logps/chosen": -219.3123779296875, "logps/rejected": -301.9386901855469, "loss": 0.459, "rewards/accuracies": 0.625, "rewards/chosen": -0.9475097060203552, "rewards/margins": 1.4611196517944336, "rewards/rejected": -2.4086294174194336, "step": 6104 }, { "epoch": 0.71, "learning_rate": 8.826030471241289e-08, "logits/chosen": -2.3756103515625, "logits/rejected": -2.4115521907806396, "logps/chosen": -369.58740234375, "logps/rejected": -276.358154296875, "loss": 0.2302, "rewards/accuracies": 0.875, "rewards/chosen": -0.22028805315494537, "rewards/margins": 3.006739616394043, "rewards/rejected": -3.227027416229248, "step": 6105 }, { "epoch": 0.71, "learning_rate": 8.822487303649463e-08, "logits/chosen": -2.215615749359131, "logits/rejected": -2.3276073932647705, "logps/chosen": -413.2349853515625, "logps/rejected": -346.08721923828125, "loss": 0.4898, "rewards/accuracies": 0.875, "rewards/chosen": -1.3091329336166382, "rewards/margins": 0.9969435930252075, "rewards/rejected": -2.3060765266418457, "step": 6106 }, { "epoch": 0.71, "learning_rate": 8.818944136057636e-08, "logits/chosen": -2.226386547088623, "logits/rejected": -1.9804866313934326, "logps/chosen": -264.3817138671875, "logps/rejected": -317.307373046875, "loss": 0.1141, "rewards/accuracies": 1.0, "rewards/chosen": -0.4915817677974701, "rewards/margins": 2.6018714904785156, "rewards/rejected": -3.0934531688690186, "step": 6107 }, { "epoch": 0.71, "learning_rate": 8.815400968465808e-08, "logits/chosen": -1.9151116609573364, "logits/rejected": -2.1489970684051514, "logps/chosen": -434.1298828125, "logps/rejected": -403.305908203125, "loss": 0.6361, "rewards/accuracies": 0.625, "rewards/chosen": -1.5194711685180664, "rewards/margins": 1.9919497966766357, "rewards/rejected": -3.511420726776123, "step": 6108 }, { "epoch": 0.71, "learning_rate": 8.81185780087398e-08, "logits/chosen": -2.2899513244628906, "logits/rejected": -2.1781821250915527, "logps/chosen": -242.45130920410156, "logps/rejected": -330.3238525390625, "loss": 0.5673, "rewards/accuracies": 0.625, "rewards/chosen": -0.7320735454559326, "rewards/margins": 0.6779049038887024, "rewards/rejected": -1.4099783897399902, "step": 6109 }, { "epoch": 0.71, "learning_rate": 8.808314633282153e-08, "logits/chosen": -2.13594913482666, "logits/rejected": -1.8555288314819336, "logps/chosen": -419.04302978515625, "logps/rejected": -587.0065307617188, "loss": 0.2918, "rewards/accuracies": 1.0, "rewards/chosen": -0.7920877933502197, "rewards/margins": 2.6420600414276123, "rewards/rejected": -3.434147834777832, "step": 6110 }, { "epoch": 0.71, "learning_rate": 8.804771465690326e-08, "logits/chosen": -2.480276584625244, "logits/rejected": -2.44240140914917, "logps/chosen": -206.36062622070312, "logps/rejected": -237.22869873046875, "loss": 0.4797, "rewards/accuracies": 0.75, "rewards/chosen": -0.2510621249675751, "rewards/margins": 1.4230225086212158, "rewards/rejected": -1.6740846633911133, "step": 6111 }, { "epoch": 0.71, "learning_rate": 8.8012282980985e-08, "logits/chosen": -2.06315279006958, "logits/rejected": -2.0393190383911133, "logps/chosen": -303.54876708984375, "logps/rejected": -260.7725830078125, "loss": 0.3088, "rewards/accuracies": 0.875, "rewards/chosen": -0.8212411403656006, "rewards/margins": 2.6151843070983887, "rewards/rejected": -3.4364254474639893, "step": 6112 }, { "epoch": 0.71, "learning_rate": 8.797685130506673e-08, "logits/chosen": -2.980529546737671, "logits/rejected": -2.961334466934204, "logps/chosen": -182.6428985595703, "logps/rejected": -158.72781372070312, "loss": 0.2067, "rewards/accuracies": 0.875, "rewards/chosen": -0.9052941799163818, "rewards/margins": 2.855271816253662, "rewards/rejected": -3.760566234588623, "step": 6113 }, { "epoch": 0.71, "learning_rate": 8.794141962914846e-08, "logits/chosen": -1.7858860492706299, "logits/rejected": -1.8515753746032715, "logps/chosen": -334.43182373046875, "logps/rejected": -290.6058044433594, "loss": 0.403, "rewards/accuracies": 0.875, "rewards/chosen": -1.3740646839141846, "rewards/margins": 2.0690348148345947, "rewards/rejected": -3.4430994987487793, "step": 6114 }, { "epoch": 0.71, "learning_rate": 8.790598795323018e-08, "logits/chosen": -1.912555456161499, "logits/rejected": -1.9130315780639648, "logps/chosen": -521.101806640625, "logps/rejected": -463.4203796386719, "loss": 0.1793, "rewards/accuracies": 1.0, "rewards/chosen": -0.06353634595870972, "rewards/margins": 2.244542360305786, "rewards/rejected": -2.3080787658691406, "step": 6115 }, { "epoch": 0.71, "learning_rate": 8.787055627731191e-08, "logits/chosen": -2.262333631515503, "logits/rejected": -2.6874678134918213, "logps/chosen": -241.91064453125, "logps/rejected": -279.5497131347656, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -0.24283675849437714, "rewards/margins": 4.939653396606445, "rewards/rejected": -5.182490348815918, "step": 6116 }, { "epoch": 0.71, "learning_rate": 8.783512460139364e-08, "logits/chosen": -2.3150930404663086, "logits/rejected": -2.2221925258636475, "logps/chosen": -264.23016357421875, "logps/rejected": -352.434814453125, "loss": 0.791, "rewards/accuracies": 0.625, "rewards/chosen": -1.4289357662200928, "rewards/margins": 1.3579154014587402, "rewards/rejected": -2.786851406097412, "step": 6117 }, { "epoch": 0.71, "learning_rate": 8.779969292547537e-08, "logits/chosen": -1.5268027782440186, "logits/rejected": -1.658892273902893, "logps/chosen": -391.9781188964844, "logps/rejected": -341.7236633300781, "loss": 0.2365, "rewards/accuracies": 0.875, "rewards/chosen": -0.5500835180282593, "rewards/margins": 2.1006102561950684, "rewards/rejected": -2.650693893432617, "step": 6118 }, { "epoch": 0.71, "learning_rate": 8.776426124955711e-08, "logits/chosen": -1.764309048652649, "logits/rejected": -2.007469415664673, "logps/chosen": -236.9412384033203, "logps/rejected": -249.05166625976562, "loss": 0.2891, "rewards/accuracies": 0.75, "rewards/chosen": -0.8151686787605286, "rewards/margins": 2.8563342094421387, "rewards/rejected": -3.6715028285980225, "step": 6119 }, { "epoch": 0.71, "learning_rate": 8.772882957363883e-08, "logits/chosen": -2.3307769298553467, "logits/rejected": -2.0163726806640625, "logps/chosen": -178.000732421875, "logps/rejected": -250.12823486328125, "loss": 0.4124, "rewards/accuracies": 0.75, "rewards/chosen": -1.14437997341156, "rewards/margins": 2.359159231185913, "rewards/rejected": -3.5035390853881836, "step": 6120 }, { "epoch": 0.71, "learning_rate": 8.769339789772055e-08, "logits/chosen": -2.597825765609741, "logits/rejected": -2.7223739624023438, "logps/chosen": -299.5548400878906, "logps/rejected": -303.1390380859375, "loss": 0.4602, "rewards/accuracies": 0.75, "rewards/chosen": -0.7089821100234985, "rewards/margins": 2.198246955871582, "rewards/rejected": -2.907228946685791, "step": 6121 }, { "epoch": 0.71, "learning_rate": 8.765796622180229e-08, "logits/chosen": -2.1313023567199707, "logits/rejected": -2.283750534057617, "logps/chosen": -220.73583984375, "logps/rejected": -199.9409637451172, "loss": 0.5779, "rewards/accuracies": 0.625, "rewards/chosen": -0.8378703594207764, "rewards/margins": 0.8049442768096924, "rewards/rejected": -1.6428146362304688, "step": 6122 }, { "epoch": 0.71, "learning_rate": 8.762253454588401e-08, "logits/chosen": -1.481194019317627, "logits/rejected": -1.7667092084884644, "logps/chosen": -417.4812316894531, "logps/rejected": -199.42239379882812, "loss": 0.5664, "rewards/accuracies": 0.625, "rewards/chosen": -0.974650502204895, "rewards/margins": 0.913427472114563, "rewards/rejected": -1.888077974319458, "step": 6123 }, { "epoch": 0.71, "learning_rate": 8.758710286996576e-08, "logits/chosen": -2.097006320953369, "logits/rejected": -2.336116075515747, "logps/chosen": -216.31507873535156, "logps/rejected": -119.80402374267578, "loss": 0.3131, "rewards/accuracies": 1.0, "rewards/chosen": -0.40683791041374207, "rewards/margins": 1.505118489265442, "rewards/rejected": -1.9119564294815063, "step": 6124 }, { "epoch": 0.71, "learning_rate": 8.755167119404748e-08, "logits/chosen": -2.0113472938537598, "logits/rejected": -2.361189126968384, "logps/chosen": -334.7735595703125, "logps/rejected": -173.824462890625, "loss": 0.4211, "rewards/accuracies": 0.75, "rewards/chosen": -0.21142104268074036, "rewards/margins": 2.062908887863159, "rewards/rejected": -2.2743301391601562, "step": 6125 }, { "epoch": 0.71, "learning_rate": 8.75162395181292e-08, "logits/chosen": -2.684218406677246, "logits/rejected": -2.8137998580932617, "logps/chosen": -281.62213134765625, "logps/rejected": -178.46836853027344, "loss": 0.3558, "rewards/accuracies": 0.875, "rewards/chosen": -1.2136485576629639, "rewards/margins": 1.5049245357513428, "rewards/rejected": -2.7185730934143066, "step": 6126 }, { "epoch": 0.71, "learning_rate": 8.748080784221092e-08, "logits/chosen": -2.444291353225708, "logits/rejected": -2.55389666557312, "logps/chosen": -321.7677307128906, "logps/rejected": -271.75885009765625, "loss": 0.205, "rewards/accuracies": 1.0, "rewards/chosen": -0.2117134928703308, "rewards/margins": 2.6430160999298096, "rewards/rejected": -2.854729413986206, "step": 6127 }, { "epoch": 0.71, "learning_rate": 8.744537616629266e-08, "logits/chosen": -2.079340934753418, "logits/rejected": -2.063159942626953, "logps/chosen": -347.285888671875, "logps/rejected": -255.70108032226562, "loss": 0.1793, "rewards/accuracies": 1.0, "rewards/chosen": -0.6500391960144043, "rewards/margins": 2.943251609802246, "rewards/rejected": -3.5932908058166504, "step": 6128 }, { "epoch": 0.71, "learning_rate": 8.74099444903744e-08, "logits/chosen": -2.451814651489258, "logits/rejected": -2.6078076362609863, "logps/chosen": -245.636962890625, "logps/rejected": -179.5076904296875, "loss": 0.1986, "rewards/accuracies": 0.875, "rewards/chosen": 0.008210569620132446, "rewards/margins": 2.986668109893799, "rewards/rejected": -2.978457450866699, "step": 6129 }, { "epoch": 0.71, "learning_rate": 8.737451281445613e-08, "logits/chosen": -2.0049984455108643, "logits/rejected": -2.03981876373291, "logps/chosen": -384.27972412109375, "logps/rejected": -280.62908935546875, "loss": 0.721, "rewards/accuracies": 0.5, "rewards/chosen": -1.4910625219345093, "rewards/margins": 0.545496940612793, "rewards/rejected": -2.036559581756592, "step": 6130 }, { "epoch": 0.71, "learning_rate": 8.733908113853785e-08, "logits/chosen": -2.844108819961548, "logits/rejected": -2.79852032661438, "logps/chosen": -186.81813049316406, "logps/rejected": -305.7603454589844, "loss": 0.2305, "rewards/accuracies": 0.75, "rewards/chosen": -0.20040485262870789, "rewards/margins": 3.1491432189941406, "rewards/rejected": -3.34954833984375, "step": 6131 }, { "epoch": 0.71, "learning_rate": 8.730364946261957e-08, "logits/chosen": -1.9015370607376099, "logits/rejected": -2.1039295196533203, "logps/chosen": -389.6858215332031, "logps/rejected": -312.4541320800781, "loss": 0.2114, "rewards/accuracies": 0.875, "rewards/chosen": -0.6738407015800476, "rewards/margins": 2.252793788909912, "rewards/rejected": -2.9266345500946045, "step": 6132 }, { "epoch": 0.71, "learning_rate": 8.726821778670131e-08, "logits/chosen": -1.9939970970153809, "logits/rejected": -2.312680959701538, "logps/chosen": -500.7137145996094, "logps/rejected": -274.1947937011719, "loss": 0.5024, "rewards/accuracies": 0.75, "rewards/chosen": -0.9713073968887329, "rewards/margins": 2.330764055252075, "rewards/rejected": -3.3020713329315186, "step": 6133 }, { "epoch": 0.71, "learning_rate": 8.723278611078303e-08, "logits/chosen": -2.509303092956543, "logits/rejected": -2.608140707015991, "logps/chosen": -245.470703125, "logps/rejected": -260.2325439453125, "loss": 0.3626, "rewards/accuracies": 0.75, "rewards/chosen": -0.5932873487472534, "rewards/margins": 1.338752269744873, "rewards/rejected": -1.932039499282837, "step": 6134 }, { "epoch": 0.71, "learning_rate": 8.719735443486477e-08, "logits/chosen": -2.457812786102295, "logits/rejected": -2.4139039516448975, "logps/chosen": -273.72528076171875, "logps/rejected": -284.458251953125, "loss": 0.3993, "rewards/accuracies": 0.625, "rewards/chosen": -1.376622200012207, "rewards/margins": 1.8005832433700562, "rewards/rejected": -3.1772053241729736, "step": 6135 }, { "epoch": 0.71, "learning_rate": 8.71619227589465e-08, "logits/chosen": -2.0673482418060303, "logits/rejected": -2.491957187652588, "logps/chosen": -388.52923583984375, "logps/rejected": -151.33969116210938, "loss": 0.6432, "rewards/accuracies": 0.75, "rewards/chosen": -0.8891506791114807, "rewards/margins": 1.5820744037628174, "rewards/rejected": -2.471224784851074, "step": 6136 }, { "epoch": 0.71, "learning_rate": 8.712649108302822e-08, "logits/chosen": -2.2548575401306152, "logits/rejected": -2.6401479244232178, "logps/chosen": -373.9761962890625, "logps/rejected": -270.98590087890625, "loss": 0.4378, "rewards/accuracies": 0.625, "rewards/chosen": -1.0048713684082031, "rewards/margins": 2.7983908653259277, "rewards/rejected": -3.803262233734131, "step": 6137 }, { "epoch": 0.71, "learning_rate": 8.709105940710995e-08, "logits/chosen": -1.9743678569793701, "logits/rejected": -2.2567379474639893, "logps/chosen": -231.7701416015625, "logps/rejected": -271.6626281738281, "loss": 0.3696, "rewards/accuracies": 0.75, "rewards/chosen": -1.6343276500701904, "rewards/margins": 2.8061866760253906, "rewards/rejected": -4.440514087677002, "step": 6138 }, { "epoch": 0.71, "learning_rate": 8.705562773119168e-08, "logits/chosen": -2.522420883178711, "logits/rejected": -2.7375705242156982, "logps/chosen": -468.5842590332031, "logps/rejected": -243.8650665283203, "loss": 0.3982, "rewards/accuracies": 0.75, "rewards/chosen": -0.8197323679924011, "rewards/margins": 1.949836254119873, "rewards/rejected": -2.769568920135498, "step": 6139 }, { "epoch": 0.71, "learning_rate": 8.70201960552734e-08, "logits/chosen": -2.249361038208008, "logits/rejected": -2.1344447135925293, "logps/chosen": -377.2974548339844, "logps/rejected": -437.4736633300781, "loss": 0.3119, "rewards/accuracies": 0.75, "rewards/chosen": -0.973794162273407, "rewards/margins": 2.7540817260742188, "rewards/rejected": -3.7278757095336914, "step": 6140 }, { "epoch": 0.71, "learning_rate": 8.698476437935515e-08, "logits/chosen": -2.182097911834717, "logits/rejected": -2.3200480937957764, "logps/chosen": -462.1526794433594, "logps/rejected": -291.4093017578125, "loss": 0.1433, "rewards/accuracies": 1.0, "rewards/chosen": -0.3261638581752777, "rewards/margins": 3.1167874336242676, "rewards/rejected": -3.4429514408111572, "step": 6141 }, { "epoch": 0.71, "learning_rate": 8.694933270343687e-08, "logits/chosen": -1.771376371383667, "logits/rejected": -2.426353931427002, "logps/chosen": -405.4763488769531, "logps/rejected": -176.96792602539062, "loss": 0.2778, "rewards/accuracies": 0.875, "rewards/chosen": -0.40019354224205017, "rewards/margins": 1.604334831237793, "rewards/rejected": -2.004528284072876, "step": 6142 }, { "epoch": 0.71, "learning_rate": 8.69139010275186e-08, "logits/chosen": -2.0422451496124268, "logits/rejected": -1.9427660703659058, "logps/chosen": -290.16351318359375, "logps/rejected": -245.6251220703125, "loss": 0.5169, "rewards/accuracies": 0.875, "rewards/chosen": -0.4202842116355896, "rewards/margins": 1.5211806297302246, "rewards/rejected": -1.9414650201797485, "step": 6143 }, { "epoch": 0.71, "learning_rate": 8.687846935160033e-08, "logits/chosen": -2.257930278778076, "logits/rejected": -2.6794776916503906, "logps/chosen": -336.50933837890625, "logps/rejected": -168.97991943359375, "loss": 0.7564, "rewards/accuracies": 0.625, "rewards/chosen": -0.7911392450332642, "rewards/margins": 0.9203197360038757, "rewards/rejected": -1.7114589214324951, "step": 6144 }, { "epoch": 0.71, "learning_rate": 8.684303767568205e-08, "logits/chosen": -2.2868313789367676, "logits/rejected": -2.3585524559020996, "logps/chosen": -234.69717407226562, "logps/rejected": -232.47018432617188, "loss": 0.4208, "rewards/accuracies": 0.75, "rewards/chosen": -1.042853832244873, "rewards/margins": 1.8289209604263306, "rewards/rejected": -2.871774673461914, "step": 6145 }, { "epoch": 0.71, "learning_rate": 8.680760599976378e-08, "logits/chosen": -2.2574613094329834, "logits/rejected": -2.174962043762207, "logps/chosen": -321.06182861328125, "logps/rejected": -364.7124938964844, "loss": 0.1577, "rewards/accuracies": 1.0, "rewards/chosen": -0.22402261197566986, "rewards/margins": 3.276345729827881, "rewards/rejected": -3.500368595123291, "step": 6146 }, { "epoch": 0.72, "learning_rate": 8.677217432384552e-08, "logits/chosen": -2.818155527114868, "logits/rejected": -2.77358341217041, "logps/chosen": -238.77276611328125, "logps/rejected": -230.65713500976562, "loss": 0.7218, "rewards/accuracies": 0.625, "rewards/chosen": -1.4851832389831543, "rewards/margins": 1.5832417011260986, "rewards/rejected": -3.068425178527832, "step": 6147 }, { "epoch": 0.72, "learning_rate": 8.673674264792725e-08, "logits/chosen": -2.423346519470215, "logits/rejected": -2.671715259552002, "logps/chosen": -216.0159912109375, "logps/rejected": -138.0213623046875, "loss": 0.4761, "rewards/accuracies": 0.625, "rewards/chosen": -0.8891265392303467, "rewards/margins": 1.8276431560516357, "rewards/rejected": -2.7167694568634033, "step": 6148 }, { "epoch": 0.72, "learning_rate": 8.670131097200897e-08, "logits/chosen": -1.7372336387634277, "logits/rejected": -1.9758015871047974, "logps/chosen": -453.142822265625, "logps/rejected": -322.1582946777344, "loss": 0.2735, "rewards/accuracies": 0.75, "rewards/chosen": -0.36735546588897705, "rewards/margins": 2.688380241394043, "rewards/rejected": -3.0557358264923096, "step": 6149 }, { "epoch": 0.72, "learning_rate": 8.66658792960907e-08, "logits/chosen": -2.889728546142578, "logits/rejected": -2.765704393386841, "logps/chosen": -180.3844451904297, "logps/rejected": -193.21685791015625, "loss": 0.2256, "rewards/accuracies": 1.0, "rewards/chosen": -1.2347992658615112, "rewards/margins": 2.1784989833831787, "rewards/rejected": -3.4132981300354004, "step": 6150 }, { "epoch": 0.72, "learning_rate": 8.663044762017243e-08, "logits/chosen": -2.4635672569274902, "logits/rejected": -2.6356940269470215, "logps/chosen": -291.2437438964844, "logps/rejected": -273.0096130371094, "loss": 0.5601, "rewards/accuracies": 0.75, "rewards/chosen": -1.043256163597107, "rewards/margins": 2.6376941204071045, "rewards/rejected": -3.680950164794922, "step": 6151 }, { "epoch": 0.72, "learning_rate": 8.659501594425415e-08, "logits/chosen": -2.1826043128967285, "logits/rejected": -2.427936553955078, "logps/chosen": -492.7127380371094, "logps/rejected": -349.9996643066406, "loss": 0.5329, "rewards/accuracies": 0.625, "rewards/chosen": -1.2952625751495361, "rewards/margins": 0.6753624677658081, "rewards/rejected": -1.9706249237060547, "step": 6152 }, { "epoch": 0.72, "learning_rate": 8.65595842683359e-08, "logits/chosen": -2.2325987815856934, "logits/rejected": -2.2353317737579346, "logps/chosen": -362.5477600097656, "logps/rejected": -251.04669189453125, "loss": 0.3173, "rewards/accuracies": 0.875, "rewards/chosen": -0.376921147108078, "rewards/margins": 1.9942996501922607, "rewards/rejected": -2.371220827102661, "step": 6153 }, { "epoch": 0.72, "learning_rate": 8.652415259241762e-08, "logits/chosen": -1.9101965427398682, "logits/rejected": -1.9166339635849, "logps/chosen": -241.71304321289062, "logps/rejected": -242.64959716796875, "loss": 0.408, "rewards/accuracies": 0.875, "rewards/chosen": -0.8892908096313477, "rewards/margins": 1.5895254611968994, "rewards/rejected": -2.478816032409668, "step": 6154 }, { "epoch": 0.72, "learning_rate": 8.648872091649934e-08, "logits/chosen": -2.627551317214966, "logits/rejected": -2.467507839202881, "logps/chosen": -136.03323364257812, "logps/rejected": -269.40283203125, "loss": 0.4955, "rewards/accuracies": 0.75, "rewards/chosen": -1.202125906944275, "rewards/margins": 1.8675696849822998, "rewards/rejected": -3.069695472717285, "step": 6155 }, { "epoch": 0.72, "learning_rate": 8.645328924058108e-08, "logits/chosen": -1.7970623970031738, "logits/rejected": -1.8942314386367798, "logps/chosen": -164.4700164794922, "logps/rejected": -242.071044921875, "loss": 0.3473, "rewards/accuracies": 0.75, "rewards/chosen": -0.36236006021499634, "rewards/margins": 2.44041109085083, "rewards/rejected": -2.8027710914611816, "step": 6156 }, { "epoch": 0.72, "learning_rate": 8.64178575646628e-08, "logits/chosen": -2.385831594467163, "logits/rejected": -2.25411057472229, "logps/chosen": -559.1810913085938, "logps/rejected": -252.39291381835938, "loss": 0.2477, "rewards/accuracies": 0.875, "rewards/chosen": -0.24882155656814575, "rewards/margins": 2.4359004497528076, "rewards/rejected": -2.684722423553467, "step": 6157 }, { "epoch": 0.72, "learning_rate": 8.638242588874452e-08, "logits/chosen": -2.1215083599090576, "logits/rejected": -2.135525941848755, "logps/chosen": -288.5606994628906, "logps/rejected": -256.0438537597656, "loss": 0.1791, "rewards/accuracies": 0.875, "rewards/chosen": -1.034604549407959, "rewards/margins": 3.9834158420562744, "rewards/rejected": -5.018019676208496, "step": 6158 }, { "epoch": 0.72, "learning_rate": 8.634699421282627e-08, "logits/chosen": -2.3887667655944824, "logits/rejected": -2.6001358032226562, "logps/chosen": -312.2806396484375, "logps/rejected": -182.0302734375, "loss": 0.4372, "rewards/accuracies": 0.75, "rewards/chosen": -0.4884106516838074, "rewards/margins": 1.521395206451416, "rewards/rejected": -2.009805917739868, "step": 6159 }, { "epoch": 0.72, "learning_rate": 8.631156253690799e-08, "logits/chosen": -1.9172754287719727, "logits/rejected": -1.948781967163086, "logps/chosen": -370.1256408691406, "logps/rejected": -278.2213439941406, "loss": 0.5927, "rewards/accuracies": 0.75, "rewards/chosen": -1.2394564151763916, "rewards/margins": 1.520858645439148, "rewards/rejected": -2.76031494140625, "step": 6160 }, { "epoch": 0.72, "learning_rate": 8.627613086098973e-08, "logits/chosen": -2.6870713233947754, "logits/rejected": -2.6699600219726562, "logps/chosen": -211.618896484375, "logps/rejected": -233.21446228027344, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": -0.7012476325035095, "rewards/margins": 4.089520454406738, "rewards/rejected": -4.790768146514893, "step": 6161 }, { "epoch": 0.72, "learning_rate": 8.624069918507145e-08, "logits/chosen": -1.9824742078781128, "logits/rejected": -1.862900733947754, "logps/chosen": -237.03225708007812, "logps/rejected": -218.35902404785156, "loss": 0.3693, "rewards/accuracies": 0.875, "rewards/chosen": -1.4816663265228271, "rewards/margins": 2.5816895961761475, "rewards/rejected": -4.063355922698975, "step": 6162 }, { "epoch": 0.72, "learning_rate": 8.620526750915317e-08, "logits/chosen": -2.8389625549316406, "logits/rejected": -2.64193058013916, "logps/chosen": -322.6404724121094, "logps/rejected": -311.8883056640625, "loss": 0.3085, "rewards/accuracies": 0.875, "rewards/chosen": -0.7945222854614258, "rewards/margins": 2.6259117126464844, "rewards/rejected": -3.42043399810791, "step": 6163 }, { "epoch": 0.72, "learning_rate": 8.616983583323492e-08, "logits/chosen": -2.212230920791626, "logits/rejected": -1.8447842597961426, "logps/chosen": -185.98309326171875, "logps/rejected": -354.83331298828125, "loss": 0.3154, "rewards/accuracies": 0.875, "rewards/chosen": -0.6632559299468994, "rewards/margins": 1.492000937461853, "rewards/rejected": -2.155256986618042, "step": 6164 }, { "epoch": 0.72, "learning_rate": 8.613440415731664e-08, "logits/chosen": -2.2033283710479736, "logits/rejected": -2.3496227264404297, "logps/chosen": -177.17449951171875, "logps/rejected": -166.38232421875, "loss": 0.4474, "rewards/accuracies": 0.75, "rewards/chosen": -0.25591638684272766, "rewards/margins": 0.778154730796814, "rewards/rejected": -1.0340712070465088, "step": 6165 }, { "epoch": 0.72, "learning_rate": 8.609897248139836e-08, "logits/chosen": -2.310760974884033, "logits/rejected": -2.3604140281677246, "logps/chosen": -173.6068115234375, "logps/rejected": -308.4166564941406, "loss": 0.2243, "rewards/accuracies": 0.875, "rewards/chosen": -0.963188648223877, "rewards/margins": 4.7303643226623535, "rewards/rejected": -5.693553447723389, "step": 6166 }, { "epoch": 0.72, "learning_rate": 8.60635408054801e-08, "logits/chosen": -2.4454691410064697, "logits/rejected": -2.4416043758392334, "logps/chosen": -242.67405700683594, "logps/rejected": -205.78683471679688, "loss": 0.2653, "rewards/accuracies": 0.875, "rewards/chosen": -0.6365424394607544, "rewards/margins": 2.5353024005889893, "rewards/rejected": -3.171844959259033, "step": 6167 }, { "epoch": 0.72, "learning_rate": 8.602810912956182e-08, "logits/chosen": -2.693699836730957, "logits/rejected": -2.5056145191192627, "logps/chosen": -213.38446044921875, "logps/rejected": -264.4823913574219, "loss": 0.4081, "rewards/accuracies": 0.75, "rewards/chosen": -0.6370081305503845, "rewards/margins": 1.6609513759613037, "rewards/rejected": -2.297959566116333, "step": 6168 }, { "epoch": 0.72, "learning_rate": 8.599267745364354e-08, "logits/chosen": -2.4321999549865723, "logits/rejected": -2.5228521823883057, "logps/chosen": -240.80467224121094, "logps/rejected": -300.208984375, "loss": 0.3714, "rewards/accuracies": 0.875, "rewards/chosen": -1.053305745124817, "rewards/margins": 1.3856127262115479, "rewards/rejected": -2.4389185905456543, "step": 6169 }, { "epoch": 0.72, "learning_rate": 8.595724577772529e-08, "logits/chosen": -2.459749221801758, "logits/rejected": -2.7593846321105957, "logps/chosen": -281.6546630859375, "logps/rejected": -257.7071533203125, "loss": 0.4225, "rewards/accuracies": 0.75, "rewards/chosen": -0.7256710529327393, "rewards/margins": 1.9130609035491943, "rewards/rejected": -2.6387319564819336, "step": 6170 }, { "epoch": 0.72, "learning_rate": 8.592181410180701e-08, "logits/chosen": -2.3389015197753906, "logits/rejected": -2.3837218284606934, "logps/chosen": -243.61997985839844, "logps/rejected": -380.61376953125, "loss": 0.3972, "rewards/accuracies": 0.75, "rewards/chosen": -0.3441395163536072, "rewards/margins": 2.917905330657959, "rewards/rejected": -3.262044906616211, "step": 6171 }, { "epoch": 0.72, "learning_rate": 8.588638242588874e-08, "logits/chosen": -2.6372053623199463, "logits/rejected": -2.3704934120178223, "logps/chosen": -254.6194610595703, "logps/rejected": -302.18560791015625, "loss": 0.0917, "rewards/accuracies": 1.0, "rewards/chosen": -0.9449309706687927, "rewards/margins": 4.170022964477539, "rewards/rejected": -5.114953994750977, "step": 6172 }, { "epoch": 0.72, "learning_rate": 8.585095074997047e-08, "logits/chosen": -2.195780038833618, "logits/rejected": -2.344179391860962, "logps/chosen": -363.1256103515625, "logps/rejected": -236.9369354248047, "loss": 0.309, "rewards/accuracies": 0.875, "rewards/chosen": -0.4474216103553772, "rewards/margins": 2.278592348098755, "rewards/rejected": -2.7260138988494873, "step": 6173 }, { "epoch": 0.72, "learning_rate": 8.58155190740522e-08, "logits/chosen": -2.0392343997955322, "logits/rejected": -2.508249282836914, "logps/chosen": -424.61749267578125, "logps/rejected": -268.83563232421875, "loss": 0.3973, "rewards/accuracies": 0.875, "rewards/chosen": -0.5108384490013123, "rewards/margins": 2.0427064895629883, "rewards/rejected": -2.5535449981689453, "step": 6174 }, { "epoch": 0.72, "learning_rate": 8.578008739813392e-08, "logits/chosen": -2.2129507064819336, "logits/rejected": -2.526017665863037, "logps/chosen": -180.93739318847656, "logps/rejected": -169.88795471191406, "loss": 0.543, "rewards/accuracies": 0.625, "rewards/chosen": -0.903856635093689, "rewards/margins": 1.367254376411438, "rewards/rejected": -2.271111249923706, "step": 6175 }, { "epoch": 0.72, "learning_rate": 8.574465572221566e-08, "logits/chosen": -2.4413280487060547, "logits/rejected": -2.4938228130340576, "logps/chosen": -236.22341918945312, "logps/rejected": -188.71583557128906, "loss": 0.3126, "rewards/accuracies": 0.875, "rewards/chosen": -0.6718831658363342, "rewards/margins": 2.6141607761383057, "rewards/rejected": -3.286044120788574, "step": 6176 }, { "epoch": 0.72, "learning_rate": 8.570922404629739e-08, "logits/chosen": -2.324399709701538, "logits/rejected": -2.035539388656616, "logps/chosen": -122.91212463378906, "logps/rejected": -291.3343811035156, "loss": 0.3625, "rewards/accuracies": 0.75, "rewards/chosen": -0.47544822096824646, "rewards/margins": 2.7233357429504395, "rewards/rejected": -3.198784112930298, "step": 6177 }, { "epoch": 0.72, "learning_rate": 8.567379237037912e-08, "logits/chosen": -2.3361992835998535, "logits/rejected": -2.360306978225708, "logps/chosen": -124.30534362792969, "logps/rejected": -183.5393829345703, "loss": 0.546, "rewards/accuracies": 0.875, "rewards/chosen": -0.6026584506034851, "rewards/margins": 1.652522087097168, "rewards/rejected": -2.2551803588867188, "step": 6178 }, { "epoch": 0.72, "learning_rate": 8.563836069446084e-08, "logits/chosen": -2.132028102874756, "logits/rejected": -1.9834133386611938, "logps/chosen": -334.4992370605469, "logps/rejected": -241.6773681640625, "loss": 0.9026, "rewards/accuracies": 0.5, "rewards/chosen": -1.6995103359222412, "rewards/margins": 0.7607282400131226, "rewards/rejected": -2.460238456726074, "step": 6179 }, { "epoch": 0.72, "learning_rate": 8.560292901854257e-08, "logits/chosen": -2.6839652061462402, "logits/rejected": -2.980827808380127, "logps/chosen": -193.55174255371094, "logps/rejected": -251.4596405029297, "loss": 0.3439, "rewards/accuracies": 0.75, "rewards/chosen": -0.809939444065094, "rewards/margins": 2.896678924560547, "rewards/rejected": -3.706618309020996, "step": 6180 }, { "epoch": 0.72, "learning_rate": 8.556749734262429e-08, "logits/chosen": -2.463465452194214, "logits/rejected": -2.464388132095337, "logps/chosen": -333.2839660644531, "logps/rejected": -241.25946044921875, "loss": 0.4707, "rewards/accuracies": 0.875, "rewards/chosen": -0.7414499521255493, "rewards/margins": 1.9656450748443604, "rewards/rejected": -2.707094669342041, "step": 6181 }, { "epoch": 0.72, "learning_rate": 8.553206566670604e-08, "logits/chosen": -2.0876364707946777, "logits/rejected": -2.232290267944336, "logps/chosen": -189.64222717285156, "logps/rejected": -165.8405303955078, "loss": 0.5108, "rewards/accuracies": 0.75, "rewards/chosen": -1.3754830360412598, "rewards/margins": 1.6044104099273682, "rewards/rejected": -2.979893684387207, "step": 6182 }, { "epoch": 0.72, "learning_rate": 8.549663399078776e-08, "logits/chosen": -2.80676007270813, "logits/rejected": -2.7026617527008057, "logps/chosen": -191.81985473632812, "logps/rejected": -202.89183044433594, "loss": 0.2427, "rewards/accuracies": 0.875, "rewards/chosen": -0.497165709733963, "rewards/margins": 3.0778274536132812, "rewards/rejected": -3.574993133544922, "step": 6183 }, { "epoch": 0.72, "learning_rate": 8.54612023148695e-08, "logits/chosen": -2.5514931678771973, "logits/rejected": -2.708113193511963, "logps/chosen": -227.96493530273438, "logps/rejected": -285.8348693847656, "loss": 0.0947, "rewards/accuracies": 1.0, "rewards/chosen": -0.4490073025226593, "rewards/margins": 3.7989754676818848, "rewards/rejected": -4.247982978820801, "step": 6184 }, { "epoch": 0.72, "learning_rate": 8.542577063895122e-08, "logits/chosen": -2.4027223587036133, "logits/rejected": -2.1411232948303223, "logps/chosen": -353.6353759765625, "logps/rejected": -471.2122802734375, "loss": 0.2714, "rewards/accuracies": 0.875, "rewards/chosen": -0.43692782521247864, "rewards/margins": 1.8715507984161377, "rewards/rejected": -2.308478593826294, "step": 6185 }, { "epoch": 0.72, "learning_rate": 8.539033896303294e-08, "logits/chosen": -2.835728168487549, "logits/rejected": -2.8514132499694824, "logps/chosen": -64.1361083984375, "logps/rejected": -276.1983642578125, "loss": 0.0964, "rewards/accuracies": 1.0, "rewards/chosen": -0.3603079915046692, "rewards/margins": 4.898098945617676, "rewards/rejected": -5.2584075927734375, "step": 6186 }, { "epoch": 0.72, "learning_rate": 8.535490728711467e-08, "logits/chosen": -2.5773115158081055, "logits/rejected": -2.3103737831115723, "logps/chosen": -194.20513916015625, "logps/rejected": -396.41119384765625, "loss": 0.2381, "rewards/accuracies": 0.875, "rewards/chosen": -1.0636407136917114, "rewards/margins": 2.9007060527801514, "rewards/rejected": -3.9643468856811523, "step": 6187 }, { "epoch": 0.72, "learning_rate": 8.531947561119641e-08, "logits/chosen": -2.0861454010009766, "logits/rejected": -2.1122045516967773, "logps/chosen": -198.61102294921875, "logps/rejected": -252.63970947265625, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": -0.6557865142822266, "rewards/margins": 3.877220392227173, "rewards/rejected": -4.53300666809082, "step": 6188 }, { "epoch": 0.72, "learning_rate": 8.528404393527813e-08, "logits/chosen": -2.6571435928344727, "logits/rejected": -2.651176929473877, "logps/chosen": -163.44204711914062, "logps/rejected": -267.6033630371094, "loss": 0.1642, "rewards/accuracies": 1.0, "rewards/chosen": -0.5860204696655273, "rewards/margins": 2.4400436878204346, "rewards/rejected": -3.026064395904541, "step": 6189 }, { "epoch": 0.72, "learning_rate": 8.524861225935987e-08, "logits/chosen": -2.0719962120056152, "logits/rejected": -2.2410714626312256, "logps/chosen": -405.42620849609375, "logps/rejected": -254.29638671875, "loss": 0.555, "rewards/accuracies": 0.75, "rewards/chosen": -1.026648998260498, "rewards/margins": 1.6360039710998535, "rewards/rejected": -2.6626529693603516, "step": 6190 }, { "epoch": 0.72, "learning_rate": 8.521318058344159e-08, "logits/chosen": -2.004477024078369, "logits/rejected": -1.934449553489685, "logps/chosen": -261.0602722167969, "logps/rejected": -281.9028625488281, "loss": 0.3347, "rewards/accuracies": 0.75, "rewards/chosen": -0.20662790536880493, "rewards/margins": 1.9031565189361572, "rewards/rejected": -2.1097846031188965, "step": 6191 }, { "epoch": 0.72, "learning_rate": 8.517774890752331e-08, "logits/chosen": -2.9406423568725586, "logits/rejected": -2.7553324699401855, "logps/chosen": -264.8376159667969, "logps/rejected": -267.23419189453125, "loss": 0.2596, "rewards/accuracies": 0.875, "rewards/chosen": -1.4856548309326172, "rewards/margins": 2.628995418548584, "rewards/rejected": -4.114650249481201, "step": 6192 }, { "epoch": 0.72, "learning_rate": 8.514231723160505e-08, "logits/chosen": -1.9721488952636719, "logits/rejected": -2.162766933441162, "logps/chosen": -454.4625549316406, "logps/rejected": -344.04058837890625, "loss": 0.3449, "rewards/accuracies": 0.875, "rewards/chosen": -1.3118884563446045, "rewards/margins": 1.383805751800537, "rewards/rejected": -2.6956942081451416, "step": 6193 }, { "epoch": 0.72, "learning_rate": 8.510688555568678e-08, "logits/chosen": -2.573375701904297, "logits/rejected": -2.389963150024414, "logps/chosen": -236.73129272460938, "logps/rejected": -338.0162658691406, "loss": 0.1066, "rewards/accuracies": 1.0, "rewards/chosen": -1.2645387649536133, "rewards/margins": 3.9866623878479004, "rewards/rejected": -5.2512006759643555, "step": 6194 }, { "epoch": 0.72, "learning_rate": 8.507145387976852e-08, "logits/chosen": -2.3015005588531494, "logits/rejected": -2.458374500274658, "logps/chosen": -264.60552978515625, "logps/rejected": -301.0356140136719, "loss": 0.3991, "rewards/accuracies": 0.875, "rewards/chosen": -0.2469845414161682, "rewards/margins": 2.1689815521240234, "rewards/rejected": -2.415966033935547, "step": 6195 }, { "epoch": 0.72, "learning_rate": 8.503602220385024e-08, "logits/chosen": -2.38104510307312, "logits/rejected": -2.7435553073883057, "logps/chosen": -269.9562072753906, "logps/rejected": -224.93417358398438, "loss": 0.1072, "rewards/accuracies": 1.0, "rewards/chosen": -0.18478454649448395, "rewards/margins": 3.2650461196899414, "rewards/rejected": -3.4498305320739746, "step": 6196 }, { "epoch": 0.72, "learning_rate": 8.500059052793196e-08, "logits/chosen": -2.415226936340332, "logits/rejected": -2.386878252029419, "logps/chosen": -172.85328674316406, "logps/rejected": -237.433349609375, "loss": 0.2787, "rewards/accuracies": 0.875, "rewards/chosen": -0.4820743799209595, "rewards/margins": 2.8432388305664062, "rewards/rejected": -3.3253135681152344, "step": 6197 }, { "epoch": 0.72, "learning_rate": 8.49651588520137e-08, "logits/chosen": -1.7650117874145508, "logits/rejected": -1.920945644378662, "logps/chosen": -207.30711364746094, "logps/rejected": -169.3771209716797, "loss": 0.7708, "rewards/accuracies": 0.625, "rewards/chosen": -0.5649362802505493, "rewards/margins": 0.559837818145752, "rewards/rejected": -1.1247740983963013, "step": 6198 }, { "epoch": 0.72, "learning_rate": 8.492972717609543e-08, "logits/chosen": -2.414623737335205, "logits/rejected": -2.37878680229187, "logps/chosen": -193.81712341308594, "logps/rejected": -129.8374481201172, "loss": 0.3786, "rewards/accuracies": 0.875, "rewards/chosen": -0.14342671632766724, "rewards/margins": 1.7765001058578491, "rewards/rejected": -1.9199268817901611, "step": 6199 }, { "epoch": 0.72, "learning_rate": 8.489429550017715e-08, "logits/chosen": -2.040703535079956, "logits/rejected": -2.2989096641540527, "logps/chosen": -352.1123046875, "logps/rejected": -280.35595703125, "loss": 0.287, "rewards/accuracies": 0.875, "rewards/chosen": -0.8461208343505859, "rewards/margins": 2.6924686431884766, "rewards/rejected": -3.5385897159576416, "step": 6200 }, { "epoch": 0.72, "learning_rate": 8.485886382425889e-08, "logits/chosen": -2.19221830368042, "logits/rejected": -2.2637648582458496, "logps/chosen": -507.0249328613281, "logps/rejected": -407.64886474609375, "loss": 0.2373, "rewards/accuracies": 0.875, "rewards/chosen": -0.9982216358184814, "rewards/margins": 2.1928834915161133, "rewards/rejected": -3.191105365753174, "step": 6201 }, { "epoch": 0.72, "learning_rate": 8.482343214834061e-08, "logits/chosen": -1.9008351564407349, "logits/rejected": -1.8096072673797607, "logps/chosen": -224.05242919921875, "logps/rejected": -299.04852294921875, "loss": 0.2414, "rewards/accuracies": 0.875, "rewards/chosen": -1.5479227304458618, "rewards/margins": 3.027313232421875, "rewards/rejected": -4.5752363204956055, "step": 6202 }, { "epoch": 0.72, "learning_rate": 8.478800047242233e-08, "logits/chosen": -2.5383598804473877, "logits/rejected": -2.5531089305877686, "logps/chosen": -468.4386291503906, "logps/rejected": -336.1687316894531, "loss": 0.0866, "rewards/accuracies": 1.0, "rewards/chosen": -0.21443511545658112, "rewards/margins": 2.9762144088745117, "rewards/rejected": -3.1906492710113525, "step": 6203 }, { "epoch": 0.72, "learning_rate": 8.475256879650407e-08, "logits/chosen": -2.44520902633667, "logits/rejected": -2.201347589492798, "logps/chosen": -261.1496887207031, "logps/rejected": -446.5155944824219, "loss": 0.5358, "rewards/accuracies": 0.875, "rewards/chosen": -0.2741830348968506, "rewards/margins": 0.8823078274726868, "rewards/rejected": -1.1564908027648926, "step": 6204 }, { "epoch": 0.72, "learning_rate": 8.47171371205858e-08, "logits/chosen": -2.526021957397461, "logits/rejected": -2.67010498046875, "logps/chosen": -203.3842315673828, "logps/rejected": -164.10516357421875, "loss": 0.5899, "rewards/accuracies": 0.625, "rewards/chosen": -1.4620704650878906, "rewards/margins": 0.48228365182876587, "rewards/rejected": -1.9443540573120117, "step": 6205 }, { "epoch": 0.72, "learning_rate": 8.468170544466754e-08, "logits/chosen": -2.526762008666992, "logits/rejected": -2.3130388259887695, "logps/chosen": -234.00985717773438, "logps/rejected": -237.1852264404297, "loss": 0.3469, "rewards/accuracies": 0.875, "rewards/chosen": -1.259589672088623, "rewards/margins": 1.7542468309402466, "rewards/rejected": -3.013836622238159, "step": 6206 }, { "epoch": 0.72, "learning_rate": 8.464627376874926e-08, "logits/chosen": -2.032775640487671, "logits/rejected": -2.3475842475891113, "logps/chosen": -333.972900390625, "logps/rejected": -221.43685913085938, "loss": 0.9185, "rewards/accuracies": 0.75, "rewards/chosen": -0.8068813681602478, "rewards/margins": 2.1693239212036133, "rewards/rejected": -2.9762051105499268, "step": 6207 }, { "epoch": 0.72, "learning_rate": 8.461084209283098e-08, "logits/chosen": -2.5879197120666504, "logits/rejected": -2.6478116512298584, "logps/chosen": -233.4046173095703, "logps/rejected": -133.99044799804688, "loss": 0.4864, "rewards/accuracies": 0.75, "rewards/chosen": -0.40472841262817383, "rewards/margins": 1.106684684753418, "rewards/rejected": -1.5114130973815918, "step": 6208 }, { "epoch": 0.72, "learning_rate": 8.45754104169127e-08, "logits/chosen": -2.888587236404419, "logits/rejected": -2.8596105575561523, "logps/chosen": -227.11813354492188, "logps/rejected": -233.48587036132812, "loss": 0.3597, "rewards/accuracies": 0.75, "rewards/chosen": -0.4482610523700714, "rewards/margins": 2.368706703186035, "rewards/rejected": -2.8169679641723633, "step": 6209 }, { "epoch": 0.72, "learning_rate": 8.453997874099444e-08, "logits/chosen": -2.0012283325195312, "logits/rejected": -2.011317729949951, "logps/chosen": -271.8927307128906, "logps/rejected": -204.44851684570312, "loss": 1.1301, "rewards/accuracies": 0.75, "rewards/chosen": -1.8087866306304932, "rewards/margins": 0.46790850162506104, "rewards/rejected": -2.2766952514648438, "step": 6210 }, { "epoch": 0.72, "learning_rate": 8.450454706507618e-08, "logits/chosen": -2.0724267959594727, "logits/rejected": -2.040477752685547, "logps/chosen": -328.2983093261719, "logps/rejected": -352.61566162109375, "loss": 0.266, "rewards/accuracies": 0.875, "rewards/chosen": 0.07237634062767029, "rewards/margins": 2.3381383419036865, "rewards/rejected": -2.2657618522644043, "step": 6211 }, { "epoch": 0.72, "learning_rate": 8.446911538915791e-08, "logits/chosen": -2.006709575653076, "logits/rejected": -1.9986475706100464, "logps/chosen": -373.9992980957031, "logps/rejected": -456.7115478515625, "loss": 0.3627, "rewards/accuracies": 0.875, "rewards/chosen": -0.1275596171617508, "rewards/margins": 2.6488778591156006, "rewards/rejected": -2.776437282562256, "step": 6212 }, { "epoch": 0.72, "learning_rate": 8.443368371323963e-08, "logits/chosen": -2.3259706497192383, "logits/rejected": -2.128541946411133, "logps/chosen": -161.970703125, "logps/rejected": -213.26870727539062, "loss": 0.1886, "rewards/accuracies": 0.875, "rewards/chosen": -0.5905094742774963, "rewards/margins": 2.942847490310669, "rewards/rejected": -3.5333569049835205, "step": 6213 }, { "epoch": 0.72, "learning_rate": 8.439825203732136e-08, "logits/chosen": -1.8114686012268066, "logits/rejected": -2.1918437480926514, "logps/chosen": -260.6184387207031, "logps/rejected": -229.102783203125, "loss": 0.4497, "rewards/accuracies": 0.75, "rewards/chosen": -0.6389410495758057, "rewards/margins": 1.9950064420700073, "rewards/rejected": -2.6339476108551025, "step": 6214 }, { "epoch": 0.72, "learning_rate": 8.436282036140309e-08, "logits/chosen": -2.5514657497406006, "logits/rejected": -2.207322359085083, "logps/chosen": -304.41595458984375, "logps/rejected": -361.548095703125, "loss": 0.1953, "rewards/accuracies": 0.875, "rewards/chosen": -1.1112205982208252, "rewards/margins": 4.914114952087402, "rewards/rejected": -6.025335311889648, "step": 6215 }, { "epoch": 0.72, "learning_rate": 8.432738868548481e-08, "logits/chosen": -2.446741819381714, "logits/rejected": -2.650381565093994, "logps/chosen": -379.11114501953125, "logps/rejected": -306.677978515625, "loss": 0.6567, "rewards/accuracies": 0.625, "rewards/chosen": -1.9503588676452637, "rewards/margins": 1.070216417312622, "rewards/rejected": -3.0205752849578857, "step": 6216 }, { "epoch": 0.72, "learning_rate": 8.429195700956655e-08, "logits/chosen": -2.6395018100738525, "logits/rejected": -2.451396942138672, "logps/chosen": -56.987632751464844, "logps/rejected": -223.51284790039062, "loss": 0.2781, "rewards/accuracies": 0.75, "rewards/chosen": -0.5905852913856506, "rewards/margins": 3.0262582302093506, "rewards/rejected": -3.6168434619903564, "step": 6217 }, { "epoch": 0.72, "learning_rate": 8.425652533364829e-08, "logits/chosen": -2.2031772136688232, "logits/rejected": -2.522761821746826, "logps/chosen": -279.779541015625, "logps/rejected": -201.77590942382812, "loss": 0.6028, "rewards/accuracies": 0.625, "rewards/chosen": -0.8779282569885254, "rewards/margins": 1.0375052690505981, "rewards/rejected": -1.9154332876205444, "step": 6218 }, { "epoch": 0.72, "learning_rate": 8.422109365773001e-08, "logits/chosen": -1.8250367641448975, "logits/rejected": -1.8257873058319092, "logps/chosen": -225.02142333984375, "logps/rejected": -212.8092803955078, "loss": 0.447, "rewards/accuracies": 0.625, "rewards/chosen": -0.5590106844902039, "rewards/margins": 1.4504485130310059, "rewards/rejected": -2.0094590187072754, "step": 6219 }, { "epoch": 0.72, "learning_rate": 8.418566198181173e-08, "logits/chosen": -1.9522621631622314, "logits/rejected": -2.305051326751709, "logps/chosen": -309.0178527832031, "logps/rejected": -214.41786193847656, "loss": 0.2315, "rewards/accuracies": 1.0, "rewards/chosen": -0.5981003046035767, "rewards/margins": 1.6721316576004028, "rewards/rejected": -2.2702319622039795, "step": 6220 }, { "epoch": 0.72, "learning_rate": 8.415023030589346e-08, "logits/chosen": -1.8973274230957031, "logits/rejected": -2.44476056098938, "logps/chosen": -425.1492614746094, "logps/rejected": -194.3166961669922, "loss": 0.5543, "rewards/accuracies": 0.625, "rewards/chosen": -0.6609974503517151, "rewards/margins": 0.44252222776412964, "rewards/rejected": -1.1035196781158447, "step": 6221 }, { "epoch": 0.72, "learning_rate": 8.411479862997519e-08, "logits/chosen": -2.379875898361206, "logits/rejected": -2.364088535308838, "logps/chosen": -273.5645446777344, "logps/rejected": -248.3965301513672, "loss": 0.2128, "rewards/accuracies": 0.875, "rewards/chosen": -0.5945512056350708, "rewards/margins": 2.2761082649230957, "rewards/rejected": -2.870659351348877, "step": 6222 }, { "epoch": 0.72, "learning_rate": 8.407936695405694e-08, "logits/chosen": -2.43648099899292, "logits/rejected": -2.3652286529541016, "logps/chosen": -269.4208984375, "logps/rejected": -221.37271118164062, "loss": 0.7281, "rewards/accuracies": 0.75, "rewards/chosen": -1.2312710285186768, "rewards/margins": 1.2549548149108887, "rewards/rejected": -2.4862256050109863, "step": 6223 }, { "epoch": 0.72, "learning_rate": 8.404393527813866e-08, "logits/chosen": -2.913649082183838, "logits/rejected": -2.9290053844451904, "logps/chosen": -585.7796020507812, "logps/rejected": -271.4493408203125, "loss": 0.25, "rewards/accuracies": 1.0, "rewards/chosen": -0.985693097114563, "rewards/margins": 1.9941980838775635, "rewards/rejected": -2.979891061782837, "step": 6224 }, { "epoch": 0.72, "learning_rate": 8.400850360222038e-08, "logits/chosen": -2.622750997543335, "logits/rejected": -2.7329254150390625, "logps/chosen": -221.3226318359375, "logps/rejected": -232.8501434326172, "loss": 0.1827, "rewards/accuracies": 0.875, "rewards/chosen": -0.47570037841796875, "rewards/margins": 2.9860920906066895, "rewards/rejected": -3.461792469024658, "step": 6225 }, { "epoch": 0.72, "learning_rate": 8.39730719263021e-08, "logits/chosen": -2.4836740493774414, "logits/rejected": -2.277036666870117, "logps/chosen": -216.7913360595703, "logps/rejected": -347.1855163574219, "loss": 0.239, "rewards/accuracies": 0.875, "rewards/chosen": -0.022188007831573486, "rewards/margins": 2.173504590988159, "rewards/rejected": -2.195692777633667, "step": 6226 }, { "epoch": 0.72, "learning_rate": 8.393764025038384e-08, "logits/chosen": -2.93351411819458, "logits/rejected": -2.915302276611328, "logps/chosen": -279.9453125, "logps/rejected": -232.0118408203125, "loss": 0.2597, "rewards/accuracies": 0.875, "rewards/chosen": -1.0144299268722534, "rewards/margins": 1.9939179420471191, "rewards/rejected": -3.008347988128662, "step": 6227 }, { "epoch": 0.72, "learning_rate": 8.390220857446556e-08, "logits/chosen": -2.5109243392944336, "logits/rejected": -2.519717216491699, "logps/chosen": -307.77008056640625, "logps/rejected": -269.854736328125, "loss": 0.4955, "rewards/accuracies": 0.875, "rewards/chosen": -0.6725038886070251, "rewards/margins": 1.921865701675415, "rewards/rejected": -2.594369411468506, "step": 6228 }, { "epoch": 0.72, "learning_rate": 8.386677689854731e-08, "logits/chosen": -2.0238726139068604, "logits/rejected": -2.059744358062744, "logps/chosen": -171.95297241210938, "logps/rejected": -258.15911865234375, "loss": 0.3455, "rewards/accuracies": 0.875, "rewards/chosen": -1.2111319303512573, "rewards/margins": 2.3702280521392822, "rewards/rejected": -3.58135986328125, "step": 6229 }, { "epoch": 0.72, "learning_rate": 8.383134522262903e-08, "logits/chosen": -1.8444111347198486, "logits/rejected": -2.151204824447632, "logps/chosen": -518.5436401367188, "logps/rejected": -318.41326904296875, "loss": 0.2865, "rewards/accuracies": 1.0, "rewards/chosen": -1.6902800798416138, "rewards/margins": 1.8736826181411743, "rewards/rejected": -3.563962697982788, "step": 6230 }, { "epoch": 0.72, "learning_rate": 8.379591354671075e-08, "logits/chosen": -2.206238269805908, "logits/rejected": -2.2179579734802246, "logps/chosen": -339.2690124511719, "logps/rejected": -312.040283203125, "loss": 0.1921, "rewards/accuracies": 0.875, "rewards/chosen": -0.7065054774284363, "rewards/margins": 3.4422428607940674, "rewards/rejected": -4.148748397827148, "step": 6231 }, { "epoch": 0.72, "learning_rate": 8.376048187079249e-08, "logits/chosen": -2.38478946685791, "logits/rejected": -2.216792345046997, "logps/chosen": -258.33447265625, "logps/rejected": -276.9412841796875, "loss": 0.3868, "rewards/accuracies": 0.875, "rewards/chosen": -0.9541309475898743, "rewards/margins": 1.157449722290039, "rewards/rejected": -2.1115808486938477, "step": 6232 }, { "epoch": 0.73, "learning_rate": 8.372505019487421e-08, "logits/chosen": -2.0269575119018555, "logits/rejected": -1.791346788406372, "logps/chosen": -203.01148986816406, "logps/rejected": -294.86224365234375, "loss": 0.1588, "rewards/accuracies": 1.0, "rewards/chosen": -0.6437211632728577, "rewards/margins": 3.725830554962158, "rewards/rejected": -4.369551658630371, "step": 6233 }, { "epoch": 0.73, "learning_rate": 8.368961851895595e-08, "logits/chosen": -2.8475847244262695, "logits/rejected": -2.7001724243164062, "logps/chosen": -196.25662231445312, "logps/rejected": -149.90695190429688, "loss": 0.1622, "rewards/accuracies": 1.0, "rewards/chosen": -0.2663803696632385, "rewards/margins": 2.360764503479004, "rewards/rejected": -2.6271448135375977, "step": 6234 }, { "epoch": 0.73, "learning_rate": 8.365418684303768e-08, "logits/chosen": -2.278712272644043, "logits/rejected": -2.6379964351654053, "logps/chosen": -382.8663330078125, "logps/rejected": -247.62770080566406, "loss": 0.3212, "rewards/accuracies": 0.75, "rewards/chosen": -1.319392442703247, "rewards/margins": 1.6596360206604004, "rewards/rejected": -2.9790284633636475, "step": 6235 }, { "epoch": 0.73, "learning_rate": 8.36187551671194e-08, "logits/chosen": -2.6201014518737793, "logits/rejected": -2.6098618507385254, "logps/chosen": -304.2347412109375, "logps/rejected": -365.8212890625, "loss": 0.2575, "rewards/accuracies": 0.875, "rewards/chosen": -1.4426062107086182, "rewards/margins": 3.0042455196380615, "rewards/rejected": -4.44685173034668, "step": 6236 }, { "epoch": 0.73, "learning_rate": 8.358332349120112e-08, "logits/chosen": -2.079807758331299, "logits/rejected": -1.887887716293335, "logps/chosen": -139.32276916503906, "logps/rejected": -202.21681213378906, "loss": 0.3087, "rewards/accuracies": 0.875, "rewards/chosen": -1.1958633661270142, "rewards/margins": 1.9506926536560059, "rewards/rejected": -3.1465561389923096, "step": 6237 }, { "epoch": 0.73, "learning_rate": 8.354789181528286e-08, "logits/chosen": -1.8903127908706665, "logits/rejected": -2.2764787673950195, "logps/chosen": -347.53826904296875, "logps/rejected": -219.7482452392578, "loss": 0.3216, "rewards/accuracies": 0.875, "rewards/chosen": -0.5685063004493713, "rewards/margins": 2.0581140518188477, "rewards/rejected": -2.6266207695007324, "step": 6238 }, { "epoch": 0.73, "learning_rate": 8.351246013936458e-08, "logits/chosen": -2.054142475128174, "logits/rejected": -2.029484272003174, "logps/chosen": -263.55535888671875, "logps/rejected": -320.9850769042969, "loss": 0.2719, "rewards/accuracies": 0.875, "rewards/chosen": -1.6472530364990234, "rewards/margins": 3.1822195053100586, "rewards/rejected": -4.829472541809082, "step": 6239 }, { "epoch": 0.73, "learning_rate": 8.347702846344633e-08, "logits/chosen": -1.9950740337371826, "logits/rejected": -2.1135499477386475, "logps/chosen": -308.297607421875, "logps/rejected": -232.7818145751953, "loss": 0.5222, "rewards/accuracies": 0.625, "rewards/chosen": -0.5112283229827881, "rewards/margins": 1.7710139751434326, "rewards/rejected": -2.2822420597076416, "step": 6240 }, { "epoch": 0.73, "learning_rate": 8.344159678752805e-08, "logits/chosen": -2.441589117050171, "logits/rejected": -2.3939151763916016, "logps/chosen": -240.2314453125, "logps/rejected": -345.9123840332031, "loss": 0.2578, "rewards/accuracies": 0.875, "rewards/chosen": -0.8331272602081299, "rewards/margins": 1.7683361768722534, "rewards/rejected": -2.6014633178710938, "step": 6241 }, { "epoch": 0.73, "learning_rate": 8.340616511160977e-08, "logits/chosen": -2.821415424346924, "logits/rejected": -2.531061887741089, "logps/chosen": -101.25201416015625, "logps/rejected": -171.59292602539062, "loss": 0.246, "rewards/accuracies": 0.875, "rewards/chosen": -1.3780596256256104, "rewards/margins": 2.818971633911133, "rewards/rejected": -4.197031021118164, "step": 6242 }, { "epoch": 0.73, "learning_rate": 8.337073343569151e-08, "logits/chosen": -2.843327522277832, "logits/rejected": -2.7762293815612793, "logps/chosen": -219.25865173339844, "logps/rejected": -268.79473876953125, "loss": 0.1159, "rewards/accuracies": 1.0, "rewards/chosen": -0.8540209531784058, "rewards/margins": 3.6852879524230957, "rewards/rejected": -4.539308547973633, "step": 6243 }, { "epoch": 0.73, "learning_rate": 8.333530175977323e-08, "logits/chosen": -2.2756004333496094, "logits/rejected": -2.0726325511932373, "logps/chosen": -187.93846130371094, "logps/rejected": -347.4775390625, "loss": 0.4215, "rewards/accuracies": 0.625, "rewards/chosen": -0.37550094723701477, "rewards/margins": 4.324132919311523, "rewards/rejected": -4.699634552001953, "step": 6244 }, { "epoch": 0.73, "learning_rate": 8.329987008385495e-08, "logits/chosen": -2.638855457305908, "logits/rejected": -2.652555465698242, "logps/chosen": -268.8465881347656, "logps/rejected": -285.71441650390625, "loss": 0.3067, "rewards/accuracies": 0.875, "rewards/chosen": -0.5621377229690552, "rewards/margins": 2.5597269535064697, "rewards/rejected": -3.1218647956848145, "step": 6245 }, { "epoch": 0.73, "learning_rate": 8.32644384079367e-08, "logits/chosen": -2.724247932434082, "logits/rejected": -2.566775321960449, "logps/chosen": -124.0853500366211, "logps/rejected": -242.38612365722656, "loss": 0.3282, "rewards/accuracies": 0.875, "rewards/chosen": -0.2946052551269531, "rewards/margins": 1.3789558410644531, "rewards/rejected": -1.6735610961914062, "step": 6246 }, { "epoch": 0.73, "learning_rate": 8.322900673201843e-08, "logits/chosen": -2.37001633644104, "logits/rejected": -2.5617761611938477, "logps/chosen": -380.3348693847656, "logps/rejected": -287.5806884765625, "loss": 0.1129, "rewards/accuracies": 1.0, "rewards/chosen": -0.4337380528450012, "rewards/margins": 3.596730947494507, "rewards/rejected": -4.030468940734863, "step": 6247 }, { "epoch": 0.73, "learning_rate": 8.319357505610015e-08, "logits/chosen": -2.4132156372070312, "logits/rejected": -2.8407673835754395, "logps/chosen": -441.3544616699219, "logps/rejected": -207.17843627929688, "loss": 0.5126, "rewards/accuracies": 0.625, "rewards/chosen": -0.9827101230621338, "rewards/margins": 0.8773732781410217, "rewards/rejected": -1.8600834608078003, "step": 6248 }, { "epoch": 0.73, "learning_rate": 8.315814338018188e-08, "logits/chosen": -2.1347768306732178, "logits/rejected": -2.492623805999756, "logps/chosen": -332.7708435058594, "logps/rejected": -406.6116638183594, "loss": 0.12, "rewards/accuracies": 1.0, "rewards/chosen": -0.13036808371543884, "rewards/margins": 2.984086036682129, "rewards/rejected": -3.1144542694091797, "step": 6249 }, { "epoch": 0.73, "learning_rate": 8.31227117042636e-08, "logits/chosen": -1.7121227979660034, "logits/rejected": -1.5564534664154053, "logps/chosen": -450.9198303222656, "logps/rejected": -467.85968017578125, "loss": 0.5187, "rewards/accuracies": 0.625, "rewards/chosen": -1.4625214338302612, "rewards/margins": 1.4492309093475342, "rewards/rejected": -2.911752223968506, "step": 6250 }, { "epoch": 0.73, "learning_rate": 8.308728002834533e-08, "logits/chosen": -2.6764185428619385, "logits/rejected": -2.6374990940093994, "logps/chosen": -161.54757690429688, "logps/rejected": -190.63674926757812, "loss": 0.4942, "rewards/accuracies": 0.625, "rewards/chosen": -0.5977938175201416, "rewards/margins": 1.2090122699737549, "rewards/rejected": -1.8068060874938965, "step": 6251 }, { "epoch": 0.73, "learning_rate": 8.305184835242708e-08, "logits/chosen": -2.0300803184509277, "logits/rejected": -2.270563840866089, "logps/chosen": -278.9227294921875, "logps/rejected": -181.25901794433594, "loss": 0.6181, "rewards/accuracies": 0.75, "rewards/chosen": -1.1741971969604492, "rewards/margins": 0.5925653576850891, "rewards/rejected": -1.7667627334594727, "step": 6252 }, { "epoch": 0.73, "learning_rate": 8.30164166765088e-08, "logits/chosen": -2.5598561763763428, "logits/rejected": -2.661360025405884, "logps/chosen": -161.10269165039062, "logps/rejected": -217.74209594726562, "loss": 0.5598, "rewards/accuracies": 0.5, "rewards/chosen": -0.7750529646873474, "rewards/margins": 1.4126702547073364, "rewards/rejected": -2.187723159790039, "step": 6253 }, { "epoch": 0.73, "learning_rate": 8.298098500059052e-08, "logits/chosen": -2.4177656173706055, "logits/rejected": -2.515322208404541, "logps/chosen": -291.6696472167969, "logps/rejected": -263.458984375, "loss": 0.481, "rewards/accuracies": 0.625, "rewards/chosen": -0.5502617359161377, "rewards/margins": 2.1220645904541016, "rewards/rejected": -2.67232608795166, "step": 6254 }, { "epoch": 0.73, "learning_rate": 8.294555332467226e-08, "logits/chosen": -2.6189582347869873, "logits/rejected": -2.394188642501831, "logps/chosen": -251.34561157226562, "logps/rejected": -287.58441162109375, "loss": 0.3661, "rewards/accuracies": 0.75, "rewards/chosen": -0.8734403252601624, "rewards/margins": 2.77573299407959, "rewards/rejected": -3.6491732597351074, "step": 6255 }, { "epoch": 0.73, "learning_rate": 8.291012164875398e-08, "logits/chosen": -1.9054627418518066, "logits/rejected": -2.2309091091156006, "logps/chosen": -531.7499389648438, "logps/rejected": -303.321044921875, "loss": 0.1626, "rewards/accuracies": 0.875, "rewards/chosen": -0.4840483069419861, "rewards/margins": 3.39410400390625, "rewards/rejected": -3.8781521320343018, "step": 6256 }, { "epoch": 0.73, "learning_rate": 8.28746899728357e-08, "logits/chosen": -2.6862878799438477, "logits/rejected": -2.6782002449035645, "logps/chosen": -270.00982666015625, "logps/rejected": -272.99383544921875, "loss": 0.6655, "rewards/accuracies": 0.625, "rewards/chosen": -1.0199124813079834, "rewards/margins": 1.1365556716918945, "rewards/rejected": -2.156468391418457, "step": 6257 }, { "epoch": 0.73, "learning_rate": 8.283925829691745e-08, "logits/chosen": -2.495436906814575, "logits/rejected": -2.288783311843872, "logps/chosen": -95.98995971679688, "logps/rejected": -97.82820892333984, "loss": 0.8575, "rewards/accuracies": 0.75, "rewards/chosen": -1.8888918161392212, "rewards/margins": 1.1619963645935059, "rewards/rejected": -3.0508880615234375, "step": 6258 }, { "epoch": 0.73, "learning_rate": 8.280382662099917e-08, "logits/chosen": -2.4147067070007324, "logits/rejected": -2.499863624572754, "logps/chosen": -407.50335693359375, "logps/rejected": -226.576416015625, "loss": 0.5584, "rewards/accuracies": 0.75, "rewards/chosen": -0.8907994031906128, "rewards/margins": 1.901977777481079, "rewards/rejected": -2.7927770614624023, "step": 6259 }, { "epoch": 0.73, "learning_rate": 8.27683949450809e-08, "logits/chosen": -2.364745616912842, "logits/rejected": -2.2371985912323, "logps/chosen": -294.4615783691406, "logps/rejected": -319.8312072753906, "loss": 0.6786, "rewards/accuracies": 0.5, "rewards/chosen": -0.8878707885742188, "rewards/margins": 0.9721347093582153, "rewards/rejected": -1.8600056171417236, "step": 6260 }, { "epoch": 0.73, "learning_rate": 8.273296326916263e-08, "logits/chosen": -2.6869781017303467, "logits/rejected": -2.4125561714172363, "logps/chosen": -265.02716064453125, "logps/rejected": -315.237060546875, "loss": 0.3384, "rewards/accuracies": 0.875, "rewards/chosen": -0.9781250357627869, "rewards/margins": 2.018409013748169, "rewards/rejected": -2.9965338706970215, "step": 6261 }, { "epoch": 0.73, "learning_rate": 8.269753159324435e-08, "logits/chosen": -2.641075849533081, "logits/rejected": -2.4539215564727783, "logps/chosen": -237.93077087402344, "logps/rejected": -291.9406433105469, "loss": 0.6089, "rewards/accuracies": 0.5, "rewards/chosen": -0.7139095664024353, "rewards/margins": 0.743086040019989, "rewards/rejected": -1.4569956064224243, "step": 6262 }, { "epoch": 0.73, "learning_rate": 8.266209991732607e-08, "logits/chosen": -2.105607271194458, "logits/rejected": -1.717482566833496, "logps/chosen": -279.1279296875, "logps/rejected": -476.36773681640625, "loss": 0.573, "rewards/accuracies": 0.875, "rewards/chosen": -0.9925141334533691, "rewards/margins": 1.4825098514556885, "rewards/rejected": -2.4750242233276367, "step": 6263 }, { "epoch": 0.73, "learning_rate": 8.262666824140782e-08, "logits/chosen": -2.358150005340576, "logits/rejected": -2.7737839221954346, "logps/chosen": -498.6929931640625, "logps/rejected": -285.8047790527344, "loss": 0.3831, "rewards/accuracies": 0.875, "rewards/chosen": -0.7536145448684692, "rewards/margins": 2.4077465534210205, "rewards/rejected": -3.1613612174987793, "step": 6264 }, { "epoch": 0.73, "learning_rate": 8.259123656548954e-08, "logits/chosen": -2.2057714462280273, "logits/rejected": -2.3441431522369385, "logps/chosen": -378.1039733886719, "logps/rejected": -363.8343200683594, "loss": 0.341, "rewards/accuracies": 0.875, "rewards/chosen": -0.5303623676300049, "rewards/margins": 2.359379291534424, "rewards/rejected": -2.889741897583008, "step": 6265 }, { "epoch": 0.73, "learning_rate": 8.255580488957128e-08, "logits/chosen": -2.3165674209594727, "logits/rejected": -2.2454793453216553, "logps/chosen": -190.69009399414062, "logps/rejected": -256.96429443359375, "loss": 0.4102, "rewards/accuracies": 0.875, "rewards/chosen": -0.39243611693382263, "rewards/margins": 1.5246665477752686, "rewards/rejected": -1.9171026945114136, "step": 6266 }, { "epoch": 0.73, "learning_rate": 8.2520373213653e-08, "logits/chosen": -2.4928946495056152, "logits/rejected": -2.5909063816070557, "logps/chosen": -229.4381103515625, "logps/rejected": -280.5776672363281, "loss": 0.4203, "rewards/accuracies": 0.75, "rewards/chosen": -0.8401175737380981, "rewards/margins": 4.340473175048828, "rewards/rejected": -5.1805901527404785, "step": 6267 }, { "epoch": 0.73, "learning_rate": 8.248494153773472e-08, "logits/chosen": -2.0853829383850098, "logits/rejected": -2.16218900680542, "logps/chosen": -269.3026428222656, "logps/rejected": -305.31097412109375, "loss": 0.4087, "rewards/accuracies": 0.875, "rewards/chosen": -0.5687272548675537, "rewards/margins": 1.372157096862793, "rewards/rejected": -1.9408843517303467, "step": 6268 }, { "epoch": 0.73, "learning_rate": 8.244950986181646e-08, "logits/chosen": -2.5696840286254883, "logits/rejected": -2.461580276489258, "logps/chosen": -363.15875244140625, "logps/rejected": -310.2806396484375, "loss": 0.3471, "rewards/accuracies": 0.875, "rewards/chosen": -0.7559940814971924, "rewards/margins": 2.71581768989563, "rewards/rejected": -3.4718117713928223, "step": 6269 }, { "epoch": 0.73, "learning_rate": 8.241407818589819e-08, "logits/chosen": -2.0937113761901855, "logits/rejected": -2.0045359134674072, "logps/chosen": -305.2699890136719, "logps/rejected": -422.78912353515625, "loss": 0.1884, "rewards/accuracies": 1.0, "rewards/chosen": -0.2489248514175415, "rewards/margins": 2.0927305221557617, "rewards/rejected": -2.3416552543640137, "step": 6270 }, { "epoch": 0.73, "learning_rate": 8.237864650997992e-08, "logits/chosen": -1.9130191802978516, "logits/rejected": -2.0114526748657227, "logps/chosen": -231.45687866210938, "logps/rejected": -228.45582580566406, "loss": 0.5287, "rewards/accuracies": 0.875, "rewards/chosen": -1.2295161485671997, "rewards/margins": 1.1234570741653442, "rewards/rejected": -2.352973222732544, "step": 6271 }, { "epoch": 0.73, "learning_rate": 8.234321483406165e-08, "logits/chosen": -2.383329153060913, "logits/rejected": -2.758892059326172, "logps/chosen": -184.5867156982422, "logps/rejected": -113.81681823730469, "loss": 0.5987, "rewards/accuracies": 0.75, "rewards/chosen": -1.4687278270721436, "rewards/margins": 0.6785489320755005, "rewards/rejected": -2.1472766399383545, "step": 6272 }, { "epoch": 0.73, "learning_rate": 8.230778315814337e-08, "logits/chosen": -2.4360463619232178, "logits/rejected": -2.364473342895508, "logps/chosen": -296.12445068359375, "logps/rejected": -214.16531372070312, "loss": 0.5327, "rewards/accuracies": 0.875, "rewards/chosen": -0.569430410861969, "rewards/margins": 1.1649894714355469, "rewards/rejected": -1.7344199419021606, "step": 6273 }, { "epoch": 0.73, "learning_rate": 8.22723514822251e-08, "logits/chosen": -2.0668206214904785, "logits/rejected": -2.3308053016662598, "logps/chosen": -495.6319580078125, "logps/rejected": -549.9015502929688, "loss": 0.3124, "rewards/accuracies": 0.75, "rewards/chosen": -0.46989697217941284, "rewards/margins": 2.1919171810150146, "rewards/rejected": -2.6618142127990723, "step": 6274 }, { "epoch": 0.73, "learning_rate": 8.223691980630684e-08, "logits/chosen": -2.408827304840088, "logits/rejected": -2.4145760536193848, "logps/chosen": -310.6664123535156, "logps/rejected": -269.8677062988281, "loss": 0.2538, "rewards/accuracies": 0.875, "rewards/chosen": -0.7586761713027954, "rewards/margins": 3.156709671020508, "rewards/rejected": -3.9153857231140137, "step": 6275 }, { "epoch": 0.73, "learning_rate": 8.220148813038857e-08, "logits/chosen": -2.770163059234619, "logits/rejected": -2.610686779022217, "logps/chosen": -219.5615692138672, "logps/rejected": -252.30465698242188, "loss": 0.2636, "rewards/accuracies": 0.875, "rewards/chosen": -1.708857536315918, "rewards/margins": 2.321722984313965, "rewards/rejected": -4.030580520629883, "step": 6276 }, { "epoch": 0.73, "learning_rate": 8.21660564544703e-08, "logits/chosen": -2.821021318435669, "logits/rejected": -2.638423204421997, "logps/chosen": -341.5736999511719, "logps/rejected": -299.1343994140625, "loss": 0.2259, "rewards/accuracies": 0.875, "rewards/chosen": -1.069793701171875, "rewards/margins": 2.6551129817962646, "rewards/rejected": -3.7249066829681396, "step": 6277 }, { "epoch": 0.73, "learning_rate": 8.213062477855202e-08, "logits/chosen": -2.489882707595825, "logits/rejected": -2.766602039337158, "logps/chosen": -194.26040649414062, "logps/rejected": -273.5350341796875, "loss": 0.2305, "rewards/accuracies": 0.875, "rewards/chosen": -0.7636180520057678, "rewards/margins": 2.5584418773651123, "rewards/rejected": -3.3220598697662354, "step": 6278 }, { "epoch": 0.73, "learning_rate": 8.209519310263374e-08, "logits/chosen": -2.25260591506958, "logits/rejected": -2.643918514251709, "logps/chosen": -368.50579833984375, "logps/rejected": -212.8402099609375, "loss": 0.398, "rewards/accuracies": 0.75, "rewards/chosen": -0.9171187877655029, "rewards/margins": 2.661553382873535, "rewards/rejected": -3.578672170639038, "step": 6279 }, { "epoch": 0.73, "learning_rate": 8.205976142671547e-08, "logits/chosen": -3.0564124584198, "logits/rejected": -2.7946348190307617, "logps/chosen": -326.1153564453125, "logps/rejected": -253.46864318847656, "loss": 0.209, "rewards/accuracies": 0.875, "rewards/chosen": -0.6989347338676453, "rewards/margins": 3.6117019653320312, "rewards/rejected": -4.310636520385742, "step": 6280 }, { "epoch": 0.73, "learning_rate": 8.202432975079722e-08, "logits/chosen": -2.4073610305786133, "logits/rejected": -2.527120351791382, "logps/chosen": -432.93524169921875, "logps/rejected": -252.2908935546875, "loss": 0.1477, "rewards/accuracies": 1.0, "rewards/chosen": -0.6735633611679077, "rewards/margins": 3.4888954162597656, "rewards/rejected": -4.162459373474121, "step": 6281 }, { "epoch": 0.73, "learning_rate": 8.198889807487894e-08, "logits/chosen": -2.2064120769500732, "logits/rejected": -1.6713894605636597, "logps/chosen": -277.0528564453125, "logps/rejected": -403.48675537109375, "loss": 0.2324, "rewards/accuracies": 0.875, "rewards/chosen": -0.4311901926994324, "rewards/margins": 2.4416494369506836, "rewards/rejected": -2.87283992767334, "step": 6282 }, { "epoch": 0.73, "learning_rate": 8.195346639896067e-08, "logits/chosen": -2.1578598022460938, "logits/rejected": -2.318854808807373, "logps/chosen": -216.53350830078125, "logps/rejected": -261.013916015625, "loss": 0.2991, "rewards/accuracies": 0.875, "rewards/chosen": -0.17750658094882965, "rewards/margins": 2.9850986003875732, "rewards/rejected": -3.162605047225952, "step": 6283 }, { "epoch": 0.73, "learning_rate": 8.19180347230424e-08, "logits/chosen": -2.759251117706299, "logits/rejected": -2.5611636638641357, "logps/chosen": -415.4079895019531, "logps/rejected": -343.9595031738281, "loss": 0.2184, "rewards/accuracies": 0.875, "rewards/chosen": -1.2044880390167236, "rewards/margins": 2.903313398361206, "rewards/rejected": -4.10780143737793, "step": 6284 }, { "epoch": 0.73, "learning_rate": 8.188260304712412e-08, "logits/chosen": -2.2869277000427246, "logits/rejected": -2.510128974914551, "logps/chosen": -328.0937194824219, "logps/rejected": -233.30435180664062, "loss": 0.1304, "rewards/accuracies": 1.0, "rewards/chosen": -0.4671802818775177, "rewards/margins": 2.3819899559020996, "rewards/rejected": -2.849170207977295, "step": 6285 }, { "epoch": 0.73, "learning_rate": 8.184717137120585e-08, "logits/chosen": -1.878566026687622, "logits/rejected": -1.915647268295288, "logps/chosen": -200.0583953857422, "logps/rejected": -341.998046875, "loss": 0.4241, "rewards/accuracies": 0.875, "rewards/chosen": -0.2539724111557007, "rewards/margins": 1.5252900123596191, "rewards/rejected": -1.7792624235153198, "step": 6286 }, { "epoch": 0.73, "learning_rate": 8.181173969528759e-08, "logits/chosen": -2.385800361633301, "logits/rejected": -2.122696876525879, "logps/chosen": -332.53753662109375, "logps/rejected": -255.65121459960938, "loss": 0.3567, "rewards/accuracies": 0.875, "rewards/chosen": -0.7248864769935608, "rewards/margins": 2.4618723392486572, "rewards/rejected": -3.1867589950561523, "step": 6287 }, { "epoch": 0.73, "learning_rate": 8.177630801936931e-08, "logits/chosen": -2.590958833694458, "logits/rejected": -2.192592144012451, "logps/chosen": -294.2811584472656, "logps/rejected": -340.4561767578125, "loss": 0.1585, "rewards/accuracies": 1.0, "rewards/chosen": -1.1945384740829468, "rewards/margins": 2.7351019382476807, "rewards/rejected": -3.929640293121338, "step": 6288 }, { "epoch": 0.73, "learning_rate": 8.174087634345105e-08, "logits/chosen": -2.491452932357788, "logits/rejected": -2.4303038120269775, "logps/chosen": -202.56320190429688, "logps/rejected": -319.87652587890625, "loss": 0.3796, "rewards/accuracies": 0.75, "rewards/chosen": -0.6704690456390381, "rewards/margins": 2.1825695037841797, "rewards/rejected": -2.853038787841797, "step": 6289 }, { "epoch": 0.73, "learning_rate": 8.170544466753277e-08, "logits/chosen": -2.351283073425293, "logits/rejected": -2.263904094696045, "logps/chosen": -284.0327453613281, "logps/rejected": -338.5082092285156, "loss": 0.2193, "rewards/accuracies": 1.0, "rewards/chosen": 0.02547439932823181, "rewards/margins": 2.9597878456115723, "rewards/rejected": -2.9343132972717285, "step": 6290 }, { "epoch": 0.73, "learning_rate": 8.167001299161449e-08, "logits/chosen": -2.4824001789093018, "logits/rejected": -2.4733469486236572, "logps/chosen": -352.1817932128906, "logps/rejected": -359.93328857421875, "loss": 0.4636, "rewards/accuracies": 0.875, "rewards/chosen": -0.848662257194519, "rewards/margins": 2.6582250595092773, "rewards/rejected": -3.506887197494507, "step": 6291 }, { "epoch": 0.73, "learning_rate": 8.163458131569623e-08, "logits/chosen": -2.775198459625244, "logits/rejected": -2.603262424468994, "logps/chosen": -226.30523681640625, "logps/rejected": -265.21368408203125, "loss": 0.3126, "rewards/accuracies": 0.875, "rewards/chosen": -1.7944421768188477, "rewards/margins": 2.5048975944519043, "rewards/rejected": -4.299339294433594, "step": 6292 }, { "epoch": 0.73, "learning_rate": 8.159914963977796e-08, "logits/chosen": -2.079866886138916, "logits/rejected": -2.16642165184021, "logps/chosen": -223.07025146484375, "logps/rejected": -155.1702117919922, "loss": 0.7305, "rewards/accuracies": 0.5, "rewards/chosen": -2.244734287261963, "rewards/margins": 1.2067642211914062, "rewards/rejected": -3.451498508453369, "step": 6293 }, { "epoch": 0.73, "learning_rate": 8.15637179638597e-08, "logits/chosen": -2.429098606109619, "logits/rejected": -2.356956958770752, "logps/chosen": -315.6029357910156, "logps/rejected": -404.0758056640625, "loss": 0.1838, "rewards/accuracies": 1.0, "rewards/chosen": -0.9159033894538879, "rewards/margins": 2.6245365142822266, "rewards/rejected": -3.5404398441314697, "step": 6294 }, { "epoch": 0.73, "learning_rate": 8.152828628794142e-08, "logits/chosen": -2.1566147804260254, "logits/rejected": -1.9597539901733398, "logps/chosen": -196.0928955078125, "logps/rejected": -274.13623046875, "loss": 0.2938, "rewards/accuracies": 0.75, "rewards/chosen": -0.8582504391670227, "rewards/margins": 2.6490767002105713, "rewards/rejected": -3.5073273181915283, "step": 6295 }, { "epoch": 0.73, "learning_rate": 8.149285461202314e-08, "logits/chosen": -2.1523935794830322, "logits/rejected": -2.1770784854888916, "logps/chosen": -215.87460327148438, "logps/rejected": -276.5356750488281, "loss": 0.2839, "rewards/accuracies": 0.875, "rewards/chosen": -1.1355527639389038, "rewards/margins": 2.1412055492401123, "rewards/rejected": -3.2767584323883057, "step": 6296 }, { "epoch": 0.73, "learning_rate": 8.145742293610488e-08, "logits/chosen": -2.5140857696533203, "logits/rejected": -2.706479072570801, "logps/chosen": -302.07098388671875, "logps/rejected": -285.73345947265625, "loss": 0.5767, "rewards/accuracies": 0.75, "rewards/chosen": -0.9069710969924927, "rewards/margins": 1.6546944379806519, "rewards/rejected": -2.5616655349731445, "step": 6297 }, { "epoch": 0.73, "learning_rate": 8.14219912601866e-08, "logits/chosen": -2.2257556915283203, "logits/rejected": -2.2137725353240967, "logps/chosen": -298.0929260253906, "logps/rejected": -326.4964904785156, "loss": 0.278, "rewards/accuracies": 0.875, "rewards/chosen": -0.395040899515152, "rewards/margins": 2.1004443168640137, "rewards/rejected": -2.495485305786133, "step": 6298 }, { "epoch": 0.73, "learning_rate": 8.138655958426833e-08, "logits/chosen": -2.2454395294189453, "logits/rejected": -2.3735191822052, "logps/chosen": -299.9709777832031, "logps/rejected": -176.82261657714844, "loss": 0.8927, "rewards/accuracies": 0.625, "rewards/chosen": -2.1321961879730225, "rewards/margins": 1.0521554946899414, "rewards/rejected": -3.1843514442443848, "step": 6299 }, { "epoch": 0.73, "learning_rate": 8.135112790835007e-08, "logits/chosen": -2.4328107833862305, "logits/rejected": -2.433586835861206, "logps/chosen": -165.39280700683594, "logps/rejected": -202.50747680664062, "loss": 0.2071, "rewards/accuracies": 1.0, "rewards/chosen": -0.3298513889312744, "rewards/margins": 1.9624146223068237, "rewards/rejected": -2.2922658920288086, "step": 6300 }, { "epoch": 0.73, "learning_rate": 8.131569623243179e-08, "logits/chosen": -2.513254404067993, "logits/rejected": -2.0525245666503906, "logps/chosen": -215.44793701171875, "logps/rejected": -248.286865234375, "loss": 0.103, "rewards/accuracies": 1.0, "rewards/chosen": -1.3027451038360596, "rewards/margins": 2.6571762561798096, "rewards/rejected": -3.959921360015869, "step": 6301 }, { "epoch": 0.73, "learning_rate": 8.128026455651351e-08, "logits/chosen": -2.374569892883301, "logits/rejected": -2.7276487350463867, "logps/chosen": -282.89776611328125, "logps/rejected": -297.6399841308594, "loss": 0.1268, "rewards/accuracies": 1.0, "rewards/chosen": -1.443732738494873, "rewards/margins": 4.862957000732422, "rewards/rejected": -6.306689739227295, "step": 6302 }, { "epoch": 0.73, "learning_rate": 8.124483288059525e-08, "logits/chosen": -2.08272647857666, "logits/rejected": -2.200378894805908, "logps/chosen": -290.1570129394531, "logps/rejected": -341.6326904296875, "loss": 0.5478, "rewards/accuracies": 0.75, "rewards/chosen": -0.5187958478927612, "rewards/margins": 3.3704991340637207, "rewards/rejected": -3.8892951011657715, "step": 6303 }, { "epoch": 0.73, "learning_rate": 8.120940120467697e-08, "logits/chosen": -2.678109884262085, "logits/rejected": -2.8483505249023438, "logps/chosen": -203.88536071777344, "logps/rejected": -117.86561584472656, "loss": 0.7845, "rewards/accuracies": 0.625, "rewards/chosen": -1.0104577541351318, "rewards/margins": 0.3936546742916107, "rewards/rejected": -1.4041123390197754, "step": 6304 }, { "epoch": 0.73, "learning_rate": 8.117396952875872e-08, "logits/chosen": -2.691967725753784, "logits/rejected": -2.803523063659668, "logps/chosen": -326.55841064453125, "logps/rejected": -189.71987915039062, "loss": 0.5483, "rewards/accuracies": 0.875, "rewards/chosen": -1.6118083000183105, "rewards/margins": 2.255246162414551, "rewards/rejected": -3.8670544624328613, "step": 6305 }, { "epoch": 0.73, "learning_rate": 8.113853785284044e-08, "logits/chosen": -2.2065279483795166, "logits/rejected": -2.2410717010498047, "logps/chosen": -378.35919189453125, "logps/rejected": -357.23541259765625, "loss": 0.3575, "rewards/accuracies": 0.875, "rewards/chosen": -0.37178128957748413, "rewards/margins": 1.0559897422790527, "rewards/rejected": -1.427770972251892, "step": 6306 }, { "epoch": 0.73, "learning_rate": 8.110310617692216e-08, "logits/chosen": -2.418013095855713, "logits/rejected": -2.314380168914795, "logps/chosen": -351.4008483886719, "logps/rejected": -337.0474548339844, "loss": 0.1427, "rewards/accuracies": 1.0, "rewards/chosen": -0.14531224966049194, "rewards/margins": 3.0513761043548584, "rewards/rejected": -3.196688652038574, "step": 6307 }, { "epoch": 0.73, "learning_rate": 8.106767450100389e-08, "logits/chosen": -2.3339617252349854, "logits/rejected": -2.5668692588806152, "logps/chosen": -268.4945068359375, "logps/rejected": -172.73361206054688, "loss": 0.2585, "rewards/accuracies": 0.875, "rewards/chosen": -0.30025404691696167, "rewards/margins": 2.546266555786133, "rewards/rejected": -2.8465209007263184, "step": 6308 }, { "epoch": 0.73, "learning_rate": 8.103224282508562e-08, "logits/chosen": -1.7337167263031006, "logits/rejected": -2.0814599990844727, "logps/chosen": -427.5549011230469, "logps/rejected": -330.0394592285156, "loss": 0.192, "rewards/accuracies": 1.0, "rewards/chosen": -0.9564264416694641, "rewards/margins": 2.7459521293640137, "rewards/rejected": -3.702378749847412, "step": 6309 }, { "epoch": 0.73, "learning_rate": 8.099681114916736e-08, "logits/chosen": -2.6628193855285645, "logits/rejected": -2.5879805088043213, "logps/chosen": -352.0947265625, "logps/rejected": -261.4306640625, "loss": 0.3394, "rewards/accuracies": 0.875, "rewards/chosen": -0.12677332758903503, "rewards/margins": 1.805931806564331, "rewards/rejected": -1.9327051639556885, "step": 6310 }, { "epoch": 0.73, "learning_rate": 8.096137947324909e-08, "logits/chosen": -2.735067129135132, "logits/rejected": -2.661919593811035, "logps/chosen": -161.14744567871094, "logps/rejected": -249.68600463867188, "loss": 0.4429, "rewards/accuracies": 0.875, "rewards/chosen": -0.30944788455963135, "rewards/margins": 1.8128485679626465, "rewards/rejected": -2.1222963333129883, "step": 6311 }, { "epoch": 0.73, "learning_rate": 8.092594779733081e-08, "logits/chosen": -2.192164897918701, "logits/rejected": -2.000788688659668, "logps/chosen": -309.6528625488281, "logps/rejected": -286.0964050292969, "loss": 0.4859, "rewards/accuracies": 0.75, "rewards/chosen": -0.6537206172943115, "rewards/margins": 0.7854449152946472, "rewards/rejected": -1.4391655921936035, "step": 6312 }, { "epoch": 0.73, "learning_rate": 8.089051612141254e-08, "logits/chosen": -1.9527461528778076, "logits/rejected": -2.0570244789123535, "logps/chosen": -253.2622528076172, "logps/rejected": -283.11993408203125, "loss": 0.3253, "rewards/accuracies": 0.875, "rewards/chosen": -0.5407022833824158, "rewards/margins": 1.521715521812439, "rewards/rejected": -2.06241774559021, "step": 6313 }, { "epoch": 0.73, "learning_rate": 8.085508444549427e-08, "logits/chosen": -2.4430432319641113, "logits/rejected": -2.022214412689209, "logps/chosen": -316.5509948730469, "logps/rejected": -304.9562072753906, "loss": 0.4155, "rewards/accuracies": 0.875, "rewards/chosen": -0.8587452173233032, "rewards/margins": 1.6502361297607422, "rewards/rejected": -2.508981466293335, "step": 6314 }, { "epoch": 0.73, "learning_rate": 8.081965276957599e-08, "logits/chosen": -2.449873924255371, "logits/rejected": -2.2064504623413086, "logps/chosen": -324.83978271484375, "logps/rejected": -384.2618713378906, "loss": 0.3873, "rewards/accuracies": 0.75, "rewards/chosen": -1.130963921546936, "rewards/margins": 2.674929618835449, "rewards/rejected": -3.8058934211730957, "step": 6315 }, { "epoch": 0.73, "learning_rate": 8.078422109365773e-08, "logits/chosen": -2.715024471282959, "logits/rejected": -2.7743289470672607, "logps/chosen": -221.88232421875, "logps/rejected": -198.40609741210938, "loss": 0.3921, "rewards/accuracies": 0.75, "rewards/chosen": -1.1148772239685059, "rewards/margins": 2.199631690979004, "rewards/rejected": -3.314509153366089, "step": 6316 }, { "epoch": 0.73, "learning_rate": 8.074878941773946e-08, "logits/chosen": -2.6985230445861816, "logits/rejected": -2.373257875442505, "logps/chosen": -263.46173095703125, "logps/rejected": -275.8941650390625, "loss": 0.4419, "rewards/accuracies": 0.75, "rewards/chosen": -1.8131861686706543, "rewards/margins": 1.470260500907898, "rewards/rejected": -3.283446788787842, "step": 6317 }, { "epoch": 0.73, "learning_rate": 8.071335774182119e-08, "logits/chosen": -2.4910757541656494, "logits/rejected": -2.2706148624420166, "logps/chosen": -151.76080322265625, "logps/rejected": -248.01141357421875, "loss": 0.345, "rewards/accuracies": 0.75, "rewards/chosen": -0.20789504051208496, "rewards/margins": 2.3533577919006348, "rewards/rejected": -2.5612525939941406, "step": 6318 }, { "epoch": 0.74, "learning_rate": 8.067792606590291e-08, "logits/chosen": -2.1222779750823975, "logits/rejected": -2.506214141845703, "logps/chosen": -286.5250549316406, "logps/rejected": -222.7381591796875, "loss": 0.7344, "rewards/accuracies": 0.625, "rewards/chosen": -1.3550012111663818, "rewards/margins": 2.3467609882354736, "rewards/rejected": -3.7017621994018555, "step": 6319 }, { "epoch": 0.74, "learning_rate": 8.064249438998464e-08, "logits/chosen": -2.5851402282714844, "logits/rejected": -2.7743523120880127, "logps/chosen": -203.3822021484375, "logps/rejected": -194.300537109375, "loss": 0.3158, "rewards/accuracies": 0.875, "rewards/chosen": -1.0593708753585815, "rewards/margins": 2.071169137954712, "rewards/rejected": -3.130540132522583, "step": 6320 }, { "epoch": 0.74, "learning_rate": 8.060706271406637e-08, "logits/chosen": -2.2360267639160156, "logits/rejected": -2.263235092163086, "logps/chosen": -202.82937622070312, "logps/rejected": -197.75900268554688, "loss": 0.5966, "rewards/accuracies": 0.625, "rewards/chosen": -0.8050970435142517, "rewards/margins": 1.4324630498886108, "rewards/rejected": -2.2375600337982178, "step": 6321 }, { "epoch": 0.74, "learning_rate": 8.057163103814811e-08, "logits/chosen": -2.3795969486236572, "logits/rejected": -2.697396993637085, "logps/chosen": -295.0947265625, "logps/rejected": -194.46299743652344, "loss": 0.5222, "rewards/accuracies": 0.75, "rewards/chosen": -0.6688815355300903, "rewards/margins": 1.3016772270202637, "rewards/rejected": -1.9705586433410645, "step": 6322 }, { "epoch": 0.74, "learning_rate": 8.053619936222984e-08, "logits/chosen": -2.4393835067749023, "logits/rejected": -2.610004186630249, "logps/chosen": -330.8255615234375, "logps/rejected": -266.91778564453125, "loss": 0.6459, "rewards/accuracies": 0.5, "rewards/chosen": 0.15006303787231445, "rewards/margins": 1.5149664878845215, "rewards/rejected": -1.364903450012207, "step": 6323 }, { "epoch": 0.74, "learning_rate": 8.050076768631156e-08, "logits/chosen": -2.707409620285034, "logits/rejected": -2.5879998207092285, "logps/chosen": -197.8020782470703, "logps/rejected": -313.51165771484375, "loss": 0.3101, "rewards/accuracies": 0.75, "rewards/chosen": -1.4572105407714844, "rewards/margins": 2.926699638366699, "rewards/rejected": -4.383910179138184, "step": 6324 }, { "epoch": 0.74, "learning_rate": 8.046533601039328e-08, "logits/chosen": -2.4200048446655273, "logits/rejected": -2.338900089263916, "logps/chosen": -236.69314575195312, "logps/rejected": -289.446533203125, "loss": 0.098, "rewards/accuracies": 1.0, "rewards/chosen": -0.1741054356098175, "rewards/margins": 2.9379196166992188, "rewards/rejected": -3.112025022506714, "step": 6325 }, { "epoch": 0.74, "learning_rate": 8.042990433447502e-08, "logits/chosen": -1.9167144298553467, "logits/rejected": -1.765062928199768, "logps/chosen": -253.27584838867188, "logps/rejected": -298.7745361328125, "loss": 0.3556, "rewards/accuracies": 0.75, "rewards/chosen": -0.23549270629882812, "rewards/margins": 1.9545906782150269, "rewards/rejected": -2.1900835037231445, "step": 6326 }, { "epoch": 0.74, "learning_rate": 8.039447265855674e-08, "logits/chosen": -2.336045265197754, "logits/rejected": -2.367928981781006, "logps/chosen": -258.0199279785156, "logps/rejected": -226.34568786621094, "loss": 0.7137, "rewards/accuracies": 0.5, "rewards/chosen": -0.9433164596557617, "rewards/margins": 1.2530632019042969, "rewards/rejected": -2.1963796615600586, "step": 6327 }, { "epoch": 0.74, "learning_rate": 8.035904098263849e-08, "logits/chosen": -2.4406237602233887, "logits/rejected": -2.4752016067504883, "logps/chosen": -280.4081726074219, "logps/rejected": -212.3579864501953, "loss": 0.4545, "rewards/accuracies": 0.625, "rewards/chosen": -0.8370428681373596, "rewards/margins": 1.292995572090149, "rewards/rejected": -2.130038261413574, "step": 6328 }, { "epoch": 0.74, "learning_rate": 8.032360930672021e-08, "logits/chosen": -1.8495343923568726, "logits/rejected": -2.1059248447418213, "logps/chosen": -359.9935302734375, "logps/rejected": -332.1397705078125, "loss": 0.5057, "rewards/accuracies": 0.875, "rewards/chosen": -0.7448663115501404, "rewards/margins": 1.676568865776062, "rewards/rejected": -2.4214353561401367, "step": 6329 }, { "epoch": 0.74, "learning_rate": 8.028817763080193e-08, "logits/chosen": -2.373020887374878, "logits/rejected": -2.0751891136169434, "logps/chosen": -111.7528076171875, "logps/rejected": -212.71249389648438, "loss": 0.5459, "rewards/accuracies": 0.75, "rewards/chosen": -1.4305014610290527, "rewards/margins": 2.1131765842437744, "rewards/rejected": -3.543678045272827, "step": 6330 }, { "epoch": 0.74, "learning_rate": 8.025274595488367e-08, "logits/chosen": -1.7870392799377441, "logits/rejected": -2.156320810317993, "logps/chosen": -266.6884765625, "logps/rejected": -238.04588317871094, "loss": 0.5137, "rewards/accuracies": 0.75, "rewards/chosen": -0.6065239906311035, "rewards/margins": 1.5989813804626465, "rewards/rejected": -2.205505132675171, "step": 6331 }, { "epoch": 0.74, "learning_rate": 8.021731427896539e-08, "logits/chosen": -2.100820541381836, "logits/rejected": -2.3984410762786865, "logps/chosen": -122.62279510498047, "logps/rejected": -154.38226318359375, "loss": 0.5349, "rewards/accuracies": 0.75, "rewards/chosen": -1.7856298685073853, "rewards/margins": 1.3981035947799683, "rewards/rejected": -3.1837334632873535, "step": 6332 }, { "epoch": 0.74, "learning_rate": 8.018188260304711e-08, "logits/chosen": -2.411797046661377, "logits/rejected": -2.3634426593780518, "logps/chosen": -506.2190856933594, "logps/rejected": -496.09600830078125, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": -1.3148081302642822, "rewards/margins": 4.1490936279296875, "rewards/rejected": -5.463901519775391, "step": 6333 }, { "epoch": 0.74, "learning_rate": 8.014645092712886e-08, "logits/chosen": -2.5489275455474854, "logits/rejected": -2.595855712890625, "logps/chosen": -376.095947265625, "logps/rejected": -257.3561096191406, "loss": 0.2723, "rewards/accuracies": 0.875, "rewards/chosen": -0.8370881080627441, "rewards/margins": 3.264387607574463, "rewards/rejected": -4.101475715637207, "step": 6334 }, { "epoch": 0.74, "learning_rate": 8.011101925121058e-08, "logits/chosen": -2.2640929222106934, "logits/rejected": -2.519909143447876, "logps/chosen": -184.02383422851562, "logps/rejected": -116.49836730957031, "loss": 1.4869, "rewards/accuracies": 0.75, "rewards/chosen": -1.8177053928375244, "rewards/margins": 0.5954796671867371, "rewards/rejected": -2.4131851196289062, "step": 6335 }, { "epoch": 0.74, "learning_rate": 8.00755875752923e-08, "logits/chosen": -2.040315866470337, "logits/rejected": -2.250474452972412, "logps/chosen": -660.4305419921875, "logps/rejected": -282.0590515136719, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -0.6525173783302307, "rewards/margins": 3.889765739440918, "rewards/rejected": -4.542283058166504, "step": 6336 }, { "epoch": 0.74, "learning_rate": 8.004015589937404e-08, "logits/chosen": -2.224618911743164, "logits/rejected": -2.5247464179992676, "logps/chosen": -356.90277099609375, "logps/rejected": -268.60858154296875, "loss": 0.4535, "rewards/accuracies": 0.75, "rewards/chosen": -1.0749468803405762, "rewards/margins": 2.922450542449951, "rewards/rejected": -3.9973976612091064, "step": 6337 }, { "epoch": 0.74, "learning_rate": 8.000472422345576e-08, "logits/chosen": -2.746990203857422, "logits/rejected": -2.6114230155944824, "logps/chosen": -95.02008056640625, "logps/rejected": -188.71951293945312, "loss": 0.2535, "rewards/accuracies": 0.875, "rewards/chosen": -0.4209843575954437, "rewards/margins": 2.2544894218444824, "rewards/rejected": -2.675473690032959, "step": 6338 }, { "epoch": 0.74, "learning_rate": 7.996929254753748e-08, "logits/chosen": -2.2374794483184814, "logits/rejected": -2.4559807777404785, "logps/chosen": -135.2891387939453, "logps/rejected": -140.6848602294922, "loss": 0.5603, "rewards/accuracies": 0.875, "rewards/chosen": -0.7226144075393677, "rewards/margins": 2.0534355640411377, "rewards/rejected": -2.776050090789795, "step": 6339 }, { "epoch": 0.74, "learning_rate": 7.993386087161923e-08, "logits/chosen": -2.538193702697754, "logits/rejected": -2.479562759399414, "logps/chosen": -256.07586669921875, "logps/rejected": -255.6300811767578, "loss": 0.2871, "rewards/accuracies": 0.875, "rewards/chosen": -0.062509685754776, "rewards/margins": 2.0434389114379883, "rewards/rejected": -2.1059484481811523, "step": 6340 }, { "epoch": 0.74, "learning_rate": 7.989842919570095e-08, "logits/chosen": -1.6480209827423096, "logits/rejected": -1.8877830505371094, "logps/chosen": -478.7276611328125, "logps/rejected": -391.46966552734375, "loss": 0.4397, "rewards/accuracies": 0.75, "rewards/chosen": -1.0250537395477295, "rewards/margins": 2.3963165283203125, "rewards/rejected": -3.421370506286621, "step": 6341 }, { "epoch": 0.74, "learning_rate": 7.986299751978269e-08, "logits/chosen": -1.9675129652023315, "logits/rejected": -2.048269748687744, "logps/chosen": -373.30487060546875, "logps/rejected": -269.5622253417969, "loss": 0.5417, "rewards/accuracies": 0.75, "rewards/chosen": -1.6289035081863403, "rewards/margins": 1.624576210975647, "rewards/rejected": -3.2534799575805664, "step": 6342 }, { "epoch": 0.74, "learning_rate": 7.982756584386441e-08, "logits/chosen": -2.332080125808716, "logits/rejected": -2.4183220863342285, "logps/chosen": -173.9196319580078, "logps/rejected": -198.49227905273438, "loss": 0.6057, "rewards/accuracies": 0.625, "rewards/chosen": -1.2623704671859741, "rewards/margins": 1.0499732494354248, "rewards/rejected": -2.3123435974121094, "step": 6343 }, { "epoch": 0.74, "learning_rate": 7.979213416794613e-08, "logits/chosen": -2.3395373821258545, "logits/rejected": -2.4529404640197754, "logps/chosen": -219.03012084960938, "logps/rejected": -266.9730529785156, "loss": 0.3207, "rewards/accuracies": 0.875, "rewards/chosen": -0.509674072265625, "rewards/margins": 3.7301769256591797, "rewards/rejected": -4.239850997924805, "step": 6344 }, { "epoch": 0.74, "learning_rate": 7.975670249202788e-08, "logits/chosen": -2.44329833984375, "logits/rejected": -2.727839946746826, "logps/chosen": -286.5792541503906, "logps/rejected": -263.8098449707031, "loss": 0.204, "rewards/accuracies": 0.875, "rewards/chosen": -1.1153305768966675, "rewards/margins": 4.686122894287109, "rewards/rejected": -5.801453590393066, "step": 6345 }, { "epoch": 0.74, "learning_rate": 7.97212708161096e-08, "logits/chosen": -1.8430575132369995, "logits/rejected": -1.8269721269607544, "logps/chosen": -346.16162109375, "logps/rejected": -362.7665100097656, "loss": 0.2449, "rewards/accuracies": 0.875, "rewards/chosen": -0.25482305884361267, "rewards/margins": 2.1301023960113525, "rewards/rejected": -2.384925365447998, "step": 6346 }, { "epoch": 0.74, "learning_rate": 7.968583914019133e-08, "logits/chosen": -2.687967538833618, "logits/rejected": -2.6635665893554688, "logps/chosen": -275.445556640625, "logps/rejected": -279.7044372558594, "loss": 0.7284, "rewards/accuracies": 0.75, "rewards/chosen": -1.1380425691604614, "rewards/margins": 1.1375553607940674, "rewards/rejected": -2.2755980491638184, "step": 6347 }, { "epoch": 0.74, "learning_rate": 7.965040746427306e-08, "logits/chosen": -2.5320448875427246, "logits/rejected": -2.525076150894165, "logps/chosen": -271.2569274902344, "logps/rejected": -231.52011108398438, "loss": 0.4563, "rewards/accuracies": 0.625, "rewards/chosen": -3.400573253631592, "rewards/margins": 1.5105398893356323, "rewards/rejected": -4.9111127853393555, "step": 6348 }, { "epoch": 0.74, "learning_rate": 7.961497578835478e-08, "logits/chosen": -2.2348616123199463, "logits/rejected": -2.2682247161865234, "logps/chosen": -208.01763916015625, "logps/rejected": -255.90309143066406, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": 0.05114486813545227, "rewards/margins": 3.586090564727783, "rewards/rejected": -3.5349459648132324, "step": 6349 }, { "epoch": 0.74, "learning_rate": 7.95795441124365e-08, "logits/chosen": -2.188889265060425, "logits/rejected": -2.0249288082122803, "logps/chosen": -217.57273864746094, "logps/rejected": -377.77996826171875, "loss": 0.2571, "rewards/accuracies": 0.875, "rewards/chosen": -0.4829117953777313, "rewards/margins": 3.6875457763671875, "rewards/rejected": -4.17045783996582, "step": 6350 }, { "epoch": 0.74, "learning_rate": 7.954411243651825e-08, "logits/chosen": -1.9877650737762451, "logits/rejected": -2.043609142303467, "logps/chosen": -327.7762451171875, "logps/rejected": -294.2757263183594, "loss": 0.9263, "rewards/accuracies": 0.75, "rewards/chosen": -2.207418441772461, "rewards/margins": 1.025368332862854, "rewards/rejected": -3.2327868938446045, "step": 6351 }, { "epoch": 0.74, "learning_rate": 7.950868076059998e-08, "logits/chosen": -2.4101905822753906, "logits/rejected": -2.2523396015167236, "logps/chosen": -243.02162170410156, "logps/rejected": -278.38818359375, "loss": 1.1713, "rewards/accuracies": 0.75, "rewards/chosen": -1.4679781198501587, "rewards/margins": 0.5629440546035767, "rewards/rejected": -2.0309221744537354, "step": 6352 }, { "epoch": 0.74, "learning_rate": 7.94732490846817e-08, "logits/chosen": -2.762709617614746, "logits/rejected": -2.8701274394989014, "logps/chosen": -220.67984008789062, "logps/rejected": -229.9732666015625, "loss": 0.3112, "rewards/accuracies": 1.0, "rewards/chosen": -0.22425761818885803, "rewards/margins": 1.9971094131469727, "rewards/rejected": -2.221367120742798, "step": 6353 }, { "epoch": 0.74, "learning_rate": 7.943781740876343e-08, "logits/chosen": -2.9179625511169434, "logits/rejected": -2.9291281700134277, "logps/chosen": -152.64508056640625, "logps/rejected": -186.2161407470703, "loss": 0.2898, "rewards/accuracies": 0.875, "rewards/chosen": -1.6997923851013184, "rewards/margins": 2.489960193634033, "rewards/rejected": -4.189752578735352, "step": 6354 }, { "epoch": 0.74, "learning_rate": 7.940238573284516e-08, "logits/chosen": -2.2195541858673096, "logits/rejected": -2.4074959754943848, "logps/chosen": -431.2127685546875, "logps/rejected": -303.155029296875, "loss": 0.6655, "rewards/accuracies": 0.625, "rewards/chosen": -1.063283085823059, "rewards/margins": 0.7332756519317627, "rewards/rejected": -1.7965586185455322, "step": 6355 }, { "epoch": 0.74, "learning_rate": 7.936695405692688e-08, "logits/chosen": -1.6377177238464355, "logits/rejected": -1.9323945045471191, "logps/chosen": -238.52816772460938, "logps/rejected": -188.13729858398438, "loss": 0.3598, "rewards/accuracies": 0.875, "rewards/chosen": -1.1040544509887695, "rewards/margins": 1.2057509422302246, "rewards/rejected": -2.309805393218994, "step": 6356 }, { "epoch": 0.74, "learning_rate": 7.933152238100863e-08, "logits/chosen": -2.194798707962036, "logits/rejected": -2.193270206451416, "logps/chosen": -286.700439453125, "logps/rejected": -308.5291748046875, "loss": 0.4358, "rewards/accuracies": 0.75, "rewards/chosen": -1.0350940227508545, "rewards/margins": 1.666778564453125, "rewards/rejected": -2.7018728256225586, "step": 6357 }, { "epoch": 0.74, "learning_rate": 7.929609070509035e-08, "logits/chosen": -2.2756619453430176, "logits/rejected": -2.51761531829834, "logps/chosen": -300.4959411621094, "logps/rejected": -251.83456420898438, "loss": 0.4894, "rewards/accuracies": 0.625, "rewards/chosen": -0.6292780637741089, "rewards/margins": 2.141449213027954, "rewards/rejected": -2.7707276344299316, "step": 6358 }, { "epoch": 0.74, "learning_rate": 7.926065902917208e-08, "logits/chosen": -2.2793943881988525, "logits/rejected": -1.944014310836792, "logps/chosen": -212.53379821777344, "logps/rejected": -355.3455810546875, "loss": 0.313, "rewards/accuracies": 0.875, "rewards/chosen": -0.9987373352050781, "rewards/margins": 2.181215524673462, "rewards/rejected": -3.179952621459961, "step": 6359 }, { "epoch": 0.74, "learning_rate": 7.92252273532538e-08, "logits/chosen": -2.1999456882476807, "logits/rejected": -2.5204761028289795, "logps/chosen": -231.4736328125, "logps/rejected": -197.885986328125, "loss": 0.5163, "rewards/accuracies": 0.625, "rewards/chosen": -0.5233587622642517, "rewards/margins": 1.302664875984192, "rewards/rejected": -1.8260236978530884, "step": 6360 }, { "epoch": 0.74, "learning_rate": 7.918979567733553e-08, "logits/chosen": -2.8117880821228027, "logits/rejected": -2.6549758911132812, "logps/chosen": -460.23028564453125, "logps/rejected": -301.2898864746094, "loss": 0.4498, "rewards/accuracies": 0.875, "rewards/chosen": -0.7425796985626221, "rewards/margins": 1.5982327461242676, "rewards/rejected": -2.3408122062683105, "step": 6361 }, { "epoch": 0.74, "learning_rate": 7.915436400141725e-08, "logits/chosen": -2.353105306625366, "logits/rejected": -2.3922836780548096, "logps/chosen": -447.5340881347656, "logps/rejected": -291.4940490722656, "loss": 0.5881, "rewards/accuracies": 0.625, "rewards/chosen": -0.9012231826782227, "rewards/margins": 1.522189736366272, "rewards/rejected": -2.423412799835205, "step": 6362 }, { "epoch": 0.74, "learning_rate": 7.9118932325499e-08, "logits/chosen": -1.9020167589187622, "logits/rejected": -1.9705555438995361, "logps/chosen": -261.39434814453125, "logps/rejected": -272.8210144042969, "loss": 0.6319, "rewards/accuracies": 0.75, "rewards/chosen": -0.5101104378700256, "rewards/margins": 2.4853148460388184, "rewards/rejected": -2.9954254627227783, "step": 6363 }, { "epoch": 0.74, "learning_rate": 7.908350064958072e-08, "logits/chosen": -2.0674240589141846, "logits/rejected": -2.142570972442627, "logps/chosen": -378.7806701660156, "logps/rejected": -370.92462158203125, "loss": 0.2685, "rewards/accuracies": 0.875, "rewards/chosen": -1.0304274559020996, "rewards/margins": 1.9749919176101685, "rewards/rejected": -3.0054192543029785, "step": 6364 }, { "epoch": 0.74, "learning_rate": 7.904806897366246e-08, "logits/chosen": -2.604363203048706, "logits/rejected": -2.3836793899536133, "logps/chosen": -164.04202270507812, "logps/rejected": -252.3910369873047, "loss": 0.1998, "rewards/accuracies": 0.875, "rewards/chosen": -0.7688165307044983, "rewards/margins": 3.4030261039733887, "rewards/rejected": -4.171842575073242, "step": 6365 }, { "epoch": 0.74, "learning_rate": 7.901263729774418e-08, "logits/chosen": -2.043614387512207, "logits/rejected": -1.8067901134490967, "logps/chosen": -402.77264404296875, "logps/rejected": -450.8165588378906, "loss": 0.3358, "rewards/accuracies": 0.875, "rewards/chosen": -0.6997337341308594, "rewards/margins": 2.1948723793029785, "rewards/rejected": -2.894606113433838, "step": 6366 }, { "epoch": 0.74, "learning_rate": 7.89772056218259e-08, "logits/chosen": -1.9815620183944702, "logits/rejected": -2.1567678451538086, "logps/chosen": -265.69305419921875, "logps/rejected": -208.31370544433594, "loss": 0.4901, "rewards/accuracies": 0.75, "rewards/chosen": -1.5322065353393555, "rewards/margins": 1.6610689163208008, "rewards/rejected": -3.1932754516601562, "step": 6367 }, { "epoch": 0.74, "learning_rate": 7.894177394590764e-08, "logits/chosen": -2.639808177947998, "logits/rejected": -2.558666229248047, "logps/chosen": -165.11106872558594, "logps/rejected": -199.14105224609375, "loss": 0.1576, "rewards/accuracies": 1.0, "rewards/chosen": -1.2951794862747192, "rewards/margins": 2.61676025390625, "rewards/rejected": -3.911939859390259, "step": 6368 }, { "epoch": 0.74, "learning_rate": 7.890634226998937e-08, "logits/chosen": -2.270467519760132, "logits/rejected": -2.5941131114959717, "logps/chosen": -367.6741943359375, "logps/rejected": -426.695068359375, "loss": 0.2144, "rewards/accuracies": 0.875, "rewards/chosen": -0.8996294140815735, "rewards/margins": 3.03814697265625, "rewards/rejected": -3.9377760887145996, "step": 6369 }, { "epoch": 0.74, "learning_rate": 7.88709105940711e-08, "logits/chosen": -2.081770420074463, "logits/rejected": -2.2126667499542236, "logps/chosen": -312.7315368652344, "logps/rejected": -335.0174255371094, "loss": 0.4204, "rewards/accuracies": 0.75, "rewards/chosen": -1.0186822414398193, "rewards/margins": 2.056854248046875, "rewards/rejected": -3.0755362510681152, "step": 6370 }, { "epoch": 0.74, "learning_rate": 7.883547891815283e-08, "logits/chosen": -2.034918785095215, "logits/rejected": -2.069779872894287, "logps/chosen": -600.7816772460938, "logps/rejected": -465.16864013671875, "loss": 0.2051, "rewards/accuracies": 0.875, "rewards/chosen": -0.8423968553543091, "rewards/margins": 3.27105975151062, "rewards/rejected": -4.113456726074219, "step": 6371 }, { "epoch": 0.74, "learning_rate": 7.880004724223455e-08, "logits/chosen": -1.668796420097351, "logits/rejected": -1.5578563213348389, "logps/chosen": -271.2153015136719, "logps/rejected": -295.33282470703125, "loss": 0.2592, "rewards/accuracies": 1.0, "rewards/chosen": -0.01847708225250244, "rewards/margins": 1.7359957695007324, "rewards/rejected": -1.7544727325439453, "step": 6372 }, { "epoch": 0.74, "learning_rate": 7.876461556631627e-08, "logits/chosen": -1.6720739603042603, "logits/rejected": -1.5310975313186646, "logps/chosen": -506.5141906738281, "logps/rejected": -433.57000732421875, "loss": 0.3995, "rewards/accuracies": 0.875, "rewards/chosen": -0.40779513120651245, "rewards/margins": 1.3797935247421265, "rewards/rejected": -1.7875887155532837, "step": 6373 }, { "epoch": 0.74, "learning_rate": 7.872918389039801e-08, "logits/chosen": -2.6460795402526855, "logits/rejected": -2.87593150138855, "logps/chosen": -301.34161376953125, "logps/rejected": -202.25306701660156, "loss": 0.8871, "rewards/accuracies": 0.625, "rewards/chosen": -2.1275746822357178, "rewards/margins": 1.0288233757019043, "rewards/rejected": -3.156397819519043, "step": 6374 }, { "epoch": 0.74, "learning_rate": 7.869375221447974e-08, "logits/chosen": -2.8347349166870117, "logits/rejected": -2.8491361141204834, "logps/chosen": -135.2898712158203, "logps/rejected": -229.7789306640625, "loss": 0.9116, "rewards/accuracies": 0.75, "rewards/chosen": -1.6730844974517822, "rewards/margins": 0.5685628056526184, "rewards/rejected": -2.241647243499756, "step": 6375 }, { "epoch": 0.74, "learning_rate": 7.865832053856148e-08, "logits/chosen": -2.123122215270996, "logits/rejected": -1.979169249534607, "logps/chosen": -334.5379333496094, "logps/rejected": -312.5858154296875, "loss": 0.4361, "rewards/accuracies": 0.75, "rewards/chosen": -0.10387551039457321, "rewards/margins": 2.380066156387329, "rewards/rejected": -2.4839417934417725, "step": 6376 }, { "epoch": 0.74, "learning_rate": 7.86228888626432e-08, "logits/chosen": -2.435196876525879, "logits/rejected": -2.5130269527435303, "logps/chosen": -202.3519287109375, "logps/rejected": -254.30059814453125, "loss": 0.2889, "rewards/accuracies": 0.875, "rewards/chosen": -0.41866984963417053, "rewards/margins": 2.2714922428131104, "rewards/rejected": -2.690162181854248, "step": 6377 }, { "epoch": 0.74, "learning_rate": 7.858745718672492e-08, "logits/chosen": -2.450761556625366, "logits/rejected": -2.5789716243743896, "logps/chosen": -259.0782775878906, "logps/rejected": -327.1623229980469, "loss": 0.3381, "rewards/accuracies": 0.75, "rewards/chosen": -0.956762433052063, "rewards/margins": 1.993109941482544, "rewards/rejected": -2.9498724937438965, "step": 6378 }, { "epoch": 0.74, "learning_rate": 7.855202551080665e-08, "logits/chosen": -1.9856858253479004, "logits/rejected": -1.9470373392105103, "logps/chosen": -226.54275512695312, "logps/rejected": -293.525390625, "loss": 0.3393, "rewards/accuracies": 0.875, "rewards/chosen": -1.3621931076049805, "rewards/margins": 3.199445962905884, "rewards/rejected": -4.561638832092285, "step": 6379 }, { "epoch": 0.74, "learning_rate": 7.85165938348884e-08, "logits/chosen": -1.936179280281067, "logits/rejected": -1.8625948429107666, "logps/chosen": -289.6942138671875, "logps/rejected": -338.8097839355469, "loss": 0.5706, "rewards/accuracies": 0.5, "rewards/chosen": -1.0653133392333984, "rewards/margins": 2.964376926422119, "rewards/rejected": -4.029690265655518, "step": 6380 }, { "epoch": 0.74, "learning_rate": 7.848116215897012e-08, "logits/chosen": -2.0657742023468018, "logits/rejected": -2.1465325355529785, "logps/chosen": -255.31570434570312, "logps/rejected": -243.9123992919922, "loss": 0.4989, "rewards/accuracies": 0.75, "rewards/chosen": -1.2228045463562012, "rewards/margins": 1.5146889686584473, "rewards/rejected": -2.7374935150146484, "step": 6381 }, { "epoch": 0.74, "learning_rate": 7.844573048305185e-08, "logits/chosen": -1.9133802652359009, "logits/rejected": -2.108579158782959, "logps/chosen": -346.1192626953125, "logps/rejected": -322.2735900878906, "loss": 0.252, "rewards/accuracies": 0.875, "rewards/chosen": -0.3973810076713562, "rewards/margins": 2.286226272583008, "rewards/rejected": -2.683607339859009, "step": 6382 }, { "epoch": 0.74, "learning_rate": 7.841029880713357e-08, "logits/chosen": -1.9985883235931396, "logits/rejected": -2.173832416534424, "logps/chosen": -180.30029296875, "logps/rejected": -220.94223022460938, "loss": 0.6336, "rewards/accuracies": 0.75, "rewards/chosen": -1.6139665842056274, "rewards/margins": 0.863246500492096, "rewards/rejected": -2.477213144302368, "step": 6383 }, { "epoch": 0.74, "learning_rate": 7.83748671312153e-08, "logits/chosen": -1.8698720932006836, "logits/rejected": -1.9429621696472168, "logps/chosen": -354.42169189453125, "logps/rejected": -258.96478271484375, "loss": 0.2575, "rewards/accuracies": 0.875, "rewards/chosen": -0.5393194556236267, "rewards/margins": 1.4937331676483154, "rewards/rejected": -2.033052444458008, "step": 6384 }, { "epoch": 0.74, "learning_rate": 7.833943545529703e-08, "logits/chosen": -2.1771397590637207, "logits/rejected": -2.39392352104187, "logps/chosen": -354.16107177734375, "logps/rejected": -258.6745910644531, "loss": 0.7984, "rewards/accuracies": 0.5, "rewards/chosen": -1.092716097831726, "rewards/margins": 0.891497015953064, "rewards/rejected": -1.98421311378479, "step": 6385 }, { "epoch": 0.74, "learning_rate": 7.830400377937877e-08, "logits/chosen": -2.7067558765411377, "logits/rejected": -2.6858816146850586, "logps/chosen": -311.6806640625, "logps/rejected": -280.38909912109375, "loss": 0.3669, "rewards/accuracies": 0.875, "rewards/chosen": -0.3288809061050415, "rewards/margins": 2.27292537689209, "rewards/rejected": -2.601806163787842, "step": 6386 }, { "epoch": 0.74, "learning_rate": 7.826857210346049e-08, "logits/chosen": -2.6012721061706543, "logits/rejected": -2.686793327331543, "logps/chosen": -195.41128540039062, "logps/rejected": -327.2893371582031, "loss": 0.2204, "rewards/accuracies": 0.875, "rewards/chosen": -0.3343936502933502, "rewards/margins": 4.042865753173828, "rewards/rejected": -4.377259731292725, "step": 6387 }, { "epoch": 0.74, "learning_rate": 7.823314042754222e-08, "logits/chosen": -2.145986318588257, "logits/rejected": -2.257993459701538, "logps/chosen": -393.15777587890625, "logps/rejected": -252.71803283691406, "loss": 0.4194, "rewards/accuracies": 0.5, "rewards/chosen": -0.4136618673801422, "rewards/margins": 2.398505926132202, "rewards/rejected": -2.8121676445007324, "step": 6388 }, { "epoch": 0.74, "learning_rate": 7.819770875162395e-08, "logits/chosen": -2.6908040046691895, "logits/rejected": -2.8434133529663086, "logps/chosen": -312.4591064453125, "logps/rejected": -223.5443572998047, "loss": 0.3814, "rewards/accuracies": 0.75, "rewards/chosen": -0.596076250076294, "rewards/margins": 1.5741448402404785, "rewards/rejected": -2.1702210903167725, "step": 6389 }, { "epoch": 0.74, "learning_rate": 7.816227707570567e-08, "logits/chosen": -2.7513985633850098, "logits/rejected": -2.401251792907715, "logps/chosen": -268.22637939453125, "logps/rejected": -204.35552978515625, "loss": 0.0828, "rewards/accuracies": 1.0, "rewards/chosen": -1.0270477533340454, "rewards/margins": 3.2404892444610596, "rewards/rejected": -4.2675371170043945, "step": 6390 }, { "epoch": 0.74, "learning_rate": 7.81268453997874e-08, "logits/chosen": -2.588042974472046, "logits/rejected": -2.3384296894073486, "logps/chosen": -151.8711395263672, "logps/rejected": -255.84030151367188, "loss": 0.3645, "rewards/accuracies": 0.875, "rewards/chosen": -0.6730120182037354, "rewards/margins": 2.4799699783325195, "rewards/rejected": -3.152981996536255, "step": 6391 }, { "epoch": 0.74, "learning_rate": 7.809141372386914e-08, "logits/chosen": -1.8338249921798706, "logits/rejected": -1.654573678970337, "logps/chosen": -294.57196044921875, "logps/rejected": -529.7262573242188, "loss": 0.414, "rewards/accuracies": 0.75, "rewards/chosen": -0.6450214385986328, "rewards/margins": 2.026660442352295, "rewards/rejected": -2.6716818809509277, "step": 6392 }, { "epoch": 0.74, "learning_rate": 7.805598204795087e-08, "logits/chosen": -2.3792786598205566, "logits/rejected": -2.3096213340759277, "logps/chosen": -371.8365173339844, "logps/rejected": -470.0130920410156, "loss": 0.2059, "rewards/accuracies": 1.0, "rewards/chosen": -0.4234592914581299, "rewards/margins": 2.2488715648651123, "rewards/rejected": -2.672330856323242, "step": 6393 }, { "epoch": 0.74, "learning_rate": 7.80205503720326e-08, "logits/chosen": -2.5118401050567627, "logits/rejected": -2.6777453422546387, "logps/chosen": -163.86968994140625, "logps/rejected": -195.02151489257812, "loss": 0.2516, "rewards/accuracies": 1.0, "rewards/chosen": -0.495700478553772, "rewards/margins": 2.8646626472473145, "rewards/rejected": -3.360363245010376, "step": 6394 }, { "epoch": 0.74, "learning_rate": 7.798511869611432e-08, "logits/chosen": -2.5370490550994873, "logits/rejected": -2.3993818759918213, "logps/chosen": -258.4268798828125, "logps/rejected": -257.0291748046875, "loss": 0.4706, "rewards/accuracies": 0.75, "rewards/chosen": -1.4182193279266357, "rewards/margins": 1.7299749851226807, "rewards/rejected": -3.1481943130493164, "step": 6395 }, { "epoch": 0.74, "learning_rate": 7.794968702019605e-08, "logits/chosen": -2.118284225463867, "logits/rejected": -1.9005517959594727, "logps/chosen": -275.18121337890625, "logps/rejected": -399.9444580078125, "loss": 0.5522, "rewards/accuracies": 0.625, "rewards/chosen": -1.3262256383895874, "rewards/margins": 1.8195912837982178, "rewards/rejected": -3.1458168029785156, "step": 6396 }, { "epoch": 0.74, "learning_rate": 7.791425534427778e-08, "logits/chosen": -2.2186057567596436, "logits/rejected": -2.0822126865386963, "logps/chosen": -310.2674255371094, "logps/rejected": -238.73013305664062, "loss": 0.254, "rewards/accuracies": 1.0, "rewards/chosen": -0.37500691413879395, "rewards/margins": 1.5949251651763916, "rewards/rejected": -1.9699320793151855, "step": 6397 }, { "epoch": 0.74, "learning_rate": 7.787882366835951e-08, "logits/chosen": -2.501980781555176, "logits/rejected": -2.134209156036377, "logps/chosen": -452.5187683105469, "logps/rejected": -440.0625, "loss": 0.4548, "rewards/accuracies": 0.625, "rewards/chosen": -1.0395818948745728, "rewards/margins": 1.407677173614502, "rewards/rejected": -2.4472591876983643, "step": 6398 }, { "epoch": 0.74, "learning_rate": 7.784339199244125e-08, "logits/chosen": -2.3460328578948975, "logits/rejected": -2.5737810134887695, "logps/chosen": -152.9859619140625, "logps/rejected": -145.36912536621094, "loss": 0.3407, "rewards/accuracies": 0.75, "rewards/chosen": -0.2107190489768982, "rewards/margins": 2.135066032409668, "rewards/rejected": -2.345785140991211, "step": 6399 }, { "epoch": 0.74, "learning_rate": 7.780796031652297e-08, "logits/chosen": -2.001856803894043, "logits/rejected": -2.4711577892303467, "logps/chosen": -292.7585754394531, "logps/rejected": -218.11392211914062, "loss": 0.7948, "rewards/accuracies": 0.625, "rewards/chosen": -1.9619510173797607, "rewards/margins": 2.713930368423462, "rewards/rejected": -4.675881385803223, "step": 6400 }, { "epoch": 0.74, "learning_rate": 7.777252864060469e-08, "logits/chosen": -2.4192304611206055, "logits/rejected": -2.420469045639038, "logps/chosen": -276.3792419433594, "logps/rejected": -426.84539794921875, "loss": 0.319, "rewards/accuracies": 0.875, "rewards/chosen": -0.37056198716163635, "rewards/margins": 2.081132411956787, "rewards/rejected": -2.4516944885253906, "step": 6401 }, { "epoch": 0.74, "learning_rate": 7.773709696468643e-08, "logits/chosen": -2.474398612976074, "logits/rejected": -2.5094876289367676, "logps/chosen": -348.28717041015625, "logps/rejected": -382.49127197265625, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": -0.3090416193008423, "rewards/margins": 3.640346050262451, "rewards/rejected": -3.949387311935425, "step": 6402 }, { "epoch": 0.74, "learning_rate": 7.770166528876815e-08, "logits/chosen": -2.181915044784546, "logits/rejected": -2.7005882263183594, "logps/chosen": -473.056396484375, "logps/rejected": -364.496826171875, "loss": 0.3907, "rewards/accuracies": 0.75, "rewards/chosen": -1.3759779930114746, "rewards/margins": 2.50034761428833, "rewards/rejected": -3.8763256072998047, "step": 6403 }, { "epoch": 0.74, "learning_rate": 7.76662336128499e-08, "logits/chosen": -2.3214123249053955, "logits/rejected": -2.5783770084381104, "logps/chosen": -331.99114990234375, "logps/rejected": -243.52842712402344, "loss": 0.4352, "rewards/accuracies": 0.875, "rewards/chosen": -0.6402058005332947, "rewards/margins": 2.6924171447753906, "rewards/rejected": -3.332623243331909, "step": 6404 }, { "epoch": 0.75, "learning_rate": 7.763080193693162e-08, "logits/chosen": -2.486558437347412, "logits/rejected": -2.4222195148468018, "logps/chosen": -181.11451721191406, "logps/rejected": -336.13665771484375, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.23040178418159485, "rewards/margins": 3.480703830718994, "rewards/rejected": -3.2503018379211426, "step": 6405 }, { "epoch": 0.75, "learning_rate": 7.759537026101334e-08, "logits/chosen": -2.7107272148132324, "logits/rejected": -2.5056614875793457, "logps/chosen": -194.38168334960938, "logps/rejected": -338.28448486328125, "loss": 1.2051, "rewards/accuracies": 0.625, "rewards/chosen": -1.0191123485565186, "rewards/margins": 0.20944470167160034, "rewards/rejected": -1.2285569906234741, "step": 6406 }, { "epoch": 0.75, "learning_rate": 7.755993858509506e-08, "logits/chosen": -2.742217540740967, "logits/rejected": -2.6435117721557617, "logps/chosen": -233.5757293701172, "logps/rejected": -204.10552978515625, "loss": 0.5591, "rewards/accuracies": 0.875, "rewards/chosen": -1.7199912071228027, "rewards/margins": 1.6379963159561157, "rewards/rejected": -3.357987403869629, "step": 6407 }, { "epoch": 0.75, "learning_rate": 7.75245069091768e-08, "logits/chosen": -2.321730136871338, "logits/rejected": -2.3149333000183105, "logps/chosen": -98.77872467041016, "logps/rejected": -178.81195068359375, "loss": 0.1743, "rewards/accuracies": 1.0, "rewards/chosen": 0.27893608808517456, "rewards/margins": 2.057197093963623, "rewards/rejected": -1.7782609462738037, "step": 6408 }, { "epoch": 0.75, "learning_rate": 7.748907523325852e-08, "logits/chosen": -2.0387425422668457, "logits/rejected": -2.279232978820801, "logps/chosen": -325.68994140625, "logps/rejected": -253.56680297851562, "loss": 0.309, "rewards/accuracies": 0.875, "rewards/chosen": -0.7646862268447876, "rewards/margins": 2.463909864425659, "rewards/rejected": -3.2285962104797363, "step": 6409 }, { "epoch": 0.75, "learning_rate": 7.745364355734027e-08, "logits/chosen": -2.392296314239502, "logits/rejected": -2.693480968475342, "logps/chosen": -294.962890625, "logps/rejected": -198.32708740234375, "loss": 0.4708, "rewards/accuracies": 0.75, "rewards/chosen": -0.5032957792282104, "rewards/margins": 0.8935557007789612, "rewards/rejected": -1.3968515396118164, "step": 6410 }, { "epoch": 0.75, "learning_rate": 7.741821188142199e-08, "logits/chosen": -2.5389504432678223, "logits/rejected": -2.484173536300659, "logps/chosen": -142.568603515625, "logps/rejected": -189.47430419921875, "loss": 0.6796, "rewards/accuracies": 0.875, "rewards/chosen": -0.7840370535850525, "rewards/margins": 1.5526070594787598, "rewards/rejected": -2.336644172668457, "step": 6411 }, { "epoch": 0.75, "learning_rate": 7.738278020550371e-08, "logits/chosen": -2.5575222969055176, "logits/rejected": -2.5970146656036377, "logps/chosen": -420.9395446777344, "logps/rejected": -323.59466552734375, "loss": 1.0558, "rewards/accuracies": 0.625, "rewards/chosen": -1.2047288417816162, "rewards/margins": 0.5521663427352905, "rewards/rejected": -1.7568954229354858, "step": 6412 }, { "epoch": 0.75, "learning_rate": 7.734734852958545e-08, "logits/chosen": -2.1747989654541016, "logits/rejected": -2.170938014984131, "logps/chosen": -249.02703857421875, "logps/rejected": -217.4600830078125, "loss": 0.602, "rewards/accuracies": 0.625, "rewards/chosen": -1.2801377773284912, "rewards/margins": 1.802008867263794, "rewards/rejected": -3.082146644592285, "step": 6413 }, { "epoch": 0.75, "learning_rate": 7.731191685366717e-08, "logits/chosen": -2.6273951530456543, "logits/rejected": -2.6874468326568604, "logps/chosen": -264.71185302734375, "logps/rejected": -173.08937072753906, "loss": 0.2643, "rewards/accuracies": 0.875, "rewards/chosen": -0.47029584646224976, "rewards/margins": 2.6012015342712402, "rewards/rejected": -3.0714974403381348, "step": 6414 }, { "epoch": 0.75, "learning_rate": 7.72764851777489e-08, "logits/chosen": -2.0252740383148193, "logits/rejected": -2.1635279655456543, "logps/chosen": -211.72442626953125, "logps/rejected": -281.4804382324219, "loss": 0.1336, "rewards/accuracies": 1.0, "rewards/chosen": -0.7684502005577087, "rewards/margins": 4.9723615646362305, "rewards/rejected": -5.740812301635742, "step": 6415 }, { "epoch": 0.75, "learning_rate": 7.724105350183064e-08, "logits/chosen": -1.6997333765029907, "logits/rejected": -1.5918294191360474, "logps/chosen": -450.3856201171875, "logps/rejected": -402.88134765625, "loss": 0.3319, "rewards/accuracies": 0.875, "rewards/chosen": -0.5704143047332764, "rewards/margins": 2.1217641830444336, "rewards/rejected": -2.69217848777771, "step": 6416 }, { "epoch": 0.75, "learning_rate": 7.720562182591236e-08, "logits/chosen": -2.4934637546539307, "logits/rejected": -2.5155117511749268, "logps/chosen": -360.11785888671875, "logps/rejected": -241.81321716308594, "loss": 0.8171, "rewards/accuracies": 0.625, "rewards/chosen": -0.428935170173645, "rewards/margins": 0.8101019859313965, "rewards/rejected": -1.2390371561050415, "step": 6417 }, { "epoch": 0.75, "learning_rate": 7.717019014999409e-08, "logits/chosen": -2.6665194034576416, "logits/rejected": -2.5604965686798096, "logps/chosen": -116.65167236328125, "logps/rejected": -168.26075744628906, "loss": 0.2769, "rewards/accuracies": 0.875, "rewards/chosen": -0.8757097125053406, "rewards/margins": 2.071082592010498, "rewards/rejected": -2.9467923641204834, "step": 6418 }, { "epoch": 0.75, "learning_rate": 7.713475847407582e-08, "logits/chosen": -2.6286911964416504, "logits/rejected": -2.559898614883423, "logps/chosen": -224.26950073242188, "logps/rejected": -270.5161437988281, "loss": 0.344, "rewards/accuracies": 0.875, "rewards/chosen": -0.19065624475479126, "rewards/margins": 1.9123352766036987, "rewards/rejected": -2.1029915809631348, "step": 6419 }, { "epoch": 0.75, "learning_rate": 7.709932679815754e-08, "logits/chosen": -2.9494924545288086, "logits/rejected": -2.9871902465820312, "logps/chosen": -389.91729736328125, "logps/rejected": -317.595703125, "loss": 0.3525, "rewards/accuracies": 0.875, "rewards/chosen": -1.1283293962478638, "rewards/margins": 2.1654815673828125, "rewards/rejected": -3.293811321258545, "step": 6420 }, { "epoch": 0.75, "learning_rate": 7.706389512223929e-08, "logits/chosen": -2.4265518188476562, "logits/rejected": -2.2038326263427734, "logps/chosen": -130.63023376464844, "logps/rejected": -225.9704132080078, "loss": 0.1912, "rewards/accuracies": 1.0, "rewards/chosen": -1.1925289630889893, "rewards/margins": 2.4350008964538574, "rewards/rejected": -3.627530097961426, "step": 6421 }, { "epoch": 0.75, "learning_rate": 7.702846344632101e-08, "logits/chosen": -2.337998390197754, "logits/rejected": -2.1579906940460205, "logps/chosen": -350.9261474609375, "logps/rejected": -283.45416259765625, "loss": 0.334, "rewards/accuracies": 0.875, "rewards/chosen": -1.5537731647491455, "rewards/margins": 2.0075488090515137, "rewards/rejected": -3.561321973800659, "step": 6422 }, { "epoch": 0.75, "learning_rate": 7.699303177040274e-08, "logits/chosen": -1.7990086078643799, "logits/rejected": -2.044326066970825, "logps/chosen": -346.77459716796875, "logps/rejected": -235.65347290039062, "loss": 0.2497, "rewards/accuracies": 0.875, "rewards/chosen": -0.7417292594909668, "rewards/margins": 2.1745195388793945, "rewards/rejected": -2.9162487983703613, "step": 6423 }, { "epoch": 0.75, "learning_rate": 7.695760009448446e-08, "logits/chosen": -2.50469970703125, "logits/rejected": -2.839393138885498, "logps/chosen": -178.4884490966797, "logps/rejected": -188.29721069335938, "loss": 0.7207, "rewards/accuracies": 0.5, "rewards/chosen": -2.0601043701171875, "rewards/margins": 0.38692495226860046, "rewards/rejected": -2.4470293521881104, "step": 6424 }, { "epoch": 0.75, "learning_rate": 7.69221684185662e-08, "logits/chosen": -2.257568120956421, "logits/rejected": -2.3210902214050293, "logps/chosen": -338.0167541503906, "logps/rejected": -240.28793334960938, "loss": 0.5292, "rewards/accuracies": 0.75, "rewards/chosen": -0.5461677312850952, "rewards/margins": 1.6609265804290771, "rewards/rejected": -2.2070939540863037, "step": 6425 }, { "epoch": 0.75, "learning_rate": 7.688673674264792e-08, "logits/chosen": -1.8621723651885986, "logits/rejected": -2.1463358402252197, "logps/chosen": -447.75689697265625, "logps/rejected": -313.0927429199219, "loss": 0.5084, "rewards/accuracies": 0.75, "rewards/chosen": -1.2757142782211304, "rewards/margins": 0.9229774475097656, "rewards/rejected": -2.1986918449401855, "step": 6426 }, { "epoch": 0.75, "learning_rate": 7.685130506672966e-08, "logits/chosen": -2.346403121948242, "logits/rejected": -2.356208324432373, "logps/chosen": -313.5272216796875, "logps/rejected": -254.33682250976562, "loss": 0.4091, "rewards/accuracies": 0.625, "rewards/chosen": -0.9742799997329712, "rewards/margins": 1.9327505826950073, "rewards/rejected": -2.9070305824279785, "step": 6427 }, { "epoch": 0.75, "learning_rate": 7.681587339081139e-08, "logits/chosen": -2.499162197113037, "logits/rejected": -2.7812552452087402, "logps/chosen": -125.84668731689453, "logps/rejected": -144.59274291992188, "loss": 0.306, "rewards/accuracies": 0.75, "rewards/chosen": -0.3935560882091522, "rewards/margins": 2.2597146034240723, "rewards/rejected": -2.653270721435547, "step": 6428 }, { "epoch": 0.75, "learning_rate": 7.678044171489311e-08, "logits/chosen": -2.6562695503234863, "logits/rejected": -2.642174482345581, "logps/chosen": -191.4093017578125, "logps/rejected": -149.96524047851562, "loss": 0.3989, "rewards/accuracies": 0.75, "rewards/chosen": -0.6402041912078857, "rewards/margins": 1.5469110012054443, "rewards/rejected": -2.18711519241333, "step": 6429 }, { "epoch": 0.75, "learning_rate": 7.674501003897484e-08, "logits/chosen": -2.7760348320007324, "logits/rejected": -2.6117167472839355, "logps/chosen": -338.2925720214844, "logps/rejected": -325.4942321777344, "loss": 0.2399, "rewards/accuracies": 0.875, "rewards/chosen": 0.0609721839427948, "rewards/margins": 2.663550853729248, "rewards/rejected": -2.602578639984131, "step": 6430 }, { "epoch": 0.75, "learning_rate": 7.670957836305657e-08, "logits/chosen": -2.445775270462036, "logits/rejected": -2.4182183742523193, "logps/chosen": -151.1334991455078, "logps/rejected": -150.6818389892578, "loss": 0.3694, "rewards/accuracies": 0.875, "rewards/chosen": -0.7647528648376465, "rewards/margins": 1.4589147567749023, "rewards/rejected": -2.2236673831939697, "step": 6431 }, { "epoch": 0.75, "learning_rate": 7.667414668713829e-08, "logits/chosen": -1.8199172019958496, "logits/rejected": -1.9016788005828857, "logps/chosen": -292.3561706542969, "logps/rejected": -237.92996215820312, "loss": 0.3644, "rewards/accuracies": 0.875, "rewards/chosen": -0.21134167909622192, "rewards/margins": 3.099377155303955, "rewards/rejected": -3.3107187747955322, "step": 6432 }, { "epoch": 0.75, "learning_rate": 7.663871501122004e-08, "logits/chosen": -1.7157964706420898, "logits/rejected": -2.1576313972473145, "logps/chosen": -525.5230102539062, "logps/rejected": -398.78155517578125, "loss": 0.0809, "rewards/accuracies": 1.0, "rewards/chosen": -0.654600203037262, "rewards/margins": 3.6912384033203125, "rewards/rejected": -4.34583854675293, "step": 6433 }, { "epoch": 0.75, "learning_rate": 7.660328333530176e-08, "logits/chosen": -2.387354612350464, "logits/rejected": -2.482710599899292, "logps/chosen": -183.86631774902344, "logps/rejected": -237.038818359375, "loss": 3.8958, "rewards/accuracies": 0.625, "rewards/chosen": -4.453646659851074, "rewards/margins": -2.402622699737549, "rewards/rejected": -2.0510237216949463, "step": 6434 }, { "epoch": 0.75, "learning_rate": 7.656785165938348e-08, "logits/chosen": -2.186370611190796, "logits/rejected": -2.560506820678711, "logps/chosen": -278.1424560546875, "logps/rejected": -204.10498046875, "loss": 0.3552, "rewards/accuracies": 0.875, "rewards/chosen": -0.2696784734725952, "rewards/margins": 2.2725183963775635, "rewards/rejected": -2.542196750640869, "step": 6435 }, { "epoch": 0.75, "learning_rate": 7.653241998346522e-08, "logits/chosen": -2.4624102115631104, "logits/rejected": -2.461409568786621, "logps/chosen": -470.4972839355469, "logps/rejected": -527.3855590820312, "loss": 0.226, "rewards/accuracies": 0.875, "rewards/chosen": -0.19695037603378296, "rewards/margins": 2.706575393676758, "rewards/rejected": -2.9035258293151855, "step": 6436 }, { "epoch": 0.75, "learning_rate": 7.649698830754694e-08, "logits/chosen": -2.0214905738830566, "logits/rejected": -2.026721715927124, "logps/chosen": -397.970947265625, "logps/rejected": -259.406494140625, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": 0.111090287566185, "rewards/margins": 2.7056374549865723, "rewards/rejected": -2.5945472717285156, "step": 6437 }, { "epoch": 0.75, "learning_rate": 7.646155663162866e-08, "logits/chosen": -2.2953052520751953, "logits/rejected": -2.2925941944122314, "logps/chosen": -282.2213134765625, "logps/rejected": -339.807373046875, "loss": 0.3698, "rewards/accuracies": 0.875, "rewards/chosen": -0.836524486541748, "rewards/margins": 2.700441837310791, "rewards/rejected": -3.536966323852539, "step": 6438 }, { "epoch": 0.75, "learning_rate": 7.642612495571041e-08, "logits/chosen": -2.29888916015625, "logits/rejected": -2.2941832542419434, "logps/chosen": -148.8206787109375, "logps/rejected": -221.51780700683594, "loss": 0.4291, "rewards/accuracies": 0.75, "rewards/chosen": -0.5345407128334045, "rewards/margins": 1.535630226135254, "rewards/rejected": -2.0701708793640137, "step": 6439 }, { "epoch": 0.75, "learning_rate": 7.639069327979213e-08, "logits/chosen": -2.4630770683288574, "logits/rejected": -2.332904815673828, "logps/chosen": -193.40936279296875, "logps/rejected": -259.61285400390625, "loss": 0.2297, "rewards/accuracies": 0.875, "rewards/chosen": -0.4938666820526123, "rewards/margins": 2.873671293258667, "rewards/rejected": -3.3675379753112793, "step": 6440 }, { "epoch": 0.75, "learning_rate": 7.635526160387385e-08, "logits/chosen": -2.464574098587036, "logits/rejected": -2.5408425331115723, "logps/chosen": -431.915771484375, "logps/rejected": -390.4723205566406, "loss": 0.3606, "rewards/accuracies": 0.875, "rewards/chosen": -0.05764177441596985, "rewards/margins": 1.3085925579071045, "rewards/rejected": -1.366234302520752, "step": 6441 }, { "epoch": 0.75, "learning_rate": 7.631982992795559e-08, "logits/chosen": -2.410548210144043, "logits/rejected": -2.1704158782958984, "logps/chosen": -212.90451049804688, "logps/rejected": -332.1941223144531, "loss": 0.2765, "rewards/accuracies": 0.75, "rewards/chosen": -1.0454578399658203, "rewards/margins": 3.0149343013763428, "rewards/rejected": -4.060391902923584, "step": 6442 }, { "epoch": 0.75, "learning_rate": 7.628439825203731e-08, "logits/chosen": -2.3366470336914062, "logits/rejected": -2.1366610527038574, "logps/chosen": -294.73858642578125, "logps/rejected": -366.5589599609375, "loss": 0.4117, "rewards/accuracies": 0.75, "rewards/chosen": -0.904717743396759, "rewards/margins": 2.1983988285064697, "rewards/rejected": -3.103116512298584, "step": 6443 }, { "epoch": 0.75, "learning_rate": 7.624896657611903e-08, "logits/chosen": -2.305436134338379, "logits/rejected": -2.45635724067688, "logps/chosen": -316.4794006347656, "logps/rejected": -295.93994140625, "loss": 0.533, "rewards/accuracies": 0.75, "rewards/chosen": -1.414783239364624, "rewards/margins": 1.143951654434204, "rewards/rejected": -2.558734893798828, "step": 6444 }, { "epoch": 0.75, "learning_rate": 7.621353490020078e-08, "logits/chosen": -2.4052436351776123, "logits/rejected": -2.41888427734375, "logps/chosen": -346.21722412109375, "logps/rejected": -365.48455810546875, "loss": 0.5674, "rewards/accuracies": 0.5, "rewards/chosen": -1.4423046112060547, "rewards/margins": 1.6221472024917603, "rewards/rejected": -3.0644521713256836, "step": 6445 }, { "epoch": 0.75, "learning_rate": 7.61781032242825e-08, "logits/chosen": -1.9310271739959717, "logits/rejected": -2.4453656673431396, "logps/chosen": -336.87689208984375, "logps/rejected": -218.16835021972656, "loss": 0.1854, "rewards/accuracies": 1.0, "rewards/chosen": -0.1601131111383438, "rewards/margins": 2.227339506149292, "rewards/rejected": -2.3874526023864746, "step": 6446 }, { "epoch": 0.75, "learning_rate": 7.614267154836424e-08, "logits/chosen": -2.6300199031829834, "logits/rejected": -2.7626144886016846, "logps/chosen": -210.91806030273438, "logps/rejected": -171.9352569580078, "loss": 0.1421, "rewards/accuracies": 1.0, "rewards/chosen": -0.5809270143508911, "rewards/margins": 2.3892831802368164, "rewards/rejected": -2.970210075378418, "step": 6447 }, { "epoch": 0.75, "learning_rate": 7.610723987244596e-08, "logits/chosen": -1.9328283071517944, "logits/rejected": -2.085153818130493, "logps/chosen": -388.857666015625, "logps/rejected": -439.230712890625, "loss": 0.2692, "rewards/accuracies": 0.75, "rewards/chosen": -0.22985783219337463, "rewards/margins": 2.691563367843628, "rewards/rejected": -2.921421527862549, "step": 6448 }, { "epoch": 0.75, "learning_rate": 7.607180819652768e-08, "logits/chosen": -1.8152461051940918, "logits/rejected": -2.160717725753784, "logps/chosen": -332.2568359375, "logps/rejected": -251.8614959716797, "loss": 0.1953, "rewards/accuracies": 1.0, "rewards/chosen": -0.5463205575942993, "rewards/margins": 2.6006813049316406, "rewards/rejected": -3.1470019817352295, "step": 6449 }, { "epoch": 0.75, "learning_rate": 7.603637652060942e-08, "logits/chosen": -2.3659584522247314, "logits/rejected": -2.3320837020874023, "logps/chosen": -256.12530517578125, "logps/rejected": -304.3139343261719, "loss": 0.0904, "rewards/accuracies": 1.0, "rewards/chosen": -0.5431153774261475, "rewards/margins": 2.831453800201416, "rewards/rejected": -3.3745691776275635, "step": 6450 }, { "epoch": 0.75, "learning_rate": 7.600094484469115e-08, "logits/chosen": -2.1952297687530518, "logits/rejected": -2.3484854698181152, "logps/chosen": -391.63580322265625, "logps/rejected": -348.87298583984375, "loss": 0.228, "rewards/accuracies": 1.0, "rewards/chosen": -0.3151053190231323, "rewards/margins": 2.619378089904785, "rewards/rejected": -2.934483289718628, "step": 6451 }, { "epoch": 0.75, "learning_rate": 7.596551316877288e-08, "logits/chosen": -1.8346936702728271, "logits/rejected": -1.9064598083496094, "logps/chosen": -361.21990966796875, "logps/rejected": -258.1582336425781, "loss": 0.7016, "rewards/accuracies": 0.625, "rewards/chosen": -0.6663395166397095, "rewards/margins": 0.3649175763130188, "rewards/rejected": -1.0312570333480835, "step": 6452 }, { "epoch": 0.75, "learning_rate": 7.593008149285461e-08, "logits/chosen": -2.4647676944732666, "logits/rejected": -2.600994110107422, "logps/chosen": -109.36909484863281, "logps/rejected": -134.81832885742188, "loss": 0.2538, "rewards/accuracies": 0.75, "rewards/chosen": -0.7016472220420837, "rewards/margins": 2.127311944961548, "rewards/rejected": -2.8289592266082764, "step": 6453 }, { "epoch": 0.75, "learning_rate": 7.589464981693633e-08, "logits/chosen": -2.1688389778137207, "logits/rejected": -2.4107134342193604, "logps/chosen": -330.3179016113281, "logps/rejected": -348.23565673828125, "loss": 0.5562, "rewards/accuracies": 0.625, "rewards/chosen": -0.66881263256073, "rewards/margins": 1.482250452041626, "rewards/rejected": -2.1510632038116455, "step": 6454 }, { "epoch": 0.75, "learning_rate": 7.585921814101806e-08, "logits/chosen": -1.9878334999084473, "logits/rejected": -2.5281176567077637, "logps/chosen": -367.12762451171875, "logps/rejected": -282.396484375, "loss": 0.6584, "rewards/accuracies": 0.75, "rewards/chosen": -1.1726101636886597, "rewards/margins": 1.6659901142120361, "rewards/rejected": -2.8386001586914062, "step": 6455 }, { "epoch": 0.75, "learning_rate": 7.58237864650998e-08, "logits/chosen": -2.494420289993286, "logits/rejected": -2.31838321685791, "logps/chosen": -149.25677490234375, "logps/rejected": -243.22354125976562, "loss": 0.37, "rewards/accuracies": 0.75, "rewards/chosen": -0.4915926456451416, "rewards/margins": 3.71329927444458, "rewards/rejected": -4.204892158508301, "step": 6456 }, { "epoch": 0.75, "learning_rate": 7.578835478918153e-08, "logits/chosen": -2.2464678287506104, "logits/rejected": -2.3555166721343994, "logps/chosen": -304.54730224609375, "logps/rejected": -330.56024169921875, "loss": 0.3068, "rewards/accuracies": 0.875, "rewards/chosen": -0.21251508593559265, "rewards/margins": 2.277697801589966, "rewards/rejected": -2.490212917327881, "step": 6457 }, { "epoch": 0.75, "learning_rate": 7.575292311326326e-08, "logits/chosen": -2.114300012588501, "logits/rejected": -2.2568600177764893, "logps/chosen": -394.3472595214844, "logps/rejected": -306.275634765625, "loss": 0.5306, "rewards/accuracies": 0.75, "rewards/chosen": -1.7053381204605103, "rewards/margins": 0.5187211632728577, "rewards/rejected": -2.2240591049194336, "step": 6458 }, { "epoch": 0.75, "learning_rate": 7.571749143734498e-08, "logits/chosen": -1.6774001121520996, "logits/rejected": -2.349780797958374, "logps/chosen": -380.2327880859375, "logps/rejected": -201.6908416748047, "loss": 1.0181, "rewards/accuracies": 0.625, "rewards/chosen": -1.2111611366271973, "rewards/margins": 0.721416711807251, "rewards/rejected": -1.9325779676437378, "step": 6459 }, { "epoch": 0.75, "learning_rate": 7.568205976142671e-08, "logits/chosen": -2.290689468383789, "logits/rejected": -2.560246706008911, "logps/chosen": -222.33877563476562, "logps/rejected": -211.14617919921875, "loss": 0.45, "rewards/accuracies": 0.75, "rewards/chosen": -0.624109148979187, "rewards/margins": 1.4265174865722656, "rewards/rejected": -2.050626516342163, "step": 6460 }, { "epoch": 0.75, "learning_rate": 7.564662808550843e-08, "logits/chosen": -2.7059998512268066, "logits/rejected": -2.596613883972168, "logps/chosen": -225.66851806640625, "logps/rejected": -224.09307861328125, "loss": 1.3102, "rewards/accuracies": 0.75, "rewards/chosen": -2.1883246898651123, "rewards/margins": 0.09823280572891235, "rewards/rejected": -2.28655743598938, "step": 6461 }, { "epoch": 0.75, "learning_rate": 7.561119640959018e-08, "logits/chosen": -2.6661267280578613, "logits/rejected": -2.522162914276123, "logps/chosen": -132.64132690429688, "logps/rejected": -168.64820861816406, "loss": 0.2069, "rewards/accuracies": 1.0, "rewards/chosen": -0.14964909851551056, "rewards/margins": 2.1558678150177, "rewards/rejected": -2.3055169582366943, "step": 6462 }, { "epoch": 0.75, "learning_rate": 7.55757647336719e-08, "logits/chosen": -2.6403417587280273, "logits/rejected": -2.68473482131958, "logps/chosen": -116.46459197998047, "logps/rejected": -186.0068359375, "loss": 0.1786, "rewards/accuracies": 1.0, "rewards/chosen": -0.29007911682128906, "rewards/margins": 2.6083991527557373, "rewards/rejected": -2.8984785079956055, "step": 6463 }, { "epoch": 0.75, "learning_rate": 7.554033305775363e-08, "logits/chosen": -2.2794063091278076, "logits/rejected": -1.8668127059936523, "logps/chosen": -220.65646362304688, "logps/rejected": -383.7742919921875, "loss": 0.1164, "rewards/accuracies": 1.0, "rewards/chosen": -0.4166777729988098, "rewards/margins": 4.237817287445068, "rewards/rejected": -4.654494762420654, "step": 6464 }, { "epoch": 0.75, "learning_rate": 7.550490138183536e-08, "logits/chosen": -2.253746747970581, "logits/rejected": -2.491044282913208, "logps/chosen": -283.82611083984375, "logps/rejected": -162.2842254638672, "loss": 0.7464, "rewards/accuracies": 0.875, "rewards/chosen": -1.5153189897537231, "rewards/margins": 0.38248109817504883, "rewards/rejected": -1.8977999687194824, "step": 6465 }, { "epoch": 0.75, "learning_rate": 7.546946970591708e-08, "logits/chosen": -2.421590566635132, "logits/rejected": -2.5192604064941406, "logps/chosen": -219.20285034179688, "logps/rejected": -299.8850402832031, "loss": 0.2194, "rewards/accuracies": 1.0, "rewards/chosen": -0.5302232503890991, "rewards/margins": 2.0074219703674316, "rewards/rejected": -2.5376453399658203, "step": 6466 }, { "epoch": 0.75, "learning_rate": 7.543403802999881e-08, "logits/chosen": -2.5977070331573486, "logits/rejected": -2.7787818908691406, "logps/chosen": -263.13397216796875, "logps/rejected": -258.5908203125, "loss": 0.4478, "rewards/accuracies": 0.75, "rewards/chosen": -1.6950018405914307, "rewards/margins": 2.1370444297790527, "rewards/rejected": -3.8320462703704834, "step": 6467 }, { "epoch": 0.75, "learning_rate": 7.539860635408055e-08, "logits/chosen": -2.4104084968566895, "logits/rejected": -2.624112844467163, "logps/chosen": -298.2568359375, "logps/rejected": -235.68045043945312, "loss": 0.3528, "rewards/accuracies": 0.75, "rewards/chosen": -0.14634397625923157, "rewards/margins": 2.625641107559204, "rewards/rejected": -2.7719852924346924, "step": 6468 }, { "epoch": 0.75, "learning_rate": 7.536317467816227e-08, "logits/chosen": -2.758101463317871, "logits/rejected": -2.6431703567504883, "logps/chosen": -203.6315155029297, "logps/rejected": -213.75733947753906, "loss": 0.4345, "rewards/accuracies": 0.625, "rewards/chosen": 0.1302729994058609, "rewards/margins": 2.419203281402588, "rewards/rejected": -2.2889304161071777, "step": 6469 }, { "epoch": 0.75, "learning_rate": 7.532774300224401e-08, "logits/chosen": -2.469794750213623, "logits/rejected": -2.5683364868164062, "logps/chosen": -205.78976440429688, "logps/rejected": -168.04039001464844, "loss": 0.6453, "rewards/accuracies": 0.625, "rewards/chosen": -1.6418776512145996, "rewards/margins": 1.5474066734313965, "rewards/rejected": -3.189284324645996, "step": 6470 }, { "epoch": 0.75, "learning_rate": 7.529231132632573e-08, "logits/chosen": -2.6583425998687744, "logits/rejected": -2.584652900695801, "logps/chosen": -189.99542236328125, "logps/rejected": -222.5227813720703, "loss": 0.4268, "rewards/accuracies": 0.75, "rewards/chosen": -1.4412004947662354, "rewards/margins": 1.4033842086791992, "rewards/rejected": -2.8445847034454346, "step": 6471 }, { "epoch": 0.75, "learning_rate": 7.525687965040745e-08, "logits/chosen": -1.9429912567138672, "logits/rejected": -2.0021865367889404, "logps/chosen": -382.5565185546875, "logps/rejected": -391.878662109375, "loss": 0.1949, "rewards/accuracies": 0.875, "rewards/chosen": -0.6977723240852356, "rewards/margins": 2.8193814754486084, "rewards/rejected": -3.517153739929199, "step": 6472 }, { "epoch": 0.75, "learning_rate": 7.522144797448919e-08, "logits/chosen": -1.653220295906067, "logits/rejected": -1.8512948751449585, "logps/chosen": -323.2754211425781, "logps/rejected": -263.09332275390625, "loss": 1.1886, "rewards/accuracies": 0.5, "rewards/chosen": -2.051288604736328, "rewards/margins": -0.09607559442520142, "rewards/rejected": -1.955213189125061, "step": 6473 }, { "epoch": 0.75, "learning_rate": 7.518601629857092e-08, "logits/chosen": -2.3776702880859375, "logits/rejected": -2.4249541759490967, "logps/chosen": -222.36526489257812, "logps/rejected": -221.4114990234375, "loss": 0.2417, "rewards/accuracies": 0.875, "rewards/chosen": 0.09754163771867752, "rewards/margins": 2.849870204925537, "rewards/rejected": -2.752328634262085, "step": 6474 }, { "epoch": 0.75, "learning_rate": 7.515058462265266e-08, "logits/chosen": -2.602116823196411, "logits/rejected": -2.6855695247650146, "logps/chosen": -186.8363494873047, "logps/rejected": -223.6293182373047, "loss": 0.3513, "rewards/accuracies": 0.875, "rewards/chosen": -0.9947190284729004, "rewards/margins": 1.5746976137161255, "rewards/rejected": -2.5694165229797363, "step": 6475 }, { "epoch": 0.75, "learning_rate": 7.511515294673438e-08, "logits/chosen": -2.1278703212738037, "logits/rejected": -1.8477816581726074, "logps/chosen": -187.11715698242188, "logps/rejected": -214.281982421875, "loss": 0.687, "rewards/accuracies": 0.625, "rewards/chosen": -1.0301921367645264, "rewards/margins": 1.824438214302063, "rewards/rejected": -2.854630470275879, "step": 6476 }, { "epoch": 0.75, "learning_rate": 7.50797212708161e-08, "logits/chosen": -2.460930824279785, "logits/rejected": -2.3468692302703857, "logps/chosen": -207.79335021972656, "logps/rejected": -185.47250366210938, "loss": 0.3304, "rewards/accuracies": 1.0, "rewards/chosen": -1.2658095359802246, "rewards/margins": 1.3391714096069336, "rewards/rejected": -2.604980945587158, "step": 6477 }, { "epoch": 0.75, "learning_rate": 7.504428959489782e-08, "logits/chosen": -2.850128650665283, "logits/rejected": -2.8688979148864746, "logps/chosen": -183.9222869873047, "logps/rejected": -138.09597778320312, "loss": 0.4537, "rewards/accuracies": 0.75, "rewards/chosen": -1.2794824838638306, "rewards/margins": 1.0173484086990356, "rewards/rejected": -2.296830654144287, "step": 6478 }, { "epoch": 0.75, "learning_rate": 7.500885791897956e-08, "logits/chosen": -2.754365921020508, "logits/rejected": -2.6961374282836914, "logps/chosen": -315.6653137207031, "logps/rejected": -355.5334167480469, "loss": 0.405, "rewards/accuracies": 0.875, "rewards/chosen": -0.6954002976417542, "rewards/margins": 3.1540236473083496, "rewards/rejected": -3.84942364692688, "step": 6479 }, { "epoch": 0.75, "learning_rate": 7.49734262430613e-08, "logits/chosen": -1.521028995513916, "logits/rejected": -2.0557851791381836, "logps/chosen": -678.4910888671875, "logps/rejected": -382.72503662109375, "loss": 0.7318, "rewards/accuracies": 0.75, "rewards/chosen": -0.8140151500701904, "rewards/margins": 0.8725492358207703, "rewards/rejected": -1.686564564704895, "step": 6480 }, { "epoch": 0.75, "learning_rate": 7.493799456714303e-08, "logits/chosen": -2.4138646125793457, "logits/rejected": -2.2300660610198975, "logps/chosen": -292.7588806152344, "logps/rejected": -373.501953125, "loss": 0.2313, "rewards/accuracies": 0.875, "rewards/chosen": 0.13556605577468872, "rewards/margins": 3.559673309326172, "rewards/rejected": -3.424107551574707, "step": 6481 }, { "epoch": 0.75, "learning_rate": 7.490256289122475e-08, "logits/chosen": -2.4603888988494873, "logits/rejected": -2.487705707550049, "logps/chosen": -320.8235778808594, "logps/rejected": -287.2161560058594, "loss": 0.164, "rewards/accuracies": 1.0, "rewards/chosen": -0.24512377381324768, "rewards/margins": 3.4962949752807617, "rewards/rejected": -3.7414186000823975, "step": 6482 }, { "epoch": 0.75, "learning_rate": 7.486713121530647e-08, "logits/chosen": -2.47711181640625, "logits/rejected": -2.6055920124053955, "logps/chosen": -309.1855773925781, "logps/rejected": -372.4263610839844, "loss": 0.3376, "rewards/accuracies": 0.75, "rewards/chosen": -0.9842288494110107, "rewards/margins": 2.6509792804718018, "rewards/rejected": -3.6352081298828125, "step": 6483 }, { "epoch": 0.75, "learning_rate": 7.483169953938821e-08, "logits/chosen": -2.5217947959899902, "logits/rejected": -2.4774439334869385, "logps/chosen": -332.9872131347656, "logps/rejected": -305.52606201171875, "loss": 0.3967, "rewards/accuracies": 0.75, "rewards/chosen": -1.2651970386505127, "rewards/margins": 1.6915512084960938, "rewards/rejected": -2.9567484855651855, "step": 6484 }, { "epoch": 0.75, "learning_rate": 7.479626786346995e-08, "logits/chosen": -2.7126035690307617, "logits/rejected": -2.6986136436462402, "logps/chosen": -211.6145477294922, "logps/rejected": -183.21795654296875, "loss": 0.2613, "rewards/accuracies": 0.875, "rewards/chosen": -0.6984207630157471, "rewards/margins": 1.9462628364562988, "rewards/rejected": -2.644683837890625, "step": 6485 }, { "epoch": 0.75, "learning_rate": 7.476083618755167e-08, "logits/chosen": -2.9035329818725586, "logits/rejected": -2.9082236289978027, "logps/chosen": -169.04530334472656, "logps/rejected": -215.0203857421875, "loss": 0.3599, "rewards/accuracies": 0.875, "rewards/chosen": -0.44306913018226624, "rewards/margins": 1.5387532711029053, "rewards/rejected": -1.9818223714828491, "step": 6486 }, { "epoch": 0.75, "learning_rate": 7.47254045116334e-08, "logits/chosen": -2.186835527420044, "logits/rejected": -1.8564000129699707, "logps/chosen": -232.23985290527344, "logps/rejected": -279.71630859375, "loss": 0.5581, "rewards/accuracies": 0.5, "rewards/chosen": -1.3683847188949585, "rewards/margins": 1.2653347253799438, "rewards/rejected": -2.6337194442749023, "step": 6487 }, { "epoch": 0.75, "learning_rate": 7.468997283571512e-08, "logits/chosen": -2.484494209289551, "logits/rejected": -2.596965789794922, "logps/chosen": -171.68630981445312, "logps/rejected": -259.25433349609375, "loss": 0.3942, "rewards/accuracies": 0.625, "rewards/chosen": -0.8313173055648804, "rewards/margins": 2.4513344764709473, "rewards/rejected": -3.282651901245117, "step": 6488 }, { "epoch": 0.75, "learning_rate": 7.465454115979685e-08, "logits/chosen": -1.6246484518051147, "logits/rejected": -2.0188868045806885, "logps/chosen": -340.76995849609375, "logps/rejected": -243.5684356689453, "loss": 0.2812, "rewards/accuracies": 0.875, "rewards/chosen": -0.6674381494522095, "rewards/margins": 1.9222631454467773, "rewards/rejected": -2.5897014141082764, "step": 6489 }, { "epoch": 0.75, "learning_rate": 7.461910948387858e-08, "logits/chosen": -2.3743112087249756, "logits/rejected": -2.3970508575439453, "logps/chosen": -313.0654602050781, "logps/rejected": -199.01651000976562, "loss": 0.4237, "rewards/accuracies": 0.75, "rewards/chosen": -0.40202367305755615, "rewards/margins": 1.7450857162475586, "rewards/rejected": -2.147109270095825, "step": 6490 }, { "epoch": 0.76, "learning_rate": 7.458367780796032e-08, "logits/chosen": -2.2585835456848145, "logits/rejected": -2.320622444152832, "logps/chosen": -270.8371276855469, "logps/rejected": -278.6273498535156, "loss": 0.479, "rewards/accuracies": 0.625, "rewards/chosen": -0.08981799334287643, "rewards/margins": 2.071437358856201, "rewards/rejected": -2.161255359649658, "step": 6491 }, { "epoch": 0.76, "learning_rate": 7.454824613204204e-08, "logits/chosen": -2.8580784797668457, "logits/rejected": -2.9435601234436035, "logps/chosen": -288.48907470703125, "logps/rejected": -270.0225830078125, "loss": 0.5848, "rewards/accuracies": 0.75, "rewards/chosen": -0.9860184788703918, "rewards/margins": 1.1633367538452148, "rewards/rejected": -2.149355173110962, "step": 6492 }, { "epoch": 0.76, "learning_rate": 7.451281445612378e-08, "logits/chosen": -2.5036725997924805, "logits/rejected": -2.624281883239746, "logps/chosen": -289.745361328125, "logps/rejected": -256.320068359375, "loss": 0.3267, "rewards/accuracies": 0.75, "rewards/chosen": -1.1172726154327393, "rewards/margins": 2.3018720149993896, "rewards/rejected": -3.41914439201355, "step": 6493 }, { "epoch": 0.76, "learning_rate": 7.44773827802055e-08, "logits/chosen": -2.2382712364196777, "logits/rejected": -2.2557499408721924, "logps/chosen": -273.10577392578125, "logps/rejected": -217.0933074951172, "loss": 0.268, "rewards/accuracies": 0.875, "rewards/chosen": -0.48139986395835876, "rewards/margins": 2.766129970550537, "rewards/rejected": -3.2475295066833496, "step": 6494 }, { "epoch": 0.76, "learning_rate": 7.444195110428723e-08, "logits/chosen": -2.5953822135925293, "logits/rejected": -2.470294952392578, "logps/chosen": -170.13339233398438, "logps/rejected": -200.04359436035156, "loss": 0.269, "rewards/accuracies": 1.0, "rewards/chosen": -0.9646235108375549, "rewards/margins": 2.2494678497314453, "rewards/rejected": -3.2140913009643555, "step": 6495 }, { "epoch": 0.76, "learning_rate": 7.440651942836895e-08, "logits/chosen": -2.607260227203369, "logits/rejected": -2.6524176597595215, "logps/chosen": -235.563232421875, "logps/rejected": -363.0798034667969, "loss": 0.3737, "rewards/accuracies": 0.875, "rewards/chosen": -0.7814541459083557, "rewards/margins": 2.5795629024505615, "rewards/rejected": -3.3610172271728516, "step": 6496 }, { "epoch": 0.76, "learning_rate": 7.437108775245069e-08, "logits/chosen": -2.4380621910095215, "logits/rejected": -2.32533597946167, "logps/chosen": -258.1275634765625, "logps/rejected": -243.79844665527344, "loss": 0.7227, "rewards/accuracies": 0.75, "rewards/chosen": -0.98960280418396, "rewards/margins": 1.4333994388580322, "rewards/rejected": -2.423002004623413, "step": 6497 }, { "epoch": 0.76, "learning_rate": 7.433565607653241e-08, "logits/chosen": -2.4614648818969727, "logits/rejected": -2.4190120697021484, "logps/chosen": -252.6791229248047, "logps/rejected": -284.16650390625, "loss": 0.154, "rewards/accuracies": 1.0, "rewards/chosen": -0.24404442310333252, "rewards/margins": 4.167491912841797, "rewards/rejected": -4.41153621673584, "step": 6498 }, { "epoch": 0.76, "learning_rate": 7.430022440061415e-08, "logits/chosen": -1.7292258739471436, "logits/rejected": -1.9843504428863525, "logps/chosen": -441.6611633300781, "logps/rejected": -319.1882019042969, "loss": 0.6822, "rewards/accuracies": 0.75, "rewards/chosen": -1.3567826747894287, "rewards/margins": 1.640333890914917, "rewards/rejected": -2.997117042541504, "step": 6499 }, { "epoch": 0.76, "learning_rate": 7.426479272469587e-08, "logits/chosen": -2.569911003112793, "logits/rejected": -2.304762125015259, "logps/chosen": -259.9080505371094, "logps/rejected": -316.58984375, "loss": 0.6812, "rewards/accuracies": 0.625, "rewards/chosen": -1.2681901454925537, "rewards/margins": 0.9844961166381836, "rewards/rejected": -2.2526865005493164, "step": 6500 }, { "epoch": 0.76, "learning_rate": 7.42293610487776e-08, "logits/chosen": -1.911576271057129, "logits/rejected": -2.0494470596313477, "logps/chosen": -334.9373474121094, "logps/rejected": -356.62994384765625, "loss": 0.162, "rewards/accuracies": 1.0, "rewards/chosen": -0.680385172367096, "rewards/margins": 2.8933048248291016, "rewards/rejected": -3.573690176010132, "step": 6501 }, { "epoch": 0.76, "learning_rate": 7.419392937285934e-08, "logits/chosen": -2.306076765060425, "logits/rejected": -2.262850046157837, "logps/chosen": -252.64849853515625, "logps/rejected": -257.77490234375, "loss": 0.4245, "rewards/accuracies": 0.625, "rewards/chosen": -0.955533504486084, "rewards/margins": 1.8460835218429565, "rewards/rejected": -2.801616907119751, "step": 6502 }, { "epoch": 0.76, "learning_rate": 7.415849769694106e-08, "logits/chosen": -2.7781872749328613, "logits/rejected": -3.105736255645752, "logps/chosen": -262.8927001953125, "logps/rejected": -273.08935546875, "loss": 0.8175, "rewards/accuracies": 0.625, "rewards/chosen": -1.3758583068847656, "rewards/margins": 1.2751716375350952, "rewards/rejected": -2.651029586791992, "step": 6503 }, { "epoch": 0.76, "learning_rate": 7.412306602102278e-08, "logits/chosen": -1.7319554090499878, "logits/rejected": -1.9258675575256348, "logps/chosen": -290.14031982421875, "logps/rejected": -251.9043426513672, "loss": 0.663, "rewards/accuracies": 0.75, "rewards/chosen": -1.456110954284668, "rewards/margins": 1.7998138666152954, "rewards/rejected": -3.255924940109253, "step": 6504 }, { "epoch": 0.76, "learning_rate": 7.408763434510452e-08, "logits/chosen": -2.0228519439697266, "logits/rejected": -2.0797502994537354, "logps/chosen": -356.51446533203125, "logps/rejected": -378.6701354980469, "loss": 0.162, "rewards/accuracies": 1.0, "rewards/chosen": -1.1638134717941284, "rewards/margins": 2.167304277420044, "rewards/rejected": -3.331117630004883, "step": 6505 }, { "epoch": 0.76, "learning_rate": 7.405220266918624e-08, "logits/chosen": -2.0093495845794678, "logits/rejected": -2.029608726501465, "logps/chosen": -322.6050720214844, "logps/rejected": -372.1376647949219, "loss": 0.45, "rewards/accuracies": 0.625, "rewards/chosen": -1.5367717742919922, "rewards/margins": 2.23799991607666, "rewards/rejected": -3.7747714519500732, "step": 6506 }, { "epoch": 0.76, "learning_rate": 7.401677099326798e-08, "logits/chosen": -2.650331497192383, "logits/rejected": -2.6581790447235107, "logps/chosen": -207.1827392578125, "logps/rejected": -280.2098388671875, "loss": 0.2948, "rewards/accuracies": 0.875, "rewards/chosen": -0.0037756264209747314, "rewards/margins": 1.6332275867462158, "rewards/rejected": -1.6370033025741577, "step": 6507 }, { "epoch": 0.76, "learning_rate": 7.398133931734971e-08, "logits/chosen": -2.2206897735595703, "logits/rejected": -2.142420768737793, "logps/chosen": -214.672607421875, "logps/rejected": -295.1993713378906, "loss": 0.2733, "rewards/accuracies": 0.75, "rewards/chosen": -1.1452040672302246, "rewards/margins": 2.333329200744629, "rewards/rejected": -3.4785332679748535, "step": 6508 }, { "epoch": 0.76, "learning_rate": 7.394590764143143e-08, "logits/chosen": -1.8460166454315186, "logits/rejected": -2.1236777305603027, "logps/chosen": -263.8135986328125, "logps/rejected": -213.716796875, "loss": 0.3003, "rewards/accuracies": 0.875, "rewards/chosen": -0.1670980155467987, "rewards/margins": 1.4465421438217163, "rewards/rejected": -1.6136400699615479, "step": 6509 }, { "epoch": 0.76, "learning_rate": 7.391047596551317e-08, "logits/chosen": -1.7671496868133545, "logits/rejected": -1.7532504796981812, "logps/chosen": -343.1138916015625, "logps/rejected": -382.9918518066406, "loss": 0.4533, "rewards/accuracies": 0.75, "rewards/chosen": -0.8129963874816895, "rewards/margins": 0.8321951627731323, "rewards/rejected": -1.6451914310455322, "step": 6510 }, { "epoch": 0.76, "learning_rate": 7.387504428959489e-08, "logits/chosen": -2.6106786727905273, "logits/rejected": -2.6972742080688477, "logps/chosen": -196.06707763671875, "logps/rejected": -237.4906768798828, "loss": 0.1733, "rewards/accuracies": 0.875, "rewards/chosen": -1.5211362838745117, "rewards/margins": 3.398703098297119, "rewards/rejected": -4.919839859008789, "step": 6511 }, { "epoch": 0.76, "learning_rate": 7.383961261367663e-08, "logits/chosen": -2.4326791763305664, "logits/rejected": -2.3134677410125732, "logps/chosen": -170.42840576171875, "logps/rejected": -321.0561828613281, "loss": 0.1045, "rewards/accuracies": 1.0, "rewards/chosen": -0.6179792284965515, "rewards/margins": 3.7009851932525635, "rewards/rejected": -4.31896448135376, "step": 6512 }, { "epoch": 0.76, "learning_rate": 7.380418093775835e-08, "logits/chosen": -2.6463255882263184, "logits/rejected": -2.819624900817871, "logps/chosen": -233.36697387695312, "logps/rejected": -218.2624969482422, "loss": 1.3446, "rewards/accuracies": 0.375, "rewards/chosen": -1.8299775123596191, "rewards/margins": 0.5131313800811768, "rewards/rejected": -2.343108892440796, "step": 6513 }, { "epoch": 0.76, "learning_rate": 7.376874926184009e-08, "logits/chosen": -2.1878550052642822, "logits/rejected": -2.1497457027435303, "logps/chosen": -319.4171447753906, "logps/rejected": -329.67529296875, "loss": 0.9727, "rewards/accuracies": 0.75, "rewards/chosen": -1.5298254489898682, "rewards/margins": 0.49959075450897217, "rewards/rejected": -2.029416084289551, "step": 6514 }, { "epoch": 0.76, "learning_rate": 7.373331758592181e-08, "logits/chosen": -1.9755208492279053, "logits/rejected": -1.9579623937606812, "logps/chosen": -257.964599609375, "logps/rejected": -247.97134399414062, "loss": 0.2104, "rewards/accuracies": 1.0, "rewards/chosen": -0.7390990853309631, "rewards/margins": 2.092090129852295, "rewards/rejected": -2.8311893939971924, "step": 6515 }, { "epoch": 0.76, "learning_rate": 7.369788591000354e-08, "logits/chosen": -2.7085702419281006, "logits/rejected": -2.7637767791748047, "logps/chosen": -134.40591430664062, "logps/rejected": -186.67678833007812, "loss": 0.3419, "rewards/accuracies": 1.0, "rewards/chosen": -1.6995807886123657, "rewards/margins": 2.4577202796936035, "rewards/rejected": -4.157301425933838, "step": 6516 }, { "epoch": 0.76, "learning_rate": 7.366245423408526e-08, "logits/chosen": -2.745403528213501, "logits/rejected": -2.747579574584961, "logps/chosen": -172.17137145996094, "logps/rejected": -264.71221923828125, "loss": 0.1932, "rewards/accuracies": 0.875, "rewards/chosen": -1.5493249893188477, "rewards/margins": 3.3155744075775146, "rewards/rejected": -4.864899158477783, "step": 6517 }, { "epoch": 0.76, "learning_rate": 7.3627022558167e-08, "logits/chosen": -2.5278282165527344, "logits/rejected": -2.596538543701172, "logps/chosen": -277.4001159667969, "logps/rejected": -209.38958740234375, "loss": 1.3385, "rewards/accuracies": 0.5, "rewards/chosen": -2.1616735458374023, "rewards/margins": -0.027311623096466064, "rewards/rejected": -2.134361743927002, "step": 6518 }, { "epoch": 0.76, "learning_rate": 7.359159088224874e-08, "logits/chosen": -2.228825092315674, "logits/rejected": -2.0385007858276367, "logps/chosen": -256.73638916015625, "logps/rejected": -294.7371520996094, "loss": 0.38, "rewards/accuracies": 0.875, "rewards/chosen": -1.4170072078704834, "rewards/margins": 1.9229167699813843, "rewards/rejected": -3.339923858642578, "step": 6519 }, { "epoch": 0.76, "learning_rate": 7.355615920633046e-08, "logits/chosen": -2.2051901817321777, "logits/rejected": -2.163444995880127, "logps/chosen": -182.35267639160156, "logps/rejected": -273.6107482910156, "loss": 0.2058, "rewards/accuracies": 1.0, "rewards/chosen": -2.648921489715576, "rewards/margins": 1.814222812652588, "rewards/rejected": -4.463144302368164, "step": 6520 }, { "epoch": 0.76, "learning_rate": 7.352072753041218e-08, "logits/chosen": -1.9406752586364746, "logits/rejected": -2.0569047927856445, "logps/chosen": -507.5339660644531, "logps/rejected": -287.4808349609375, "loss": 0.2596, "rewards/accuracies": 0.875, "rewards/chosen": -0.2752528786659241, "rewards/margins": 2.776866912841797, "rewards/rejected": -3.052119731903076, "step": 6521 }, { "epoch": 0.76, "learning_rate": 7.348529585449392e-08, "logits/chosen": -2.3453402519226074, "logits/rejected": -2.4058632850646973, "logps/chosen": -265.2162780761719, "logps/rejected": -269.7513122558594, "loss": 0.0979, "rewards/accuracies": 1.0, "rewards/chosen": 0.3292612135410309, "rewards/margins": 3.143617630004883, "rewards/rejected": -2.814356565475464, "step": 6522 }, { "epoch": 0.76, "learning_rate": 7.344986417857564e-08, "logits/chosen": -1.975771188735962, "logits/rejected": -1.5246907472610474, "logps/chosen": -294.74359130859375, "logps/rejected": -443.364013671875, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -0.06991385668516159, "rewards/margins": 4.122983932495117, "rewards/rejected": -4.192897796630859, "step": 6523 }, { "epoch": 0.76, "learning_rate": 7.341443250265737e-08, "logits/chosen": -2.279090166091919, "logits/rejected": -2.301064968109131, "logps/chosen": -298.11285400390625, "logps/rejected": -366.35723876953125, "loss": 0.4457, "rewards/accuracies": 0.75, "rewards/chosen": -0.5600953102111816, "rewards/margins": 2.197056531906128, "rewards/rejected": -2.7571516036987305, "step": 6524 }, { "epoch": 0.76, "learning_rate": 7.337900082673911e-08, "logits/chosen": -2.778961658477783, "logits/rejected": -2.559969425201416, "logps/chosen": -165.39889526367188, "logps/rejected": -429.42169189453125, "loss": 0.2889, "rewards/accuracies": 0.875, "rewards/chosen": -1.0781649351119995, "rewards/margins": 2.2567379474639893, "rewards/rejected": -3.3349030017852783, "step": 6525 }, { "epoch": 0.76, "learning_rate": 7.334356915082083e-08, "logits/chosen": -2.564197540283203, "logits/rejected": -2.8193860054016113, "logps/chosen": -188.641845703125, "logps/rejected": -189.2569580078125, "loss": 0.4698, "rewards/accuracies": 0.625, "rewards/chosen": -0.7985535860061646, "rewards/margins": 1.819006323814392, "rewards/rejected": -2.6175599098205566, "step": 6526 }, { "epoch": 0.76, "learning_rate": 7.330813747490255e-08, "logits/chosen": -2.369932174682617, "logits/rejected": -2.3503100872039795, "logps/chosen": -188.45352172851562, "logps/rejected": -193.16091918945312, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/chosen": -0.7510823011398315, "rewards/margins": 3.372767448425293, "rewards/rejected": -4.123849868774414, "step": 6527 }, { "epoch": 0.76, "learning_rate": 7.327270579898429e-08, "logits/chosen": -2.2970075607299805, "logits/rejected": -2.196621894836426, "logps/chosen": -239.46925354003906, "logps/rejected": -288.114501953125, "loss": 0.1946, "rewards/accuracies": 0.875, "rewards/chosen": -0.7004674077033997, "rewards/margins": 3.2607064247131348, "rewards/rejected": -3.9611740112304688, "step": 6528 }, { "epoch": 0.76, "learning_rate": 7.323727412306602e-08, "logits/chosen": -2.274451732635498, "logits/rejected": -1.9620574712753296, "logps/chosen": -259.2524108886719, "logps/rejected": -325.619384765625, "loss": 0.1028, "rewards/accuracies": 1.0, "rewards/chosen": -1.021093726158142, "rewards/margins": 3.362720251083374, "rewards/rejected": -4.383813858032227, "step": 6529 }, { "epoch": 0.76, "learning_rate": 7.320184244714775e-08, "logits/chosen": -2.456428050994873, "logits/rejected": -2.4305083751678467, "logps/chosen": -235.43374633789062, "logps/rejected": -337.66326904296875, "loss": 0.172, "rewards/accuracies": 1.0, "rewards/chosen": -0.1129276305437088, "rewards/margins": 3.2768592834472656, "rewards/rejected": -3.389786720275879, "step": 6530 }, { "epoch": 0.76, "learning_rate": 7.316641077122948e-08, "logits/chosen": -2.3559694290161133, "logits/rejected": -2.574814796447754, "logps/chosen": -359.7582092285156, "logps/rejected": -262.01507568359375, "loss": 0.2465, "rewards/accuracies": 0.875, "rewards/chosen": -0.34616971015930176, "rewards/margins": 2.019064426422119, "rewards/rejected": -2.365234136581421, "step": 6531 }, { "epoch": 0.76, "learning_rate": 7.31309790953112e-08, "logits/chosen": -1.6165971755981445, "logits/rejected": -2.1380906105041504, "logps/chosen": -400.5142822265625, "logps/rejected": -342.49346923828125, "loss": 0.089, "rewards/accuracies": 1.0, "rewards/chosen": -0.6343805193901062, "rewards/margins": 3.116079092025757, "rewards/rejected": -3.750459671020508, "step": 6532 }, { "epoch": 0.76, "learning_rate": 7.309554741939292e-08, "logits/chosen": -2.138234853744507, "logits/rejected": -2.3391356468200684, "logps/chosen": -272.2114562988281, "logps/rejected": -210.33538818359375, "loss": 0.7787, "rewards/accuracies": 0.625, "rewards/chosen": -1.0656989812850952, "rewards/margins": 0.9475712180137634, "rewards/rejected": -2.013270139694214, "step": 6533 }, { "epoch": 0.76, "learning_rate": 7.306011574347466e-08, "logits/chosen": -2.5343878269195557, "logits/rejected": -2.4906201362609863, "logps/chosen": -287.34747314453125, "logps/rejected": -251.7882537841797, "loss": 0.2721, "rewards/accuracies": 0.875, "rewards/chosen": -0.8745272159576416, "rewards/margins": 2.5609042644500732, "rewards/rejected": -3.435431480407715, "step": 6534 }, { "epoch": 0.76, "learning_rate": 7.30246840675564e-08, "logits/chosen": -1.9182782173156738, "logits/rejected": -2.0620129108428955, "logps/chosen": -251.80648803710938, "logps/rejected": -338.21917724609375, "loss": 0.1969, "rewards/accuracies": 1.0, "rewards/chosen": -0.6208652853965759, "rewards/margins": 2.8699145317077637, "rewards/rejected": -3.4907798767089844, "step": 6535 }, { "epoch": 0.76, "learning_rate": 7.298925239163812e-08, "logits/chosen": -2.469118118286133, "logits/rejected": -2.538001298904419, "logps/chosen": -274.2197265625, "logps/rejected": -211.62066650390625, "loss": 0.1373, "rewards/accuracies": 1.0, "rewards/chosen": -0.5934911966323853, "rewards/margins": 3.318878650665283, "rewards/rejected": -3.912369728088379, "step": 6536 }, { "epoch": 0.76, "learning_rate": 7.295382071571985e-08, "logits/chosen": -2.654019355773926, "logits/rejected": -2.769404411315918, "logps/chosen": -339.317626953125, "logps/rejected": -276.64801025390625, "loss": 0.1581, "rewards/accuracies": 1.0, "rewards/chosen": -0.7511558532714844, "rewards/margins": 2.7480406761169434, "rewards/rejected": -3.4991965293884277, "step": 6537 }, { "epoch": 0.76, "learning_rate": 7.291838903980158e-08, "logits/chosen": -2.148364543914795, "logits/rejected": -2.12152099609375, "logps/chosen": -414.82781982421875, "logps/rejected": -387.35693359375, "loss": 0.1954, "rewards/accuracies": 0.875, "rewards/chosen": -1.724144458770752, "rewards/margins": 2.728975296020508, "rewards/rejected": -4.45311975479126, "step": 6538 }, { "epoch": 0.76, "learning_rate": 7.288295736388331e-08, "logits/chosen": -1.9216448068618774, "logits/rejected": -1.9350512027740479, "logps/chosen": -268.9171142578125, "logps/rejected": -289.7937316894531, "loss": 0.6139, "rewards/accuracies": 0.5, "rewards/chosen": -1.0308666229248047, "rewards/margins": 1.491780400276184, "rewards/rejected": -2.522646903991699, "step": 6539 }, { "epoch": 0.76, "learning_rate": 7.284752568796503e-08, "logits/chosen": -2.0315539836883545, "logits/rejected": -2.1437599658966064, "logps/chosen": -549.8056640625, "logps/rejected": -399.4963073730469, "loss": 0.6171, "rewards/accuracies": 0.75, "rewards/chosen": -0.253898561000824, "rewards/margins": 2.779489755630493, "rewards/rejected": -3.033388137817383, "step": 6540 }, { "epoch": 0.76, "learning_rate": 7.281209401204677e-08, "logits/chosen": -2.057417154312134, "logits/rejected": -2.329926013946533, "logps/chosen": -170.92593383789062, "logps/rejected": -102.81556701660156, "loss": 3.8151, "rewards/accuracies": 0.625, "rewards/chosen": -3.620354175567627, "rewards/margins": -2.6292402744293213, "rewards/rejected": -0.99111407995224, "step": 6541 }, { "epoch": 0.76, "learning_rate": 7.277666233612849e-08, "logits/chosen": -2.69047474861145, "logits/rejected": -2.429795742034912, "logps/chosen": -130.8695831298828, "logps/rejected": -271.32086181640625, "loss": 0.2225, "rewards/accuracies": 0.875, "rewards/chosen": -0.13336829841136932, "rewards/margins": 2.939639091491699, "rewards/rejected": -3.073007583618164, "step": 6542 }, { "epoch": 0.76, "learning_rate": 7.274123066021023e-08, "logits/chosen": -2.475344657897949, "logits/rejected": -2.7696638107299805, "logps/chosen": -365.8242492675781, "logps/rejected": -272.079833984375, "loss": 0.2258, "rewards/accuracies": 1.0, "rewards/chosen": -0.5095143914222717, "rewards/margins": 3.026102304458618, "rewards/rejected": -3.535616874694824, "step": 6543 }, { "epoch": 0.76, "learning_rate": 7.270579898429195e-08, "logits/chosen": -2.818424701690674, "logits/rejected": -2.856477737426758, "logps/chosen": -165.74758911132812, "logps/rejected": -210.67074584960938, "loss": 0.1723, "rewards/accuracies": 1.0, "rewards/chosen": -1.248735785484314, "rewards/margins": 4.049625873565674, "rewards/rejected": -5.298361301422119, "step": 6544 }, { "epoch": 0.76, "learning_rate": 7.267036730837368e-08, "logits/chosen": -2.2270748615264893, "logits/rejected": -2.580418348312378, "logps/chosen": -287.2328186035156, "logps/rejected": -143.59609985351562, "loss": 0.6907, "rewards/accuracies": 0.625, "rewards/chosen": -0.9970101714134216, "rewards/margins": 1.3176482915878296, "rewards/rejected": -2.3146584033966064, "step": 6545 }, { "epoch": 0.76, "learning_rate": 7.263493563245542e-08, "logits/chosen": -2.325033664703369, "logits/rejected": -2.0952320098876953, "logps/chosen": -222.8415069580078, "logps/rejected": -271.2603454589844, "loss": 0.3789, "rewards/accuracies": 0.625, "rewards/chosen": -0.636660099029541, "rewards/margins": 2.155402660369873, "rewards/rejected": -2.792062997817993, "step": 6546 }, { "epoch": 0.76, "learning_rate": 7.259950395653714e-08, "logits/chosen": -2.231644630432129, "logits/rejected": -2.341620683670044, "logps/chosen": -233.1407012939453, "logps/rejected": -157.40293884277344, "loss": 0.3366, "rewards/accuracies": 0.875, "rewards/chosen": -1.1038765907287598, "rewards/margins": 1.2512096166610718, "rewards/rejected": -2.355086326599121, "step": 6547 }, { "epoch": 0.76, "learning_rate": 7.256407228061888e-08, "logits/chosen": -2.0113120079040527, "logits/rejected": -1.5310449600219727, "logps/chosen": -179.986328125, "logps/rejected": -297.4427490234375, "loss": 0.1951, "rewards/accuracies": 1.0, "rewards/chosen": -0.7868562340736389, "rewards/margins": 1.7069090604782104, "rewards/rejected": -2.493765354156494, "step": 6548 }, { "epoch": 0.76, "learning_rate": 7.25286406047006e-08, "logits/chosen": -2.325350284576416, "logits/rejected": -2.3683652877807617, "logps/chosen": -137.22653198242188, "logps/rejected": -226.56143188476562, "loss": 0.2318, "rewards/accuracies": 0.875, "rewards/chosen": -0.8646396398544312, "rewards/margins": 3.303483724594116, "rewards/rejected": -4.168123245239258, "step": 6549 }, { "epoch": 0.76, "learning_rate": 7.249320892878232e-08, "logits/chosen": -2.4055593013763428, "logits/rejected": -2.288334846496582, "logps/chosen": -253.2880859375, "logps/rejected": -185.30796813964844, "loss": 0.713, "rewards/accuracies": 0.875, "rewards/chosen": -1.3272736072540283, "rewards/margins": 0.6200856566429138, "rewards/rejected": -1.9473592042922974, "step": 6550 }, { "epoch": 0.76, "learning_rate": 7.245777725286406e-08, "logits/chosen": -2.0152196884155273, "logits/rejected": -2.1809940338134766, "logps/chosen": -271.6258850097656, "logps/rejected": -233.04685974121094, "loss": 0.5223, "rewards/accuracies": 0.625, "rewards/chosen": -0.702997088432312, "rewards/margins": 1.1633658409118652, "rewards/rejected": -1.8663629293441772, "step": 6551 }, { "epoch": 0.76, "learning_rate": 7.242234557694579e-08, "logits/chosen": -2.5214412212371826, "logits/rejected": -2.3518805503845215, "logps/chosen": -250.47854614257812, "logps/rejected": -261.0081787109375, "loss": 0.3925, "rewards/accuracies": 0.875, "rewards/chosen": -0.7221649885177612, "rewards/margins": 1.0089610815048218, "rewards/rejected": -1.7311261892318726, "step": 6552 }, { "epoch": 0.76, "learning_rate": 7.238691390102751e-08, "logits/chosen": -2.7128942012786865, "logits/rejected": -2.670668125152588, "logps/chosen": -316.68060302734375, "logps/rejected": -354.77789306640625, "loss": 0.8776, "rewards/accuracies": 0.5, "rewards/chosen": -1.4352983236312866, "rewards/margins": 0.9401544332504272, "rewards/rejected": -2.375452756881714, "step": 6553 }, { "epoch": 0.76, "learning_rate": 7.235148222510925e-08, "logits/chosen": -2.9136030673980713, "logits/rejected": -2.933730125427246, "logps/chosen": -264.0575256347656, "logps/rejected": -364.34808349609375, "loss": 0.4619, "rewards/accuracies": 0.75, "rewards/chosen": -1.2657017707824707, "rewards/margins": 2.445497989654541, "rewards/rejected": -3.7111997604370117, "step": 6554 }, { "epoch": 0.76, "learning_rate": 7.231605054919097e-08, "logits/chosen": -2.730557918548584, "logits/rejected": -2.598778009414673, "logps/chosen": -116.2453842163086, "logps/rejected": -258.6522216796875, "loss": 0.1305, "rewards/accuracies": 1.0, "rewards/chosen": -1.0272077322006226, "rewards/margins": 3.154448986053467, "rewards/rejected": -4.181656837463379, "step": 6555 }, { "epoch": 0.76, "learning_rate": 7.22806188732727e-08, "logits/chosen": -2.5023436546325684, "logits/rejected": -2.43769907951355, "logps/chosen": -205.6953582763672, "logps/rejected": -327.6824035644531, "loss": 0.3144, "rewards/accuracies": 0.875, "rewards/chosen": -0.9908139705657959, "rewards/margins": 3.6888599395751953, "rewards/rejected": -4.67967414855957, "step": 6556 }, { "epoch": 0.76, "learning_rate": 7.224518719735444e-08, "logits/chosen": -1.8455619812011719, "logits/rejected": -2.047776222229004, "logps/chosen": -548.744873046875, "logps/rejected": -445.59625244140625, "loss": 0.3119, "rewards/accuracies": 0.875, "rewards/chosen": -0.6766484975814819, "rewards/margins": 1.7968112230300903, "rewards/rejected": -2.4734597206115723, "step": 6557 }, { "epoch": 0.76, "learning_rate": 7.220975552143616e-08, "logits/chosen": -2.4191226959228516, "logits/rejected": -2.3122975826263428, "logps/chosen": -317.31927490234375, "logps/rejected": -300.4651794433594, "loss": 0.1803, "rewards/accuracies": 0.875, "rewards/chosen": -1.0931577682495117, "rewards/margins": 2.896366596221924, "rewards/rejected": -3.9895246028900146, "step": 6558 }, { "epoch": 0.76, "learning_rate": 7.217432384551789e-08, "logits/chosen": -2.0525755882263184, "logits/rejected": -2.3356125354766846, "logps/chosen": -545.1234130859375, "logps/rejected": -360.16107177734375, "loss": 0.1789, "rewards/accuracies": 1.0, "rewards/chosen": -0.5032755136489868, "rewards/margins": 2.242983341217041, "rewards/rejected": -2.746258497238159, "step": 6559 }, { "epoch": 0.76, "learning_rate": 7.213889216959962e-08, "logits/chosen": -2.060914993286133, "logits/rejected": -2.2822837829589844, "logps/chosen": -336.3218994140625, "logps/rejected": -211.91015625, "loss": 0.6174, "rewards/accuracies": 0.75, "rewards/chosen": -0.8327117562294006, "rewards/margins": 2.033830165863037, "rewards/rejected": -2.866541862487793, "step": 6560 }, { "epoch": 0.76, "learning_rate": 7.210346049368134e-08, "logits/chosen": -2.302398204803467, "logits/rejected": -2.2340269088745117, "logps/chosen": -305.50543212890625, "logps/rejected": -337.9438781738281, "loss": 0.5812, "rewards/accuracies": 0.5, "rewards/chosen": -1.1847318410873413, "rewards/margins": 1.9427093267440796, "rewards/rejected": -3.127440929412842, "step": 6561 }, { "epoch": 0.76, "learning_rate": 7.206802881776308e-08, "logits/chosen": -2.2075300216674805, "logits/rejected": -2.400376558303833, "logps/chosen": -300.6115417480469, "logps/rejected": -170.13043212890625, "loss": 0.2824, "rewards/accuracies": 1.0, "rewards/chosen": -0.4874149560928345, "rewards/margins": 2.8852169513702393, "rewards/rejected": -3.3726320266723633, "step": 6562 }, { "epoch": 0.76, "learning_rate": 7.203259714184481e-08, "logits/chosen": -2.06742262840271, "logits/rejected": -2.191110610961914, "logps/chosen": -474.23370361328125, "logps/rejected": -427.57342529296875, "loss": 1.037, "rewards/accuracies": 0.375, "rewards/chosen": -1.7416523694992065, "rewards/margins": -0.12721076607704163, "rewards/rejected": -1.6144416332244873, "step": 6563 }, { "epoch": 0.76, "learning_rate": 7.199716546592654e-08, "logits/chosen": -2.5993423461914062, "logits/rejected": -2.7527365684509277, "logps/chosen": -427.892333984375, "logps/rejected": -271.1238708496094, "loss": 0.4168, "rewards/accuracies": 0.625, "rewards/chosen": -1.0618658065795898, "rewards/margins": 2.125018835067749, "rewards/rejected": -3.186884641647339, "step": 6564 }, { "epoch": 0.76, "learning_rate": 7.196173379000826e-08, "logits/chosen": -2.2372584342956543, "logits/rejected": -1.7887210845947266, "logps/chosen": -353.0359191894531, "logps/rejected": -455.6588134765625, "loss": 0.7099, "rewards/accuracies": 0.75, "rewards/chosen": -1.2904033660888672, "rewards/margins": 1.6828110218048096, "rewards/rejected": -2.973214626312256, "step": 6565 }, { "epoch": 0.76, "learning_rate": 7.192630211408999e-08, "logits/chosen": -2.256141424179077, "logits/rejected": -2.159013271331787, "logps/chosen": -354.660400390625, "logps/rejected": -271.24468994140625, "loss": 0.5736, "rewards/accuracies": 0.5, "rewards/chosen": -0.6704725623130798, "rewards/margins": 0.9750857353210449, "rewards/rejected": -1.6455583572387695, "step": 6566 }, { "epoch": 0.76, "learning_rate": 7.189087043817173e-08, "logits/chosen": -2.3853988647460938, "logits/rejected": -2.662398338317871, "logps/chosen": -323.795166015625, "logps/rejected": -200.51747131347656, "loss": 0.2398, "rewards/accuracies": 1.0, "rewards/chosen": -0.6135047078132629, "rewards/margins": 2.204486846923828, "rewards/rejected": -2.8179914951324463, "step": 6567 }, { "epoch": 0.76, "learning_rate": 7.185543876225345e-08, "logits/chosen": -2.290132999420166, "logits/rejected": -2.2598729133605957, "logps/chosen": -222.91253662109375, "logps/rejected": -205.90487670898438, "loss": 0.484, "rewards/accuracies": 0.75, "rewards/chosen": -2.144146680831909, "rewards/margins": 1.8360090255737305, "rewards/rejected": -3.9801554679870605, "step": 6568 }, { "epoch": 0.76, "learning_rate": 7.182000708633519e-08, "logits/chosen": -2.860623359680176, "logits/rejected": -2.7459938526153564, "logps/chosen": -190.53921508789062, "logps/rejected": -266.0608215332031, "loss": 0.2989, "rewards/accuracies": 0.875, "rewards/chosen": -1.4529485702514648, "rewards/margins": 2.4082086086273193, "rewards/rejected": -3.861157178878784, "step": 6569 }, { "epoch": 0.76, "learning_rate": 7.178457541041691e-08, "logits/chosen": -2.4549155235290527, "logits/rejected": -2.3483550548553467, "logps/chosen": -169.5719451904297, "logps/rejected": -232.28244018554688, "loss": 0.4462, "rewards/accuracies": 0.625, "rewards/chosen": -0.5750154256820679, "rewards/margins": 1.4062734842300415, "rewards/rejected": -1.981289029121399, "step": 6570 }, { "epoch": 0.76, "learning_rate": 7.174914373449863e-08, "logits/chosen": -2.220867395401001, "logits/rejected": -2.1686244010925293, "logps/chosen": -208.12989807128906, "logps/rejected": -219.7139434814453, "loss": 0.2391, "rewards/accuracies": 1.0, "rewards/chosen": -1.2352497577667236, "rewards/margins": 2.3534114360809326, "rewards/rejected": -3.5886611938476562, "step": 6571 }, { "epoch": 0.76, "learning_rate": 7.171371205858037e-08, "logits/chosen": -1.8048017024993896, "logits/rejected": -2.104722499847412, "logps/chosen": -303.0167541503906, "logps/rejected": -291.9480895996094, "loss": 0.7084, "rewards/accuracies": 0.625, "rewards/chosen": -0.6916234493255615, "rewards/margins": 0.26465147733688354, "rewards/rejected": -0.9562749266624451, "step": 6572 }, { "epoch": 0.76, "learning_rate": 7.16782803826621e-08, "logits/chosen": -2.6052513122558594, "logits/rejected": -2.61708927154541, "logps/chosen": -339.950439453125, "logps/rejected": -247.12716674804688, "loss": 0.1938, "rewards/accuracies": 1.0, "rewards/chosen": -0.10415807366371155, "rewards/margins": 2.412104368209839, "rewards/rejected": -2.5162622928619385, "step": 6573 }, { "epoch": 0.76, "learning_rate": 7.164284870674382e-08, "logits/chosen": -2.5525941848754883, "logits/rejected": -2.7941062450408936, "logps/chosen": -333.68682861328125, "logps/rejected": -148.64871215820312, "loss": 0.5338, "rewards/accuracies": 0.75, "rewards/chosen": -0.8634580969810486, "rewards/margins": 0.998272180557251, "rewards/rejected": -1.8617304563522339, "step": 6574 }, { "epoch": 0.76, "learning_rate": 7.160741703082556e-08, "logits/chosen": -2.4997150897979736, "logits/rejected": -2.441781759262085, "logps/chosen": -345.038818359375, "logps/rejected": -232.8472900390625, "loss": 0.2771, "rewards/accuracies": 0.875, "rewards/chosen": -0.7323188185691833, "rewards/margins": 1.831019639968872, "rewards/rejected": -2.5633385181427, "step": 6575 }, { "epoch": 0.76, "learning_rate": 7.157198535490728e-08, "logits/chosen": -2.5994954109191895, "logits/rejected": -2.64284086227417, "logps/chosen": -156.81898498535156, "logps/rejected": -256.114990234375, "loss": 0.2208, "rewards/accuracies": 1.0, "rewards/chosen": -0.678633451461792, "rewards/margins": 1.6605322360992432, "rewards/rejected": -2.339165687561035, "step": 6576 }, { "epoch": 0.77, "learning_rate": 7.1536553678989e-08, "logits/chosen": -2.334977149963379, "logits/rejected": -2.39477801322937, "logps/chosen": -242.10418701171875, "logps/rejected": -324.3295593261719, "loss": 0.2015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0180972814559937, "rewards/margins": 3.5532448291778564, "rewards/rejected": -4.571342468261719, "step": 6577 }, { "epoch": 0.77, "learning_rate": 7.150112200307074e-08, "logits/chosen": -1.5605220794677734, "logits/rejected": -1.657948613166809, "logps/chosen": -451.6317443847656, "logps/rejected": -399.4571838378906, "loss": 0.4709, "rewards/accuracies": 0.75, "rewards/chosen": -0.8863317966461182, "rewards/margins": 1.616328239440918, "rewards/rejected": -2.502660036087036, "step": 6578 }, { "epoch": 0.77, "learning_rate": 7.146569032715247e-08, "logits/chosen": -1.1152442693710327, "logits/rejected": -1.1567585468292236, "logps/chosen": -710.7568359375, "logps/rejected": -590.7899169921875, "loss": 0.4131, "rewards/accuracies": 0.75, "rewards/chosen": -0.427842915058136, "rewards/margins": 1.3298442363739014, "rewards/rejected": -1.7576872110366821, "step": 6579 }, { "epoch": 0.77, "learning_rate": 7.143025865123421e-08, "logits/chosen": -2.792476177215576, "logits/rejected": -2.8484408855438232, "logps/chosen": -211.41357421875, "logps/rejected": -292.6558837890625, "loss": 0.3843, "rewards/accuracies": 0.875, "rewards/chosen": -1.2674305438995361, "rewards/margins": 3.8567700386047363, "rewards/rejected": -5.124200344085693, "step": 6580 }, { "epoch": 0.77, "learning_rate": 7.139482697531593e-08, "logits/chosen": -2.7501626014709473, "logits/rejected": -2.411219835281372, "logps/chosen": -183.0274658203125, "logps/rejected": -390.5455627441406, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": -0.5011011362075806, "rewards/margins": 3.47036075592041, "rewards/rejected": -3.9714620113372803, "step": 6581 }, { "epoch": 0.77, "learning_rate": 7.135939529939765e-08, "logits/chosen": -2.694293737411499, "logits/rejected": -2.8904948234558105, "logps/chosen": -274.9427795410156, "logps/rejected": -187.0045166015625, "loss": 0.6538, "rewards/accuracies": 0.75, "rewards/chosen": -1.6009187698364258, "rewards/margins": 2.1450629234313965, "rewards/rejected": -3.7459819316864014, "step": 6582 }, { "epoch": 0.77, "learning_rate": 7.132396362347939e-08, "logits/chosen": -2.3319268226623535, "logits/rejected": -2.4902915954589844, "logps/chosen": -364.5555725097656, "logps/rejected": -335.962158203125, "loss": 0.7266, "rewards/accuracies": 0.625, "rewards/chosen": -0.6324841380119324, "rewards/margins": 1.6029672622680664, "rewards/rejected": -2.2354514598846436, "step": 6583 }, { "epoch": 0.77, "learning_rate": 7.128853194756112e-08, "logits/chosen": -2.3043508529663086, "logits/rejected": -2.4165549278259277, "logps/chosen": -183.96328735351562, "logps/rejected": -138.2550048828125, "loss": 0.9518, "rewards/accuracies": 0.5, "rewards/chosen": -1.9003181457519531, "rewards/margins": 0.7257276773452759, "rewards/rejected": -2.6260459423065186, "step": 6584 }, { "epoch": 0.77, "learning_rate": 7.125310027164285e-08, "logits/chosen": -2.490055561065674, "logits/rejected": -2.765590190887451, "logps/chosen": -543.8170776367188, "logps/rejected": -296.1157531738281, "loss": 0.286, "rewards/accuracies": 0.875, "rewards/chosen": -0.32808917760849, "rewards/margins": 3.615591526031494, "rewards/rejected": -3.943680763244629, "step": 6585 }, { "epoch": 0.77, "learning_rate": 7.121766859572458e-08, "logits/chosen": -2.9766478538513184, "logits/rejected": -2.91683292388916, "logps/chosen": -268.734619140625, "logps/rejected": -335.5603332519531, "loss": 0.1984, "rewards/accuracies": 0.875, "rewards/chosen": -0.5018095970153809, "rewards/margins": 2.518181085586548, "rewards/rejected": -3.0199904441833496, "step": 6586 }, { "epoch": 0.77, "learning_rate": 7.11822369198063e-08, "logits/chosen": -2.635106325149536, "logits/rejected": -2.7410998344421387, "logps/chosen": -214.9885711669922, "logps/rejected": -257.1778259277344, "loss": 0.1906, "rewards/accuracies": 1.0, "rewards/chosen": -0.6152777671813965, "rewards/margins": 2.673250913619995, "rewards/rejected": -3.2885289192199707, "step": 6587 }, { "epoch": 0.77, "learning_rate": 7.114680524388803e-08, "logits/chosen": -2.546565294265747, "logits/rejected": -2.1516318321228027, "logps/chosen": -109.921142578125, "logps/rejected": -257.1788330078125, "loss": 0.4213, "rewards/accuracies": 0.75, "rewards/chosen": -0.6235591769218445, "rewards/margins": 1.2545857429504395, "rewards/rejected": -1.8781448602676392, "step": 6588 }, { "epoch": 0.77, "learning_rate": 7.111137356796976e-08, "logits/chosen": -1.69341242313385, "logits/rejected": -2.005821943283081, "logps/chosen": -472.2279968261719, "logps/rejected": -377.54583740234375, "loss": 0.8055, "rewards/accuracies": 0.75, "rewards/chosen": -1.806959629058838, "rewards/margins": 1.255922794342041, "rewards/rejected": -3.062882661819458, "step": 6589 }, { "epoch": 0.77, "learning_rate": 7.10759418920515e-08, "logits/chosen": -2.194896936416626, "logits/rejected": -2.074000597000122, "logps/chosen": -194.71420288085938, "logps/rejected": -327.8777770996094, "loss": 0.1985, "rewards/accuracies": 1.0, "rewards/chosen": -0.5948743224143982, "rewards/margins": 2.3299942016601562, "rewards/rejected": -2.924868583679199, "step": 6590 }, { "epoch": 0.77, "learning_rate": 7.104051021613322e-08, "logits/chosen": -2.6353819370269775, "logits/rejected": -2.858490228652954, "logps/chosen": -237.4617462158203, "logps/rejected": -233.78521728515625, "loss": 0.1916, "rewards/accuracies": 1.0, "rewards/chosen": -0.5409172177314758, "rewards/margins": 2.4213995933532715, "rewards/rejected": -2.9623167514801025, "step": 6591 }, { "epoch": 0.77, "learning_rate": 7.100507854021495e-08, "logits/chosen": -2.4676883220672607, "logits/rejected": -2.6945886611938477, "logps/chosen": -222.08116149902344, "logps/rejected": -220.3759307861328, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": -0.03765710070729256, "rewards/margins": 3.4931957721710205, "rewards/rejected": -3.530852794647217, "step": 6592 }, { "epoch": 0.77, "learning_rate": 7.096964686429668e-08, "logits/chosen": -2.5589213371276855, "logits/rejected": -2.537661075592041, "logps/chosen": -180.3143768310547, "logps/rejected": -246.37892150878906, "loss": 0.2642, "rewards/accuracies": 1.0, "rewards/chosen": -1.7748106718063354, "rewards/margins": 2.6238901615142822, "rewards/rejected": -4.398700714111328, "step": 6593 }, { "epoch": 0.77, "learning_rate": 7.093421518837841e-08, "logits/chosen": -2.1803622245788574, "logits/rejected": -2.0115742683410645, "logps/chosen": -361.5255432128906, "logps/rejected": -371.6014709472656, "loss": 0.4742, "rewards/accuracies": 0.75, "rewards/chosen": -1.4732470512390137, "rewards/margins": 1.8863335847854614, "rewards/rejected": -3.3595809936523438, "step": 6594 }, { "epoch": 0.77, "learning_rate": 7.089878351246013e-08, "logits/chosen": -2.73527193069458, "logits/rejected": -2.818406105041504, "logps/chosen": -371.71600341796875, "logps/rejected": -255.74227905273438, "loss": 0.4158, "rewards/accuracies": 0.875, "rewards/chosen": -0.5244619846343994, "rewards/margins": 1.3789600133895874, "rewards/rejected": -1.9034219980239868, "step": 6595 }, { "epoch": 0.77, "learning_rate": 7.086335183654187e-08, "logits/chosen": -2.0440053939819336, "logits/rejected": -2.263322353363037, "logps/chosen": -373.58203125, "logps/rejected": -199.83987426757812, "loss": 0.2796, "rewards/accuracies": 0.75, "rewards/chosen": 0.4240535497665405, "rewards/margins": 2.214442729949951, "rewards/rejected": -1.790389060974121, "step": 6596 }, { "epoch": 0.77, "learning_rate": 7.082792016062359e-08, "logits/chosen": -1.8701245784759521, "logits/rejected": -1.865479588508606, "logps/chosen": -176.17059326171875, "logps/rejected": -251.68515014648438, "loss": 0.7459, "rewards/accuracies": 0.875, "rewards/chosen": -0.8355588912963867, "rewards/margins": 1.9148268699645996, "rewards/rejected": -2.7503857612609863, "step": 6597 }, { "epoch": 0.77, "learning_rate": 7.079248848470533e-08, "logits/chosen": -1.4631054401397705, "logits/rejected": -1.7176802158355713, "logps/chosen": -341.572265625, "logps/rejected": -336.7872009277344, "loss": 0.361, "rewards/accuracies": 0.875, "rewards/chosen": -0.5201863646507263, "rewards/margins": 1.8873575925827026, "rewards/rejected": -2.407543897628784, "step": 6598 }, { "epoch": 0.77, "learning_rate": 7.075705680878705e-08, "logits/chosen": -2.033832550048828, "logits/rejected": -1.8602389097213745, "logps/chosen": -216.70803833007812, "logps/rejected": -278.1370544433594, "loss": 0.6953, "rewards/accuracies": 0.625, "rewards/chosen": -1.0389900207519531, "rewards/margins": 1.6648831367492676, "rewards/rejected": -2.7038731575012207, "step": 6599 }, { "epoch": 0.77, "learning_rate": 7.072162513286878e-08, "logits/chosen": -2.198885679244995, "logits/rejected": -2.1142377853393555, "logps/chosen": -294.97149658203125, "logps/rejected": -352.531982421875, "loss": 0.3151, "rewards/accuracies": 0.875, "rewards/chosen": -0.42281007766723633, "rewards/margins": 1.930802583694458, "rewards/rejected": -2.3536126613616943, "step": 6600 }, { "epoch": 0.77, "learning_rate": 7.068619345695052e-08, "logits/chosen": -2.3773207664489746, "logits/rejected": -2.327993392944336, "logps/chosen": -266.50506591796875, "logps/rejected": -209.75372314453125, "loss": 0.3627, "rewards/accuracies": 0.75, "rewards/chosen": -1.10751211643219, "rewards/margins": 2.5054850578308105, "rewards/rejected": -3.61299729347229, "step": 6601 }, { "epoch": 0.77, "learning_rate": 7.065076178103224e-08, "logits/chosen": -2.049816370010376, "logits/rejected": -1.9778817892074585, "logps/chosen": -292.8638610839844, "logps/rejected": -254.61312866210938, "loss": 0.7338, "rewards/accuracies": 0.625, "rewards/chosen": -0.8768731355667114, "rewards/margins": 0.7110817432403564, "rewards/rejected": -1.5879547595977783, "step": 6602 }, { "epoch": 0.77, "learning_rate": 7.061533010511396e-08, "logits/chosen": -1.6834138631820679, "logits/rejected": -1.8715546131134033, "logps/chosen": -494.77545166015625, "logps/rejected": -351.3647766113281, "loss": 0.2809, "rewards/accuracies": 0.875, "rewards/chosen": -0.24862122535705566, "rewards/margins": 1.687313437461853, "rewards/rejected": -1.9359346628189087, "step": 6603 }, { "epoch": 0.77, "learning_rate": 7.05798984291957e-08, "logits/chosen": -2.500033378601074, "logits/rejected": -2.347038984298706, "logps/chosen": -234.99172973632812, "logps/rejected": -239.53709411621094, "loss": 0.4528, "rewards/accuracies": 0.875, "rewards/chosen": -0.8218315839767456, "rewards/margins": 2.3247923851013184, "rewards/rejected": -3.1466240882873535, "step": 6604 }, { "epoch": 0.77, "learning_rate": 7.054446675327742e-08, "logits/chosen": -2.469059467315674, "logits/rejected": -2.4501309394836426, "logps/chosen": -184.72787475585938, "logps/rejected": -215.57427978515625, "loss": 0.3123, "rewards/accuracies": 1.0, "rewards/chosen": -1.1057450771331787, "rewards/margins": 1.7908668518066406, "rewards/rejected": -2.8966116905212402, "step": 6605 }, { "epoch": 0.77, "learning_rate": 7.050903507735916e-08, "logits/chosen": -2.194666862487793, "logits/rejected": -2.451421022415161, "logps/chosen": -410.2794189453125, "logps/rejected": -358.3693542480469, "loss": 0.8041, "rewards/accuracies": 0.75, "rewards/chosen": -1.081912636756897, "rewards/margins": 1.566514015197754, "rewards/rejected": -2.6484267711639404, "step": 6606 }, { "epoch": 0.77, "learning_rate": 7.047360340144089e-08, "logits/chosen": -2.5707454681396484, "logits/rejected": -2.5349347591400146, "logps/chosen": -166.31765747070312, "logps/rejected": -204.29615783691406, "loss": 0.4491, "rewards/accuracies": 0.75, "rewards/chosen": -1.1405258178710938, "rewards/margins": 2.2832295894622803, "rewards/rejected": -3.423755645751953, "step": 6607 }, { "epoch": 0.77, "learning_rate": 7.043817172552261e-08, "logits/chosen": -2.4307923316955566, "logits/rejected": -2.410396099090576, "logps/chosen": -269.4496154785156, "logps/rejected": -274.9190673828125, "loss": 0.6383, "rewards/accuracies": 0.5, "rewards/chosen": -1.236451506614685, "rewards/margins": 2.384723663330078, "rewards/rejected": -3.6211748123168945, "step": 6608 }, { "epoch": 0.77, "learning_rate": 7.040274004960434e-08, "logits/chosen": -2.2702419757843018, "logits/rejected": -2.4160943031311035, "logps/chosen": -516.3233642578125, "logps/rejected": -362.15142822265625, "loss": 0.3267, "rewards/accuracies": 0.875, "rewards/chosen": -1.327873945236206, "rewards/margins": 1.3431696891784668, "rewards/rejected": -2.6710433959960938, "step": 6609 }, { "epoch": 0.77, "learning_rate": 7.036730837368607e-08, "logits/chosen": -2.109980583190918, "logits/rejected": -2.179483413696289, "logps/chosen": -322.4153137207031, "logps/rejected": -289.8718566894531, "loss": 0.3486, "rewards/accuracies": 0.875, "rewards/chosen": -0.03851597011089325, "rewards/margins": 1.5667411088943481, "rewards/rejected": -1.6052570343017578, "step": 6610 }, { "epoch": 0.77, "learning_rate": 7.03318766977678e-08, "logits/chosen": -1.738944172859192, "logits/rejected": -1.9524295330047607, "logps/chosen": -385.84002685546875, "logps/rejected": -394.3967590332031, "loss": 0.1048, "rewards/accuracies": 1.0, "rewards/chosen": -0.8301822543144226, "rewards/margins": 3.6284518241882324, "rewards/rejected": -4.458634376525879, "step": 6611 }, { "epoch": 0.77, "learning_rate": 7.029644502184953e-08, "logits/chosen": -3.090388059616089, "logits/rejected": -2.977144241333008, "logps/chosen": -375.7198791503906, "logps/rejected": -250.50637817382812, "loss": 0.2045, "rewards/accuracies": 1.0, "rewards/chosen": -0.6453310251235962, "rewards/margins": 2.2055253982543945, "rewards/rejected": -2.8508565425872803, "step": 6612 }, { "epoch": 0.77, "learning_rate": 7.026101334593126e-08, "logits/chosen": -2.1728622913360596, "logits/rejected": -2.492575168609619, "logps/chosen": -327.7327880859375, "logps/rejected": -198.7549591064453, "loss": 0.9959, "rewards/accuracies": 0.5, "rewards/chosen": -2.0687782764434814, "rewards/margins": 0.7634240388870239, "rewards/rejected": -2.832202434539795, "step": 6613 }, { "epoch": 0.77, "learning_rate": 7.022558167001299e-08, "logits/chosen": -2.4572174549102783, "logits/rejected": -2.4759702682495117, "logps/chosen": -218.84423828125, "logps/rejected": -327.53033447265625, "loss": 0.1461, "rewards/accuracies": 1.0, "rewards/chosen": -0.7455686926841736, "rewards/margins": 3.667435884475708, "rewards/rejected": -4.413004398345947, "step": 6614 }, { "epoch": 0.77, "learning_rate": 7.019014999409471e-08, "logits/chosen": -2.533139228820801, "logits/rejected": -2.720991849899292, "logps/chosen": -349.4258728027344, "logps/rejected": -335.9798583984375, "loss": 0.1059, "rewards/accuracies": 1.0, "rewards/chosen": -0.9443877935409546, "rewards/margins": 2.964670181274414, "rewards/rejected": -3.9090576171875, "step": 6615 }, { "epoch": 0.77, "learning_rate": 7.015471831817644e-08, "logits/chosen": -2.3158645629882812, "logits/rejected": -2.364549398422241, "logps/chosen": -257.8260498046875, "logps/rejected": -339.10137939453125, "loss": 0.2317, "rewards/accuracies": 0.875, "rewards/chosen": -1.2057769298553467, "rewards/margins": 2.7890753746032715, "rewards/rejected": -3.9948525428771973, "step": 6616 }, { "epoch": 0.77, "learning_rate": 7.011928664225818e-08, "logits/chosen": -2.2050368785858154, "logits/rejected": -2.327218532562256, "logps/chosen": -268.4022216796875, "logps/rejected": -225.6710968017578, "loss": 0.2627, "rewards/accuracies": 1.0, "rewards/chosen": -0.38610756397247314, "rewards/margins": 1.8269723653793335, "rewards/rejected": -2.2130799293518066, "step": 6617 }, { "epoch": 0.77, "learning_rate": 7.008385496633991e-08, "logits/chosen": -2.3171496391296387, "logits/rejected": -2.2513980865478516, "logps/chosen": -216.95068359375, "logps/rejected": -284.6390686035156, "loss": 0.4528, "rewards/accuracies": 0.875, "rewards/chosen": -0.9000439047813416, "rewards/margins": 0.9235094785690308, "rewards/rejected": -1.8235533237457275, "step": 6618 }, { "epoch": 0.77, "learning_rate": 7.004842329042164e-08, "logits/chosen": -2.234891414642334, "logits/rejected": -2.241267442703247, "logps/chosen": -207.60003662109375, "logps/rejected": -217.52725219726562, "loss": 0.3256, "rewards/accuracies": 1.0, "rewards/chosen": -0.7794975638389587, "rewards/margins": 2.5186290740966797, "rewards/rejected": -3.298126697540283, "step": 6619 }, { "epoch": 0.77, "learning_rate": 7.001299161450336e-08, "logits/chosen": -2.447984218597412, "logits/rejected": -2.419877767562866, "logps/chosen": -301.27825927734375, "logps/rejected": -343.28167724609375, "loss": 0.1857, "rewards/accuracies": 1.0, "rewards/chosen": -1.3788779973983765, "rewards/margins": 2.88132381439209, "rewards/rejected": -4.260201930999756, "step": 6620 }, { "epoch": 0.77, "learning_rate": 6.99775599385851e-08, "logits/chosen": -2.5448317527770996, "logits/rejected": -2.803438186645508, "logps/chosen": -352.8631591796875, "logps/rejected": -167.7257843017578, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": -0.2308075726032257, "rewards/margins": 2.5865988731384277, "rewards/rejected": -2.817406177520752, "step": 6621 }, { "epoch": 0.77, "learning_rate": 6.994212826266682e-08, "logits/chosen": -2.257390022277832, "logits/rejected": -2.3664021492004395, "logps/chosen": -378.28887939453125, "logps/rejected": -180.0978546142578, "loss": 0.4339, "rewards/accuracies": 0.75, "rewards/chosen": -0.6106510758399963, "rewards/margins": 1.7156684398651123, "rewards/rejected": -2.326319456100464, "step": 6622 }, { "epoch": 0.77, "learning_rate": 6.990669658674855e-08, "logits/chosen": -2.1238322257995605, "logits/rejected": -1.9127647876739502, "logps/chosen": -363.7259521484375, "logps/rejected": -441.3682556152344, "loss": 0.2292, "rewards/accuracies": 0.875, "rewards/chosen": -0.2712361216545105, "rewards/margins": 4.4554033279418945, "rewards/rejected": -4.726639270782471, "step": 6623 }, { "epoch": 0.77, "learning_rate": 6.987126491083029e-08, "logits/chosen": -2.550433397293091, "logits/rejected": -2.5088014602661133, "logps/chosen": -366.5540466308594, "logps/rejected": -306.4691162109375, "loss": 0.2515, "rewards/accuracies": 0.875, "rewards/chosen": -0.7718722224235535, "rewards/margins": 2.192201614379883, "rewards/rejected": -2.964073896408081, "step": 6624 }, { "epoch": 0.77, "learning_rate": 6.983583323491201e-08, "logits/chosen": -2.8775973320007324, "logits/rejected": -2.8634979724884033, "logps/chosen": -126.82150268554688, "logps/rejected": -235.63772583007812, "loss": 0.3977, "rewards/accuracies": 0.75, "rewards/chosen": -1.1370368003845215, "rewards/margins": 2.8018836975097656, "rewards/rejected": -3.938920497894287, "step": 6625 }, { "epoch": 0.77, "learning_rate": 6.980040155899373e-08, "logits/chosen": -2.6823110580444336, "logits/rejected": -2.273869037628174, "logps/chosen": -69.99015045166016, "logps/rejected": -295.0671691894531, "loss": 0.711, "rewards/accuracies": 0.625, "rewards/chosen": -1.2944557666778564, "rewards/margins": 1.178027868270874, "rewards/rejected": -2.4724836349487305, "step": 6626 }, { "epoch": 0.77, "learning_rate": 6.976496988307547e-08, "logits/chosen": -1.9078502655029297, "logits/rejected": -2.0522212982177734, "logps/chosen": -323.8881530761719, "logps/rejected": -236.24331665039062, "loss": 0.4388, "rewards/accuracies": 0.75, "rewards/chosen": 0.01991422474384308, "rewards/margins": 1.0969030857086182, "rewards/rejected": -1.076988935470581, "step": 6627 }, { "epoch": 0.77, "learning_rate": 6.97295382071572e-08, "logits/chosen": -2.1701769828796387, "logits/rejected": -2.3331563472747803, "logps/chosen": -247.45120239257812, "logps/rejected": -179.26004028320312, "loss": 0.2787, "rewards/accuracies": 1.0, "rewards/chosen": -0.5168095827102661, "rewards/margins": 1.7797433137893677, "rewards/rejected": -2.296552896499634, "step": 6628 }, { "epoch": 0.77, "learning_rate": 6.969410653123892e-08, "logits/chosen": -2.079460859298706, "logits/rejected": -2.389756202697754, "logps/chosen": -440.46453857421875, "logps/rejected": -297.7349853515625, "loss": 0.3677, "rewards/accuracies": 0.75, "rewards/chosen": -0.693283200263977, "rewards/margins": 1.3064268827438354, "rewards/rejected": -1.9997100830078125, "step": 6629 }, { "epoch": 0.77, "learning_rate": 6.965867485532066e-08, "logits/chosen": -2.77360200881958, "logits/rejected": -2.557802438735962, "logps/chosen": -159.20547485351562, "logps/rejected": -187.08859252929688, "loss": 0.7045, "rewards/accuracies": 0.625, "rewards/chosen": -1.3333966732025146, "rewards/margins": 1.133470058441162, "rewards/rejected": -2.4668667316436768, "step": 6630 }, { "epoch": 0.77, "learning_rate": 6.962324317940238e-08, "logits/chosen": -2.184325933456421, "logits/rejected": -2.1934454441070557, "logps/chosen": -174.13169860839844, "logps/rejected": -255.92047119140625, "loss": 0.6279, "rewards/accuracies": 0.75, "rewards/chosen": -1.4359121322631836, "rewards/margins": 1.796686053276062, "rewards/rejected": -3.232598304748535, "step": 6631 }, { "epoch": 0.77, "learning_rate": 6.95878115034841e-08, "logits/chosen": -1.9716644287109375, "logits/rejected": -1.8746100664138794, "logps/chosen": -437.4822998046875, "logps/rejected": -484.39215087890625, "loss": 0.2563, "rewards/accuracies": 0.875, "rewards/chosen": -0.7362415194511414, "rewards/margins": 2.0270400047302246, "rewards/rejected": -2.76328182220459, "step": 6632 }, { "epoch": 0.77, "learning_rate": 6.955237982756584e-08, "logits/chosen": -1.991597294807434, "logits/rejected": -2.3162038326263428, "logps/chosen": -237.528564453125, "logps/rejected": -197.452392578125, "loss": 0.1455, "rewards/accuracies": 1.0, "rewards/chosen": -0.3604428768157959, "rewards/margins": 2.5374393463134766, "rewards/rejected": -2.8978824615478516, "step": 6633 }, { "epoch": 0.77, "learning_rate": 6.951694815164757e-08, "logits/chosen": -2.631809711456299, "logits/rejected": -2.59686279296875, "logps/chosen": -209.438720703125, "logps/rejected": -223.6019744873047, "loss": 0.1834, "rewards/accuracies": 0.875, "rewards/chosen": -1.3835806846618652, "rewards/margins": 3.4554567337036133, "rewards/rejected": -4.8390374183654785, "step": 6634 }, { "epoch": 0.77, "learning_rate": 6.94815164757293e-08, "logits/chosen": -2.712904930114746, "logits/rejected": -2.690114974975586, "logps/chosen": -306.30010986328125, "logps/rejected": -263.168701171875, "loss": 0.6519, "rewards/accuracies": 0.75, "rewards/chosen": -0.7241472601890564, "rewards/margins": 3.0463032722473145, "rewards/rejected": -3.7704505920410156, "step": 6635 }, { "epoch": 0.77, "learning_rate": 6.944608479981103e-08, "logits/chosen": -2.3657848834991455, "logits/rejected": -2.300935745239258, "logps/chosen": -250.40768432617188, "logps/rejected": -277.20428466796875, "loss": 0.3891, "rewards/accuracies": 0.75, "rewards/chosen": -1.2417380809783936, "rewards/margins": 1.886415958404541, "rewards/rejected": -3.1281542778015137, "step": 6636 }, { "epoch": 0.77, "learning_rate": 6.941065312389275e-08, "logits/chosen": -2.375091314315796, "logits/rejected": -2.6809215545654297, "logps/chosen": -490.8929138183594, "logps/rejected": -370.1326904296875, "loss": 0.461, "rewards/accuracies": 0.75, "rewards/chosen": -0.6053032279014587, "rewards/margins": 1.198777675628662, "rewards/rejected": -1.804080843925476, "step": 6637 }, { "epoch": 0.77, "learning_rate": 6.937522144797449e-08, "logits/chosen": -2.343369960784912, "logits/rejected": -2.0317327976226807, "logps/chosen": -120.5345458984375, "logps/rejected": -186.42825317382812, "loss": 0.3822, "rewards/accuracies": 0.875, "rewards/chosen": -1.2421795129776, "rewards/margins": 1.5862599611282349, "rewards/rejected": -2.828439474105835, "step": 6638 }, { "epoch": 0.77, "learning_rate": 6.933978977205621e-08, "logits/chosen": -2.389347553253174, "logits/rejected": -2.49420166015625, "logps/chosen": -223.27484130859375, "logps/rejected": -290.1815185546875, "loss": 0.6958, "rewards/accuracies": 0.5, "rewards/chosen": -1.6868993043899536, "rewards/margins": 0.7938140630722046, "rewards/rejected": -2.480713367462158, "step": 6639 }, { "epoch": 0.77, "learning_rate": 6.930435809613795e-08, "logits/chosen": -2.6270298957824707, "logits/rejected": -3.0052294731140137, "logps/chosen": -398.4481506347656, "logps/rejected": -293.1113586425781, "loss": 0.2372, "rewards/accuracies": 1.0, "rewards/chosen": -0.028123825788497925, "rewards/margins": 1.5148711204528809, "rewards/rejected": -1.5429949760437012, "step": 6640 }, { "epoch": 0.77, "learning_rate": 6.926892642021967e-08, "logits/chosen": -2.1066699028015137, "logits/rejected": -1.9926878213882446, "logps/chosen": -142.7261962890625, "logps/rejected": -305.0911560058594, "loss": 0.1722, "rewards/accuracies": 0.875, "rewards/chosen": -0.90076744556427, "rewards/margins": 3.0268542766571045, "rewards/rejected": -3.927621364593506, "step": 6641 }, { "epoch": 0.77, "learning_rate": 6.92334947443014e-08, "logits/chosen": -2.714205265045166, "logits/rejected": -2.7946505546569824, "logps/chosen": -290.4112548828125, "logps/rejected": -258.45343017578125, "loss": 0.9009, "rewards/accuracies": 0.875, "rewards/chosen": -2.517221450805664, "rewards/margins": 0.4512033760547638, "rewards/rejected": -2.9684245586395264, "step": 6642 }, { "epoch": 0.77, "learning_rate": 6.919806306838313e-08, "logits/chosen": -1.948014497756958, "logits/rejected": -1.98294198513031, "logps/chosen": -267.6580810546875, "logps/rejected": -304.2770080566406, "loss": 0.2801, "rewards/accuracies": 0.875, "rewards/chosen": -0.8639314770698547, "rewards/margins": 2.2468276023864746, "rewards/rejected": -3.1107590198516846, "step": 6643 }, { "epoch": 0.77, "learning_rate": 6.916263139246486e-08, "logits/chosen": -2.8469736576080322, "logits/rejected": -2.9345526695251465, "logps/chosen": -243.15455627441406, "logps/rejected": -352.44757080078125, "loss": 0.5488, "rewards/accuracies": 0.625, "rewards/chosen": -0.4624139070510864, "rewards/margins": 2.533154249191284, "rewards/rejected": -2.995568037033081, "step": 6644 }, { "epoch": 0.77, "learning_rate": 6.91271997165466e-08, "logits/chosen": -2.5226283073425293, "logits/rejected": -2.5264055728912354, "logps/chosen": -137.54177856445312, "logps/rejected": -175.3719482421875, "loss": 0.215, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016009137034416199, "rewards/margins": 2.48758602142334, "rewards/rejected": -2.489187002182007, "step": 6645 }, { "epoch": 0.77, "learning_rate": 6.909176804062832e-08, "logits/chosen": -1.4381839036941528, "logits/rejected": -1.8904484510421753, "logps/chosen": -512.6334838867188, "logps/rejected": -367.06787109375, "loss": 0.3859, "rewards/accuracies": 0.875, "rewards/chosen": -0.7148163318634033, "rewards/margins": 1.0443284511566162, "rewards/rejected": -1.759144902229309, "step": 6646 }, { "epoch": 0.77, "learning_rate": 6.905633636471004e-08, "logits/chosen": -2.646623373031616, "logits/rejected": -2.2689201831817627, "logps/chosen": -206.1192626953125, "logps/rejected": -330.0472717285156, "loss": 0.3317, "rewards/accuracies": 0.875, "rewards/chosen": -0.6966134905815125, "rewards/margins": 3.713118076324463, "rewards/rejected": -4.409731864929199, "step": 6647 }, { "epoch": 0.77, "learning_rate": 6.902090468879178e-08, "logits/chosen": -2.775387763977051, "logits/rejected": -2.6166491508483887, "logps/chosen": -292.8073425292969, "logps/rejected": -184.85568237304688, "loss": 1.0769, "rewards/accuracies": 0.75, "rewards/chosen": -1.5371967554092407, "rewards/margins": 0.8607674241065979, "rewards/rejected": -2.3979642391204834, "step": 6648 }, { "epoch": 0.77, "learning_rate": 6.89854730128735e-08, "logits/chosen": -2.610664129257202, "logits/rejected": -2.658414125442505, "logps/chosen": -178.27243041992188, "logps/rejected": -213.05398559570312, "loss": 0.8761, "rewards/accuracies": 0.625, "rewards/chosen": -1.3539060354232788, "rewards/margins": 0.8860634565353394, "rewards/rejected": -2.239969491958618, "step": 6649 }, { "epoch": 0.77, "learning_rate": 6.895004133695523e-08, "logits/chosen": -2.6043927669525146, "logits/rejected": -2.740880012512207, "logps/chosen": -400.2502136230469, "logps/rejected": -282.388427734375, "loss": 0.9478, "rewards/accuracies": 0.625, "rewards/chosen": -1.113663911819458, "rewards/margins": 1.8005231618881226, "rewards/rejected": -2.914186954498291, "step": 6650 }, { "epoch": 0.77, "learning_rate": 6.891460966103697e-08, "logits/chosen": -2.4223198890686035, "logits/rejected": -2.3453238010406494, "logps/chosen": -349.73974609375, "logps/rejected": -426.7125549316406, "loss": 0.5692, "rewards/accuracies": 0.625, "rewards/chosen": -1.442206621170044, "rewards/margins": 0.47975996136665344, "rewards/rejected": -1.921966552734375, "step": 6651 }, { "epoch": 0.77, "learning_rate": 6.887917798511869e-08, "logits/chosen": -2.3321826457977295, "logits/rejected": -2.3653907775878906, "logps/chosen": -414.3248596191406, "logps/rejected": -273.0155029296875, "loss": 0.1649, "rewards/accuracies": 1.0, "rewards/chosen": -0.9409549832344055, "rewards/margins": 2.9121716022491455, "rewards/rejected": -3.8531267642974854, "step": 6652 }, { "epoch": 0.77, "learning_rate": 6.884374630920043e-08, "logits/chosen": -2.1017701625823975, "logits/rejected": -1.9043891429901123, "logps/chosen": -176.669921875, "logps/rejected": -276.97674560546875, "loss": 0.5942, "rewards/accuracies": 0.75, "rewards/chosen": -1.2469433546066284, "rewards/margins": 1.6676008701324463, "rewards/rejected": -2.914544105529785, "step": 6653 }, { "epoch": 0.77, "learning_rate": 6.880831463328215e-08, "logits/chosen": -1.904212474822998, "logits/rejected": -1.9556723833084106, "logps/chosen": -387.9800720214844, "logps/rejected": -287.9755554199219, "loss": 0.2272, "rewards/accuracies": 1.0, "rewards/chosen": -0.6587207317352295, "rewards/margins": 2.8442280292510986, "rewards/rejected": -3.502948760986328, "step": 6654 }, { "epoch": 0.77, "learning_rate": 6.877288295736388e-08, "logits/chosen": -2.134296178817749, "logits/rejected": -2.0261964797973633, "logps/chosen": -481.0274658203125, "logps/rejected": -387.65643310546875, "loss": 0.426, "rewards/accuracies": 0.75, "rewards/chosen": -0.8048465251922607, "rewards/margins": 3.173562526702881, "rewards/rejected": -3.9784088134765625, "step": 6655 }, { "epoch": 0.77, "learning_rate": 6.873745128144562e-08, "logits/chosen": -2.0005249977111816, "logits/rejected": -1.683049201965332, "logps/chosen": -197.4270477294922, "logps/rejected": -326.55743408203125, "loss": 0.2772, "rewards/accuracies": 1.0, "rewards/chosen": -1.3897382020950317, "rewards/margins": 1.4603462219238281, "rewards/rejected": -2.8500845432281494, "step": 6656 }, { "epoch": 0.77, "learning_rate": 6.870201960552734e-08, "logits/chosen": -2.3564882278442383, "logits/rejected": -2.4877281188964844, "logps/chosen": -257.4177551269531, "logps/rejected": -268.4561767578125, "loss": 0.3436, "rewards/accuracies": 0.75, "rewards/chosen": -0.4151650071144104, "rewards/margins": 2.5974178314208984, "rewards/rejected": -3.012583017349243, "step": 6657 }, { "epoch": 0.77, "learning_rate": 6.866658792960906e-08, "logits/chosen": -2.0950613021850586, "logits/rejected": -2.270035743713379, "logps/chosen": -425.1199951171875, "logps/rejected": -263.08819580078125, "loss": 0.2941, "rewards/accuracies": 0.875, "rewards/chosen": -0.4403303265571594, "rewards/margins": 1.696149230003357, "rewards/rejected": -2.136479616165161, "step": 6658 }, { "epoch": 0.77, "learning_rate": 6.86311562536908e-08, "logits/chosen": -2.5671989917755127, "logits/rejected": -2.3682990074157715, "logps/chosen": -269.3314208984375, "logps/rejected": -314.81243896484375, "loss": 0.3753, "rewards/accuracies": 0.75, "rewards/chosen": -1.1224250793457031, "rewards/margins": 1.9260385036468506, "rewards/rejected": -3.0484635829925537, "step": 6659 }, { "epoch": 0.77, "learning_rate": 6.859572457777252e-08, "logits/chosen": -2.1609649658203125, "logits/rejected": -2.1049904823303223, "logps/chosen": -311.01080322265625, "logps/rejected": -272.4715576171875, "loss": 0.4828, "rewards/accuracies": 0.75, "rewards/chosen": -1.5274262428283691, "rewards/margins": 2.12687611579895, "rewards/rejected": -3.6543025970458984, "step": 6660 }, { "epoch": 0.77, "learning_rate": 6.856029290185426e-08, "logits/chosen": -2.559229612350464, "logits/rejected": -2.6232171058654785, "logps/chosen": -296.31939697265625, "logps/rejected": -170.80902099609375, "loss": 0.4741, "rewards/accuracies": 0.75, "rewards/chosen": -1.071327805519104, "rewards/margins": 0.8896474242210388, "rewards/rejected": -1.9609752893447876, "step": 6661 }, { "epoch": 0.77, "learning_rate": 6.852486122593599e-08, "logits/chosen": -2.2304036617279053, "logits/rejected": -2.469721555709839, "logps/chosen": -293.23968505859375, "logps/rejected": -224.3767547607422, "loss": 0.3781, "rewards/accuracies": 0.875, "rewards/chosen": -0.9680774807929993, "rewards/margins": 1.1710808277130127, "rewards/rejected": -2.139158248901367, "step": 6662 }, { "epoch": 0.78, "learning_rate": 6.848942955001771e-08, "logits/chosen": -2.056396245956421, "logits/rejected": -2.124464750289917, "logps/chosen": -283.32275390625, "logps/rejected": -322.185302734375, "loss": 0.4609, "rewards/accuracies": 0.625, "rewards/chosen": -1.0718042850494385, "rewards/margins": 2.5476369857788086, "rewards/rejected": -3.619441032409668, "step": 6663 }, { "epoch": 0.78, "learning_rate": 6.845399787409944e-08, "logits/chosen": -2.842240810394287, "logits/rejected": -2.8054933547973633, "logps/chosen": -312.9918212890625, "logps/rejected": -305.90753173828125, "loss": 0.7551, "rewards/accuracies": 0.5, "rewards/chosen": -0.9745824337005615, "rewards/margins": 0.6597601771354675, "rewards/rejected": -1.6343426704406738, "step": 6664 }, { "epoch": 0.78, "learning_rate": 6.841856619818117e-08, "logits/chosen": -2.2440812587738037, "logits/rejected": -2.3041956424713135, "logps/chosen": -183.93267822265625, "logps/rejected": -174.135498046875, "loss": 0.3045, "rewards/accuracies": 0.75, "rewards/chosen": -0.7480471134185791, "rewards/margins": 3.1588854789733887, "rewards/rejected": -3.906932830810547, "step": 6665 }, { "epoch": 0.78, "learning_rate": 6.83831345222629e-08, "logits/chosen": -2.231405258178711, "logits/rejected": -2.4036526679992676, "logps/chosen": -255.26280212402344, "logps/rejected": -190.95584106445312, "loss": 0.6998, "rewards/accuracies": 0.75, "rewards/chosen": -1.8631519079208374, "rewards/margins": 0.9314168691635132, "rewards/rejected": -2.7945687770843506, "step": 6666 }, { "epoch": 0.78, "learning_rate": 6.834770284634463e-08, "logits/chosen": -1.7355539798736572, "logits/rejected": -1.5476696491241455, "logps/chosen": -183.87754821777344, "logps/rejected": -269.5453796386719, "loss": 0.277, "rewards/accuracies": 1.0, "rewards/chosen": -0.6058818101882935, "rewards/margins": 1.688905119895935, "rewards/rejected": -2.2947866916656494, "step": 6667 }, { "epoch": 0.78, "learning_rate": 6.831227117042636e-08, "logits/chosen": -2.950035810470581, "logits/rejected": -2.900320529937744, "logps/chosen": -165.6063232421875, "logps/rejected": -186.03823852539062, "loss": 0.2848, "rewards/accuracies": 0.75, "rewards/chosen": -1.1759132146835327, "rewards/margins": 2.7915596961975098, "rewards/rejected": -3.967472791671753, "step": 6668 }, { "epoch": 0.78, "learning_rate": 6.827683949450809e-08, "logits/chosen": -2.3502089977264404, "logits/rejected": -2.443232297897339, "logps/chosen": -228.5415496826172, "logps/rejected": -241.6712646484375, "loss": 0.4101, "rewards/accuracies": 0.75, "rewards/chosen": -1.5140676498413086, "rewards/margins": 1.5131011009216309, "rewards/rejected": -3.0271687507629395, "step": 6669 }, { "epoch": 0.78, "learning_rate": 6.824140781858981e-08, "logits/chosen": -2.016116142272949, "logits/rejected": -1.896512508392334, "logps/chosen": -252.22604370117188, "logps/rejected": -343.3431701660156, "loss": 0.3498, "rewards/accuracies": 0.875, "rewards/chosen": -1.3217052221298218, "rewards/margins": 2.5496010780334473, "rewards/rejected": -3.8713064193725586, "step": 6670 }, { "epoch": 0.78, "learning_rate": 6.820597614267154e-08, "logits/chosen": -2.4583587646484375, "logits/rejected": -2.8200106620788574, "logps/chosen": -287.8721618652344, "logps/rejected": -245.5226593017578, "loss": 0.6136, "rewards/accuracies": 0.75, "rewards/chosen": -1.4895684719085693, "rewards/margins": 0.7056936025619507, "rewards/rejected": -2.1952619552612305, "step": 6671 }, { "epoch": 0.78, "learning_rate": 6.817054446675328e-08, "logits/chosen": -2.352027416229248, "logits/rejected": -2.35127329826355, "logps/chosen": -330.3799133300781, "logps/rejected": -276.8382873535156, "loss": 0.2105, "rewards/accuracies": 1.0, "rewards/chosen": -0.5801509022712708, "rewards/margins": 2.4024109840393066, "rewards/rejected": -2.9825618267059326, "step": 6672 }, { "epoch": 0.78, "learning_rate": 6.8135112790835e-08, "logits/chosen": -2.2304534912109375, "logits/rejected": -1.8882079124450684, "logps/chosen": -217.9007568359375, "logps/rejected": -310.2322082519531, "loss": 0.2342, "rewards/accuracies": 1.0, "rewards/chosen": -0.15801197290420532, "rewards/margins": 3.6213507652282715, "rewards/rejected": -3.779362678527832, "step": 6673 }, { "epoch": 0.78, "learning_rate": 6.809968111491674e-08, "logits/chosen": -1.7277190685272217, "logits/rejected": -2.197317123413086, "logps/chosen": -499.27569580078125, "logps/rejected": -372.884033203125, "loss": 0.5511, "rewards/accuracies": 0.75, "rewards/chosen": -1.793287754058838, "rewards/margins": 1.4433727264404297, "rewards/rejected": -3.2366604804992676, "step": 6674 }, { "epoch": 0.78, "learning_rate": 6.806424943899846e-08, "logits/chosen": -2.9848339557647705, "logits/rejected": -2.984494209289551, "logps/chosen": -121.56941223144531, "logps/rejected": -215.59518432617188, "loss": 0.3409, "rewards/accuracies": 0.875, "rewards/chosen": -0.6516674757003784, "rewards/margins": 3.237811326980591, "rewards/rejected": -3.889478921890259, "step": 6675 }, { "epoch": 0.78, "learning_rate": 6.802881776308018e-08, "logits/chosen": -1.9355971813201904, "logits/rejected": -1.8923416137695312, "logps/chosen": -270.706298828125, "logps/rejected": -251.49169921875, "loss": 0.303, "rewards/accuracies": 0.875, "rewards/chosen": -1.4284957647323608, "rewards/margins": 1.7913737297058105, "rewards/rejected": -3.219869375228882, "step": 6676 }, { "epoch": 0.78, "learning_rate": 6.799338608716192e-08, "logits/chosen": -2.196194648742676, "logits/rejected": -2.3535146713256836, "logps/chosen": -229.96652221679688, "logps/rejected": -230.62355041503906, "loss": 0.1636, "rewards/accuracies": 1.0, "rewards/chosen": -0.18463164567947388, "rewards/margins": 2.3111119270324707, "rewards/rejected": -2.495743751525879, "step": 6677 }, { "epoch": 0.78, "learning_rate": 6.795795441124365e-08, "logits/chosen": -1.701321005821228, "logits/rejected": -1.894420862197876, "logps/chosen": -365.467041015625, "logps/rejected": -319.93438720703125, "loss": 0.3965, "rewards/accuracies": 0.75, "rewards/chosen": -0.8441025614738464, "rewards/margins": 2.462033987045288, "rewards/rejected": -3.3061366081237793, "step": 6678 }, { "epoch": 0.78, "learning_rate": 6.792252273532537e-08, "logits/chosen": -2.285198926925659, "logits/rejected": -2.0963997840881348, "logps/chosen": -229.80361938476562, "logps/rejected": -368.8497314453125, "loss": 0.4179, "rewards/accuracies": 0.75, "rewards/chosen": -0.5706879496574402, "rewards/margins": 1.7221283912658691, "rewards/rejected": -2.292816400527954, "step": 6679 }, { "epoch": 0.78, "learning_rate": 6.788709105940711e-08, "logits/chosen": -2.1315906047821045, "logits/rejected": -2.2837376594543457, "logps/chosen": -511.73431396484375, "logps/rejected": -443.95025634765625, "loss": 0.3399, "rewards/accuracies": 0.875, "rewards/chosen": -0.1352139711380005, "rewards/margins": 2.3367979526519775, "rewards/rejected": -2.4720120429992676, "step": 6680 }, { "epoch": 0.78, "learning_rate": 6.785165938348883e-08, "logits/chosen": -2.438467025756836, "logits/rejected": -2.4087517261505127, "logps/chosen": -266.48663330078125, "logps/rejected": -205.9776611328125, "loss": 0.5589, "rewards/accuracies": 0.625, "rewards/chosen": -0.45648103952407837, "rewards/margins": 0.9591766595840454, "rewards/rejected": -1.415657639503479, "step": 6681 }, { "epoch": 0.78, "learning_rate": 6.781622770757057e-08, "logits/chosen": -2.1352956295013428, "logits/rejected": -2.089787483215332, "logps/chosen": -466.389404296875, "logps/rejected": -463.48272705078125, "loss": 0.4255, "rewards/accuracies": 0.75, "rewards/chosen": -0.1629711240530014, "rewards/margins": 2.187103271484375, "rewards/rejected": -2.350074291229248, "step": 6682 }, { "epoch": 0.78, "learning_rate": 6.77807960316523e-08, "logits/chosen": -2.502530813217163, "logits/rejected": -2.5864686965942383, "logps/chosen": -163.0996551513672, "logps/rejected": -230.57554626464844, "loss": 0.2212, "rewards/accuracies": 0.875, "rewards/chosen": -0.5594675540924072, "rewards/margins": 3.2722225189208984, "rewards/rejected": -3.8316903114318848, "step": 6683 }, { "epoch": 0.78, "learning_rate": 6.774536435573402e-08, "logits/chosen": -2.7706050872802734, "logits/rejected": -2.7820358276367188, "logps/chosen": -381.8843078613281, "logps/rejected": -342.4682922363281, "loss": 0.2197, "rewards/accuracies": 1.0, "rewards/chosen": -1.2220343351364136, "rewards/margins": 2.320164203643799, "rewards/rejected": -3.542198419570923, "step": 6684 }, { "epoch": 0.78, "learning_rate": 6.770993267981575e-08, "logits/chosen": -2.5427920818328857, "logits/rejected": -2.403404951095581, "logps/chosen": -229.47494506835938, "logps/rejected": -282.0889892578125, "loss": 0.4782, "rewards/accuracies": 0.75, "rewards/chosen": -0.9079654216766357, "rewards/margins": 2.157979726791382, "rewards/rejected": -3.0659451484680176, "step": 6685 }, { "epoch": 0.78, "learning_rate": 6.767450100389748e-08, "logits/chosen": -2.4850566387176514, "logits/rejected": -2.378840923309326, "logps/chosen": -236.47410583496094, "logps/rejected": -327.4798889160156, "loss": 0.1721, "rewards/accuracies": 1.0, "rewards/chosen": -1.0551799535751343, "rewards/margins": 4.095218181610107, "rewards/rejected": -5.150398254394531, "step": 6686 }, { "epoch": 0.78, "learning_rate": 6.76390693279792e-08, "logits/chosen": -2.2919516563415527, "logits/rejected": -2.0618233680725098, "logps/chosen": -250.63177490234375, "logps/rejected": -235.53341674804688, "loss": 1.0357, "rewards/accuracies": 0.5, "rewards/chosen": -1.7706544399261475, "rewards/margins": 0.5728428363800049, "rewards/rejected": -2.3434972763061523, "step": 6687 }, { "epoch": 0.78, "learning_rate": 6.760363765206094e-08, "logits/chosen": -2.4397132396698, "logits/rejected": -2.470994472503662, "logps/chosen": -246.7538299560547, "logps/rejected": -250.6220703125, "loss": 0.3559, "rewards/accuracies": 0.875, "rewards/chosen": -1.259779930114746, "rewards/margins": 2.5538787841796875, "rewards/rejected": -3.8136587142944336, "step": 6688 }, { "epoch": 0.78, "learning_rate": 6.756820597614267e-08, "logits/chosen": -2.0749871730804443, "logits/rejected": -2.177999496459961, "logps/chosen": -261.2310485839844, "logps/rejected": -327.1963195800781, "loss": 0.4146, "rewards/accuracies": 0.875, "rewards/chosen": -1.0801362991333008, "rewards/margins": 2.3057026863098145, "rewards/rejected": -3.385838747024536, "step": 6689 }, { "epoch": 0.78, "learning_rate": 6.75327743002244e-08, "logits/chosen": -2.0340328216552734, "logits/rejected": -2.05511474609375, "logps/chosen": -262.8029479980469, "logps/rejected": -274.64764404296875, "loss": 0.5585, "rewards/accuracies": 0.625, "rewards/chosen": -0.9783518314361572, "rewards/margins": 1.1480052471160889, "rewards/rejected": -2.126357078552246, "step": 6690 }, { "epoch": 0.78, "learning_rate": 6.749734262430613e-08, "logits/chosen": -2.8588266372680664, "logits/rejected": -2.785094976425171, "logps/chosen": -214.4049072265625, "logps/rejected": -226.07699584960938, "loss": 0.3339, "rewards/accuracies": 0.875, "rewards/chosen": -0.5854496955871582, "rewards/margins": 1.4948749542236328, "rewards/rejected": -2.080324649810791, "step": 6691 }, { "epoch": 0.78, "learning_rate": 6.746191094838785e-08, "logits/chosen": -2.3237717151641846, "logits/rejected": -2.3030593395233154, "logps/chosen": -223.0485382080078, "logps/rejected": -257.0408935546875, "loss": 0.1207, "rewards/accuracies": 1.0, "rewards/chosen": -0.3497526943683624, "rewards/margins": 3.204129695892334, "rewards/rejected": -3.553882360458374, "step": 6692 }, { "epoch": 0.78, "learning_rate": 6.742647927246959e-08, "logits/chosen": -2.815141201019287, "logits/rejected": -2.7521286010742188, "logps/chosen": -136.90203857421875, "logps/rejected": -230.65548706054688, "loss": 0.2071, "rewards/accuracies": 1.0, "rewards/chosen": -0.7323436737060547, "rewards/margins": 2.256695508956909, "rewards/rejected": -2.989039421081543, "step": 6693 }, { "epoch": 0.78, "learning_rate": 6.739104759655131e-08, "logits/chosen": -2.3654043674468994, "logits/rejected": -2.1271324157714844, "logps/chosen": -298.1686706542969, "logps/rejected": -392.1276550292969, "loss": 0.2781, "rewards/accuracies": 0.875, "rewards/chosen": -1.2456430196762085, "rewards/margins": 2.0732266902923584, "rewards/rejected": -3.3188695907592773, "step": 6694 }, { "epoch": 0.78, "learning_rate": 6.735561592063305e-08, "logits/chosen": -2.842491626739502, "logits/rejected": -2.674410104751587, "logps/chosen": -173.41233825683594, "logps/rejected": -288.1602783203125, "loss": 0.1001, "rewards/accuracies": 0.875, "rewards/chosen": -0.27922865748405457, "rewards/margins": 4.741949081420898, "rewards/rejected": -5.021177768707275, "step": 6695 }, { "epoch": 0.78, "learning_rate": 6.732018424471477e-08, "logits/chosen": -2.092898368835449, "logits/rejected": -1.9460290670394897, "logps/chosen": -123.1417007446289, "logps/rejected": -409.05474853515625, "loss": 0.9271, "rewards/accuracies": 0.625, "rewards/chosen": -1.5931000709533691, "rewards/margins": 1.0334882736206055, "rewards/rejected": -2.6265883445739746, "step": 6696 }, { "epoch": 0.78, "learning_rate": 6.72847525687965e-08, "logits/chosen": -2.388728380203247, "logits/rejected": -2.531466007232666, "logps/chosen": -185.8707733154297, "logps/rejected": -198.40872192382812, "loss": 0.6583, "rewards/accuracies": 0.5, "rewards/chosen": -1.013964295387268, "rewards/margins": 0.8661527633666992, "rewards/rejected": -1.8801169395446777, "step": 6697 }, { "epoch": 0.78, "learning_rate": 6.724932089287823e-08, "logits/chosen": -2.474527597427368, "logits/rejected": -2.4738364219665527, "logps/chosen": -516.7196655273438, "logps/rejected": -367.37628173828125, "loss": 0.1967, "rewards/accuracies": 0.875, "rewards/chosen": -0.6396868228912354, "rewards/margins": 2.2974131107330322, "rewards/rejected": -2.9370996952056885, "step": 6698 }, { "epoch": 0.78, "learning_rate": 6.721388921695996e-08, "logits/chosen": -1.6957606077194214, "logits/rejected": -1.4716038703918457, "logps/chosen": -177.84158325195312, "logps/rejected": -292.15850830078125, "loss": 0.2089, "rewards/accuracies": 0.875, "rewards/chosen": -1.2286207675933838, "rewards/margins": 2.5625884532928467, "rewards/rejected": -3.7912087440490723, "step": 6699 }, { "epoch": 0.78, "learning_rate": 6.71784575410417e-08, "logits/chosen": -2.2870731353759766, "logits/rejected": -2.3567264080047607, "logps/chosen": -269.99822998046875, "logps/rejected": -248.5194549560547, "loss": 0.3438, "rewards/accuracies": 0.875, "rewards/chosen": -0.1575675755739212, "rewards/margins": 2.3558287620544434, "rewards/rejected": -2.5133962631225586, "step": 6700 }, { "epoch": 0.78, "learning_rate": 6.714302586512342e-08, "logits/chosen": -2.222492218017578, "logits/rejected": -2.4547367095947266, "logps/chosen": -284.49078369140625, "logps/rejected": -326.5968017578125, "loss": 0.3943, "rewards/accuracies": 0.625, "rewards/chosen": -0.9901636242866516, "rewards/margins": 2.006624460220337, "rewards/rejected": -2.996788263320923, "step": 6701 }, { "epoch": 0.78, "learning_rate": 6.710759418920514e-08, "logits/chosen": -2.125469207763672, "logits/rejected": -1.943100929260254, "logps/chosen": -284.98944091796875, "logps/rejected": -355.3919677734375, "loss": 0.2669, "rewards/accuracies": 0.875, "rewards/chosen": -0.607336163520813, "rewards/margins": 1.6198086738586426, "rewards/rejected": -2.227144718170166, "step": 6702 }, { "epoch": 0.78, "learning_rate": 6.707216251328688e-08, "logits/chosen": -2.67234468460083, "logits/rejected": -2.604809522628784, "logps/chosen": -278.9767150878906, "logps/rejected": -284.6295471191406, "loss": 0.2739, "rewards/accuracies": 0.75, "rewards/chosen": -0.8595300912857056, "rewards/margins": 2.480501174926758, "rewards/rejected": -3.340031623840332, "step": 6703 }, { "epoch": 0.78, "learning_rate": 6.70367308373686e-08, "logits/chosen": -2.54579496383667, "logits/rejected": -2.729091167449951, "logps/chosen": -90.99949645996094, "logps/rejected": -124.462158203125, "loss": 0.364, "rewards/accuracies": 0.875, "rewards/chosen": -0.47379645705223083, "rewards/margins": 1.7775765657424927, "rewards/rejected": -2.251373052597046, "step": 6704 }, { "epoch": 0.78, "learning_rate": 6.700129916145033e-08, "logits/chosen": -1.987938404083252, "logits/rejected": -2.2770819664001465, "logps/chosen": -379.1482238769531, "logps/rejected": -316.0445861816406, "loss": 0.4018, "rewards/accuracies": 0.75, "rewards/chosen": -1.0910412073135376, "rewards/margins": 2.3174967765808105, "rewards/rejected": -3.4085376262664795, "step": 6705 }, { "epoch": 0.78, "learning_rate": 6.696586748553207e-08, "logits/chosen": -2.6607789993286133, "logits/rejected": -2.796546220779419, "logps/chosen": -283.2216491699219, "logps/rejected": -510.07379150390625, "loss": 0.6347, "rewards/accuracies": 0.625, "rewards/chosen": -2.3854193687438965, "rewards/margins": 2.472313404083252, "rewards/rejected": -4.857732772827148, "step": 6706 }, { "epoch": 0.78, "learning_rate": 6.693043580961379e-08, "logits/chosen": -2.7860565185546875, "logits/rejected": -2.7219154834747314, "logps/chosen": -302.9002685546875, "logps/rejected": -207.79586791992188, "loss": 0.2426, "rewards/accuracies": 1.0, "rewards/chosen": -0.2785945534706116, "rewards/margins": 2.64985728263855, "rewards/rejected": -2.9284520149230957, "step": 6707 }, { "epoch": 0.78, "learning_rate": 6.689500413369551e-08, "logits/chosen": -2.933516025543213, "logits/rejected": -2.913712501525879, "logps/chosen": -310.345458984375, "logps/rejected": -244.89085388183594, "loss": 0.4463, "rewards/accuracies": 0.75, "rewards/chosen": -1.3036630153656006, "rewards/margins": 2.6693365573883057, "rewards/rejected": -3.9729995727539062, "step": 6708 }, { "epoch": 0.78, "learning_rate": 6.685957245777725e-08, "logits/chosen": -2.9953761100769043, "logits/rejected": -2.976691484451294, "logps/chosen": -191.99363708496094, "logps/rejected": -232.1956024169922, "loss": 0.8601, "rewards/accuracies": 0.625, "rewards/chosen": -1.2406907081604004, "rewards/margins": 1.1798081398010254, "rewards/rejected": -2.420498847961426, "step": 6709 }, { "epoch": 0.78, "learning_rate": 6.682414078185898e-08, "logits/chosen": -2.3260185718536377, "logits/rejected": -2.6103432178497314, "logps/chosen": -294.9425048828125, "logps/rejected": -228.21963500976562, "loss": 0.2102, "rewards/accuracies": 1.0, "rewards/chosen": -0.7171142101287842, "rewards/margins": 2.6876914501190186, "rewards/rejected": -3.4048056602478027, "step": 6710 }, { "epoch": 0.78, "learning_rate": 6.678870910594071e-08, "logits/chosen": -1.967081904411316, "logits/rejected": -1.7084155082702637, "logps/chosen": -206.1607666015625, "logps/rejected": -359.88720703125, "loss": 0.2116, "rewards/accuracies": 1.0, "rewards/chosen": -0.1750939041376114, "rewards/margins": 2.833411693572998, "rewards/rejected": -3.0085058212280273, "step": 6711 }, { "epoch": 0.78, "learning_rate": 6.675327743002244e-08, "logits/chosen": -2.3091394901275635, "logits/rejected": -2.2618730068206787, "logps/chosen": -222.5257568359375, "logps/rejected": -231.15777587890625, "loss": 0.3866, "rewards/accuracies": 0.75, "rewards/chosen": -0.42745906114578247, "rewards/margins": 2.213589668273926, "rewards/rejected": -2.6410486698150635, "step": 6712 }, { "epoch": 0.78, "learning_rate": 6.671784575410416e-08, "logits/chosen": -2.0033977031707764, "logits/rejected": -2.1826815605163574, "logps/chosen": -254.77870178222656, "logps/rejected": -271.9248046875, "loss": 0.1936, "rewards/accuracies": 1.0, "rewards/chosen": -0.8561174273490906, "rewards/margins": 2.539294958114624, "rewards/rejected": -3.3954124450683594, "step": 6713 }, { "epoch": 0.78, "learning_rate": 6.668241407818589e-08, "logits/chosen": -1.9096633195877075, "logits/rejected": -2.057343006134033, "logps/chosen": -340.69976806640625, "logps/rejected": -294.840576171875, "loss": 0.2965, "rewards/accuracies": 0.875, "rewards/chosen": -1.2418824434280396, "rewards/margins": 1.7994009256362915, "rewards/rejected": -3.041283369064331, "step": 6714 }, { "epoch": 0.78, "learning_rate": 6.664698240226762e-08, "logits/chosen": -2.017667531967163, "logits/rejected": -2.1800107955932617, "logps/chosen": -573.7841796875, "logps/rejected": -382.7703857421875, "loss": 1.0998, "rewards/accuracies": 0.375, "rewards/chosen": -1.8655719757080078, "rewards/margins": 0.43704816699028015, "rewards/rejected": -2.3026201725006104, "step": 6715 }, { "epoch": 0.78, "learning_rate": 6.661155072634936e-08, "logits/chosen": -2.0943005084991455, "logits/rejected": -1.7638336420059204, "logps/chosen": -193.97933959960938, "logps/rejected": -364.96185302734375, "loss": 0.1382, "rewards/accuracies": 1.0, "rewards/chosen": -0.7888104915618896, "rewards/margins": 2.637986660003662, "rewards/rejected": -3.4267971515655518, "step": 6716 }, { "epoch": 0.78, "learning_rate": 6.657611905043108e-08, "logits/chosen": -2.4395244121551514, "logits/rejected": -2.6528139114379883, "logps/chosen": -236.4577178955078, "logps/rejected": -333.0775451660156, "loss": 0.3902, "rewards/accuracies": 0.75, "rewards/chosen": -0.4057982861995697, "rewards/margins": 1.909897804260254, "rewards/rejected": -2.3156960010528564, "step": 6717 }, { "epoch": 0.78, "learning_rate": 6.654068737451281e-08, "logits/chosen": -1.967890977859497, "logits/rejected": -1.7734661102294922, "logps/chosen": -335.6396484375, "logps/rejected": -358.78277587890625, "loss": 0.1458, "rewards/accuracies": 1.0, "rewards/chosen": -0.3551292419433594, "rewards/margins": 2.6152760982513428, "rewards/rejected": -2.970405340194702, "step": 6718 }, { "epoch": 0.78, "learning_rate": 6.650525569859454e-08, "logits/chosen": -2.302067756652832, "logits/rejected": -2.425612211227417, "logps/chosen": -119.6279296875, "logps/rejected": -200.06536865234375, "loss": 0.6904, "rewards/accuracies": 0.75, "rewards/chosen": -1.73972487449646, "rewards/margins": 0.6601678133010864, "rewards/rejected": -2.399892568588257, "step": 6719 }, { "epoch": 0.78, "learning_rate": 6.646982402267627e-08, "logits/chosen": -2.418684959411621, "logits/rejected": -2.5878515243530273, "logps/chosen": -435.0817565917969, "logps/rejected": -296.0639343261719, "loss": 0.2157, "rewards/accuracies": 0.875, "rewards/chosen": -0.7551548480987549, "rewards/margins": 3.0221855640411377, "rewards/rejected": -3.7773404121398926, "step": 6720 }, { "epoch": 0.78, "learning_rate": 6.6434392346758e-08, "logits/chosen": -2.2393128871917725, "logits/rejected": -2.2634389400482178, "logps/chosen": -256.0483703613281, "logps/rejected": -178.4649658203125, "loss": 0.4936, "rewards/accuracies": 0.625, "rewards/chosen": -0.778351902961731, "rewards/margins": 2.2833328247070312, "rewards/rejected": -3.0616846084594727, "step": 6721 }, { "epoch": 0.78, "learning_rate": 6.639896067083973e-08, "logits/chosen": -1.8561320304870605, "logits/rejected": -2.142460584640503, "logps/chosen": -320.501708984375, "logps/rejected": -363.2616882324219, "loss": 0.1555, "rewards/accuracies": 0.875, "rewards/chosen": -0.3523080050945282, "rewards/margins": 3.58445143699646, "rewards/rejected": -3.9367592334747314, "step": 6722 }, { "epoch": 0.78, "learning_rate": 6.636352899492145e-08, "logits/chosen": -1.8482023477554321, "logits/rejected": -2.105743646621704, "logps/chosen": -364.515869140625, "logps/rejected": -323.3650817871094, "loss": 0.3709, "rewards/accuracies": 0.875, "rewards/chosen": -0.04167512059211731, "rewards/margins": 2.946732997894287, "rewards/rejected": -2.988408088684082, "step": 6723 }, { "epoch": 0.78, "learning_rate": 6.632809731900319e-08, "logits/chosen": -2.152111053466797, "logits/rejected": -2.317734718322754, "logps/chosen": -440.8103332519531, "logps/rejected": -370.03790283203125, "loss": 0.9016, "rewards/accuracies": 0.75, "rewards/chosen": -0.8296585083007812, "rewards/margins": 0.4981834888458252, "rewards/rejected": -1.3278419971466064, "step": 6724 }, { "epoch": 0.78, "learning_rate": 6.629266564308491e-08, "logits/chosen": -2.3289875984191895, "logits/rejected": -2.611556053161621, "logps/chosen": -279.58648681640625, "logps/rejected": -255.20599365234375, "loss": 0.2886, "rewards/accuracies": 0.875, "rewards/chosen": -1.6124484539031982, "rewards/margins": 2.6644556522369385, "rewards/rejected": -4.276904106140137, "step": 6725 }, { "epoch": 0.78, "learning_rate": 6.625723396716664e-08, "logits/chosen": -1.9364712238311768, "logits/rejected": -2.001591682434082, "logps/chosen": -332.02203369140625, "logps/rejected": -363.69232177734375, "loss": 0.2869, "rewards/accuracies": 1.0, "rewards/chosen": -0.47412994503974915, "rewards/margins": 1.4747166633605957, "rewards/rejected": -1.9488468170166016, "step": 6726 }, { "epoch": 0.78, "learning_rate": 6.622180229124838e-08, "logits/chosen": -2.468430995941162, "logits/rejected": -2.4510247707366943, "logps/chosen": -196.44134521484375, "logps/rejected": -186.54351806640625, "loss": 0.2498, "rewards/accuracies": 1.0, "rewards/chosen": -0.9392768144607544, "rewards/margins": 2.258169651031494, "rewards/rejected": -3.197446346282959, "step": 6727 }, { "epoch": 0.78, "learning_rate": 6.61863706153301e-08, "logits/chosen": -2.9351646900177, "logits/rejected": -2.616594076156616, "logps/chosen": -193.66583251953125, "logps/rejected": -276.6463317871094, "loss": 0.2623, "rewards/accuracies": 0.875, "rewards/chosen": -0.6879783868789673, "rewards/margins": 2.3686537742614746, "rewards/rejected": -3.0566320419311523, "step": 6728 }, { "epoch": 0.78, "learning_rate": 6.615093893941184e-08, "logits/chosen": -1.8479408025741577, "logits/rejected": -2.0661375522613525, "logps/chosen": -340.1581726074219, "logps/rejected": -311.9808654785156, "loss": 0.6654, "rewards/accuracies": 0.75, "rewards/chosen": -1.392877221107483, "rewards/margins": 2.0435659885406494, "rewards/rejected": -3.436443328857422, "step": 6729 }, { "epoch": 0.78, "learning_rate": 6.611550726349356e-08, "logits/chosen": -2.0385022163391113, "logits/rejected": -1.8074243068695068, "logps/chosen": -309.35235595703125, "logps/rejected": -307.7257995605469, "loss": 1.0232, "rewards/accuracies": 0.25, "rewards/chosen": -1.6712918281555176, "rewards/margins": -0.20161934196949005, "rewards/rejected": -1.469672441482544, "step": 6730 }, { "epoch": 0.78, "learning_rate": 6.608007558757528e-08, "logits/chosen": -1.737768530845642, "logits/rejected": -1.9827021360397339, "logps/chosen": -512.064697265625, "logps/rejected": -301.81939697265625, "loss": 1.0681, "rewards/accuracies": 0.5, "rewards/chosen": -1.2946124076843262, "rewards/margins": 0.734893798828125, "rewards/rejected": -2.029506206512451, "step": 6731 }, { "epoch": 0.78, "learning_rate": 6.604464391165702e-08, "logits/chosen": -2.710054874420166, "logits/rejected": -2.653728485107422, "logps/chosen": -243.56912231445312, "logps/rejected": -320.6463928222656, "loss": 0.2123, "rewards/accuracies": 0.875, "rewards/chosen": -1.0831300020217896, "rewards/margins": 3.838249683380127, "rewards/rejected": -4.921379566192627, "step": 6732 }, { "epoch": 0.78, "learning_rate": 6.600921223573875e-08, "logits/chosen": -2.6883115768432617, "logits/rejected": -2.9124701023101807, "logps/chosen": -321.918212890625, "logps/rejected": -278.0485534667969, "loss": 0.2667, "rewards/accuracies": 0.875, "rewards/chosen": -0.841445803642273, "rewards/margins": 3.0533792972564697, "rewards/rejected": -3.894824981689453, "step": 6733 }, { "epoch": 0.78, "learning_rate": 6.597378055982047e-08, "logits/chosen": -2.188117027282715, "logits/rejected": -2.0665595531463623, "logps/chosen": -321.9722900390625, "logps/rejected": -375.6776123046875, "loss": 0.5807, "rewards/accuracies": 0.5, "rewards/chosen": -1.1069436073303223, "rewards/margins": 1.1814830303192139, "rewards/rejected": -2.288426399230957, "step": 6734 }, { "epoch": 0.78, "learning_rate": 6.593834888390221e-08, "logits/chosen": -2.5359866619110107, "logits/rejected": -2.3362865447998047, "logps/chosen": -182.16632080078125, "logps/rejected": -289.39312744140625, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": -0.5255894660949707, "rewards/margins": 3.344526529312134, "rewards/rejected": -3.8701162338256836, "step": 6735 }, { "epoch": 0.78, "learning_rate": 6.590291720798393e-08, "logits/chosen": -2.0730690956115723, "logits/rejected": -2.156136989593506, "logps/chosen": -183.74127197265625, "logps/rejected": -231.93020629882812, "loss": 0.6229, "rewards/accuracies": 0.625, "rewards/chosen": -0.4540839195251465, "rewards/margins": 1.9810066223144531, "rewards/rejected": -2.4350903034210205, "step": 6736 }, { "epoch": 0.78, "learning_rate": 6.586748553206567e-08, "logits/chosen": -2.4043474197387695, "logits/rejected": -2.217194080352783, "logps/chosen": -141.60353088378906, "logps/rejected": -209.29141235351562, "loss": 0.4534, "rewards/accuracies": 0.625, "rewards/chosen": -0.6602283716201782, "rewards/margins": 2.5609889030456543, "rewards/rejected": -3.221217155456543, "step": 6737 }, { "epoch": 0.78, "learning_rate": 6.583205385614739e-08, "logits/chosen": -2.0044758319854736, "logits/rejected": -1.8788104057312012, "logps/chosen": -303.8839111328125, "logps/rejected": -352.0296630859375, "loss": 0.4711, "rewards/accuracies": 0.75, "rewards/chosen": -0.7515566945075989, "rewards/margins": 1.7527194023132324, "rewards/rejected": -2.5042760372161865, "step": 6738 }, { "epoch": 0.78, "learning_rate": 6.579662218022912e-08, "logits/chosen": -2.1231765747070312, "logits/rejected": -2.0586905479431152, "logps/chosen": -345.1786804199219, "logps/rejected": -350.0381774902344, "loss": 1.0571, "rewards/accuracies": 0.625, "rewards/chosen": -1.7826244831085205, "rewards/margins": 1.353762149810791, "rewards/rejected": -3.1363868713378906, "step": 6739 }, { "epoch": 0.78, "learning_rate": 6.576119050431085e-08, "logits/chosen": -2.665330171585083, "logits/rejected": -2.7096450328826904, "logps/chosen": -362.310302734375, "logps/rejected": -286.77593994140625, "loss": 0.4386, "rewards/accuracies": 0.625, "rewards/chosen": -0.5667153000831604, "rewards/margins": 1.836995005607605, "rewards/rejected": -2.4037106037139893, "step": 6740 }, { "epoch": 0.78, "learning_rate": 6.572575882839258e-08, "logits/chosen": -2.1973042488098145, "logits/rejected": -2.223388195037842, "logps/chosen": -212.58102416992188, "logps/rejected": -277.4917297363281, "loss": 0.3324, "rewards/accuracies": 0.875, "rewards/chosen": -0.32471635937690735, "rewards/margins": 2.4254143238067627, "rewards/rejected": -2.7501308917999268, "step": 6741 }, { "epoch": 0.78, "learning_rate": 6.56903271524743e-08, "logits/chosen": -2.749851703643799, "logits/rejected": -2.685868978500366, "logps/chosen": -195.6599884033203, "logps/rejected": -191.56588745117188, "loss": 0.4973, "rewards/accuracies": 0.625, "rewards/chosen": -1.046813726425171, "rewards/margins": 2.3936359882354736, "rewards/rejected": -3.4404499530792236, "step": 6742 }, { "epoch": 0.78, "learning_rate": 6.565489547655604e-08, "logits/chosen": -2.497851610183716, "logits/rejected": -2.4638240337371826, "logps/chosen": -230.641357421875, "logps/rejected": -258.274169921875, "loss": 0.4202, "rewards/accuracies": 0.625, "rewards/chosen": -0.6937828063964844, "rewards/margins": 1.394040822982788, "rewards/rejected": -2.0878236293792725, "step": 6743 }, { "epoch": 0.78, "learning_rate": 6.561946380063778e-08, "logits/chosen": -1.594895839691162, "logits/rejected": -2.0362868309020996, "logps/chosen": -444.515625, "logps/rejected": -308.9933166503906, "loss": 0.3211, "rewards/accuracies": 0.75, "rewards/chosen": -0.41105130314826965, "rewards/margins": 2.552729845046997, "rewards/rejected": -2.9637811183929443, "step": 6744 }, { "epoch": 0.78, "learning_rate": 6.55840321247195e-08, "logits/chosen": -2.069666624069214, "logits/rejected": -2.523656129837036, "logps/chosen": -236.4113006591797, "logps/rejected": -259.1937255859375, "loss": 1.0425, "rewards/accuracies": 0.625, "rewards/chosen": -1.043017029762268, "rewards/margins": 1.61940598487854, "rewards/rejected": -2.6624231338500977, "step": 6745 }, { "epoch": 0.78, "learning_rate": 6.554860044880122e-08, "logits/chosen": -2.1396474838256836, "logits/rejected": -2.4101409912109375, "logps/chosen": -258.7208251953125, "logps/rejected": -254.325927734375, "loss": 0.5451, "rewards/accuracies": 0.625, "rewards/chosen": -1.3880400657653809, "rewards/margins": 1.569046974182129, "rewards/rejected": -2.9570870399475098, "step": 6746 }, { "epoch": 0.78, "learning_rate": 6.551316877288295e-08, "logits/chosen": -2.376088857650757, "logits/rejected": -2.5357284545898438, "logps/chosen": -126.98919677734375, "logps/rejected": -200.66676330566406, "loss": 1.4096, "rewards/accuracies": 0.75, "rewards/chosen": -1.6329371929168701, "rewards/margins": 1.0077791213989258, "rewards/rejected": -2.640716552734375, "step": 6747 }, { "epoch": 0.78, "learning_rate": 6.547773709696468e-08, "logits/chosen": -1.579930305480957, "logits/rejected": -1.797488808631897, "logps/chosen": -420.4679260253906, "logps/rejected": -288.0094299316406, "loss": 1.2377, "rewards/accuracies": 0.5, "rewards/chosen": -1.564877986907959, "rewards/margins": 0.03185093402862549, "rewards/rejected": -1.5967289209365845, "step": 6748 }, { "epoch": 0.79, "learning_rate": 6.544230542104641e-08, "logits/chosen": -2.333695411682129, "logits/rejected": -2.538177490234375, "logps/chosen": -253.01019287109375, "logps/rejected": -283.5047302246094, "loss": 0.1498, "rewards/accuracies": 1.0, "rewards/chosen": -0.6573278903961182, "rewards/margins": 3.010467529296875, "rewards/rejected": -3.667795419692993, "step": 6749 }, { "epoch": 0.79, "learning_rate": 6.540687374512815e-08, "logits/chosen": -2.2079274654388428, "logits/rejected": -2.2318241596221924, "logps/chosen": -363.25457763671875, "logps/rejected": -224.43641662597656, "loss": 0.5239, "rewards/accuracies": 0.75, "rewards/chosen": -1.2910170555114746, "rewards/margins": 2.611124038696289, "rewards/rejected": -3.9021408557891846, "step": 6750 }, { "epoch": 0.79, "learning_rate": 6.537144206920987e-08, "logits/chosen": -2.1962552070617676, "logits/rejected": -2.102534532546997, "logps/chosen": -308.8802795410156, "logps/rejected": -392.2976379394531, "loss": 0.4778, "rewards/accuracies": 0.625, "rewards/chosen": 0.10515855252742767, "rewards/margins": 2.554295778274536, "rewards/rejected": -2.4491372108459473, "step": 6751 }, { "epoch": 0.79, "learning_rate": 6.533601039329159e-08, "logits/chosen": -2.3547756671905518, "logits/rejected": -2.259152889251709, "logps/chosen": -209.7338409423828, "logps/rejected": -281.8211364746094, "loss": 0.4909, "rewards/accuracies": 0.625, "rewards/chosen": -1.0186517238616943, "rewards/margins": 1.631777048110962, "rewards/rejected": -2.6504287719726562, "step": 6752 }, { "epoch": 0.79, "learning_rate": 6.530057871737333e-08, "logits/chosen": -2.0333974361419678, "logits/rejected": -1.9305593967437744, "logps/chosen": -406.3330078125, "logps/rejected": -414.7881774902344, "loss": 0.466, "rewards/accuracies": 0.625, "rewards/chosen": -0.9523491859436035, "rewards/margins": 1.9146692752838135, "rewards/rejected": -2.867018699645996, "step": 6753 }, { "epoch": 0.79, "learning_rate": 6.526514704145506e-08, "logits/chosen": -1.991227626800537, "logits/rejected": -1.8974330425262451, "logps/chosen": -149.16363525390625, "logps/rejected": -166.6828155517578, "loss": 1.0408, "rewards/accuracies": 0.625, "rewards/chosen": -2.3483424186706543, "rewards/margins": -0.11009010672569275, "rewards/rejected": -2.2382521629333496, "step": 6754 }, { "epoch": 0.79, "learning_rate": 6.522971536553678e-08, "logits/chosen": -2.4581847190856934, "logits/rejected": -2.5660510063171387, "logps/chosen": -213.33958435058594, "logps/rejected": -422.2435607910156, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -0.3090536594390869, "rewards/margins": 4.743819713592529, "rewards/rejected": -5.052873134613037, "step": 6755 }, { "epoch": 0.79, "learning_rate": 6.519428368961852e-08, "logits/chosen": -2.933178186416626, "logits/rejected": -2.9832205772399902, "logps/chosen": -290.4124755859375, "logps/rejected": -283.08917236328125, "loss": 0.1929, "rewards/accuracies": 1.0, "rewards/chosen": -0.5617907643318176, "rewards/margins": 4.2436323165893555, "rewards/rejected": -4.805422782897949, "step": 6756 }, { "epoch": 0.79, "learning_rate": 6.515885201370024e-08, "logits/chosen": -2.020195245742798, "logits/rejected": -2.2006397247314453, "logps/chosen": -379.0953369140625, "logps/rejected": -355.92523193359375, "loss": 0.4079, "rewards/accuracies": 0.625, "rewards/chosen": -0.13003234565258026, "rewards/margins": 2.3635714054107666, "rewards/rejected": -2.493603467941284, "step": 6757 }, { "epoch": 0.79, "learning_rate": 6.512342033778196e-08, "logits/chosen": -2.332660436630249, "logits/rejected": -2.279651403427124, "logps/chosen": -245.4786834716797, "logps/rejected": -274.9839782714844, "loss": 0.4827, "rewards/accuracies": 0.625, "rewards/chosen": -0.4531765282154083, "rewards/margins": 2.3407700061798096, "rewards/rejected": -2.7939465045928955, "step": 6758 }, { "epoch": 0.79, "learning_rate": 6.50879886618637e-08, "logits/chosen": -2.2957258224487305, "logits/rejected": -2.3555660247802734, "logps/chosen": -258.70166015625, "logps/rejected": -413.1977233886719, "loss": 0.1403, "rewards/accuracies": 1.0, "rewards/chosen": -0.4180722236633301, "rewards/margins": 12.504809379577637, "rewards/rejected": -12.922881126403809, "step": 6759 }, { "epoch": 0.79, "learning_rate": 6.505255698594544e-08, "logits/chosen": -2.1647658348083496, "logits/rejected": -2.176680326461792, "logps/chosen": -403.1396789550781, "logps/rejected": -441.18951416015625, "loss": 0.0772, "rewards/accuracies": 1.0, "rewards/chosen": -1.0489552021026611, "rewards/margins": 4.99904727935791, "rewards/rejected": -6.048002243041992, "step": 6760 }, { "epoch": 0.79, "learning_rate": 6.501712531002717e-08, "logits/chosen": -2.440051555633545, "logits/rejected": -2.5826523303985596, "logps/chosen": -241.02601623535156, "logps/rejected": -267.2476501464844, "loss": 0.4929, "rewards/accuracies": 0.875, "rewards/chosen": -0.9369781017303467, "rewards/margins": 1.6223808526992798, "rewards/rejected": -2.559359073638916, "step": 6761 }, { "epoch": 0.79, "learning_rate": 6.498169363410889e-08, "logits/chosen": -2.602499485015869, "logits/rejected": -2.636160135269165, "logps/chosen": -446.03240966796875, "logps/rejected": -333.2652587890625, "loss": 0.395, "rewards/accuracies": 0.875, "rewards/chosen": -1.2011590003967285, "rewards/margins": 2.303955078125, "rewards/rejected": -3.5051138401031494, "step": 6762 }, { "epoch": 0.79, "learning_rate": 6.494626195819061e-08, "logits/chosen": -1.7616400718688965, "logits/rejected": -1.9467805624008179, "logps/chosen": -374.210205078125, "logps/rejected": -429.8559875488281, "loss": 0.3652, "rewards/accuracies": 0.875, "rewards/chosen": -1.7973393201828003, "rewards/margins": 2.6581778526306152, "rewards/rejected": -4.455517292022705, "step": 6763 }, { "epoch": 0.79, "learning_rate": 6.491083028227235e-08, "logits/chosen": -2.1555492877960205, "logits/rejected": -2.1077823638916016, "logps/chosen": -313.660888671875, "logps/rejected": -344.238525390625, "loss": 0.5436, "rewards/accuracies": 0.875, "rewards/chosen": -0.17069600522518158, "rewards/margins": 1.8394380807876587, "rewards/rejected": -2.010133981704712, "step": 6764 }, { "epoch": 0.79, "learning_rate": 6.487539860635407e-08, "logits/chosen": -1.8631126880645752, "logits/rejected": -1.885514259338379, "logps/chosen": -704.1841430664062, "logps/rejected": -585.5087890625, "loss": 0.6706, "rewards/accuracies": 0.5, "rewards/chosen": -1.569437861442566, "rewards/margins": 1.4699018001556396, "rewards/rejected": -3.039339780807495, "step": 6765 }, { "epoch": 0.79, "learning_rate": 6.483996693043581e-08, "logits/chosen": -2.69454026222229, "logits/rejected": -2.587406635284424, "logps/chosen": -269.1883544921875, "logps/rejected": -232.35519409179688, "loss": 0.2584, "rewards/accuracies": 1.0, "rewards/chosen": -0.7365078926086426, "rewards/margins": 2.3860931396484375, "rewards/rejected": -3.122601270675659, "step": 6766 }, { "epoch": 0.79, "learning_rate": 6.480453525451754e-08, "logits/chosen": -2.0686826705932617, "logits/rejected": -1.9851720333099365, "logps/chosen": -257.35552978515625, "logps/rejected": -403.6017761230469, "loss": 0.6672, "rewards/accuracies": 0.75, "rewards/chosen": -0.689854621887207, "rewards/margins": 1.8388233184814453, "rewards/rejected": -2.5286779403686523, "step": 6767 }, { "epoch": 0.79, "learning_rate": 6.476910357859927e-08, "logits/chosen": -2.481163740158081, "logits/rejected": -2.213897228240967, "logps/chosen": -101.82785034179688, "logps/rejected": -144.59585571289062, "loss": 0.3622, "rewards/accuracies": 0.875, "rewards/chosen": -0.19704991579055786, "rewards/margins": 2.106851100921631, "rewards/rejected": -2.303901195526123, "step": 6768 }, { "epoch": 0.79, "learning_rate": 6.473367190268099e-08, "logits/chosen": -2.824014186859131, "logits/rejected": -2.9293911457061768, "logps/chosen": -348.09173583984375, "logps/rejected": -270.203369140625, "loss": 0.2389, "rewards/accuracies": 0.875, "rewards/chosen": -1.1321749687194824, "rewards/margins": 2.141580104827881, "rewards/rejected": -3.2737550735473633, "step": 6769 }, { "epoch": 0.79, "learning_rate": 6.469824022676272e-08, "logits/chosen": -1.855371117591858, "logits/rejected": -1.72642183303833, "logps/chosen": -246.8117218017578, "logps/rejected": -232.2223358154297, "loss": 0.5009, "rewards/accuracies": 0.625, "rewards/chosen": -1.1168028116226196, "rewards/margins": 1.14151132106781, "rewards/rejected": -2.2583141326904297, "step": 6770 }, { "epoch": 0.79, "learning_rate": 6.466280855084446e-08, "logits/chosen": -2.3918545246124268, "logits/rejected": -2.523070812225342, "logps/chosen": -404.99725341796875, "logps/rejected": -282.2174377441406, "loss": 0.4269, "rewards/accuracies": 0.75, "rewards/chosen": -0.5646174550056458, "rewards/margins": 1.5465744733810425, "rewards/rejected": -2.111191987991333, "step": 6771 }, { "epoch": 0.79, "learning_rate": 6.462737687492618e-08, "logits/chosen": -2.271946430206299, "logits/rejected": -2.6504709720611572, "logps/chosen": -324.5555419921875, "logps/rejected": -150.41871643066406, "loss": 0.1563, "rewards/accuracies": 1.0, "rewards/chosen": -0.07543468475341797, "rewards/margins": 2.293001890182495, "rewards/rejected": -2.368436813354492, "step": 6772 }, { "epoch": 0.79, "learning_rate": 6.459194519900792e-08, "logits/chosen": -1.6467218399047852, "logits/rejected": -2.00667142868042, "logps/chosen": -335.9797668457031, "logps/rejected": -309.2889099121094, "loss": 0.7189, "rewards/accuracies": 0.75, "rewards/chosen": -1.9041330814361572, "rewards/margins": 1.2284350395202637, "rewards/rejected": -3.132567882537842, "step": 6773 }, { "epoch": 0.79, "learning_rate": 6.455651352308964e-08, "logits/chosen": -2.5751824378967285, "logits/rejected": -2.3824427127838135, "logps/chosen": -336.43975830078125, "logps/rejected": -329.6060791015625, "loss": 0.6016, "rewards/accuracies": 0.875, "rewards/chosen": -0.7487530708312988, "rewards/margins": 1.679397463798523, "rewards/rejected": -2.4281504154205322, "step": 6774 }, { "epoch": 0.79, "learning_rate": 6.452108184717136e-08, "logits/chosen": -2.3123629093170166, "logits/rejected": -2.4909796714782715, "logps/chosen": -276.26409912109375, "logps/rejected": -217.53536987304688, "loss": 0.4042, "rewards/accuracies": 0.75, "rewards/chosen": -1.295357584953308, "rewards/margins": 1.583902359008789, "rewards/rejected": -2.8792598247528076, "step": 6775 }, { "epoch": 0.79, "learning_rate": 6.44856501712531e-08, "logits/chosen": -2.4778575897216797, "logits/rejected": -2.470651865005493, "logps/chosen": -296.90399169921875, "logps/rejected": -416.0457458496094, "loss": 0.4017, "rewards/accuracies": 0.75, "rewards/chosen": -0.5988209247589111, "rewards/margins": 2.1861300468444824, "rewards/rejected": -2.7849507331848145, "step": 6776 }, { "epoch": 0.79, "learning_rate": 6.445021849533483e-08, "logits/chosen": -2.9068360328674316, "logits/rejected": -2.645242214202881, "logps/chosen": -159.5697784423828, "logps/rejected": -189.36062622070312, "loss": 0.4952, "rewards/accuracies": 0.75, "rewards/chosen": -0.9467595815658569, "rewards/margins": 1.4570311307907104, "rewards/rejected": -2.4037909507751465, "step": 6777 }, { "epoch": 0.79, "learning_rate": 6.441478681941655e-08, "logits/chosen": -1.7599766254425049, "logits/rejected": -2.1193909645080566, "logps/chosen": -480.92724609375, "logps/rejected": -513.0261840820312, "loss": 0.3123, "rewards/accuracies": 0.75, "rewards/chosen": -0.893574595451355, "rewards/margins": 3.4860892295837402, "rewards/rejected": -4.379663944244385, "step": 6778 }, { "epoch": 0.79, "learning_rate": 6.437935514349829e-08, "logits/chosen": -2.5956902503967285, "logits/rejected": -2.714261054992676, "logps/chosen": -316.0840759277344, "logps/rejected": -274.8338623046875, "loss": 0.4937, "rewards/accuracies": 0.75, "rewards/chosen": -0.717914879322052, "rewards/margins": 2.4928011894226074, "rewards/rejected": -3.2107162475585938, "step": 6779 }, { "epoch": 0.79, "learning_rate": 6.434392346758001e-08, "logits/chosen": -2.7722506523132324, "logits/rejected": -2.7597031593322754, "logps/chosen": -253.97442626953125, "logps/rejected": -227.812744140625, "loss": 0.1431, "rewards/accuracies": 1.0, "rewards/chosen": -1.3998329639434814, "rewards/margins": 2.868847370147705, "rewards/rejected": -4.268680095672607, "step": 6780 }, { "epoch": 0.79, "learning_rate": 6.430849179166175e-08, "logits/chosen": -2.7589733600616455, "logits/rejected": -2.6167430877685547, "logps/chosen": -247.87600708007812, "logps/rejected": -302.966552734375, "loss": 0.3008, "rewards/accuracies": 0.875, "rewards/chosen": -0.2275225818157196, "rewards/margins": 1.8940273523330688, "rewards/rejected": -2.1215500831604004, "step": 6781 }, { "epoch": 0.79, "learning_rate": 6.427306011574348e-08, "logits/chosen": -2.5680832862854004, "logits/rejected": -2.6700496673583984, "logps/chosen": -269.5735168457031, "logps/rejected": -270.3863830566406, "loss": 0.3527, "rewards/accuracies": 0.875, "rewards/chosen": -1.4740527868270874, "rewards/margins": 3.1395316123962402, "rewards/rejected": -4.613584518432617, "step": 6782 }, { "epoch": 0.79, "learning_rate": 6.42376284398252e-08, "logits/chosen": -2.1120963096618652, "logits/rejected": -2.222761631011963, "logps/chosen": -323.16033935546875, "logps/rejected": -225.1178436279297, "loss": 0.4928, "rewards/accuracies": 0.625, "rewards/chosen": -0.8396785259246826, "rewards/margins": 1.0267994403839111, "rewards/rejected": -1.8664780855178833, "step": 6783 }, { "epoch": 0.79, "learning_rate": 6.420219676390692e-08, "logits/chosen": -2.378286600112915, "logits/rejected": -2.6183524131774902, "logps/chosen": -206.07606506347656, "logps/rejected": -240.38479614257812, "loss": 0.5128, "rewards/accuracies": 0.75, "rewards/chosen": -0.5946887731552124, "rewards/margins": 3.5106632709503174, "rewards/rejected": -4.10535192489624, "step": 6784 }, { "epoch": 0.79, "learning_rate": 6.416676508798866e-08, "logits/chosen": -1.9542427062988281, "logits/rejected": -1.672656536102295, "logps/chosen": -368.3193664550781, "logps/rejected": -360.6171875, "loss": 0.6897, "rewards/accuracies": 0.625, "rewards/chosen": -1.0096635818481445, "rewards/margins": 0.9955824017524719, "rewards/rejected": -2.0052459239959717, "step": 6785 }, { "epoch": 0.79, "learning_rate": 6.413133341207038e-08, "logits/chosen": -1.9322152137756348, "logits/rejected": -1.8752424716949463, "logps/chosen": -147.2918701171875, "logps/rejected": -238.3200225830078, "loss": 0.8416, "rewards/accuracies": 0.625, "rewards/chosen": -0.6621087789535522, "rewards/margins": 2.421107769012451, "rewards/rejected": -3.0832161903381348, "step": 6786 }, { "epoch": 0.79, "learning_rate": 6.409590173615212e-08, "logits/chosen": -2.230856418609619, "logits/rejected": -2.345964193344116, "logps/chosen": -236.75242614746094, "logps/rejected": -246.5378875732422, "loss": 0.8337, "rewards/accuracies": 0.625, "rewards/chosen": -1.4754624366760254, "rewards/margins": 0.5911222696304321, "rewards/rejected": -2.066584825515747, "step": 6787 }, { "epoch": 0.79, "learning_rate": 6.406047006023385e-08, "logits/chosen": -2.1282143592834473, "logits/rejected": -1.9664204120635986, "logps/chosen": -406.5665283203125, "logps/rejected": -376.43115234375, "loss": 0.4449, "rewards/accuracies": 0.75, "rewards/chosen": -1.2480473518371582, "rewards/margins": 0.8421512842178345, "rewards/rejected": -2.090198516845703, "step": 6788 }, { "epoch": 0.79, "learning_rate": 6.402503838431558e-08, "logits/chosen": -2.765913963317871, "logits/rejected": -2.871213912963867, "logps/chosen": -158.09007263183594, "logps/rejected": -188.76287841796875, "loss": 0.3622, "rewards/accuracies": 0.75, "rewards/chosen": -0.694503903388977, "rewards/margins": 2.385610818862915, "rewards/rejected": -3.0801146030426025, "step": 6789 }, { "epoch": 0.79, "learning_rate": 6.39896067083973e-08, "logits/chosen": -1.8214585781097412, "logits/rejected": -1.9915573596954346, "logps/chosen": -603.442626953125, "logps/rejected": -488.077880859375, "loss": 0.4965, "rewards/accuracies": 0.75, "rewards/chosen": -0.3335566520690918, "rewards/margins": 0.9505004286766052, "rewards/rejected": -1.2840571403503418, "step": 6790 }, { "epoch": 0.79, "learning_rate": 6.395417503247903e-08, "logits/chosen": -2.566884994506836, "logits/rejected": -2.858926296234131, "logps/chosen": -216.67279052734375, "logps/rejected": -153.56576538085938, "loss": 1.8635, "rewards/accuracies": 0.625, "rewards/chosen": -3.2564234733581543, "rewards/margins": 0.19750335812568665, "rewards/rejected": -3.4539268016815186, "step": 6791 }, { "epoch": 0.79, "learning_rate": 6.391874335656077e-08, "logits/chosen": -2.8240928649902344, "logits/rejected": -2.744661808013916, "logps/chosen": -276.2595520019531, "logps/rejected": -210.21902465820312, "loss": 0.3861, "rewards/accuracies": 0.875, "rewards/chosen": -2.290804386138916, "rewards/margins": 2.221125841140747, "rewards/rejected": -4.511929988861084, "step": 6792 }, { "epoch": 0.79, "learning_rate": 6.388331168064249e-08, "logits/chosen": -2.2747035026550293, "logits/rejected": -2.4926605224609375, "logps/chosen": -425.50567626953125, "logps/rejected": -408.43560791015625, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/chosen": 0.09721925854682922, "rewards/margins": 3.4144649505615234, "rewards/rejected": -3.3172454833984375, "step": 6793 }, { "epoch": 0.79, "learning_rate": 6.384788000472423e-08, "logits/chosen": -2.4956142902374268, "logits/rejected": -2.4524619579315186, "logps/chosen": -281.15789794921875, "logps/rejected": -392.2569274902344, "loss": 0.249, "rewards/accuracies": 0.875, "rewards/chosen": 0.0813252180814743, "rewards/margins": 2.829874277114868, "rewards/rejected": -2.748548984527588, "step": 6794 }, { "epoch": 0.79, "learning_rate": 6.381244832880595e-08, "logits/chosen": -1.7882614135742188, "logits/rejected": -1.9740612506866455, "logps/chosen": -252.18954467773438, "logps/rejected": -240.942626953125, "loss": 0.3267, "rewards/accuracies": 0.875, "rewards/chosen": -0.7134333848953247, "rewards/margins": 2.48065447807312, "rewards/rejected": -3.1940877437591553, "step": 6795 }, { "epoch": 0.79, "learning_rate": 6.377701665288767e-08, "logits/chosen": -2.222358226776123, "logits/rejected": -2.0604336261749268, "logps/chosen": -347.1020812988281, "logps/rejected": -296.7547302246094, "loss": 0.3148, "rewards/accuracies": 0.75, "rewards/chosen": -0.9110134243965149, "rewards/margins": 2.1604127883911133, "rewards/rejected": -3.0714261531829834, "step": 6796 }, { "epoch": 0.79, "learning_rate": 6.37415849769694e-08, "logits/chosen": -2.1205124855041504, "logits/rejected": -2.520047664642334, "logps/chosen": -460.56463623046875, "logps/rejected": -215.44024658203125, "loss": 0.5113, "rewards/accuracies": 0.75, "rewards/chosen": -0.7175145149230957, "rewards/margins": 1.0822789669036865, "rewards/rejected": -1.7997934818267822, "step": 6797 }, { "epoch": 0.79, "learning_rate": 6.370615330105114e-08, "logits/chosen": -1.941310167312622, "logits/rejected": -1.7959825992584229, "logps/chosen": -172.890869140625, "logps/rejected": -218.49435424804688, "loss": 0.3668, "rewards/accuracies": 0.75, "rewards/chosen": -1.0585466623306274, "rewards/margins": 2.230484962463379, "rewards/rejected": -3.289031982421875, "step": 6798 }, { "epoch": 0.79, "learning_rate": 6.367072162513288e-08, "logits/chosen": -2.1557884216308594, "logits/rejected": -2.217745780944824, "logps/chosen": -265.44488525390625, "logps/rejected": -317.3211669921875, "loss": 0.6724, "rewards/accuracies": 0.5, "rewards/chosen": -1.370278239250183, "rewards/margins": 1.152424693107605, "rewards/rejected": -2.522702932357788, "step": 6799 }, { "epoch": 0.79, "learning_rate": 6.36352899492146e-08, "logits/chosen": -2.3434767723083496, "logits/rejected": -2.5845749378204346, "logps/chosen": -189.81007385253906, "logps/rejected": -188.2084503173828, "loss": 0.2135, "rewards/accuracies": 0.875, "rewards/chosen": -0.13010673224925995, "rewards/margins": 2.838980197906494, "rewards/rejected": -2.9690871238708496, "step": 6800 }, { "epoch": 0.79, "learning_rate": 6.359985827329632e-08, "logits/chosen": -2.397975444793701, "logits/rejected": -2.4049019813537598, "logps/chosen": -312.78070068359375, "logps/rejected": -349.8260498046875, "loss": 0.143, "rewards/accuracies": 0.875, "rewards/chosen": 0.21491481363773346, "rewards/margins": 3.8566417694091797, "rewards/rejected": -3.6417269706726074, "step": 6801 }, { "epoch": 0.79, "learning_rate": 6.356442659737806e-08, "logits/chosen": -1.8943442106246948, "logits/rejected": -1.9619600772857666, "logps/chosen": -199.3164825439453, "logps/rejected": -214.37164306640625, "loss": 0.6279, "rewards/accuracies": 0.75, "rewards/chosen": -0.964583158493042, "rewards/margins": 1.8225607872009277, "rewards/rejected": -2.7871437072753906, "step": 6802 }, { "epoch": 0.79, "learning_rate": 6.352899492145978e-08, "logits/chosen": -2.821326732635498, "logits/rejected": -2.658748149871826, "logps/chosen": -192.15103149414062, "logps/rejected": -305.13214111328125, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -0.7400562167167664, "rewards/margins": 4.641158580780029, "rewards/rejected": -5.381214618682861, "step": 6803 }, { "epoch": 0.79, "learning_rate": 6.349356324554151e-08, "logits/chosen": -2.022153377532959, "logits/rejected": -1.8650825023651123, "logps/chosen": -158.96315002441406, "logps/rejected": -202.23785400390625, "loss": 0.3173, "rewards/accuracies": 0.875, "rewards/chosen": -0.6663171052932739, "rewards/margins": 1.7247976064682007, "rewards/rejected": -2.3911147117614746, "step": 6804 }, { "epoch": 0.79, "learning_rate": 6.345813156962325e-08, "logits/chosen": -1.7799378633499146, "logits/rejected": -1.8087536096572876, "logps/chosen": -243.47442626953125, "logps/rejected": -286.40008544921875, "loss": 0.2598, "rewards/accuracies": 0.875, "rewards/chosen": -0.8991287350654602, "rewards/margins": 2.0799508094787598, "rewards/rejected": -2.979079484939575, "step": 6805 }, { "epoch": 0.79, "learning_rate": 6.342269989370497e-08, "logits/chosen": -2.2313826084136963, "logits/rejected": -2.3537559509277344, "logps/chosen": -380.25433349609375, "logps/rejected": -233.24615478515625, "loss": 0.7489, "rewards/accuracies": 0.75, "rewards/chosen": -1.6211779117584229, "rewards/margins": 0.3604320287704468, "rewards/rejected": -1.9816100597381592, "step": 6806 }, { "epoch": 0.79, "learning_rate": 6.338726821778669e-08, "logits/chosen": -2.1844873428344727, "logits/rejected": -2.022939920425415, "logps/chosen": -446.2575378417969, "logps/rejected": -484.958251953125, "loss": 0.9916, "rewards/accuracies": 0.625, "rewards/chosen": -1.4694340229034424, "rewards/margins": 0.6024514436721802, "rewards/rejected": -2.071885585784912, "step": 6807 }, { "epoch": 0.79, "learning_rate": 6.335183654186843e-08, "logits/chosen": -2.7030932903289795, "logits/rejected": -2.5498275756835938, "logps/chosen": -355.859130859375, "logps/rejected": -273.297119140625, "loss": 0.462, "rewards/accuracies": 0.875, "rewards/chosen": -1.5947198867797852, "rewards/margins": 2.4618163108825684, "rewards/rejected": -4.056536674499512, "step": 6808 }, { "epoch": 0.79, "learning_rate": 6.331640486595016e-08, "logits/chosen": -2.745579242706299, "logits/rejected": -2.642775535583496, "logps/chosen": -243.49252319335938, "logps/rejected": -238.3797149658203, "loss": 0.7094, "rewards/accuracies": 0.625, "rewards/chosen": -1.6358884572982788, "rewards/margins": 1.5563158988952637, "rewards/rejected": -3.192204475402832, "step": 6809 }, { "epoch": 0.79, "learning_rate": 6.328097319003189e-08, "logits/chosen": -2.2355241775512695, "logits/rejected": -2.4976754188537598, "logps/chosen": -290.7921142578125, "logps/rejected": -256.57861328125, "loss": 0.4579, "rewards/accuracies": 0.875, "rewards/chosen": -1.3895573616027832, "rewards/margins": 1.275899887084961, "rewards/rejected": -2.665457248687744, "step": 6810 }, { "epoch": 0.79, "learning_rate": 6.324554151411362e-08, "logits/chosen": -2.307816505432129, "logits/rejected": -2.5519042015075684, "logps/chosen": -297.6355285644531, "logps/rejected": -257.4981689453125, "loss": 0.4953, "rewards/accuracies": 0.625, "rewards/chosen": -1.3308672904968262, "rewards/margins": 1.0841656923294067, "rewards/rejected": -2.4150328636169434, "step": 6811 }, { "epoch": 0.79, "learning_rate": 6.321010983819534e-08, "logits/chosen": -2.3852977752685547, "logits/rejected": -2.1939754486083984, "logps/chosen": -419.3787536621094, "logps/rejected": -341.34808349609375, "loss": 0.4648, "rewards/accuracies": 0.625, "rewards/chosen": -1.2085882425308228, "rewards/margins": 1.0502952337265015, "rewards/rejected": -2.258883476257324, "step": 6812 }, { "epoch": 0.79, "learning_rate": 6.317467816227706e-08, "logits/chosen": -2.2341983318328857, "logits/rejected": -1.988534927368164, "logps/chosen": -337.31170654296875, "logps/rejected": -292.7730712890625, "loss": 0.3777, "rewards/accuracies": 0.875, "rewards/chosen": -0.7908974885940552, "rewards/margins": 1.66379976272583, "rewards/rejected": -2.4546971321105957, "step": 6813 }, { "epoch": 0.79, "learning_rate": 6.31392464863588e-08, "logits/chosen": -1.9050772190093994, "logits/rejected": -2.3762130737304688, "logps/chosen": -419.4976501464844, "logps/rejected": -325.50958251953125, "loss": 0.1357, "rewards/accuracies": 1.0, "rewards/chosen": -0.5835886001586914, "rewards/margins": 2.633004665374756, "rewards/rejected": -3.2165932655334473, "step": 6814 }, { "epoch": 0.79, "learning_rate": 6.310381481044054e-08, "logits/chosen": -2.432161331176758, "logits/rejected": -2.4981040954589844, "logps/chosen": -308.74908447265625, "logps/rejected": -221.52450561523438, "loss": 0.4929, "rewards/accuracies": 0.75, "rewards/chosen": -0.9778757691383362, "rewards/margins": 1.6180200576782227, "rewards/rejected": -2.595895767211914, "step": 6815 }, { "epoch": 0.79, "learning_rate": 6.306838313452226e-08, "logits/chosen": -1.7220635414123535, "logits/rejected": -2.052182674407959, "logps/chosen": -606.8417358398438, "logps/rejected": -369.26898193359375, "loss": 0.6589, "rewards/accuracies": 0.75, "rewards/chosen": -0.31477779150009155, "rewards/margins": 1.3013675212860107, "rewards/rejected": -1.6161452531814575, "step": 6816 }, { "epoch": 0.79, "learning_rate": 6.303295145860399e-08, "logits/chosen": -2.374607563018799, "logits/rejected": -2.3857107162475586, "logps/chosen": -272.97906494140625, "logps/rejected": -264.08770751953125, "loss": 0.3251, "rewards/accuracies": 0.875, "rewards/chosen": -1.95330810546875, "rewards/margins": 1.4115948677062988, "rewards/rejected": -3.364902973175049, "step": 6817 }, { "epoch": 0.79, "learning_rate": 6.299751978268572e-08, "logits/chosen": -1.8138853311538696, "logits/rejected": -1.7799879312515259, "logps/chosen": -366.2053527832031, "logps/rejected": -377.4897155761719, "loss": 0.5335, "rewards/accuracies": 0.75, "rewards/chosen": -0.2974322438240051, "rewards/margins": 0.9137157201766968, "rewards/rejected": -1.2111480236053467, "step": 6818 }, { "epoch": 0.79, "learning_rate": 6.296208810676745e-08, "logits/chosen": -2.273588180541992, "logits/rejected": -2.2245569229125977, "logps/chosen": -182.46163940429688, "logps/rejected": -156.8542022705078, "loss": 0.8632, "rewards/accuracies": 0.75, "rewards/chosen": -0.9658499360084534, "rewards/margins": 0.8314752578735352, "rewards/rejected": -1.7973252534866333, "step": 6819 }, { "epoch": 0.79, "learning_rate": 6.292665643084917e-08, "logits/chosen": -2.700573205947876, "logits/rejected": -2.5909647941589355, "logps/chosen": -179.2833709716797, "logps/rejected": -229.57565307617188, "loss": 0.3743, "rewards/accuracies": 0.75, "rewards/chosen": -0.6661924123764038, "rewards/margins": 2.598938226699829, "rewards/rejected": -3.2651309967041016, "step": 6820 }, { "epoch": 0.79, "learning_rate": 6.289122475493091e-08, "logits/chosen": -2.6604418754577637, "logits/rejected": -2.654819965362549, "logps/chosen": -231.9023895263672, "logps/rejected": -239.58074951171875, "loss": 0.4162, "rewards/accuracies": 0.75, "rewards/chosen": -0.8056078553199768, "rewards/margins": 2.2858316898345947, "rewards/rejected": -3.091439723968506, "step": 6821 }, { "epoch": 0.79, "learning_rate": 6.285579307901263e-08, "logits/chosen": -2.2104830741882324, "logits/rejected": -2.429827928543091, "logps/chosen": -363.30902099609375, "logps/rejected": -244.61651611328125, "loss": 0.3707, "rewards/accuracies": 0.875, "rewards/chosen": -0.6656179428100586, "rewards/margins": 1.4062573909759521, "rewards/rejected": -2.0718753337860107, "step": 6822 }, { "epoch": 0.79, "learning_rate": 6.282036140309437e-08, "logits/chosen": -2.3316798210144043, "logits/rejected": -2.3102171421051025, "logps/chosen": -159.2357940673828, "logps/rejected": -244.04794311523438, "loss": 0.2649, "rewards/accuracies": 1.0, "rewards/chosen": -0.8664047718048096, "rewards/margins": 2.4343175888061523, "rewards/rejected": -3.300722360610962, "step": 6823 }, { "epoch": 0.79, "learning_rate": 6.278492972717609e-08, "logits/chosen": -1.8175790309906006, "logits/rejected": -1.6613408327102661, "logps/chosen": -280.41351318359375, "logps/rejected": -386.1141357421875, "loss": 0.2723, "rewards/accuracies": 1.0, "rewards/chosen": -0.5777278542518616, "rewards/margins": 1.8316383361816406, "rewards/rejected": -2.4093661308288574, "step": 6824 }, { "epoch": 0.79, "learning_rate": 6.274949805125782e-08, "logits/chosen": -2.522156000137329, "logits/rejected": -2.5817315578460693, "logps/chosen": -357.47296142578125, "logps/rejected": -207.1848602294922, "loss": 0.2351, "rewards/accuracies": 0.875, "rewards/chosen": -0.03694732487201691, "rewards/margins": 2.083955764770508, "rewards/rejected": -2.1209030151367188, "step": 6825 }, { "epoch": 0.79, "learning_rate": 6.271406637533956e-08, "logits/chosen": -2.4609618186950684, "logits/rejected": -2.9065542221069336, "logps/chosen": -282.47607421875, "logps/rejected": -194.25625610351562, "loss": 0.2975, "rewards/accuracies": 0.875, "rewards/chosen": -0.8086976408958435, "rewards/margins": 3.447981595993042, "rewards/rejected": -4.256679534912109, "step": 6826 }, { "epoch": 0.79, "learning_rate": 6.267863469942128e-08, "logits/chosen": -1.9748719930648804, "logits/rejected": -2.0269923210144043, "logps/chosen": -361.55914306640625, "logps/rejected": -323.3653564453125, "loss": 0.5786, "rewards/accuracies": 0.75, "rewards/chosen": -1.0930440425872803, "rewards/margins": 1.2704932689666748, "rewards/rejected": -2.363537311553955, "step": 6827 }, { "epoch": 0.79, "learning_rate": 6.2643203023503e-08, "logits/chosen": -2.0246243476867676, "logits/rejected": -2.1423563957214355, "logps/chosen": -461.56298828125, "logps/rejected": -403.35369873046875, "loss": 0.1873, "rewards/accuracies": 1.0, "rewards/chosen": 0.027841851115226746, "rewards/margins": 2.0495471954345703, "rewards/rejected": -2.021705389022827, "step": 6828 }, { "epoch": 0.79, "learning_rate": 6.260777134758474e-08, "logits/chosen": -2.49320650100708, "logits/rejected": -2.464782238006592, "logps/chosen": -258.2237854003906, "logps/rejected": -258.4863586425781, "loss": 0.3258, "rewards/accuracies": 0.75, "rewards/chosen": -1.063245177268982, "rewards/margins": 2.5312910079956055, "rewards/rejected": -3.594536542892456, "step": 6829 }, { "epoch": 0.79, "learning_rate": 6.257233967166646e-08, "logits/chosen": -2.026853084564209, "logits/rejected": -1.725264549255371, "logps/chosen": -244.90615844726562, "logps/rejected": -314.2957458496094, "loss": 0.7398, "rewards/accuracies": 0.625, "rewards/chosen": -1.3865164518356323, "rewards/margins": 0.1506587266921997, "rewards/rejected": -1.537175178527832, "step": 6830 }, { "epoch": 0.79, "learning_rate": 6.25369079957482e-08, "logits/chosen": -2.2648966312408447, "logits/rejected": -2.23026704788208, "logps/chosen": -416.9146728515625, "logps/rejected": -357.97650146484375, "loss": 0.5728, "rewards/accuracies": 0.75, "rewards/chosen": -1.69813072681427, "rewards/margins": 2.026193618774414, "rewards/rejected": -3.7243242263793945, "step": 6831 }, { "epoch": 0.79, "learning_rate": 6.250147631982993e-08, "logits/chosen": -2.4307215213775635, "logits/rejected": -2.595226287841797, "logps/chosen": -235.8336181640625, "logps/rejected": -254.2156982421875, "loss": 0.3972, "rewards/accuracies": 0.75, "rewards/chosen": -1.3966517448425293, "rewards/margins": 3.0202438831329346, "rewards/rejected": -4.416895866394043, "step": 6832 }, { "epoch": 0.79, "learning_rate": 6.246604464391165e-08, "logits/chosen": -2.3299412727355957, "logits/rejected": -2.471790313720703, "logps/chosen": -477.599365234375, "logps/rejected": -320.89849853515625, "loss": 0.2595, "rewards/accuracies": 1.0, "rewards/chosen": -0.5683954358100891, "rewards/margins": 3.033669948577881, "rewards/rejected": -3.602065324783325, "step": 6833 }, { "epoch": 0.79, "learning_rate": 6.243061296799339e-08, "logits/chosen": -2.185275077819824, "logits/rejected": -2.119556427001953, "logps/chosen": -158.6759033203125, "logps/rejected": -133.61337280273438, "loss": 0.408, "rewards/accuracies": 0.75, "rewards/chosen": -0.7501013875007629, "rewards/margins": 1.1915045976638794, "rewards/rejected": -1.941606044769287, "step": 6834 }, { "epoch": 0.8, "learning_rate": 6.239518129207511e-08, "logits/chosen": -1.6929683685302734, "logits/rejected": -1.9497895240783691, "logps/chosen": -438.0854187011719, "logps/rejected": -390.5484924316406, "loss": 0.4832, "rewards/accuracies": 0.75, "rewards/chosen": -1.362844467163086, "rewards/margins": 1.0009136199951172, "rewards/rejected": -2.363758087158203, "step": 6835 }, { "epoch": 0.8, "learning_rate": 6.235974961615685e-08, "logits/chosen": -2.5380823612213135, "logits/rejected": -2.3468170166015625, "logps/chosen": -320.11224365234375, "logps/rejected": -277.92669677734375, "loss": 0.3031, "rewards/accuracies": 0.875, "rewards/chosen": -0.7744513154029846, "rewards/margins": 2.240217685699463, "rewards/rejected": -3.0146689414978027, "step": 6836 }, { "epoch": 0.8, "learning_rate": 6.232431794023857e-08, "logits/chosen": -2.2293930053710938, "logits/rejected": -2.4432146549224854, "logps/chosen": -431.3406066894531, "logps/rejected": -213.21697998046875, "loss": 0.1883, "rewards/accuracies": 1.0, "rewards/chosen": -1.039171814918518, "rewards/margins": 2.1666224002838135, "rewards/rejected": -3.205794334411621, "step": 6837 }, { "epoch": 0.8, "learning_rate": 6.22888862643203e-08, "logits/chosen": -2.434499502182007, "logits/rejected": -2.6210217475891113, "logps/chosen": -297.30731201171875, "logps/rejected": -280.2550048828125, "loss": 0.5575, "rewards/accuracies": 0.625, "rewards/chosen": -1.225843906402588, "rewards/margins": 1.1421735286712646, "rewards/rejected": -2.3680176734924316, "step": 6838 }, { "epoch": 0.8, "learning_rate": 6.225345458840203e-08, "logits/chosen": -2.186605215072632, "logits/rejected": -2.5406644344329834, "logps/chosen": -468.1019592285156, "logps/rejected": -256.7169189453125, "loss": 0.4812, "rewards/accuracies": 0.625, "rewards/chosen": -0.8386433720588684, "rewards/margins": 1.7743651866912842, "rewards/rejected": -2.613008499145508, "step": 6839 }, { "epoch": 0.8, "learning_rate": 6.221802291248376e-08, "logits/chosen": -2.2246546745300293, "logits/rejected": -2.299765110015869, "logps/chosen": -341.9164733886719, "logps/rejected": -340.49298095703125, "loss": 0.2373, "rewards/accuracies": 0.875, "rewards/chosen": -1.5369409322738647, "rewards/margins": 2.6214704513549805, "rewards/rejected": -4.158411502838135, "step": 6840 }, { "epoch": 0.8, "learning_rate": 6.218259123656548e-08, "logits/chosen": -1.8745458126068115, "logits/rejected": -2.033921241760254, "logps/chosen": -421.938232421875, "logps/rejected": -391.81048583984375, "loss": 0.1919, "rewards/accuracies": 1.0, "rewards/chosen": -0.565506100654602, "rewards/margins": 2.0856711864471436, "rewards/rejected": -2.651177406311035, "step": 6841 }, { "epoch": 0.8, "learning_rate": 6.214715956064722e-08, "logits/chosen": -2.764787435531616, "logits/rejected": -2.7290735244750977, "logps/chosen": -234.2088623046875, "logps/rejected": -227.45156860351562, "loss": 0.3506, "rewards/accuracies": 0.875, "rewards/chosen": -0.610879123210907, "rewards/margins": 1.4026906490325928, "rewards/rejected": -2.0135698318481445, "step": 6842 }, { "epoch": 0.8, "learning_rate": 6.211172788472895e-08, "logits/chosen": -2.291232109069824, "logits/rejected": -2.5643715858459473, "logps/chosen": -166.26429748535156, "logps/rejected": -137.162353515625, "loss": 0.2425, "rewards/accuracies": 0.875, "rewards/chosen": 0.12492899596691132, "rewards/margins": 2.385897397994995, "rewards/rejected": -2.2609682083129883, "step": 6843 }, { "epoch": 0.8, "learning_rate": 6.207629620881068e-08, "logits/chosen": -2.946488618850708, "logits/rejected": -2.908271551132202, "logps/chosen": -129.52249145507812, "logps/rejected": -155.7763671875, "loss": 0.458, "rewards/accuracies": 0.625, "rewards/chosen": -0.5405779480934143, "rewards/margins": 2.5716850757598877, "rewards/rejected": -3.1122629642486572, "step": 6844 }, { "epoch": 0.8, "learning_rate": 6.20408645328924e-08, "logits/chosen": -1.786116361618042, "logits/rejected": -2.055964469909668, "logps/chosen": -228.19223022460938, "logps/rejected": -227.5196533203125, "loss": 0.5517, "rewards/accuracies": 0.75, "rewards/chosen": -1.4102916717529297, "rewards/margins": 1.517952561378479, "rewards/rejected": -2.928244113922119, "step": 6845 }, { "epoch": 0.8, "learning_rate": 6.200543285697413e-08, "logits/chosen": -2.354393243789673, "logits/rejected": -2.3393170833587646, "logps/chosen": -307.44219970703125, "logps/rejected": -300.79290771484375, "loss": 0.1869, "rewards/accuracies": 1.0, "rewards/chosen": -1.558704137802124, "rewards/margins": 2.447596549987793, "rewards/rejected": -4.006300926208496, "step": 6846 }, { "epoch": 0.8, "learning_rate": 6.197000118105586e-08, "logits/chosen": -1.7318826913833618, "logits/rejected": -1.960352897644043, "logps/chosen": -287.6160583496094, "logps/rejected": -210.08221435546875, "loss": 0.5289, "rewards/accuracies": 0.75, "rewards/chosen": -1.0149390697479248, "rewards/margins": 1.2997474670410156, "rewards/rejected": -2.3146865367889404, "step": 6847 }, { "epoch": 0.8, "learning_rate": 6.193456950513759e-08, "logits/chosen": -2.3671865463256836, "logits/rejected": -1.932340383529663, "logps/chosen": -232.91368103027344, "logps/rejected": -325.377197265625, "loss": 0.3741, "rewards/accuracies": 0.875, "rewards/chosen": -0.7375699281692505, "rewards/margins": 2.8285973072052, "rewards/rejected": -3.5661673545837402, "step": 6848 }, { "epoch": 0.8, "learning_rate": 6.189913782921933e-08, "logits/chosen": -2.748404026031494, "logits/rejected": -2.7693657875061035, "logps/chosen": -98.08004760742188, "logps/rejected": -149.220458984375, "loss": 0.7063, "rewards/accuracies": 0.5, "rewards/chosen": -1.2909510135650635, "rewards/margins": 0.8454074263572693, "rewards/rejected": -2.1363582611083984, "step": 6849 }, { "epoch": 0.8, "learning_rate": 6.186370615330105e-08, "logits/chosen": -2.862389326095581, "logits/rejected": -2.8285460472106934, "logps/chosen": -174.32081604003906, "logps/rejected": -120.73200225830078, "loss": 0.3054, "rewards/accuracies": 1.0, "rewards/chosen": -0.8009113073348999, "rewards/margins": 1.2189006805419922, "rewards/rejected": -2.0198121070861816, "step": 6850 }, { "epoch": 0.8, "learning_rate": 6.182827447738277e-08, "logits/chosen": -2.3116378784179688, "logits/rejected": -2.594020366668701, "logps/chosen": -387.82135009765625, "logps/rejected": -310.3623046875, "loss": 0.2908, "rewards/accuracies": 0.75, "rewards/chosen": -0.2876507639884949, "rewards/margins": 2.0654802322387695, "rewards/rejected": -2.35313081741333, "step": 6851 }, { "epoch": 0.8, "learning_rate": 6.17928428014645e-08, "logits/chosen": -1.7338718175888062, "logits/rejected": -1.6062952280044556, "logps/chosen": -498.030517578125, "logps/rejected": -500.8822021484375, "loss": 0.6348, "rewards/accuracies": 0.625, "rewards/chosen": -1.5327322483062744, "rewards/margins": 1.397261381149292, "rewards/rejected": -2.9299936294555664, "step": 6852 }, { "epoch": 0.8, "learning_rate": 6.175741112554624e-08, "logits/chosen": -1.3945882320404053, "logits/rejected": -1.9716272354125977, "logps/chosen": -225.59814453125, "logps/rejected": -212.77056884765625, "loss": 0.5621, "rewards/accuracies": 0.625, "rewards/chosen": -0.8643045425415039, "rewards/margins": 2.4510693550109863, "rewards/rejected": -3.3153738975524902, "step": 6853 }, { "epoch": 0.8, "learning_rate": 6.172197944962796e-08, "logits/chosen": -2.2953336238861084, "logits/rejected": -2.371824026107788, "logps/chosen": -362.1795959472656, "logps/rejected": -311.12274169921875, "loss": 0.3431, "rewards/accuracies": 0.875, "rewards/chosen": -0.9477490186691284, "rewards/margins": 1.445358157157898, "rewards/rejected": -2.3931071758270264, "step": 6854 }, { "epoch": 0.8, "learning_rate": 6.16865477737097e-08, "logits/chosen": -2.47581148147583, "logits/rejected": -2.6453781127929688, "logps/chosen": -347.84246826171875, "logps/rejected": -260.4224548339844, "loss": 0.2726, "rewards/accuracies": 0.875, "rewards/chosen": -0.5556889772415161, "rewards/margins": 2.5416297912597656, "rewards/rejected": -3.0973191261291504, "step": 6855 }, { "epoch": 0.8, "learning_rate": 6.165111609779142e-08, "logits/chosen": -1.9120556116104126, "logits/rejected": -2.1209051609039307, "logps/chosen": -323.7871398925781, "logps/rejected": -283.9405212402344, "loss": 0.8298, "rewards/accuracies": 0.625, "rewards/chosen": -2.4211859703063965, "rewards/margins": 1.362655758857727, "rewards/rejected": -3.783841609954834, "step": 6856 }, { "epoch": 0.8, "learning_rate": 6.161568442187314e-08, "logits/chosen": -2.549255609512329, "logits/rejected": -2.5862505435943604, "logps/chosen": -200.94708251953125, "logps/rejected": -317.8486328125, "loss": 0.4339, "rewards/accuracies": 0.75, "rewards/chosen": -1.8365788459777832, "rewards/margins": 1.6901180744171143, "rewards/rejected": -3.5266969203948975, "step": 6857 }, { "epoch": 0.8, "learning_rate": 6.158025274595488e-08, "logits/chosen": -2.5728507041931152, "logits/rejected": -2.581162452697754, "logps/chosen": -214.61648559570312, "logps/rejected": -284.29290771484375, "loss": 0.4778, "rewards/accuracies": 0.75, "rewards/chosen": -1.2478594779968262, "rewards/margins": 2.533503532409668, "rewards/rejected": -3.781362771987915, "step": 6858 }, { "epoch": 0.8, "learning_rate": 6.154482107003661e-08, "logits/chosen": -2.4273595809936523, "logits/rejected": -2.2840187549591064, "logps/chosen": -200.69815063476562, "logps/rejected": -214.00950622558594, "loss": 0.6355, "rewards/accuracies": 0.875, "rewards/chosen": -1.2133837938308716, "rewards/margins": 1.240335464477539, "rewards/rejected": -2.453719139099121, "step": 6859 }, { "epoch": 0.8, "learning_rate": 6.150938939411834e-08, "logits/chosen": -2.4676544666290283, "logits/rejected": -2.589242458343506, "logps/chosen": -243.68450927734375, "logps/rejected": -238.80548095703125, "loss": 0.4162, "rewards/accuracies": 0.75, "rewards/chosen": -1.1741483211517334, "rewards/margins": 3.063234329223633, "rewards/rejected": -4.237382888793945, "step": 6860 }, { "epoch": 0.8, "learning_rate": 6.147395771820007e-08, "logits/chosen": -2.4629135131835938, "logits/rejected": -2.2211174964904785, "logps/chosen": -292.4907531738281, "logps/rejected": -330.314697265625, "loss": 0.5679, "rewards/accuracies": 0.625, "rewards/chosen": -1.192375898361206, "rewards/margins": 0.8940994739532471, "rewards/rejected": -2.086475372314453, "step": 6861 }, { "epoch": 0.8, "learning_rate": 6.143852604228179e-08, "logits/chosen": -1.8257081508636475, "logits/rejected": -2.1106345653533936, "logps/chosen": -425.1636962890625, "logps/rejected": -377.98974609375, "loss": 0.1489, "rewards/accuracies": 1.0, "rewards/chosen": 0.08446913957595825, "rewards/margins": 2.6171042919158936, "rewards/rejected": -2.532634973526001, "step": 6862 }, { "epoch": 0.8, "learning_rate": 6.140309436636353e-08, "logits/chosen": -2.8049685955047607, "logits/rejected": -2.829469680786133, "logps/chosen": -82.49890899658203, "logps/rejected": -154.82427978515625, "loss": 0.4006, "rewards/accuracies": 0.75, "rewards/chosen": -0.7296584844589233, "rewards/margins": 1.6011145114898682, "rewards/rejected": -2.330772876739502, "step": 6863 }, { "epoch": 0.8, "learning_rate": 6.136766269044525e-08, "logits/chosen": -2.3144547939300537, "logits/rejected": -2.388270378112793, "logps/chosen": -528.3441162109375, "logps/rejected": -454.0592956542969, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": -0.2696225047111511, "rewards/margins": 3.642622709274292, "rewards/rejected": -3.912245512008667, "step": 6864 }, { "epoch": 0.8, "learning_rate": 6.133223101452699e-08, "logits/chosen": -1.9052088260650635, "logits/rejected": -2.3367867469787598, "logps/chosen": -316.1722412109375, "logps/rejected": -244.171142578125, "loss": 0.4858, "rewards/accuracies": 0.625, "rewards/chosen": -0.9609666466712952, "rewards/margins": 1.7262738943099976, "rewards/rejected": -2.6872406005859375, "step": 6865 }, { "epoch": 0.8, "learning_rate": 6.129679933860871e-08, "logits/chosen": -2.9436936378479004, "logits/rejected": -2.9177675247192383, "logps/chosen": -253.65330505371094, "logps/rejected": -240.0859832763672, "loss": 0.1414, "rewards/accuracies": 0.875, "rewards/chosen": -0.01873358152806759, "rewards/margins": 2.813572883605957, "rewards/rejected": -2.832306385040283, "step": 6866 }, { "epoch": 0.8, "learning_rate": 6.126136766269044e-08, "logits/chosen": -2.4977145195007324, "logits/rejected": -2.3757247924804688, "logps/chosen": -136.78271484375, "logps/rejected": -196.136962890625, "loss": 0.2955, "rewards/accuracies": 0.875, "rewards/chosen": -0.12349767982959747, "rewards/margins": 2.740003824234009, "rewards/rejected": -2.86350154876709, "step": 6867 }, { "epoch": 0.8, "learning_rate": 6.122593598677217e-08, "logits/chosen": -2.9151084423065186, "logits/rejected": -2.8006560802459717, "logps/chosen": -321.87518310546875, "logps/rejected": -257.6304016113281, "loss": 0.416, "rewards/accuracies": 0.75, "rewards/chosen": -1.0966020822525024, "rewards/margins": 1.8581135272979736, "rewards/rejected": -2.9547154903411865, "step": 6868 }, { "epoch": 0.8, "learning_rate": 6.11905043108539e-08, "logits/chosen": -2.9206643104553223, "logits/rejected": -2.98946213722229, "logps/chosen": -114.8738784790039, "logps/rejected": -182.10032653808594, "loss": 0.6, "rewards/accuracies": 0.875, "rewards/chosen": -1.4498250484466553, "rewards/margins": 2.6918790340423584, "rewards/rejected": -4.141704082489014, "step": 6869 }, { "epoch": 0.8, "learning_rate": 6.115507263493564e-08, "logits/chosen": -2.2308616638183594, "logits/rejected": -2.428957462310791, "logps/chosen": -320.9161376953125, "logps/rejected": -110.08751678466797, "loss": 0.2773, "rewards/accuracies": 0.875, "rewards/chosen": -0.37987589836120605, "rewards/margins": 1.940679907798767, "rewards/rejected": -2.3205556869506836, "step": 6870 }, { "epoch": 0.8, "learning_rate": 6.111964095901736e-08, "logits/chosen": -2.43497371673584, "logits/rejected": -2.551339626312256, "logps/chosen": -428.1810607910156, "logps/rejected": -388.3935852050781, "loss": 0.5103, "rewards/accuracies": 0.625, "rewards/chosen": -1.712624192237854, "rewards/margins": 1.0983455181121826, "rewards/rejected": -2.810969829559326, "step": 6871 }, { "epoch": 0.8, "learning_rate": 6.10842092830991e-08, "logits/chosen": -1.9998674392700195, "logits/rejected": -1.9360617399215698, "logps/chosen": -192.01104736328125, "logps/rejected": -219.0188446044922, "loss": 0.8399, "rewards/accuracies": 0.625, "rewards/chosen": -1.3340623378753662, "rewards/margins": 1.8400330543518066, "rewards/rejected": -3.174095630645752, "step": 6872 }, { "epoch": 0.8, "learning_rate": 6.104877760718082e-08, "logits/chosen": -1.8257018327713013, "logits/rejected": -2.1537225246429443, "logps/chosen": -312.02484130859375, "logps/rejected": -270.003662109375, "loss": 0.4332, "rewards/accuracies": 0.875, "rewards/chosen": -0.8723500370979309, "rewards/margins": 1.6541929244995117, "rewards/rejected": -2.5265426635742188, "step": 6873 }, { "epoch": 0.8, "learning_rate": 6.101334593126254e-08, "logits/chosen": -1.9987155199050903, "logits/rejected": -2.323665142059326, "logps/chosen": -421.58154296875, "logps/rejected": -302.4698486328125, "loss": 0.3925, "rewards/accuracies": 0.75, "rewards/chosen": -0.841350257396698, "rewards/margins": 1.399461030960083, "rewards/rejected": -2.2408111095428467, "step": 6874 }, { "epoch": 0.8, "learning_rate": 6.097791425534427e-08, "logits/chosen": -1.8710644245147705, "logits/rejected": -1.7614995241165161, "logps/chosen": -416.59625244140625, "logps/rejected": -417.6948547363281, "loss": 0.2111, "rewards/accuracies": 0.875, "rewards/chosen": -0.3255206346511841, "rewards/margins": 3.144794225692749, "rewards/rejected": -3.4703147411346436, "step": 6875 }, { "epoch": 0.8, "learning_rate": 6.094248257942601e-08, "logits/chosen": -2.692440986633301, "logits/rejected": -2.3176722526550293, "logps/chosen": -108.86640167236328, "logps/rejected": -180.07888793945312, "loss": 0.3629, "rewards/accuracies": 0.875, "rewards/chosen": -0.5793875455856323, "rewards/margins": 1.8129773139953613, "rewards/rejected": -2.392364978790283, "step": 6876 }, { "epoch": 0.8, "learning_rate": 6.090705090350773e-08, "logits/chosen": -2.173902750015259, "logits/rejected": -2.6319642066955566, "logps/chosen": -404.4335021972656, "logps/rejected": -231.0235595703125, "loss": 0.4182, "rewards/accuracies": 0.625, "rewards/chosen": -0.6690205335617065, "rewards/margins": 1.4047249555587769, "rewards/rejected": -2.0737454891204834, "step": 6877 }, { "epoch": 0.8, "learning_rate": 6.087161922758947e-08, "logits/chosen": -2.621583938598633, "logits/rejected": -2.739276647567749, "logps/chosen": -173.18295288085938, "logps/rejected": -236.97312927246094, "loss": 0.693, "rewards/accuracies": 0.625, "rewards/chosen": -0.818861722946167, "rewards/margins": 1.3644630908966064, "rewards/rejected": -2.1833250522613525, "step": 6878 }, { "epoch": 0.8, "learning_rate": 6.083618755167119e-08, "logits/chosen": -2.077061653137207, "logits/rejected": -2.056602716445923, "logps/chosen": -336.3263854980469, "logps/rejected": -302.77972412109375, "loss": 0.32, "rewards/accuracies": 0.75, "rewards/chosen": -1.1751642227172852, "rewards/margins": 2.348365306854248, "rewards/rejected": -3.523529529571533, "step": 6879 }, { "epoch": 0.8, "learning_rate": 6.080075587575292e-08, "logits/chosen": -2.639233350753784, "logits/rejected": -2.7530274391174316, "logps/chosen": -277.5287170410156, "logps/rejected": -201.5216064453125, "loss": 0.9407, "rewards/accuracies": 0.75, "rewards/chosen": -1.13041353225708, "rewards/margins": 1.434485673904419, "rewards/rejected": -2.564899206161499, "step": 6880 }, { "epoch": 0.8, "learning_rate": 6.076532419983466e-08, "logits/chosen": -2.67049503326416, "logits/rejected": -2.67189884185791, "logps/chosen": -410.9122314453125, "logps/rejected": -325.05987548828125, "loss": 0.1916, "rewards/accuracies": 1.0, "rewards/chosen": -0.547398567199707, "rewards/margins": 2.428257465362549, "rewards/rejected": -2.975656032562256, "step": 6881 }, { "epoch": 0.8, "learning_rate": 6.072989252391638e-08, "logits/chosen": -1.8763231039047241, "logits/rejected": -2.1768198013305664, "logps/chosen": -239.5070343017578, "logps/rejected": -156.99130249023438, "loss": 0.9637, "rewards/accuracies": 0.875, "rewards/chosen": -1.4590353965759277, "rewards/margins": 0.6330040097236633, "rewards/rejected": -2.0920395851135254, "step": 6882 }, { "epoch": 0.8, "learning_rate": 6.06944608479981e-08, "logits/chosen": -2.3112926483154297, "logits/rejected": -2.269711971282959, "logps/chosen": -456.79998779296875, "logps/rejected": -344.9031677246094, "loss": 0.5201, "rewards/accuracies": 0.75, "rewards/chosen": -0.7449369430541992, "rewards/margins": 1.3485500812530518, "rewards/rejected": -2.09348726272583, "step": 6883 }, { "epoch": 0.8, "learning_rate": 6.065902917207984e-08, "logits/chosen": -2.15946626663208, "logits/rejected": -2.2022767066955566, "logps/chosen": -322.4681396484375, "logps/rejected": -349.84295654296875, "loss": 0.2276, "rewards/accuracies": 0.875, "rewards/chosen": -1.19395112991333, "rewards/margins": 4.320577144622803, "rewards/rejected": -5.514528274536133, "step": 6884 }, { "epoch": 0.8, "learning_rate": 6.062359749616156e-08, "logits/chosen": -2.631561756134033, "logits/rejected": -2.4968762397766113, "logps/chosen": -348.88671875, "logps/rejected": -359.77557373046875, "loss": 0.3809, "rewards/accuracies": 0.875, "rewards/chosen": -1.2477723360061646, "rewards/margins": 1.8995622396469116, "rewards/rejected": -3.147334575653076, "step": 6885 }, { "epoch": 0.8, "learning_rate": 6.05881658202433e-08, "logits/chosen": -2.746532917022705, "logits/rejected": -2.66607928276062, "logps/chosen": -170.00213623046875, "logps/rejected": -262.0600280761719, "loss": 0.2595, "rewards/accuracies": 0.875, "rewards/chosen": -0.8095134496688843, "rewards/margins": 3.477121114730835, "rewards/rejected": -4.28663444519043, "step": 6886 }, { "epoch": 0.8, "learning_rate": 6.055273414432503e-08, "logits/chosen": -2.581150770187378, "logits/rejected": -2.6428232192993164, "logps/chosen": -302.81158447265625, "logps/rejected": -339.23876953125, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": -0.9259889721870422, "rewards/margins": 4.471812725067139, "rewards/rejected": -5.397801876068115, "step": 6887 }, { "epoch": 0.8, "learning_rate": 6.051730246840675e-08, "logits/chosen": -2.2816524505615234, "logits/rejected": -2.3479833602905273, "logps/chosen": -325.6483459472656, "logps/rejected": -348.9211730957031, "loss": 0.4061, "rewards/accuracies": 0.875, "rewards/chosen": -1.160871982574463, "rewards/margins": 3.039816379547119, "rewards/rejected": -4.20068883895874, "step": 6888 }, { "epoch": 0.8, "learning_rate": 6.048187079248848e-08, "logits/chosen": -2.1469123363494873, "logits/rejected": -2.1613872051239014, "logps/chosen": -188.59136962890625, "logps/rejected": -170.01254272460938, "loss": 0.2822, "rewards/accuracies": 0.875, "rewards/chosen": -0.42905956506729126, "rewards/margins": 2.2298736572265625, "rewards/rejected": -2.658933401107788, "step": 6889 }, { "epoch": 0.8, "learning_rate": 6.044643911657021e-08, "logits/chosen": -2.4381847381591797, "logits/rejected": -2.5352230072021484, "logps/chosen": -335.6871337890625, "logps/rejected": -371.23065185546875, "loss": 0.249, "rewards/accuracies": 0.875, "rewards/chosen": -0.6556386947631836, "rewards/margins": 2.150097608566284, "rewards/rejected": -2.8057360649108887, "step": 6890 }, { "epoch": 0.8, "learning_rate": 6.041100744065193e-08, "logits/chosen": -2.400900363922119, "logits/rejected": -2.4083051681518555, "logps/chosen": -184.46250915527344, "logps/rejected": -306.98779296875, "loss": 0.1856, "rewards/accuracies": 0.875, "rewards/chosen": -0.7026311159133911, "rewards/margins": 4.589041233062744, "rewards/rejected": -5.291672229766846, "step": 6891 }, { "epoch": 0.8, "learning_rate": 6.037557576473367e-08, "logits/chosen": -2.5259649753570557, "logits/rejected": -2.3968427181243896, "logps/chosen": -235.30447387695312, "logps/rejected": -289.27667236328125, "loss": 0.2002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3481628894805908, "rewards/margins": 3.3171744346618652, "rewards/rejected": -4.665337562561035, "step": 6892 }, { "epoch": 0.8, "learning_rate": 6.03401440888154e-08, "logits/chosen": -2.6868913173675537, "logits/rejected": -2.7782206535339355, "logps/chosen": -124.67086029052734, "logps/rejected": -188.67047119140625, "loss": 0.521, "rewards/accuracies": 0.875, "rewards/chosen": -0.2644999921321869, "rewards/margins": 2.063897132873535, "rewards/rejected": -2.328397035598755, "step": 6893 }, { "epoch": 0.8, "learning_rate": 6.030471241289713e-08, "logits/chosen": -2.60872745513916, "logits/rejected": -2.807612657546997, "logps/chosen": -264.4891357421875, "logps/rejected": -250.44589233398438, "loss": 0.4321, "rewards/accuracies": 0.625, "rewards/chosen": -1.3954761028289795, "rewards/margins": 1.415717363357544, "rewards/rejected": -2.8111929893493652, "step": 6894 }, { "epoch": 0.8, "learning_rate": 6.026928073697885e-08, "logits/chosen": -2.2900028228759766, "logits/rejected": -2.34365177154541, "logps/chosen": -109.8130111694336, "logps/rejected": -121.01927185058594, "loss": 0.4934, "rewards/accuracies": 0.625, "rewards/chosen": -0.916411817073822, "rewards/margins": 1.3454954624176025, "rewards/rejected": -2.2619073390960693, "step": 6895 }, { "epoch": 0.8, "learning_rate": 6.023384906106058e-08, "logits/chosen": -2.478156089782715, "logits/rejected": -2.4685797691345215, "logps/chosen": -249.77191162109375, "logps/rejected": -352.72705078125, "loss": 0.2777, "rewards/accuracies": 0.875, "rewards/chosen": -0.6337382197380066, "rewards/margins": 2.5091664791107178, "rewards/rejected": -3.142904758453369, "step": 6896 }, { "epoch": 0.8, "learning_rate": 6.019841738514232e-08, "logits/chosen": -2.439596652984619, "logits/rejected": -2.407973527908325, "logps/chosen": -394.96710205078125, "logps/rejected": -421.0386657714844, "loss": 0.7773, "rewards/accuracies": 0.875, "rewards/chosen": -1.2945051193237305, "rewards/margins": 2.7945199012756348, "rewards/rejected": -4.089025020599365, "step": 6897 }, { "epoch": 0.8, "learning_rate": 6.016298570922404e-08, "logits/chosen": -2.16998028755188, "logits/rejected": -2.49896240234375, "logps/chosen": -280.694091796875, "logps/rejected": -320.14532470703125, "loss": 0.5565, "rewards/accuracies": 0.875, "rewards/chosen": -1.0702264308929443, "rewards/margins": 1.3476715087890625, "rewards/rejected": -2.4178977012634277, "step": 6898 }, { "epoch": 0.8, "learning_rate": 6.012755403330578e-08, "logits/chosen": -1.5615559816360474, "logits/rejected": -1.4065842628479004, "logps/chosen": -499.06158447265625, "logps/rejected": -544.5966796875, "loss": 0.7561, "rewards/accuracies": 0.625, "rewards/chosen": -1.456490397453308, "rewards/margins": 1.456023931503296, "rewards/rejected": -2.9125142097473145, "step": 6899 }, { "epoch": 0.8, "learning_rate": 6.00921223573875e-08, "logits/chosen": -2.5633387565612793, "logits/rejected": -2.640937566757202, "logps/chosen": -300.31878662109375, "logps/rejected": -244.67337036132812, "loss": 0.4527, "rewards/accuracies": 0.75, "rewards/chosen": -0.6285059452056885, "rewards/margins": 1.580533742904663, "rewards/rejected": -2.2090396881103516, "step": 6900 }, { "epoch": 0.8, "learning_rate": 6.005669068146922e-08, "logits/chosen": -2.7684342861175537, "logits/rejected": -2.8154263496398926, "logps/chosen": -194.79095458984375, "logps/rejected": -184.09600830078125, "loss": 0.6013, "rewards/accuracies": 0.625, "rewards/chosen": -1.0689170360565186, "rewards/margins": 1.790940761566162, "rewards/rejected": -2.8598575592041016, "step": 6901 }, { "epoch": 0.8, "learning_rate": 6.002125900555096e-08, "logits/chosen": -2.1259872913360596, "logits/rejected": -2.275294780731201, "logps/chosen": -375.4989013671875, "logps/rejected": -231.77810668945312, "loss": 0.4962, "rewards/accuracies": 0.75, "rewards/chosen": -0.6594857573509216, "rewards/margins": 1.0236425399780273, "rewards/rejected": -1.6831282377243042, "step": 6902 }, { "epoch": 0.8, "learning_rate": 5.998582732963269e-08, "logits/chosen": -2.1287176609039307, "logits/rejected": -2.1135940551757812, "logps/chosen": -261.378173828125, "logps/rejected": -345.83843994140625, "loss": 0.2099, "rewards/accuracies": 0.875, "rewards/chosen": -0.8118993043899536, "rewards/margins": 2.3929967880249023, "rewards/rejected": -3.2048959732055664, "step": 6903 }, { "epoch": 0.8, "learning_rate": 5.995039565371441e-08, "logits/chosen": -2.7018275260925293, "logits/rejected": -2.4646379947662354, "logps/chosen": -301.34576416015625, "logps/rejected": -209.955322265625, "loss": 0.5246, "rewards/accuracies": 0.875, "rewards/chosen": -1.4101365804672241, "rewards/margins": 1.3803424835205078, "rewards/rejected": -2.7904791831970215, "step": 6904 }, { "epoch": 0.8, "learning_rate": 5.991496397779615e-08, "logits/chosen": -2.3076164722442627, "logits/rejected": -2.240375280380249, "logps/chosen": -168.90597534179688, "logps/rejected": -220.26681518554688, "loss": 0.6269, "rewards/accuracies": 0.5, "rewards/chosen": -0.6896852254867554, "rewards/margins": 1.234555721282959, "rewards/rejected": -1.9242409467697144, "step": 6905 }, { "epoch": 0.8, "learning_rate": 5.987953230187787e-08, "logits/chosen": -2.423234701156616, "logits/rejected": -2.4396116733551025, "logps/chosen": -296.5135803222656, "logps/rejected": -269.93292236328125, "loss": 0.1656, "rewards/accuracies": 0.875, "rewards/chosen": -1.3353526592254639, "rewards/margins": 3.023375988006592, "rewards/rejected": -4.358728408813477, "step": 6906 }, { "epoch": 0.8, "learning_rate": 5.98441006259596e-08, "logits/chosen": -2.534740447998047, "logits/rejected": -2.324251890182495, "logps/chosen": -163.45994567871094, "logps/rejected": -303.6251525878906, "loss": 0.1717, "rewards/accuracies": 1.0, "rewards/chosen": -0.11256885528564453, "rewards/margins": 3.5816407203674316, "rewards/rejected": -3.6942098140716553, "step": 6907 }, { "epoch": 0.8, "learning_rate": 5.980866895004134e-08, "logits/chosen": -2.337916374206543, "logits/rejected": -2.1289572715759277, "logps/chosen": -295.1421203613281, "logps/rejected": -289.46453857421875, "loss": 0.5309, "rewards/accuracies": 0.625, "rewards/chosen": -0.4490537643432617, "rewards/margins": 1.2827553749084473, "rewards/rejected": -1.7318089008331299, "step": 6908 }, { "epoch": 0.8, "learning_rate": 5.977323727412306e-08, "logits/chosen": -2.850249767303467, "logits/rejected": -2.6536471843719482, "logps/chosen": -118.08811950683594, "logps/rejected": -290.046875, "loss": 0.3509, "rewards/accuracies": 0.875, "rewards/chosen": -0.9690921306610107, "rewards/margins": 2.583361864089966, "rewards/rejected": -3.5524539947509766, "step": 6909 }, { "epoch": 0.8, "learning_rate": 5.97378055982048e-08, "logits/chosen": -2.5368776321411133, "logits/rejected": -2.599597454071045, "logps/chosen": -149.15277099609375, "logps/rejected": -160.67559814453125, "loss": 0.2276, "rewards/accuracies": 1.0, "rewards/chosen": -0.6243939995765686, "rewards/margins": 2.425112247467041, "rewards/rejected": -3.049506425857544, "step": 6910 }, { "epoch": 0.8, "learning_rate": 5.970237392228652e-08, "logits/chosen": -2.1605288982391357, "logits/rejected": -2.1874160766601562, "logps/chosen": -267.1117248535156, "logps/rejected": -298.08941650390625, "loss": 0.6071, "rewards/accuracies": 0.625, "rewards/chosen": -1.3768365383148193, "rewards/margins": 1.0210864543914795, "rewards/rejected": -2.397922992706299, "step": 6911 }, { "epoch": 0.8, "learning_rate": 5.966694224636824e-08, "logits/chosen": -2.272624969482422, "logits/rejected": -2.5979700088500977, "logps/chosen": -515.2598876953125, "logps/rejected": -382.1220703125, "loss": 0.6146, "rewards/accuracies": 0.75, "rewards/chosen": -1.5498842000961304, "rewards/margins": 0.9148114919662476, "rewards/rejected": -2.464695930480957, "step": 6912 }, { "epoch": 0.8, "learning_rate": 5.963151057044998e-08, "logits/chosen": -2.249556064605713, "logits/rejected": -1.9992293119430542, "logps/chosen": -179.02218627929688, "logps/rejected": -248.80575561523438, "loss": 0.1893, "rewards/accuracies": 1.0, "rewards/chosen": -1.3414685726165771, "rewards/margins": 2.912838935852051, "rewards/rejected": -4.254307270050049, "step": 6913 }, { "epoch": 0.8, "learning_rate": 5.959607889453171e-08, "logits/chosen": -1.873137354850769, "logits/rejected": -2.4454219341278076, "logps/chosen": -437.222412109375, "logps/rejected": -202.480712890625, "loss": 1.4339, "rewards/accuracies": 0.625, "rewards/chosen": -1.558374285697937, "rewards/margins": -0.436688095331192, "rewards/rejected": -1.1216862201690674, "step": 6914 }, { "epoch": 0.8, "learning_rate": 5.9560647218613436e-08, "logits/chosen": -2.159658193588257, "logits/rejected": -2.299187660217285, "logps/chosen": -267.0484924316406, "logps/rejected": -309.9268798828125, "loss": 0.5349, "rewards/accuracies": 0.75, "rewards/chosen": -1.2032082080841064, "rewards/margins": 1.8264163732528687, "rewards/rejected": -3.0296247005462646, "step": 6915 }, { "epoch": 0.8, "learning_rate": 5.952521554269517e-08, "logits/chosen": -2.1431057453155518, "logits/rejected": -2.173724889755249, "logps/chosen": -494.3200378417969, "logps/rejected": -407.2119140625, "loss": 0.2259, "rewards/accuracies": 0.875, "rewards/chosen": -0.7035886645317078, "rewards/margins": 4.577124118804932, "rewards/rejected": -5.280712604522705, "step": 6916 }, { "epoch": 0.8, "learning_rate": 5.9489783866776894e-08, "logits/chosen": -2.368222236633301, "logits/rejected": -2.5181570053100586, "logps/chosen": -282.7748107910156, "logps/rejected": -372.24163818359375, "loss": 0.739, "rewards/accuracies": 0.625, "rewards/chosen": -1.1213910579681396, "rewards/margins": 2.2174861431121826, "rewards/rejected": -3.3388772010803223, "step": 6917 }, { "epoch": 0.8, "learning_rate": 5.945435219085862e-08, "logits/chosen": -2.977031707763672, "logits/rejected": -3.0282797813415527, "logps/chosen": -223.34271240234375, "logps/rejected": -192.7584991455078, "loss": 0.2758, "rewards/accuracies": 0.875, "rewards/chosen": -1.1252799034118652, "rewards/margins": 2.472568988800049, "rewards/rejected": -3.597848653793335, "step": 6918 }, { "epoch": 0.8, "learning_rate": 5.941892051494036e-08, "logits/chosen": -2.1648623943328857, "logits/rejected": -2.0049774646759033, "logps/chosen": -219.85833740234375, "logps/rejected": -366.86444091796875, "loss": 0.4562, "rewards/accuracies": 0.75, "rewards/chosen": -0.8938494324684143, "rewards/margins": 1.1907633543014526, "rewards/rejected": -2.0846128463745117, "step": 6919 }, { "epoch": 0.81, "learning_rate": 5.9383488839022087e-08, "logits/chosen": -2.8112521171569824, "logits/rejected": -2.4352900981903076, "logps/chosen": -160.96536254882812, "logps/rejected": -240.36102294921875, "loss": 0.4124, "rewards/accuracies": 0.75, "rewards/chosen": -0.6394641399383545, "rewards/margins": 1.5401849746704102, "rewards/rejected": -2.1796491146087646, "step": 6920 }, { "epoch": 0.81, "learning_rate": 5.934805716310381e-08, "logits/chosen": -1.9146720170974731, "logits/rejected": -1.785881757736206, "logps/chosen": -223.04067993164062, "logps/rejected": -344.9404296875, "loss": 0.3095, "rewards/accuracies": 0.875, "rewards/chosen": -1.0031036138534546, "rewards/margins": 2.157834053039551, "rewards/rejected": -3.160937786102295, "step": 6921 }, { "epoch": 0.81, "learning_rate": 5.9312625487185544e-08, "logits/chosen": -2.2965614795684814, "logits/rejected": -2.4639945030212402, "logps/chosen": -383.33642578125, "logps/rejected": -358.50543212890625, "loss": 0.1284, "rewards/accuracies": 1.0, "rewards/chosen": -0.8799735307693481, "rewards/margins": 3.522183418273926, "rewards/rejected": -4.402156829833984, "step": 6922 }, { "epoch": 0.81, "learning_rate": 5.927719381126727e-08, "logits/chosen": -1.5388253927230835, "logits/rejected": -1.7968069314956665, "logps/chosen": -333.13311767578125, "logps/rejected": -273.1431884765625, "loss": 0.3751, "rewards/accuracies": 0.75, "rewards/chosen": -0.9445818662643433, "rewards/margins": 3.225402593612671, "rewards/rejected": -4.169984340667725, "step": 6923 }, { "epoch": 0.81, "learning_rate": 5.9241762135348995e-08, "logits/chosen": -2.956477165222168, "logits/rejected": -2.992743968963623, "logps/chosen": -257.56658935546875, "logps/rejected": -248.71029663085938, "loss": 0.3286, "rewards/accuracies": 0.875, "rewards/chosen": -0.39945876598358154, "rewards/margins": 2.502977132797241, "rewards/rejected": -2.902435779571533, "step": 6924 }, { "epoch": 0.81, "learning_rate": 5.920633045943073e-08, "logits/chosen": -2.698195457458496, "logits/rejected": -2.6481029987335205, "logps/chosen": -122.56902313232422, "logps/rejected": -185.09597778320312, "loss": 0.2836, "rewards/accuracies": 0.875, "rewards/chosen": -0.9146978259086609, "rewards/margins": 2.650869131088257, "rewards/rejected": -3.5655667781829834, "step": 6925 }, { "epoch": 0.81, "learning_rate": 5.917089878351246e-08, "logits/chosen": -2.4099297523498535, "logits/rejected": -2.5513081550598145, "logps/chosen": -296.16790771484375, "logps/rejected": -227.76943969726562, "loss": 0.4471, "rewards/accuracies": 0.875, "rewards/chosen": -1.2814745903015137, "rewards/margins": 1.2777289152145386, "rewards/rejected": -2.559203624725342, "step": 6926 }, { "epoch": 0.81, "learning_rate": 5.913546710759418e-08, "logits/chosen": -2.475584030151367, "logits/rejected": -2.472195863723755, "logps/chosen": -274.46026611328125, "logps/rejected": -248.33135986328125, "loss": 0.2885, "rewards/accuracies": 0.75, "rewards/chosen": -0.44025570154190063, "rewards/margins": 2.531245231628418, "rewards/rejected": -2.971500873565674, "step": 6927 }, { "epoch": 0.81, "learning_rate": 5.9100035431675917e-08, "logits/chosen": -2.474320650100708, "logits/rejected": -2.594431161880493, "logps/chosen": -346.873046875, "logps/rejected": -299.5714416503906, "loss": 0.1501, "rewards/accuracies": 1.0, "rewards/chosen": -0.03139614313840866, "rewards/margins": 3.1379268169403076, "rewards/rejected": -3.169322967529297, "step": 6928 }, { "epoch": 0.81, "learning_rate": 5.9064603755757645e-08, "logits/chosen": -2.1676878929138184, "logits/rejected": -2.3060238361358643, "logps/chosen": -427.7969970703125, "logps/rejected": -309.10467529296875, "loss": 0.1581, "rewards/accuracies": 1.0, "rewards/chosen": -0.3296813666820526, "rewards/margins": 3.145228147506714, "rewards/rejected": -3.4749093055725098, "step": 6929 }, { "epoch": 0.81, "learning_rate": 5.902917207983937e-08, "logits/chosen": -2.9843757152557373, "logits/rejected": -3.0082621574401855, "logps/chosen": -170.35476684570312, "logps/rejected": -226.33067321777344, "loss": 0.4306, "rewards/accuracies": 0.875, "rewards/chosen": -0.5126721858978271, "rewards/margins": 2.8143670558929443, "rewards/rejected": -3.3270392417907715, "step": 6930 }, { "epoch": 0.81, "learning_rate": 5.89937404039211e-08, "logits/chosen": -2.053554058074951, "logits/rejected": -2.3966469764709473, "logps/chosen": -324.0078430175781, "logps/rejected": -178.91098022460938, "loss": 0.5117, "rewards/accuracies": 0.75, "rewards/chosen": -0.945521354675293, "rewards/margins": 0.8974461555480957, "rewards/rejected": -1.8429676294326782, "step": 6931 }, { "epoch": 0.81, "learning_rate": 5.895830872800283e-08, "logits/chosen": -2.3926193714141846, "logits/rejected": -2.1329751014709473, "logps/chosen": -118.62220001220703, "logps/rejected": -220.97366333007812, "loss": 0.4281, "rewards/accuracies": 0.75, "rewards/chosen": -0.8356322050094604, "rewards/margins": 1.2915868759155273, "rewards/rejected": -2.1272189617156982, "step": 6932 }, { "epoch": 0.81, "learning_rate": 5.892287705208456e-08, "logits/chosen": -2.2731852531433105, "logits/rejected": -2.4036998748779297, "logps/chosen": -357.1069030761719, "logps/rejected": -248.55514526367188, "loss": 0.8515, "rewards/accuracies": 0.875, "rewards/chosen": -1.4061845541000366, "rewards/margins": 0.8615427613258362, "rewards/rejected": -2.2677271366119385, "step": 6933 }, { "epoch": 0.81, "learning_rate": 5.888744537616629e-08, "logits/chosen": -2.847926378250122, "logits/rejected": -2.5771467685699463, "logps/chosen": -261.82373046875, "logps/rejected": -373.1896057128906, "loss": 0.1842, "rewards/accuracies": 1.0, "rewards/chosen": -0.9082393646240234, "rewards/margins": 2.187087059020996, "rewards/rejected": -3.0953264236450195, "step": 6934 }, { "epoch": 0.81, "learning_rate": 5.885201370024802e-08, "logits/chosen": -2.449023962020874, "logits/rejected": -2.432584524154663, "logps/chosen": -357.9031677246094, "logps/rejected": -322.2187194824219, "loss": 0.1107, "rewards/accuracies": 1.0, "rewards/chosen": -0.08927503228187561, "rewards/margins": 4.092731475830078, "rewards/rejected": -4.182006359100342, "step": 6935 }, { "epoch": 0.81, "learning_rate": 5.8816582024329747e-08, "logits/chosen": -2.0975115299224854, "logits/rejected": -2.222388982772827, "logps/chosen": -396.83636474609375, "logps/rejected": -346.7358093261719, "loss": 0.4213, "rewards/accuracies": 0.75, "rewards/chosen": 0.018300257623195648, "rewards/margins": 1.7928481101989746, "rewards/rejected": -1.774547815322876, "step": 6936 }, { "epoch": 0.81, "learning_rate": 5.878115034841148e-08, "logits/chosen": -2.14475417137146, "logits/rejected": -2.1213645935058594, "logps/chosen": -229.7939453125, "logps/rejected": -331.1851501464844, "loss": 0.7505, "rewards/accuracies": 0.75, "rewards/chosen": -1.4984033107757568, "rewards/margins": 2.2515532970428467, "rewards/rejected": -3.7499568462371826, "step": 6937 }, { "epoch": 0.81, "learning_rate": 5.8745718672493204e-08, "logits/chosen": -2.193063735961914, "logits/rejected": -2.479222297668457, "logps/chosen": -302.35992431640625, "logps/rejected": -292.68658447265625, "loss": 0.1759, "rewards/accuracies": 1.0, "rewards/chosen": -1.8758623600006104, "rewards/margins": 2.327329397201538, "rewards/rejected": -4.203191757202148, "step": 6938 }, { "epoch": 0.81, "learning_rate": 5.871028699657493e-08, "logits/chosen": -2.63381028175354, "logits/rejected": -2.87282395362854, "logps/chosen": -468.68096923828125, "logps/rejected": -314.24664306640625, "loss": 0.2902, "rewards/accuracies": 1.0, "rewards/chosen": -0.5661655068397522, "rewards/margins": 1.6350233554840088, "rewards/rejected": -2.201188802719116, "step": 6939 }, { "epoch": 0.81, "learning_rate": 5.867485532065667e-08, "logits/chosen": -1.9521119594573975, "logits/rejected": -2.0112180709838867, "logps/chosen": -237.43124389648438, "logps/rejected": -287.5439453125, "loss": 0.284, "rewards/accuracies": 0.875, "rewards/chosen": -0.5958864092826843, "rewards/margins": 4.474835395812988, "rewards/rejected": -5.0707221031188965, "step": 6940 }, { "epoch": 0.81, "learning_rate": 5.863942364473839e-08, "logits/chosen": -1.8490170240402222, "logits/rejected": -2.0813403129577637, "logps/chosen": -215.92764282226562, "logps/rejected": -170.9210205078125, "loss": 0.5767, "rewards/accuracies": 0.75, "rewards/chosen": -0.9153019785881042, "rewards/margins": 0.8074325919151306, "rewards/rejected": -1.7227346897125244, "step": 6941 }, { "epoch": 0.81, "learning_rate": 5.860399196882012e-08, "logits/chosen": -1.4860576391220093, "logits/rejected": -1.8136402368545532, "logps/chosen": -513.1156616210938, "logps/rejected": -463.6302490234375, "loss": 0.5689, "rewards/accuracies": 0.75, "rewards/chosen": -0.6015863418579102, "rewards/margins": 2.1121268272399902, "rewards/rejected": -2.7137131690979004, "step": 6942 }, { "epoch": 0.81, "learning_rate": 5.8568560292901854e-08, "logits/chosen": -2.519143581390381, "logits/rejected": -2.3967480659484863, "logps/chosen": -160.36354064941406, "logps/rejected": -215.4005126953125, "loss": 0.4802, "rewards/accuracies": 0.625, "rewards/chosen": -0.6472029685974121, "rewards/margins": 1.3045904636383057, "rewards/rejected": -1.9517934322357178, "step": 6943 }, { "epoch": 0.81, "learning_rate": 5.8533128616983576e-08, "logits/chosen": -2.1394565105438232, "logits/rejected": -2.2423088550567627, "logps/chosen": -222.90554809570312, "logps/rejected": -276.9129943847656, "loss": 0.1413, "rewards/accuracies": 1.0, "rewards/chosen": -1.5134353637695312, "rewards/margins": 2.8941218852996826, "rewards/rejected": -4.407557487487793, "step": 6944 }, { "epoch": 0.81, "learning_rate": 5.849769694106531e-08, "logits/chosen": -2.6992509365081787, "logits/rejected": -2.342311143875122, "logps/chosen": -262.9388427734375, "logps/rejected": -321.6473388671875, "loss": 0.1229, "rewards/accuracies": 1.0, "rewards/chosen": -0.887996256351471, "rewards/margins": 3.1814281940460205, "rewards/rejected": -4.069424629211426, "step": 6945 }, { "epoch": 0.81, "learning_rate": 5.846226526514704e-08, "logits/chosen": -2.5511715412139893, "logits/rejected": -2.504384756088257, "logps/chosen": -440.843017578125, "logps/rejected": -360.93505859375, "loss": 0.444, "rewards/accuracies": 0.875, "rewards/chosen": -1.2203733921051025, "rewards/margins": 2.7069571018218994, "rewards/rejected": -3.927330493927002, "step": 6946 }, { "epoch": 0.81, "learning_rate": 5.842683358922877e-08, "logits/chosen": -2.394871950149536, "logits/rejected": -2.6926422119140625, "logps/chosen": -404.0493469238281, "logps/rejected": -354.7840576171875, "loss": 0.2694, "rewards/accuracies": 0.875, "rewards/chosen": -0.4859156608581543, "rewards/margins": 1.930237889289856, "rewards/rejected": -2.4161536693573, "step": 6947 }, { "epoch": 0.81, "learning_rate": 5.83914019133105e-08, "logits/chosen": -2.629350423812866, "logits/rejected": -2.7744522094726562, "logps/chosen": -218.7945556640625, "logps/rejected": -234.1666717529297, "loss": 1.0267, "rewards/accuracies": 0.875, "rewards/chosen": -1.618085503578186, "rewards/margins": 2.453368663787842, "rewards/rejected": -4.071454048156738, "step": 6948 }, { "epoch": 0.81, "learning_rate": 5.835597023739223e-08, "logits/chosen": -2.0143070220947266, "logits/rejected": -2.1031734943389893, "logps/chosen": -273.558349609375, "logps/rejected": -304.2055969238281, "loss": 0.3932, "rewards/accuracies": 0.75, "rewards/chosen": -0.32279425859451294, "rewards/margins": 2.863375663757324, "rewards/rejected": -3.1861696243286133, "step": 6949 }, { "epoch": 0.81, "learning_rate": 5.8320538561473956e-08, "logits/chosen": -1.755435585975647, "logits/rejected": -1.8585846424102783, "logps/chosen": -453.4921875, "logps/rejected": -412.88421630859375, "loss": 0.2282, "rewards/accuracies": 0.875, "rewards/chosen": -0.14517706632614136, "rewards/margins": 2.7922329902648926, "rewards/rejected": -2.9374101161956787, "step": 6950 }, { "epoch": 0.81, "learning_rate": 5.828510688555569e-08, "logits/chosen": -2.8303415775299072, "logits/rejected": -2.6690359115600586, "logps/chosen": -114.41722106933594, "logps/rejected": -144.63766479492188, "loss": 0.4695, "rewards/accuracies": 0.625, "rewards/chosen": -0.6630533337593079, "rewards/margins": 1.654935598373413, "rewards/rejected": -2.317988872528076, "step": 6951 }, { "epoch": 0.81, "learning_rate": 5.824967520963741e-08, "logits/chosen": -1.5581797361373901, "logits/rejected": -2.032939910888672, "logps/chosen": -546.8501586914062, "logps/rejected": -352.4197998046875, "loss": 0.2493, "rewards/accuracies": 0.875, "rewards/chosen": -1.253819465637207, "rewards/margins": 1.9726307392120361, "rewards/rejected": -3.226449966430664, "step": 6952 }, { "epoch": 0.81, "learning_rate": 5.821424353371914e-08, "logits/chosen": -1.9849361181259155, "logits/rejected": -1.9882879257202148, "logps/chosen": -262.4668884277344, "logps/rejected": -340.5745849609375, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": 0.4014494717121124, "rewards/margins": 4.52146053314209, "rewards/rejected": -4.120010852813721, "step": 6953 }, { "epoch": 0.81, "learning_rate": 5.817881185780088e-08, "logits/chosen": -2.539609670639038, "logits/rejected": -2.3371968269348145, "logps/chosen": -211.05699157714844, "logps/rejected": -311.50787353515625, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/chosen": -0.6640783548355103, "rewards/margins": 2.834036111831665, "rewards/rejected": -3.4981143474578857, "step": 6954 }, { "epoch": 0.81, "learning_rate": 5.81433801818826e-08, "logits/chosen": -2.537095546722412, "logits/rejected": -2.7251555919647217, "logps/chosen": -160.5239715576172, "logps/rejected": -142.87669372558594, "loss": 0.2907, "rewards/accuracies": 0.875, "rewards/chosen": -0.25215625762939453, "rewards/margins": 1.9948405027389526, "rewards/rejected": -2.2469968795776367, "step": 6955 }, { "epoch": 0.81, "learning_rate": 5.810794850596433e-08, "logits/chosen": -2.027521848678589, "logits/rejected": -2.292147159576416, "logps/chosen": -182.48348999023438, "logps/rejected": -222.15223693847656, "loss": 0.4966, "rewards/accuracies": 0.75, "rewards/chosen": -0.6400614976882935, "rewards/margins": 0.633939266204834, "rewards/rejected": -1.274000644683838, "step": 6956 }, { "epoch": 0.81, "learning_rate": 5.8072516830046063e-08, "logits/chosen": -2.4824366569519043, "logits/rejected": -2.710918664932251, "logps/chosen": -543.2318115234375, "logps/rejected": -336.8566589355469, "loss": 0.1946, "rewards/accuracies": 0.875, "rewards/chosen": -0.6931426525115967, "rewards/margins": 2.9788331985473633, "rewards/rejected": -3.671975612640381, "step": 6957 }, { "epoch": 0.81, "learning_rate": 5.8037085154127785e-08, "logits/chosen": -2.1887872219085693, "logits/rejected": -2.159700393676758, "logps/chosen": -253.4881134033203, "logps/rejected": -321.21331787109375, "loss": 0.6739, "rewards/accuracies": 0.5, "rewards/chosen": -0.8223419189453125, "rewards/margins": 1.7957539558410645, "rewards/rejected": -2.618095636367798, "step": 6958 }, { "epoch": 0.81, "learning_rate": 5.8001653478209514e-08, "logits/chosen": -2.410140037536621, "logits/rejected": -2.2583718299865723, "logps/chosen": -144.30934143066406, "logps/rejected": -254.79275512695312, "loss": 0.2984, "rewards/accuracies": 0.875, "rewards/chosen": -0.7381219267845154, "rewards/margins": 1.9606542587280273, "rewards/rejected": -2.6987760066986084, "step": 6959 }, { "epoch": 0.81, "learning_rate": 5.796622180229125e-08, "logits/chosen": -2.351207971572876, "logits/rejected": -2.4365150928497314, "logps/chosen": -176.10784912109375, "logps/rejected": -222.60726928710938, "loss": 0.2394, "rewards/accuracies": 0.875, "rewards/chosen": -0.9226801991462708, "rewards/margins": 3.5574963092803955, "rewards/rejected": -4.48017692565918, "step": 6960 }, { "epoch": 0.81, "learning_rate": 5.793079012637297e-08, "logits/chosen": -2.2367289066314697, "logits/rejected": -2.1401097774505615, "logps/chosen": -192.02655029296875, "logps/rejected": -235.5335693359375, "loss": 0.1875, "rewards/accuracies": 1.0, "rewards/chosen": -0.66761314868927, "rewards/margins": 3.285983085632324, "rewards/rejected": -3.953596591949463, "step": 6961 }, { "epoch": 0.81, "learning_rate": 5.78953584504547e-08, "logits/chosen": -2.0699310302734375, "logits/rejected": -2.4004082679748535, "logps/chosen": -423.71343994140625, "logps/rejected": -211.35227966308594, "loss": 0.6446, "rewards/accuracies": 0.625, "rewards/chosen": -1.3338202238082886, "rewards/margins": 0.5856020450592041, "rewards/rejected": -1.9194222688674927, "step": 6962 }, { "epoch": 0.81, "learning_rate": 5.7859926774536436e-08, "logits/chosen": -2.4559712409973145, "logits/rejected": -2.4202823638916016, "logps/chosen": -236.87326049804688, "logps/rejected": -281.6687316894531, "loss": 0.6556, "rewards/accuracies": 0.5, "rewards/chosen": -0.2997667193412781, "rewards/margins": 0.7654582262039185, "rewards/rejected": -1.0652248859405518, "step": 6963 }, { "epoch": 0.81, "learning_rate": 5.7824495098618165e-08, "logits/chosen": -2.825956106185913, "logits/rejected": -2.5842437744140625, "logps/chosen": -317.8209228515625, "logps/rejected": -286.7721862792969, "loss": 0.7433, "rewards/accuracies": 0.5, "rewards/chosen": -1.102046012878418, "rewards/margins": 0.9493186473846436, "rewards/rejected": -2.0513644218444824, "step": 6964 }, { "epoch": 0.81, "learning_rate": 5.7789063422699887e-08, "logits/chosen": -2.3499741554260254, "logits/rejected": -2.1329143047332764, "logps/chosen": -338.93853759765625, "logps/rejected": -415.06304931640625, "loss": 0.3845, "rewards/accuracies": 0.875, "rewards/chosen": -0.5139556527137756, "rewards/margins": 1.5393109321594238, "rewards/rejected": -2.0532665252685547, "step": 6965 }, { "epoch": 0.81, "learning_rate": 5.775363174678162e-08, "logits/chosen": -2.7221386432647705, "logits/rejected": -2.8251757621765137, "logps/chosen": -336.4991149902344, "logps/rejected": -396.70465087890625, "loss": 0.2425, "rewards/accuracies": 0.75, "rewards/chosen": -1.231213927268982, "rewards/margins": 3.9743237495422363, "rewards/rejected": -5.205537796020508, "step": 6966 }, { "epoch": 0.81, "learning_rate": 5.771820007086335e-08, "logits/chosen": -1.8360257148742676, "logits/rejected": -1.5685789585113525, "logps/chosen": -148.1580047607422, "logps/rejected": -249.30223083496094, "loss": 0.5354, "rewards/accuracies": 0.75, "rewards/chosen": -0.6783114075660706, "rewards/margins": 1.4771463871002197, "rewards/rejected": -2.1554577350616455, "step": 6967 }, { "epoch": 0.81, "learning_rate": 5.768276839494507e-08, "logits/chosen": -2.081731081008911, "logits/rejected": -2.281965970993042, "logps/chosen": -515.6261596679688, "logps/rejected": -361.91741943359375, "loss": 0.4484, "rewards/accuracies": 0.875, "rewards/chosen": -0.6904502511024475, "rewards/margins": 3.29288649559021, "rewards/rejected": -3.983336925506592, "step": 6968 }, { "epoch": 0.81, "learning_rate": 5.764733671902681e-08, "logits/chosen": -1.909963846206665, "logits/rejected": -2.1901423931121826, "logps/chosen": -425.1414794921875, "logps/rejected": -275.903076171875, "loss": 0.2499, "rewards/accuracies": 0.875, "rewards/chosen": -0.19101133942604065, "rewards/margins": 1.5922093391418457, "rewards/rejected": -1.7832207679748535, "step": 6969 }, { "epoch": 0.81, "learning_rate": 5.761190504310854e-08, "logits/chosen": -1.9031894207000732, "logits/rejected": -1.932436227798462, "logps/chosen": -294.8926086425781, "logps/rejected": -273.4130859375, "loss": 0.6919, "rewards/accuracies": 0.625, "rewards/chosen": -1.4738729000091553, "rewards/margins": 0.09090644121170044, "rewards/rejected": -1.56477952003479, "step": 6970 }, { "epoch": 0.81, "learning_rate": 5.757647336719026e-08, "logits/chosen": -2.0417063236236572, "logits/rejected": -2.119220018386841, "logps/chosen": -365.8646545410156, "logps/rejected": -301.26751708984375, "loss": 0.3706, "rewards/accuracies": 0.75, "rewards/chosen": -0.5905189514160156, "rewards/margins": 1.6636261940002441, "rewards/rejected": -2.2541451454162598, "step": 6971 }, { "epoch": 0.81, "learning_rate": 5.7541041691271994e-08, "logits/chosen": -1.8935199975967407, "logits/rejected": -2.1771726608276367, "logps/chosen": -347.3951416015625, "logps/rejected": -276.03814697265625, "loss": 0.2335, "rewards/accuracies": 0.75, "rewards/chosen": -0.032300978899002075, "rewards/margins": 2.722151517868042, "rewards/rejected": -2.7544524669647217, "step": 6972 }, { "epoch": 0.81, "learning_rate": 5.750561001535372e-08, "logits/chosen": -3.028144359588623, "logits/rejected": -3.010406017303467, "logps/chosen": -86.32718658447266, "logps/rejected": -138.75921630859375, "loss": 0.2498, "rewards/accuracies": 0.875, "rewards/chosen": -0.4675876796245575, "rewards/margins": 2.6173954010009766, "rewards/rejected": -3.0849833488464355, "step": 6973 }, { "epoch": 0.81, "learning_rate": 5.747017833943545e-08, "logits/chosen": -2.205203056335449, "logits/rejected": -2.6454505920410156, "logps/chosen": -347.1150817871094, "logps/rejected": -281.47564697265625, "loss": 0.3951, "rewards/accuracies": 0.75, "rewards/chosen": -1.5696680545806885, "rewards/margins": 1.8272948265075684, "rewards/rejected": -3.3969626426696777, "step": 6974 }, { "epoch": 0.81, "learning_rate": 5.743474666351718e-08, "logits/chosen": -2.48429799079895, "logits/rejected": -2.5060150623321533, "logps/chosen": -224.866455078125, "logps/rejected": -264.07171630859375, "loss": 0.2524, "rewards/accuracies": 1.0, "rewards/chosen": -0.5502662062644958, "rewards/margins": 2.526099681854248, "rewards/rejected": -3.0763659477233887, "step": 6975 }, { "epoch": 0.81, "learning_rate": 5.739931498759891e-08, "logits/chosen": -2.0368740558624268, "logits/rejected": -1.939455270767212, "logps/chosen": -359.8154296875, "logps/rejected": -361.9418029785156, "loss": 0.7506, "rewards/accuracies": 0.75, "rewards/chosen": -0.8221428394317627, "rewards/margins": 3.0155766010284424, "rewards/rejected": -3.837719202041626, "step": 6976 }, { "epoch": 0.81, "learning_rate": 5.736388331168064e-08, "logits/chosen": -2.4694252014160156, "logits/rejected": -2.4764902591705322, "logps/chosen": -334.063720703125, "logps/rejected": -431.93634033203125, "loss": 0.1617, "rewards/accuracies": 0.875, "rewards/chosen": -0.7500292658805847, "rewards/margins": 6.188802242279053, "rewards/rejected": -6.938831329345703, "step": 6977 }, { "epoch": 0.81, "learning_rate": 5.7328451635762374e-08, "logits/chosen": -2.681795120239258, "logits/rejected": -2.66719913482666, "logps/chosen": -293.13446044921875, "logps/rejected": -292.93341064453125, "loss": 0.1274, "rewards/accuracies": 1.0, "rewards/chosen": -0.28624939918518066, "rewards/margins": 2.820626735687256, "rewards/rejected": -3.1068761348724365, "step": 6978 }, { "epoch": 0.81, "learning_rate": 5.7293019959844096e-08, "logits/chosen": -2.2025866508483887, "logits/rejected": -2.769169330596924, "logps/chosen": -187.2576904296875, "logps/rejected": -163.6160125732422, "loss": 0.3085, "rewards/accuracies": 0.875, "rewards/chosen": -0.9636594653129578, "rewards/margins": 2.108764171600342, "rewards/rejected": -3.0724236965179443, "step": 6979 }, { "epoch": 0.81, "learning_rate": 5.725758828392583e-08, "logits/chosen": -2.7277700901031494, "logits/rejected": -2.8943960666656494, "logps/chosen": -342.13336181640625, "logps/rejected": -241.7371368408203, "loss": 0.1962, "rewards/accuracies": 0.875, "rewards/chosen": -0.4444226026535034, "rewards/margins": 3.274862766265869, "rewards/rejected": -3.719285011291504, "step": 6980 }, { "epoch": 0.81, "learning_rate": 5.722215660800756e-08, "logits/chosen": -2.5525505542755127, "logits/rejected": -2.734325647354126, "logps/chosen": -472.78790283203125, "logps/rejected": -593.6925048828125, "loss": 0.4059, "rewards/accuracies": 0.75, "rewards/chosen": -0.4447324872016907, "rewards/margins": 1.5820753574371338, "rewards/rejected": -2.0268077850341797, "step": 6981 }, { "epoch": 0.81, "learning_rate": 5.718672493208928e-08, "logits/chosen": -2.4413208961486816, "logits/rejected": -2.4746201038360596, "logps/chosen": -173.83233642578125, "logps/rejected": -183.9649200439453, "loss": 0.5021, "rewards/accuracies": 0.625, "rewards/chosen": -0.5750913619995117, "rewards/margins": 0.8024642467498779, "rewards/rejected": -1.3775556087493896, "step": 6982 }, { "epoch": 0.81, "learning_rate": 5.715129325617102e-08, "logits/chosen": -2.652353048324585, "logits/rejected": -2.5170845985412598, "logps/chosen": -278.14178466796875, "logps/rejected": -317.2783203125, "loss": 0.2429, "rewards/accuracies": 0.875, "rewards/chosen": -1.0704457759857178, "rewards/margins": 3.7457871437072754, "rewards/rejected": -4.816232681274414, "step": 6983 }, { "epoch": 0.81, "learning_rate": 5.7115861580252746e-08, "logits/chosen": -2.3314056396484375, "logits/rejected": -2.5843279361724854, "logps/chosen": -298.65740966796875, "logps/rejected": -256.2784118652344, "loss": 0.7606, "rewards/accuracies": 0.625, "rewards/chosen": -1.331590175628662, "rewards/margins": 1.5347625017166138, "rewards/rejected": -2.8663525581359863, "step": 6984 }, { "epoch": 0.81, "learning_rate": 5.708042990433447e-08, "logits/chosen": -2.110058307647705, "logits/rejected": -2.342405080795288, "logps/chosen": -254.48764038085938, "logps/rejected": -217.17752075195312, "loss": 0.2598, "rewards/accuracies": 0.875, "rewards/chosen": -0.8226504921913147, "rewards/margins": 1.8612288236618042, "rewards/rejected": -2.6838793754577637, "step": 6985 }, { "epoch": 0.81, "learning_rate": 5.7044998228416203e-08, "logits/chosen": -2.2351136207580566, "logits/rejected": -2.243980646133423, "logps/chosen": -218.61761474609375, "logps/rejected": -141.64718627929688, "loss": 0.7025, "rewards/accuracies": 0.5, "rewards/chosen": -0.918641984462738, "rewards/margins": 0.7701011300086975, "rewards/rejected": -1.688743233680725, "step": 6986 }, { "epoch": 0.81, "learning_rate": 5.700956655249793e-08, "logits/chosen": -2.4341351985931396, "logits/rejected": -2.427731513977051, "logps/chosen": -289.51953125, "logps/rejected": -417.5943298339844, "loss": 0.7309, "rewards/accuracies": 0.5, "rewards/chosen": -1.1060125827789307, "rewards/margins": 0.12258338928222656, "rewards/rejected": -1.2285959720611572, "step": 6987 }, { "epoch": 0.81, "learning_rate": 5.6974134876579654e-08, "logits/chosen": -2.3366141319274902, "logits/rejected": -2.684650421142578, "logps/chosen": -194.67318725585938, "logps/rejected": -231.68618774414062, "loss": 0.1379, "rewards/accuracies": 1.0, "rewards/chosen": -0.8100523948669434, "rewards/margins": 4.066460132598877, "rewards/rejected": -4.87651252746582, "step": 6988 }, { "epoch": 0.81, "learning_rate": 5.693870320066139e-08, "logits/chosen": -2.3096301555633545, "logits/rejected": -2.2615575790405273, "logps/chosen": -282.6059265136719, "logps/rejected": -341.27813720703125, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": -0.4463968276977539, "rewards/margins": 4.804548740386963, "rewards/rejected": -5.250945568084717, "step": 6989 }, { "epoch": 0.81, "learning_rate": 5.690327152474312e-08, "logits/chosen": -2.8228652477264404, "logits/rejected": -2.753466844558716, "logps/chosen": -333.6346130371094, "logps/rejected": -189.7689208984375, "loss": 1.0208, "rewards/accuracies": 0.625, "rewards/chosen": -1.1062802076339722, "rewards/margins": 1.4289377927780151, "rewards/rejected": -2.5352180004119873, "step": 6990 }, { "epoch": 0.81, "learning_rate": 5.686783984882485e-08, "logits/chosen": -2.28448486328125, "logits/rejected": -2.429816484451294, "logps/chosen": -344.054443359375, "logps/rejected": -302.84002685546875, "loss": 0.296, "rewards/accuracies": 0.75, "rewards/chosen": -0.8743220567703247, "rewards/margins": 1.6704249382019043, "rewards/rejected": -2.5447468757629395, "step": 6991 }, { "epoch": 0.81, "learning_rate": 5.6832408172906576e-08, "logits/chosen": -1.7741941213607788, "logits/rejected": -1.9213895797729492, "logps/chosen": -356.81390380859375, "logps/rejected": -326.2787170410156, "loss": 0.4512, "rewards/accuracies": 0.75, "rewards/chosen": -0.2742231488227844, "rewards/margins": 2.159553050994873, "rewards/rejected": -2.4337761402130127, "step": 6992 }, { "epoch": 0.81, "learning_rate": 5.6796976496988305e-08, "logits/chosen": -2.385751724243164, "logits/rejected": -2.6124587059020996, "logps/chosen": -289.2515869140625, "logps/rejected": -192.94003295898438, "loss": 0.5771, "rewards/accuracies": 0.625, "rewards/chosen": -0.6496453285217285, "rewards/margins": 1.4024345874786377, "rewards/rejected": -2.052079916000366, "step": 6993 }, { "epoch": 0.81, "learning_rate": 5.6761544821070033e-08, "logits/chosen": -2.5040743350982666, "logits/rejected": -2.459688663482666, "logps/chosen": -94.85856628417969, "logps/rejected": -174.40863037109375, "loss": 0.1239, "rewards/accuracies": 1.0, "rewards/chosen": -1.1902483701705933, "rewards/margins": 3.0593738555908203, "rewards/rejected": -4.249621868133545, "step": 6994 }, { "epoch": 0.81, "learning_rate": 5.672611314515177e-08, "logits/chosen": -2.192959785461426, "logits/rejected": -2.2895851135253906, "logps/chosen": -411.8631591796875, "logps/rejected": -376.2235412597656, "loss": 0.7831, "rewards/accuracies": 0.625, "rewards/chosen": -1.6120821237564087, "rewards/margins": 1.1904537677764893, "rewards/rejected": -2.8025360107421875, "step": 6995 }, { "epoch": 0.81, "learning_rate": 5.669068146923349e-08, "logits/chosen": -2.1508328914642334, "logits/rejected": -2.013137102127075, "logps/chosen": -330.58209228515625, "logps/rejected": -349.7351989746094, "loss": 0.2077, "rewards/accuracies": 1.0, "rewards/chosen": -0.23129160702228546, "rewards/margins": 2.1678214073181152, "rewards/rejected": -2.399113178253174, "step": 6996 }, { "epoch": 0.81, "learning_rate": 5.665524979331522e-08, "logits/chosen": -1.7649085521697998, "logits/rejected": -2.03485369682312, "logps/chosen": -335.2139587402344, "logps/rejected": -347.0346374511719, "loss": 0.351, "rewards/accuracies": 0.75, "rewards/chosen": -1.1062867641448975, "rewards/margins": 2.589789390563965, "rewards/rejected": -3.6960763931274414, "step": 6997 }, { "epoch": 0.81, "learning_rate": 5.6619818117396955e-08, "logits/chosen": -2.5292820930480957, "logits/rejected": -2.210775136947632, "logps/chosen": -297.22491455078125, "logps/rejected": -345.84759521484375, "loss": 0.8757, "rewards/accuracies": 0.5, "rewards/chosen": -1.2189902067184448, "rewards/margins": 0.3260708451271057, "rewards/rejected": -1.5450611114501953, "step": 6998 }, { "epoch": 0.81, "learning_rate": 5.658438644147868e-08, "logits/chosen": -1.854620337486267, "logits/rejected": -2.031958818435669, "logps/chosen": -281.6073303222656, "logps/rejected": -204.7700653076172, "loss": 0.2818, "rewards/accuracies": 0.875, "rewards/chosen": -0.7884589433670044, "rewards/margins": 1.7698538303375244, "rewards/rejected": -2.5583126544952393, "step": 6999 }, { "epoch": 0.81, "learning_rate": 5.6548954765560406e-08, "logits/chosen": -2.5460143089294434, "logits/rejected": -2.656071901321411, "logps/chosen": -207.1037139892578, "logps/rejected": -218.82284545898438, "loss": 0.2963, "rewards/accuracies": 1.0, "rewards/chosen": -0.7719509601593018, "rewards/margins": 2.0140271186828613, "rewards/rejected": -2.785978317260742, "step": 7000 }, { "epoch": 0.81, "eval_logits/chosen": -1.753435730934143, "eval_logits/rejected": -1.7546595335006714, "eval_logps/chosen": -278.61065673828125, "eval_logps/rejected": -279.0486145019531, "eval_loss": 0.3661438524723053, "eval_rewards/accuracies": 0.8477011322975159, "eval_rewards/chosen": -0.6445215344429016, "eval_rewards/margins": 2.1596851348876953, "eval_rewards/rejected": -2.8042068481445312, "eval_runtime": 238.2166, "eval_samples_per_second": 2.918, "eval_steps_per_second": 1.461, "step": 7000 }, { "epoch": 0.81, "learning_rate": 5.651352308964214e-08, "logits/chosen": -2.743354320526123, "logits/rejected": -2.6290884017944336, "logps/chosen": -310.0019226074219, "logps/rejected": -380.6521911621094, "loss": 0.1182, "rewards/accuracies": 1.0, "rewards/chosen": -0.19075919687747955, "rewards/margins": 2.9790518283843994, "rewards/rejected": -3.1698110103607178, "step": 7001 }, { "epoch": 0.81, "learning_rate": 5.647809141372386e-08, "logits/chosen": -2.221585750579834, "logits/rejected": -2.1962692737579346, "logps/chosen": -124.87165832519531, "logps/rejected": -226.46905517578125, "loss": 0.3811, "rewards/accuracies": 0.875, "rewards/chosen": -1.0817298889160156, "rewards/margins": 2.3608558177948, "rewards/rejected": -3.4425857067108154, "step": 7002 }, { "epoch": 0.81, "learning_rate": 5.644265973780559e-08, "logits/chosen": -2.042593002319336, "logits/rejected": -1.8850102424621582, "logps/chosen": -208.573974609375, "logps/rejected": -140.6519012451172, "loss": 0.9635, "rewards/accuracies": 0.75, "rewards/chosen": -1.0641510486602783, "rewards/margins": 0.6988165378570557, "rewards/rejected": -1.762967586517334, "step": 7003 }, { "epoch": 0.81, "learning_rate": 5.640722806188733e-08, "logits/chosen": -2.2030110359191895, "logits/rejected": -1.848442792892456, "logps/chosen": -286.32025146484375, "logps/rejected": -423.1905517578125, "loss": 0.4171, "rewards/accuracies": 0.75, "rewards/chosen": -1.1458908319473267, "rewards/margins": 2.1772255897521973, "rewards/rejected": -3.3231163024902344, "step": 7004 }, { "epoch": 0.81, "learning_rate": 5.6371796385969056e-08, "logits/chosen": -2.3112523555755615, "logits/rejected": -2.5608091354370117, "logps/chosen": -550.8265991210938, "logps/rejected": -299.2526550292969, "loss": 0.2359, "rewards/accuracies": 0.75, "rewards/chosen": -1.0494734048843384, "rewards/margins": 2.820650577545166, "rewards/rejected": -3.870123863220215, "step": 7005 }, { "epoch": 0.82, "learning_rate": 5.633636471005078e-08, "logits/chosen": -2.3773574829101562, "logits/rejected": -2.5010077953338623, "logps/chosen": -334.86102294921875, "logps/rejected": -260.7117919921875, "loss": 1.0688, "rewards/accuracies": 0.75, "rewards/chosen": -0.8414567708969116, "rewards/margins": 0.5532335042953491, "rewards/rejected": -1.3946901559829712, "step": 7006 }, { "epoch": 0.82, "learning_rate": 5.6300933034132514e-08, "logits/chosen": -2.6330654621124268, "logits/rejected": -2.5602517127990723, "logps/chosen": -247.51730346679688, "logps/rejected": -169.3246612548828, "loss": 0.1369, "rewards/accuracies": 1.0, "rewards/chosen": -0.8064134120941162, "rewards/margins": 2.7514054775238037, "rewards/rejected": -3.55781888961792, "step": 7007 }, { "epoch": 0.82, "learning_rate": 5.626550135821424e-08, "logits/chosen": -2.245460271835327, "logits/rejected": -2.1803512573242188, "logps/chosen": -277.1117858886719, "logps/rejected": -203.94395446777344, "loss": 0.7664, "rewards/accuracies": 0.75, "rewards/chosen": -0.5660412311553955, "rewards/margins": 0.39756739139556885, "rewards/rejected": -0.9636086225509644, "step": 7008 }, { "epoch": 0.82, "learning_rate": 5.6230069682295964e-08, "logits/chosen": -2.5432655811309814, "logits/rejected": -2.6728250980377197, "logps/chosen": -155.08990478515625, "logps/rejected": -159.86756896972656, "loss": 0.689, "rewards/accuracies": 0.625, "rewards/chosen": -0.9497049450874329, "rewards/margins": 0.6727081537246704, "rewards/rejected": -1.6224130392074585, "step": 7009 }, { "epoch": 0.82, "learning_rate": 5.61946380063777e-08, "logits/chosen": -2.2363059520721436, "logits/rejected": -2.3456993103027344, "logps/chosen": -383.2632751464844, "logps/rejected": -263.16253662109375, "loss": 0.451, "rewards/accuracies": 0.75, "rewards/chosen": -1.0815014839172363, "rewards/margins": 0.9606536626815796, "rewards/rejected": -2.0421552658081055, "step": 7010 }, { "epoch": 0.82, "learning_rate": 5.615920633045943e-08, "logits/chosen": -2.629729986190796, "logits/rejected": -2.7211196422576904, "logps/chosen": -224.36520385742188, "logps/rejected": -229.0583953857422, "loss": 0.2655, "rewards/accuracies": 0.875, "rewards/chosen": -0.8444919586181641, "rewards/margins": 3.0238380432128906, "rewards/rejected": -3.868330478668213, "step": 7011 }, { "epoch": 0.82, "learning_rate": 5.612377465454115e-08, "logits/chosen": -2.5971012115478516, "logits/rejected": -2.702526330947876, "logps/chosen": -181.5945587158203, "logps/rejected": -137.58251953125, "loss": 0.709, "rewards/accuracies": 0.75, "rewards/chosen": -0.8414369225502014, "rewards/margins": 0.41044557094573975, "rewards/rejected": -1.2518823146820068, "step": 7012 }, { "epoch": 0.82, "learning_rate": 5.6088342978622886e-08, "logits/chosen": -2.225965738296509, "logits/rejected": -2.447756052017212, "logps/chosen": -319.2898864746094, "logps/rejected": -259.48431396484375, "loss": 0.2653, "rewards/accuracies": 1.0, "rewards/chosen": -1.0668734312057495, "rewards/margins": 2.6885147094726562, "rewards/rejected": -3.7553884983062744, "step": 7013 }, { "epoch": 0.82, "learning_rate": 5.6052911302704615e-08, "logits/chosen": -1.9482793807983398, "logits/rejected": -2.2718496322631836, "logps/chosen": -287.05230712890625, "logps/rejected": -219.4978790283203, "loss": 0.4544, "rewards/accuracies": 0.875, "rewards/chosen": -0.5420180559158325, "rewards/margins": 1.1432679891586304, "rewards/rejected": -1.685286045074463, "step": 7014 }, { "epoch": 0.82, "learning_rate": 5.6017479626786344e-08, "logits/chosen": -2.3629205226898193, "logits/rejected": -2.2595181465148926, "logps/chosen": -171.73695373535156, "logps/rejected": -192.44476318359375, "loss": 0.446, "rewards/accuracies": 0.75, "rewards/chosen": -0.4151574969291687, "rewards/margins": 2.3737101554870605, "rewards/rejected": -2.788867473602295, "step": 7015 }, { "epoch": 0.82, "learning_rate": 5.598204795086807e-08, "logits/chosen": -2.5353453159332275, "logits/rejected": -2.3594565391540527, "logps/chosen": -529.4903564453125, "logps/rejected": -267.05755615234375, "loss": 0.5616, "rewards/accuracies": 0.625, "rewards/chosen": 0.03328150510787964, "rewards/margins": 1.236585259437561, "rewards/rejected": -1.2033036947250366, "step": 7016 }, { "epoch": 0.82, "learning_rate": 5.59466162749498e-08, "logits/chosen": -2.343073606491089, "logits/rejected": -2.3654329776763916, "logps/chosen": -326.5641174316406, "logps/rejected": -298.3931884765625, "loss": 0.416, "rewards/accuracies": 0.875, "rewards/chosen": -0.9496845602989197, "rewards/margins": 2.2766273021698, "rewards/rejected": -3.226311683654785, "step": 7017 }, { "epoch": 0.82, "learning_rate": 5.5911184599031536e-08, "logits/chosen": -2.478144884109497, "logits/rejected": -2.4560585021972656, "logps/chosen": -233.76255798339844, "logps/rejected": -224.95260620117188, "loss": 0.4352, "rewards/accuracies": 0.625, "rewards/chosen": -1.4216960668563843, "rewards/margins": 1.595523476600647, "rewards/rejected": -3.0172197818756104, "step": 7018 }, { "epoch": 0.82, "learning_rate": 5.587575292311326e-08, "logits/chosen": -1.8755110502243042, "logits/rejected": -1.983034372329712, "logps/chosen": -324.5708923339844, "logps/rejected": -320.1654357910156, "loss": 0.4062, "rewards/accuracies": 0.625, "rewards/chosen": -0.8342398405075073, "rewards/margins": 2.1243019104003906, "rewards/rejected": -2.9585416316986084, "step": 7019 }, { "epoch": 0.82, "learning_rate": 5.584032124719499e-08, "logits/chosen": -1.7058281898498535, "logits/rejected": -1.7686514854431152, "logps/chosen": -289.7044677734375, "logps/rejected": -196.11717224121094, "loss": 0.2019, "rewards/accuracies": 1.0, "rewards/chosen": 0.28644055128097534, "rewards/margins": 1.9918581247329712, "rewards/rejected": -1.705417513847351, "step": 7020 }, { "epoch": 0.82, "learning_rate": 5.580488957127672e-08, "logits/chosen": -2.622615098953247, "logits/rejected": -2.713175058364868, "logps/chosen": -391.7327880859375, "logps/rejected": -347.2658386230469, "loss": 0.139, "rewards/accuracies": 1.0, "rewards/chosen": -0.6111325621604919, "rewards/margins": 3.5223305225372314, "rewards/rejected": -4.133462905883789, "step": 7021 }, { "epoch": 0.82, "learning_rate": 5.576945789535845e-08, "logits/chosen": -2.152167320251465, "logits/rejected": -2.2246313095092773, "logps/chosen": -353.34466552734375, "logps/rejected": -236.0394287109375, "loss": 0.5148, "rewards/accuracies": 0.625, "rewards/chosen": -1.0257201194763184, "rewards/margins": 1.7952520847320557, "rewards/rejected": -2.820972204208374, "step": 7022 }, { "epoch": 0.82, "learning_rate": 5.5734026219440173e-08, "logits/chosen": -2.176028251647949, "logits/rejected": -2.0144805908203125, "logps/chosen": -129.39865112304688, "logps/rejected": -237.68597412109375, "loss": 0.6185, "rewards/accuracies": 0.75, "rewards/chosen": -1.6744954586029053, "rewards/margins": 1.2841802835464478, "rewards/rejected": -2.9586758613586426, "step": 7023 }, { "epoch": 0.82, "learning_rate": 5.569859454352191e-08, "logits/chosen": -2.55669903755188, "logits/rejected": -2.5182044506073, "logps/chosen": -172.07899475097656, "logps/rejected": -322.5699462890625, "loss": 0.2786, "rewards/accuracies": 0.875, "rewards/chosen": -0.5288358926773071, "rewards/margins": 2.447700023651123, "rewards/rejected": -2.9765360355377197, "step": 7024 }, { "epoch": 0.82, "learning_rate": 5.566316286760364e-08, "logits/chosen": -1.7736670970916748, "logits/rejected": -1.938071370124817, "logps/chosen": -460.9855651855469, "logps/rejected": -420.30926513671875, "loss": 0.1939, "rewards/accuracies": 1.0, "rewards/chosen": -0.5788826942443848, "rewards/margins": 3.5513153076171875, "rewards/rejected": -4.130197525024414, "step": 7025 }, { "epoch": 0.82, "learning_rate": 5.562773119168536e-08, "logits/chosen": -2.717526912689209, "logits/rejected": -2.8961517810821533, "logps/chosen": -462.6636962890625, "logps/rejected": -312.0180358886719, "loss": 0.2385, "rewards/accuracies": 0.875, "rewards/chosen": -0.041585952043533325, "rewards/margins": 2.156134843826294, "rewards/rejected": -2.197720766067505, "step": 7026 }, { "epoch": 0.82, "learning_rate": 5.5592299515767095e-08, "logits/chosen": -2.22467041015625, "logits/rejected": -2.1989736557006836, "logps/chosen": -119.86175537109375, "logps/rejected": -230.80935668945312, "loss": 0.5487, "rewards/accuracies": 0.75, "rewards/chosen": -0.4714958667755127, "rewards/margins": 3.5385360717773438, "rewards/rejected": -4.010031700134277, "step": 7027 }, { "epoch": 0.82, "learning_rate": 5.5556867839848824e-08, "logits/chosen": -2.4772493839263916, "logits/rejected": -2.5116524696350098, "logps/chosen": -290.0716552734375, "logps/rejected": -306.0292053222656, "loss": 0.3861, "rewards/accuracies": 0.75, "rewards/chosen": -0.9326963424682617, "rewards/margins": 3.4104819297790527, "rewards/rejected": -4.343177795410156, "step": 7028 }, { "epoch": 0.82, "learning_rate": 5.5521436163930546e-08, "logits/chosen": -2.9221065044403076, "logits/rejected": -2.913454294204712, "logps/chosen": -147.1915740966797, "logps/rejected": -143.05372619628906, "loss": 0.2602, "rewards/accuracies": 0.875, "rewards/chosen": -0.759903073310852, "rewards/margins": 2.213841199874878, "rewards/rejected": -2.9737439155578613, "step": 7029 }, { "epoch": 0.82, "learning_rate": 5.548600448801228e-08, "logits/chosen": -2.2329931259155273, "logits/rejected": -2.1490471363067627, "logps/chosen": -112.16510009765625, "logps/rejected": -187.5654754638672, "loss": 0.3718, "rewards/accuracies": 0.875, "rewards/chosen": -0.37015512585639954, "rewards/margins": 3.1372101306915283, "rewards/rejected": -3.5073654651641846, "step": 7030 }, { "epoch": 0.82, "learning_rate": 5.545057281209401e-08, "logits/chosen": -2.2402210235595703, "logits/rejected": -2.347588062286377, "logps/chosen": -270.578369140625, "logps/rejected": -285.31292724609375, "loss": 0.431, "rewards/accuracies": 0.75, "rewards/chosen": -0.5898099541664124, "rewards/margins": 2.670896530151367, "rewards/rejected": -3.260706663131714, "step": 7031 }, { "epoch": 0.82, "learning_rate": 5.541514113617574e-08, "logits/chosen": -2.4829442501068115, "logits/rejected": -2.5313754081726074, "logps/chosen": -329.05706787109375, "logps/rejected": -371.6969909667969, "loss": 0.3045, "rewards/accuracies": 0.75, "rewards/chosen": -0.4001174569129944, "rewards/margins": 2.428717613220215, "rewards/rejected": -2.8288350105285645, "step": 7032 }, { "epoch": 0.82, "learning_rate": 5.537970946025747e-08, "logits/chosen": -2.606682300567627, "logits/rejected": -2.4086666107177734, "logps/chosen": -106.96056365966797, "logps/rejected": -249.67955017089844, "loss": 0.5171, "rewards/accuracies": 0.75, "rewards/chosen": -0.7274596691131592, "rewards/margins": 3.0058183670043945, "rewards/rejected": -3.733278512954712, "step": 7033 }, { "epoch": 0.82, "learning_rate": 5.5344277784339196e-08, "logits/chosen": -2.0438098907470703, "logits/rejected": -2.070625066757202, "logps/chosen": -241.16282653808594, "logps/rejected": -185.55960083007812, "loss": 0.3916, "rewards/accuracies": 0.875, "rewards/chosen": -0.8095307350158691, "rewards/margins": 2.1999406814575195, "rewards/rejected": -3.0094711780548096, "step": 7034 }, { "epoch": 0.82, "learning_rate": 5.5308846108420925e-08, "logits/chosen": -2.70963716506958, "logits/rejected": -2.7681126594543457, "logps/chosen": -330.4197692871094, "logps/rejected": -348.00201416015625, "loss": 0.2456, "rewards/accuracies": 1.0, "rewards/chosen": -1.312230110168457, "rewards/margins": 2.532076358795166, "rewards/rejected": -3.844306468963623, "step": 7035 }, { "epoch": 0.82, "learning_rate": 5.527341443250266e-08, "logits/chosen": -2.6366851329803467, "logits/rejected": -2.638327121734619, "logps/chosen": -248.55072021484375, "logps/rejected": -247.57936096191406, "loss": 0.2866, "rewards/accuracies": 1.0, "rewards/chosen": -1.2234104871749878, "rewards/margins": 1.2410589456558228, "rewards/rejected": -2.4644694328308105, "step": 7036 }, { "epoch": 0.82, "learning_rate": 5.523798275658438e-08, "logits/chosen": -2.486292839050293, "logits/rejected": -2.495518445968628, "logps/chosen": -190.3717041015625, "logps/rejected": -179.67111206054688, "loss": 0.4753, "rewards/accuracies": 0.75, "rewards/chosen": -1.1203267574310303, "rewards/margins": 0.7707808017730713, "rewards/rejected": -1.8911075592041016, "step": 7037 }, { "epoch": 0.82, "learning_rate": 5.520255108066611e-08, "logits/chosen": -2.41007661819458, "logits/rejected": -2.3945388793945312, "logps/chosen": -156.92514038085938, "logps/rejected": -333.22021484375, "loss": 0.4126, "rewards/accuracies": 0.875, "rewards/chosen": -1.296011209487915, "rewards/margins": 2.5217299461364746, "rewards/rejected": -3.8177411556243896, "step": 7038 }, { "epoch": 0.82, "learning_rate": 5.5167119404747847e-08, "logits/chosen": -2.670694351196289, "logits/rejected": -2.8264575004577637, "logps/chosen": -317.3256530761719, "logps/rejected": -305.6164245605469, "loss": 0.5345, "rewards/accuracies": 0.75, "rewards/chosen": -1.2397688627243042, "rewards/margins": 1.3730835914611816, "rewards/rejected": -2.6128523349761963, "step": 7039 }, { "epoch": 0.82, "learning_rate": 5.513168772882957e-08, "logits/chosen": -2.3833930492401123, "logits/rejected": -2.590618848800659, "logps/chosen": -333.7469482421875, "logps/rejected": -300.4892578125, "loss": 0.1947, "rewards/accuracies": 1.0, "rewards/chosen": -1.0649809837341309, "rewards/margins": 2.1104536056518555, "rewards/rejected": -3.1754345893859863, "step": 7040 }, { "epoch": 0.82, "learning_rate": 5.50962560529113e-08, "logits/chosen": -2.313477039337158, "logits/rejected": -2.2493460178375244, "logps/chosen": -240.555908203125, "logps/rejected": -345.084716796875, "loss": 0.1956, "rewards/accuracies": 0.875, "rewards/chosen": -0.7639114856719971, "rewards/margins": 2.477419137954712, "rewards/rejected": -3.24133038520813, "step": 7041 }, { "epoch": 0.82, "learning_rate": 5.506082437699303e-08, "logits/chosen": -1.6895332336425781, "logits/rejected": -1.8515617847442627, "logps/chosen": -353.5566101074219, "logps/rejected": -286.52374267578125, "loss": 0.4794, "rewards/accuracies": 0.75, "rewards/chosen": -0.41857266426086426, "rewards/margins": 2.7628512382507324, "rewards/rejected": -3.1814239025115967, "step": 7042 }, { "epoch": 0.82, "learning_rate": 5.5025392701074755e-08, "logits/chosen": -2.565199851989746, "logits/rejected": -2.536207675933838, "logps/chosen": -103.2051010131836, "logps/rejected": -129.1919403076172, "loss": 0.3441, "rewards/accuracies": 0.875, "rewards/chosen": -0.4302772283554077, "rewards/margins": 1.1429134607315063, "rewards/rejected": -1.5731905698776245, "step": 7043 }, { "epoch": 0.82, "learning_rate": 5.4989961025156484e-08, "logits/chosen": -1.9578036069869995, "logits/rejected": -1.9700123071670532, "logps/chosen": -213.4305877685547, "logps/rejected": -243.66778564453125, "loss": 0.5272, "rewards/accuracies": 0.625, "rewards/chosen": -1.1600946187973022, "rewards/margins": 1.013960599899292, "rewards/rejected": -2.1740550994873047, "step": 7044 }, { "epoch": 0.82, "learning_rate": 5.495452934923822e-08, "logits/chosen": -2.321535587310791, "logits/rejected": -2.4288549423217773, "logps/chosen": -417.1219482421875, "logps/rejected": -320.987060546875, "loss": 0.1949, "rewards/accuracies": 0.875, "rewards/chosen": -0.44053545594215393, "rewards/margins": 2.9639081954956055, "rewards/rejected": -3.4044437408447266, "step": 7045 }, { "epoch": 0.82, "learning_rate": 5.491909767331995e-08, "logits/chosen": -2.1864218711853027, "logits/rejected": -2.347222089767456, "logps/chosen": -143.6334686279297, "logps/rejected": -134.71881103515625, "loss": 0.3098, "rewards/accuracies": 0.875, "rewards/chosen": -0.3129240870475769, "rewards/margins": 1.6892471313476562, "rewards/rejected": -2.002171277999878, "step": 7046 }, { "epoch": 0.82, "learning_rate": 5.488366599740167e-08, "logits/chosen": -2.1925387382507324, "logits/rejected": -2.4267027378082275, "logps/chosen": -480.58013916015625, "logps/rejected": -379.0689392089844, "loss": 0.5811, "rewards/accuracies": 0.625, "rewards/chosen": -1.161381483078003, "rewards/margins": 1.4451416730880737, "rewards/rejected": -2.606523275375366, "step": 7047 }, { "epoch": 0.82, "learning_rate": 5.4848234321483405e-08, "logits/chosen": -1.9338150024414062, "logits/rejected": -2.2091760635375977, "logps/chosen": -444.70703125, "logps/rejected": -289.3113708496094, "loss": 0.4883, "rewards/accuracies": 0.625, "rewards/chosen": -0.3758866786956787, "rewards/margins": 1.1049458980560303, "rewards/rejected": -1.480832576751709, "step": 7048 }, { "epoch": 0.82, "learning_rate": 5.4812802645565134e-08, "logits/chosen": -2.5573129653930664, "logits/rejected": -2.515550136566162, "logps/chosen": -169.90814208984375, "logps/rejected": -291.0963439941406, "loss": 0.2956, "rewards/accuracies": 0.875, "rewards/chosen": -0.7596360445022583, "rewards/margins": 2.345292806625366, "rewards/rejected": -3.104928493499756, "step": 7049 }, { "epoch": 0.82, "learning_rate": 5.4777370969646856e-08, "logits/chosen": -1.9606139659881592, "logits/rejected": -2.335003614425659, "logps/chosen": -489.306396484375, "logps/rejected": -321.69708251953125, "loss": 0.7757, "rewards/accuracies": 0.625, "rewards/chosen": -1.4326328039169312, "rewards/margins": 1.1716562509536743, "rewards/rejected": -2.6042890548706055, "step": 7050 }, { "epoch": 0.82, "learning_rate": 5.474193929372859e-08, "logits/chosen": -2.680314540863037, "logits/rejected": -2.506558418273926, "logps/chosen": -192.56076049804688, "logps/rejected": -215.64962768554688, "loss": 0.2666, "rewards/accuracies": 0.875, "rewards/chosen": -0.7757855653762817, "rewards/margins": 2.044389247894287, "rewards/rejected": -2.8201746940612793, "step": 7051 }, { "epoch": 0.82, "learning_rate": 5.470650761781032e-08, "logits/chosen": -2.650207281112671, "logits/rejected": -2.495008945465088, "logps/chosen": -69.27442932128906, "logps/rejected": -211.58041381835938, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": 0.09619168937206268, "rewards/margins": 3.1443819999694824, "rewards/rejected": -3.0481905937194824, "step": 7052 }, { "epoch": 0.82, "learning_rate": 5.4671075941892056e-08, "logits/chosen": -3.022217035293579, "logits/rejected": -3.001046895980835, "logps/chosen": -316.37213134765625, "logps/rejected": -283.6966552734375, "loss": 0.3275, "rewards/accuracies": 0.75, "rewards/chosen": -0.6815279722213745, "rewards/margins": 2.5745835304260254, "rewards/rejected": -3.2561116218566895, "step": 7053 }, { "epoch": 0.82, "learning_rate": 5.463564426597378e-08, "logits/chosen": -2.23233699798584, "logits/rejected": -2.266061305999756, "logps/chosen": -167.78421020507812, "logps/rejected": -242.25592041015625, "loss": 0.2896, "rewards/accuracies": 0.875, "rewards/chosen": -1.020991563796997, "rewards/margins": 1.9985542297363281, "rewards/rejected": -3.0195460319519043, "step": 7054 }, { "epoch": 0.82, "learning_rate": 5.4600212590055506e-08, "logits/chosen": -2.1596860885620117, "logits/rejected": -2.171708822250366, "logps/chosen": -359.32415771484375, "logps/rejected": -377.4686584472656, "loss": 0.3065, "rewards/accuracies": 0.875, "rewards/chosen": -1.0693836212158203, "rewards/margins": 2.970337390899658, "rewards/rejected": -4.0397210121154785, "step": 7055 }, { "epoch": 0.82, "learning_rate": 5.456478091413724e-08, "logits/chosen": -2.1927618980407715, "logits/rejected": -2.477351427078247, "logps/chosen": -289.1364440917969, "logps/rejected": -281.2858581542969, "loss": 0.2641, "rewards/accuracies": 0.875, "rewards/chosen": -0.8507543206214905, "rewards/margins": 2.660212755203247, "rewards/rejected": -3.5109670162200928, "step": 7056 }, { "epoch": 0.82, "learning_rate": 5.4529349238218964e-08, "logits/chosen": -2.792039394378662, "logits/rejected": -2.7392797470092773, "logps/chosen": -200.67559814453125, "logps/rejected": -153.92906188964844, "loss": 6.4108, "rewards/accuracies": 0.75, "rewards/chosen": -6.523861885070801, "rewards/margins": -4.526762008666992, "rewards/rejected": -1.9970991611480713, "step": 7057 }, { "epoch": 0.82, "learning_rate": 5.449391756230069e-08, "logits/chosen": -1.8294763565063477, "logits/rejected": -1.7825604677200317, "logps/chosen": -226.99118041992188, "logps/rejected": -272.90509033203125, "loss": 0.3458, "rewards/accuracies": 0.875, "rewards/chosen": -0.10530853271484375, "rewards/margins": 1.6039793491363525, "rewards/rejected": -1.7092878818511963, "step": 7058 }, { "epoch": 0.82, "learning_rate": 5.445848588638243e-08, "logits/chosen": -2.4309864044189453, "logits/rejected": -2.496431827545166, "logps/chosen": -122.91911315917969, "logps/rejected": -162.16622924804688, "loss": 0.4084, "rewards/accuracies": 0.875, "rewards/chosen": -0.23282170295715332, "rewards/margins": 1.5953443050384521, "rewards/rejected": -1.8281662464141846, "step": 7059 }, { "epoch": 0.82, "learning_rate": 5.442305421046415e-08, "logits/chosen": -2.3069398403167725, "logits/rejected": -2.5516245365142822, "logps/chosen": -516.5227661132812, "logps/rejected": -342.8563232421875, "loss": 0.4918, "rewards/accuracies": 0.625, "rewards/chosen": -0.7194671034812927, "rewards/margins": 2.299372911453247, "rewards/rejected": -3.0188400745391846, "step": 7060 }, { "epoch": 0.82, "learning_rate": 5.438762253454588e-08, "logits/chosen": -2.560863971710205, "logits/rejected": -2.7848618030548096, "logps/chosen": -417.44146728515625, "logps/rejected": -391.58563232421875, "loss": 0.2147, "rewards/accuracies": 1.0, "rewards/chosen": -0.8829959630966187, "rewards/margins": 2.0923233032226562, "rewards/rejected": -2.9753193855285645, "step": 7061 }, { "epoch": 0.82, "learning_rate": 5.4352190858627614e-08, "logits/chosen": -2.5167698860168457, "logits/rejected": -2.6787829399108887, "logps/chosen": -357.91510009765625, "logps/rejected": -343.748291015625, "loss": 0.0883, "rewards/accuracies": 1.0, "rewards/chosen": -0.5590354204177856, "rewards/margins": 3.041914463043213, "rewards/rejected": -3.600950002670288, "step": 7062 }, { "epoch": 0.82, "learning_rate": 5.431675918270934e-08, "logits/chosen": -2.1628222465515137, "logits/rejected": -2.306668758392334, "logps/chosen": -228.98416137695312, "logps/rejected": -192.12281799316406, "loss": 0.3633, "rewards/accuracies": 0.625, "rewards/chosen": -0.6939672231674194, "rewards/margins": 2.430783987045288, "rewards/rejected": -3.124751091003418, "step": 7063 }, { "epoch": 0.82, "learning_rate": 5.4281327506791065e-08, "logits/chosen": -2.004894256591797, "logits/rejected": -2.173497200012207, "logps/chosen": -329.82891845703125, "logps/rejected": -228.1202850341797, "loss": 0.7309, "rewards/accuracies": 0.875, "rewards/chosen": -0.5039427280426025, "rewards/margins": 1.6251156330108643, "rewards/rejected": -2.129058361053467, "step": 7064 }, { "epoch": 0.82, "learning_rate": 5.42458958308728e-08, "logits/chosen": -2.516885757446289, "logits/rejected": -2.3299057483673096, "logps/chosen": -128.3290252685547, "logps/rejected": -296.0967712402344, "loss": 0.6783, "rewards/accuracies": 0.875, "rewards/chosen": -0.9761713743209839, "rewards/margins": 2.1150224208831787, "rewards/rejected": -3.091193675994873, "step": 7065 }, { "epoch": 0.82, "learning_rate": 5.421046415495453e-08, "logits/chosen": -2.446737289428711, "logits/rejected": -2.1466989517211914, "logps/chosen": -153.33035278320312, "logps/rejected": -314.0165100097656, "loss": 0.6291, "rewards/accuracies": 0.375, "rewards/chosen": -1.405031442642212, "rewards/margins": 0.8808308243751526, "rewards/rejected": -2.2858622074127197, "step": 7066 }, { "epoch": 0.82, "learning_rate": 5.417503247903625e-08, "logits/chosen": -2.529860258102417, "logits/rejected": -2.192335367202759, "logps/chosen": -217.42800903320312, "logps/rejected": -242.1964569091797, "loss": 0.2936, "rewards/accuracies": 0.875, "rewards/chosen": -0.9606083631515503, "rewards/margins": 2.1548354625701904, "rewards/rejected": -3.1154439449310303, "step": 7067 }, { "epoch": 0.82, "learning_rate": 5.413960080311799e-08, "logits/chosen": -2.1980345249176025, "logits/rejected": -2.250220775604248, "logps/chosen": -382.2289733886719, "logps/rejected": -328.2364196777344, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": -0.6693390011787415, "rewards/margins": 3.673095703125, "rewards/rejected": -4.342434406280518, "step": 7068 }, { "epoch": 0.82, "learning_rate": 5.4104169127199715e-08, "logits/chosen": -2.8791427612304688, "logits/rejected": -2.7693488597869873, "logps/chosen": -218.0260009765625, "logps/rejected": -270.9942932128906, "loss": 0.2942, "rewards/accuracies": 0.875, "rewards/chosen": -0.4256408214569092, "rewards/margins": 2.8898119926452637, "rewards/rejected": -3.3154525756835938, "step": 7069 }, { "epoch": 0.82, "learning_rate": 5.406873745128144e-08, "logits/chosen": -1.9729505777359009, "logits/rejected": -2.14333438873291, "logps/chosen": -264.78900146484375, "logps/rejected": -279.6859130859375, "loss": 0.3105, "rewards/accuracies": 0.875, "rewards/chosen": -0.896172285079956, "rewards/margins": 2.4416356086730957, "rewards/rejected": -3.3378076553344727, "step": 7070 }, { "epoch": 0.82, "learning_rate": 5.403330577536317e-08, "logits/chosen": -2.6455044746398926, "logits/rejected": -2.810408592224121, "logps/chosen": -158.95437622070312, "logps/rejected": -187.54678344726562, "loss": 0.5346, "rewards/accuracies": 0.875, "rewards/chosen": -0.7297312617301941, "rewards/margins": 3.1554696559906006, "rewards/rejected": -3.8852009773254395, "step": 7071 }, { "epoch": 0.82, "learning_rate": 5.39978740994449e-08, "logits/chosen": -2.7388885021209717, "logits/rejected": -2.6037793159484863, "logps/chosen": -237.68621826171875, "logps/rejected": -99.4521713256836, "loss": 0.8618, "rewards/accuracies": 0.625, "rewards/chosen": -1.3324000835418701, "rewards/margins": 0.2650715708732605, "rewards/rejected": -1.5974715948104858, "step": 7072 }, { "epoch": 0.82, "learning_rate": 5.396244242352663e-08, "logits/chosen": -2.2858903408050537, "logits/rejected": -2.4309725761413574, "logps/chosen": -563.9614868164062, "logps/rejected": -439.88623046875, "loss": 0.11, "rewards/accuracies": 1.0, "rewards/chosen": -0.38197243213653564, "rewards/margins": 4.131682395935059, "rewards/rejected": -4.513655185699463, "step": 7073 }, { "epoch": 0.82, "learning_rate": 5.392701074760836e-08, "logits/chosen": -1.8651714324951172, "logits/rejected": -1.9832913875579834, "logps/chosen": -244.70516967773438, "logps/rejected": -182.74102783203125, "loss": 0.428, "rewards/accuracies": 0.75, "rewards/chosen": -1.0378506183624268, "rewards/margins": 1.6991101503372192, "rewards/rejected": -2.7369608879089355, "step": 7074 }, { "epoch": 0.82, "learning_rate": 5.389157907169009e-08, "logits/chosen": -2.3190481662750244, "logits/rejected": -2.268003225326538, "logps/chosen": -372.76898193359375, "logps/rejected": -419.2706604003906, "loss": 0.4622, "rewards/accuracies": 0.625, "rewards/chosen": -1.4713338613510132, "rewards/margins": 1.4608826637268066, "rewards/rejected": -2.9322166442871094, "step": 7075 }, { "epoch": 0.82, "learning_rate": 5.3856147395771817e-08, "logits/chosen": -2.6701719760894775, "logits/rejected": -2.7600643634796143, "logps/chosen": -355.6355895996094, "logps/rejected": -349.1263427734375, "loss": 0.1948, "rewards/accuracies": 0.875, "rewards/chosen": -0.8950479626655579, "rewards/margins": 3.884957790374756, "rewards/rejected": -4.78000545501709, "step": 7076 }, { "epoch": 0.82, "learning_rate": 5.382071571985355e-08, "logits/chosen": -2.001457452774048, "logits/rejected": -2.150477647781372, "logps/chosen": -637.1976928710938, "logps/rejected": -514.8309326171875, "loss": 0.2426, "rewards/accuracies": 0.875, "rewards/chosen": -0.36089959740638733, "rewards/margins": 2.400705337524414, "rewards/rejected": -2.7616047859191895, "step": 7077 }, { "epoch": 0.82, "learning_rate": 5.3785284043935274e-08, "logits/chosen": -2.2319910526275635, "logits/rejected": -2.357527256011963, "logps/chosen": -206.341796875, "logps/rejected": -159.36843872070312, "loss": 0.3757, "rewards/accuracies": 0.875, "rewards/chosen": -0.9682274460792542, "rewards/margins": 1.93678617477417, "rewards/rejected": -2.9050137996673584, "step": 7078 }, { "epoch": 0.82, "learning_rate": 5.3749852368017e-08, "logits/chosen": -2.6746723651885986, "logits/rejected": -2.7257237434387207, "logps/chosen": -158.10011291503906, "logps/rejected": -199.669189453125, "loss": 0.5038, "rewards/accuracies": 0.875, "rewards/chosen": -1.032411813735962, "rewards/margins": 1.9509447813034058, "rewards/rejected": -2.9833567142486572, "step": 7079 }, { "epoch": 0.82, "learning_rate": 5.371442069209874e-08, "logits/chosen": -2.362666368484497, "logits/rejected": -2.563931465148926, "logps/chosen": -309.92230224609375, "logps/rejected": -312.2405700683594, "loss": 0.352, "rewards/accuracies": 0.875, "rewards/chosen": -0.2518518269062042, "rewards/margins": 1.8158719539642334, "rewards/rejected": -2.0677239894866943, "step": 7080 }, { "epoch": 0.82, "learning_rate": 5.367898901618046e-08, "logits/chosen": -2.4716134071350098, "logits/rejected": -2.5042073726654053, "logps/chosen": -234.95396423339844, "logps/rejected": -186.0421600341797, "loss": 0.9593, "rewards/accuracies": 0.5, "rewards/chosen": -1.4034150838851929, "rewards/margins": 1.4118218421936035, "rewards/rejected": -2.815236806869507, "step": 7081 }, { "epoch": 0.82, "learning_rate": 5.364355734026219e-08, "logits/chosen": -1.5371363162994385, "logits/rejected": -1.668870449066162, "logps/chosen": -273.50115966796875, "logps/rejected": -268.65576171875, "loss": 0.1855, "rewards/accuracies": 1.0, "rewards/chosen": -0.5098941326141357, "rewards/margins": 2.4756059646606445, "rewards/rejected": -2.9855000972747803, "step": 7082 }, { "epoch": 0.82, "learning_rate": 5.3608125664343924e-08, "logits/chosen": -2.7036499977111816, "logits/rejected": -2.818734884262085, "logps/chosen": -373.56597900390625, "logps/rejected": -261.6390380859375, "loss": 0.5994, "rewards/accuracies": 0.625, "rewards/chosen": -1.492471694946289, "rewards/margins": 1.2840293645858765, "rewards/rejected": -2.776501178741455, "step": 7083 }, { "epoch": 0.82, "learning_rate": 5.3572693988425647e-08, "logits/chosen": -2.3826260566711426, "logits/rejected": -2.7383384704589844, "logps/chosen": -358.42498779296875, "logps/rejected": -286.14654541015625, "loss": 0.1654, "rewards/accuracies": 1.0, "rewards/chosen": -0.690256655216217, "rewards/margins": 3.972853422164917, "rewards/rejected": -4.66310977935791, "step": 7084 }, { "epoch": 0.82, "learning_rate": 5.3537262312507375e-08, "logits/chosen": -2.790360927581787, "logits/rejected": -2.649407148361206, "logps/chosen": -371.5672302246094, "logps/rejected": -262.5574035644531, "loss": 0.305, "rewards/accuracies": 0.875, "rewards/chosen": -0.5344383716583252, "rewards/margins": 2.247753381729126, "rewards/rejected": -2.782191753387451, "step": 7085 }, { "epoch": 0.82, "learning_rate": 5.350183063658911e-08, "logits/chosen": -2.1201939582824707, "logits/rejected": -2.172301769256592, "logps/chosen": -389.88775634765625, "logps/rejected": -379.9013366699219, "loss": 0.6, "rewards/accuracies": 0.875, "rewards/chosen": -0.7162358164787292, "rewards/margins": 2.2213447093963623, "rewards/rejected": -2.9375805854797363, "step": 7086 }, { "epoch": 0.82, "learning_rate": 5.346639896067083e-08, "logits/chosen": -2.4434680938720703, "logits/rejected": -2.224916458129883, "logps/chosen": -269.817138671875, "logps/rejected": -325.4612121582031, "loss": 0.2035, "rewards/accuracies": 0.875, "rewards/chosen": -1.025089979171753, "rewards/margins": 5.2941694259643555, "rewards/rejected": -6.3192596435546875, "step": 7087 }, { "epoch": 0.82, "learning_rate": 5.343096728475257e-08, "logits/chosen": -2.4680843353271484, "logits/rejected": -2.592451333999634, "logps/chosen": -189.1216278076172, "logps/rejected": -208.7321014404297, "loss": 0.6215, "rewards/accuracies": 0.75, "rewards/chosen": -1.0934727191925049, "rewards/margins": 0.8470551371574402, "rewards/rejected": -1.9405279159545898, "step": 7088 }, { "epoch": 0.82, "learning_rate": 5.33955356088343e-08, "logits/chosen": -2.4609482288360596, "logits/rejected": -1.7658753395080566, "logps/chosen": -103.26201629638672, "logps/rejected": -258.08026123046875, "loss": 0.102, "rewards/accuracies": 1.0, "rewards/chosen": 0.23919722437858582, "rewards/margins": 2.8364298343658447, "rewards/rejected": -2.5972328186035156, "step": 7089 }, { "epoch": 0.82, "learning_rate": 5.3360103932916026e-08, "logits/chosen": -2.339012861251831, "logits/rejected": -2.3758983612060547, "logps/chosen": -204.26614379882812, "logps/rejected": -335.383056640625, "loss": 0.3207, "rewards/accuracies": 0.875, "rewards/chosen": -1.4241714477539062, "rewards/margins": 3.163961887359619, "rewards/rejected": -4.588133335113525, "step": 7090 }, { "epoch": 0.82, "learning_rate": 5.3324672256997754e-08, "logits/chosen": -2.141118288040161, "logits/rejected": -2.330446481704712, "logps/chosen": -203.93617248535156, "logps/rejected": -136.40234375, "loss": 0.7345, "rewards/accuracies": 0.625, "rewards/chosen": -0.8865020871162415, "rewards/margins": 0.2946810722351074, "rewards/rejected": -1.181183099746704, "step": 7091 }, { "epoch": 0.83, "learning_rate": 5.328924058107948e-08, "logits/chosen": -2.4861788749694824, "logits/rejected": -2.318962574005127, "logps/chosen": -137.71517944335938, "logps/rejected": -237.64352416992188, "loss": 0.554, "rewards/accuracies": 0.625, "rewards/chosen": -0.7130979895591736, "rewards/margins": 1.3904852867126465, "rewards/rejected": -2.103583335876465, "step": 7092 }, { "epoch": 0.83, "learning_rate": 5.325380890516121e-08, "logits/chosen": -2.3129074573516846, "logits/rejected": -2.226057767868042, "logps/chosen": -259.1691589355469, "logps/rejected": -350.78704833984375, "loss": 0.4509, "rewards/accuracies": 0.75, "rewards/chosen": -0.7950044274330139, "rewards/margins": 2.8067405223846436, "rewards/rejected": -3.601745128631592, "step": 7093 }, { "epoch": 0.83, "learning_rate": 5.321837722924295e-08, "logits/chosen": -2.583815097808838, "logits/rejected": -2.628852367401123, "logps/chosen": -396.88885498046875, "logps/rejected": -356.92535400390625, "loss": 0.2169, "rewards/accuracies": 0.875, "rewards/chosen": -0.6225636005401611, "rewards/margins": 3.8207473754882812, "rewards/rejected": -4.4433112144470215, "step": 7094 }, { "epoch": 0.83, "learning_rate": 5.318294555332467e-08, "logits/chosen": -2.6331756114959717, "logits/rejected": -2.3665621280670166, "logps/chosen": -257.5335998535156, "logps/rejected": -227.23883056640625, "loss": 0.3647, "rewards/accuracies": 0.75, "rewards/chosen": -1.2546592950820923, "rewards/margins": 2.0340769290924072, "rewards/rejected": -3.288736343383789, "step": 7095 }, { "epoch": 0.83, "learning_rate": 5.31475138774064e-08, "logits/chosen": -2.642678737640381, "logits/rejected": -2.7263565063476562, "logps/chosen": -288.19384765625, "logps/rejected": -363.0511779785156, "loss": 0.2518, "rewards/accuracies": 1.0, "rewards/chosen": -0.6055020093917847, "rewards/margins": 1.730758786201477, "rewards/rejected": -2.3362607955932617, "step": 7096 }, { "epoch": 0.83, "learning_rate": 5.3112082201488133e-08, "logits/chosen": -1.9828640222549438, "logits/rejected": -2.0624942779541016, "logps/chosen": -292.2423095703125, "logps/rejected": -277.7435302734375, "loss": 0.1915, "rewards/accuracies": 1.0, "rewards/chosen": -0.8394526243209839, "rewards/margins": 2.8338265419006348, "rewards/rejected": -3.67327880859375, "step": 7097 }, { "epoch": 0.83, "learning_rate": 5.3076650525569856e-08, "logits/chosen": -2.0936989784240723, "logits/rejected": -2.0842397212982178, "logps/chosen": -242.422607421875, "logps/rejected": -298.00054931640625, "loss": 0.2136, "rewards/accuracies": 1.0, "rewards/chosen": -0.29614442586898804, "rewards/margins": 3.2604801654815674, "rewards/rejected": -3.5566246509552, "step": 7098 }, { "epoch": 0.83, "learning_rate": 5.3041218849651584e-08, "logits/chosen": -1.8181008100509644, "logits/rejected": -2.0407612323760986, "logps/chosen": -385.093994140625, "logps/rejected": -340.95123291015625, "loss": 0.448, "rewards/accuracies": 0.75, "rewards/chosen": -0.2682061791419983, "rewards/margins": 1.8506900072097778, "rewards/rejected": -2.118896245956421, "step": 7099 }, { "epoch": 0.83, "learning_rate": 5.300578717373332e-08, "logits/chosen": -2.5803332328796387, "logits/rejected": -2.231133460998535, "logps/chosen": -185.06626892089844, "logps/rejected": -300.57666015625, "loss": 0.4812, "rewards/accuracies": 0.75, "rewards/chosen": -1.085295557975769, "rewards/margins": 2.1651248931884766, "rewards/rejected": -3.250420570373535, "step": 7100 }, { "epoch": 0.83, "learning_rate": 5.297035549781504e-08, "logits/chosen": -1.9587132930755615, "logits/rejected": -1.5557818412780762, "logps/chosen": -138.3531951904297, "logps/rejected": -219.65618896484375, "loss": 0.4115, "rewards/accuracies": 0.75, "rewards/chosen": -1.062540054321289, "rewards/margins": 1.2229641675949097, "rewards/rejected": -2.285504102706909, "step": 7101 }, { "epoch": 0.83, "learning_rate": 5.293492382189677e-08, "logits/chosen": -2.3611083030700684, "logits/rejected": -2.3948662281036377, "logps/chosen": -291.96807861328125, "logps/rejected": -314.0717468261719, "loss": 0.2487, "rewards/accuracies": 1.0, "rewards/chosen": -0.7211892604827881, "rewards/margins": 2.2462873458862305, "rewards/rejected": -2.9674768447875977, "step": 7102 }, { "epoch": 0.83, "learning_rate": 5.2899492145978506e-08, "logits/chosen": -2.1996397972106934, "logits/rejected": -2.4936602115631104, "logps/chosen": -540.9266357421875, "logps/rejected": -379.2435607910156, "loss": 0.635, "rewards/accuracies": 0.75, "rewards/chosen": -1.330552339553833, "rewards/margins": 1.200603723526001, "rewards/rejected": -2.531156063079834, "step": 7103 }, { "epoch": 0.83, "learning_rate": 5.2864060470060235e-08, "logits/chosen": -2.1967883110046387, "logits/rejected": -2.2393414974212646, "logps/chosen": -179.24169921875, "logps/rejected": -194.68667602539062, "loss": 0.6276, "rewards/accuracies": 0.625, "rewards/chosen": -0.7349981665611267, "rewards/margins": 0.4179682731628418, "rewards/rejected": -1.1529663801193237, "step": 7104 }, { "epoch": 0.83, "learning_rate": 5.282862879414196e-08, "logits/chosen": -2.30183744430542, "logits/rejected": -1.9764306545257568, "logps/chosen": -299.2536926269531, "logps/rejected": -293.5069580078125, "loss": 0.1517, "rewards/accuracies": 1.0, "rewards/chosen": 0.10910052806138992, "rewards/margins": 2.657681941986084, "rewards/rejected": -2.548581600189209, "step": 7105 }, { "epoch": 0.83, "learning_rate": 5.279319711822369e-08, "logits/chosen": -2.1251134872436523, "logits/rejected": -2.4199092388153076, "logps/chosen": -406.0937194824219, "logps/rejected": -220.78990173339844, "loss": 0.7876, "rewards/accuracies": 0.875, "rewards/chosen": -1.2227082252502441, "rewards/margins": 0.6219390630722046, "rewards/rejected": -1.8446474075317383, "step": 7106 }, { "epoch": 0.83, "learning_rate": 5.275776544230542e-08, "logits/chosen": -2.24761700630188, "logits/rejected": -1.5899224281311035, "logps/chosen": -235.53355407714844, "logps/rejected": -478.6787414550781, "loss": 0.6746, "rewards/accuracies": 0.625, "rewards/chosen": -0.9214428067207336, "rewards/margins": 2.0289621353149414, "rewards/rejected": -2.9504051208496094, "step": 7107 }, { "epoch": 0.83, "learning_rate": 5.272233376638714e-08, "logits/chosen": -2.1261441707611084, "logits/rejected": -2.2054073810577393, "logps/chosen": -381.4080810546875, "logps/rejected": -303.0703125, "loss": 0.4313, "rewards/accuracies": 0.75, "rewards/chosen": -0.27069926261901855, "rewards/margins": 3.5682663917541504, "rewards/rejected": -3.838965892791748, "step": 7108 }, { "epoch": 0.83, "learning_rate": 5.268690209046888e-08, "logits/chosen": -2.049912452697754, "logits/rejected": -2.379183292388916, "logps/chosen": -309.290283203125, "logps/rejected": -252.06906127929688, "loss": 0.2223, "rewards/accuracies": 0.875, "rewards/chosen": 0.3155606985092163, "rewards/margins": 2.5110762119293213, "rewards/rejected": -2.1955153942108154, "step": 7109 }, { "epoch": 0.83, "learning_rate": 5.265147041455061e-08, "logits/chosen": -2.629753828048706, "logits/rejected": -2.403799057006836, "logps/chosen": -475.8435363769531, "logps/rejected": -289.5596923828125, "loss": 0.5425, "rewards/accuracies": 0.75, "rewards/chosen": -1.1470344066619873, "rewards/margins": 2.065884590148926, "rewards/rejected": -3.212918996810913, "step": 7110 }, { "epoch": 0.83, "learning_rate": 5.261603873863233e-08, "logits/chosen": -1.710662603378296, "logits/rejected": -2.0135138034820557, "logps/chosen": -326.976806640625, "logps/rejected": -223.577392578125, "loss": 0.4374, "rewards/accuracies": 0.875, "rewards/chosen": -1.5686733722686768, "rewards/margins": 1.847362756729126, "rewards/rejected": -3.4160361289978027, "step": 7111 }, { "epoch": 0.83, "learning_rate": 5.2580607062714065e-08, "logits/chosen": -2.407397747039795, "logits/rejected": -2.5939388275146484, "logps/chosen": -129.783935546875, "logps/rejected": -175.2313232421875, "loss": 1.0041, "rewards/accuracies": 0.75, "rewards/chosen": -1.1112239360809326, "rewards/margins": 2.2663252353668213, "rewards/rejected": -3.377549409866333, "step": 7112 }, { "epoch": 0.83, "learning_rate": 5.2545175386795793e-08, "logits/chosen": -3.0449020862579346, "logits/rejected": -3.007037878036499, "logps/chosen": -193.508544921875, "logps/rejected": -228.6929931640625, "loss": 0.4054, "rewards/accuracies": 0.75, "rewards/chosen": -1.3500027656555176, "rewards/margins": 1.4030952453613281, "rewards/rejected": -2.7530980110168457, "step": 7113 }, { "epoch": 0.83, "learning_rate": 5.250974371087752e-08, "logits/chosen": -2.0530645847320557, "logits/rejected": -2.142463445663452, "logps/chosen": -192.70541381835938, "logps/rejected": -231.62042236328125, "loss": 0.3753, "rewards/accuracies": 0.875, "rewards/chosen": -0.7710584998130798, "rewards/margins": 2.796257257461548, "rewards/rejected": -3.5673155784606934, "step": 7114 }, { "epoch": 0.83, "learning_rate": 5.247431203495925e-08, "logits/chosen": -1.9830033779144287, "logits/rejected": -1.831479787826538, "logps/chosen": -145.20343017578125, "logps/rejected": -314.56793212890625, "loss": 0.3766, "rewards/accuracies": 0.75, "rewards/chosen": -0.7598315477371216, "rewards/margins": 1.8874708414077759, "rewards/rejected": -2.6473026275634766, "step": 7115 }, { "epoch": 0.83, "learning_rate": 5.243888035904098e-08, "logits/chosen": -2.559929370880127, "logits/rejected": -2.4740145206451416, "logps/chosen": -353.0787353515625, "logps/rejected": -348.5948181152344, "loss": 0.3863, "rewards/accuracies": 0.75, "rewards/chosen": -1.6061197519302368, "rewards/margins": 2.7493093013763428, "rewards/rejected": -4.355429172515869, "step": 7116 }, { "epoch": 0.83, "learning_rate": 5.240344868312271e-08, "logits/chosen": -2.2936782836914062, "logits/rejected": -2.2760701179504395, "logps/chosen": -164.6866455078125, "logps/rejected": -354.23040771484375, "loss": 0.5602, "rewards/accuracies": 0.75, "rewards/chosen": -1.1644459962844849, "rewards/margins": 1.8261011838912964, "rewards/rejected": -2.9905471801757812, "step": 7117 }, { "epoch": 0.83, "learning_rate": 5.236801700720444e-08, "logits/chosen": -1.8714359998703003, "logits/rejected": -2.095311403274536, "logps/chosen": -218.4193115234375, "logps/rejected": -240.65237426757812, "loss": 0.3942, "rewards/accuracies": 0.75, "rewards/chosen": -0.9698383808135986, "rewards/margins": 2.593071460723877, "rewards/rejected": -3.5629096031188965, "step": 7118 }, { "epoch": 0.83, "learning_rate": 5.2332585331286166e-08, "logits/chosen": -2.37638521194458, "logits/rejected": -2.144479751586914, "logps/chosen": -138.06277465820312, "logps/rejected": -355.3026123046875, "loss": 0.2489, "rewards/accuracies": 1.0, "rewards/chosen": -0.782085657119751, "rewards/margins": 2.7575955390930176, "rewards/rejected": -3.5396811962127686, "step": 7119 }, { "epoch": 0.83, "learning_rate": 5.2297153655367895e-08, "logits/chosen": -2.777553081512451, "logits/rejected": -2.975330352783203, "logps/chosen": -212.51296997070312, "logps/rejected": -181.91847229003906, "loss": 0.5504, "rewards/accuracies": 0.625, "rewards/chosen": -0.6085493564605713, "rewards/margins": 0.9484121203422546, "rewards/rejected": -1.5569615364074707, "step": 7120 }, { "epoch": 0.83, "learning_rate": 5.226172197944963e-08, "logits/chosen": -2.4897820949554443, "logits/rejected": -2.5976147651672363, "logps/chosen": -258.4271545410156, "logps/rejected": -303.53082275390625, "loss": 0.1486, "rewards/accuracies": 1.0, "rewards/chosen": 0.09243802726268768, "rewards/margins": 3.0199313163757324, "rewards/rejected": -2.9274935722351074, "step": 7121 }, { "epoch": 0.83, "learning_rate": 5.222629030353135e-08, "logits/chosen": -2.3382625579833984, "logits/rejected": -2.6639628410339355, "logps/chosen": -453.81890869140625, "logps/rejected": -344.62725830078125, "loss": 0.3791, "rewards/accuracies": 0.875, "rewards/chosen": -1.9182524681091309, "rewards/margins": 3.4141435623168945, "rewards/rejected": -5.332396030426025, "step": 7122 }, { "epoch": 0.83, "learning_rate": 5.219085862761308e-08, "logits/chosen": -2.4912688732147217, "logits/rejected": -2.4850656986236572, "logps/chosen": -305.2012634277344, "logps/rejected": -347.4366760253906, "loss": 0.3105, "rewards/accuracies": 0.875, "rewards/chosen": -1.3404896259307861, "rewards/margins": 3.113051652908325, "rewards/rejected": -4.453541278839111, "step": 7123 }, { "epoch": 0.83, "learning_rate": 5.2155426951694816e-08, "logits/chosen": -2.6170952320098877, "logits/rejected": -2.4079811573028564, "logps/chosen": -126.78227233886719, "logps/rejected": -120.48257446289062, "loss": 0.8418, "rewards/accuracies": 0.625, "rewards/chosen": -1.4164729118347168, "rewards/margins": 1.6044985055923462, "rewards/rejected": -3.0209715366363525, "step": 7124 }, { "epoch": 0.83, "learning_rate": 5.211999527577654e-08, "logits/chosen": -2.3039863109588623, "logits/rejected": -2.3902111053466797, "logps/chosen": -243.43431091308594, "logps/rejected": -227.1172332763672, "loss": 0.182, "rewards/accuracies": 1.0, "rewards/chosen": -0.6218520402908325, "rewards/margins": 2.103602647781372, "rewards/rejected": -2.725454330444336, "step": 7125 }, { "epoch": 0.83, "learning_rate": 5.2084563599858274e-08, "logits/chosen": -1.9037690162658691, "logits/rejected": -2.4086854457855225, "logps/chosen": -473.8895263671875, "logps/rejected": -291.5407409667969, "loss": 0.1924, "rewards/accuracies": 1.0, "rewards/chosen": -0.40019655227661133, "rewards/margins": 3.169877052307129, "rewards/rejected": -3.5700736045837402, "step": 7126 }, { "epoch": 0.83, "learning_rate": 5.204913192394e-08, "logits/chosen": -2.224423408508301, "logits/rejected": -2.2290871143341064, "logps/chosen": -175.00750732421875, "logps/rejected": -387.51458740234375, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": -0.359252393245697, "rewards/margins": 4.04508113861084, "rewards/rejected": -4.404333114624023, "step": 7127 }, { "epoch": 0.83, "learning_rate": 5.2013700248021724e-08, "logits/chosen": -2.1778805255889893, "logits/rejected": -2.509572982788086, "logps/chosen": -400.7255859375, "logps/rejected": -368.87921142578125, "loss": 0.2304, "rewards/accuracies": 0.875, "rewards/chosen": -1.4659647941589355, "rewards/margins": 3.3027243614196777, "rewards/rejected": -4.768689155578613, "step": 7128 }, { "epoch": 0.83, "learning_rate": 5.197826857210346e-08, "logits/chosen": -2.360635995864868, "logits/rejected": -2.468200206756592, "logps/chosen": -503.52496337890625, "logps/rejected": -258.1363220214844, "loss": 0.1711, "rewards/accuracies": 0.875, "rewards/chosen": -0.20617343485355377, "rewards/margins": 3.0621237754821777, "rewards/rejected": -3.2682974338531494, "step": 7129 }, { "epoch": 0.83, "learning_rate": 5.194283689618519e-08, "logits/chosen": -2.414518117904663, "logits/rejected": -2.243349313735962, "logps/chosen": -77.71697235107422, "logps/rejected": -214.50732421875, "loss": 0.4391, "rewards/accuracies": 0.75, "rewards/chosen": -1.4718064069747925, "rewards/margins": 1.063778281211853, "rewards/rejected": -2.5355846881866455, "step": 7130 }, { "epoch": 0.83, "learning_rate": 5.190740522026692e-08, "logits/chosen": -1.850909948348999, "logits/rejected": -1.955790400505066, "logps/chosen": -218.07704162597656, "logps/rejected": -235.55929565429688, "loss": 0.7141, "rewards/accuracies": 0.625, "rewards/chosen": -1.369465947151184, "rewards/margins": 1.011781096458435, "rewards/rejected": -2.381247043609619, "step": 7131 }, { "epoch": 0.83, "learning_rate": 5.1871973544348646e-08, "logits/chosen": -1.9200178384780884, "logits/rejected": -2.216301918029785, "logps/chosen": -552.276611328125, "logps/rejected": -313.0879211425781, "loss": 0.3146, "rewards/accuracies": 0.75, "rewards/chosen": -0.5562664866447449, "rewards/margins": 2.2277512550354004, "rewards/rejected": -2.784017562866211, "step": 7132 }, { "epoch": 0.83, "learning_rate": 5.1836541868430375e-08, "logits/chosen": -2.3273820877075195, "logits/rejected": -2.1277942657470703, "logps/chosen": -351.7817077636719, "logps/rejected": -436.43023681640625, "loss": 0.2971, "rewards/accuracies": 0.75, "rewards/chosen": -1.2669453620910645, "rewards/margins": 3.033970832824707, "rewards/rejected": -4.3009161949157715, "step": 7133 }, { "epoch": 0.83, "learning_rate": 5.1801110192512104e-08, "logits/chosen": -2.4098262786865234, "logits/rejected": -2.4733762741088867, "logps/chosen": -407.6121826171875, "logps/rejected": -322.1366271972656, "loss": 0.2559, "rewards/accuracies": 0.875, "rewards/chosen": -0.42694389820098877, "rewards/margins": 2.8572802543640137, "rewards/rejected": -3.284224033355713, "step": 7134 }, { "epoch": 0.83, "learning_rate": 5.176567851659384e-08, "logits/chosen": -2.110743522644043, "logits/rejected": -2.147102117538452, "logps/chosen": -215.52012634277344, "logps/rejected": -250.29898071289062, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": -0.06611805409193039, "rewards/margins": 3.8928465843200684, "rewards/rejected": -3.9589648246765137, "step": 7135 }, { "epoch": 0.83, "learning_rate": 5.173024684067556e-08, "logits/chosen": -2.006789207458496, "logits/rejected": -2.4439101219177246, "logps/chosen": -232.86648559570312, "logps/rejected": -220.13894653320312, "loss": 0.3379, "rewards/accuracies": 0.75, "rewards/chosen": -1.1531846523284912, "rewards/margins": 2.6109726428985596, "rewards/rejected": -3.764157295227051, "step": 7136 }, { "epoch": 0.83, "learning_rate": 5.169481516475729e-08, "logits/chosen": -2.392191171646118, "logits/rejected": -2.6274940967559814, "logps/chosen": -201.02752685546875, "logps/rejected": -309.7099304199219, "loss": 0.5647, "rewards/accuracies": 0.75, "rewards/chosen": -1.1566517353057861, "rewards/margins": 1.7430888414382935, "rewards/rejected": -2.899740695953369, "step": 7137 }, { "epoch": 0.83, "learning_rate": 5.1659383488839025e-08, "logits/chosen": -2.993943691253662, "logits/rejected": -2.783445358276367, "logps/chosen": -241.85784912109375, "logps/rejected": -256.97027587890625, "loss": 0.2004, "rewards/accuracies": 1.0, "rewards/chosen": -0.4644942879676819, "rewards/margins": 2.2968029975891113, "rewards/rejected": -2.7612972259521484, "step": 7138 }, { "epoch": 0.83, "learning_rate": 5.162395181292075e-08, "logits/chosen": -2.372363567352295, "logits/rejected": -2.3671679496765137, "logps/chosen": -206.7469482421875, "logps/rejected": -243.5825958251953, "loss": 0.2725, "rewards/accuracies": 0.75, "rewards/chosen": -0.8097165822982788, "rewards/margins": 2.364013433456421, "rewards/rejected": -3.1737303733825684, "step": 7139 }, { "epoch": 0.83, "learning_rate": 5.1588520137002476e-08, "logits/chosen": -2.9474503993988037, "logits/rejected": -2.6426734924316406, "logps/chosen": -221.73826599121094, "logps/rejected": -168.39122009277344, "loss": 0.9695, "rewards/accuracies": 0.625, "rewards/chosen": -1.4745543003082275, "rewards/margins": 0.2975813150405884, "rewards/rejected": -1.772135615348816, "step": 7140 }, { "epoch": 0.83, "learning_rate": 5.155308846108421e-08, "logits/chosen": -2.1463799476623535, "logits/rejected": -2.132744789123535, "logps/chosen": -340.314453125, "logps/rejected": -400.5849609375, "loss": 0.2743, "rewards/accuracies": 0.875, "rewards/chosen": -0.9550794363021851, "rewards/margins": 2.17525053024292, "rewards/rejected": -3.1303300857543945, "step": 7141 }, { "epoch": 0.83, "learning_rate": 5.1517656785165933e-08, "logits/chosen": -2.672795534133911, "logits/rejected": -2.8036561012268066, "logps/chosen": -374.78564453125, "logps/rejected": -391.685546875, "loss": 0.2941, "rewards/accuracies": 0.875, "rewards/chosen": -0.9270328879356384, "rewards/margins": 2.5095014572143555, "rewards/rejected": -3.4365344047546387, "step": 7142 }, { "epoch": 0.83, "learning_rate": 5.148222510924766e-08, "logits/chosen": -2.15981125831604, "logits/rejected": -2.021361827850342, "logps/chosen": -315.4383239746094, "logps/rejected": -268.4542236328125, "loss": 0.2956, "rewards/accuracies": 0.75, "rewards/chosen": -0.8392565846443176, "rewards/margins": 1.8211842775344849, "rewards/rejected": -2.6604409217834473, "step": 7143 }, { "epoch": 0.83, "learning_rate": 5.14467934333294e-08, "logits/chosen": -2.3296287059783936, "logits/rejected": -2.503373146057129, "logps/chosen": -287.1605224609375, "logps/rejected": -270.7717590332031, "loss": 0.4074, "rewards/accuracies": 0.75, "rewards/chosen": -0.949783205986023, "rewards/margins": 1.634054183959961, "rewards/rejected": -2.5838375091552734, "step": 7144 }, { "epoch": 0.83, "learning_rate": 5.1411361757411126e-08, "logits/chosen": -2.215186595916748, "logits/rejected": -2.501162052154541, "logps/chosen": -219.5772705078125, "logps/rejected": -223.25341796875, "loss": 0.3561, "rewards/accuracies": 0.875, "rewards/chosen": -0.29239922761917114, "rewards/margins": 2.107778549194336, "rewards/rejected": -2.400177478790283, "step": 7145 }, { "epoch": 0.83, "learning_rate": 5.137593008149285e-08, "logits/chosen": -2.605314254760742, "logits/rejected": -2.510509729385376, "logps/chosen": -76.70619201660156, "logps/rejected": -322.2023010253906, "loss": 0.1606, "rewards/accuracies": 1.0, "rewards/chosen": -0.7887746095657349, "rewards/margins": 3.3386518955230713, "rewards/rejected": -4.127426624298096, "step": 7146 }, { "epoch": 0.83, "learning_rate": 5.1340498405574584e-08, "logits/chosen": -2.6901488304138184, "logits/rejected": -2.615410327911377, "logps/chosen": -173.4413299560547, "logps/rejected": -675.906982421875, "loss": 0.1696, "rewards/accuracies": 0.875, "rewards/chosen": -0.13563568890094757, "rewards/margins": 6.192337989807129, "rewards/rejected": -6.327974319458008, "step": 7147 }, { "epoch": 0.83, "learning_rate": 5.130506672965631e-08, "logits/chosen": -2.385394334793091, "logits/rejected": -2.5995168685913086, "logps/chosen": -426.51837158203125, "logps/rejected": -264.0852355957031, "loss": 0.0917, "rewards/accuracies": 1.0, "rewards/chosen": -0.13870924711227417, "rewards/margins": 3.5070152282714844, "rewards/rejected": -3.6457247734069824, "step": 7148 }, { "epoch": 0.83, "learning_rate": 5.1269635053738035e-08, "logits/chosen": -2.1776702404022217, "logits/rejected": -2.2907612323760986, "logps/chosen": -319.604736328125, "logps/rejected": -248.90548706054688, "loss": 0.2767, "rewards/accuracies": 0.875, "rewards/chosen": -0.16602492332458496, "rewards/margins": 2.040645122528076, "rewards/rejected": -2.2066702842712402, "step": 7149 }, { "epoch": 0.83, "learning_rate": 5.123420337781977e-08, "logits/chosen": -2.3177404403686523, "logits/rejected": -2.776045560836792, "logps/chosen": -252.09771728515625, "logps/rejected": -245.4468536376953, "loss": 0.2456, "rewards/accuracies": 0.875, "rewards/chosen": -0.6787501573562622, "rewards/margins": 2.2814877033233643, "rewards/rejected": -2.960237979888916, "step": 7150 }, { "epoch": 0.83, "learning_rate": 5.11987717019015e-08, "logits/chosen": -2.3517916202545166, "logits/rejected": -2.2051470279693604, "logps/chosen": -141.220947265625, "logps/rejected": -222.5313262939453, "loss": 0.1184, "rewards/accuracies": 1.0, "rewards/chosen": -0.7227376699447632, "rewards/margins": 4.761120319366455, "rewards/rejected": -5.483858108520508, "step": 7151 }, { "epoch": 0.83, "learning_rate": 5.116334002598322e-08, "logits/chosen": -2.587747573852539, "logits/rejected": -2.5372321605682373, "logps/chosen": -444.88525390625, "logps/rejected": -241.22023010253906, "loss": 0.4512, "rewards/accuracies": 0.625, "rewards/chosen": -1.5273010730743408, "rewards/margins": 2.3331494331359863, "rewards/rejected": -3.8604507446289062, "step": 7152 }, { "epoch": 0.83, "learning_rate": 5.1127908350064956e-08, "logits/chosen": -2.506044387817383, "logits/rejected": -2.6744484901428223, "logps/chosen": -229.06698608398438, "logps/rejected": -197.02169799804688, "loss": 0.9063, "rewards/accuracies": 0.75, "rewards/chosen": -1.4961929321289062, "rewards/margins": 0.6000464558601379, "rewards/rejected": -2.0962395668029785, "step": 7153 }, { "epoch": 0.83, "learning_rate": 5.1092476674146685e-08, "logits/chosen": -1.8982584476470947, "logits/rejected": -1.9668335914611816, "logps/chosen": -434.50445556640625, "logps/rejected": -317.6068115234375, "loss": 0.3489, "rewards/accuracies": 0.75, "rewards/chosen": -0.8629757165908813, "rewards/margins": 2.1247472763061523, "rewards/rejected": -2.9877231121063232, "step": 7154 }, { "epoch": 0.83, "learning_rate": 5.105704499822841e-08, "logits/chosen": -2.5622966289520264, "logits/rejected": -2.8973584175109863, "logps/chosen": -189.73658752441406, "logps/rejected": -252.08628845214844, "loss": 0.3926, "rewards/accuracies": 0.875, "rewards/chosen": -0.6282339096069336, "rewards/margins": 3.343384265899658, "rewards/rejected": -3.9716179370880127, "step": 7155 }, { "epoch": 0.83, "learning_rate": 5.102161332231014e-08, "logits/chosen": -2.7972865104675293, "logits/rejected": -2.7413339614868164, "logps/chosen": -171.08070373535156, "logps/rejected": -195.12828063964844, "loss": 0.3023, "rewards/accuracies": 0.75, "rewards/chosen": -0.7089846134185791, "rewards/margins": 2.895352840423584, "rewards/rejected": -3.604337692260742, "step": 7156 }, { "epoch": 0.83, "learning_rate": 5.098618164639187e-08, "logits/chosen": -2.4722471237182617, "logits/rejected": -2.3154401779174805, "logps/chosen": -241.64678955078125, "logps/rejected": -382.11260986328125, "loss": 0.5492, "rewards/accuracies": 0.75, "rewards/chosen": -1.2628529071807861, "rewards/margins": 1.4469188451766968, "rewards/rejected": -2.7097716331481934, "step": 7157 }, { "epoch": 0.83, "learning_rate": 5.09507499704736e-08, "logits/chosen": -2.951364278793335, "logits/rejected": -2.9887197017669678, "logps/chosen": -243.45172119140625, "logps/rejected": -261.17108154296875, "loss": 0.3008, "rewards/accuracies": 0.875, "rewards/chosen": -0.7828085422515869, "rewards/margins": 2.0841400623321533, "rewards/rejected": -2.8669486045837402, "step": 7158 }, { "epoch": 0.83, "learning_rate": 5.091531829455533e-08, "logits/chosen": -1.874549150466919, "logits/rejected": -1.7882063388824463, "logps/chosen": -382.13726806640625, "logps/rejected": -405.1980285644531, "loss": 0.2678, "rewards/accuracies": 0.875, "rewards/chosen": -0.08454647660255432, "rewards/margins": 2.0672011375427246, "rewards/rejected": -2.151747703552246, "step": 7159 }, { "epoch": 0.83, "learning_rate": 5.087988661863706e-08, "logits/chosen": -2.4699203968048096, "logits/rejected": -2.263108730316162, "logps/chosen": -312.77398681640625, "logps/rejected": -350.5931396484375, "loss": 0.174, "rewards/accuracies": 0.875, "rewards/chosen": -0.6216564178466797, "rewards/margins": 2.995387554168701, "rewards/rejected": -3.617043972015381, "step": 7160 }, { "epoch": 0.83, "learning_rate": 5.084445494271879e-08, "logits/chosen": -2.2951393127441406, "logits/rejected": -2.4687626361846924, "logps/chosen": -268.2995910644531, "logps/rejected": -252.38473510742188, "loss": 0.3676, "rewards/accuracies": 0.875, "rewards/chosen": -0.840825080871582, "rewards/margins": 2.29970121383667, "rewards/rejected": -3.140526533126831, "step": 7161 }, { "epoch": 0.83, "learning_rate": 5.080902326680052e-08, "logits/chosen": -2.5599539279937744, "logits/rejected": -2.4940667152404785, "logps/chosen": -261.1133117675781, "logps/rejected": -296.9208068847656, "loss": 0.311, "rewards/accuracies": 0.875, "rewards/chosen": -0.9520969986915588, "rewards/margins": 2.2137787342071533, "rewards/rejected": -3.1658756732940674, "step": 7162 }, { "epoch": 0.83, "learning_rate": 5.0773591590882244e-08, "logits/chosen": -2.516817331314087, "logits/rejected": -2.5644359588623047, "logps/chosen": -227.2720947265625, "logps/rejected": -203.6341094970703, "loss": 0.6328, "rewards/accuracies": 0.625, "rewards/chosen": -1.3923768997192383, "rewards/margins": 0.536953866481781, "rewards/rejected": -1.9293309450149536, "step": 7163 }, { "epoch": 0.83, "learning_rate": 5.073815991496398e-08, "logits/chosen": -2.5586860179901123, "logits/rejected": -2.5136899948120117, "logps/chosen": -284.9708251953125, "logps/rejected": -298.86895751953125, "loss": 0.1438, "rewards/accuracies": 1.0, "rewards/chosen": -1.693950891494751, "rewards/margins": 2.832962989807129, "rewards/rejected": -4.526914119720459, "step": 7164 }, { "epoch": 0.83, "learning_rate": 5.070272823904571e-08, "logits/chosen": -2.2489173412323, "logits/rejected": -2.240694999694824, "logps/chosen": -226.05239868164062, "logps/rejected": -212.68222045898438, "loss": 0.4273, "rewards/accuracies": 0.875, "rewards/chosen": -1.041130542755127, "rewards/margins": 1.3085906505584717, "rewards/rejected": -2.3497209548950195, "step": 7165 }, { "epoch": 0.83, "learning_rate": 5.066729656312743e-08, "logits/chosen": -2.773956298828125, "logits/rejected": -2.6638193130493164, "logps/chosen": -163.5788116455078, "logps/rejected": -178.07530212402344, "loss": 0.2226, "rewards/accuracies": 1.0, "rewards/chosen": -0.5373913645744324, "rewards/margins": 2.481682777404785, "rewards/rejected": -3.0190742015838623, "step": 7166 }, { "epoch": 0.83, "learning_rate": 5.0631864887209165e-08, "logits/chosen": -2.089329957962036, "logits/rejected": -2.106614828109741, "logps/chosen": -314.02593994140625, "logps/rejected": -308.55975341796875, "loss": 0.2756, "rewards/accuracies": 0.875, "rewards/chosen": -0.0466296523809433, "rewards/margins": 2.3091797828674316, "rewards/rejected": -2.355809450149536, "step": 7167 }, { "epoch": 0.83, "learning_rate": 5.0596433211290894e-08, "logits/chosen": -2.0777924060821533, "logits/rejected": -2.2606258392333984, "logps/chosen": -218.17251586914062, "logps/rejected": -155.02638244628906, "loss": 0.8018, "rewards/accuracies": 0.625, "rewards/chosen": -1.7421526908874512, "rewards/margins": 0.9888541102409363, "rewards/rejected": -2.731006622314453, "step": 7168 }, { "epoch": 0.83, "learning_rate": 5.0561001535372616e-08, "logits/chosen": -2.3042526245117188, "logits/rejected": -2.597909927368164, "logps/chosen": -423.0379638671875, "logps/rejected": -337.80157470703125, "loss": 0.4513, "rewards/accuracies": 0.75, "rewards/chosen": -0.5529161691665649, "rewards/margins": 1.489574909210205, "rewards/rejected": -2.0424911975860596, "step": 7169 }, { "epoch": 0.83, "learning_rate": 5.052556985945435e-08, "logits/chosen": -2.178675889968872, "logits/rejected": -2.3537189960479736, "logps/chosen": -336.49029541015625, "logps/rejected": -406.6915588378906, "loss": 0.7927, "rewards/accuracies": 0.5, "rewards/chosen": -0.7185924053192139, "rewards/margins": 0.5311200618743896, "rewards/rejected": -1.249712586402893, "step": 7170 }, { "epoch": 0.83, "learning_rate": 5.049013818353608e-08, "logits/chosen": -2.5046677589416504, "logits/rejected": -2.4086878299713135, "logps/chosen": -336.1894226074219, "logps/rejected": -276.52630615234375, "loss": 0.2861, "rewards/accuracies": 0.875, "rewards/chosen": 0.04625406488776207, "rewards/margins": 4.149484634399414, "rewards/rejected": -4.103230953216553, "step": 7171 }, { "epoch": 0.83, "learning_rate": 5.045470650761781e-08, "logits/chosen": -1.7698475122451782, "logits/rejected": -2.3421552181243896, "logps/chosen": -446.08209228515625, "logps/rejected": -261.667724609375, "loss": 0.5429, "rewards/accuracies": 0.75, "rewards/chosen": -0.8496297597885132, "rewards/margins": 1.5830228328704834, "rewards/rejected": -2.432652473449707, "step": 7172 }, { "epoch": 0.83, "learning_rate": 5.041927483169954e-08, "logits/chosen": -2.478815793991089, "logits/rejected": -2.5498456954956055, "logps/chosen": -245.66696166992188, "logps/rejected": -190.73684692382812, "loss": 0.1608, "rewards/accuracies": 0.875, "rewards/chosen": -0.6056062579154968, "rewards/margins": 2.8390073776245117, "rewards/rejected": -3.4446136951446533, "step": 7173 }, { "epoch": 0.83, "learning_rate": 5.0383843155781266e-08, "logits/chosen": -2.5619640350341797, "logits/rejected": -2.5106375217437744, "logps/chosen": -219.90704345703125, "logps/rejected": -196.73098754882812, "loss": 0.3631, "rewards/accuracies": 0.875, "rewards/chosen": -0.8623713254928589, "rewards/margins": 1.038101315498352, "rewards/rejected": -1.900472640991211, "step": 7174 }, { "epoch": 0.83, "learning_rate": 5.0348411479862995e-08, "logits/chosen": -2.482436418533325, "logits/rejected": -2.0035512447357178, "logps/chosen": -292.5877685546875, "logps/rejected": -322.6624755859375, "loss": 0.3634, "rewards/accuracies": 0.875, "rewards/chosen": -0.2757464051246643, "rewards/margins": 1.472292184829712, "rewards/rejected": -1.7480387687683105, "step": 7175 }, { "epoch": 0.83, "learning_rate": 5.031297980394473e-08, "logits/chosen": -1.9767649173736572, "logits/rejected": -2.434264659881592, "logps/chosen": -517.81689453125, "logps/rejected": -351.4700927734375, "loss": 0.2437, "rewards/accuracies": 1.0, "rewards/chosen": -0.31724196672439575, "rewards/margins": 2.4893441200256348, "rewards/rejected": -2.8065860271453857, "step": 7176 }, { "epoch": 0.83, "learning_rate": 5.027754812802645e-08, "logits/chosen": -2.3275210857391357, "logits/rejected": -2.5004940032958984, "logps/chosen": -152.16619873046875, "logps/rejected": -206.15072631835938, "loss": 0.3789, "rewards/accuracies": 0.875, "rewards/chosen": -0.9410203099250793, "rewards/margins": 3.1446657180786133, "rewards/rejected": -4.085686206817627, "step": 7177 }, { "epoch": 0.84, "learning_rate": 5.024211645210818e-08, "logits/chosen": -2.4360506534576416, "logits/rejected": -2.1562728881835938, "logps/chosen": -303.3389587402344, "logps/rejected": -338.8268737792969, "loss": 0.9539, "rewards/accuracies": 0.5, "rewards/chosen": -1.5906689167022705, "rewards/margins": 0.5633884072303772, "rewards/rejected": -2.154057502746582, "step": 7178 }, { "epoch": 0.84, "learning_rate": 5.020668477618992e-08, "logits/chosen": -2.391761302947998, "logits/rejected": -2.5671191215515137, "logps/chosen": -218.13296508789062, "logps/rejected": -162.55575561523438, "loss": 0.3875, "rewards/accuracies": 0.75, "rewards/chosen": -0.8603320717811584, "rewards/margins": 1.600976586341858, "rewards/rejected": -2.461308717727661, "step": 7179 }, { "epoch": 0.84, "learning_rate": 5.017125310027164e-08, "logits/chosen": -2.1369900703430176, "logits/rejected": -2.0169990062713623, "logps/chosen": -139.8245849609375, "logps/rejected": -195.13633728027344, "loss": 0.5665, "rewards/accuracies": 0.75, "rewards/chosen": -0.9784184694290161, "rewards/margins": 0.9666426777839661, "rewards/rejected": -1.945061206817627, "step": 7180 }, { "epoch": 0.84, "learning_rate": 5.013582142435337e-08, "logits/chosen": -2.1509475708007812, "logits/rejected": -2.068617582321167, "logps/chosen": -305.7303161621094, "logps/rejected": -352.71502685546875, "loss": 0.205, "rewards/accuracies": 0.875, "rewards/chosen": -0.32909831404685974, "rewards/margins": 3.2757890224456787, "rewards/rejected": -3.6048872470855713, "step": 7181 }, { "epoch": 0.84, "learning_rate": 5.01003897484351e-08, "logits/chosen": -2.522418260574341, "logits/rejected": -2.5333523750305176, "logps/chosen": -234.79428100585938, "logps/rejected": -253.84617614746094, "loss": 0.5002, "rewards/accuracies": 0.75, "rewards/chosen": -0.44380030035972595, "rewards/margins": 2.1627559661865234, "rewards/rejected": -2.6065564155578613, "step": 7182 }, { "epoch": 0.84, "learning_rate": 5.0064958072516825e-08, "logits/chosen": -2.705954074859619, "logits/rejected": -2.9651198387145996, "logps/chosen": -177.33502197265625, "logps/rejected": -161.70809936523438, "loss": 0.2408, "rewards/accuracies": 0.875, "rewards/chosen": 0.1856221854686737, "rewards/margins": 2.831040382385254, "rewards/rejected": -2.645418405532837, "step": 7183 }, { "epoch": 0.84, "learning_rate": 5.0029526396598554e-08, "logits/chosen": -2.6874961853027344, "logits/rejected": -2.7397801876068115, "logps/chosen": -346.8856201171875, "logps/rejected": -291.56292724609375, "loss": 0.2783, "rewards/accuracies": 1.0, "rewards/chosen": -0.9492874145507812, "rewards/margins": 2.2561867237091064, "rewards/rejected": -3.2054741382598877, "step": 7184 }, { "epoch": 0.84, "learning_rate": 4.999409472068029e-08, "logits/chosen": -2.4974446296691895, "logits/rejected": -2.765751600265503, "logps/chosen": -331.3799133300781, "logps/rejected": -276.6502990722656, "loss": 0.2776, "rewards/accuracies": 0.875, "rewards/chosen": -1.2131173610687256, "rewards/margins": 3.040496349334717, "rewards/rejected": -4.253613471984863, "step": 7185 }, { "epoch": 0.84, "learning_rate": 4.995866304476201e-08, "logits/chosen": -2.320000171661377, "logits/rejected": -2.18325138092041, "logps/chosen": -108.53807830810547, "logps/rejected": -187.33087158203125, "loss": 0.5806, "rewards/accuracies": 0.75, "rewards/chosen": -0.8670979142189026, "rewards/margins": 1.4657410383224487, "rewards/rejected": -2.332839012145996, "step": 7186 }, { "epoch": 0.84, "learning_rate": 4.992323136884374e-08, "logits/chosen": -2.62608003616333, "logits/rejected": -2.6441810131073, "logps/chosen": -307.62060546875, "logps/rejected": -298.3954772949219, "loss": 0.6692, "rewards/accuracies": 0.875, "rewards/chosen": -1.3183602094650269, "rewards/margins": 1.5325745344161987, "rewards/rejected": -2.8509347438812256, "step": 7187 }, { "epoch": 0.84, "learning_rate": 4.9887799692925475e-08, "logits/chosen": -2.8479621410369873, "logits/rejected": -2.7824292182922363, "logps/chosen": -280.92852783203125, "logps/rejected": -285.9387512207031, "loss": 0.1593, "rewards/accuracies": 0.875, "rewards/chosen": -1.1127455234527588, "rewards/margins": 4.150631904602051, "rewards/rejected": -5.263377666473389, "step": 7188 }, { "epoch": 0.84, "learning_rate": 4.9852368017007204e-08, "logits/chosen": -2.352708101272583, "logits/rejected": -2.2586896419525146, "logps/chosen": -107.604248046875, "logps/rejected": -279.6939697265625, "loss": 0.3735, "rewards/accuracies": 0.75, "rewards/chosen": -0.7553883790969849, "rewards/margins": 2.1009490489959717, "rewards/rejected": -2.856337308883667, "step": 7189 }, { "epoch": 0.84, "learning_rate": 4.9816936341088926e-08, "logits/chosen": -2.1253318786621094, "logits/rejected": -2.1744556427001953, "logps/chosen": -430.69720458984375, "logps/rejected": -511.3611145019531, "loss": 0.6715, "rewards/accuracies": 0.5, "rewards/chosen": -1.2242827415466309, "rewards/margins": 3.07436466217041, "rewards/rejected": -4.298647403717041, "step": 7190 }, { "epoch": 0.84, "learning_rate": 4.978150466517066e-08, "logits/chosen": -2.7410764694213867, "logits/rejected": -3.0196354389190674, "logps/chosen": -326.5492858886719, "logps/rejected": -219.04574584960938, "loss": 0.6187, "rewards/accuracies": 0.625, "rewards/chosen": -1.7710084915161133, "rewards/margins": 0.9648841619491577, "rewards/rejected": -2.7358927726745605, "step": 7191 }, { "epoch": 0.84, "learning_rate": 4.974607298925239e-08, "logits/chosen": -2.66976261138916, "logits/rejected": -2.663522720336914, "logps/chosen": -251.55154418945312, "logps/rejected": -265.1261291503906, "loss": 0.8267, "rewards/accuracies": 0.875, "rewards/chosen": -1.3706448078155518, "rewards/margins": 1.996922254562378, "rewards/rejected": -3.3675670623779297, "step": 7192 }, { "epoch": 0.84, "learning_rate": 4.971064131333411e-08, "logits/chosen": -2.4355952739715576, "logits/rejected": -2.437936544418335, "logps/chosen": -301.8266296386719, "logps/rejected": -327.0881652832031, "loss": 0.1187, "rewards/accuracies": 1.0, "rewards/chosen": -0.7147289514541626, "rewards/margins": 3.5094902515411377, "rewards/rejected": -4.224218845367432, "step": 7193 }, { "epoch": 0.84, "learning_rate": 4.967520963741585e-08, "logits/chosen": -2.662003993988037, "logits/rejected": -2.5239040851593018, "logps/chosen": -364.93341064453125, "logps/rejected": -322.0084228515625, "loss": 0.3727, "rewards/accuracies": 0.875, "rewards/chosen": -0.7634075880050659, "rewards/margins": 1.1943705081939697, "rewards/rejected": -1.957777976989746, "step": 7194 }, { "epoch": 0.84, "learning_rate": 4.9639777961497577e-08, "logits/chosen": -2.3027572631835938, "logits/rejected": -2.4071929454803467, "logps/chosen": -292.1983642578125, "logps/rejected": -237.1532745361328, "loss": 0.4694, "rewards/accuracies": 0.875, "rewards/chosen": -2.157797336578369, "rewards/margins": 3.4086198806762695, "rewards/rejected": -5.566417694091797, "step": 7195 }, { "epoch": 0.84, "learning_rate": 4.96043462855793e-08, "logits/chosen": -1.7962408065795898, "logits/rejected": -1.6954057216644287, "logps/chosen": -462.31573486328125, "logps/rejected": -485.83935546875, "loss": 0.6266, "rewards/accuracies": 0.625, "rewards/chosen": -1.277230978012085, "rewards/margins": 0.83509361743927, "rewards/rejected": -2.1123244762420654, "step": 7196 }, { "epoch": 0.84, "learning_rate": 4.9568914609661034e-08, "logits/chosen": -2.9365549087524414, "logits/rejected": -2.8682456016540527, "logps/chosen": -259.52410888671875, "logps/rejected": -356.9435119628906, "loss": 0.2886, "rewards/accuracies": 0.875, "rewards/chosen": -0.5328123569488525, "rewards/margins": 4.5790486335754395, "rewards/rejected": -5.111860752105713, "step": 7197 }, { "epoch": 0.84, "learning_rate": 4.953348293374276e-08, "logits/chosen": -2.2652716636657715, "logits/rejected": -1.9189568758010864, "logps/chosen": -226.55422973632812, "logps/rejected": -301.32574462890625, "loss": 0.3436, "rewards/accuracies": 0.75, "rewards/chosen": -0.9521443843841553, "rewards/margins": 2.1342403888702393, "rewards/rejected": -3.0863845348358154, "step": 7198 }, { "epoch": 0.84, "learning_rate": 4.94980512578245e-08, "logits/chosen": -2.505087375640869, "logits/rejected": -2.429898262023926, "logps/chosen": -279.6246643066406, "logps/rejected": -380.42462158203125, "loss": 0.3023, "rewards/accuracies": 0.875, "rewards/chosen": -0.5368125438690186, "rewards/margins": 2.378314256668091, "rewards/rejected": -2.9151268005371094, "step": 7199 }, { "epoch": 0.84, "learning_rate": 4.946261958190622e-08, "logits/chosen": -2.4863579273223877, "logits/rejected": -2.670398235321045, "logps/chosen": -239.47076416015625, "logps/rejected": -258.3829345703125, "loss": 0.256, "rewards/accuracies": 0.875, "rewards/chosen": -1.4537231922149658, "rewards/margins": 2.9302752017974854, "rewards/rejected": -4.383998394012451, "step": 7200 }, { "epoch": 0.84, "learning_rate": 4.942718790598795e-08, "logits/chosen": -2.500920295715332, "logits/rejected": -2.4729580879211426, "logps/chosen": -285.5711364746094, "logps/rejected": -343.26190185546875, "loss": 0.2251, "rewards/accuracies": 1.0, "rewards/chosen": -0.9123934507369995, "rewards/margins": 2.7548279762268066, "rewards/rejected": -3.6672215461730957, "step": 7201 }, { "epoch": 0.84, "learning_rate": 4.9391756230069684e-08, "logits/chosen": -2.4613351821899414, "logits/rejected": -2.063239336013794, "logps/chosen": -284.67999267578125, "logps/rejected": -297.5213317871094, "loss": 0.4565, "rewards/accuracies": 0.75, "rewards/chosen": -0.5271620750427246, "rewards/margins": 1.4998664855957031, "rewards/rejected": -2.0270285606384277, "step": 7202 }, { "epoch": 0.84, "learning_rate": 4.935632455415141e-08, "logits/chosen": -2.5499303340911865, "logits/rejected": -2.449796676635742, "logps/chosen": -461.87567138671875, "logps/rejected": -303.14434814453125, "loss": 0.5376, "rewards/accuracies": 0.75, "rewards/chosen": -0.8071197271347046, "rewards/margins": 2.634397506713867, "rewards/rejected": -3.4415173530578613, "step": 7203 }, { "epoch": 0.84, "learning_rate": 4.9320892878233135e-08, "logits/chosen": -2.1091604232788086, "logits/rejected": -2.302722454071045, "logps/chosen": -340.1296081542969, "logps/rejected": -294.37481689453125, "loss": 0.3637, "rewards/accuracies": 0.75, "rewards/chosen": -1.0292718410491943, "rewards/margins": 1.7118206024169922, "rewards/rejected": -2.7410924434661865, "step": 7204 }, { "epoch": 0.84, "learning_rate": 4.928546120231487e-08, "logits/chosen": -2.2427682876586914, "logits/rejected": -2.110755443572998, "logps/chosen": -351.5255126953125, "logps/rejected": -243.82398986816406, "loss": 3.0156, "rewards/accuracies": 0.625, "rewards/chosen": -4.638721466064453, "rewards/margins": -1.6563842296600342, "rewards/rejected": -2.982337474822998, "step": 7205 }, { "epoch": 0.84, "learning_rate": 4.92500295263966e-08, "logits/chosen": -2.331860303878784, "logits/rejected": -2.322254180908203, "logps/chosen": -221.8563232421875, "logps/rejected": -266.18328857421875, "loss": 0.2676, "rewards/accuracies": 0.875, "rewards/chosen": -0.8649665117263794, "rewards/margins": 2.330371379852295, "rewards/rejected": -3.1953377723693848, "step": 7206 }, { "epoch": 0.84, "learning_rate": 4.921459785047832e-08, "logits/chosen": -2.059800386428833, "logits/rejected": -1.9741162061691284, "logps/chosen": -313.8770751953125, "logps/rejected": -258.14764404296875, "loss": 0.1277, "rewards/accuracies": 1.0, "rewards/chosen": -0.04994484782218933, "rewards/margins": 3.5578956604003906, "rewards/rejected": -3.6078405380249023, "step": 7207 }, { "epoch": 0.84, "learning_rate": 4.917916617456006e-08, "logits/chosen": -3.001967430114746, "logits/rejected": -3.098764657974243, "logps/chosen": -283.86676025390625, "logps/rejected": -337.62371826171875, "loss": 0.6908, "rewards/accuracies": 0.75, "rewards/chosen": -0.7633185386657715, "rewards/margins": 1.0960038900375366, "rewards/rejected": -1.8593225479125977, "step": 7208 }, { "epoch": 0.84, "learning_rate": 4.9143734498641786e-08, "logits/chosen": -2.4962399005889893, "logits/rejected": -2.485201120376587, "logps/chosen": -194.19058227539062, "logps/rejected": -222.79832458496094, "loss": 0.6691, "rewards/accuracies": 0.75, "rewards/chosen": -1.4197556972503662, "rewards/margins": 1.5179026126861572, "rewards/rejected": -2.9376585483551025, "step": 7209 }, { "epoch": 0.84, "learning_rate": 4.910830282272351e-08, "logits/chosen": -2.0379574298858643, "logits/rejected": -2.0484559535980225, "logps/chosen": -446.5401916503906, "logps/rejected": -300.671875, "loss": 0.7822, "rewards/accuracies": 0.625, "rewards/chosen": -1.7434964179992676, "rewards/margins": 0.24152596294879913, "rewards/rejected": -1.9850224256515503, "step": 7210 }, { "epoch": 0.84, "learning_rate": 4.907287114680524e-08, "logits/chosen": -2.2410483360290527, "logits/rejected": -2.274388313293457, "logps/chosen": -277.4230651855469, "logps/rejected": -253.0372314453125, "loss": 0.4746, "rewards/accuracies": 0.875, "rewards/chosen": -1.1444891691207886, "rewards/margins": 1.9088926315307617, "rewards/rejected": -3.05338191986084, "step": 7211 }, { "epoch": 0.84, "learning_rate": 4.903743947088697e-08, "logits/chosen": -2.04531192779541, "logits/rejected": -1.8743832111358643, "logps/chosen": -382.3240661621094, "logps/rejected": -522.414794921875, "loss": 0.1182, "rewards/accuracies": 1.0, "rewards/chosen": -0.5880683064460754, "rewards/margins": 4.421146392822266, "rewards/rejected": -5.009214401245117, "step": 7212 }, { "epoch": 0.84, "learning_rate": 4.9002007794968694e-08, "logits/chosen": -1.9024722576141357, "logits/rejected": -2.066669225692749, "logps/chosen": -169.7716827392578, "logps/rejected": -236.12615966796875, "loss": 0.3364, "rewards/accuracies": 0.875, "rewards/chosen": -0.9084323644638062, "rewards/margins": 2.4806923866271973, "rewards/rejected": -3.3891243934631348, "step": 7213 }, { "epoch": 0.84, "learning_rate": 4.896657611905043e-08, "logits/chosen": -3.0541741847991943, "logits/rejected": -2.986274480819702, "logps/chosen": -191.76718139648438, "logps/rejected": -215.1719207763672, "loss": 0.1408, "rewards/accuracies": 0.875, "rewards/chosen": -1.0319187641143799, "rewards/margins": 4.092617034912109, "rewards/rejected": -5.12453556060791, "step": 7214 }, { "epoch": 0.84, "learning_rate": 4.893114444313216e-08, "logits/chosen": -2.8095755577087402, "logits/rejected": -2.8311877250671387, "logps/chosen": -230.1826171875, "logps/rejected": -237.23745727539062, "loss": 0.353, "rewards/accuracies": 0.75, "rewards/chosen": -1.7809031009674072, "rewards/margins": 3.18636155128479, "rewards/rejected": -4.967264652252197, "step": 7215 }, { "epoch": 0.84, "learning_rate": 4.889571276721389e-08, "logits/chosen": -1.9311702251434326, "logits/rejected": -2.0014376640319824, "logps/chosen": -373.3750305175781, "logps/rejected": -339.63836669921875, "loss": 0.2122, "rewards/accuracies": 0.875, "rewards/chosen": 0.2309131622314453, "rewards/margins": 2.792041301727295, "rewards/rejected": -2.5611281394958496, "step": 7216 }, { "epoch": 0.84, "learning_rate": 4.8860281091295616e-08, "logits/chosen": -2.1293282508850098, "logits/rejected": -2.2090914249420166, "logps/chosen": -217.07582092285156, "logps/rejected": -181.185302734375, "loss": 0.9253, "rewards/accuracies": 0.625, "rewards/chosen": -1.1613630056381226, "rewards/margins": 0.9650020599365234, "rewards/rejected": -2.1263649463653564, "step": 7217 }, { "epoch": 0.84, "learning_rate": 4.8824849415377344e-08, "logits/chosen": -2.0128746032714844, "logits/rejected": -1.991823434829712, "logps/chosen": -162.3137969970703, "logps/rejected": -299.34466552734375, "loss": 0.2376, "rewards/accuracies": 0.875, "rewards/chosen": -1.1872367858886719, "rewards/margins": 4.107301235198975, "rewards/rejected": -5.294537544250488, "step": 7218 }, { "epoch": 0.84, "learning_rate": 4.878941773945907e-08, "logits/chosen": -2.3487906455993652, "logits/rejected": -2.1722235679626465, "logps/chosen": -356.243896484375, "logps/rejected": -291.303955078125, "loss": 0.2431, "rewards/accuracies": 1.0, "rewards/chosen": -0.2665559649467468, "rewards/margins": 2.201751947402954, "rewards/rejected": -2.4683079719543457, "step": 7219 }, { "epoch": 0.84, "learning_rate": 4.875398606354081e-08, "logits/chosen": -2.354848861694336, "logits/rejected": -2.545307159423828, "logps/chosen": -429.99188232421875, "logps/rejected": -350.27886962890625, "loss": 0.6112, "rewards/accuracies": 0.875, "rewards/chosen": -1.2555952072143555, "rewards/margins": 3.990487575531006, "rewards/rejected": -5.246082782745361, "step": 7220 }, { "epoch": 0.84, "learning_rate": 4.871855438762253e-08, "logits/chosen": -2.140761375427246, "logits/rejected": -2.1159398555755615, "logps/chosen": -422.00128173828125, "logps/rejected": -318.9566650390625, "loss": 0.294, "rewards/accuracies": 0.875, "rewards/chosen": -0.41823112964630127, "rewards/margins": 2.5083274841308594, "rewards/rejected": -2.926558494567871, "step": 7221 }, { "epoch": 0.84, "learning_rate": 4.868312271170426e-08, "logits/chosen": -2.4433751106262207, "logits/rejected": -2.4419424533843994, "logps/chosen": -245.48712158203125, "logps/rejected": -257.4993896484375, "loss": 0.3492, "rewards/accuracies": 0.75, "rewards/chosen": -1.036436676979065, "rewards/margins": 2.474032163619995, "rewards/rejected": -3.5104691982269287, "step": 7222 }, { "epoch": 0.84, "learning_rate": 4.8647691035785995e-08, "logits/chosen": -2.068518877029419, "logits/rejected": -2.2550837993621826, "logps/chosen": -293.7730712890625, "logps/rejected": -244.70651245117188, "loss": 0.2267, "rewards/accuracies": 0.875, "rewards/chosen": -0.7532916069030762, "rewards/margins": 2.2419207096099854, "rewards/rejected": -2.9952125549316406, "step": 7223 }, { "epoch": 0.84, "learning_rate": 4.861225935986772e-08, "logits/chosen": -2.7407784461975098, "logits/rejected": -2.7072811126708984, "logps/chosen": -195.5794677734375, "logps/rejected": -210.6586151123047, "loss": 0.2018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5897418856620789, "rewards/margins": 2.6362717151641846, "rewards/rejected": -3.226013660430908, "step": 7224 }, { "epoch": 0.84, "learning_rate": 4.8576827683949445e-08, "logits/chosen": -2.3176980018615723, "logits/rejected": -2.624950408935547, "logps/chosen": -218.81878662109375, "logps/rejected": -206.48016357421875, "loss": 0.7506, "rewards/accuracies": 0.5, "rewards/chosen": -0.8543405532836914, "rewards/margins": 0.5818531513214111, "rewards/rejected": -1.4361937046051025, "step": 7225 }, { "epoch": 0.84, "learning_rate": 4.854139600803118e-08, "logits/chosen": -2.2553768157958984, "logits/rejected": -2.471886157989502, "logps/chosen": -497.543212890625, "logps/rejected": -376.416748046875, "loss": 0.3897, "rewards/accuracies": 0.875, "rewards/chosen": -0.8622477054595947, "rewards/margins": 2.372678756713867, "rewards/rejected": -3.234926462173462, "step": 7226 }, { "epoch": 0.84, "learning_rate": 4.85059643321129e-08, "logits/chosen": -1.5439844131469727, "logits/rejected": -1.8892663717269897, "logps/chosen": -631.123291015625, "logps/rejected": -464.6608581542969, "loss": 0.2333, "rewards/accuracies": 1.0, "rewards/chosen": -0.33305123448371887, "rewards/margins": 2.1304831504821777, "rewards/rejected": -2.463534355163574, "step": 7227 }, { "epoch": 0.84, "learning_rate": 4.847053265619463e-08, "logits/chosen": -2.2936432361602783, "logits/rejected": -2.6070973873138428, "logps/chosen": -383.8769226074219, "logps/rejected": -327.04669189453125, "loss": 0.2255, "rewards/accuracies": 0.875, "rewards/chosen": -1.5188007354736328, "rewards/margins": 3.098691463470459, "rewards/rejected": -4.617491722106934, "step": 7228 }, { "epoch": 0.84, "learning_rate": 4.843510098027637e-08, "logits/chosen": -2.1351895332336426, "logits/rejected": -1.9213289022445679, "logps/chosen": -562.2785034179688, "logps/rejected": -410.59295654296875, "loss": 0.3004, "rewards/accuracies": 0.75, "rewards/chosen": -0.16033703088760376, "rewards/margins": 2.116529703140259, "rewards/rejected": -2.2768666744232178, "step": 7229 }, { "epoch": 0.84, "learning_rate": 4.8399669304358096e-08, "logits/chosen": -1.9529569149017334, "logits/rejected": -2.557175397872925, "logps/chosen": -573.6538696289062, "logps/rejected": -244.96539306640625, "loss": 0.5597, "rewards/accuracies": 0.75, "rewards/chosen": -0.6464874744415283, "rewards/margins": 1.2078299522399902, "rewards/rejected": -1.854317545890808, "step": 7230 }, { "epoch": 0.84, "learning_rate": 4.836423762843982e-08, "logits/chosen": -1.957033634185791, "logits/rejected": -2.4552161693573, "logps/chosen": -308.44842529296875, "logps/rejected": -223.10250854492188, "loss": 0.947, "rewards/accuracies": 0.75, "rewards/chosen": -1.5065529346466064, "rewards/margins": 1.11775803565979, "rewards/rejected": -2.6243109703063965, "step": 7231 }, { "epoch": 0.84, "learning_rate": 4.832880595252155e-08, "logits/chosen": -2.563549280166626, "logits/rejected": -2.4018239974975586, "logps/chosen": -169.57012939453125, "logps/rejected": -288.75555419921875, "loss": 0.2809, "rewards/accuracies": 1.0, "rewards/chosen": -0.5492761135101318, "rewards/margins": 1.7908350229263306, "rewards/rejected": -2.340111017227173, "step": 7232 }, { "epoch": 0.84, "learning_rate": 4.829337427660328e-08, "logits/chosen": -2.506622076034546, "logits/rejected": -2.203491687774658, "logps/chosen": -150.4077606201172, "logps/rejected": -209.76654052734375, "loss": 0.2685, "rewards/accuracies": 1.0, "rewards/chosen": -0.7883079648017883, "rewards/margins": 1.9063177108764648, "rewards/rejected": -2.6946258544921875, "step": 7233 }, { "epoch": 0.84, "learning_rate": 4.825794260068502e-08, "logits/chosen": -2.28299617767334, "logits/rejected": -2.447516441345215, "logps/chosen": -230.67129516601562, "logps/rejected": -130.64410400390625, "loss": 0.724, "rewards/accuracies": 0.75, "rewards/chosen": -1.035509467124939, "rewards/margins": 0.4918633699417114, "rewards/rejected": -1.5273728370666504, "step": 7234 }, { "epoch": 0.84, "learning_rate": 4.822251092476674e-08, "logits/chosen": -2.0147087574005127, "logits/rejected": -2.3300468921661377, "logps/chosen": -266.4061584472656, "logps/rejected": -153.98507690429688, "loss": 1.2363, "rewards/accuracies": 0.875, "rewards/chosen": -1.934485673904419, "rewards/margins": 0.2829618453979492, "rewards/rejected": -2.2174477577209473, "step": 7235 }, { "epoch": 0.84, "learning_rate": 4.818707924884847e-08, "logits/chosen": -2.201728343963623, "logits/rejected": -1.6382761001586914, "logps/chosen": -131.85482788085938, "logps/rejected": -402.7178039550781, "loss": 0.8423, "rewards/accuracies": 0.625, "rewards/chosen": -0.9245245456695557, "rewards/margins": 0.3916599750518799, "rewards/rejected": -1.316184639930725, "step": 7236 }, { "epoch": 0.84, "learning_rate": 4.8151647572930204e-08, "logits/chosen": -2.775913953781128, "logits/rejected": -2.571322441101074, "logps/chosen": -281.6272277832031, "logps/rejected": -358.2267150878906, "loss": 0.2819, "rewards/accuracies": 0.875, "rewards/chosen": -2.207271099090576, "rewards/margins": 2.307739019393921, "rewards/rejected": -4.515010356903076, "step": 7237 }, { "epoch": 0.84, "learning_rate": 4.8116215897011926e-08, "logits/chosen": -2.3984761238098145, "logits/rejected": -1.753377079963684, "logps/chosen": -185.28623962402344, "logps/rejected": -283.1778564453125, "loss": 0.278, "rewards/accuracies": 0.875, "rewards/chosen": -0.16058349609375, "rewards/margins": 2.5232138633728027, "rewards/rejected": -2.6837973594665527, "step": 7238 }, { "epoch": 0.84, "learning_rate": 4.8080784221093654e-08, "logits/chosen": -2.8229618072509766, "logits/rejected": -2.6311488151550293, "logps/chosen": -422.86395263671875, "logps/rejected": -289.2400207519531, "loss": 0.1496, "rewards/accuracies": 1.0, "rewards/chosen": 0.35182124376296997, "rewards/margins": 4.024928569793701, "rewards/rejected": -3.673107147216797, "step": 7239 }, { "epoch": 0.84, "learning_rate": 4.804535254517539e-08, "logits/chosen": -2.2566823959350586, "logits/rejected": -2.3186519145965576, "logps/chosen": -301.4239196777344, "logps/rejected": -294.5097351074219, "loss": 0.2774, "rewards/accuracies": 0.875, "rewards/chosen": -0.38046687841415405, "rewards/margins": 2.7626724243164062, "rewards/rejected": -3.143139362335205, "step": 7240 }, { "epoch": 0.84, "learning_rate": 4.800992086925711e-08, "logits/chosen": -2.4321773052215576, "logits/rejected": -2.408738136291504, "logps/chosen": -347.9837646484375, "logps/rejected": -256.0306701660156, "loss": 0.2982, "rewards/accuracies": 0.875, "rewards/chosen": -0.4054829478263855, "rewards/margins": 2.386599540710449, "rewards/rejected": -2.7920823097229004, "step": 7241 }, { "epoch": 0.84, "learning_rate": 4.797448919333884e-08, "logits/chosen": -2.211482524871826, "logits/rejected": -2.126861095428467, "logps/chosen": -453.6476745605469, "logps/rejected": -419.8216857910156, "loss": 0.2753, "rewards/accuracies": 0.875, "rewards/chosen": -0.5039078593254089, "rewards/margins": 2.8923816680908203, "rewards/rejected": -3.396289825439453, "step": 7242 }, { "epoch": 0.84, "learning_rate": 4.7939057517420576e-08, "logits/chosen": -2.2669050693511963, "logits/rejected": -1.838313341140747, "logps/chosen": -158.189453125, "logps/rejected": -298.2691650390625, "loss": 0.4724, "rewards/accuracies": 0.75, "rewards/chosen": -1.070899248123169, "rewards/margins": 1.594675064086914, "rewards/rejected": -2.665574312210083, "step": 7243 }, { "epoch": 0.84, "learning_rate": 4.7903625841502305e-08, "logits/chosen": -2.380558967590332, "logits/rejected": -2.4341065883636475, "logps/chosen": -338.31915283203125, "logps/rejected": -261.54193115234375, "loss": 0.1069, "rewards/accuracies": 1.0, "rewards/chosen": -0.18781372904777527, "rewards/margins": 3.0935065746307373, "rewards/rejected": -3.281320333480835, "step": 7244 }, { "epoch": 0.84, "learning_rate": 4.786819416558403e-08, "logits/chosen": -2.6138808727264404, "logits/rejected": -2.2409861087799072, "logps/chosen": -231.86610412597656, "logps/rejected": -330.18035888671875, "loss": 0.46, "rewards/accuracies": 0.875, "rewards/chosen": -0.20809203386306763, "rewards/margins": 1.9651036262512207, "rewards/rejected": -2.1731956005096436, "step": 7245 }, { "epoch": 0.84, "learning_rate": 4.783276248966576e-08, "logits/chosen": -2.254080295562744, "logits/rejected": -2.3006865978240967, "logps/chosen": -559.119873046875, "logps/rejected": -336.6722717285156, "loss": 0.3987, "rewards/accuracies": 0.875, "rewards/chosen": -1.5094993114471436, "rewards/margins": 1.923598051071167, "rewards/rejected": -3.4330973625183105, "step": 7246 }, { "epoch": 0.84, "learning_rate": 4.779733081374749e-08, "logits/chosen": -2.194765567779541, "logits/rejected": -2.332139730453491, "logps/chosen": -466.2408447265625, "logps/rejected": -268.4845886230469, "loss": 0.3918, "rewards/accuracies": 0.875, "rewards/chosen": -1.0451356172561646, "rewards/margins": 3.104945182800293, "rewards/rejected": -4.150080680847168, "step": 7247 }, { "epoch": 0.84, "learning_rate": 4.776189913782921e-08, "logits/chosen": -2.734750270843506, "logits/rejected": -2.54398775100708, "logps/chosen": -101.3779067993164, "logps/rejected": -181.5011749267578, "loss": 0.6118, "rewards/accuracies": 0.625, "rewards/chosen": -0.892003059387207, "rewards/margins": 1.018351435661316, "rewards/rejected": -1.910354495048523, "step": 7248 }, { "epoch": 0.84, "learning_rate": 4.772646746191095e-08, "logits/chosen": -1.613930344581604, "logits/rejected": -1.9587643146514893, "logps/chosen": -500.2410583496094, "logps/rejected": -404.1590576171875, "loss": 0.4243, "rewards/accuracies": 0.875, "rewards/chosen": -0.6046358942985535, "rewards/margins": 3.574763298034668, "rewards/rejected": -4.179399013519287, "step": 7249 }, { "epoch": 0.84, "learning_rate": 4.769103578599268e-08, "logits/chosen": -2.1649787425994873, "logits/rejected": -1.997015118598938, "logps/chosen": -146.96047973632812, "logps/rejected": -269.4560852050781, "loss": 0.2038, "rewards/accuracies": 0.875, "rewards/chosen": -0.33695948123931885, "rewards/margins": 3.4192728996276855, "rewards/rejected": -3.756232738494873, "step": 7250 }, { "epoch": 0.84, "learning_rate": 4.76556041100744e-08, "logits/chosen": -2.541842460632324, "logits/rejected": -2.564763069152832, "logps/chosen": -260.1632385253906, "logps/rejected": -415.3908996582031, "loss": 0.4388, "rewards/accuracies": 0.75, "rewards/chosen": -1.0138322114944458, "rewards/margins": 2.340074300765991, "rewards/rejected": -3.3539066314697266, "step": 7251 }, { "epoch": 0.84, "learning_rate": 4.7620172434156135e-08, "logits/chosen": -1.4108140468597412, "logits/rejected": -1.99052894115448, "logps/chosen": -466.72503662109375, "logps/rejected": -331.4609375, "loss": 1.3149, "rewards/accuracies": 0.625, "rewards/chosen": -2.2050328254699707, "rewards/margins": 0.39518100023269653, "rewards/rejected": -2.6002140045166016, "step": 7252 }, { "epoch": 0.84, "learning_rate": 4.7584740758237863e-08, "logits/chosen": -2.3872666358947754, "logits/rejected": -2.4127869606018066, "logps/chosen": -369.9120788574219, "logps/rejected": -313.3192138671875, "loss": 0.3907, "rewards/accuracies": 0.75, "rewards/chosen": -0.7399356961250305, "rewards/margins": 1.9257956743240356, "rewards/rejected": -2.665731191635132, "step": 7253 }, { "epoch": 0.84, "learning_rate": 4.7549309082319586e-08, "logits/chosen": -2.452110528945923, "logits/rejected": -2.2820703983306885, "logps/chosen": -234.12608337402344, "logps/rejected": -300.25909423828125, "loss": 0.4981, "rewards/accuracies": 0.875, "rewards/chosen": -0.2767859697341919, "rewards/margins": 1.9872243404388428, "rewards/rejected": -2.264010190963745, "step": 7254 }, { "epoch": 0.84, "learning_rate": 4.751387740640132e-08, "logits/chosen": -2.1036062240600586, "logits/rejected": -2.2075722217559814, "logps/chosen": -331.1767272949219, "logps/rejected": -324.660888671875, "loss": 0.1845, "rewards/accuracies": 1.0, "rewards/chosen": -0.6990199089050293, "rewards/margins": 3.088290214538574, "rewards/rejected": -3.7873098850250244, "step": 7255 }, { "epoch": 0.84, "learning_rate": 4.747844573048305e-08, "logits/chosen": -2.424408435821533, "logits/rejected": -2.554405450820923, "logps/chosen": -266.6960144042969, "logps/rejected": -233.13768005371094, "loss": 1.3851, "rewards/accuracies": 0.75, "rewards/chosen": -1.5193181037902832, "rewards/margins": 1.4815313816070557, "rewards/rejected": -3.000849485397339, "step": 7256 }, { "epoch": 0.84, "learning_rate": 4.744301405456478e-08, "logits/chosen": -2.927525758743286, "logits/rejected": -2.8531808853149414, "logps/chosen": -119.38567352294922, "logps/rejected": -169.3896484375, "loss": 0.4373, "rewards/accuracies": 0.875, "rewards/chosen": -1.2408033609390259, "rewards/margins": 1.1853594779968262, "rewards/rejected": -2.4261627197265625, "step": 7257 }, { "epoch": 0.84, "learning_rate": 4.740758237864651e-08, "logits/chosen": -2.3776731491088867, "logits/rejected": -2.134315013885498, "logps/chosen": -190.57273864746094, "logps/rejected": -122.44780731201172, "loss": 0.7977, "rewards/accuracies": 0.75, "rewards/chosen": -1.204563856124878, "rewards/margins": 1.5379582643508911, "rewards/rejected": -2.7425220012664795, "step": 7258 }, { "epoch": 0.84, "learning_rate": 4.7372150702728236e-08, "logits/chosen": -1.765716314315796, "logits/rejected": -1.9259085655212402, "logps/chosen": -420.5760498046875, "logps/rejected": -346.7299499511719, "loss": 0.5348, "rewards/accuracies": 0.75, "rewards/chosen": -0.8933874368667603, "rewards/margins": 1.587803602218628, "rewards/rejected": -2.4811909198760986, "step": 7259 }, { "epoch": 0.84, "learning_rate": 4.7336719026809965e-08, "logits/chosen": -2.3003668785095215, "logits/rejected": -2.36515474319458, "logps/chosen": -236.02853393554688, "logps/rejected": -263.72576904296875, "loss": 0.2449, "rewards/accuracies": 0.875, "rewards/chosen": -1.1744011640548706, "rewards/margins": 3.060971975326538, "rewards/rejected": -4.235373020172119, "step": 7260 }, { "epoch": 0.84, "learning_rate": 4.73012873508917e-08, "logits/chosen": -2.149463653564453, "logits/rejected": -2.3367481231689453, "logps/chosen": -327.5191345214844, "logps/rejected": -504.3203125, "loss": 0.2905, "rewards/accuracies": 0.875, "rewards/chosen": -0.5577700734138489, "rewards/margins": 3.975494384765625, "rewards/rejected": -4.533264636993408, "step": 7261 }, { "epoch": 0.84, "learning_rate": 4.726585567497342e-08, "logits/chosen": -2.584263324737549, "logits/rejected": -2.2885665893554688, "logps/chosen": -225.74264526367188, "logps/rejected": -286.761474609375, "loss": 0.5991, "rewards/accuracies": 0.625, "rewards/chosen": -1.4948036670684814, "rewards/margins": 1.0354961156845093, "rewards/rejected": -2.530299663543701, "step": 7262 }, { "epoch": 0.84, "learning_rate": 4.723042399905515e-08, "logits/chosen": -2.5677809715270996, "logits/rejected": -2.601196527481079, "logps/chosen": -235.41229248046875, "logps/rejected": -330.84295654296875, "loss": 0.5151, "rewards/accuracies": 0.75, "rewards/chosen": -0.8960225582122803, "rewards/margins": 3.263166904449463, "rewards/rejected": -4.159189701080322, "step": 7263 }, { "epoch": 0.85, "learning_rate": 4.7194992323136886e-08, "logits/chosen": -2.401327610015869, "logits/rejected": -2.3511359691619873, "logps/chosen": -154.32994079589844, "logps/rejected": -87.17759704589844, "loss": 0.8824, "rewards/accuracies": 0.5, "rewards/chosen": -0.7874640822410583, "rewards/margins": 0.6826618909835815, "rewards/rejected": -1.4701259136199951, "step": 7264 }, { "epoch": 0.85, "learning_rate": 4.715956064721861e-08, "logits/chosen": -2.4127860069274902, "logits/rejected": -2.141484022140503, "logps/chosen": -206.03335571289062, "logps/rejected": -262.7857360839844, "loss": 0.5793, "rewards/accuracies": 0.375, "rewards/chosen": -1.0386006832122803, "rewards/margins": 1.45302152633667, "rewards/rejected": -2.49162220954895, "step": 7265 }, { "epoch": 0.85, "learning_rate": 4.712412897130034e-08, "logits/chosen": -2.49318265914917, "logits/rejected": -2.472932815551758, "logps/chosen": -221.3561248779297, "logps/rejected": -228.3270263671875, "loss": 0.1906, "rewards/accuracies": 1.0, "rewards/chosen": -0.883466899394989, "rewards/margins": 3.251511573791504, "rewards/rejected": -4.134978294372559, "step": 7266 }, { "epoch": 0.85, "learning_rate": 4.708869729538207e-08, "logits/chosen": -2.6464076042175293, "logits/rejected": -2.3515431880950928, "logps/chosen": -178.24932861328125, "logps/rejected": -305.29425048828125, "loss": 0.5342, "rewards/accuracies": 0.75, "rewards/chosen": -1.0822030305862427, "rewards/margins": 1.7383692264556885, "rewards/rejected": -2.8205723762512207, "step": 7267 }, { "epoch": 0.85, "learning_rate": 4.7053265619463795e-08, "logits/chosen": -2.421297550201416, "logits/rejected": -2.26431941986084, "logps/chosen": -286.5546875, "logps/rejected": -313.0391540527344, "loss": 0.702, "rewards/accuracies": 0.625, "rewards/chosen": -1.1246646642684937, "rewards/margins": 0.7499726414680481, "rewards/rejected": -1.8746371269226074, "step": 7268 }, { "epoch": 0.85, "learning_rate": 4.701783394354552e-08, "logits/chosen": -1.9799296855926514, "logits/rejected": -2.006009578704834, "logps/chosen": -393.84661865234375, "logps/rejected": -331.9061279296875, "loss": 0.122, "rewards/accuracies": 1.0, "rewards/chosen": -1.4568634033203125, "rewards/margins": 3.3995790481567383, "rewards/rejected": -4.856442451477051, "step": 7269 }, { "epoch": 0.85, "learning_rate": 4.698240226762726e-08, "logits/chosen": -2.0191259384155273, "logits/rejected": -1.8194143772125244, "logps/chosen": -286.4261169433594, "logps/rejected": -273.0802917480469, "loss": 0.3602, "rewards/accuracies": 1.0, "rewards/chosen": -0.5703830122947693, "rewards/margins": 2.8857104778289795, "rewards/rejected": -3.4560933113098145, "step": 7270 }, { "epoch": 0.85, "learning_rate": 4.694697059170899e-08, "logits/chosen": -2.659043312072754, "logits/rejected": -2.6569063663482666, "logps/chosen": -220.573486328125, "logps/rejected": -253.46202087402344, "loss": 0.5787, "rewards/accuracies": 0.75, "rewards/chosen": -1.005614995956421, "rewards/margins": 2.4477343559265137, "rewards/rejected": -3.4533495903015137, "step": 7271 }, { "epoch": 0.85, "learning_rate": 4.6911538915790716e-08, "logits/chosen": -2.7096633911132812, "logits/rejected": -2.758909225463867, "logps/chosen": -283.8525390625, "logps/rejected": -214.4556884765625, "loss": 0.2851, "rewards/accuracies": 1.0, "rewards/chosen": -0.9787612557411194, "rewards/margins": 1.5498037338256836, "rewards/rejected": -2.528564929962158, "step": 7272 }, { "epoch": 0.85, "learning_rate": 4.6876107239872445e-08, "logits/chosen": -2.6067609786987305, "logits/rejected": -2.5524420738220215, "logps/chosen": -257.0869140625, "logps/rejected": -265.0462646484375, "loss": 0.1589, "rewards/accuracies": 1.0, "rewards/chosen": -0.8060624599456787, "rewards/margins": 2.8491783142089844, "rewards/rejected": -3.655240774154663, "step": 7273 }, { "epoch": 0.85, "learning_rate": 4.6840675563954174e-08, "logits/chosen": -2.4515979290008545, "logits/rejected": -2.5218050479888916, "logps/chosen": -178.76333618164062, "logps/rejected": -246.77127075195312, "loss": 0.2888, "rewards/accuracies": 0.875, "rewards/chosen": -0.46322980523109436, "rewards/margins": 4.3714823722839355, "rewards/rejected": -4.834712505340576, "step": 7274 }, { "epoch": 0.85, "learning_rate": 4.680524388803591e-08, "logits/chosen": -2.321800947189331, "logits/rejected": -2.4098000526428223, "logps/chosen": -79.12568664550781, "logps/rejected": -205.3490447998047, "loss": 0.2232, "rewards/accuracies": 0.875, "rewards/chosen": 0.10156463086605072, "rewards/margins": 2.2132318019866943, "rewards/rejected": -2.1116671562194824, "step": 7275 }, { "epoch": 0.85, "learning_rate": 4.676981221211763e-08, "logits/chosen": -2.6799166202545166, "logits/rejected": -2.7415380477905273, "logps/chosen": -219.96633911132812, "logps/rejected": -218.77890014648438, "loss": 0.2914, "rewards/accuracies": 0.875, "rewards/chosen": -0.564933180809021, "rewards/margins": 2.2625296115875244, "rewards/rejected": -2.827463150024414, "step": 7276 }, { "epoch": 0.85, "learning_rate": 4.673438053619936e-08, "logits/chosen": -2.455326795578003, "logits/rejected": -2.2442874908447266, "logps/chosen": -257.2427978515625, "logps/rejected": -343.1708984375, "loss": 0.1499, "rewards/accuracies": 1.0, "rewards/chosen": -0.259770929813385, "rewards/margins": 3.680382490158081, "rewards/rejected": -3.9401533603668213, "step": 7277 }, { "epoch": 0.85, "learning_rate": 4.6698948860281095e-08, "logits/chosen": -2.714839458465576, "logits/rejected": -2.6816189289093018, "logps/chosen": -271.8675537109375, "logps/rejected": -182.2960662841797, "loss": 0.3584, "rewards/accuracies": 0.875, "rewards/chosen": -0.888410210609436, "rewards/margins": 2.527264356613159, "rewards/rejected": -3.4156746864318848, "step": 7278 }, { "epoch": 0.85, "learning_rate": 4.666351718436282e-08, "logits/chosen": -2.672159433364868, "logits/rejected": -2.6080496311187744, "logps/chosen": -189.35609436035156, "logps/rejected": -346.45562744140625, "loss": 0.4633, "rewards/accuracies": 0.75, "rewards/chosen": -0.8895136117935181, "rewards/margins": 1.8329894542694092, "rewards/rejected": -2.7225027084350586, "step": 7279 }, { "epoch": 0.85, "learning_rate": 4.6628085508444546e-08, "logits/chosen": -2.141854763031006, "logits/rejected": -2.3860459327697754, "logps/chosen": -342.33001708984375, "logps/rejected": -298.0542297363281, "loss": 0.1175, "rewards/accuracies": 1.0, "rewards/chosen": 0.10440558195114136, "rewards/margins": 4.352990627288818, "rewards/rejected": -4.248584747314453, "step": 7280 }, { "epoch": 0.85, "learning_rate": 4.659265383252628e-08, "logits/chosen": -2.0554325580596924, "logits/rejected": -2.048275947570801, "logps/chosen": -171.28135681152344, "logps/rejected": -172.1506805419922, "loss": 0.3773, "rewards/accuracies": 0.875, "rewards/chosen": -1.5406771898269653, "rewards/margins": 1.4699583053588867, "rewards/rejected": -3.0106353759765625, "step": 7281 }, { "epoch": 0.85, "learning_rate": 4.6557222156608004e-08, "logits/chosen": -2.335662603378296, "logits/rejected": -2.3045449256896973, "logps/chosen": -461.6492614746094, "logps/rejected": -406.17431640625, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": 0.204179048538208, "rewards/margins": 2.9035849571228027, "rewards/rejected": -2.6994056701660156, "step": 7282 }, { "epoch": 0.85, "learning_rate": 4.652179048068973e-08, "logits/chosen": -2.1742541790008545, "logits/rejected": -1.9896347522735596, "logps/chosen": -275.878173828125, "logps/rejected": -334.9149169921875, "loss": 0.3586, "rewards/accuracies": 0.875, "rewards/chosen": -0.032356590032577515, "rewards/margins": 2.391522169113159, "rewards/rejected": -2.4238789081573486, "step": 7283 }, { "epoch": 0.85, "learning_rate": 4.648635880477147e-08, "logits/chosen": -2.765040397644043, "logits/rejected": -2.660111427307129, "logps/chosen": -404.5589904785156, "logps/rejected": -341.6977233886719, "loss": 0.3524, "rewards/accuracies": 0.875, "rewards/chosen": -0.8929305076599121, "rewards/margins": 2.4329757690429688, "rewards/rejected": -3.325906276702881, "step": 7284 }, { "epoch": 0.85, "learning_rate": 4.645092712885319e-08, "logits/chosen": -2.5072879791259766, "logits/rejected": -2.1639630794525146, "logps/chosen": -191.58786010742188, "logps/rejected": -234.97933959960938, "loss": 0.3774, "rewards/accuracies": 0.875, "rewards/chosen": -1.467726707458496, "rewards/margins": 2.0871779918670654, "rewards/rejected": -3.5549044609069824, "step": 7285 }, { "epoch": 0.85, "learning_rate": 4.641549545293492e-08, "logits/chosen": -2.447723388671875, "logits/rejected": -2.430340051651001, "logps/chosen": -146.14358520507812, "logps/rejected": -198.13125610351562, "loss": 0.4157, "rewards/accuracies": 0.75, "rewards/chosen": -0.6975700855255127, "rewards/margins": 1.5042892694473267, "rewards/rejected": -2.20185923576355, "step": 7286 }, { "epoch": 0.85, "learning_rate": 4.6380063777016654e-08, "logits/chosen": -1.7529547214508057, "logits/rejected": -2.16302227973938, "logps/chosen": -329.9844665527344, "logps/rejected": -208.68714904785156, "loss": 0.6914, "rewards/accuracies": 0.625, "rewards/chosen": -0.9004249572753906, "rewards/margins": 1.0736174583435059, "rewards/rejected": -1.974042534828186, "step": 7287 }, { "epoch": 0.85, "learning_rate": 4.634463210109838e-08, "logits/chosen": -2.842928647994995, "logits/rejected": -2.8077797889709473, "logps/chosen": -331.58056640625, "logps/rejected": -271.06689453125, "loss": 0.3279, "rewards/accuracies": 0.875, "rewards/chosen": -1.0677239894866943, "rewards/margins": 2.935922145843506, "rewards/rejected": -4.003645896911621, "step": 7288 }, { "epoch": 0.85, "learning_rate": 4.6309200425180105e-08, "logits/chosen": -1.9115217924118042, "logits/rejected": -2.0723447799682617, "logps/chosen": -224.51515197753906, "logps/rejected": -217.2948455810547, "loss": 0.5154, "rewards/accuracies": 0.75, "rewards/chosen": -0.22290608286857605, "rewards/margins": 1.5727462768554688, "rewards/rejected": -1.7956522703170776, "step": 7289 }, { "epoch": 0.85, "learning_rate": 4.627376874926184e-08, "logits/chosen": -1.9597933292388916, "logits/rejected": -1.6078323125839233, "logps/chosen": -330.3284606933594, "logps/rejected": -403.7808837890625, "loss": 0.5954, "rewards/accuracies": 0.625, "rewards/chosen": -0.9570811986923218, "rewards/margins": 1.2108606100082397, "rewards/rejected": -2.1679418087005615, "step": 7290 }, { "epoch": 0.85, "learning_rate": 4.623833707334357e-08, "logits/chosen": -2.5812323093414307, "logits/rejected": -2.6780014038085938, "logps/chosen": -216.42526245117188, "logps/rejected": -220.69215393066406, "loss": 0.236, "rewards/accuracies": 1.0, "rewards/chosen": -0.5631855726242065, "rewards/margins": 1.8326056003570557, "rewards/rejected": -2.3957910537719727, "step": 7291 }, { "epoch": 0.85, "learning_rate": 4.620290539742529e-08, "logits/chosen": -2.6994714736938477, "logits/rejected": -2.549102783203125, "logps/chosen": -289.257568359375, "logps/rejected": -294.6665954589844, "loss": 0.2565, "rewards/accuracies": 1.0, "rewards/chosen": -0.9338828325271606, "rewards/margins": 2.419266700744629, "rewards/rejected": -3.3531494140625, "step": 7292 }, { "epoch": 0.85, "learning_rate": 4.6167473721507026e-08, "logits/chosen": -2.8870503902435303, "logits/rejected": -2.7311618328094482, "logps/chosen": -244.91143798828125, "logps/rejected": -270.19293212890625, "loss": 0.2166, "rewards/accuracies": 0.875, "rewards/chosen": -1.0011372566223145, "rewards/margins": 2.6142542362213135, "rewards/rejected": -3.615391492843628, "step": 7293 }, { "epoch": 0.85, "learning_rate": 4.6132042045588755e-08, "logits/chosen": -2.5575194358825684, "logits/rejected": -2.5408082008361816, "logps/chosen": -252.397705078125, "logps/rejected": -307.4574890136719, "loss": 0.2709, "rewards/accuracies": 1.0, "rewards/chosen": -0.9205053448677063, "rewards/margins": 2.34898042678833, "rewards/rejected": -3.2694859504699707, "step": 7294 }, { "epoch": 0.85, "learning_rate": 4.609661036967048e-08, "logits/chosen": -2.4763169288635254, "logits/rejected": -2.484689235687256, "logps/chosen": -154.07408142089844, "logps/rejected": -240.88601684570312, "loss": 0.2365, "rewards/accuracies": 0.75, "rewards/chosen": -0.05343359708786011, "rewards/margins": 3.399695634841919, "rewards/rejected": -3.453129291534424, "step": 7295 }, { "epoch": 0.85, "learning_rate": 4.606117869375221e-08, "logits/chosen": -2.9426636695861816, "logits/rejected": -2.9807536602020264, "logps/chosen": -281.87213134765625, "logps/rejected": -216.0591583251953, "loss": 0.1605, "rewards/accuracies": 1.0, "rewards/chosen": 0.2178903967142105, "rewards/margins": 3.311535120010376, "rewards/rejected": -3.093644857406616, "step": 7296 }, { "epoch": 0.85, "learning_rate": 4.602574701783394e-08, "logits/chosen": -2.1503169536590576, "logits/rejected": -2.439971923828125, "logps/chosen": -394.3768310546875, "logps/rejected": -253.49044799804688, "loss": 0.5404, "rewards/accuracies": 0.75, "rewards/chosen": -1.4857573509216309, "rewards/margins": 1.5544641017913818, "rewards/rejected": -3.0402212142944336, "step": 7297 }, { "epoch": 0.85, "learning_rate": 4.599031534191567e-08, "logits/chosen": -2.464210033416748, "logits/rejected": -2.250335216522217, "logps/chosen": -213.20858764648438, "logps/rejected": -276.6716613769531, "loss": 0.3806, "rewards/accuracies": 0.75, "rewards/chosen": -1.0574941635131836, "rewards/margins": 1.8820066452026367, "rewards/rejected": -2.9395008087158203, "step": 7298 }, { "epoch": 0.85, "learning_rate": 4.59548836659974e-08, "logits/chosen": -2.2917168140411377, "logits/rejected": -2.3977363109588623, "logps/chosen": -324.33740234375, "logps/rejected": -220.11997985839844, "loss": 0.2255, "rewards/accuracies": 1.0, "rewards/chosen": -0.7214868664741516, "rewards/margins": 1.813875675201416, "rewards/rejected": -2.535362720489502, "step": 7299 }, { "epoch": 0.85, "learning_rate": 4.591945199007913e-08, "logits/chosen": -2.425290822982788, "logits/rejected": -2.6377906799316406, "logps/chosen": -297.11822509765625, "logps/rejected": -274.6890563964844, "loss": 0.4428, "rewards/accuracies": 0.75, "rewards/chosen": -1.0619198083877563, "rewards/margins": 1.762895107269287, "rewards/rejected": -2.824815034866333, "step": 7300 }, { "epoch": 0.85, "learning_rate": 4.5884020314160856e-08, "logits/chosen": -2.3076775074005127, "logits/rejected": -2.3018112182617188, "logps/chosen": -273.1908874511719, "logps/rejected": -291.8925476074219, "loss": 0.2205, "rewards/accuracies": 1.0, "rewards/chosen": 0.05372549593448639, "rewards/margins": 2.1939239501953125, "rewards/rejected": -2.140198230743408, "step": 7301 }, { "epoch": 0.85, "learning_rate": 4.584858863824259e-08, "logits/chosen": -1.8830373287200928, "logits/rejected": -2.193453311920166, "logps/chosen": -314.8589782714844, "logps/rejected": -273.13226318359375, "loss": 0.6747, "rewards/accuracies": 0.5, "rewards/chosen": -1.5208985805511475, "rewards/margins": 1.2162978649139404, "rewards/rejected": -2.737196445465088, "step": 7302 }, { "epoch": 0.85, "learning_rate": 4.5813156962324314e-08, "logits/chosen": -2.23165225982666, "logits/rejected": -2.290553331375122, "logps/chosen": -147.7615966796875, "logps/rejected": -226.345458984375, "loss": 0.5152, "rewards/accuracies": 0.625, "rewards/chosen": -1.449201226234436, "rewards/margins": 1.4160616397857666, "rewards/rejected": -2.865262985229492, "step": 7303 }, { "epoch": 0.85, "learning_rate": 4.577772528640604e-08, "logits/chosen": -2.585507392883301, "logits/rejected": -2.7994539737701416, "logps/chosen": -281.14892578125, "logps/rejected": -213.46600341796875, "loss": 0.2368, "rewards/accuracies": 0.875, "rewards/chosen": -0.6074389219284058, "rewards/margins": 2.2927401065826416, "rewards/rejected": -2.900179147720337, "step": 7304 }, { "epoch": 0.85, "learning_rate": 4.574229361048778e-08, "logits/chosen": -2.5350453853607178, "logits/rejected": -2.583035945892334, "logps/chosen": -325.54400634765625, "logps/rejected": -350.11724853515625, "loss": 0.6793, "rewards/accuracies": 0.75, "rewards/chosen": -1.2575048208236694, "rewards/margins": 2.7942183017730713, "rewards/rejected": -4.051723003387451, "step": 7305 }, { "epoch": 0.85, "learning_rate": 4.57068619345695e-08, "logits/chosen": -2.1299540996551514, "logits/rejected": -2.1954190731048584, "logps/chosen": -240.97962951660156, "logps/rejected": -310.7934875488281, "loss": 0.302, "rewards/accuracies": 0.625, "rewards/chosen": -0.5525453686714172, "rewards/margins": 2.854924201965332, "rewards/rejected": -3.4074697494506836, "step": 7306 }, { "epoch": 0.85, "learning_rate": 4.5671430258651235e-08, "logits/chosen": -2.6079864501953125, "logits/rejected": -2.624528169631958, "logps/chosen": -180.7661895751953, "logps/rejected": -216.1150360107422, "loss": 0.5087, "rewards/accuracies": 0.625, "rewards/chosen": -1.0996644496917725, "rewards/margins": 1.8805136680603027, "rewards/rejected": -2.9801783561706543, "step": 7307 }, { "epoch": 0.85, "learning_rate": 4.5635998582732964e-08, "logits/chosen": -2.121796131134033, "logits/rejected": -2.2555434703826904, "logps/chosen": -244.13775634765625, "logps/rejected": -216.88279724121094, "loss": 0.5468, "rewards/accuracies": 0.875, "rewards/chosen": -0.8599817752838135, "rewards/margins": 1.8738982677459717, "rewards/rejected": -2.733880043029785, "step": 7308 }, { "epoch": 0.85, "learning_rate": 4.5600566906814686e-08, "logits/chosen": -2.39072585105896, "logits/rejected": -2.057681083679199, "logps/chosen": -199.20980834960938, "logps/rejected": -280.323486328125, "loss": 0.6579, "rewards/accuracies": 0.625, "rewards/chosen": -1.8685728311538696, "rewards/margins": 1.7132220268249512, "rewards/rejected": -3.5817947387695312, "step": 7309 }, { "epoch": 0.85, "learning_rate": 4.556513523089642e-08, "logits/chosen": -1.8494014739990234, "logits/rejected": -1.8063623905181885, "logps/chosen": -99.88765716552734, "logps/rejected": -185.90750122070312, "loss": 0.3857, "rewards/accuracies": 0.625, "rewards/chosen": -1.5681368112564087, "rewards/margins": 1.4627563953399658, "rewards/rejected": -3.030893087387085, "step": 7310 }, { "epoch": 0.85, "learning_rate": 4.552970355497815e-08, "logits/chosen": -2.0807783603668213, "logits/rejected": -1.9514029026031494, "logps/chosen": -192.13504028320312, "logps/rejected": -187.4327850341797, "loss": 0.1708, "rewards/accuracies": 1.0, "rewards/chosen": -0.7305519580841064, "rewards/margins": 2.657336473464966, "rewards/rejected": -3.3878884315490723, "step": 7311 }, { "epoch": 0.85, "learning_rate": 4.549427187905987e-08, "logits/chosen": -2.2088825702667236, "logits/rejected": -2.146001100540161, "logps/chosen": -362.84393310546875, "logps/rejected": -361.1010437011719, "loss": 0.4923, "rewards/accuracies": 0.75, "rewards/chosen": -0.9589045643806458, "rewards/margins": 2.5212488174438477, "rewards/rejected": -3.4801535606384277, "step": 7312 }, { "epoch": 0.85, "learning_rate": 4.545884020314161e-08, "logits/chosen": -2.6832966804504395, "logits/rejected": -2.7223803997039795, "logps/chosen": -230.97933959960938, "logps/rejected": -193.51158142089844, "loss": 0.4517, "rewards/accuracies": 0.625, "rewards/chosen": -2.0606417655944824, "rewards/margins": 2.6232733726501465, "rewards/rejected": -4.683915615081787, "step": 7313 }, { "epoch": 0.85, "learning_rate": 4.5423408527223337e-08, "logits/chosen": -3.075793981552124, "logits/rejected": -3.0130958557128906, "logps/chosen": -248.43783569335938, "logps/rejected": -259.7244873046875, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -0.4550700783729553, "rewards/margins": 4.59153938293457, "rewards/rejected": -5.046609878540039, "step": 7314 }, { "epoch": 0.85, "learning_rate": 4.5387976851305065e-08, "logits/chosen": -2.730975866317749, "logits/rejected": -2.5815534591674805, "logps/chosen": -144.22885131835938, "logps/rejected": -174.00917053222656, "loss": 0.5294, "rewards/accuracies": 0.625, "rewards/chosen": -1.5326787233352661, "rewards/margins": 2.6661484241485596, "rewards/rejected": -4.198827266693115, "step": 7315 }, { "epoch": 0.85, "learning_rate": 4.5352545175386794e-08, "logits/chosen": -2.2427690029144287, "logits/rejected": -2.2293930053710938, "logps/chosen": -261.3278503417969, "logps/rejected": -307.0343322753906, "loss": 0.3902, "rewards/accuracies": 0.875, "rewards/chosen": -1.0258545875549316, "rewards/margins": 1.7701010704040527, "rewards/rejected": -2.7959556579589844, "step": 7316 }, { "epoch": 0.85, "learning_rate": 4.531711349946852e-08, "logits/chosen": -2.2118663787841797, "logits/rejected": -2.3856520652770996, "logps/chosen": -280.12738037109375, "logps/rejected": -235.937255859375, "loss": 0.2233, "rewards/accuracies": 0.875, "rewards/chosen": -0.5190629959106445, "rewards/margins": 2.63476300239563, "rewards/rejected": -3.1538262367248535, "step": 7317 }, { "epoch": 0.85, "learning_rate": 4.528168182355025e-08, "logits/chosen": -1.7535436153411865, "logits/rejected": -1.4196557998657227, "logps/chosen": -322.5681457519531, "logps/rejected": -377.7336730957031, "loss": 0.3204, "rewards/accuracies": 0.75, "rewards/chosen": -0.3288496732711792, "rewards/margins": 2.1085805892944336, "rewards/rejected": -2.437429904937744, "step": 7318 }, { "epoch": 0.85, "learning_rate": 4.524625014763199e-08, "logits/chosen": -2.6656863689422607, "logits/rejected": -2.6345558166503906, "logps/chosen": -250.86964416503906, "logps/rejected": -147.78956604003906, "loss": 0.424, "rewards/accuracies": 0.75, "rewards/chosen": -1.1686431169509888, "rewards/margins": 1.6843843460083008, "rewards/rejected": -2.85302734375, "step": 7319 }, { "epoch": 0.85, "learning_rate": 4.521081847171371e-08, "logits/chosen": -2.2911293506622314, "logits/rejected": -2.187514305114746, "logps/chosen": -408.85400390625, "logps/rejected": -565.2117309570312, "loss": 0.2077, "rewards/accuracies": 0.875, "rewards/chosen": -1.0001400709152222, "rewards/margins": 2.9090380668640137, "rewards/rejected": -3.9091782569885254, "step": 7320 }, { "epoch": 0.85, "learning_rate": 4.517538679579544e-08, "logits/chosen": -2.46730375289917, "logits/rejected": -2.466951608657837, "logps/chosen": -344.14239501953125, "logps/rejected": -327.0296936035156, "loss": 0.2758, "rewards/accuracies": 1.0, "rewards/chosen": -0.5047395825386047, "rewards/margins": 2.55302095413208, "rewards/rejected": -3.05776047706604, "step": 7321 }, { "epoch": 0.85, "learning_rate": 4.513995511987717e-08, "logits/chosen": -2.267716407775879, "logits/rejected": -2.31528377532959, "logps/chosen": -311.2690734863281, "logps/rejected": -450.06195068359375, "loss": 0.1099, "rewards/accuracies": 1.0, "rewards/chosen": -0.9901121854782104, "rewards/margins": 3.8002209663391113, "rewards/rejected": -4.790333271026611, "step": 7322 }, { "epoch": 0.85, "learning_rate": 4.5104523443958895e-08, "logits/chosen": -2.4814443588256836, "logits/rejected": -2.648103713989258, "logps/chosen": -124.02961730957031, "logps/rejected": -206.2607879638672, "loss": 0.303, "rewards/accuracies": 0.875, "rewards/chosen": -1.5065128803253174, "rewards/margins": 2.5960605144500732, "rewards/rejected": -4.102573394775391, "step": 7323 }, { "epoch": 0.85, "learning_rate": 4.5069091768040624e-08, "logits/chosen": -2.1511263847351074, "logits/rejected": -2.410322904586792, "logps/chosen": -431.5431823730469, "logps/rejected": -219.86117553710938, "loss": 1.4617, "rewards/accuracies": 0.375, "rewards/chosen": -1.4425389766693115, "rewards/margins": -0.10781902074813843, "rewards/rejected": -1.3347200155258179, "step": 7324 }, { "epoch": 0.85, "learning_rate": 4.503366009212236e-08, "logits/chosen": -1.5254638195037842, "logits/rejected": -1.9191210269927979, "logps/chosen": -321.00286865234375, "logps/rejected": -243.042724609375, "loss": 0.5345, "rewards/accuracies": 0.625, "rewards/chosen": -0.7982950806617737, "rewards/margins": 1.0435179471969604, "rewards/rejected": -1.841813087463379, "step": 7325 }, { "epoch": 0.85, "learning_rate": 4.499822841620408e-08, "logits/chosen": -2.350990056991577, "logits/rejected": -2.269261598587036, "logps/chosen": -389.68414306640625, "logps/rejected": -455.2854919433594, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": -0.2596868574619293, "rewards/margins": 3.7737104892730713, "rewards/rejected": -4.033397197723389, "step": 7326 }, { "epoch": 0.85, "learning_rate": 4.496279674028581e-08, "logits/chosen": -2.333441734313965, "logits/rejected": -2.5623414516448975, "logps/chosen": -411.7857666015625, "logps/rejected": -279.7125244140625, "loss": 0.1492, "rewards/accuracies": 1.0, "rewards/chosen": -0.4713912010192871, "rewards/margins": 2.7497403621673584, "rewards/rejected": -3.2211318016052246, "step": 7327 }, { "epoch": 0.85, "learning_rate": 4.4927365064367546e-08, "logits/chosen": -2.7420661449432373, "logits/rejected": -2.5432658195495605, "logps/chosen": -220.08737182617188, "logps/rejected": -378.4952392578125, "loss": 0.7052, "rewards/accuracies": 0.625, "rewards/chosen": -1.2540576457977295, "rewards/margins": 0.8891851305961609, "rewards/rejected": -2.143242835998535, "step": 7328 }, { "epoch": 0.85, "learning_rate": 4.4891933388449274e-08, "logits/chosen": -2.457857608795166, "logits/rejected": -2.5072457790374756, "logps/chosen": -228.43380737304688, "logps/rejected": -201.6914825439453, "loss": 0.2936, "rewards/accuracies": 0.75, "rewards/chosen": -1.0318962335586548, "rewards/margins": 1.7700797319412231, "rewards/rejected": -2.801976203918457, "step": 7329 }, { "epoch": 0.85, "learning_rate": 4.4856501712530996e-08, "logits/chosen": -2.224651336669922, "logits/rejected": -2.359795331954956, "logps/chosen": -306.9964599609375, "logps/rejected": -282.6907958984375, "loss": 1.2043, "rewards/accuracies": 0.625, "rewards/chosen": -1.0140880346298218, "rewards/margins": 0.547762930393219, "rewards/rejected": -1.5618510246276855, "step": 7330 }, { "epoch": 0.85, "learning_rate": 4.482107003661273e-08, "logits/chosen": -2.0483202934265137, "logits/rejected": -2.1713271141052246, "logps/chosen": -315.455322265625, "logps/rejected": -288.97686767578125, "loss": 0.1838, "rewards/accuracies": 0.875, "rewards/chosen": -1.1263471841812134, "rewards/margins": 2.8268942832946777, "rewards/rejected": -3.9532415866851807, "step": 7331 }, { "epoch": 0.85, "learning_rate": 4.478563836069446e-08, "logits/chosen": -2.7036354541778564, "logits/rejected": -2.9165408611297607, "logps/chosen": -398.4561462402344, "logps/rejected": -275.6676025390625, "loss": 0.487, "rewards/accuracies": 0.875, "rewards/chosen": -0.958478569984436, "rewards/margins": 1.7154631614685059, "rewards/rejected": -2.6739416122436523, "step": 7332 }, { "epoch": 0.85, "learning_rate": 4.475020668477618e-08, "logits/chosen": -2.4982547760009766, "logits/rejected": -2.56457781791687, "logps/chosen": -294.2204895019531, "logps/rejected": -303.8901062011719, "loss": 0.345, "rewards/accuracies": 1.0, "rewards/chosen": -0.7577487230300903, "rewards/margins": 2.3451154232025146, "rewards/rejected": -3.1028642654418945, "step": 7333 }, { "epoch": 0.85, "learning_rate": 4.471477500885792e-08, "logits/chosen": -2.7136337757110596, "logits/rejected": -2.5595827102661133, "logps/chosen": -301.575439453125, "logps/rejected": -250.81031799316406, "loss": 0.4709, "rewards/accuracies": 0.75, "rewards/chosen": -1.2759387493133545, "rewards/margins": 1.0416202545166016, "rewards/rejected": -2.317559003829956, "step": 7334 }, { "epoch": 0.85, "learning_rate": 4.467934333293965e-08, "logits/chosen": -2.797588586807251, "logits/rejected": -2.792172431945801, "logps/chosen": -247.49037170410156, "logps/rejected": -268.3847351074219, "loss": 0.1458, "rewards/accuracies": 1.0, "rewards/chosen": -1.210490107536316, "rewards/margins": 3.2887914180755615, "rewards/rejected": -4.499281406402588, "step": 7335 }, { "epoch": 0.85, "learning_rate": 4.464391165702137e-08, "logits/chosen": -2.456329822540283, "logits/rejected": -2.4930102825164795, "logps/chosen": -219.3330535888672, "logps/rejected": -214.12879943847656, "loss": 0.4986, "rewards/accuracies": 0.75, "rewards/chosen": -0.49085357785224915, "rewards/margins": 1.2563579082489014, "rewards/rejected": -1.7472115755081177, "step": 7336 }, { "epoch": 0.85, "learning_rate": 4.4608479981103104e-08, "logits/chosen": -2.347580671310425, "logits/rejected": -2.3174405097961426, "logps/chosen": -207.07760620117188, "logps/rejected": -273.7772521972656, "loss": 0.3723, "rewards/accuracies": 0.75, "rewards/chosen": -1.3522975444793701, "rewards/margins": 1.8547861576080322, "rewards/rejected": -3.2070837020874023, "step": 7337 }, { "epoch": 0.85, "learning_rate": 4.457304830518483e-08, "logits/chosen": -2.2284743785858154, "logits/rejected": -2.3651833534240723, "logps/chosen": -382.514892578125, "logps/rejected": -313.3924865722656, "loss": 0.4812, "rewards/accuracies": 0.625, "rewards/chosen": -0.2831036150455475, "rewards/margins": 3.44006609916687, "rewards/rejected": -3.7231693267822266, "step": 7338 }, { "epoch": 0.85, "learning_rate": 4.453761662926656e-08, "logits/chosen": -2.4326493740081787, "logits/rejected": -2.604255199432373, "logps/chosen": -325.2414245605469, "logps/rejected": -308.76422119140625, "loss": 0.198, "rewards/accuracies": 0.875, "rewards/chosen": 0.08794661611318588, "rewards/margins": 2.2480509281158447, "rewards/rejected": -2.160104274749756, "step": 7339 }, { "epoch": 0.85, "learning_rate": 4.450218495334829e-08, "logits/chosen": -3.033639669418335, "logits/rejected": -3.0171189308166504, "logps/chosen": -381.73809814453125, "logps/rejected": -268.6075744628906, "loss": 0.2248, "rewards/accuracies": 0.875, "rewards/chosen": -0.39719754457473755, "rewards/margins": 3.028110980987549, "rewards/rejected": -3.4253087043762207, "step": 7340 }, { "epoch": 0.85, "learning_rate": 4.446675327743002e-08, "logits/chosen": -1.9905246496200562, "logits/rejected": -2.3176980018615723, "logps/chosen": -300.2572021484375, "logps/rejected": -182.72653198242188, "loss": 0.3015, "rewards/accuracies": 0.875, "rewards/chosen": -0.40734192728996277, "rewards/margins": 2.7294883728027344, "rewards/rejected": -3.1368300914764404, "step": 7341 }, { "epoch": 0.85, "learning_rate": 4.443132160151175e-08, "logits/chosen": -2.6938867568969727, "logits/rejected": -2.5876338481903076, "logps/chosen": -126.16220092773438, "logps/rejected": -421.3466491699219, "loss": 0.245, "rewards/accuracies": 0.875, "rewards/chosen": -0.16083520650863647, "rewards/margins": 3.228377342224121, "rewards/rejected": -3.3892123699188232, "step": 7342 }, { "epoch": 0.85, "learning_rate": 4.4395889925593477e-08, "logits/chosen": -2.5300486087799072, "logits/rejected": -2.609264850616455, "logps/chosen": -176.7534637451172, "logps/rejected": -211.013916015625, "loss": 0.4029, "rewards/accuracies": 0.625, "rewards/chosen": -0.7653611898422241, "rewards/margins": 2.3819620609283447, "rewards/rejected": -3.1473228931427, "step": 7343 }, { "epoch": 0.85, "learning_rate": 4.4360458249675205e-08, "logits/chosen": -2.1275603771209717, "logits/rejected": -2.098900556564331, "logps/chosen": -501.35125732421875, "logps/rejected": -475.7704162597656, "loss": 0.2273, "rewards/accuracies": 1.0, "rewards/chosen": -0.17431814968585968, "rewards/margins": 2.5970346927642822, "rewards/rejected": -2.771352767944336, "step": 7344 }, { "epoch": 0.85, "learning_rate": 4.432502657375694e-08, "logits/chosen": -1.5648515224456787, "logits/rejected": -1.6907507181167603, "logps/chosen": -258.5078125, "logps/rejected": -240.20907592773438, "loss": 0.3443, "rewards/accuracies": 0.875, "rewards/chosen": -0.04035671427845955, "rewards/margins": 1.763710379600525, "rewards/rejected": -1.8040671348571777, "step": 7345 }, { "epoch": 0.85, "learning_rate": 4.428959489783867e-08, "logits/chosen": -2.6907835006713867, "logits/rejected": -2.3726606369018555, "logps/chosen": -246.93577575683594, "logps/rejected": -347.5126647949219, "loss": 0.4622, "rewards/accuracies": 0.75, "rewards/chosen": -0.7086324691772461, "rewards/margins": 1.4070476293563843, "rewards/rejected": -2.115679979324341, "step": 7346 }, { "epoch": 0.85, "learning_rate": 4.425416322192039e-08, "logits/chosen": -2.6631593704223633, "logits/rejected": -2.3611233234405518, "logps/chosen": -269.1463623046875, "logps/rejected": -288.017333984375, "loss": 0.5056, "rewards/accuracies": 0.625, "rewards/chosen": -1.4231998920440674, "rewards/margins": 1.5668487548828125, "rewards/rejected": -2.990048408508301, "step": 7347 }, { "epoch": 0.85, "learning_rate": 4.421873154600213e-08, "logits/chosen": -2.6333627700805664, "logits/rejected": -2.8049678802490234, "logps/chosen": -323.4835205078125, "logps/rejected": -298.0621337890625, "loss": 0.6085, "rewards/accuracies": 0.75, "rewards/chosen": -1.5537779331207275, "rewards/margins": 3.883803367614746, "rewards/rejected": -5.4375810623168945, "step": 7348 }, { "epoch": 0.85, "learning_rate": 4.4183299870083856e-08, "logits/chosen": -2.355320930480957, "logits/rejected": -1.959317684173584, "logps/chosen": -192.6806640625, "logps/rejected": -252.55422973632812, "loss": 0.6499, "rewards/accuracies": 0.75, "rewards/chosen": -1.5370771884918213, "rewards/margins": 1.104249119758606, "rewards/rejected": -2.641326427459717, "step": 7349 }, { "epoch": 0.86, "learning_rate": 4.414786819416558e-08, "logits/chosen": -2.4984817504882812, "logits/rejected": -2.256387948989868, "logps/chosen": -152.04742431640625, "logps/rejected": -222.860595703125, "loss": 0.3841, "rewards/accuracies": 1.0, "rewards/chosen": -1.243741750717163, "rewards/margins": 1.2439498901367188, "rewards/rejected": -2.487691640853882, "step": 7350 }, { "epoch": 0.86, "learning_rate": 4.411243651824731e-08, "logits/chosen": -2.295140504837036, "logits/rejected": -2.4356613159179688, "logps/chosen": -264.34686279296875, "logps/rejected": -224.52957153320312, "loss": 0.5032, "rewards/accuracies": 0.75, "rewards/chosen": -1.0905935764312744, "rewards/margins": 2.6236824989318848, "rewards/rejected": -3.7142763137817383, "step": 7351 }, { "epoch": 0.86, "learning_rate": 4.407700484232904e-08, "logits/chosen": -2.3862545490264893, "logits/rejected": -2.2282204627990723, "logps/chosen": -249.524169921875, "logps/rejected": -374.1593322753906, "loss": 0.1805, "rewards/accuracies": 1.0, "rewards/chosen": -0.9359601736068726, "rewards/margins": 2.4585745334625244, "rewards/rejected": -3.3945350646972656, "step": 7352 }, { "epoch": 0.86, "learning_rate": 4.4041573166410764e-08, "logits/chosen": -2.181784152984619, "logits/rejected": -2.1110732555389404, "logps/chosen": -349.128173828125, "logps/rejected": -245.64462280273438, "loss": 0.263, "rewards/accuracies": 1.0, "rewards/chosen": -1.7415443658828735, "rewards/margins": 1.6274608373641968, "rewards/rejected": -3.3690052032470703, "step": 7353 }, { "epoch": 0.86, "learning_rate": 4.40061414904925e-08, "logits/chosen": -2.189936399459839, "logits/rejected": -2.192434310913086, "logps/chosen": -251.35302734375, "logps/rejected": -261.8049011230469, "loss": 0.3262, "rewards/accuracies": 1.0, "rewards/chosen": -1.1037545204162598, "rewards/margins": 1.503386378288269, "rewards/rejected": -2.6071410179138184, "step": 7354 }, { "epoch": 0.86, "learning_rate": 4.397070981457423e-08, "logits/chosen": -2.8448739051818848, "logits/rejected": -2.682577610015869, "logps/chosen": -254.49685668945312, "logps/rejected": -261.059326171875, "loss": 0.0923, "rewards/accuracies": 1.0, "rewards/chosen": -0.7456955313682556, "rewards/margins": 3.2433173656463623, "rewards/rejected": -3.9890127182006836, "step": 7355 }, { "epoch": 0.86, "learning_rate": 4.393527813865596e-08, "logits/chosen": -2.3786914348602295, "logits/rejected": -2.1240782737731934, "logps/chosen": -261.5246887207031, "logps/rejected": -341.02191162109375, "loss": 0.3705, "rewards/accuracies": 0.625, "rewards/chosen": -0.5833624005317688, "rewards/margins": 1.6898002624511719, "rewards/rejected": -2.273162603378296, "step": 7356 }, { "epoch": 0.86, "learning_rate": 4.3899846462737686e-08, "logits/chosen": -2.348440408706665, "logits/rejected": -2.585839033126831, "logps/chosen": -321.4947204589844, "logps/rejected": -248.88595581054688, "loss": 0.2493, "rewards/accuracies": 0.875, "rewards/chosen": -0.5428101420402527, "rewards/margins": 3.4239184856414795, "rewards/rejected": -3.966728687286377, "step": 7357 }, { "epoch": 0.86, "learning_rate": 4.3864414786819414e-08, "logits/chosen": -2.483617067337036, "logits/rejected": -2.618009090423584, "logps/chosen": -346.47344970703125, "logps/rejected": -286.251220703125, "loss": 0.3223, "rewards/accuracies": 0.75, "rewards/chosen": -0.8473241329193115, "rewards/margins": 2.324265718460083, "rewards/rejected": -3.1715898513793945, "step": 7358 }, { "epoch": 0.86, "learning_rate": 4.382898311090114e-08, "logits/chosen": -1.977848768234253, "logits/rejected": -2.2220499515533447, "logps/chosen": -397.8751220703125, "logps/rejected": -344.96240234375, "loss": 0.2382, "rewards/accuracies": 0.875, "rewards/chosen": 0.46844589710235596, "rewards/margins": 2.8950376510620117, "rewards/rejected": -2.4265918731689453, "step": 7359 }, { "epoch": 0.86, "learning_rate": 4.379355143498288e-08, "logits/chosen": -2.09094500541687, "logits/rejected": -2.161003351211548, "logps/chosen": -109.50414276123047, "logps/rejected": -196.18084716796875, "loss": 0.325, "rewards/accuracies": 0.875, "rewards/chosen": -0.6929395198822021, "rewards/margins": 3.1939101219177246, "rewards/rejected": -3.886849880218506, "step": 7360 }, { "epoch": 0.86, "learning_rate": 4.37581197590646e-08, "logits/chosen": -2.589751720428467, "logits/rejected": -2.550672769546509, "logps/chosen": -425.80865478515625, "logps/rejected": -446.1686706542969, "loss": 0.2548, "rewards/accuracies": 0.875, "rewards/chosen": -0.9233182072639465, "rewards/margins": 2.5910022258758545, "rewards/rejected": -3.5143206119537354, "step": 7361 }, { "epoch": 0.86, "learning_rate": 4.372268808314633e-08, "logits/chosen": -2.833526134490967, "logits/rejected": -2.9369349479675293, "logps/chosen": -283.0848083496094, "logps/rejected": -203.12779235839844, "loss": 0.2802, "rewards/accuracies": 1.0, "rewards/chosen": -0.6595021486282349, "rewards/margins": 1.9044163227081299, "rewards/rejected": -2.5639185905456543, "step": 7362 }, { "epoch": 0.86, "learning_rate": 4.3687256407228065e-08, "logits/chosen": -2.37874436378479, "logits/rejected": -2.519209384918213, "logps/chosen": -292.4700927734375, "logps/rejected": -348.56402587890625, "loss": 0.3207, "rewards/accuracies": 0.875, "rewards/chosen": -0.30318683385849, "rewards/margins": 2.921664237976074, "rewards/rejected": -3.22485089302063, "step": 7363 }, { "epoch": 0.86, "learning_rate": 4.365182473130979e-08, "logits/chosen": -2.8587770462036133, "logits/rejected": -2.9571266174316406, "logps/chosen": -315.1448974609375, "logps/rejected": -237.55828857421875, "loss": 0.1719, "rewards/accuracies": 0.875, "rewards/chosen": 0.13579526543617249, "rewards/margins": 3.241835832595825, "rewards/rejected": -3.1060407161712646, "step": 7364 }, { "epoch": 0.86, "learning_rate": 4.3616393055391516e-08, "logits/chosen": -1.664933204650879, "logits/rejected": -1.9434778690338135, "logps/chosen": -414.9933166503906, "logps/rejected": -304.25433349609375, "loss": 0.4602, "rewards/accuracies": 0.625, "rewards/chosen": -1.0309959650039673, "rewards/margins": 1.5149590969085693, "rewards/rejected": -2.545955181121826, "step": 7365 }, { "epoch": 0.86, "learning_rate": 4.358096137947325e-08, "logits/chosen": -1.8133764266967773, "logits/rejected": -1.7225836515426636, "logps/chosen": -512.2757568359375, "logps/rejected": -453.98162841796875, "loss": 0.2106, "rewards/accuracies": 0.875, "rewards/chosen": -0.12128639221191406, "rewards/margins": 2.3654098510742188, "rewards/rejected": -2.486696243286133, "step": 7366 }, { "epoch": 0.86, "learning_rate": 4.354552970355497e-08, "logits/chosen": -2.7024364471435547, "logits/rejected": -2.636679172515869, "logps/chosen": -381.48486328125, "logps/rejected": -271.9089050292969, "loss": 0.3597, "rewards/accuracies": 0.75, "rewards/chosen": -1.3332599401474, "rewards/margins": 2.525358200073242, "rewards/rejected": -3.8586180210113525, "step": 7367 }, { "epoch": 0.86, "learning_rate": 4.35100980276367e-08, "logits/chosen": -1.972661018371582, "logits/rejected": -2.104764699935913, "logps/chosen": -202.8380126953125, "logps/rejected": -198.59889221191406, "loss": 0.5995, "rewards/accuracies": 0.625, "rewards/chosen": -1.1534758806228638, "rewards/margins": 0.9941418170928955, "rewards/rejected": -2.147617816925049, "step": 7368 }, { "epoch": 0.86, "learning_rate": 4.347466635171844e-08, "logits/chosen": -2.427387237548828, "logits/rejected": -2.156757116317749, "logps/chosen": -130.0189208984375, "logps/rejected": -277.1494140625, "loss": 0.2506, "rewards/accuracies": 1.0, "rewards/chosen": -0.8823142647743225, "rewards/margins": 1.7950465679168701, "rewards/rejected": -2.6773605346679688, "step": 7369 }, { "epoch": 0.86, "learning_rate": 4.3439234675800166e-08, "logits/chosen": -2.401526927947998, "logits/rejected": -2.3885245323181152, "logps/chosen": -157.12550354003906, "logps/rejected": -234.8175811767578, "loss": 0.2258, "rewards/accuracies": 0.875, "rewards/chosen": -0.8497658371925354, "rewards/margins": 2.542341709136963, "rewards/rejected": -3.3921074867248535, "step": 7370 }, { "epoch": 0.86, "learning_rate": 4.340380299988189e-08, "logits/chosen": -1.8605473041534424, "logits/rejected": -1.9628580808639526, "logps/chosen": -528.0529174804688, "logps/rejected": -404.09149169921875, "loss": 0.6271, "rewards/accuracies": 0.625, "rewards/chosen": -0.7081144452095032, "rewards/margins": 0.9320942163467407, "rewards/rejected": -1.6402087211608887, "step": 7371 }, { "epoch": 0.86, "learning_rate": 4.3368371323963623e-08, "logits/chosen": -2.4336495399475098, "logits/rejected": -2.492370843887329, "logps/chosen": -329.91912841796875, "logps/rejected": -265.0862731933594, "loss": 0.2661, "rewards/accuracies": 0.875, "rewards/chosen": -0.4976869225502014, "rewards/margins": 2.27584171295166, "rewards/rejected": -2.7735283374786377, "step": 7372 }, { "epoch": 0.86, "learning_rate": 4.333293964804535e-08, "logits/chosen": -2.6293628215789795, "logits/rejected": -2.544898509979248, "logps/chosen": -162.8737030029297, "logps/rejected": -245.86972045898438, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -0.9415404796600342, "rewards/margins": 3.002837657928467, "rewards/rejected": -3.944378137588501, "step": 7373 }, { "epoch": 0.86, "learning_rate": 4.3297507972127074e-08, "logits/chosen": -2.3336076736450195, "logits/rejected": -2.4770853519439697, "logps/chosen": -174.3780975341797, "logps/rejected": -186.2784423828125, "loss": 0.3702, "rewards/accuracies": 0.875, "rewards/chosen": -0.3997822105884552, "rewards/margins": 1.1106431484222412, "rewards/rejected": -1.5104254484176636, "step": 7374 }, { "epoch": 0.86, "learning_rate": 4.326207629620881e-08, "logits/chosen": -2.3757927417755127, "logits/rejected": -2.2179641723632812, "logps/chosen": -265.916748046875, "logps/rejected": -217.0623016357422, "loss": 0.3156, "rewards/accuracies": 0.875, "rewards/chosen": -0.582455039024353, "rewards/margins": 1.695911169052124, "rewards/rejected": -2.2783660888671875, "step": 7375 }, { "epoch": 0.86, "learning_rate": 4.322664462029054e-08, "logits/chosen": -2.112644672393799, "logits/rejected": -2.2404441833496094, "logps/chosen": -345.8306884765625, "logps/rejected": -262.53302001953125, "loss": 0.6556, "rewards/accuracies": 0.875, "rewards/chosen": -0.7384858131408691, "rewards/margins": 2.3227713108062744, "rewards/rejected": -3.0612571239471436, "step": 7376 }, { "epoch": 0.86, "learning_rate": 4.319121294437226e-08, "logits/chosen": -2.4567487239837646, "logits/rejected": -2.778498411178589, "logps/chosen": -377.16558837890625, "logps/rejected": -191.76596069335938, "loss": 0.8374, "rewards/accuracies": 0.75, "rewards/chosen": -1.351951003074646, "rewards/margins": 0.6583741903305054, "rewards/rejected": -2.0103251934051514, "step": 7377 }, { "epoch": 0.86, "learning_rate": 4.3155781268453996e-08, "logits/chosen": -2.286167621612549, "logits/rejected": -2.3983213901519775, "logps/chosen": -326.8046875, "logps/rejected": -351.3918151855469, "loss": 0.4032, "rewards/accuracies": 0.875, "rewards/chosen": -1.3703670501708984, "rewards/margins": 1.6795284748077393, "rewards/rejected": -3.0498955249786377, "step": 7378 }, { "epoch": 0.86, "learning_rate": 4.3120349592535725e-08, "logits/chosen": -2.304551124572754, "logits/rejected": -2.578907012939453, "logps/chosen": -310.7557067871094, "logps/rejected": -248.43692016601562, "loss": 0.3673, "rewards/accuracies": 0.75, "rewards/chosen": -1.4836699962615967, "rewards/margins": 1.8275984525680542, "rewards/rejected": -3.3112685680389404, "step": 7379 }, { "epoch": 0.86, "learning_rate": 4.308491791661746e-08, "logits/chosen": -2.6922130584716797, "logits/rejected": -2.6473400592803955, "logps/chosen": -257.68267822265625, "logps/rejected": -368.7127990722656, "loss": 0.4121, "rewards/accuracies": 0.75, "rewards/chosen": -0.6306822896003723, "rewards/margins": 4.126639366149902, "rewards/rejected": -4.757321834564209, "step": 7380 }, { "epoch": 0.86, "learning_rate": 4.304948624069918e-08, "logits/chosen": -2.354098320007324, "logits/rejected": -2.554706573486328, "logps/chosen": -192.09414672851562, "logps/rejected": -282.3305358886719, "loss": 0.4369, "rewards/accuracies": 0.625, "rewards/chosen": -1.2429633140563965, "rewards/margins": 3.739074945449829, "rewards/rejected": -4.982038497924805, "step": 7381 }, { "epoch": 0.86, "learning_rate": 4.301405456478091e-08, "logits/chosen": -1.6650643348693848, "logits/rejected": -1.769336462020874, "logps/chosen": -279.493408203125, "logps/rejected": -268.69842529296875, "loss": 0.3475, "rewards/accuracies": 0.75, "rewards/chosen": -0.5399657487869263, "rewards/margins": 2.4125709533691406, "rewards/rejected": -2.9525370597839355, "step": 7382 }, { "epoch": 0.86, "learning_rate": 4.2978622888862646e-08, "logits/chosen": -2.931657075881958, "logits/rejected": -2.8996098041534424, "logps/chosen": -239.61407470703125, "logps/rejected": -352.2484130859375, "loss": 0.7259, "rewards/accuracies": 0.75, "rewards/chosen": -1.4589930772781372, "rewards/margins": 1.556034803390503, "rewards/rejected": -3.0150279998779297, "step": 7383 }, { "epoch": 0.86, "learning_rate": 4.294319121294437e-08, "logits/chosen": -2.546023368835449, "logits/rejected": -2.750946521759033, "logps/chosen": -433.675537109375, "logps/rejected": -293.0964050292969, "loss": 0.4032, "rewards/accuracies": 0.875, "rewards/chosen": -0.7835550308227539, "rewards/margins": 1.8370479345321655, "rewards/rejected": -2.620603084564209, "step": 7384 }, { "epoch": 0.86, "learning_rate": 4.29077595370261e-08, "logits/chosen": -2.4985239505767822, "logits/rejected": -2.5038416385650635, "logps/chosen": -300.75, "logps/rejected": -313.0320129394531, "loss": 0.3923, "rewards/accuracies": 0.875, "rewards/chosen": -1.234737753868103, "rewards/margins": 1.8056812286376953, "rewards/rejected": -3.0404186248779297, "step": 7385 }, { "epoch": 0.86, "learning_rate": 4.287232786110783e-08, "logits/chosen": -2.685487747192383, "logits/rejected": -2.625850200653076, "logps/chosen": -191.33912658691406, "logps/rejected": -215.94918823242188, "loss": 0.1168, "rewards/accuracies": 1.0, "rewards/chosen": -0.698525071144104, "rewards/margins": 2.3235015869140625, "rewards/rejected": -3.022026538848877, "step": 7386 }, { "epoch": 0.86, "learning_rate": 4.283689618518956e-08, "logits/chosen": -2.461545467376709, "logits/rejected": -2.7273592948913574, "logps/chosen": -293.3512268066406, "logps/rejected": -207.5708465576172, "loss": 0.5294, "rewards/accuracies": 0.625, "rewards/chosen": -0.9508684873580933, "rewards/margins": 1.2214903831481934, "rewards/rejected": -2.172358989715576, "step": 7387 }, { "epoch": 0.86, "learning_rate": 4.280146450927128e-08, "logits/chosen": -2.522770404815674, "logits/rejected": -2.8090028762817383, "logps/chosen": -428.70001220703125, "logps/rejected": -192.25653076171875, "loss": 0.477, "rewards/accuracies": 0.875, "rewards/chosen": -0.8552604913711548, "rewards/margins": 1.2574939727783203, "rewards/rejected": -2.1127543449401855, "step": 7388 }, { "epoch": 0.86, "learning_rate": 4.276603283335302e-08, "logits/chosen": -2.806605815887451, "logits/rejected": -2.7857701778411865, "logps/chosen": -157.57241821289062, "logps/rejected": -265.8868408203125, "loss": 0.2303, "rewards/accuracies": 1.0, "rewards/chosen": -0.9828293323516846, "rewards/margins": 1.7199809551239014, "rewards/rejected": -2.702810287475586, "step": 7389 }, { "epoch": 0.86, "learning_rate": 4.273060115743475e-08, "logits/chosen": -2.4018545150756836, "logits/rejected": -2.34497332572937, "logps/chosen": -256.3289489746094, "logps/rejected": -242.50669860839844, "loss": 0.4223, "rewards/accuracies": 0.75, "rewards/chosen": -0.6920467019081116, "rewards/margins": 1.7317196130752563, "rewards/rejected": -2.4237663745880127, "step": 7390 }, { "epoch": 0.86, "learning_rate": 4.269516948151647e-08, "logits/chosen": -2.208813190460205, "logits/rejected": -2.35867977142334, "logps/chosen": -454.2183532714844, "logps/rejected": -380.28094482421875, "loss": 0.4718, "rewards/accuracies": 0.625, "rewards/chosen": -1.8198089599609375, "rewards/margins": 1.8115113973617554, "rewards/rejected": -3.6313204765319824, "step": 7391 }, { "epoch": 0.86, "learning_rate": 4.2659737805598205e-08, "logits/chosen": -2.3672101497650146, "logits/rejected": -2.246225595474243, "logps/chosen": -310.89276123046875, "logps/rejected": -323.53106689453125, "loss": 0.2991, "rewards/accuracies": 0.875, "rewards/chosen": -0.6137959361076355, "rewards/margins": 1.4820036888122559, "rewards/rejected": -2.095799684524536, "step": 7392 }, { "epoch": 0.86, "learning_rate": 4.2624306129679934e-08, "logits/chosen": -2.124868869781494, "logits/rejected": -2.3782637119293213, "logps/chosen": -381.2189025878906, "logps/rejected": -233.09320068359375, "loss": 0.3013, "rewards/accuracies": 0.875, "rewards/chosen": -0.4157032072544098, "rewards/margins": 1.3839783668518066, "rewards/rejected": -1.799681544303894, "step": 7393 }, { "epoch": 0.86, "learning_rate": 4.2588874453761656e-08, "logits/chosen": -1.9648553133010864, "logits/rejected": -1.8516488075256348, "logps/chosen": -217.54640197753906, "logps/rejected": -269.01708984375, "loss": 0.4474, "rewards/accuracies": 0.625, "rewards/chosen": -0.46976763010025024, "rewards/margins": 1.8575575351715088, "rewards/rejected": -2.3273251056671143, "step": 7394 }, { "epoch": 0.86, "learning_rate": 4.255344277784339e-08, "logits/chosen": -2.5727288722991943, "logits/rejected": -2.2998297214508057, "logps/chosen": -634.0182495117188, "logps/rejected": -318.913330078125, "loss": 0.7269, "rewards/accuracies": 0.625, "rewards/chosen": -1.8111302852630615, "rewards/margins": 1.3398747444152832, "rewards/rejected": -3.1510050296783447, "step": 7395 }, { "epoch": 0.86, "learning_rate": 4.251801110192512e-08, "logits/chosen": -2.254666328430176, "logits/rejected": -2.427865743637085, "logps/chosen": -382.54217529296875, "logps/rejected": -413.5176696777344, "loss": 0.4112, "rewards/accuracies": 0.75, "rewards/chosen": -0.1472088247537613, "rewards/margins": 2.9594085216522217, "rewards/rejected": -3.1066174507141113, "step": 7396 }, { "epoch": 0.86, "learning_rate": 4.248257942600685e-08, "logits/chosen": -2.7846522331237793, "logits/rejected": -2.6029982566833496, "logps/chosen": -128.48672485351562, "logps/rejected": -262.2070007324219, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": -0.903617799282074, "rewards/margins": 4.666691303253174, "rewards/rejected": -5.570309162139893, "step": 7397 }, { "epoch": 0.86, "learning_rate": 4.244714775008858e-08, "logits/chosen": -2.501453161239624, "logits/rejected": -2.4899227619171143, "logps/chosen": -329.1939697265625, "logps/rejected": -219.50958251953125, "loss": 0.2049, "rewards/accuracies": 0.875, "rewards/chosen": -1.044801950454712, "rewards/margins": 3.2693843841552734, "rewards/rejected": -4.314186096191406, "step": 7398 }, { "epoch": 0.86, "learning_rate": 4.2411716074170306e-08, "logits/chosen": -2.5210464000701904, "logits/rejected": -2.7877557277679443, "logps/chosen": -246.3433837890625, "logps/rejected": -250.97911071777344, "loss": 0.1806, "rewards/accuracies": 0.875, "rewards/chosen": -0.49204790592193604, "rewards/margins": 3.2195796966552734, "rewards/rejected": -3.711627721786499, "step": 7399 }, { "epoch": 0.86, "learning_rate": 4.2376284398252035e-08, "logits/chosen": -2.650407552719116, "logits/rejected": -2.4828267097473145, "logps/chosen": -294.6566162109375, "logps/rejected": -377.3023376464844, "loss": 0.1316, "rewards/accuracies": 1.0, "rewards/chosen": -0.5528160929679871, "rewards/margins": 3.740328550338745, "rewards/rejected": -4.293144702911377, "step": 7400 }, { "epoch": 0.86, "learning_rate": 4.234085272233377e-08, "logits/chosen": -2.2268126010894775, "logits/rejected": -2.099517345428467, "logps/chosen": -432.6499328613281, "logps/rejected": -392.90692138671875, "loss": 0.2718, "rewards/accuracies": 0.875, "rewards/chosen": -1.2720386981964111, "rewards/margins": 2.1240715980529785, "rewards/rejected": -3.3961100578308105, "step": 7401 }, { "epoch": 0.86, "learning_rate": 4.230542104641549e-08, "logits/chosen": -2.3600640296936035, "logits/rejected": -2.3782622814178467, "logps/chosen": -236.00759887695312, "logps/rejected": -184.910400390625, "loss": 0.3231, "rewards/accuracies": 0.875, "rewards/chosen": -0.5416573882102966, "rewards/margins": 1.570248007774353, "rewards/rejected": -2.111905336380005, "step": 7402 }, { "epoch": 0.86, "learning_rate": 4.226998937049722e-08, "logits/chosen": -2.7298436164855957, "logits/rejected": -2.5093538761138916, "logps/chosen": -283.2736511230469, "logps/rejected": -431.697509765625, "loss": 0.1879, "rewards/accuracies": 0.875, "rewards/chosen": -0.4580395817756653, "rewards/margins": 3.1673338413238525, "rewards/rejected": -3.625373601913452, "step": 7403 }, { "epoch": 0.86, "learning_rate": 4.2234557694578956e-08, "logits/chosen": -2.519705295562744, "logits/rejected": -2.308548927307129, "logps/chosen": -239.23609924316406, "logps/rejected": -305.8492736816406, "loss": 0.3443, "rewards/accuracies": 0.75, "rewards/chosen": -0.6551697850227356, "rewards/margins": 1.9842864274978638, "rewards/rejected": -2.639456272125244, "step": 7404 }, { "epoch": 0.86, "learning_rate": 4.219912601866068e-08, "logits/chosen": -2.256939649581909, "logits/rejected": -2.2368788719177246, "logps/chosen": -480.8223876953125, "logps/rejected": -411.6327819824219, "loss": 0.1857, "rewards/accuracies": 1.0, "rewards/chosen": -0.7915116548538208, "rewards/margins": 2.8935739994049072, "rewards/rejected": -3.6850857734680176, "step": 7405 }, { "epoch": 0.86, "learning_rate": 4.216369434274241e-08, "logits/chosen": -2.414038896560669, "logits/rejected": -2.380126714706421, "logps/chosen": -192.4914093017578, "logps/rejected": -223.1443328857422, "loss": 0.313, "rewards/accuracies": 0.875, "rewards/chosen": -0.6677817106246948, "rewards/margins": 2.699429988861084, "rewards/rejected": -3.3672118186950684, "step": 7406 }, { "epoch": 0.86, "learning_rate": 4.212826266682414e-08, "logits/chosen": -2.2251546382904053, "logits/rejected": -1.926259994506836, "logps/chosen": -162.0848388671875, "logps/rejected": -258.6600036621094, "loss": 0.4616, "rewards/accuracies": 0.5, "rewards/chosen": -0.5949878096580505, "rewards/margins": 1.5750833749771118, "rewards/rejected": -2.1700711250305176, "step": 7407 }, { "epoch": 0.86, "learning_rate": 4.2092830990905865e-08, "logits/chosen": -2.2294363975524902, "logits/rejected": -2.3565242290496826, "logps/chosen": -283.98040771484375, "logps/rejected": -242.69631958007812, "loss": 0.3315, "rewards/accuracies": 0.75, "rewards/chosen": -0.5434924960136414, "rewards/margins": 2.5101566314697266, "rewards/rejected": -3.053649425506592, "step": 7408 }, { "epoch": 0.86, "learning_rate": 4.2057399314987593e-08, "logits/chosen": -2.0891835689544678, "logits/rejected": -2.0022828578948975, "logps/chosen": -181.45394897460938, "logps/rejected": -255.66970825195312, "loss": 0.8165, "rewards/accuracies": 0.75, "rewards/chosen": -0.9042434692382812, "rewards/margins": 3.483339309692383, "rewards/rejected": -4.387583255767822, "step": 7409 }, { "epoch": 0.86, "learning_rate": 4.202196763906933e-08, "logits/chosen": -2.2725718021392822, "logits/rejected": -2.4005653858184814, "logps/chosen": -424.0126953125, "logps/rejected": -235.87477111816406, "loss": 0.3122, "rewards/accuracies": 0.875, "rewards/chosen": -0.45461320877075195, "rewards/margins": 1.5001500844955444, "rewards/rejected": -1.954763412475586, "step": 7410 }, { "epoch": 0.86, "learning_rate": 4.198653596315105e-08, "logits/chosen": -2.4087111949920654, "logits/rejected": -2.3705334663391113, "logps/chosen": -220.966064453125, "logps/rejected": -242.06295776367188, "loss": 0.4211, "rewards/accuracies": 0.875, "rewards/chosen": -0.3674432635307312, "rewards/margins": 2.2516703605651855, "rewards/rejected": -2.6191139221191406, "step": 7411 }, { "epoch": 0.86, "learning_rate": 4.195110428723278e-08, "logits/chosen": -2.6623263359069824, "logits/rejected": -2.8734216690063477, "logps/chosen": -348.59185791015625, "logps/rejected": -299.2456970214844, "loss": 0.3761, "rewards/accuracies": 0.875, "rewards/chosen": -0.8946435451507568, "rewards/margins": 2.402216911315918, "rewards/rejected": -3.296860694885254, "step": 7412 }, { "epoch": 0.86, "learning_rate": 4.1915672611314515e-08, "logits/chosen": -2.646946668624878, "logits/rejected": -2.519709348678589, "logps/chosen": -197.58558654785156, "logps/rejected": -237.58331298828125, "loss": 0.4294, "rewards/accuracies": 0.75, "rewards/chosen": -1.8241658210754395, "rewards/margins": 1.91841721534729, "rewards/rejected": -3.7425830364227295, "step": 7413 }, { "epoch": 0.86, "learning_rate": 4.1880240935396244e-08, "logits/chosen": -2.493227005004883, "logits/rejected": -2.519759178161621, "logps/chosen": -253.68472290039062, "logps/rejected": -211.34213256835938, "loss": 0.3613, "rewards/accuracies": 0.625, "rewards/chosen": -1.036268711090088, "rewards/margins": 2.2553019523620605, "rewards/rejected": -3.2915706634521484, "step": 7414 }, { "epoch": 0.86, "learning_rate": 4.184480925947797e-08, "logits/chosen": -2.224849224090576, "logits/rejected": -2.3085567951202393, "logps/chosen": -401.8448181152344, "logps/rejected": -241.9927978515625, "loss": 0.6435, "rewards/accuracies": 0.625, "rewards/chosen": -1.6740058660507202, "rewards/margins": 0.5749037861824036, "rewards/rejected": -2.2489094734191895, "step": 7415 }, { "epoch": 0.86, "learning_rate": 4.18093775835597e-08, "logits/chosen": -2.768008232116699, "logits/rejected": -2.6141791343688965, "logps/chosen": -285.63409423828125, "logps/rejected": -297.1236267089844, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": -0.3284810185432434, "rewards/margins": 2.3039073944091797, "rewards/rejected": -2.632388114929199, "step": 7416 }, { "epoch": 0.86, "learning_rate": 4.177394590764143e-08, "logits/chosen": -2.1684062480926514, "logits/rejected": -2.398832082748413, "logps/chosen": -279.0743408203125, "logps/rejected": -250.26022338867188, "loss": 0.3322, "rewards/accuracies": 0.875, "rewards/chosen": -1.5370237827301025, "rewards/margins": 1.8731234073638916, "rewards/rejected": -3.410147190093994, "step": 7417 }, { "epoch": 0.86, "learning_rate": 4.1738514231723165e-08, "logits/chosen": -2.397645950317383, "logits/rejected": -2.6282565593719482, "logps/chosen": -388.2731018066406, "logps/rejected": -303.35137939453125, "loss": 0.3917, "rewards/accuracies": 0.75, "rewards/chosen": -0.6384521722793579, "rewards/margins": 1.7636640071868896, "rewards/rejected": -2.402116298675537, "step": 7418 }, { "epoch": 0.86, "learning_rate": 4.170308255580489e-08, "logits/chosen": -2.3994252681732178, "logits/rejected": -2.2704200744628906, "logps/chosen": -389.8521728515625, "logps/rejected": -470.2279052734375, "loss": 0.2325, "rewards/accuracies": 0.875, "rewards/chosen": -1.5709764957427979, "rewards/margins": 2.6057043075561523, "rewards/rejected": -4.176681041717529, "step": 7419 }, { "epoch": 0.86, "learning_rate": 4.1667650879886616e-08, "logits/chosen": -2.4409396648406982, "logits/rejected": -2.5146567821502686, "logps/chosen": -457.4732971191406, "logps/rejected": -453.3367614746094, "loss": 0.1461, "rewards/accuracies": 1.0, "rewards/chosen": -0.4311331808567047, "rewards/margins": 2.905954360961914, "rewards/rejected": -3.337087631225586, "step": 7420 }, { "epoch": 0.86, "learning_rate": 4.163221920396835e-08, "logits/chosen": -2.0087342262268066, "logits/rejected": -2.2692270278930664, "logps/chosen": -269.1477355957031, "logps/rejected": -286.487060546875, "loss": 0.2986, "rewards/accuracies": 0.875, "rewards/chosen": -0.37997350096702576, "rewards/margins": 2.428813934326172, "rewards/rejected": -2.8087873458862305, "step": 7421 }, { "epoch": 0.86, "learning_rate": 4.1596787528050074e-08, "logits/chosen": -2.09857177734375, "logits/rejected": -2.303708553314209, "logps/chosen": -234.509765625, "logps/rejected": -210.10980224609375, "loss": 0.27, "rewards/accuracies": 0.875, "rewards/chosen": -0.7102940082550049, "rewards/margins": 1.6775791645050049, "rewards/rejected": -2.3878731727600098, "step": 7422 }, { "epoch": 0.86, "learning_rate": 4.15613558521318e-08, "logits/chosen": -2.372453451156616, "logits/rejected": -2.395747661590576, "logps/chosen": -391.77069091796875, "logps/rejected": -271.1944580078125, "loss": 0.2836, "rewards/accuracies": 0.875, "rewards/chosen": -0.19812005758285522, "rewards/margins": 3.433659315109253, "rewards/rejected": -3.631779432296753, "step": 7423 }, { "epoch": 0.86, "learning_rate": 4.152592417621354e-08, "logits/chosen": -2.1211724281311035, "logits/rejected": -2.1210222244262695, "logps/chosen": -418.2323303222656, "logps/rejected": -191.0109100341797, "loss": 1.3796, "rewards/accuracies": 0.5, "rewards/chosen": -1.94867742061615, "rewards/margins": 0.22346678376197815, "rewards/rejected": -2.1721441745758057, "step": 7424 }, { "epoch": 0.86, "learning_rate": 4.149049250029526e-08, "logits/chosen": -2.2247109413146973, "logits/rejected": -2.513376235961914, "logps/chosen": -297.91082763671875, "logps/rejected": -340.728271484375, "loss": 0.6787, "rewards/accuracies": 0.75, "rewards/chosen": -1.2377592325210571, "rewards/margins": 1.4793713092803955, "rewards/rejected": -2.717130661010742, "step": 7425 }, { "epoch": 0.86, "learning_rate": 4.145506082437699e-08, "logits/chosen": -2.4015464782714844, "logits/rejected": -2.0406785011291504, "logps/chosen": -239.76141357421875, "logps/rejected": -416.66021728515625, "loss": 0.1569, "rewards/accuracies": 1.0, "rewards/chosen": -0.9676856994628906, "rewards/margins": 3.6570210456848145, "rewards/rejected": -4.624706745147705, "step": 7426 }, { "epoch": 0.86, "learning_rate": 4.1419629148458724e-08, "logits/chosen": -2.867131233215332, "logits/rejected": -2.766174077987671, "logps/chosen": -192.43174743652344, "logps/rejected": -236.23245239257812, "loss": 0.3242, "rewards/accuracies": 0.875, "rewards/chosen": -0.6829854846000671, "rewards/margins": 2.034066915512085, "rewards/rejected": -2.7170522212982178, "step": 7427 }, { "epoch": 0.86, "learning_rate": 4.138419747254045e-08, "logits/chosen": -2.6047725677490234, "logits/rejected": -2.6888692378997803, "logps/chosen": -317.57965087890625, "logps/rejected": -244.2706756591797, "loss": 0.1431, "rewards/accuracies": 1.0, "rewards/chosen": -0.48612481355667114, "rewards/margins": 3.283125638961792, "rewards/rejected": -3.7692503929138184, "step": 7428 }, { "epoch": 0.86, "learning_rate": 4.1348765796622175e-08, "logits/chosen": -2.479473114013672, "logits/rejected": -2.6523637771606445, "logps/chosen": -356.88067626953125, "logps/rejected": -258.7164306640625, "loss": 0.35, "rewards/accuracies": 0.875, "rewards/chosen": -0.726493239402771, "rewards/margins": 1.5704002380371094, "rewards/rejected": -2.29689359664917, "step": 7429 }, { "epoch": 0.86, "learning_rate": 4.131333412070391e-08, "logits/chosen": -2.513615131378174, "logits/rejected": -2.773898124694824, "logps/chosen": -343.3598937988281, "logps/rejected": -222.90814208984375, "loss": 0.6663, "rewards/accuracies": 0.625, "rewards/chosen": -0.5149953365325928, "rewards/margins": 1.195157527923584, "rewards/rejected": -1.7101528644561768, "step": 7430 }, { "epoch": 0.86, "learning_rate": 4.127790244478564e-08, "logits/chosen": -2.2825732231140137, "logits/rejected": -2.440701961517334, "logps/chosen": -279.2482604980469, "logps/rejected": -314.2821044921875, "loss": 0.1656, "rewards/accuracies": 0.875, "rewards/chosen": -0.6107611060142517, "rewards/margins": 2.887808084487915, "rewards/rejected": -3.4985692501068115, "step": 7431 }, { "epoch": 0.86, "learning_rate": 4.124247076886736e-08, "logits/chosen": -2.2256932258605957, "logits/rejected": -2.217245101928711, "logps/chosen": -259.23126220703125, "logps/rejected": -258.11553955078125, "loss": 0.5736, "rewards/accuracies": 0.75, "rewards/chosen": -1.1330019235610962, "rewards/margins": 1.0320656299591064, "rewards/rejected": -2.165067672729492, "step": 7432 }, { "epoch": 0.86, "learning_rate": 4.1207039092949096e-08, "logits/chosen": -2.2419238090515137, "logits/rejected": -2.0835747718811035, "logps/chosen": -242.33419799804688, "logps/rejected": -227.84742736816406, "loss": 0.317, "rewards/accuracies": 0.875, "rewards/chosen": -0.32714152336120605, "rewards/margins": 1.961592674255371, "rewards/rejected": -2.288734197616577, "step": 7433 }, { "epoch": 0.86, "learning_rate": 4.1171607417030825e-08, "logits/chosen": -2.481661081314087, "logits/rejected": -2.5921473503112793, "logps/chosen": -270.3702392578125, "logps/rejected": -215.79481506347656, "loss": 0.4485, "rewards/accuracies": 0.875, "rewards/chosen": -0.9665314555168152, "rewards/margins": 1.0891231298446655, "rewards/rejected": -2.055654525756836, "step": 7434 }, { "epoch": 0.86, "learning_rate": 4.113617574111255e-08, "logits/chosen": -2.785043954849243, "logits/rejected": -2.5490341186523438, "logps/chosen": -206.52810668945312, "logps/rejected": -326.9478759765625, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -0.5069221258163452, "rewards/margins": 3.893282890319824, "rewards/rejected": -4.400204658508301, "step": 7435 }, { "epoch": 0.87, "learning_rate": 4.110074406519428e-08, "logits/chosen": -2.2484395503997803, "logits/rejected": -2.2394204139709473, "logps/chosen": -255.63900756835938, "logps/rejected": -217.6568603515625, "loss": 0.3821, "rewards/accuracies": 0.625, "rewards/chosen": -0.5845859050750732, "rewards/margins": 1.6633158922195435, "rewards/rejected": -2.2479019165039062, "step": 7436 }, { "epoch": 0.87, "learning_rate": 4.106531238927601e-08, "logits/chosen": -2.6651151180267334, "logits/rejected": -2.5136616230010986, "logps/chosen": -331.2873229980469, "logps/rejected": -325.8858947753906, "loss": 0.3087, "rewards/accuracies": 0.75, "rewards/chosen": -1.8874939680099487, "rewards/margins": 1.9885061979293823, "rewards/rejected": -3.87600040435791, "step": 7437 }, { "epoch": 0.87, "learning_rate": 4.1029880713357734e-08, "logits/chosen": -2.2892918586730957, "logits/rejected": -2.489786148071289, "logps/chosen": -252.24765014648438, "logps/rejected": -188.4664764404297, "loss": 0.341, "rewards/accuracies": 0.875, "rewards/chosen": -1.5855600833892822, "rewards/margins": 2.245267391204834, "rewards/rejected": -3.830827236175537, "step": 7438 }, { "epoch": 0.87, "learning_rate": 4.099444903743947e-08, "logits/chosen": -2.988538980484009, "logits/rejected": -3.0016558170318604, "logps/chosen": -102.7610855102539, "logps/rejected": -118.61161804199219, "loss": 0.3392, "rewards/accuracies": 0.875, "rewards/chosen": -0.30525365471839905, "rewards/margins": 1.9465928077697754, "rewards/rejected": -2.2518465518951416, "step": 7439 }, { "epoch": 0.87, "learning_rate": 4.09590173615212e-08, "logits/chosen": -2.575385808944702, "logits/rejected": -2.343461751937866, "logps/chosen": -205.0209503173828, "logps/rejected": -247.54193115234375, "loss": 0.1798, "rewards/accuracies": 1.0, "rewards/chosen": -0.5323732495307922, "rewards/margins": 2.7233405113220215, "rewards/rejected": -3.255713701248169, "step": 7440 }, { "epoch": 0.87, "learning_rate": 4.0923585685602926e-08, "logits/chosen": -2.4557299613952637, "logits/rejected": -2.499826192855835, "logps/chosen": -297.71160888671875, "logps/rejected": -259.4007873535156, "loss": 0.3988, "rewards/accuracies": 0.75, "rewards/chosen": -1.394883632659912, "rewards/margins": 1.990040898323059, "rewards/rejected": -3.3849244117736816, "step": 7441 }, { "epoch": 0.87, "learning_rate": 4.0888154009684655e-08, "logits/chosen": -1.9132922887802124, "logits/rejected": -2.0855712890625, "logps/chosen": -337.0099182128906, "logps/rejected": -223.9114532470703, "loss": 0.6675, "rewards/accuracies": 0.75, "rewards/chosen": -1.4333034753799438, "rewards/margins": 1.4482204914093018, "rewards/rejected": -2.881524085998535, "step": 7442 }, { "epoch": 0.87, "learning_rate": 4.0852722333766384e-08, "logits/chosen": -2.5627241134643555, "logits/rejected": -2.5413014888763428, "logps/chosen": -179.08676147460938, "logps/rejected": -232.53305053710938, "loss": 0.2979, "rewards/accuracies": 0.75, "rewards/chosen": -0.9355536103248596, "rewards/margins": 3.214989423751831, "rewards/rejected": -4.150543212890625, "step": 7443 }, { "epoch": 0.87, "learning_rate": 4.081729065784811e-08, "logits/chosen": -2.332305431365967, "logits/rejected": -2.422426700592041, "logps/chosen": -259.3702392578125, "logps/rejected": -222.8070068359375, "loss": 0.5868, "rewards/accuracies": 0.875, "rewards/chosen": -1.332239031791687, "rewards/margins": 0.5492417812347412, "rewards/rejected": -1.8814808130264282, "step": 7444 }, { "epoch": 0.87, "learning_rate": 4.078185898192985e-08, "logits/chosen": -2.1690094470977783, "logits/rejected": -2.0823795795440674, "logps/chosen": -240.04525756835938, "logps/rejected": -392.4588317871094, "loss": 0.1616, "rewards/accuracies": 0.875, "rewards/chosen": -0.7538583278656006, "rewards/margins": 3.3708839416503906, "rewards/rejected": -4.12474250793457, "step": 7445 }, { "epoch": 0.87, "learning_rate": 4.074642730601157e-08, "logits/chosen": -2.7771639823913574, "logits/rejected": -2.7408947944641113, "logps/chosen": -180.1182861328125, "logps/rejected": -241.030517578125, "loss": 0.5234, "rewards/accuracies": 0.75, "rewards/chosen": -0.6159970164299011, "rewards/margins": 2.0382094383239746, "rewards/rejected": -2.6542065143585205, "step": 7446 }, { "epoch": 0.87, "learning_rate": 4.07109956300933e-08, "logits/chosen": -2.605668067932129, "logits/rejected": -2.653718948364258, "logps/chosen": -108.45834350585938, "logps/rejected": -187.49371337890625, "loss": 0.2094, "rewards/accuracies": 1.0, "rewards/chosen": 0.08610232174396515, "rewards/margins": 2.9188454151153564, "rewards/rejected": -2.8327431678771973, "step": 7447 }, { "epoch": 0.87, "learning_rate": 4.0675563954175034e-08, "logits/chosen": -2.494749069213867, "logits/rejected": -2.637892723083496, "logps/chosen": -302.77001953125, "logps/rejected": -230.0926513671875, "loss": 0.4721, "rewards/accuracies": 0.75, "rewards/chosen": -0.8562884330749512, "rewards/margins": 1.5954973697662354, "rewards/rejected": -2.4517858028411865, "step": 7448 }, { "epoch": 0.87, "learning_rate": 4.0640132278256756e-08, "logits/chosen": -2.6143791675567627, "logits/rejected": -2.767848014831543, "logps/chosen": -180.4899444580078, "logps/rejected": -351.9959411621094, "loss": 0.7745, "rewards/accuracies": 0.75, "rewards/chosen": -1.8119560480117798, "rewards/margins": 2.1861751079559326, "rewards/rejected": -3.998131036758423, "step": 7449 }, { "epoch": 0.87, "learning_rate": 4.0604700602338485e-08, "logits/chosen": -1.9816347360610962, "logits/rejected": -2.039487600326538, "logps/chosen": -176.08734130859375, "logps/rejected": -205.96249389648438, "loss": 0.5977, "rewards/accuracies": 0.75, "rewards/chosen": -0.5721927881240845, "rewards/margins": 0.626162052154541, "rewards/rejected": -1.1983548402786255, "step": 7450 }, { "epoch": 0.87, "learning_rate": 4.056926892642022e-08, "logits/chosen": -2.4256186485290527, "logits/rejected": -2.2852325439453125, "logps/chosen": -384.98583984375, "logps/rejected": -359.12774658203125, "loss": 0.7605, "rewards/accuracies": 0.625, "rewards/chosen": -1.3783447742462158, "rewards/margins": 1.1083929538726807, "rewards/rejected": -2.4867374897003174, "step": 7451 }, { "epoch": 0.87, "learning_rate": 4.053383725050194e-08, "logits/chosen": -2.4501595497131348, "logits/rejected": -2.458890914916992, "logps/chosen": -318.7148132324219, "logps/rejected": -303.5361022949219, "loss": 0.2608, "rewards/accuracies": 1.0, "rewards/chosen": 0.1532467007637024, "rewards/margins": 2.1351633071899414, "rewards/rejected": -1.981916904449463, "step": 7452 }, { "epoch": 0.87, "learning_rate": 4.049840557458368e-08, "logits/chosen": -2.3681423664093018, "logits/rejected": -2.525272846221924, "logps/chosen": -280.0669860839844, "logps/rejected": -328.72100830078125, "loss": 0.8727, "rewards/accuracies": 0.625, "rewards/chosen": -1.0631685256958008, "rewards/margins": 2.181605339050293, "rewards/rejected": -3.2447738647460938, "step": 7453 }, { "epoch": 0.87, "learning_rate": 4.0462973898665407e-08, "logits/chosen": -2.556973457336426, "logits/rejected": -2.326115131378174, "logps/chosen": -194.3543243408203, "logps/rejected": -228.249267578125, "loss": 0.3991, "rewards/accuracies": 0.75, "rewards/chosen": -0.557931125164032, "rewards/margins": 1.0232553482055664, "rewards/rejected": -1.5811865329742432, "step": 7454 }, { "epoch": 0.87, "learning_rate": 4.0427542222747135e-08, "logits/chosen": -2.96331524848938, "logits/rejected": -2.9399170875549316, "logps/chosen": -224.3788604736328, "logps/rejected": -241.95213317871094, "loss": 0.489, "rewards/accuracies": 0.75, "rewards/chosen": -0.9193310737609863, "rewards/margins": 1.6471266746520996, "rewards/rejected": -2.566457748413086, "step": 7455 }, { "epoch": 0.87, "learning_rate": 4.0392110546828864e-08, "logits/chosen": -2.2597365379333496, "logits/rejected": -2.4630632400512695, "logps/chosen": -350.9752197265625, "logps/rejected": -269.10638427734375, "loss": 0.8947, "rewards/accuracies": 0.5, "rewards/chosen": -1.2796251773834229, "rewards/margins": 0.23490393161773682, "rewards/rejected": -1.5145289897918701, "step": 7456 }, { "epoch": 0.87, "learning_rate": 4.035667887091059e-08, "logits/chosen": -2.0981454849243164, "logits/rejected": -2.082630157470703, "logps/chosen": -276.1609802246094, "logps/rejected": -361.2107238769531, "loss": 0.6434, "rewards/accuracies": 0.625, "rewards/chosen": -0.9780843257904053, "rewards/margins": 1.9016947746276855, "rewards/rejected": -2.87977933883667, "step": 7457 }, { "epoch": 0.87, "learning_rate": 4.032124719499232e-08, "logits/chosen": -2.6170413494110107, "logits/rejected": -2.6239166259765625, "logps/chosen": -246.8736114501953, "logps/rejected": -205.16737365722656, "loss": 0.3147, "rewards/accuracies": 0.75, "rewards/chosen": -0.3066801428794861, "rewards/margins": 2.096379518508911, "rewards/rejected": -2.403059720993042, "step": 7458 }, { "epoch": 0.87, "learning_rate": 4.028581551907406e-08, "logits/chosen": -1.5424563884735107, "logits/rejected": -1.8053616285324097, "logps/chosen": -390.86456298828125, "logps/rejected": -276.9825134277344, "loss": 0.5768, "rewards/accuracies": 0.625, "rewards/chosen": -0.375307559967041, "rewards/margins": 0.49594545364379883, "rewards/rejected": -0.8712530136108398, "step": 7459 }, { "epoch": 0.87, "learning_rate": 4.025038384315578e-08, "logits/chosen": -2.8637747764587402, "logits/rejected": -2.323227643966675, "logps/chosen": -320.23779296875, "logps/rejected": -496.8822937011719, "loss": 0.335, "rewards/accuracies": 0.875, "rewards/chosen": -1.3381191492080688, "rewards/margins": 2.290496349334717, "rewards/rejected": -3.628615617752075, "step": 7460 }, { "epoch": 0.87, "learning_rate": 4.021495216723751e-08, "logits/chosen": -1.740652084350586, "logits/rejected": -2.071352958679199, "logps/chosen": -308.84796142578125, "logps/rejected": -310.53656005859375, "loss": 0.4248, "rewards/accuracies": 0.875, "rewards/chosen": -0.891151487827301, "rewards/margins": 1.589906930923462, "rewards/rejected": -2.481058359146118, "step": 7461 }, { "epoch": 0.87, "learning_rate": 4.017952049131924e-08, "logits/chosen": -2.5051496028900146, "logits/rejected": -2.1887893676757812, "logps/chosen": -95.53691101074219, "logps/rejected": -263.22601318359375, "loss": 0.1041, "rewards/accuracies": 1.0, "rewards/chosen": -0.028713539242744446, "rewards/margins": 3.911935806274414, "rewards/rejected": -3.9406492710113525, "step": 7462 }, { "epoch": 0.87, "learning_rate": 4.0144088815400965e-08, "logits/chosen": -2.415891170501709, "logits/rejected": -2.3629393577575684, "logps/chosen": -139.18809509277344, "logps/rejected": -211.275146484375, "loss": 0.4573, "rewards/accuracies": 0.625, "rewards/chosen": -0.7354158163070679, "rewards/margins": 2.1032328605651855, "rewards/rejected": -2.838648796081543, "step": 7463 }, { "epoch": 0.87, "learning_rate": 4.0108657139482694e-08, "logits/chosen": -2.936413288116455, "logits/rejected": -2.853189706802368, "logps/chosen": -225.50418090820312, "logps/rejected": -270.8065185546875, "loss": 0.5244, "rewards/accuracies": 0.5, "rewards/chosen": -0.6243809461593628, "rewards/margins": 2.3480803966522217, "rewards/rejected": -2.972461223602295, "step": 7464 }, { "epoch": 0.87, "learning_rate": 4.007322546356443e-08, "logits/chosen": -2.2530019283294678, "logits/rejected": -2.1273109912872314, "logps/chosen": -317.8463134765625, "logps/rejected": -385.5718994140625, "loss": 0.2454, "rewards/accuracies": 0.875, "rewards/chosen": -0.08119721710681915, "rewards/margins": 2.7913320064544678, "rewards/rejected": -2.8725290298461914, "step": 7465 }, { "epoch": 0.87, "learning_rate": 4.003779378764615e-08, "logits/chosen": -2.287867546081543, "logits/rejected": -2.304603338241577, "logps/chosen": -407.41033935546875, "logps/rejected": -308.77142333984375, "loss": 0.5155, "rewards/accuracies": 0.75, "rewards/chosen": -0.9147146940231323, "rewards/margins": 2.568711757659912, "rewards/rejected": -3.483426570892334, "step": 7466 }, { "epoch": 0.87, "learning_rate": 4.000236211172788e-08, "logits/chosen": -2.7455592155456543, "logits/rejected": -2.8652217388153076, "logps/chosen": -320.1406555175781, "logps/rejected": -308.87091064453125, "loss": 0.2871, "rewards/accuracies": 0.75, "rewards/chosen": -1.0852218866348267, "rewards/margins": 1.8617819547653198, "rewards/rejected": -2.9470038414001465, "step": 7467 }, { "epoch": 0.87, "learning_rate": 3.9966930435809616e-08, "logits/chosen": -2.32810640335083, "logits/rejected": -2.231466293334961, "logps/chosen": -223.48406982421875, "logps/rejected": -231.22523498535156, "loss": 0.2428, "rewards/accuracies": 0.875, "rewards/chosen": 0.2737163305282593, "rewards/margins": 2.5546059608459473, "rewards/rejected": -2.2808897495269775, "step": 7468 }, { "epoch": 0.87, "learning_rate": 3.9931498759891344e-08, "logits/chosen": -2.125253915786743, "logits/rejected": -2.104539394378662, "logps/chosen": -244.7877655029297, "logps/rejected": -296.8888854980469, "loss": 0.3128, "rewards/accuracies": 0.875, "rewards/chosen": -0.07826540619134903, "rewards/margins": 2.2179198265075684, "rewards/rejected": -2.29618501663208, "step": 7469 }, { "epoch": 0.87, "learning_rate": 3.9896067083973067e-08, "logits/chosen": -2.8640546798706055, "logits/rejected": -2.7171785831451416, "logps/chosen": -224.15684509277344, "logps/rejected": -163.0520782470703, "loss": 0.8724, "rewards/accuracies": 0.875, "rewards/chosen": -1.7173874378204346, "rewards/margins": 0.7800913453102112, "rewards/rejected": -2.497478723526001, "step": 7470 }, { "epoch": 0.87, "learning_rate": 3.98606354080548e-08, "logits/chosen": -1.9381287097930908, "logits/rejected": -1.7445000410079956, "logps/chosen": -239.2338409423828, "logps/rejected": -344.2511291503906, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": -0.6621671915054321, "rewards/margins": 3.6297380924224854, "rewards/rejected": -4.291904926300049, "step": 7471 }, { "epoch": 0.87, "learning_rate": 3.982520373213653e-08, "logits/chosen": -1.789376139640808, "logits/rejected": -2.160247802734375, "logps/chosen": -219.6719512939453, "logps/rejected": -203.71624755859375, "loss": 0.4051, "rewards/accuracies": 0.75, "rewards/chosen": -0.8259466290473938, "rewards/margins": 1.8520512580871582, "rewards/rejected": -2.6779978275299072, "step": 7472 }, { "epoch": 0.87, "learning_rate": 3.978977205621825e-08, "logits/chosen": -2.5186033248901367, "logits/rejected": -2.097498893737793, "logps/chosen": -227.91026306152344, "logps/rejected": -407.81878662109375, "loss": 0.4157, "rewards/accuracies": 0.75, "rewards/chosen": -1.2582558393478394, "rewards/margins": 2.272338390350342, "rewards/rejected": -3.5305943489074707, "step": 7473 }, { "epoch": 0.87, "learning_rate": 3.975434038029999e-08, "logits/chosen": -2.7007932662963867, "logits/rejected": -2.828233242034912, "logps/chosen": -253.18551635742188, "logps/rejected": -275.609375, "loss": 0.6157, "rewards/accuracies": 0.75, "rewards/chosen": -2.1898021697998047, "rewards/margins": 3.3642122745513916, "rewards/rejected": -5.554014682769775, "step": 7474 }, { "epoch": 0.87, "learning_rate": 3.971890870438172e-08, "logits/chosen": -2.5894033908843994, "logits/rejected": -2.453242063522339, "logps/chosen": -195.01309204101562, "logps/rejected": -212.51077270507812, "loss": 0.152, "rewards/accuracies": 0.875, "rewards/chosen": -0.25076568126678467, "rewards/margins": 3.4053378105163574, "rewards/rejected": -3.6561033725738525, "step": 7475 }, { "epoch": 0.87, "learning_rate": 3.968347702846344e-08, "logits/chosen": -2.5519039630889893, "logits/rejected": -2.709306478500366, "logps/chosen": -198.63784790039062, "logps/rejected": -184.9821319580078, "loss": 0.2355, "rewards/accuracies": 0.875, "rewards/chosen": -0.2773759961128235, "rewards/margins": 2.7246203422546387, "rewards/rejected": -3.0019962787628174, "step": 7476 }, { "epoch": 0.87, "learning_rate": 3.9648045352545174e-08, "logits/chosen": -2.318545341491699, "logits/rejected": -2.2535712718963623, "logps/chosen": -350.3158874511719, "logps/rejected": -280.2073974609375, "loss": 0.3428, "rewards/accuracies": 0.875, "rewards/chosen": -0.7480247616767883, "rewards/margins": 1.790666103363037, "rewards/rejected": -2.5386910438537598, "step": 7477 }, { "epoch": 0.87, "learning_rate": 3.96126136766269e-08, "logits/chosen": -2.9069015979766846, "logits/rejected": -2.9593658447265625, "logps/chosen": -151.27932739257812, "logps/rejected": -201.32086181640625, "loss": 0.1605, "rewards/accuracies": 1.0, "rewards/chosen": -0.6439796090126038, "rewards/margins": 3.0447351932525635, "rewards/rejected": -3.6887147426605225, "step": 7478 }, { "epoch": 0.87, "learning_rate": 3.9577182000708625e-08, "logits/chosen": -2.4626615047454834, "logits/rejected": -2.563568115234375, "logps/chosen": -311.5268859863281, "logps/rejected": -332.6734619140625, "loss": 0.3943, "rewards/accuracies": 0.625, "rewards/chosen": -0.3352310359477997, "rewards/margins": 1.796903133392334, "rewards/rejected": -2.132134199142456, "step": 7479 }, { "epoch": 0.87, "learning_rate": 3.954175032479036e-08, "logits/chosen": -2.5440256595611572, "logits/rejected": -2.673510789871216, "logps/chosen": -326.3738098144531, "logps/rejected": -276.53955078125, "loss": 0.5329, "rewards/accuracies": 0.75, "rewards/chosen": -0.7997738122940063, "rewards/margins": 2.1180038452148438, "rewards/rejected": -2.9177775382995605, "step": 7480 }, { "epoch": 0.87, "learning_rate": 3.950631864887209e-08, "logits/chosen": -2.7066192626953125, "logits/rejected": -2.224942207336426, "logps/chosen": -185.92559814453125, "logps/rejected": -361.118408203125, "loss": 0.6105, "rewards/accuracies": 0.625, "rewards/chosen": -1.250961422920227, "rewards/margins": 0.9742235541343689, "rewards/rejected": -2.225184917449951, "step": 7481 }, { "epoch": 0.87, "learning_rate": 3.947088697295382e-08, "logits/chosen": -1.6574145555496216, "logits/rejected": -1.8611578941345215, "logps/chosen": -296.454345703125, "logps/rejected": -180.52996826171875, "loss": 0.9579, "rewards/accuracies": 0.625, "rewards/chosen": -0.4499104619026184, "rewards/margins": 0.7439854741096497, "rewards/rejected": -1.1938960552215576, "step": 7482 }, { "epoch": 0.87, "learning_rate": 3.943545529703555e-08, "logits/chosen": -2.6334919929504395, "logits/rejected": -2.868267059326172, "logps/chosen": -187.14480590820312, "logps/rejected": -236.28677368164062, "loss": 0.1351, "rewards/accuracies": 1.0, "rewards/chosen": -0.6801818013191223, "rewards/margins": 4.1657257080078125, "rewards/rejected": -4.845907688140869, "step": 7483 }, { "epoch": 0.87, "learning_rate": 3.9400023621117276e-08, "logits/chosen": -2.9997878074645996, "logits/rejected": -2.8900153636932373, "logps/chosen": -358.3124084472656, "logps/rejected": -283.5014343261719, "loss": 0.4944, "rewards/accuracies": 0.75, "rewards/chosen": -1.3210680484771729, "rewards/margins": 2.0989174842834473, "rewards/rejected": -3.419985294342041, "step": 7484 }, { "epoch": 0.87, "learning_rate": 3.9364591945199004e-08, "logits/chosen": -2.3700881004333496, "logits/rejected": -2.283473491668701, "logps/chosen": -315.7730712890625, "logps/rejected": -262.1810607910156, "loss": 0.631, "rewards/accuracies": 0.75, "rewards/chosen": -0.6994385123252869, "rewards/margins": 2.479795217514038, "rewards/rejected": -3.179234027862549, "step": 7485 }, { "epoch": 0.87, "learning_rate": 3.932916026928074e-08, "logits/chosen": -2.5424065589904785, "logits/rejected": -2.4882302284240723, "logps/chosen": -377.7289733886719, "logps/rejected": -258.8668212890625, "loss": 0.8413, "rewards/accuracies": 0.625, "rewards/chosen": -2.19884991645813, "rewards/margins": 1.2343378067016602, "rewards/rejected": -3.43318772315979, "step": 7486 }, { "epoch": 0.87, "learning_rate": 3.929372859336246e-08, "logits/chosen": -1.4403400421142578, "logits/rejected": -1.6577595472335815, "logps/chosen": -326.2098693847656, "logps/rejected": -275.27532958984375, "loss": 0.1855, "rewards/accuracies": 0.875, "rewards/chosen": -0.05754198133945465, "rewards/margins": 2.917477607727051, "rewards/rejected": -2.975019693374634, "step": 7487 }, { "epoch": 0.87, "learning_rate": 3.92582969174442e-08, "logits/chosen": -2.45052433013916, "logits/rejected": -2.244040012359619, "logps/chosen": -121.68570709228516, "logps/rejected": -561.2068481445312, "loss": 0.2863, "rewards/accuracies": 0.75, "rewards/chosen": -0.07030247151851654, "rewards/margins": 5.309096336364746, "rewards/rejected": -5.379398822784424, "step": 7488 }, { "epoch": 0.87, "learning_rate": 3.9222865241525926e-08, "logits/chosen": -2.4226434230804443, "logits/rejected": -2.1578667163848877, "logps/chosen": -253.4890594482422, "logps/rejected": -228.8557891845703, "loss": 0.4049, "rewards/accuracies": 0.875, "rewards/chosen": -0.8983390927314758, "rewards/margins": 1.1459429264068604, "rewards/rejected": -2.0442819595336914, "step": 7489 }, { "epoch": 0.87, "learning_rate": 3.918743356560765e-08, "logits/chosen": -2.5436697006225586, "logits/rejected": -2.6294407844543457, "logps/chosen": -241.81517028808594, "logps/rejected": -249.85643005371094, "loss": 0.2968, "rewards/accuracies": 0.875, "rewards/chosen": -0.9220157861709595, "rewards/margins": 3.2853140830993652, "rewards/rejected": -4.207329750061035, "step": 7490 }, { "epoch": 0.87, "learning_rate": 3.9152001889689383e-08, "logits/chosen": -2.932400941848755, "logits/rejected": -2.9976460933685303, "logps/chosen": -214.0198516845703, "logps/rejected": -259.37060546875, "loss": 0.2845, "rewards/accuracies": 0.875, "rewards/chosen": -0.46973851323127747, "rewards/margins": 1.8081988096237183, "rewards/rejected": -2.277937412261963, "step": 7491 }, { "epoch": 0.87, "learning_rate": 3.911657021377111e-08, "logits/chosen": -2.162764310836792, "logits/rejected": -2.498610496520996, "logps/chosen": -371.01885986328125, "logps/rejected": -223.04290771484375, "loss": 0.6572, "rewards/accuracies": 0.875, "rewards/chosen": -0.4022696018218994, "rewards/margins": 0.9825252294540405, "rewards/rejected": -1.38479483127594, "step": 7492 }, { "epoch": 0.87, "learning_rate": 3.9081138537852834e-08, "logits/chosen": -1.9126758575439453, "logits/rejected": -2.314441680908203, "logps/chosen": -354.7716064453125, "logps/rejected": -196.4732666015625, "loss": 0.6203, "rewards/accuracies": 0.75, "rewards/chosen": -1.6445648670196533, "rewards/margins": 0.7784310579299927, "rewards/rejected": -2.4229960441589355, "step": 7493 }, { "epoch": 0.87, "learning_rate": 3.904570686193457e-08, "logits/chosen": -2.5145556926727295, "logits/rejected": -2.6349141597747803, "logps/chosen": -351.16943359375, "logps/rejected": -349.3561096191406, "loss": 0.3624, "rewards/accuracies": 0.875, "rewards/chosen": -0.577573299407959, "rewards/margins": 2.049403667449951, "rewards/rejected": -2.626976728439331, "step": 7494 }, { "epoch": 0.87, "learning_rate": 3.90102751860163e-08, "logits/chosen": -2.711668014526367, "logits/rejected": -2.632505416870117, "logps/chosen": -207.67723083496094, "logps/rejected": -287.71002197265625, "loss": 0.1646, "rewards/accuracies": 1.0, "rewards/chosen": -1.117865800857544, "rewards/margins": 3.1179416179656982, "rewards/rejected": -4.235807418823242, "step": 7495 }, { "epoch": 0.87, "learning_rate": 3.897484351009803e-08, "logits/chosen": -2.065885543823242, "logits/rejected": -2.1394834518432617, "logps/chosen": -481.0562744140625, "logps/rejected": -407.96160888671875, "loss": 0.2684, "rewards/accuracies": 0.875, "rewards/chosen": -0.45765888690948486, "rewards/margins": 2.4948365688323975, "rewards/rejected": -2.9524953365325928, "step": 7496 }, { "epoch": 0.87, "learning_rate": 3.8939411834179756e-08, "logits/chosen": -2.140235662460327, "logits/rejected": -2.3484044075012207, "logps/chosen": -263.39129638671875, "logps/rejected": -260.3359375, "loss": 0.527, "rewards/accuracies": 0.875, "rewards/chosen": -0.9175694584846497, "rewards/margins": 1.723464012145996, "rewards/rejected": -2.64103364944458, "step": 7497 }, { "epoch": 0.87, "learning_rate": 3.8903980158261485e-08, "logits/chosen": -2.143843412399292, "logits/rejected": -2.0756313800811768, "logps/chosen": -325.08392333984375, "logps/rejected": -263.04931640625, "loss": 0.5948, "rewards/accuracies": 0.625, "rewards/chosen": -1.572506308555603, "rewards/margins": 1.7050936222076416, "rewards/rejected": -3.277599811553955, "step": 7498 }, { "epoch": 0.87, "learning_rate": 3.886854848234321e-08, "logits/chosen": -2.536574363708496, "logits/rejected": -2.7295899391174316, "logps/chosen": -248.046875, "logps/rejected": -233.45281982421875, "loss": 0.9397, "rewards/accuracies": 0.75, "rewards/chosen": -1.1325939893722534, "rewards/margins": 1.6091258525848389, "rewards/rejected": -2.741719961166382, "step": 7499 }, { "epoch": 0.87, "learning_rate": 3.883311680642495e-08, "logits/chosen": -2.360440731048584, "logits/rejected": -2.264335870742798, "logps/chosen": -87.21952819824219, "logps/rejected": -192.10665893554688, "loss": 0.3563, "rewards/accuracies": 0.75, "rewards/chosen": -0.8892935514450073, "rewards/margins": 2.650287628173828, "rewards/rejected": -3.539581060409546, "step": 7500 }, { "epoch": 0.87, "learning_rate": 3.879768513050667e-08, "logits/chosen": -1.781163215637207, "logits/rejected": -1.6736397743225098, "logps/chosen": -593.2574462890625, "logps/rejected": -504.3365783691406, "loss": 0.4747, "rewards/accuracies": 0.75, "rewards/chosen": -0.8683615922927856, "rewards/margins": 1.302520513534546, "rewards/rejected": -2.170881986618042, "step": 7501 }, { "epoch": 0.87, "learning_rate": 3.87622534545884e-08, "logits/chosen": -1.7868337631225586, "logits/rejected": -1.839977741241455, "logps/chosen": -341.46307373046875, "logps/rejected": -356.9876708984375, "loss": 0.4488, "rewards/accuracies": 0.75, "rewards/chosen": -0.6643365025520325, "rewards/margins": 1.3283683061599731, "rewards/rejected": -1.9927046298980713, "step": 7502 }, { "epoch": 0.87, "learning_rate": 3.8726821778670135e-08, "logits/chosen": -2.029974937438965, "logits/rejected": -2.1934258937835693, "logps/chosen": -240.7349853515625, "logps/rejected": -261.1914367675781, "loss": 0.3721, "rewards/accuracies": 0.75, "rewards/chosen": -0.7872371077537537, "rewards/margins": 1.9438073635101318, "rewards/rejected": -2.7310445308685303, "step": 7503 }, { "epoch": 0.87, "learning_rate": 3.869139010275186e-08, "logits/chosen": -2.39571213722229, "logits/rejected": -2.447470188140869, "logps/chosen": -289.93951416015625, "logps/rejected": -287.8323059082031, "loss": 0.4244, "rewards/accuracies": 0.75, "rewards/chosen": -0.3291493058204651, "rewards/margins": 2.530674695968628, "rewards/rejected": -2.8598239421844482, "step": 7504 }, { "epoch": 0.87, "learning_rate": 3.8655958426833586e-08, "logits/chosen": -2.5821428298950195, "logits/rejected": -2.5792949199676514, "logps/chosen": -200.5162353515625, "logps/rejected": -254.36978149414062, "loss": 0.1187, "rewards/accuracies": 1.0, "rewards/chosen": -0.1960863471031189, "rewards/margins": 3.751725196838379, "rewards/rejected": -3.9478116035461426, "step": 7505 }, { "epoch": 0.87, "learning_rate": 3.862052675091532e-08, "logits/chosen": -1.7868914604187012, "logits/rejected": -1.7113152742385864, "logps/chosen": -293.33636474609375, "logps/rejected": -294.551025390625, "loss": 0.3694, "rewards/accuracies": 0.875, "rewards/chosen": -0.6705390214920044, "rewards/margins": 1.1477543115615845, "rewards/rejected": -1.8182933330535889, "step": 7506 }, { "epoch": 0.87, "learning_rate": 3.858509507499704e-08, "logits/chosen": -2.4955410957336426, "logits/rejected": -2.376322031021118, "logps/chosen": -379.24676513671875, "logps/rejected": -340.36785888671875, "loss": 0.2339, "rewards/accuracies": 1.0, "rewards/chosen": -0.5571887493133545, "rewards/margins": 2.565523147583008, "rewards/rejected": -3.1227118968963623, "step": 7507 }, { "epoch": 0.87, "learning_rate": 3.854966339907877e-08, "logits/chosen": -2.2967946529388428, "logits/rejected": -2.4550697803497314, "logps/chosen": -197.96615600585938, "logps/rejected": -145.23403930664062, "loss": 0.331, "rewards/accuracies": 0.875, "rewards/chosen": -0.41671648621559143, "rewards/margins": 1.574009656906128, "rewards/rejected": -1.9907262325286865, "step": 7508 }, { "epoch": 0.87, "learning_rate": 3.851423172316051e-08, "logits/chosen": -2.4118752479553223, "logits/rejected": -2.331817626953125, "logps/chosen": -206.0049285888672, "logps/rejected": -254.46414184570312, "loss": 0.4497, "rewards/accuracies": 0.75, "rewards/chosen": -1.1649662256240845, "rewards/margins": 0.9729798436164856, "rewards/rejected": -2.1379458904266357, "step": 7509 }, { "epoch": 0.87, "learning_rate": 3.847880004724223e-08, "logits/chosen": -2.642547607421875, "logits/rejected": -2.707130193710327, "logps/chosen": -161.63909912109375, "logps/rejected": -128.05995178222656, "loss": 0.5507, "rewards/accuracies": 0.75, "rewards/chosen": -1.0916132926940918, "rewards/margins": 1.0359361171722412, "rewards/rejected": -2.127549648284912, "step": 7510 }, { "epoch": 0.87, "learning_rate": 3.844336837132396e-08, "logits/chosen": -1.9174813032150269, "logits/rejected": -2.637908935546875, "logps/chosen": -495.57928466796875, "logps/rejected": -238.81614685058594, "loss": 0.4253, "rewards/accuracies": 0.875, "rewards/chosen": -1.1786824464797974, "rewards/margins": 2.4145259857177734, "rewards/rejected": -3.5932087898254395, "step": 7511 }, { "epoch": 0.87, "learning_rate": 3.8407936695405694e-08, "logits/chosen": -1.9930419921875, "logits/rejected": -1.9971733093261719, "logps/chosen": -396.9998474121094, "logps/rejected": -342.5757751464844, "loss": 0.1572, "rewards/accuracies": 1.0, "rewards/chosen": -0.7120683193206787, "rewards/margins": 2.3684170246124268, "rewards/rejected": -3.0804853439331055, "step": 7512 }, { "epoch": 0.87, "learning_rate": 3.837250501948742e-08, "logits/chosen": -1.799892544746399, "logits/rejected": -1.6939420700073242, "logps/chosen": -184.44906616210938, "logps/rejected": -239.61451721191406, "loss": 0.2921, "rewards/accuracies": 0.875, "rewards/chosen": -1.4995110034942627, "rewards/margins": 2.0172793865203857, "rewards/rejected": -3.5167906284332275, "step": 7513 }, { "epoch": 0.87, "learning_rate": 3.8337073343569144e-08, "logits/chosen": -2.480130910873413, "logits/rejected": -2.416158676147461, "logps/chosen": -148.5886993408203, "logps/rejected": -214.14231872558594, "loss": 0.1266, "rewards/accuracies": 1.0, "rewards/chosen": -0.5984637141227722, "rewards/margins": 3.172476291656494, "rewards/rejected": -3.770939826965332, "step": 7514 }, { "epoch": 0.87, "learning_rate": 3.830164166765088e-08, "logits/chosen": -1.9780246019363403, "logits/rejected": -2.409618377685547, "logps/chosen": -324.08984375, "logps/rejected": -293.193359375, "loss": 1.4577, "rewards/accuracies": 0.625, "rewards/chosen": -2.1684296131134033, "rewards/margins": -0.2145644724369049, "rewards/rejected": -1.9538650512695312, "step": 7515 }, { "epoch": 0.87, "learning_rate": 3.826620999173261e-08, "logits/chosen": -2.796067237854004, "logits/rejected": -2.900615692138672, "logps/chosen": -229.98208618164062, "logps/rejected": -262.71490478515625, "loss": 0.2802, "rewards/accuracies": 0.875, "rewards/chosen": -0.9960126876831055, "rewards/margins": 2.8545782566070557, "rewards/rejected": -3.8505911827087402, "step": 7516 }, { "epoch": 0.87, "learning_rate": 3.823077831581433e-08, "logits/chosen": -2.012794256210327, "logits/rejected": -2.086329460144043, "logps/chosen": -441.78961181640625, "logps/rejected": -431.6699523925781, "loss": 0.5211, "rewards/accuracies": 0.75, "rewards/chosen": -1.509674310684204, "rewards/margins": 1.555641531944275, "rewards/rejected": -3.0653159618377686, "step": 7517 }, { "epoch": 0.87, "learning_rate": 3.8195346639896066e-08, "logits/chosen": -1.8747005462646484, "logits/rejected": -1.8375592231750488, "logps/chosen": -233.31080627441406, "logps/rejected": -180.36962890625, "loss": 0.427, "rewards/accuracies": 0.875, "rewards/chosen": -0.48130056262016296, "rewards/margins": 1.6039750576019287, "rewards/rejected": -2.085275650024414, "step": 7518 }, { "epoch": 0.87, "learning_rate": 3.8159914963977795e-08, "logits/chosen": -2.4557831287384033, "logits/rejected": -2.3003106117248535, "logps/chosen": -227.46347045898438, "logps/rejected": -281.5614318847656, "loss": 0.5987, "rewards/accuracies": 0.75, "rewards/chosen": -1.2590041160583496, "rewards/margins": 1.5345250368118286, "rewards/rejected": -2.7935290336608887, "step": 7519 }, { "epoch": 0.87, "learning_rate": 3.812448328805952e-08, "logits/chosen": -2.1570701599121094, "logits/rejected": -2.3173041343688965, "logps/chosen": -256.78521728515625, "logps/rejected": -213.810302734375, "loss": 0.3263, "rewards/accuracies": 0.875, "rewards/chosen": -1.4149093627929688, "rewards/margins": 2.4394869804382324, "rewards/rejected": -3.8543965816497803, "step": 7520 }, { "epoch": 0.87, "learning_rate": 3.808905161214125e-08, "logits/chosen": -2.488544464111328, "logits/rejected": -2.931534767150879, "logps/chosen": -257.25811767578125, "logps/rejected": -226.4765167236328, "loss": 0.3114, "rewards/accuracies": 1.0, "rewards/chosen": -1.0324139595031738, "rewards/margins": 2.1716015338897705, "rewards/rejected": -3.2040152549743652, "step": 7521 }, { "epoch": 0.88, "learning_rate": 3.805361993622298e-08, "logits/chosen": -1.7614893913269043, "logits/rejected": -1.8610666990280151, "logps/chosen": -346.7874755859375, "logps/rejected": -350.2413330078125, "loss": 1.2667, "rewards/accuracies": 0.5, "rewards/chosen": -1.604437232017517, "rewards/margins": 0.3535185158252716, "rewards/rejected": -1.9579558372497559, "step": 7522 }, { "epoch": 0.88, "learning_rate": 3.801818826030471e-08, "logits/chosen": -2.0857529640197754, "logits/rejected": -2.0895373821258545, "logps/chosen": -260.63653564453125, "logps/rejected": -203.45745849609375, "loss": 0.3452, "rewards/accuracies": 0.75, "rewards/chosen": -0.35135677456855774, "rewards/margins": 1.9207004308700562, "rewards/rejected": -2.272057294845581, "step": 7523 }, { "epoch": 0.88, "learning_rate": 3.798275658438644e-08, "logits/chosen": -1.895129680633545, "logits/rejected": -2.3861989974975586, "logps/chosen": -354.649169921875, "logps/rejected": -246.88027954101562, "loss": 0.2748, "rewards/accuracies": 1.0, "rewards/chosen": -0.7551931142807007, "rewards/margins": 2.075368881225586, "rewards/rejected": -2.830562114715576, "step": 7524 }, { "epoch": 0.88, "learning_rate": 3.794732490846817e-08, "logits/chosen": -2.000823736190796, "logits/rejected": -2.061523199081421, "logps/chosen": -447.3293151855469, "logps/rejected": -404.1339416503906, "loss": 0.2944, "rewards/accuracies": 0.75, "rewards/chosen": -0.7396426796913147, "rewards/margins": 1.9754657745361328, "rewards/rejected": -2.7151083946228027, "step": 7525 }, { "epoch": 0.88, "learning_rate": 3.79118932325499e-08, "logits/chosen": -2.6365180015563965, "logits/rejected": -2.478926658630371, "logps/chosen": -365.75994873046875, "logps/rejected": -444.59271240234375, "loss": 0.6105, "rewards/accuracies": 0.75, "rewards/chosen": -0.7812925577163696, "rewards/margins": 1.3631021976470947, "rewards/rejected": -2.144394874572754, "step": 7526 }, { "epoch": 0.88, "learning_rate": 3.787646155663163e-08, "logits/chosen": -2.2099249362945557, "logits/rejected": -2.382570505142212, "logps/chosen": -307.29351806640625, "logps/rejected": -295.52734375, "loss": 0.4052, "rewards/accuracies": 0.75, "rewards/chosen": -0.5256860852241516, "rewards/margins": 2.1640634536743164, "rewards/rejected": -2.6897494792938232, "step": 7527 }, { "epoch": 0.88, "learning_rate": 3.7841029880713353e-08, "logits/chosen": -3.0187735557556152, "logits/rejected": -2.9514951705932617, "logps/chosen": -185.81106567382812, "logps/rejected": -186.22023010253906, "loss": 0.3511, "rewards/accuracies": 0.75, "rewards/chosen": -1.6503515243530273, "rewards/margins": 1.89277982711792, "rewards/rejected": -3.5431313514709473, "step": 7528 }, { "epoch": 0.88, "learning_rate": 3.780559820479509e-08, "logits/chosen": -1.9912121295928955, "logits/rejected": -1.6452127695083618, "logps/chosen": -461.4638977050781, "logps/rejected": -453.073486328125, "loss": 0.3192, "rewards/accuracies": 0.75, "rewards/chosen": -0.8563157916069031, "rewards/margins": 2.419893741607666, "rewards/rejected": -3.276209592819214, "step": 7529 }, { "epoch": 0.88, "learning_rate": 3.777016652887682e-08, "logits/chosen": -2.511629343032837, "logits/rejected": -2.468735456466675, "logps/chosen": -133.1564178466797, "logps/rejected": -117.62103271484375, "loss": 0.5329, "rewards/accuracies": 0.625, "rewards/chosen": -0.7593053579330444, "rewards/margins": 1.00485098361969, "rewards/rejected": -1.7641563415527344, "step": 7530 }, { "epoch": 0.88, "learning_rate": 3.773473485295854e-08, "logits/chosen": -2.2442924976348877, "logits/rejected": -2.1935691833496094, "logps/chosen": -118.59750366210938, "logps/rejected": -151.1815185546875, "loss": 0.3724, "rewards/accuracies": 0.625, "rewards/chosen": -0.6515747308731079, "rewards/margins": 1.6301442384719849, "rewards/rejected": -2.2817189693450928, "step": 7531 }, { "epoch": 0.88, "learning_rate": 3.7699303177040275e-08, "logits/chosen": -1.5957058668136597, "logits/rejected": -1.949406623840332, "logps/chosen": -374.2304382324219, "logps/rejected": -253.68788146972656, "loss": 0.5035, "rewards/accuracies": 0.875, "rewards/chosen": -1.19357430934906, "rewards/margins": 0.7374681234359741, "rewards/rejected": -1.9310424327850342, "step": 7532 }, { "epoch": 0.88, "learning_rate": 3.7663871501122004e-08, "logits/chosen": -2.339268207550049, "logits/rejected": -2.2177529335021973, "logps/chosen": -409.3575439453125, "logps/rejected": -386.9306335449219, "loss": 0.0944, "rewards/accuracies": 1.0, "rewards/chosen": -0.20894679427146912, "rewards/margins": 2.8393497467041016, "rewards/rejected": -3.0482964515686035, "step": 7533 }, { "epoch": 0.88, "learning_rate": 3.7628439825203726e-08, "logits/chosen": -2.3660786151885986, "logits/rejected": -2.3146395683288574, "logps/chosen": -217.5308380126953, "logps/rejected": -325.6949768066406, "loss": 0.272, "rewards/accuracies": 1.0, "rewards/chosen": -1.1429389715194702, "rewards/margins": 2.1752686500549316, "rewards/rejected": -3.3182077407836914, "step": 7534 }, { "epoch": 0.88, "learning_rate": 3.759300814928546e-08, "logits/chosen": -2.212500810623169, "logits/rejected": -1.9916319847106934, "logps/chosen": -440.3708190917969, "logps/rejected": -464.888427734375, "loss": 0.4659, "rewards/accuracies": 0.625, "rewards/chosen": -0.9485597610473633, "rewards/margins": 1.9101868867874146, "rewards/rejected": -2.8587465286254883, "step": 7535 }, { "epoch": 0.88, "learning_rate": 3.755757647336719e-08, "logits/chosen": -2.6308751106262207, "logits/rejected": -2.7481284141540527, "logps/chosen": -121.22234344482422, "logps/rejected": -160.33273315429688, "loss": 0.2259, "rewards/accuracies": 1.0, "rewards/chosen": -0.08295218646526337, "rewards/margins": 3.8101868629455566, "rewards/rejected": -3.893138885498047, "step": 7536 }, { "epoch": 0.88, "learning_rate": 3.752214479744891e-08, "logits/chosen": -1.9461264610290527, "logits/rejected": -1.9267064332962036, "logps/chosen": -231.03726196289062, "logps/rejected": -217.8470001220703, "loss": 2.2683, "rewards/accuracies": 0.625, "rewards/chosen": -2.976490020751953, "rewards/margins": -0.2544025182723999, "rewards/rejected": -2.7220873832702637, "step": 7537 }, { "epoch": 0.88, "learning_rate": 3.748671312153065e-08, "logits/chosen": -2.706608772277832, "logits/rejected": -2.6846141815185547, "logps/chosen": -253.7475128173828, "logps/rejected": -314.66497802734375, "loss": 0.117, "rewards/accuracies": 1.0, "rewards/chosen": -0.42624354362487793, "rewards/margins": 3.4176528453826904, "rewards/rejected": -3.8438968658447266, "step": 7538 }, { "epoch": 0.88, "learning_rate": 3.7451281445612376e-08, "logits/chosen": -2.2262656688690186, "logits/rejected": -2.009671688079834, "logps/chosen": -240.5338897705078, "logps/rejected": -257.73211669921875, "loss": 0.3927, "rewards/accuracies": 0.75, "rewards/chosen": -0.5873706340789795, "rewards/margins": 2.2126827239990234, "rewards/rejected": -2.800053358078003, "step": 7539 }, { "epoch": 0.88, "learning_rate": 3.7415849769694105e-08, "logits/chosen": -2.8615362644195557, "logits/rejected": -2.7722954750061035, "logps/chosen": -126.22683715820312, "logps/rejected": -138.80999755859375, "loss": 0.52, "rewards/accuracies": 0.75, "rewards/chosen": -0.6637499332427979, "rewards/margins": 1.7134788036346436, "rewards/rejected": -2.3772284984588623, "step": 7540 }, { "epoch": 0.88, "learning_rate": 3.7380418093775834e-08, "logits/chosen": -2.6344406604766846, "logits/rejected": -2.629892349243164, "logps/chosen": -173.30101013183594, "logps/rejected": -232.96792602539062, "loss": 0.3082, "rewards/accuracies": 0.875, "rewards/chosen": -0.41418784856796265, "rewards/margins": 2.68011736869812, "rewards/rejected": -3.0943052768707275, "step": 7541 }, { "epoch": 0.88, "learning_rate": 3.734498641785756e-08, "logits/chosen": -1.9984859228134155, "logits/rejected": -2.176063299179077, "logps/chosen": -494.263671875, "logps/rejected": -398.6833801269531, "loss": 0.1217, "rewards/accuracies": 1.0, "rewards/chosen": -1.4157119989395142, "rewards/margins": 3.6512207984924316, "rewards/rejected": -5.066932678222656, "step": 7542 }, { "epoch": 0.88, "learning_rate": 3.730955474193929e-08, "logits/chosen": -2.8124639987945557, "logits/rejected": -2.702547550201416, "logps/chosen": -209.90408325195312, "logps/rejected": -297.9097900390625, "loss": 0.4203, "rewards/accuracies": 0.625, "rewards/chosen": -1.8535338640213013, "rewards/margins": 1.7778887748718262, "rewards/rejected": -3.631422519683838, "step": 7543 }, { "epoch": 0.88, "learning_rate": 3.727412306602102e-08, "logits/chosen": -2.7457571029663086, "logits/rejected": -2.5293898582458496, "logps/chosen": -192.8824920654297, "logps/rejected": -274.78045654296875, "loss": 0.4939, "rewards/accuracies": 0.875, "rewards/chosen": -0.5886745452880859, "rewards/margins": 0.9240310788154602, "rewards/rejected": -1.5127055644989014, "step": 7544 }, { "epoch": 0.88, "learning_rate": 3.723869139010275e-08, "logits/chosen": -2.653395652770996, "logits/rejected": -2.1893367767333984, "logps/chosen": -156.72598266601562, "logps/rejected": -333.017822265625, "loss": 0.6192, "rewards/accuracies": 0.75, "rewards/chosen": -0.6938670873641968, "rewards/margins": 1.1947100162506104, "rewards/rejected": -1.8885771036148071, "step": 7545 }, { "epoch": 0.88, "learning_rate": 3.720325971418448e-08, "logits/chosen": -2.5845787525177, "logits/rejected": -2.4068562984466553, "logps/chosen": -236.43666076660156, "logps/rejected": -309.6436767578125, "loss": 0.1506, "rewards/accuracies": 1.0, "rewards/chosen": -0.584252655506134, "rewards/margins": 3.7644872665405273, "rewards/rejected": -4.348740100860596, "step": 7546 }, { "epoch": 0.88, "learning_rate": 3.7167828038266206e-08, "logits/chosen": -2.0665879249572754, "logits/rejected": -2.3806889057159424, "logps/chosen": -283.1510009765625, "logps/rejected": -279.64727783203125, "loss": 0.0756, "rewards/accuracies": 1.0, "rewards/chosen": -0.9882591962814331, "rewards/margins": 4.01683235168457, "rewards/rejected": -5.005091667175293, "step": 7547 }, { "epoch": 0.88, "learning_rate": 3.7132396362347935e-08, "logits/chosen": -1.398958444595337, "logits/rejected": -1.6636059284210205, "logps/chosen": -300.9684143066406, "logps/rejected": -244.02828979492188, "loss": 0.4365, "rewards/accuracies": 0.75, "rewards/chosen": -1.4553240537643433, "rewards/margins": 2.054354190826416, "rewards/rejected": -3.509678363800049, "step": 7548 }, { "epoch": 0.88, "learning_rate": 3.709696468642967e-08, "logits/chosen": -2.731372117996216, "logits/rejected": -2.7232906818389893, "logps/chosen": -169.9280548095703, "logps/rejected": -244.35594177246094, "loss": 0.1856, "rewards/accuracies": 0.875, "rewards/chosen": -0.682743489742279, "rewards/margins": 4.3151021003723145, "rewards/rejected": -4.997845649719238, "step": 7549 }, { "epoch": 0.88, "learning_rate": 3.706153301051139e-08, "logits/chosen": -2.6550824642181396, "logits/rejected": -2.7113287448883057, "logps/chosen": -288.89190673828125, "logps/rejected": -326.2815856933594, "loss": 0.2165, "rewards/accuracies": 0.875, "rewards/chosen": -0.6875851154327393, "rewards/margins": 2.7046422958374023, "rewards/rejected": -3.3922271728515625, "step": 7550 }, { "epoch": 0.88, "learning_rate": 3.702610133459312e-08, "logits/chosen": -2.6675806045532227, "logits/rejected": -2.5462334156036377, "logps/chosen": -294.23748779296875, "logps/rejected": -455.6847229003906, "loss": 0.5107, "rewards/accuracies": 0.875, "rewards/chosen": -0.7384076714515686, "rewards/margins": 1.27349853515625, "rewards/rejected": -2.011906147003174, "step": 7551 }, { "epoch": 0.88, "learning_rate": 3.6990669658674856e-08, "logits/chosen": -1.9464614391326904, "logits/rejected": -2.2415525913238525, "logps/chosen": -295.48223876953125, "logps/rejected": -210.3455810546875, "loss": 0.2409, "rewards/accuracies": 1.0, "rewards/chosen": -0.28938746452331543, "rewards/margins": 2.6272265911102295, "rewards/rejected": -2.916614055633545, "step": 7552 }, { "epoch": 0.88, "learning_rate": 3.6955237982756585e-08, "logits/chosen": -2.2368502616882324, "logits/rejected": -2.7476260662078857, "logps/chosen": -360.89678955078125, "logps/rejected": -284.3489990234375, "loss": 0.1257, "rewards/accuracies": 1.0, "rewards/chosen": -0.7855456471443176, "rewards/margins": 2.8281712532043457, "rewards/rejected": -3.6137170791625977, "step": 7553 }, { "epoch": 0.88, "learning_rate": 3.6919806306838314e-08, "logits/chosen": -1.5108752250671387, "logits/rejected": -2.260213613510132, "logps/chosen": -427.42242431640625, "logps/rejected": -192.83346557617188, "loss": 0.6278, "rewards/accuracies": 0.75, "rewards/chosen": -0.8214280009269714, "rewards/margins": 1.2722249031066895, "rewards/rejected": -2.0936529636383057, "step": 7554 }, { "epoch": 0.88, "learning_rate": 3.688437463092004e-08, "logits/chosen": -1.9015896320343018, "logits/rejected": -2.081286907196045, "logps/chosen": -245.6549530029297, "logps/rejected": -276.97149658203125, "loss": 0.2642, "rewards/accuracies": 0.875, "rewards/chosen": -0.9643962979316711, "rewards/margins": 2.71993350982666, "rewards/rejected": -3.6843295097351074, "step": 7555 }, { "epoch": 0.88, "learning_rate": 3.684894295500177e-08, "logits/chosen": -2.4587509632110596, "logits/rejected": -2.4521541595458984, "logps/chosen": -370.4828186035156, "logps/rejected": -288.0376281738281, "loss": 0.1323, "rewards/accuracies": 1.0, "rewards/chosen": -0.7593858242034912, "rewards/margins": 2.8072123527526855, "rewards/rejected": -3.5665981769561768, "step": 7556 }, { "epoch": 0.88, "learning_rate": 3.68135112790835e-08, "logits/chosen": -2.9223873615264893, "logits/rejected": -2.907885789871216, "logps/chosen": -193.06895446777344, "logps/rejected": -188.84902954101562, "loss": 0.187, "rewards/accuracies": 1.0, "rewards/chosen": -1.933140754699707, "rewards/margins": 3.5564839839935303, "rewards/rejected": -5.489624500274658, "step": 7557 }, { "epoch": 0.88, "learning_rate": 3.677807960316523e-08, "logits/chosen": -1.7162044048309326, "logits/rejected": -1.5704145431518555, "logps/chosen": -356.20086669921875, "logps/rejected": -504.51806640625, "loss": 0.5148, "rewards/accuracies": 0.625, "rewards/chosen": -1.217374324798584, "rewards/margins": 1.3701777458190918, "rewards/rejected": -2.5875518321990967, "step": 7558 }, { "epoch": 0.88, "learning_rate": 3.674264792724696e-08, "logits/chosen": -2.0464694499969482, "logits/rejected": -2.1825578212738037, "logps/chosen": -155.54287719726562, "logps/rejected": -217.27505493164062, "loss": 0.4368, "rewards/accuracies": 0.75, "rewards/chosen": -0.10394778102636337, "rewards/margins": 2.086359977722168, "rewards/rejected": -2.190307855606079, "step": 7559 }, { "epoch": 0.88, "learning_rate": 3.6707216251328686e-08, "logits/chosen": -2.4749503135681152, "logits/rejected": -2.412766695022583, "logps/chosen": -260.7174377441406, "logps/rejected": -277.419677734375, "loss": 0.1332, "rewards/accuracies": 1.0, "rewards/chosen": -0.2621191442012787, "rewards/margins": 2.817535877227783, "rewards/rejected": -3.0796549320220947, "step": 7560 }, { "epoch": 0.88, "learning_rate": 3.6671784575410415e-08, "logits/chosen": -2.410918951034546, "logits/rejected": -2.418250560760498, "logps/chosen": -284.4427185058594, "logps/rejected": -727.6679077148438, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": -0.9451342225074768, "rewards/margins": 4.7319440841674805, "rewards/rejected": -5.6770782470703125, "step": 7561 }, { "epoch": 0.88, "learning_rate": 3.6636352899492144e-08, "logits/chosen": -2.5907928943634033, "logits/rejected": -2.835151433944702, "logps/chosen": -214.52401733398438, "logps/rejected": -297.724609375, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": -0.486217200756073, "rewards/margins": 4.765803813934326, "rewards/rejected": -5.252020835876465, "step": 7562 }, { "epoch": 0.88, "learning_rate": 3.660092122357387e-08, "logits/chosen": -2.9356203079223633, "logits/rejected": -2.782942533493042, "logps/chosen": -301.09466552734375, "logps/rejected": -263.81231689453125, "loss": 0.5595, "rewards/accuracies": 0.875, "rewards/chosen": -1.475816249847412, "rewards/margins": 1.0224311351776123, "rewards/rejected": -2.4982473850250244, "step": 7563 }, { "epoch": 0.88, "learning_rate": 3.65654895476556e-08, "logits/chosen": -2.9658052921295166, "logits/rejected": -2.9371490478515625, "logps/chosen": -185.08900451660156, "logps/rejected": -171.83450317382812, "loss": 0.4744, "rewards/accuracies": 0.875, "rewards/chosen": -1.490623950958252, "rewards/margins": 2.3582165241241455, "rewards/rejected": -3.8488409519195557, "step": 7564 }, { "epoch": 0.88, "learning_rate": 3.653005787173733e-08, "logits/chosen": -1.7554082870483398, "logits/rejected": -1.6040486097335815, "logps/chosen": -227.764404296875, "logps/rejected": -377.36126708984375, "loss": 0.4086, "rewards/accuracies": 0.75, "rewards/chosen": -0.46963217854499817, "rewards/margins": 1.7083430290222168, "rewards/rejected": -2.1779751777648926, "step": 7565 }, { "epoch": 0.88, "learning_rate": 3.649462619581906e-08, "logits/chosen": -2.342804431915283, "logits/rejected": -2.1793622970581055, "logps/chosen": -141.48915100097656, "logps/rejected": -390.7406311035156, "loss": 0.2946, "rewards/accuracies": 0.875, "rewards/chosen": -0.483340322971344, "rewards/margins": 3.904831886291504, "rewards/rejected": -4.388172149658203, "step": 7566 }, { "epoch": 0.88, "learning_rate": 3.645919451990079e-08, "logits/chosen": -1.843104362487793, "logits/rejected": -1.9669241905212402, "logps/chosen": -230.51632690429688, "logps/rejected": -247.6981201171875, "loss": 0.2617, "rewards/accuracies": 0.875, "rewards/chosen": -0.07780300080776215, "rewards/margins": 2.553384304046631, "rewards/rejected": -2.6311872005462646, "step": 7567 }, { "epoch": 0.88, "learning_rate": 3.6423762843982516e-08, "logits/chosen": -2.62774658203125, "logits/rejected": -2.801915168762207, "logps/chosen": -376.99847412109375, "logps/rejected": -397.4684753417969, "loss": 0.1932, "rewards/accuracies": 0.875, "rewards/chosen": -0.730708122253418, "rewards/margins": 3.3445348739624023, "rewards/rejected": -4.07524299621582, "step": 7568 }, { "epoch": 0.88, "learning_rate": 3.6388331168064245e-08, "logits/chosen": -2.498012065887451, "logits/rejected": -2.518059253692627, "logps/chosen": -337.848876953125, "logps/rejected": -283.1419677734375, "loss": 1.2054, "rewards/accuracies": 0.75, "rewards/chosen": -3.217557191848755, "rewards/margins": 0.8900099992752075, "rewards/rejected": -4.107567310333252, "step": 7569 }, { "epoch": 0.88, "learning_rate": 3.6352899492145974e-08, "logits/chosen": -2.181763172149658, "logits/rejected": -2.2834460735321045, "logps/chosen": -276.3883056640625, "logps/rejected": -353.9615783691406, "loss": 0.4061, "rewards/accuracies": 0.875, "rewards/chosen": -1.3265082836151123, "rewards/margins": 2.4751083850860596, "rewards/rejected": -3.801616668701172, "step": 7570 }, { "epoch": 0.88, "learning_rate": 3.631746781622771e-08, "logits/chosen": -2.9201650619506836, "logits/rejected": -2.906942367553711, "logps/chosen": -349.6849670410156, "logps/rejected": -167.28265380859375, "loss": 0.3313, "rewards/accuracies": 0.875, "rewards/chosen": -1.166731834411621, "rewards/margins": 1.7206690311431885, "rewards/rejected": -2.8874006271362305, "step": 7571 }, { "epoch": 0.88, "learning_rate": 3.628203614030944e-08, "logits/chosen": -2.510052442550659, "logits/rejected": -2.589045524597168, "logps/chosen": -364.6685485839844, "logps/rejected": -309.8553161621094, "loss": 0.4724, "rewards/accuracies": 0.75, "rewards/chosen": -0.7644312381744385, "rewards/margins": 2.0742383003234863, "rewards/rejected": -2.838669776916504, "step": 7572 }, { "epoch": 0.88, "learning_rate": 3.624660446439116e-08, "logits/chosen": -2.1135199069976807, "logits/rejected": -1.8663151264190674, "logps/chosen": -298.73944091796875, "logps/rejected": -364.9178161621094, "loss": 0.5358, "rewards/accuracies": 0.625, "rewards/chosen": -1.2922887802124023, "rewards/margins": 0.8579879403114319, "rewards/rejected": -2.1502766609191895, "step": 7573 }, { "epoch": 0.88, "learning_rate": 3.6211172788472895e-08, "logits/chosen": -1.9965763092041016, "logits/rejected": -2.320180892944336, "logps/chosen": -162.87847900390625, "logps/rejected": -137.52011108398438, "loss": 0.9638, "rewards/accuracies": 0.75, "rewards/chosen": -1.119978666305542, "rewards/margins": 0.9730182886123657, "rewards/rejected": -2.0929970741271973, "step": 7574 }, { "epoch": 0.88, "learning_rate": 3.6175741112554624e-08, "logits/chosen": -2.217125415802002, "logits/rejected": -2.205868721008301, "logps/chosen": -350.5334777832031, "logps/rejected": -456.9824523925781, "loss": 0.1571, "rewards/accuracies": 1.0, "rewards/chosen": -0.33849671483039856, "rewards/margins": 3.5267257690429688, "rewards/rejected": -3.865222454071045, "step": 7575 }, { "epoch": 0.88, "learning_rate": 3.614030943663635e-08, "logits/chosen": -2.2908670902252197, "logits/rejected": -2.229459285736084, "logps/chosen": -311.8523254394531, "logps/rejected": -311.24896240234375, "loss": 0.172, "rewards/accuracies": 0.875, "rewards/chosen": -0.3701716661453247, "rewards/margins": 3.251307487487793, "rewards/rejected": -3.621479034423828, "step": 7576 }, { "epoch": 0.88, "learning_rate": 3.610487776071808e-08, "logits/chosen": -2.483886957168579, "logits/rejected": -2.2705259323120117, "logps/chosen": -119.80787658691406, "logps/rejected": -273.5419921875, "loss": 0.3254, "rewards/accuracies": 0.875, "rewards/chosen": -1.0740145444869995, "rewards/margins": 3.8268091678619385, "rewards/rejected": -4.90082311630249, "step": 7577 }, { "epoch": 0.88, "learning_rate": 3.606944608479981e-08, "logits/chosen": -1.8000823259353638, "logits/rejected": -1.940086841583252, "logps/chosen": -386.8724365234375, "logps/rejected": -383.9178771972656, "loss": 0.8209, "rewards/accuracies": 0.625, "rewards/chosen": -0.7157626152038574, "rewards/margins": 1.1616417169570923, "rewards/rejected": -1.8774042129516602, "step": 7578 }, { "epoch": 0.88, "learning_rate": 3.603401440888154e-08, "logits/chosen": -2.113367795944214, "logits/rejected": -2.2957262992858887, "logps/chosen": -366.28277587890625, "logps/rejected": -245.8885040283203, "loss": 0.4257, "rewards/accuracies": 0.875, "rewards/chosen": -0.23845605552196503, "rewards/margins": 2.4931743144989014, "rewards/rejected": -2.7316300868988037, "step": 7579 }, { "epoch": 0.88, "learning_rate": 3.599858273296327e-08, "logits/chosen": -2.050365924835205, "logits/rejected": -1.860506534576416, "logps/chosen": -277.6342468261719, "logps/rejected": -310.39129638671875, "loss": 0.3807, "rewards/accuracies": 0.75, "rewards/chosen": -0.3855675458908081, "rewards/margins": 2.080390691757202, "rewards/rejected": -2.465958595275879, "step": 7580 }, { "epoch": 0.88, "learning_rate": 3.5963151057044997e-08, "logits/chosen": -2.786585807800293, "logits/rejected": -2.834717273712158, "logps/chosen": -234.1034393310547, "logps/rejected": -235.03048706054688, "loss": 0.4382, "rewards/accuracies": 0.75, "rewards/chosen": -1.2417691946029663, "rewards/margins": 2.191765308380127, "rewards/rejected": -3.433534622192383, "step": 7581 }, { "epoch": 0.88, "learning_rate": 3.5927719381126725e-08, "logits/chosen": -2.3223533630371094, "logits/rejected": -2.3397417068481445, "logps/chosen": -149.02243041992188, "logps/rejected": -223.93235778808594, "loss": 0.3125, "rewards/accuracies": 0.875, "rewards/chosen": -0.3775911033153534, "rewards/margins": 2.3933608531951904, "rewards/rejected": -2.770951986312866, "step": 7582 }, { "epoch": 0.88, "learning_rate": 3.5892287705208454e-08, "logits/chosen": -2.2260210514068604, "logits/rejected": -2.2951440811157227, "logps/chosen": -231.37539672851562, "logps/rejected": -210.52330017089844, "loss": 0.8598, "rewards/accuracies": 0.625, "rewards/chosen": -1.2619588375091553, "rewards/margins": 0.3023863434791565, "rewards/rejected": -1.5643452405929565, "step": 7583 }, { "epoch": 0.88, "learning_rate": 3.585685602929018e-08, "logits/chosen": -1.7904101610183716, "logits/rejected": -1.9910513162612915, "logps/chosen": -395.53851318359375, "logps/rejected": -151.00314331054688, "loss": 0.6489, "rewards/accuracies": 0.625, "rewards/chosen": -0.422161728143692, "rewards/margins": 1.0912331342697144, "rewards/rejected": -1.513394832611084, "step": 7584 }, { "epoch": 0.88, "learning_rate": 3.582142435337191e-08, "logits/chosen": -2.518705368041992, "logits/rejected": -2.763664722442627, "logps/chosen": -233.6903839111328, "logps/rejected": -138.95498657226562, "loss": 0.5345, "rewards/accuracies": 0.625, "rewards/chosen": -0.3573871850967407, "rewards/margins": 2.077782154083252, "rewards/rejected": -2.4351694583892822, "step": 7585 }, { "epoch": 0.88, "learning_rate": 3.578599267745364e-08, "logits/chosen": -2.1547813415527344, "logits/rejected": -2.277367115020752, "logps/chosen": -356.24462890625, "logps/rejected": -457.767822265625, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": -1.0266247987747192, "rewards/margins": 4.047144889831543, "rewards/rejected": -5.073769569396973, "step": 7586 }, { "epoch": 0.88, "learning_rate": 3.575056100153537e-08, "logits/chosen": -2.745624542236328, "logits/rejected": -2.767343759536743, "logps/chosen": -283.20001220703125, "logps/rejected": -211.80340576171875, "loss": 0.3888, "rewards/accuracies": 0.75, "rewards/chosen": -2.0527455806732178, "rewards/margins": 1.9179232120513916, "rewards/rejected": -3.9706687927246094, "step": 7587 }, { "epoch": 0.88, "learning_rate": 3.5715129325617104e-08, "logits/chosen": -2.451864719390869, "logits/rejected": -2.5254292488098145, "logps/chosen": -161.39308166503906, "logps/rejected": -211.47601318359375, "loss": 0.9244, "rewards/accuracies": 0.5, "rewards/chosen": -2.5005884170532227, "rewards/margins": 0.14090299606323242, "rewards/rejected": -2.641491413116455, "step": 7588 }, { "epoch": 0.88, "learning_rate": 3.5679697649698826e-08, "logits/chosen": -2.5783376693725586, "logits/rejected": -2.6541740894317627, "logps/chosen": -230.893798828125, "logps/rejected": -344.1298522949219, "loss": 0.3349, "rewards/accuracies": 0.75, "rewards/chosen": -0.4742083251476288, "rewards/margins": 2.6254560947418213, "rewards/rejected": -3.0996642112731934, "step": 7589 }, { "epoch": 0.88, "learning_rate": 3.564426597378056e-08, "logits/chosen": -1.756777048110962, "logits/rejected": -2.2079498767852783, "logps/chosen": -363.1264343261719, "logps/rejected": -319.36163330078125, "loss": 0.8029, "rewards/accuracies": 0.75, "rewards/chosen": -1.3066222667694092, "rewards/margins": 2.1720027923583984, "rewards/rejected": -3.4786250591278076, "step": 7590 }, { "epoch": 0.88, "learning_rate": 3.560883429786229e-08, "logits/chosen": -2.6978700160980225, "logits/rejected": -2.6855592727661133, "logps/chosen": -214.37139892578125, "logps/rejected": -264.273193359375, "loss": 0.2169, "rewards/accuracies": 0.875, "rewards/chosen": -0.8438678979873657, "rewards/margins": 2.559060573577881, "rewards/rejected": -3.402928352355957, "step": 7591 }, { "epoch": 0.88, "learning_rate": 3.557340262194401e-08, "logits/chosen": -2.2046477794647217, "logits/rejected": -2.0360231399536133, "logps/chosen": -173.59393310546875, "logps/rejected": -277.59503173828125, "loss": 0.4252, "rewards/accuracies": 0.75, "rewards/chosen": -0.9191750288009644, "rewards/margins": 2.049398899078369, "rewards/rejected": -2.968574047088623, "step": 7592 }, { "epoch": 0.88, "learning_rate": 3.553797094602575e-08, "logits/chosen": -2.395211935043335, "logits/rejected": -2.4712424278259277, "logps/chosen": -265.9989013671875, "logps/rejected": -219.70201110839844, "loss": 0.5676, "rewards/accuracies": 0.75, "rewards/chosen": -1.3602744340896606, "rewards/margins": 1.043287754058838, "rewards/rejected": -2.403562068939209, "step": 7593 }, { "epoch": 0.88, "learning_rate": 3.550253927010748e-08, "logits/chosen": -3.0294747352600098, "logits/rejected": -3.063089609146118, "logps/chosen": -407.8807067871094, "logps/rejected": -256.14556884765625, "loss": 0.2501, "rewards/accuracies": 0.875, "rewards/chosen": -0.805570662021637, "rewards/margins": 2.104994535446167, "rewards/rejected": -2.91056489944458, "step": 7594 }, { "epoch": 0.88, "learning_rate": 3.5467107594189206e-08, "logits/chosen": -3.047361373901367, "logits/rejected": -3.1675684452056885, "logps/chosen": -264.67791748046875, "logps/rejected": -309.4627990722656, "loss": 0.4385, "rewards/accuracies": 0.75, "rewards/chosen": -1.0145460367202759, "rewards/margins": 1.821189045906067, "rewards/rejected": -2.835735321044922, "step": 7595 }, { "epoch": 0.88, "learning_rate": 3.5431675918270934e-08, "logits/chosen": -1.6422865390777588, "logits/rejected": -1.4409786462783813, "logps/chosen": -362.65472412109375, "logps/rejected": -316.5869140625, "loss": 0.9686, "rewards/accuracies": 0.625, "rewards/chosen": -1.574093222618103, "rewards/margins": 0.1106400191783905, "rewards/rejected": -1.6847333908081055, "step": 7596 }, { "epoch": 0.88, "learning_rate": 3.539624424235266e-08, "logits/chosen": -2.286604404449463, "logits/rejected": -2.3062031269073486, "logps/chosen": -312.30718994140625, "logps/rejected": -303.1934509277344, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -0.6884717345237732, "rewards/margins": 4.185576438903809, "rewards/rejected": -4.874048233032227, "step": 7597 }, { "epoch": 0.88, "learning_rate": 3.536081256643439e-08, "logits/chosen": -1.8365209102630615, "logits/rejected": -2.321051836013794, "logps/chosen": -558.3326416015625, "logps/rejected": -354.05810546875, "loss": 0.1751, "rewards/accuracies": 0.875, "rewards/chosen": -0.8631792664527893, "rewards/margins": 2.9283766746520996, "rewards/rejected": -3.791555881500244, "step": 7598 }, { "epoch": 0.88, "learning_rate": 3.532538089051612e-08, "logits/chosen": -2.241608142852783, "logits/rejected": -2.3403408527374268, "logps/chosen": -201.49447631835938, "logps/rejected": -358.9652404785156, "loss": 0.4347, "rewards/accuracies": 0.75, "rewards/chosen": -0.6704071760177612, "rewards/margins": 2.6585044860839844, "rewards/rejected": -3.328911781311035, "step": 7599 }, { "epoch": 0.88, "learning_rate": 3.528994921459785e-08, "logits/chosen": -2.118255138397217, "logits/rejected": -2.2543983459472656, "logps/chosen": -136.1275634765625, "logps/rejected": -206.1763916015625, "loss": 0.4207, "rewards/accuracies": 0.625, "rewards/chosen": -1.0280225276947021, "rewards/margins": 1.806870460510254, "rewards/rejected": -2.834892749786377, "step": 7600 }, { "epoch": 0.88, "learning_rate": 3.525451753867958e-08, "logits/chosen": -1.9841358661651611, "logits/rejected": -1.904372215270996, "logps/chosen": -213.83447265625, "logps/rejected": -309.14056396484375, "loss": 0.1811, "rewards/accuracies": 1.0, "rewards/chosen": -0.4439397156238556, "rewards/margins": 2.533085823059082, "rewards/rejected": -2.9770255088806152, "step": 7601 }, { "epoch": 0.88, "learning_rate": 3.521908586276131e-08, "logits/chosen": -2.0665950775146484, "logits/rejected": -2.1095385551452637, "logps/chosen": -377.0731201171875, "logps/rejected": -314.3312072753906, "loss": 0.3043, "rewards/accuracies": 0.75, "rewards/chosen": -0.5919076800346375, "rewards/margins": 1.8448699712753296, "rewards/rejected": -2.4367775917053223, "step": 7602 }, { "epoch": 0.88, "learning_rate": 3.5183654186843035e-08, "logits/chosen": -2.322821617126465, "logits/rejected": -2.4681508541107178, "logps/chosen": -178.25363159179688, "logps/rejected": -198.576416015625, "loss": 0.3016, "rewards/accuracies": 0.875, "rewards/chosen": -0.8365961313247681, "rewards/margins": 2.7911252975463867, "rewards/rejected": -3.6277213096618652, "step": 7603 }, { "epoch": 0.88, "learning_rate": 3.5148222510924764e-08, "logits/chosen": -2.3464293479919434, "logits/rejected": -2.009361743927002, "logps/chosen": -146.68829345703125, "logps/rejected": -317.11572265625, "loss": 0.379, "rewards/accuracies": 0.875, "rewards/chosen": -0.7165323495864868, "rewards/margins": 3.0223255157470703, "rewards/rejected": -3.7388575077056885, "step": 7604 }, { "epoch": 0.88, "learning_rate": 3.511279083500649e-08, "logits/chosen": -1.4003658294677734, "logits/rejected": -1.9235602617263794, "logps/chosen": -456.52252197265625, "logps/rejected": -366.36334228515625, "loss": 0.4976, "rewards/accuracies": 0.75, "rewards/chosen": -0.918946385383606, "rewards/margins": 0.8321403861045837, "rewards/rejected": -1.751086950302124, "step": 7605 }, { "epoch": 0.88, "learning_rate": 3.507735915908822e-08, "logits/chosen": -1.736219882965088, "logits/rejected": -2.1226677894592285, "logps/chosen": -527.1156005859375, "logps/rejected": -361.7975158691406, "loss": 0.3609, "rewards/accuracies": 0.875, "rewards/chosen": -0.68024080991745, "rewards/margins": 1.9000225067138672, "rewards/rejected": -2.580263376235962, "step": 7606 }, { "epoch": 0.88, "learning_rate": 3.504192748316996e-08, "logits/chosen": -2.381582498550415, "logits/rejected": -2.3617539405822754, "logps/chosen": -237.31295776367188, "logps/rejected": -264.87200927734375, "loss": 0.1824, "rewards/accuracies": 0.875, "rewards/chosen": -0.5171858072280884, "rewards/margins": 2.9330527782440186, "rewards/rejected": -3.4502387046813965, "step": 7607 }, { "epoch": 0.89, "learning_rate": 3.500649580725168e-08, "logits/chosen": -3.048222064971924, "logits/rejected": -2.9880573749542236, "logps/chosen": -165.4708251953125, "logps/rejected": -139.1703643798828, "loss": 0.7049, "rewards/accuracies": 0.875, "rewards/chosen": -0.7128046751022339, "rewards/margins": 1.683670997619629, "rewards/rejected": -2.3964757919311523, "step": 7608 }, { "epoch": 0.89, "learning_rate": 3.497106413133341e-08, "logits/chosen": -2.034186840057373, "logits/rejected": -2.1612954139709473, "logps/chosen": -348.9342041015625, "logps/rejected": -322.7240295410156, "loss": 0.1834, "rewards/accuracies": 1.0, "rewards/chosen": -0.562360405921936, "rewards/margins": 3.153618097305298, "rewards/rejected": -3.7159783840179443, "step": 7609 }, { "epoch": 0.89, "learning_rate": 3.493563245541514e-08, "logits/chosen": -2.5055510997772217, "logits/rejected": -2.513185501098633, "logps/chosen": -235.10865783691406, "logps/rejected": -258.5614318847656, "loss": 1.1369, "rewards/accuracies": 0.625, "rewards/chosen": -1.2964661121368408, "rewards/margins": 0.9200330972671509, "rewards/rejected": -2.216499090194702, "step": 7610 }, { "epoch": 0.89, "learning_rate": 3.4900200779496865e-08, "logits/chosen": -2.122264862060547, "logits/rejected": -2.0888943672180176, "logps/chosen": -189.71197509765625, "logps/rejected": -268.19244384765625, "loss": 0.8387, "rewards/accuracies": 0.75, "rewards/chosen": -1.6480445861816406, "rewards/margins": 6.19952392578125, "rewards/rejected": -7.847568511962891, "step": 7611 }, { "epoch": 0.89, "learning_rate": 3.48647691035786e-08, "logits/chosen": -2.5462162494659424, "logits/rejected": -2.386984348297119, "logps/chosen": -199.71084594726562, "logps/rejected": -323.50555419921875, "loss": 0.3425, "rewards/accuracies": 1.0, "rewards/chosen": -0.18095055222511292, "rewards/margins": 2.1839346885681152, "rewards/rejected": -2.364885091781616, "step": 7612 }, { "epoch": 0.89, "learning_rate": 3.482933742766033e-08, "logits/chosen": -2.3475048542022705, "logits/rejected": -2.3897554874420166, "logps/chosen": -293.8746032714844, "logps/rejected": -291.1658020019531, "loss": 0.2535, "rewards/accuracies": 0.875, "rewards/chosen": -0.9285383820533752, "rewards/margins": 2.894887685775757, "rewards/rejected": -3.8234262466430664, "step": 7613 }, { "epoch": 0.89, "learning_rate": 3.479390575174205e-08, "logits/chosen": -2.568373203277588, "logits/rejected": -2.2302021980285645, "logps/chosen": -199.36839294433594, "logps/rejected": -287.2409973144531, "loss": 0.1293, "rewards/accuracies": 1.0, "rewards/chosen": -0.19286921620368958, "rewards/margins": 4.228273391723633, "rewards/rejected": -4.421142101287842, "step": 7614 }, { "epoch": 0.89, "learning_rate": 3.475847407582379e-08, "logits/chosen": -2.0124330520629883, "logits/rejected": -1.9705026149749756, "logps/chosen": -115.16596984863281, "logps/rejected": -141.6118927001953, "loss": 0.2313, "rewards/accuracies": 1.0, "rewards/chosen": -0.4662202000617981, "rewards/margins": 2.2779035568237305, "rewards/rejected": -2.744123697280884, "step": 7615 }, { "epoch": 0.89, "learning_rate": 3.4723042399905516e-08, "logits/chosen": -2.308062791824341, "logits/rejected": -2.5017995834350586, "logps/chosen": -192.04298400878906, "logps/rejected": -219.70993041992188, "loss": 0.2161, "rewards/accuracies": 1.0, "rewards/chosen": -0.8961784243583679, "rewards/margins": 2.8987491130828857, "rewards/rejected": -3.7949275970458984, "step": 7616 }, { "epoch": 0.89, "learning_rate": 3.4687610723987244e-08, "logits/chosen": -1.5001823902130127, "logits/rejected": -2.0855705738067627, "logps/chosen": -357.8998107910156, "logps/rejected": -208.5225830078125, "loss": 0.7362, "rewards/accuracies": 0.5, "rewards/chosen": -1.0668060779571533, "rewards/margins": 0.7136304974555969, "rewards/rejected": -1.7804367542266846, "step": 7617 }, { "epoch": 0.89, "learning_rate": 3.465217904806897e-08, "logits/chosen": -2.5674753189086914, "logits/rejected": -2.3603885173797607, "logps/chosen": -234.48492431640625, "logps/rejected": -260.2656555175781, "loss": 0.8783, "rewards/accuracies": 0.625, "rewards/chosen": -2.0242176055908203, "rewards/margins": -0.004185348749160767, "rewards/rejected": -2.0200324058532715, "step": 7618 }, { "epoch": 0.89, "learning_rate": 3.46167473721507e-08, "logits/chosen": -2.3366308212280273, "logits/rejected": -2.2640366554260254, "logps/chosen": -320.86041259765625, "logps/rejected": -262.58154296875, "loss": 0.2894, "rewards/accuracies": 0.75, "rewards/chosen": -1.1759322881698608, "rewards/margins": 2.8029353618621826, "rewards/rejected": -3.978867530822754, "step": 7619 }, { "epoch": 0.89, "learning_rate": 3.458131569623243e-08, "logits/chosen": -2.2131106853485107, "logits/rejected": -2.2648611068725586, "logps/chosen": -176.7727508544922, "logps/rejected": -194.8362579345703, "loss": 0.4867, "rewards/accuracies": 0.75, "rewards/chosen": -0.37289994955062866, "rewards/margins": 1.27445387840271, "rewards/rejected": -1.6473538875579834, "step": 7620 }, { "epoch": 0.89, "learning_rate": 3.454588402031416e-08, "logits/chosen": -1.6854534149169922, "logits/rejected": -1.9961775541305542, "logps/chosen": -392.02093505859375, "logps/rejected": -389.37213134765625, "loss": 0.3928, "rewards/accuracies": 0.625, "rewards/chosen": -0.5890664458274841, "rewards/margins": 2.6415939331054688, "rewards/rejected": -3.2306604385375977, "step": 7621 }, { "epoch": 0.89, "learning_rate": 3.451045234439589e-08, "logits/chosen": -2.200377941131592, "logits/rejected": -1.970081090927124, "logps/chosen": -230.9412841796875, "logps/rejected": -295.73077392578125, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": 0.0034360885620117188, "rewards/margins": 4.186427116394043, "rewards/rejected": -4.182991027832031, "step": 7622 }, { "epoch": 0.89, "learning_rate": 3.447502066847762e-08, "logits/chosen": -2.366344690322876, "logits/rejected": -2.604785442352295, "logps/chosen": -218.9154510498047, "logps/rejected": -192.0758056640625, "loss": 0.583, "rewards/accuracies": 0.625, "rewards/chosen": -1.2585200071334839, "rewards/margins": 1.7580198049545288, "rewards/rejected": -3.016540050506592, "step": 7623 }, { "epoch": 0.89, "learning_rate": 3.4439588992559346e-08, "logits/chosen": -2.9526591300964355, "logits/rejected": -2.9665260314941406, "logps/chosen": -200.92849731445312, "logps/rejected": -195.41363525390625, "loss": 0.4016, "rewards/accuracies": 0.75, "rewards/chosen": -0.9716732501983643, "rewards/margins": 2.096975326538086, "rewards/rejected": -3.06864857673645, "step": 7624 }, { "epoch": 0.89, "learning_rate": 3.4404157316641074e-08, "logits/chosen": -2.771857261657715, "logits/rejected": -2.6223251819610596, "logps/chosen": -221.04901123046875, "logps/rejected": -151.61904907226562, "loss": 0.2839, "rewards/accuracies": 1.0, "rewards/chosen": -0.9981942176818848, "rewards/margins": 1.567647099494934, "rewards/rejected": -2.5658414363861084, "step": 7625 }, { "epoch": 0.89, "learning_rate": 3.436872564072281e-08, "logits/chosen": -2.5276870727539062, "logits/rejected": -2.2914459705352783, "logps/chosen": -387.9427185058594, "logps/rejected": -424.26922607421875, "loss": 0.7229, "rewards/accuracies": 0.75, "rewards/chosen": -1.3404730558395386, "rewards/margins": 2.36922550201416, "rewards/rejected": -3.709698438644409, "step": 7626 }, { "epoch": 0.89, "learning_rate": 3.433329396480453e-08, "logits/chosen": -2.2207117080688477, "logits/rejected": -1.9295684099197388, "logps/chosen": -196.75418090820312, "logps/rejected": -330.66156005859375, "loss": 0.3161, "rewards/accuracies": 0.875, "rewards/chosen": -0.8626158237457275, "rewards/margins": 1.8759870529174805, "rewards/rejected": -2.738602638244629, "step": 7627 }, { "epoch": 0.89, "learning_rate": 3.429786228888626e-08, "logits/chosen": -2.4348373413085938, "logits/rejected": -2.4366884231567383, "logps/chosen": -288.69110107421875, "logps/rejected": -261.41705322265625, "loss": 0.3186, "rewards/accuracies": 0.875, "rewards/chosen": -0.20772197842597961, "rewards/margins": 2.694310426712036, "rewards/rejected": -2.9020323753356934, "step": 7628 }, { "epoch": 0.89, "learning_rate": 3.4262430612967996e-08, "logits/chosen": -2.7229814529418945, "logits/rejected": -2.923460006713867, "logps/chosen": -272.1068115234375, "logps/rejected": -200.5044403076172, "loss": 1.771, "rewards/accuracies": 0.5, "rewards/chosen": -3.1128547191619873, "rewards/margins": 0.5876425504684448, "rewards/rejected": -3.700497627258301, "step": 7629 }, { "epoch": 0.89, "learning_rate": 3.422699893704972e-08, "logits/chosen": -2.331104278564453, "logits/rejected": -2.621427536010742, "logps/chosen": -313.9878234863281, "logps/rejected": -305.54461669921875, "loss": 0.0964, "rewards/accuracies": 1.0, "rewards/chosen": -0.8274447917938232, "rewards/margins": 3.3709306716918945, "rewards/rejected": -4.198375701904297, "step": 7630 }, { "epoch": 0.89, "learning_rate": 3.419156726113145e-08, "logits/chosen": -2.6498172283172607, "logits/rejected": -2.653878688812256, "logps/chosen": -291.9067077636719, "logps/rejected": -310.1039733886719, "loss": 0.3604, "rewards/accuracies": 0.75, "rewards/chosen": -0.3400808572769165, "rewards/margins": 1.6173481941223145, "rewards/rejected": -1.957429051399231, "step": 7631 }, { "epoch": 0.89, "learning_rate": 3.415613558521318e-08, "logits/chosen": -2.3912501335144043, "logits/rejected": -2.332576274871826, "logps/chosen": -220.55227661132812, "logps/rejected": -265.6580810546875, "loss": 0.3691, "rewards/accuracies": 0.875, "rewards/chosen": -0.9988751411437988, "rewards/margins": 2.561654567718506, "rewards/rejected": -3.560529947280884, "step": 7632 }, { "epoch": 0.89, "learning_rate": 3.4120703909294904e-08, "logits/chosen": -2.8664469718933105, "logits/rejected": -2.9578754901885986, "logps/chosen": -147.25344848632812, "logps/rejected": -273.88983154296875, "loss": 0.0728, "rewards/accuracies": 1.0, "rewards/chosen": -1.0096616744995117, "rewards/margins": 3.7188076972961426, "rewards/rejected": -4.728469371795654, "step": 7633 }, { "epoch": 0.89, "learning_rate": 3.408527223337664e-08, "logits/chosen": -2.6539101600646973, "logits/rejected": -2.710662841796875, "logps/chosen": -206.93312072753906, "logps/rejected": -214.2005615234375, "loss": 0.5138, "rewards/accuracies": 0.75, "rewards/chosen": -0.6466584801673889, "rewards/margins": 1.8962739706039429, "rewards/rejected": -2.5429325103759766, "step": 7634 }, { "epoch": 0.89, "learning_rate": 3.404984055745837e-08, "logits/chosen": -2.765563726425171, "logits/rejected": -2.613551378250122, "logps/chosen": -160.9385528564453, "logps/rejected": -278.3681335449219, "loss": 0.2139, "rewards/accuracies": 1.0, "rewards/chosen": -0.3642592132091522, "rewards/margins": 2.570542097091675, "rewards/rejected": -2.9348013401031494, "step": 7635 }, { "epoch": 0.89, "learning_rate": 3.401440888154009e-08, "logits/chosen": -2.3822414875030518, "logits/rejected": -2.4237587451934814, "logps/chosen": -554.42138671875, "logps/rejected": -357.5065002441406, "loss": 0.6391, "rewards/accuracies": 0.625, "rewards/chosen": -1.4683160781860352, "rewards/margins": 1.0344301462173462, "rewards/rejected": -2.502746343612671, "step": 7636 }, { "epoch": 0.89, "learning_rate": 3.3978977205621826e-08, "logits/chosen": -2.5185210704803467, "logits/rejected": -2.4584767818450928, "logps/chosen": -310.05035400390625, "logps/rejected": -249.13772583007812, "loss": 0.7898, "rewards/accuracies": 0.75, "rewards/chosen": -0.7323188185691833, "rewards/margins": 0.8298559188842773, "rewards/rejected": -1.562174916267395, "step": 7637 }, { "epoch": 0.89, "learning_rate": 3.3943545529703555e-08, "logits/chosen": -2.3858444690704346, "logits/rejected": -2.4281742572784424, "logps/chosen": -254.00807189941406, "logps/rejected": -235.343017578125, "loss": 0.3872, "rewards/accuracies": 0.875, "rewards/chosen": -0.6720791459083557, "rewards/margins": 1.157099723815918, "rewards/rejected": -1.829178810119629, "step": 7638 }, { "epoch": 0.89, "learning_rate": 3.3908113853785283e-08, "logits/chosen": -2.29545259475708, "logits/rejected": -2.562448740005493, "logps/chosen": -412.33258056640625, "logps/rejected": -338.9202880859375, "loss": 0.2462, "rewards/accuracies": 1.0, "rewards/chosen": -1.6225334405899048, "rewards/margins": 1.9694113731384277, "rewards/rejected": -3.591944932937622, "step": 7639 }, { "epoch": 0.89, "learning_rate": 3.387268217786701e-08, "logits/chosen": -2.3682260513305664, "logits/rejected": -2.658961296081543, "logps/chosen": -357.8442687988281, "logps/rejected": -259.3177795410156, "loss": 0.4341, "rewards/accuracies": 0.625, "rewards/chosen": -1.2375680208206177, "rewards/margins": 1.7236268520355225, "rewards/rejected": -2.9611947536468506, "step": 7640 }, { "epoch": 0.89, "learning_rate": 3.383725050194874e-08, "logits/chosen": -1.8723139762878418, "logits/rejected": -1.9047759771347046, "logps/chosen": -339.5200500488281, "logps/rejected": -341.146240234375, "loss": 0.3818, "rewards/accuracies": 0.75, "rewards/chosen": -0.987215518951416, "rewards/margins": 2.448420524597168, "rewards/rejected": -3.435636043548584, "step": 7641 }, { "epoch": 0.89, "learning_rate": 3.380181882603047e-08, "logits/chosen": -1.908645749092102, "logits/rejected": -2.1410751342773438, "logps/chosen": -211.60110473632812, "logps/rejected": -223.4034881591797, "loss": 0.56, "rewards/accuracies": 0.625, "rewards/chosen": -1.1155683994293213, "rewards/margins": 2.709200143814087, "rewards/rejected": -3.824768543243408, "step": 7642 }, { "epoch": 0.89, "learning_rate": 3.37663871501122e-08, "logits/chosen": -2.039562702178955, "logits/rejected": -1.8530409336090088, "logps/chosen": -235.6786651611328, "logps/rejected": -280.2018127441406, "loss": 0.6977, "rewards/accuracies": 0.625, "rewards/chosen": -0.5308732986450195, "rewards/margins": 1.4828643798828125, "rewards/rejected": -2.013737678527832, "step": 7643 }, { "epoch": 0.89, "learning_rate": 3.373095547419393e-08, "logits/chosen": -2.4679765701293945, "logits/rejected": -2.1235852241516113, "logps/chosen": -240.7939453125, "logps/rejected": -321.9285888671875, "loss": 0.2128, "rewards/accuracies": 0.875, "rewards/chosen": -0.3819037675857544, "rewards/margins": 4.185276031494141, "rewards/rejected": -4.5671796798706055, "step": 7644 }, { "epoch": 0.89, "learning_rate": 3.3695523798275656e-08, "logits/chosen": -2.2757949829101562, "logits/rejected": -2.1363863945007324, "logps/chosen": -324.9024353027344, "logps/rejected": -299.1056823730469, "loss": 0.0799, "rewards/accuracies": 1.0, "rewards/chosen": -0.5528488159179688, "rewards/margins": 3.4979281425476074, "rewards/rejected": -4.050776481628418, "step": 7645 }, { "epoch": 0.89, "learning_rate": 3.3660092122357385e-08, "logits/chosen": -2.264565944671631, "logits/rejected": -2.454514265060425, "logps/chosen": -208.4196014404297, "logps/rejected": -221.25445556640625, "loss": 0.6829, "rewards/accuracies": 0.75, "rewards/chosen": -1.3173577785491943, "rewards/margins": 0.96364426612854, "rewards/rejected": -2.2810020446777344, "step": 7646 }, { "epoch": 0.89, "learning_rate": 3.3624660446439113e-08, "logits/chosen": -2.3445000648498535, "logits/rejected": -2.757844924926758, "logps/chosen": -414.34124755859375, "logps/rejected": -213.8111572265625, "loss": 0.4728, "rewards/accuracies": 0.75, "rewards/chosen": -0.5289493799209595, "rewards/margins": 1.5690455436706543, "rewards/rejected": -2.097994804382324, "step": 7647 }, { "epoch": 0.89, "learning_rate": 3.358922877052085e-08, "logits/chosen": -2.443530321121216, "logits/rejected": -2.2405943870544434, "logps/chosen": -247.65237426757812, "logps/rejected": -348.1358947753906, "loss": 0.3274, "rewards/accuracies": 0.75, "rewards/chosen": -1.7776471376419067, "rewards/margins": 1.996127963066101, "rewards/rejected": -3.773775100708008, "step": 7648 }, { "epoch": 0.89, "learning_rate": 3.355379709460257e-08, "logits/chosen": -2.3859331607818604, "logits/rejected": -2.3480231761932373, "logps/chosen": -199.93727111816406, "logps/rejected": -227.84422302246094, "loss": 0.2332, "rewards/accuracies": 0.875, "rewards/chosen": -0.715477466583252, "rewards/margins": 2.982395887374878, "rewards/rejected": -3.697873115539551, "step": 7649 }, { "epoch": 0.89, "learning_rate": 3.35183654186843e-08, "logits/chosen": -2.385646104812622, "logits/rejected": -2.2885100841522217, "logps/chosen": -353.9916687011719, "logps/rejected": -238.59259033203125, "loss": 0.7451, "rewards/accuracies": 0.5, "rewards/chosen": -1.0070385932922363, "rewards/margins": 1.0721392631530762, "rewards/rejected": -2.0791778564453125, "step": 7650 }, { "epoch": 0.89, "learning_rate": 3.3482933742766035e-08, "logits/chosen": -2.3410398960113525, "logits/rejected": -2.3802735805511475, "logps/chosen": -162.75714111328125, "logps/rejected": -195.3233642578125, "loss": 0.4383, "rewards/accuracies": 0.875, "rewards/chosen": -0.14393427968025208, "rewards/margins": 1.8067277669906616, "rewards/rejected": -1.9506620168685913, "step": 7651 }, { "epoch": 0.89, "learning_rate": 3.344750206684776e-08, "logits/chosen": -2.1280322074890137, "logits/rejected": -2.2760677337646484, "logps/chosen": -327.3746032714844, "logps/rejected": -245.4957733154297, "loss": 0.2927, "rewards/accuracies": 0.875, "rewards/chosen": -0.5527987480163574, "rewards/margins": 2.6305887699127197, "rewards/rejected": -3.183387517929077, "step": 7652 }, { "epoch": 0.89, "learning_rate": 3.341207039092949e-08, "logits/chosen": -2.41930890083313, "logits/rejected": -2.651841402053833, "logps/chosen": -324.8143310546875, "logps/rejected": -168.6737823486328, "loss": 0.2135, "rewards/accuracies": 1.0, "rewards/chosen": -0.7462116479873657, "rewards/margins": 1.764349341392517, "rewards/rejected": -2.510560989379883, "step": 7653 }, { "epoch": 0.89, "learning_rate": 3.337663871501122e-08, "logits/chosen": -2.5644845962524414, "logits/rejected": -2.6529788970947266, "logps/chosen": -306.45941162109375, "logps/rejected": -308.3450012207031, "loss": 0.1459, "rewards/accuracies": 1.0, "rewards/chosen": -0.6012537479400635, "rewards/margins": 3.1158299446105957, "rewards/rejected": -3.71708345413208, "step": 7654 }, { "epoch": 0.89, "learning_rate": 3.334120703909294e-08, "logits/chosen": -1.8516416549682617, "logits/rejected": -2.103665828704834, "logps/chosen": -351.6147766113281, "logps/rejected": -343.86761474609375, "loss": 1.0257, "rewards/accuracies": 0.625, "rewards/chosen": -1.269489049911499, "rewards/margins": 1.1537859439849854, "rewards/rejected": -2.4232749938964844, "step": 7655 }, { "epoch": 0.89, "learning_rate": 3.330577536317468e-08, "logits/chosen": -2.7640790939331055, "logits/rejected": -2.690114736557007, "logps/chosen": -187.6517333984375, "logps/rejected": -223.4256134033203, "loss": 0.4132, "rewards/accuracies": 0.625, "rewards/chosen": -0.8334189653396606, "rewards/margins": 1.1216970682144165, "rewards/rejected": -1.9551160335540771, "step": 7656 }, { "epoch": 0.89, "learning_rate": 3.327034368725641e-08, "logits/chosen": -2.6894359588623047, "logits/rejected": -2.854762315750122, "logps/chosen": -347.1199951171875, "logps/rejected": -192.97671508789062, "loss": 0.1342, "rewards/accuracies": 1.0, "rewards/chosen": -0.6202476024627686, "rewards/margins": 2.4951579570770264, "rewards/rejected": -3.115406036376953, "step": 7657 }, { "epoch": 0.89, "learning_rate": 3.3234912011338136e-08, "logits/chosen": -2.6689212322235107, "logits/rejected": -2.6596908569335938, "logps/chosen": -271.7841796875, "logps/rejected": -242.7332000732422, "loss": 0.2796, "rewards/accuracies": 0.875, "rewards/chosen": -0.7787849307060242, "rewards/margins": 1.9692171812057495, "rewards/rejected": -2.748002290725708, "step": 7658 }, { "epoch": 0.89, "learning_rate": 3.3199480335419865e-08, "logits/chosen": -2.9938547611236572, "logits/rejected": -2.9006423950195312, "logps/chosen": -232.12716674804688, "logps/rejected": -178.9857177734375, "loss": 0.2902, "rewards/accuracies": 0.875, "rewards/chosen": -0.483713299036026, "rewards/margins": 1.7413876056671143, "rewards/rejected": -2.2251009941101074, "step": 7659 }, { "epoch": 0.89, "learning_rate": 3.3164048659501594e-08, "logits/chosen": -2.8198580741882324, "logits/rejected": -2.8341641426086426, "logps/chosen": -236.49810791015625, "logps/rejected": -251.93162536621094, "loss": 0.1653, "rewards/accuracies": 1.0, "rewards/chosen": -0.47168272733688354, "rewards/margins": 3.270084857940674, "rewards/rejected": -3.7417678833007812, "step": 7660 }, { "epoch": 0.89, "learning_rate": 3.312861698358332e-08, "logits/chosen": -2.931985378265381, "logits/rejected": -2.9639294147491455, "logps/chosen": -149.93771362304688, "logps/rejected": -187.45413208007812, "loss": 0.4082, "rewards/accuracies": 0.875, "rewards/chosen": -1.4805744886398315, "rewards/margins": 1.3743395805358887, "rewards/rejected": -2.8549141883850098, "step": 7661 }, { "epoch": 0.89, "learning_rate": 3.309318530766505e-08, "logits/chosen": -2.3765816688537598, "logits/rejected": -2.2938594818115234, "logps/chosen": -224.10992431640625, "logps/rejected": -243.92904663085938, "loss": 0.3319, "rewards/accuracies": 0.875, "rewards/chosen": -0.5071866512298584, "rewards/margins": 2.3063862323760986, "rewards/rejected": -2.813572883605957, "step": 7662 }, { "epoch": 0.89, "learning_rate": 3.305775363174678e-08, "logits/chosen": -2.4725093841552734, "logits/rejected": -2.458439588546753, "logps/chosen": -297.870361328125, "logps/rejected": -369.881103515625, "loss": 0.2813, "rewards/accuracies": 0.875, "rewards/chosen": -1.297526478767395, "rewards/margins": 2.308138370513916, "rewards/rejected": -3.6056647300720215, "step": 7663 }, { "epoch": 0.89, "learning_rate": 3.302232195582851e-08, "logits/chosen": -2.761014223098755, "logits/rejected": -2.7023820877075195, "logps/chosen": -258.4473571777344, "logps/rejected": -246.70248413085938, "loss": 0.3099, "rewards/accuracies": 0.75, "rewards/chosen": -1.1458522081375122, "rewards/margins": 3.363274097442627, "rewards/rejected": -4.50912618637085, "step": 7664 }, { "epoch": 0.89, "learning_rate": 3.298689027991024e-08, "logits/chosen": -2.1015801429748535, "logits/rejected": -1.864659070968628, "logps/chosen": -246.90020751953125, "logps/rejected": -285.5361022949219, "loss": 0.5375, "rewards/accuracies": 0.625, "rewards/chosen": -0.7182380557060242, "rewards/margins": 1.512763500213623, "rewards/rejected": -2.231001615524292, "step": 7665 }, { "epoch": 0.89, "learning_rate": 3.2951458603991966e-08, "logits/chosen": -1.7852157354354858, "logits/rejected": -2.249724864959717, "logps/chosen": -287.9789123535156, "logps/rejected": -183.77792358398438, "loss": 0.3572, "rewards/accuracies": 0.875, "rewards/chosen": -0.9650783538818359, "rewards/margins": 1.6263879537582397, "rewards/rejected": -2.591466188430786, "step": 7666 }, { "epoch": 0.89, "learning_rate": 3.2916026928073695e-08, "logits/chosen": -2.0630855560302734, "logits/rejected": -1.758200764656067, "logps/chosen": -277.74810791015625, "logps/rejected": -362.7735290527344, "loss": 0.2917, "rewards/accuracies": 0.875, "rewards/chosen": 0.1279546022415161, "rewards/margins": 3.341498851776123, "rewards/rejected": -3.2135443687438965, "step": 7667 }, { "epoch": 0.89, "learning_rate": 3.2880595252155424e-08, "logits/chosen": -2.110138177871704, "logits/rejected": -2.3599557876586914, "logps/chosen": -260.71832275390625, "logps/rejected": -253.713623046875, "loss": 0.5939, "rewards/accuracies": 0.75, "rewards/chosen": -1.4029574394226074, "rewards/margins": 2.2544403076171875, "rewards/rejected": -3.657397508621216, "step": 7668 }, { "epoch": 0.89, "learning_rate": 3.284516357623715e-08, "logits/chosen": -2.3457024097442627, "logits/rejected": -2.5956506729125977, "logps/chosen": -520.1580810546875, "logps/rejected": -455.50872802734375, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -0.7137840390205383, "rewards/margins": 3.9460339546203613, "rewards/rejected": -4.659817695617676, "step": 7669 }, { "epoch": 0.89, "learning_rate": 3.280973190031889e-08, "logits/chosen": -1.896113634109497, "logits/rejected": -2.3443267345428467, "logps/chosen": -435.766357421875, "logps/rejected": -247.95126342773438, "loss": 0.8535, "rewards/accuracies": 0.625, "rewards/chosen": -1.4853274822235107, "rewards/margins": 0.5584214329719543, "rewards/rejected": -2.0437488555908203, "step": 7670 }, { "epoch": 0.89, "learning_rate": 3.277430022440061e-08, "logits/chosen": -2.1985597610473633, "logits/rejected": -2.538966178894043, "logps/chosen": -475.57501220703125, "logps/rejected": -330.271484375, "loss": 0.2831, "rewards/accuracies": 0.75, "rewards/chosen": -0.17180538177490234, "rewards/margins": 2.6048200130462646, "rewards/rejected": -2.776625394821167, "step": 7671 }, { "epoch": 0.89, "learning_rate": 3.273886854848234e-08, "logits/chosen": -2.060203790664673, "logits/rejected": -1.928792953491211, "logps/chosen": -351.61859130859375, "logps/rejected": -290.73272705078125, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": -0.14719811081886292, "rewards/margins": 1.8245810270309448, "rewards/rejected": -1.9717791080474854, "step": 7672 }, { "epoch": 0.89, "learning_rate": 3.2703436872564074e-08, "logits/chosen": -2.4832000732421875, "logits/rejected": -2.5773651599884033, "logps/chosen": -233.0186767578125, "logps/rejected": -383.2530517578125, "loss": 0.1901, "rewards/accuracies": 1.0, "rewards/chosen": -0.08504168689250946, "rewards/margins": 2.4097607135772705, "rewards/rejected": -2.494802474975586, "step": 7673 }, { "epoch": 0.89, "learning_rate": 3.2668005196645796e-08, "logits/chosen": -1.7958588600158691, "logits/rejected": -1.9948673248291016, "logps/chosen": -523.9320068359375, "logps/rejected": -426.4649658203125, "loss": 0.24, "rewards/accuracies": 0.875, "rewards/chosen": -0.3226063847541809, "rewards/margins": 2.351445198059082, "rewards/rejected": -2.6740517616271973, "step": 7674 }, { "epoch": 0.89, "learning_rate": 3.263257352072753e-08, "logits/chosen": -2.020905017852783, "logits/rejected": -2.3196091651916504, "logps/chosen": -468.1844177246094, "logps/rejected": -355.77301025390625, "loss": 0.4032, "rewards/accuracies": 0.875, "rewards/chosen": -1.5732598304748535, "rewards/margins": 1.1472797393798828, "rewards/rejected": -2.7205395698547363, "step": 7675 }, { "epoch": 0.89, "learning_rate": 3.259714184480926e-08, "logits/chosen": -1.8211593627929688, "logits/rejected": -1.6197009086608887, "logps/chosen": -354.3990783691406, "logps/rejected": -478.8633728027344, "loss": 0.2456, "rewards/accuracies": 0.875, "rewards/chosen": 0.16968747973442078, "rewards/margins": 2.9078164100646973, "rewards/rejected": -2.738128662109375, "step": 7676 }, { "epoch": 0.89, "learning_rate": 3.256171016889098e-08, "logits/chosen": -2.373667001724243, "logits/rejected": -2.11265230178833, "logps/chosen": -183.045654296875, "logps/rejected": -152.43719482421875, "loss": 0.7171, "rewards/accuracies": 0.625, "rewards/chosen": -1.5732691287994385, "rewards/margins": 0.27703166007995605, "rewards/rejected": -1.8503007888793945, "step": 7677 }, { "epoch": 0.89, "learning_rate": 3.252627849297272e-08, "logits/chosen": -2.786393642425537, "logits/rejected": -2.772739887237549, "logps/chosen": -399.50152587890625, "logps/rejected": -282.0231628417969, "loss": 0.2339, "rewards/accuracies": 0.875, "rewards/chosen": -0.5837604999542236, "rewards/margins": 2.7103981971740723, "rewards/rejected": -3.294158458709717, "step": 7678 }, { "epoch": 0.89, "learning_rate": 3.2490846817054446e-08, "logits/chosen": -2.1490118503570557, "logits/rejected": -2.0671441555023193, "logps/chosen": -396.83148193359375, "logps/rejected": -245.38018798828125, "loss": 0.3938, "rewards/accuracies": 0.875, "rewards/chosen": -1.594802975654602, "rewards/margins": 2.2088875770568848, "rewards/rejected": -3.8036904335021973, "step": 7679 }, { "epoch": 0.89, "learning_rate": 3.2455415141136175e-08, "logits/chosen": -2.4950942993164062, "logits/rejected": -2.6559665203094482, "logps/chosen": -253.08169555664062, "logps/rejected": -123.19444274902344, "loss": 0.4677, "rewards/accuracies": 0.875, "rewards/chosen": -1.3046741485595703, "rewards/margins": 0.8503532409667969, "rewards/rejected": -2.155027389526367, "step": 7680 }, { "epoch": 0.89, "learning_rate": 3.2419983465217904e-08, "logits/chosen": -1.454131841659546, "logits/rejected": -1.5898804664611816, "logps/chosen": -299.0245361328125, "logps/rejected": -333.68988037109375, "loss": 0.3098, "rewards/accuracies": 1.0, "rewards/chosen": -0.7924443483352661, "rewards/margins": 1.945871353149414, "rewards/rejected": -2.7383158206939697, "step": 7681 }, { "epoch": 0.89, "learning_rate": 3.238455178929963e-08, "logits/chosen": -2.1918280124664307, "logits/rejected": -2.203430652618408, "logps/chosen": -546.2197265625, "logps/rejected": -328.8348388671875, "loss": 0.2269, "rewards/accuracies": 0.875, "rewards/chosen": -1.4717222452163696, "rewards/margins": 2.10371732711792, "rewards/rejected": -3.575439453125, "step": 7682 }, { "epoch": 0.89, "learning_rate": 3.234912011338136e-08, "logits/chosen": -2.4558475017547607, "logits/rejected": -2.487470865249634, "logps/chosen": -479.5130310058594, "logps/rejected": -312.52593994140625, "loss": 0.3714, "rewards/accuracies": 0.75, "rewards/chosen": -0.6165217161178589, "rewards/margins": 2.8955581188201904, "rewards/rejected": -3.512079954147339, "step": 7683 }, { "epoch": 0.89, "learning_rate": 3.231368843746309e-08, "logits/chosen": -2.5036206245422363, "logits/rejected": -2.8502919673919678, "logps/chosen": -248.83132934570312, "logps/rejected": -274.6932678222656, "loss": 0.119, "rewards/accuracies": 1.0, "rewards/chosen": -0.9348282217979431, "rewards/margins": 2.7907192707061768, "rewards/rejected": -3.7255475521087646, "step": 7684 }, { "epoch": 0.89, "learning_rate": 3.227825676154482e-08, "logits/chosen": -2.4815752506256104, "logits/rejected": -2.6117172241210938, "logps/chosen": -410.2247314453125, "logps/rejected": -391.9031677246094, "loss": 0.5225, "rewards/accuracies": 0.875, "rewards/chosen": -1.161960482597351, "rewards/margins": 1.9557812213897705, "rewards/rejected": -3.117741823196411, "step": 7685 }, { "epoch": 0.89, "learning_rate": 3.224282508562655e-08, "logits/chosen": -2.4186229705810547, "logits/rejected": -2.4158034324645996, "logps/chosen": -85.47789001464844, "logps/rejected": -159.77943420410156, "loss": 0.6325, "rewards/accuracies": 0.625, "rewards/chosen": -1.376399278640747, "rewards/margins": 2.5285327434539795, "rewards/rejected": -3.9049320220947266, "step": 7686 }, { "epoch": 0.89, "learning_rate": 3.2207393409708276e-08, "logits/chosen": -2.766472578048706, "logits/rejected": -2.4907023906707764, "logps/chosen": -260.208740234375, "logps/rejected": -223.1270294189453, "loss": 0.3246, "rewards/accuracies": 0.875, "rewards/chosen": -0.7621030807495117, "rewards/margins": 2.6078195571899414, "rewards/rejected": -3.369922637939453, "step": 7687 }, { "epoch": 0.89, "learning_rate": 3.2171961733790005e-08, "logits/chosen": -2.0546655654907227, "logits/rejected": -1.9740486145019531, "logps/chosen": -143.8533477783203, "logps/rejected": -192.94357299804688, "loss": 0.4871, "rewards/accuracies": 0.75, "rewards/chosen": -1.3174545764923096, "rewards/margins": 0.7432514429092407, "rewards/rejected": -2.06070613861084, "step": 7688 }, { "epoch": 0.89, "learning_rate": 3.213653005787174e-08, "logits/chosen": -2.282803773880005, "logits/rejected": -2.2564902305603027, "logps/chosen": -403.13470458984375, "logps/rejected": -364.0293884277344, "loss": 0.4573, "rewards/accuracies": 0.875, "rewards/chosen": -0.6409841775894165, "rewards/margins": 2.1511499881744385, "rewards/rejected": -2.7921340465545654, "step": 7689 }, { "epoch": 0.89, "learning_rate": 3.210109838195346e-08, "logits/chosen": -2.4727511405944824, "logits/rejected": -2.472590923309326, "logps/chosen": -175.98504638671875, "logps/rejected": -165.74166870117188, "loss": 0.3011, "rewards/accuracies": 0.875, "rewards/chosen": -0.8442423343658447, "rewards/margins": 1.7452383041381836, "rewards/rejected": -2.5894806385040283, "step": 7690 }, { "epoch": 0.89, "learning_rate": 3.206566670603519e-08, "logits/chosen": -2.5262985229492188, "logits/rejected": -2.464149236679077, "logps/chosen": -256.99908447265625, "logps/rejected": -251.1205596923828, "loss": 0.1542, "rewards/accuracies": 1.0, "rewards/chosen": -0.9181716442108154, "rewards/margins": 2.593554973602295, "rewards/rejected": -3.5117263793945312, "step": 7691 }, { "epoch": 0.89, "learning_rate": 3.2030235030116927e-08, "logits/chosen": -1.8887157440185547, "logits/rejected": -2.0584282875061035, "logps/chosen": -230.34088134765625, "logps/rejected": -233.03871154785156, "loss": 0.768, "rewards/accuracies": 0.75, "rewards/chosen": -1.3511770963668823, "rewards/margins": 1.249011754989624, "rewards/rejected": -2.600188732147217, "step": 7692 }, { "epoch": 0.89, "learning_rate": 3.199480335419865e-08, "logits/chosen": -2.304762840270996, "logits/rejected": -2.536038398742676, "logps/chosen": -204.12928771972656, "logps/rejected": -313.2115478515625, "loss": 0.2626, "rewards/accuracies": 0.875, "rewards/chosen": -0.3411281108856201, "rewards/margins": 2.7864460945129395, "rewards/rejected": -3.1275739669799805, "step": 7693 }, { "epoch": 0.9, "learning_rate": 3.1959371678280384e-08, "logits/chosen": -2.753767728805542, "logits/rejected": -2.6344051361083984, "logps/chosen": -320.63800048828125, "logps/rejected": -294.47784423828125, "loss": 0.6327, "rewards/accuracies": 0.75, "rewards/chosen": -0.23476015031337738, "rewards/margins": 0.9915107488632202, "rewards/rejected": -1.2262707948684692, "step": 7694 }, { "epoch": 0.9, "learning_rate": 3.192394000236211e-08, "logits/chosen": -2.4077565670013428, "logits/rejected": -2.5633111000061035, "logps/chosen": -356.4228210449219, "logps/rejected": -191.31298828125, "loss": 0.2176, "rewards/accuracies": 1.0, "rewards/chosen": 0.17679202556610107, "rewards/margins": 1.930654764175415, "rewards/rejected": -1.753862738609314, "step": 7695 }, { "epoch": 0.9, "learning_rate": 3.1888508326443835e-08, "logits/chosen": -2.474569797515869, "logits/rejected": -2.5959839820861816, "logps/chosen": -273.33428955078125, "logps/rejected": -185.90911865234375, "loss": 0.1758, "rewards/accuracies": 1.0, "rewards/chosen": -0.325956255197525, "rewards/margins": 2.4216866493225098, "rewards/rejected": -2.747642993927002, "step": 7696 }, { "epoch": 0.9, "learning_rate": 3.185307665052557e-08, "logits/chosen": -2.06085205078125, "logits/rejected": -2.012091875076294, "logps/chosen": -230.58041381835938, "logps/rejected": -289.5764465332031, "loss": 1.3405, "rewards/accuracies": 0.375, "rewards/chosen": -1.52175772190094, "rewards/margins": -0.4128458499908447, "rewards/rejected": -1.1089118719100952, "step": 7697 }, { "epoch": 0.9, "learning_rate": 3.18176449746073e-08, "logits/chosen": -2.3191304206848145, "logits/rejected": -2.2596113681793213, "logps/chosen": -238.52813720703125, "logps/rejected": -247.52743530273438, "loss": 0.2378, "rewards/accuracies": 0.875, "rewards/chosen": -0.25012269616127014, "rewards/margins": 2.1630523204803467, "rewards/rejected": -2.413175106048584, "step": 7698 }, { "epoch": 0.9, "learning_rate": 3.178221329868903e-08, "logits/chosen": -1.945847988128662, "logits/rejected": -2.3941879272460938, "logps/chosen": -423.27508544921875, "logps/rejected": -291.5628662109375, "loss": 0.4376, "rewards/accuracies": 0.75, "rewards/chosen": -0.9221624135971069, "rewards/margins": 1.828317403793335, "rewards/rejected": -2.7504796981811523, "step": 7699 }, { "epoch": 0.9, "learning_rate": 3.1746781622770756e-08, "logits/chosen": -2.4278039932250977, "logits/rejected": -2.5137217044830322, "logps/chosen": -329.341064453125, "logps/rejected": -266.0711364746094, "loss": 0.3671, "rewards/accuracies": 0.875, "rewards/chosen": -0.9025762677192688, "rewards/margins": 1.5573179721832275, "rewards/rejected": -2.4598941802978516, "step": 7700 }, { "epoch": 0.9, "learning_rate": 3.1711349946852485e-08, "logits/chosen": -2.5545101165771484, "logits/rejected": -2.583695411682129, "logps/chosen": -150.54376220703125, "logps/rejected": -205.59860229492188, "loss": 0.5287, "rewards/accuracies": 0.75, "rewards/chosen": -0.8792843222618103, "rewards/margins": 1.3776190280914307, "rewards/rejected": -2.256903648376465, "step": 7701 }, { "epoch": 0.9, "learning_rate": 3.1675918270934214e-08, "logits/chosen": -2.8962819576263428, "logits/rejected": -2.9400529861450195, "logps/chosen": -336.44317626953125, "logps/rejected": -392.6435852050781, "loss": 0.5336, "rewards/accuracies": 0.625, "rewards/chosen": -0.761339008808136, "rewards/margins": 1.4799129962921143, "rewards/rejected": -2.2412519454956055, "step": 7702 }, { "epoch": 0.9, "learning_rate": 3.164048659501594e-08, "logits/chosen": -2.3159239292144775, "logits/rejected": -2.376765012741089, "logps/chosen": -409.47955322265625, "logps/rejected": -289.60595703125, "loss": 0.6077, "rewards/accuracies": 0.625, "rewards/chosen": -1.5045260190963745, "rewards/margins": 1.6920639276504517, "rewards/rejected": -3.196589946746826, "step": 7703 }, { "epoch": 0.9, "learning_rate": 3.160505491909767e-08, "logits/chosen": -2.2325756549835205, "logits/rejected": -2.2492318153381348, "logps/chosen": -226.4979705810547, "logps/rejected": -271.08416748046875, "loss": 0.124, "rewards/accuracies": 0.875, "rewards/chosen": 0.0885065346956253, "rewards/margins": 3.9135594367980957, "rewards/rejected": -3.8250532150268555, "step": 7704 }, { "epoch": 0.9, "learning_rate": 3.15696232431794e-08, "logits/chosen": -2.6708874702453613, "logits/rejected": -2.686033248901367, "logps/chosen": -308.9173583984375, "logps/rejected": -272.8843078613281, "loss": 0.6082, "rewards/accuracies": 0.625, "rewards/chosen": -0.9487732648849487, "rewards/margins": 0.7681126594543457, "rewards/rejected": -1.7168858051300049, "step": 7705 }, { "epoch": 0.9, "learning_rate": 3.153419156726113e-08, "logits/chosen": -1.3865489959716797, "logits/rejected": -1.4530807733535767, "logps/chosen": -285.681396484375, "logps/rejected": -307.33258056640625, "loss": 0.2531, "rewards/accuracies": 0.875, "rewards/chosen": -1.252589225769043, "rewards/margins": 3.6025233268737793, "rewards/rejected": -4.855112075805664, "step": 7706 }, { "epoch": 0.9, "learning_rate": 3.149875989134286e-08, "logits/chosen": -2.062410593032837, "logits/rejected": -2.1403391361236572, "logps/chosen": -439.2113342285156, "logps/rejected": -522.3545532226562, "loss": 0.8571, "rewards/accuracies": 0.625, "rewards/chosen": -0.5995011329650879, "rewards/margins": 0.6146401166915894, "rewards/rejected": -1.2141412496566772, "step": 7707 }, { "epoch": 0.9, "learning_rate": 3.1463328215424586e-08, "logits/chosen": -1.9725115299224854, "logits/rejected": -1.919339656829834, "logps/chosen": -295.0091552734375, "logps/rejected": -266.1778564453125, "loss": 0.2097, "rewards/accuracies": 0.875, "rewards/chosen": -0.180082306265831, "rewards/margins": 3.3249170780181885, "rewards/rejected": -3.5049996376037598, "step": 7708 }, { "epoch": 0.9, "learning_rate": 3.1427896539506315e-08, "logits/chosen": -1.8328361511230469, "logits/rejected": -1.9955811500549316, "logps/chosen": -277.7185363769531, "logps/rejected": -328.88580322265625, "loss": 0.4601, "rewards/accuracies": 0.625, "rewards/chosen": -0.825038492679596, "rewards/margins": 1.0241550207138062, "rewards/rejected": -1.8491935729980469, "step": 7709 }, { "epoch": 0.9, "learning_rate": 3.1392464863588044e-08, "logits/chosen": -2.7796478271484375, "logits/rejected": -2.4764463901519775, "logps/chosen": -289.9639892578125, "logps/rejected": -395.9324035644531, "loss": 0.1462, "rewards/accuracies": 1.0, "rewards/chosen": -0.604250431060791, "rewards/margins": 2.898554563522339, "rewards/rejected": -3.502805233001709, "step": 7710 }, { "epoch": 0.9, "learning_rate": 3.135703318766978e-08, "logits/chosen": -2.3143067359924316, "logits/rejected": -2.2599239349365234, "logps/chosen": -328.349853515625, "logps/rejected": -383.67962646484375, "loss": 0.3402, "rewards/accuracies": 0.75, "rewards/chosen": -1.3592385053634644, "rewards/margins": 2.367685556411743, "rewards/rejected": -3.726924419403076, "step": 7711 }, { "epoch": 0.9, "learning_rate": 3.13216015117515e-08, "logits/chosen": -2.427824020385742, "logits/rejected": -2.3860466480255127, "logps/chosen": -246.85569763183594, "logps/rejected": -212.95767211914062, "loss": 0.5264, "rewards/accuracies": 0.875, "rewards/chosen": -0.9636602997779846, "rewards/margins": 1.4756860733032227, "rewards/rejected": -2.4393463134765625, "step": 7712 }, { "epoch": 0.9, "learning_rate": 3.128616983583323e-08, "logits/chosen": -2.8860116004943848, "logits/rejected": -2.9068150520324707, "logps/chosen": -237.21530151367188, "logps/rejected": -248.81024169921875, "loss": 0.4057, "rewards/accuracies": 0.875, "rewards/chosen": -0.5813058018684387, "rewards/margins": 1.8793487548828125, "rewards/rejected": -2.4606542587280273, "step": 7713 }, { "epoch": 0.9, "learning_rate": 3.1250738159914965e-08, "logits/chosen": -2.8352668285369873, "logits/rejected": -2.840061902999878, "logps/chosen": -385.05279541015625, "logps/rejected": -292.4405822753906, "loss": 0.2905, "rewards/accuracies": 0.75, "rewards/chosen": -0.9941176772117615, "rewards/margins": 2.370908737182617, "rewards/rejected": -3.3650264739990234, "step": 7714 }, { "epoch": 0.9, "learning_rate": 3.1215306483996694e-08, "logits/chosen": -2.630293607711792, "logits/rejected": -2.560131072998047, "logps/chosen": -455.25555419921875, "logps/rejected": -193.710205078125, "loss": 0.2912, "rewards/accuracies": 0.875, "rewards/chosen": -0.7224465012550354, "rewards/margins": 1.6022238731384277, "rewards/rejected": -2.3246703147888184, "step": 7715 }, { "epoch": 0.9, "learning_rate": 3.117987480807842e-08, "logits/chosen": -2.578735113143921, "logits/rejected": -2.6199350357055664, "logps/chosen": -216.69082641601562, "logps/rejected": -169.00830078125, "loss": 0.1891, "rewards/accuracies": 1.0, "rewards/chosen": -0.47047513723373413, "rewards/margins": 2.0335986614227295, "rewards/rejected": -2.5040738582611084, "step": 7716 }, { "epoch": 0.9, "learning_rate": 3.114444313216015e-08, "logits/chosen": -2.3303771018981934, "logits/rejected": -2.497947931289673, "logps/chosen": -324.0134582519531, "logps/rejected": -263.43499755859375, "loss": 0.6015, "rewards/accuracies": 0.625, "rewards/chosen": -1.3786022663116455, "rewards/margins": 0.44372329115867615, "rewards/rejected": -1.822325587272644, "step": 7717 }, { "epoch": 0.9, "learning_rate": 3.110901145624188e-08, "logits/chosen": -1.850367784500122, "logits/rejected": -2.1428844928741455, "logps/chosen": -224.75975036621094, "logps/rejected": -193.00807189941406, "loss": 0.2547, "rewards/accuracies": 0.875, "rewards/chosen": -0.9381294846534729, "rewards/margins": 1.828981637954712, "rewards/rejected": -2.76711106300354, "step": 7718 }, { "epoch": 0.9, "learning_rate": 3.107357978032361e-08, "logits/chosen": -2.212958574295044, "logits/rejected": -2.294132947921753, "logps/chosen": -216.19674682617188, "logps/rejected": -282.4936218261719, "loss": 0.3646, "rewards/accuracies": 0.875, "rewards/chosen": -0.8140599727630615, "rewards/margins": 1.5400123596191406, "rewards/rejected": -2.354072093963623, "step": 7719 }, { "epoch": 0.9, "learning_rate": 3.103814810440534e-08, "logits/chosen": -2.7201569080352783, "logits/rejected": -2.369631767272949, "logps/chosen": -214.79458618164062, "logps/rejected": -394.34307861328125, "loss": 0.5078, "rewards/accuracies": 0.75, "rewards/chosen": -0.9077961444854736, "rewards/margins": 2.902552843093872, "rewards/rejected": -3.810349225997925, "step": 7720 }, { "epoch": 0.9, "learning_rate": 3.1002716428487067e-08, "logits/chosen": -1.8970357179641724, "logits/rejected": -1.9875106811523438, "logps/chosen": -178.24368286132812, "logps/rejected": -179.4664764404297, "loss": 0.6482, "rewards/accuracies": 0.5, "rewards/chosen": -0.2592839002609253, "rewards/margins": 0.49247926473617554, "rewards/rejected": -0.751763105392456, "step": 7721 }, { "epoch": 0.9, "learning_rate": 3.0967284752568795e-08, "logits/chosen": -2.3443636894226074, "logits/rejected": -2.330768585205078, "logps/chosen": -313.458740234375, "logps/rejected": -256.6027526855469, "loss": 0.2225, "rewards/accuracies": 0.875, "rewards/chosen": -0.4268263876438141, "rewards/margins": 1.8772671222686768, "rewards/rejected": -2.304093360900879, "step": 7722 }, { "epoch": 0.9, "learning_rate": 3.0931853076650524e-08, "logits/chosen": -1.7862917184829712, "logits/rejected": -1.865889549255371, "logps/chosen": -299.85302734375, "logps/rejected": -416.4881286621094, "loss": 0.5282, "rewards/accuracies": 0.75, "rewards/chosen": -1.8260456323623657, "rewards/margins": 2.555398464202881, "rewards/rejected": -4.381443977355957, "step": 7723 }, { "epoch": 0.9, "learning_rate": 3.089642140073225e-08, "logits/chosen": -1.8699270486831665, "logits/rejected": -1.8416907787322998, "logps/chosen": -393.93292236328125, "logps/rejected": -305.73565673828125, "loss": 0.7024, "rewards/accuracies": 0.625, "rewards/chosen": -0.33216023445129395, "rewards/margins": 1.9714853763580322, "rewards/rejected": -2.3036458492279053, "step": 7724 }, { "epoch": 0.9, "learning_rate": 3.086098972481398e-08, "logits/chosen": -1.9424307346343994, "logits/rejected": -1.8300082683563232, "logps/chosen": -245.59762573242188, "logps/rejected": -316.1534118652344, "loss": 0.8619, "rewards/accuracies": 0.5, "rewards/chosen": -1.7061697244644165, "rewards/margins": 0.6689471006393433, "rewards/rejected": -2.3751168251037598, "step": 7725 }, { "epoch": 0.9, "learning_rate": 3.082555804889571e-08, "logits/chosen": -1.8793165683746338, "logits/rejected": -2.098295211791992, "logps/chosen": -516.914794921875, "logps/rejected": -354.364990234375, "loss": 0.2848, "rewards/accuracies": 0.875, "rewards/chosen": -1.4344823360443115, "rewards/margins": 2.1833534240722656, "rewards/rejected": -3.6178359985351562, "step": 7726 }, { "epoch": 0.9, "learning_rate": 3.079012637297744e-08, "logits/chosen": -2.2876880168914795, "logits/rejected": -2.039128065109253, "logps/chosen": -324.7662353515625, "logps/rejected": -390.26214599609375, "loss": 0.6458, "rewards/accuracies": 0.625, "rewards/chosen": -0.6979549527168274, "rewards/margins": 1.1160484552383423, "rewards/rejected": -1.8140032291412354, "step": 7727 }, { "epoch": 0.9, "learning_rate": 3.075469469705917e-08, "logits/chosen": -2.5558853149414062, "logits/rejected": -2.8291101455688477, "logps/chosen": -300.22039794921875, "logps/rejected": -271.5733947753906, "loss": 0.1984, "rewards/accuracies": 0.875, "rewards/chosen": -0.8689141869544983, "rewards/margins": 4.188506126403809, "rewards/rejected": -5.057420253753662, "step": 7728 }, { "epoch": 0.9, "learning_rate": 3.0719263021140897e-08, "logits/chosen": -1.8045103549957275, "logits/rejected": -1.456221580505371, "logps/chosen": -349.3807373046875, "logps/rejected": -455.7493896484375, "loss": 0.5373, "rewards/accuracies": 0.75, "rewards/chosen": -0.446503221988678, "rewards/margins": 2.0919976234436035, "rewards/rejected": -2.538501024246216, "step": 7729 }, { "epoch": 0.9, "learning_rate": 3.0683831345222625e-08, "logits/chosen": -2.5353922843933105, "logits/rejected": -2.7000842094421387, "logps/chosen": -321.3793029785156, "logps/rejected": -289.55181884765625, "loss": 0.1452, "rewards/accuracies": 1.0, "rewards/chosen": 0.19543391466140747, "rewards/margins": 2.2365317344665527, "rewards/rejected": -2.041097640991211, "step": 7730 }, { "epoch": 0.9, "learning_rate": 3.0648399669304354e-08, "logits/chosen": -2.8129920959472656, "logits/rejected": -2.851372480392456, "logps/chosen": -244.62733459472656, "logps/rejected": -324.7100830078125, "loss": 0.1196, "rewards/accuracies": 1.0, "rewards/chosen": -0.676973819732666, "rewards/margins": 3.6947643756866455, "rewards/rejected": -4.371737957000732, "step": 7731 }, { "epoch": 0.9, "learning_rate": 3.061296799338608e-08, "logits/chosen": -2.2964160442352295, "logits/rejected": -2.0522241592407227, "logps/chosen": -204.27105712890625, "logps/rejected": -198.008544921875, "loss": 0.6387, "rewards/accuracies": 0.5, "rewards/chosen": -1.0785739421844482, "rewards/margins": 0.6751055717468262, "rewards/rejected": -1.7536795139312744, "step": 7732 }, { "epoch": 0.9, "learning_rate": 3.057753631746782e-08, "logits/chosen": -2.0741875171661377, "logits/rejected": -2.0862209796905518, "logps/chosen": -216.65496826171875, "logps/rejected": -300.555419921875, "loss": 0.6198, "rewards/accuracies": 0.625, "rewards/chosen": -1.0175130367279053, "rewards/margins": 0.8269424438476562, "rewards/rejected": -1.8444554805755615, "step": 7733 }, { "epoch": 0.9, "learning_rate": 3.054210464154955e-08, "logits/chosen": -2.228144884109497, "logits/rejected": -2.152557611465454, "logps/chosen": -286.3065185546875, "logps/rejected": -376.6913146972656, "loss": 1.2972, "rewards/accuracies": 0.5, "rewards/chosen": -2.2046055793762207, "rewards/margins": -0.22007551789283752, "rewards/rejected": -1.9845302104949951, "step": 7734 }, { "epoch": 0.9, "learning_rate": 3.050667296563127e-08, "logits/chosen": -2.1313629150390625, "logits/rejected": -2.0979037284851074, "logps/chosen": -216.67672729492188, "logps/rejected": -340.4228210449219, "loss": 0.5765, "rewards/accuracies": 0.75, "rewards/chosen": -2.1394686698913574, "rewards/margins": 2.801581859588623, "rewards/rejected": -4.9410505294799805, "step": 7735 }, { "epoch": 0.9, "learning_rate": 3.0471241289713004e-08, "logits/chosen": -2.3868799209594727, "logits/rejected": -2.6906204223632812, "logps/chosen": -418.4458923339844, "logps/rejected": -235.22140502929688, "loss": 0.337, "rewards/accuracies": 0.75, "rewards/chosen": -1.1503205299377441, "rewards/margins": 1.3690664768218994, "rewards/rejected": -2.5193872451782227, "step": 7736 }, { "epoch": 0.9, "learning_rate": 3.043580961379473e-08, "logits/chosen": -2.108027458190918, "logits/rejected": -2.2079005241394043, "logps/chosen": -311.6959533691406, "logps/rejected": -285.4506530761719, "loss": 4.7369, "rewards/accuracies": 0.75, "rewards/chosen": -5.372298240661621, "rewards/margins": -1.764843225479126, "rewards/rejected": -3.607455015182495, "step": 7737 }, { "epoch": 0.9, "learning_rate": 3.040037793787646e-08, "logits/chosen": -2.4244465827941895, "logits/rejected": -2.312498092651367, "logps/chosen": -234.98977661132812, "logps/rejected": -252.34422302246094, "loss": 0.4269, "rewards/accuracies": 0.875, "rewards/chosen": -0.6346054077148438, "rewards/margins": 2.9261088371276855, "rewards/rejected": -3.5607142448425293, "step": 7738 }, { "epoch": 0.9, "learning_rate": 3.036494626195819e-08, "logits/chosen": -2.064203977584839, "logits/rejected": -2.0394821166992188, "logps/chosen": -229.3427276611328, "logps/rejected": -254.47433471679688, "loss": 0.3944, "rewards/accuracies": 0.75, "rewards/chosen": -0.996541440486908, "rewards/margins": 3.8487119674682617, "rewards/rejected": -4.8452534675598145, "step": 7739 }, { "epoch": 0.9, "learning_rate": 3.032951458603992e-08, "logits/chosen": -2.5769548416137695, "logits/rejected": -2.9213035106658936, "logps/chosen": -251.8530731201172, "logps/rejected": -202.98532104492188, "loss": 0.3098, "rewards/accuracies": 0.875, "rewards/chosen": -1.129786729812622, "rewards/margins": 2.6989612579345703, "rewards/rejected": -3.8287482261657715, "step": 7740 }, { "epoch": 0.9, "learning_rate": 3.029408291012165e-08, "logits/chosen": -1.5221531391143799, "logits/rejected": -2.0079238414764404, "logps/chosen": -469.9354248046875, "logps/rejected": -372.0657958984375, "loss": 0.3998, "rewards/accuracies": 0.875, "rewards/chosen": -0.45569857954978943, "rewards/margins": 2.3955304622650146, "rewards/rejected": -2.851228952407837, "step": 7741 }, { "epoch": 0.9, "learning_rate": 3.025865123420338e-08, "logits/chosen": -1.8487194776535034, "logits/rejected": -2.0471534729003906, "logps/chosen": -383.13214111328125, "logps/rejected": -249.89935302734375, "loss": 0.5481, "rewards/accuracies": 0.625, "rewards/chosen": -0.4702525734901428, "rewards/margins": 1.6733365058898926, "rewards/rejected": -2.1435890197753906, "step": 7742 }, { "epoch": 0.9, "learning_rate": 3.0223219558285106e-08, "logits/chosen": -2.3251304626464844, "logits/rejected": -2.3829345703125, "logps/chosen": -403.6244201660156, "logps/rejected": -433.0500793457031, "loss": 0.736, "rewards/accuracies": 0.875, "rewards/chosen": -0.9047062397003174, "rewards/margins": 1.41823410987854, "rewards/rejected": -2.3229403495788574, "step": 7743 }, { "epoch": 0.9, "learning_rate": 3.0187787882366834e-08, "logits/chosen": -1.660660982131958, "logits/rejected": -1.8386932611465454, "logps/chosen": -512.203369140625, "logps/rejected": -333.9020690917969, "loss": 0.4088, "rewards/accuracies": 0.625, "rewards/chosen": -0.765757143497467, "rewards/margins": 2.746391773223877, "rewards/rejected": -3.5121490955352783, "step": 7744 }, { "epoch": 0.9, "learning_rate": 3.015235620644856e-08, "logits/chosen": -2.025972366333008, "logits/rejected": -2.0235674381256104, "logps/chosen": -301.65557861328125, "logps/rejected": -271.4912109375, "loss": 0.2154, "rewards/accuracies": 0.875, "rewards/chosen": -1.0503520965576172, "rewards/margins": 1.9604213237762451, "rewards/rejected": -3.0107734203338623, "step": 7745 }, { "epoch": 0.9, "learning_rate": 3.011692453053029e-08, "logits/chosen": -2.3795154094696045, "logits/rejected": -2.265852928161621, "logps/chosen": -345.14227294921875, "logps/rejected": -384.7630615234375, "loss": 0.3236, "rewards/accuracies": 1.0, "rewards/chosen": -1.4827134609222412, "rewards/margins": 1.868208885192871, "rewards/rejected": -3.3509223461151123, "step": 7746 }, { "epoch": 0.9, "learning_rate": 3.008149285461202e-08, "logits/chosen": -2.6126670837402344, "logits/rejected": -2.4469966888427734, "logps/chosen": -152.08448791503906, "logps/rejected": -189.40301513671875, "loss": 0.1027, "rewards/accuracies": 1.0, "rewards/chosen": 0.03326214849948883, "rewards/margins": 3.6540136337280273, "rewards/rejected": -3.62075138092041, "step": 7747 }, { "epoch": 0.9, "learning_rate": 3.004606117869375e-08, "logits/chosen": -2.138805389404297, "logits/rejected": -2.268827199935913, "logps/chosen": -235.92210388183594, "logps/rejected": -217.8069305419922, "loss": 0.5862, "rewards/accuracies": 0.625, "rewards/chosen": -0.9416705369949341, "rewards/margins": 0.8405090570449829, "rewards/rejected": -1.782179594039917, "step": 7748 }, { "epoch": 0.9, "learning_rate": 3.001062950277548e-08, "logits/chosen": -2.421022415161133, "logits/rejected": -2.1818885803222656, "logps/chosen": -350.6033935546875, "logps/rejected": -368.2622375488281, "loss": 0.1399, "rewards/accuracies": 1.0, "rewards/chosen": -0.8658220171928406, "rewards/margins": 3.4829742908477783, "rewards/rejected": -4.348796367645264, "step": 7749 }, { "epoch": 0.9, "learning_rate": 2.997519782685721e-08, "logits/chosen": -2.6439476013183594, "logits/rejected": -2.6982502937316895, "logps/chosen": -362.3180236816406, "logps/rejected": -352.5337219238281, "loss": 0.1865, "rewards/accuracies": 0.875, "rewards/chosen": -0.8907387256622314, "rewards/margins": 3.9270272254943848, "rewards/rejected": -4.817766189575195, "step": 7750 }, { "epoch": 0.9, "learning_rate": 2.9939766150938936e-08, "logits/chosen": -2.28498911857605, "logits/rejected": -2.1237874031066895, "logps/chosen": -319.8314514160156, "logps/rejected": -432.989501953125, "loss": 0.4647, "rewards/accuracies": 0.75, "rewards/chosen": -0.7099806070327759, "rewards/margins": 4.497669696807861, "rewards/rejected": -5.207650184631348, "step": 7751 }, { "epoch": 0.9, "learning_rate": 2.990433447502067e-08, "logits/chosen": -2.669466257095337, "logits/rejected": -2.5746665000915527, "logps/chosen": -123.31913757324219, "logps/rejected": -186.71994018554688, "loss": 0.3553, "rewards/accuracies": 0.75, "rewards/chosen": -0.840905487537384, "rewards/margins": 1.9236035346984863, "rewards/rejected": -2.7645092010498047, "step": 7752 }, { "epoch": 0.9, "learning_rate": 2.98689027991024e-08, "logits/chosen": -2.7125349044799805, "logits/rejected": -2.657027244567871, "logps/chosen": -301.6799011230469, "logps/rejected": -322.27642822265625, "loss": 0.3429, "rewards/accuracies": 0.875, "rewards/chosen": -1.2537753582000732, "rewards/margins": 2.953031063079834, "rewards/rejected": -4.206806182861328, "step": 7753 }, { "epoch": 0.9, "learning_rate": 2.983347112318412e-08, "logits/chosen": -2.0172929763793945, "logits/rejected": -2.154832363128662, "logps/chosen": -234.68484497070312, "logps/rejected": -270.33251953125, "loss": 0.357, "rewards/accuracies": 0.75, "rewards/chosen": -0.5685906410217285, "rewards/margins": 2.0278334617614746, "rewards/rejected": -2.596424102783203, "step": 7754 }, { "epoch": 0.9, "learning_rate": 2.9798039447265854e-08, "logits/chosen": -2.300719738006592, "logits/rejected": -2.577850580215454, "logps/chosen": -244.39242553710938, "logps/rejected": -161.05502319335938, "loss": 0.3363, "rewards/accuracies": 0.75, "rewards/chosen": -0.9006334543228149, "rewards/margins": 1.58696711063385, "rewards/rejected": -2.487600564956665, "step": 7755 }, { "epoch": 0.9, "learning_rate": 2.9762607771347586e-08, "logits/chosen": -1.8488723039627075, "logits/rejected": -2.1296777725219727, "logps/chosen": -415.63336181640625, "logps/rejected": -164.8297119140625, "loss": 0.8275, "rewards/accuracies": 0.375, "rewards/chosen": -1.390108346939087, "rewards/margins": -0.09355072677135468, "rewards/rejected": -1.2965576648712158, "step": 7756 }, { "epoch": 0.9, "learning_rate": 2.972717609542931e-08, "logits/chosen": -2.0294995307922363, "logits/rejected": -2.3420538902282715, "logps/chosen": -545.653076171875, "logps/rejected": -336.63189697265625, "loss": 0.2423, "rewards/accuracies": 0.875, "rewards/chosen": -0.5434222221374512, "rewards/margins": 2.448622703552246, "rewards/rejected": -2.9920451641082764, "step": 7757 }, { "epoch": 0.9, "learning_rate": 2.9691744419511043e-08, "logits/chosen": -2.561631441116333, "logits/rejected": -2.2508046627044678, "logps/chosen": -315.4842529296875, "logps/rejected": -324.127685546875, "loss": 0.3966, "rewards/accuracies": 0.75, "rewards/chosen": -0.9244118332862854, "rewards/margins": 1.3542728424072266, "rewards/rejected": -2.278684616088867, "step": 7758 }, { "epoch": 0.9, "learning_rate": 2.9656312743592772e-08, "logits/chosen": -1.807753086090088, "logits/rejected": -1.873401165008545, "logps/chosen": -293.9611511230469, "logps/rejected": -246.61709594726562, "loss": 1.0598, "rewards/accuracies": 0.5, "rewards/chosen": -1.3612035512924194, "rewards/margins": 0.1128699779510498, "rewards/rejected": -1.4740734100341797, "step": 7759 }, { "epoch": 0.9, "learning_rate": 2.9620881067674497e-08, "logits/chosen": -2.2371530532836914, "logits/rejected": -1.870023488998413, "logps/chosen": -368.02447509765625, "logps/rejected": -430.2591552734375, "loss": 0.7097, "rewards/accuracies": 0.625, "rewards/chosen": -1.268432378768921, "rewards/margins": 1.4887992143630981, "rewards/rejected": -2.7572314739227295, "step": 7760 }, { "epoch": 0.9, "learning_rate": 2.958544939175623e-08, "logits/chosen": -2.780423641204834, "logits/rejected": -2.7480783462524414, "logps/chosen": -344.16632080078125, "logps/rejected": -272.30126953125, "loss": 0.0893, "rewards/accuracies": 1.0, "rewards/chosen": -0.5013972520828247, "rewards/margins": 3.364753246307373, "rewards/rejected": -3.866150379180908, "step": 7761 }, { "epoch": 0.9, "learning_rate": 2.9550017715837958e-08, "logits/chosen": -2.386359691619873, "logits/rejected": -2.3342761993408203, "logps/chosen": -335.94549560546875, "logps/rejected": -344.0096130371094, "loss": 0.5507, "rewards/accuracies": 0.625, "rewards/chosen": -0.6650227308273315, "rewards/margins": 1.2286245822906494, "rewards/rejected": -1.8936471939086914, "step": 7762 }, { "epoch": 0.9, "learning_rate": 2.9514586039919684e-08, "logits/chosen": -2.614259958267212, "logits/rejected": -2.9620201587677, "logps/chosen": -285.33575439453125, "logps/rejected": -276.64337158203125, "loss": 0.1891, "rewards/accuracies": 1.0, "rewards/chosen": -0.7389540076255798, "rewards/margins": 2.490063428878784, "rewards/rejected": -3.2290172576904297, "step": 7763 }, { "epoch": 0.9, "learning_rate": 2.9479154364001416e-08, "logits/chosen": -2.133617877960205, "logits/rejected": -2.2232937812805176, "logps/chosen": -207.13864135742188, "logps/rejected": -392.1153259277344, "loss": 0.523, "rewards/accuracies": 0.625, "rewards/chosen": -1.2496898174285889, "rewards/margins": 2.291471004486084, "rewards/rejected": -3.541160821914673, "step": 7764 }, { "epoch": 0.9, "learning_rate": 2.9443722688083145e-08, "logits/chosen": -2.2252912521362305, "logits/rejected": -2.2977800369262695, "logps/chosen": -353.73895263671875, "logps/rejected": -478.194091796875, "loss": 0.2439, "rewards/accuracies": 1.0, "rewards/chosen": -0.5854321718215942, "rewards/margins": 2.993300437927246, "rewards/rejected": -3.578732490539551, "step": 7765 }, { "epoch": 0.9, "learning_rate": 2.9408291012164873e-08, "logits/chosen": -2.7660973072052, "logits/rejected": -2.6385977268218994, "logps/chosen": -239.15451049804688, "logps/rejected": -171.27122497558594, "loss": 0.5053, "rewards/accuracies": 0.875, "rewards/chosen": -0.8716668486595154, "rewards/margins": 2.2358438968658447, "rewards/rejected": -3.107510566711426, "step": 7766 }, { "epoch": 0.9, "learning_rate": 2.9372859336246602e-08, "logits/chosen": -2.3386037349700928, "logits/rejected": -2.5810070037841797, "logps/chosen": -291.3907470703125, "logps/rejected": -208.01927185058594, "loss": 0.5792, "rewards/accuracies": 0.75, "rewards/chosen": -0.6010730862617493, "rewards/margins": 2.2854278087615967, "rewards/rejected": -2.8865013122558594, "step": 7767 }, { "epoch": 0.9, "learning_rate": 2.9337427660328334e-08, "logits/chosen": -1.5870779752731323, "logits/rejected": -1.8302547931671143, "logps/chosen": -293.4154357910156, "logps/rejected": -280.04345703125, "loss": 0.8785, "rewards/accuracies": 0.625, "rewards/chosen": -1.4791429042816162, "rewards/margins": 0.401305228471756, "rewards/rejected": -1.8804481029510498, "step": 7768 }, { "epoch": 0.9, "learning_rate": 2.930199598441006e-08, "logits/chosen": -2.1764297485351562, "logits/rejected": -2.1327829360961914, "logps/chosen": -216.12704467773438, "logps/rejected": -270.95184326171875, "loss": 0.2429, "rewards/accuracies": 0.875, "rewards/chosen": -1.5784616470336914, "rewards/margins": 3.237293004989624, "rewards/rejected": -4.815754413604736, "step": 7769 }, { "epoch": 0.9, "learning_rate": 2.9266564308491788e-08, "logits/chosen": -2.0971078872680664, "logits/rejected": -2.1232049465179443, "logps/chosen": -257.675537109375, "logps/rejected": -260.11083984375, "loss": 0.8103, "rewards/accuracies": 0.875, "rewards/chosen": -0.5552384257316589, "rewards/margins": 1.4537792205810547, "rewards/rejected": -2.0090174674987793, "step": 7770 }, { "epoch": 0.9, "learning_rate": 2.923113263257352e-08, "logits/chosen": -2.531778335571289, "logits/rejected": -2.390392303466797, "logps/chosen": -352.658447265625, "logps/rejected": -302.624267578125, "loss": 0.1147, "rewards/accuracies": 1.0, "rewards/chosen": -0.3945104479789734, "rewards/margins": 2.8807358741760254, "rewards/rejected": -3.2752463817596436, "step": 7771 }, { "epoch": 0.9, "learning_rate": 2.919570095665525e-08, "logits/chosen": -1.87582266330719, "logits/rejected": -2.227600574493408, "logps/chosen": -402.901611328125, "logps/rejected": -227.29421997070312, "loss": 0.3967, "rewards/accuracies": 0.75, "rewards/chosen": -0.29113534092903137, "rewards/margins": 1.3775665760040283, "rewards/rejected": -1.6687018871307373, "step": 7772 }, { "epoch": 0.9, "learning_rate": 2.9160269280736978e-08, "logits/chosen": -2.8728246688842773, "logits/rejected": -2.831409454345703, "logps/chosen": -220.58148193359375, "logps/rejected": -300.328857421875, "loss": 0.1328, "rewards/accuracies": 0.875, "rewards/chosen": -0.29534661769866943, "rewards/margins": 3.4064457416534424, "rewards/rejected": -3.7017922401428223, "step": 7773 }, { "epoch": 0.9, "learning_rate": 2.9124837604818706e-08, "logits/chosen": -1.8029316663742065, "logits/rejected": -1.9336904287338257, "logps/chosen": -368.7296142578125, "logps/rejected": -290.61444091796875, "loss": 0.3123, "rewards/accuracies": 0.875, "rewards/chosen": -0.6182440519332886, "rewards/margins": 2.2695202827453613, "rewards/rejected": -2.8877644538879395, "step": 7774 }, { "epoch": 0.9, "learning_rate": 2.908940592890044e-08, "logits/chosen": -2.259770631790161, "logits/rejected": -2.1659631729125977, "logps/chosen": -307.2257385253906, "logps/rejected": -448.21099853515625, "loss": 0.6255, "rewards/accuracies": 0.875, "rewards/chosen": -0.28634482622146606, "rewards/margins": 2.053135395050049, "rewards/rejected": -2.339480400085449, "step": 7775 }, { "epoch": 0.9, "learning_rate": 2.9053974252982164e-08, "logits/chosen": -2.494269609451294, "logits/rejected": -2.530651092529297, "logps/chosen": -217.77879333496094, "logps/rejected": -213.6360626220703, "loss": 0.244, "rewards/accuracies": 1.0, "rewards/chosen": -0.5549649596214294, "rewards/margins": 1.6481423377990723, "rewards/rejected": -2.2031073570251465, "step": 7776 }, { "epoch": 0.9, "learning_rate": 2.9018542577063893e-08, "logits/chosen": -2.4585254192352295, "logits/rejected": -2.6803500652313232, "logps/chosen": -244.40150451660156, "logps/rejected": -199.9104461669922, "loss": 0.5719, "rewards/accuracies": 0.875, "rewards/chosen": -0.9621416926383972, "rewards/margins": 1.8430845737457275, "rewards/rejected": -2.8052260875701904, "step": 7777 }, { "epoch": 0.9, "learning_rate": 2.8983110901145625e-08, "logits/chosen": -1.8665355443954468, "logits/rejected": -2.422661781311035, "logps/chosen": -506.8324279785156, "logps/rejected": -259.5646057128906, "loss": 0.6008, "rewards/accuracies": 0.5, "rewards/chosen": -1.3148493766784668, "rewards/margins": 0.5736511945724487, "rewards/rejected": -1.888500452041626, "step": 7778 }, { "epoch": 0.9, "learning_rate": 2.894767922522735e-08, "logits/chosen": -2.539205551147461, "logits/rejected": -2.4175198078155518, "logps/chosen": -73.31095123291016, "logps/rejected": -305.56060791015625, "loss": 0.1245, "rewards/accuracies": 1.0, "rewards/chosen": -0.9111602306365967, "rewards/margins": 4.629692077636719, "rewards/rejected": -5.5408525466918945, "step": 7779 }, { "epoch": 0.91, "learning_rate": 2.8912247549309082e-08, "logits/chosen": -2.1128346920013428, "logits/rejected": -1.9557201862335205, "logps/chosen": -291.38629150390625, "logps/rejected": -399.42572021484375, "loss": 0.4571, "rewards/accuracies": 0.75, "rewards/chosen": -0.6897319555282593, "rewards/margins": 2.0726051330566406, "rewards/rejected": -2.7623372077941895, "step": 7780 }, { "epoch": 0.91, "learning_rate": 2.887681587339081e-08, "logits/chosen": -2.19891619682312, "logits/rejected": -2.337376117706299, "logps/chosen": -119.53851318359375, "logps/rejected": -129.3673095703125, "loss": 0.5481, "rewards/accuracies": 0.75, "rewards/chosen": -0.47664231061935425, "rewards/margins": 1.310901403427124, "rewards/rejected": -1.787543773651123, "step": 7781 }, { "epoch": 0.91, "learning_rate": 2.8841384197472536e-08, "logits/chosen": -2.43851375579834, "logits/rejected": -2.235294818878174, "logps/chosen": -183.95376586914062, "logps/rejected": -350.33294677734375, "loss": 0.3787, "rewards/accuracies": 0.625, "rewards/chosen": -0.797174334526062, "rewards/margins": 1.8446927070617676, "rewards/rejected": -2.641867160797119, "step": 7782 }, { "epoch": 0.91, "learning_rate": 2.880595252155427e-08, "logits/chosen": -2.655919075012207, "logits/rejected": -2.634281873703003, "logps/chosen": -158.1014404296875, "logps/rejected": -176.4591827392578, "loss": 0.5624, "rewards/accuracies": 0.75, "rewards/chosen": -0.9254481792449951, "rewards/margins": 1.2227340936660767, "rewards/rejected": -2.1481821537017822, "step": 7783 }, { "epoch": 0.91, "learning_rate": 2.8770520845635997e-08, "logits/chosen": -2.5218958854675293, "logits/rejected": -2.730910062789917, "logps/chosen": -244.1710662841797, "logps/rejected": -288.0486145019531, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": -1.0073072910308838, "rewards/margins": 2.3031139373779297, "rewards/rejected": -3.3104209899902344, "step": 7784 }, { "epoch": 0.91, "learning_rate": 2.8735089169717726e-08, "logits/chosen": -2.4748146533966064, "logits/rejected": -2.5149314403533936, "logps/chosen": -85.9519271850586, "logps/rejected": -91.47857666015625, "loss": 0.3517, "rewards/accuracies": 0.75, "rewards/chosen": -0.20330707728862762, "rewards/margins": 1.4892146587371826, "rewards/rejected": -1.6925216913223267, "step": 7785 }, { "epoch": 0.91, "learning_rate": 2.8699657493799455e-08, "logits/chosen": -2.204655885696411, "logits/rejected": -2.133025884628296, "logps/chosen": -415.74774169921875, "logps/rejected": -327.9106140136719, "loss": 0.4433, "rewards/accuracies": 0.75, "rewards/chosen": -1.1045482158660889, "rewards/margins": 1.266464114189148, "rewards/rejected": -2.3710122108459473, "step": 7786 }, { "epoch": 0.91, "learning_rate": 2.8664225817881187e-08, "logits/chosen": -2.647646427154541, "logits/rejected": -2.8007779121398926, "logps/chosen": -227.7813720703125, "logps/rejected": -200.08966064453125, "loss": 0.2226, "rewards/accuracies": 0.875, "rewards/chosen": -0.08488786220550537, "rewards/margins": 3.060976505279541, "rewards/rejected": -3.145864486694336, "step": 7787 }, { "epoch": 0.91, "learning_rate": 2.8628794141962915e-08, "logits/chosen": -2.281754732131958, "logits/rejected": -2.268299102783203, "logps/chosen": -341.0829772949219, "logps/rejected": -452.7987060546875, "loss": 0.4725, "rewards/accuracies": 0.75, "rewards/chosen": -0.570919394493103, "rewards/margins": 3.05780029296875, "rewards/rejected": -3.6287195682525635, "step": 7788 }, { "epoch": 0.91, "learning_rate": 2.859336246604464e-08, "logits/chosen": -2.0189661979675293, "logits/rejected": -2.2825465202331543, "logps/chosen": -385.54180908203125, "logps/rejected": -407.10821533203125, "loss": 0.1824, "rewards/accuracies": 1.0, "rewards/chosen": -0.3544990122318268, "rewards/margins": 2.3201181888580322, "rewards/rejected": -2.674617290496826, "step": 7789 }, { "epoch": 0.91, "learning_rate": 2.8557930790126373e-08, "logits/chosen": -2.467367649078369, "logits/rejected": -2.6049346923828125, "logps/chosen": -353.00689697265625, "logps/rejected": -299.5634765625, "loss": 0.5764, "rewards/accuracies": 0.75, "rewards/chosen": -1.204040765762329, "rewards/margins": 1.365154504776001, "rewards/rejected": -2.569195508956909, "step": 7790 }, { "epoch": 0.91, "learning_rate": 2.8522499114208102e-08, "logits/chosen": -2.240598440170288, "logits/rejected": -2.5918936729431152, "logps/chosen": -230.36732482910156, "logps/rejected": -149.77911376953125, "loss": 0.0907, "rewards/accuracies": 1.0, "rewards/chosen": 0.002949148416519165, "rewards/margins": 2.9211301803588867, "rewards/rejected": -2.9181809425354004, "step": 7791 }, { "epoch": 0.91, "learning_rate": 2.8487067438289827e-08, "logits/chosen": -2.7178173065185547, "logits/rejected": -2.6147775650024414, "logps/chosen": -265.32403564453125, "logps/rejected": -289.42584228515625, "loss": 0.3437, "rewards/accuracies": 0.75, "rewards/chosen": -0.9472178220748901, "rewards/margins": 2.345597267150879, "rewards/rejected": -3.2928152084350586, "step": 7792 }, { "epoch": 0.91, "learning_rate": 2.845163576237156e-08, "logits/chosen": -2.603745460510254, "logits/rejected": -2.514835834503174, "logps/chosen": -236.71751403808594, "logps/rejected": -327.80712890625, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": -0.38996630907058716, "rewards/margins": 3.835508108139038, "rewards/rejected": -4.2254743576049805, "step": 7793 }, { "epoch": 0.91, "learning_rate": 2.8416204086453288e-08, "logits/chosen": -2.099008560180664, "logits/rejected": -2.232409954071045, "logps/chosen": -384.0172119140625, "logps/rejected": -348.1450500488281, "loss": 0.2377, "rewards/accuracies": 0.875, "rewards/chosen": -0.6556740403175354, "rewards/margins": 2.87734317779541, "rewards/rejected": -3.533017158508301, "step": 7794 }, { "epoch": 0.91, "learning_rate": 2.8380772410535017e-08, "logits/chosen": -2.308295965194702, "logits/rejected": -2.5822501182556152, "logps/chosen": -192.2874755859375, "logps/rejected": -144.19613647460938, "loss": 0.4977, "rewards/accuracies": 0.625, "rewards/chosen": -0.7349642515182495, "rewards/margins": 0.8757230043411255, "rewards/rejected": -1.6106871366500854, "step": 7795 }, { "epoch": 0.91, "learning_rate": 2.8345340734616745e-08, "logits/chosen": -2.3906497955322266, "logits/rejected": -2.23116135597229, "logps/chosen": -225.14337158203125, "logps/rejected": -189.8595733642578, "loss": 0.2852, "rewards/accuracies": 0.875, "rewards/chosen": -2.194859266281128, "rewards/margins": 2.6437125205993652, "rewards/rejected": -4.838572025299072, "step": 7796 }, { "epoch": 0.91, "learning_rate": 2.8309909058698477e-08, "logits/chosen": -1.7749189138412476, "logits/rejected": -1.9616481065750122, "logps/chosen": -560.9602661132812, "logps/rejected": -380.57916259765625, "loss": 0.2514, "rewards/accuracies": 0.875, "rewards/chosen": -0.7658616304397583, "rewards/margins": 2.6699302196502686, "rewards/rejected": -3.4357919692993164, "step": 7797 }, { "epoch": 0.91, "learning_rate": 2.8274477382780203e-08, "logits/chosen": -2.183626413345337, "logits/rejected": -2.3917107582092285, "logps/chosen": -231.58775329589844, "logps/rejected": -259.0075378417969, "loss": 0.2835, "rewards/accuracies": 0.75, "rewards/chosen": -0.7388246059417725, "rewards/margins": 3.359987258911133, "rewards/rejected": -4.098812103271484, "step": 7798 }, { "epoch": 0.91, "learning_rate": 2.823904570686193e-08, "logits/chosen": -1.8685747385025024, "logits/rejected": -1.4300687313079834, "logps/chosen": -198.68832397460938, "logps/rejected": -508.007080078125, "loss": 0.3467, "rewards/accuracies": 0.75, "rewards/chosen": -0.2606270909309387, "rewards/margins": 2.025501012802124, "rewards/rejected": -2.286128044128418, "step": 7799 }, { "epoch": 0.91, "learning_rate": 2.8203614030943664e-08, "logits/chosen": -2.143021583557129, "logits/rejected": -2.158989906311035, "logps/chosen": -361.3835754394531, "logps/rejected": -290.6054382324219, "loss": 0.4174, "rewards/accuracies": 0.75, "rewards/chosen": -1.5084774494171143, "rewards/margins": 1.304731011390686, "rewards/rejected": -2.81320858001709, "step": 7800 }, { "epoch": 0.91, "learning_rate": 2.816818235502539e-08, "logits/chosen": -3.1506359577178955, "logits/rejected": -3.0102901458740234, "logps/chosen": -263.4444580078125, "logps/rejected": -231.13949584960938, "loss": 0.3016, "rewards/accuracies": 0.875, "rewards/chosen": -0.5410915017127991, "rewards/margins": 2.0273280143737793, "rewards/rejected": -2.5684194564819336, "step": 7801 }, { "epoch": 0.91, "learning_rate": 2.813275067910712e-08, "logits/chosen": -2.123265266418457, "logits/rejected": -2.0646462440490723, "logps/chosen": -213.71926879882812, "logps/rejected": -321.90972900390625, "loss": 0.6255, "rewards/accuracies": 0.625, "rewards/chosen": -0.8886911273002625, "rewards/margins": 1.2406991720199585, "rewards/rejected": -2.129390239715576, "step": 7802 }, { "epoch": 0.91, "learning_rate": 2.809731900318885e-08, "logits/chosen": -2.1068053245544434, "logits/rejected": -2.2231075763702393, "logps/chosen": -296.60302734375, "logps/rejected": -285.5090026855469, "loss": 0.1711, "rewards/accuracies": 0.875, "rewards/chosen": -0.26097890734672546, "rewards/margins": 3.4135022163391113, "rewards/rejected": -3.67448091506958, "step": 7803 }, { "epoch": 0.91, "learning_rate": 2.8061887327270575e-08, "logits/chosen": -1.8939952850341797, "logits/rejected": -1.742956280708313, "logps/chosen": -294.158935546875, "logps/rejected": -364.29571533203125, "loss": 0.3505, "rewards/accuracies": 0.875, "rewards/chosen": -1.3094886541366577, "rewards/margins": 10.736759185791016, "rewards/rejected": -12.046247482299805, "step": 7804 }, { "epoch": 0.91, "learning_rate": 2.8026455651352307e-08, "logits/chosen": -2.3410377502441406, "logits/rejected": -2.3977479934692383, "logps/chosen": -359.3341064453125, "logps/rejected": -425.458251953125, "loss": 0.1039, "rewards/accuracies": 1.0, "rewards/chosen": -0.8780902624130249, "rewards/margins": 3.4271817207336426, "rewards/rejected": -4.305272102355957, "step": 7805 }, { "epoch": 0.91, "learning_rate": 2.7991023975434036e-08, "logits/chosen": -2.5017528533935547, "logits/rejected": -2.3562936782836914, "logps/chosen": -188.63217163085938, "logps/rejected": -227.7791748046875, "loss": 0.4809, "rewards/accuracies": 0.875, "rewards/chosen": -0.7357326745986938, "rewards/margins": 1.9596054553985596, "rewards/rejected": -2.695338010787964, "step": 7806 }, { "epoch": 0.91, "learning_rate": 2.7955592299515768e-08, "logits/chosen": -2.271571397781372, "logits/rejected": -2.444981098175049, "logps/chosen": -329.451171875, "logps/rejected": -252.57118225097656, "loss": 0.3333, "rewards/accuracies": 0.875, "rewards/chosen": -1.7090495824813843, "rewards/margins": 1.9694814682006836, "rewards/rejected": -3.6785311698913574, "step": 7807 }, { "epoch": 0.91, "learning_rate": 2.7920160623597494e-08, "logits/chosen": -1.804121732711792, "logits/rejected": -2.0224833488464355, "logps/chosen": -267.73162841796875, "logps/rejected": -237.7820587158203, "loss": 0.561, "rewards/accuracies": 0.875, "rewards/chosen": -0.5574276447296143, "rewards/margins": 1.002381682395935, "rewards/rejected": -1.5598094463348389, "step": 7808 }, { "epoch": 0.91, "learning_rate": 2.7884728947679226e-08, "logits/chosen": -1.908738613128662, "logits/rejected": -1.9852681159973145, "logps/chosen": -236.71371459960938, "logps/rejected": -253.9727020263672, "loss": 0.2262, "rewards/accuracies": 1.0, "rewards/chosen": -0.982132077217102, "rewards/margins": 2.483746290206909, "rewards/rejected": -3.465878486633301, "step": 7809 }, { "epoch": 0.91, "learning_rate": 2.7849297271760954e-08, "logits/chosen": -2.469327688217163, "logits/rejected": -2.714245080947876, "logps/chosen": -205.62155151367188, "logps/rejected": -204.2027130126953, "loss": 0.2266, "rewards/accuracies": 0.875, "rewards/chosen": -0.034002043306827545, "rewards/margins": 3.2153639793395996, "rewards/rejected": -3.249366283416748, "step": 7810 }, { "epoch": 0.91, "learning_rate": 2.781386559584268e-08, "logits/chosen": -2.682725191116333, "logits/rejected": -2.7206904888153076, "logps/chosen": -422.62945556640625, "logps/rejected": -330.67657470703125, "loss": 0.1546, "rewards/accuracies": 1.0, "rewards/chosen": -0.2972520589828491, "rewards/margins": 3.714400053024292, "rewards/rejected": -4.011651992797852, "step": 7811 }, { "epoch": 0.91, "learning_rate": 2.7778433919924412e-08, "logits/chosen": -2.5647411346435547, "logits/rejected": -2.5124623775482178, "logps/chosen": -298.2282409667969, "logps/rejected": -232.6904296875, "loss": 0.2368, "rewards/accuracies": 0.875, "rewards/chosen": -0.6699087023735046, "rewards/margins": 2.296438694000244, "rewards/rejected": -2.9663472175598145, "step": 7812 }, { "epoch": 0.91, "learning_rate": 2.774300224400614e-08, "logits/chosen": -2.1758363246917725, "logits/rejected": -2.3720457553863525, "logps/chosen": -153.42904663085938, "logps/rejected": -202.2509765625, "loss": 1.4641, "rewards/accuracies": 0.75, "rewards/chosen": -2.2635390758514404, "rewards/margins": 0.6524296998977661, "rewards/rejected": -2.915968894958496, "step": 7813 }, { "epoch": 0.91, "learning_rate": 2.770757056808787e-08, "logits/chosen": -1.4636287689208984, "logits/rejected": -1.659960389137268, "logps/chosen": -359.4472961425781, "logps/rejected": -326.4058532714844, "loss": 0.5519, "rewards/accuracies": 0.875, "rewards/chosen": -0.7890291810035706, "rewards/margins": 1.3984565734863281, "rewards/rejected": -2.187485694885254, "step": 7814 }, { "epoch": 0.91, "learning_rate": 2.7672138892169598e-08, "logits/chosen": -2.0322749614715576, "logits/rejected": -2.0300729274749756, "logps/chosen": -274.7593994140625, "logps/rejected": -356.2828369140625, "loss": 0.3438, "rewards/accuracies": 0.875, "rewards/chosen": -0.7962191104888916, "rewards/margins": 2.67726469039917, "rewards/rejected": -3.4734840393066406, "step": 7815 }, { "epoch": 0.91, "learning_rate": 2.763670721625133e-08, "logits/chosen": -3.074483871459961, "logits/rejected": -3.001450300216675, "logps/chosen": -201.23008728027344, "logps/rejected": -190.70919799804688, "loss": 0.3628, "rewards/accuracies": 0.875, "rewards/chosen": -0.2573307156562805, "rewards/margins": 2.712768793106079, "rewards/rejected": -2.970099449157715, "step": 7816 }, { "epoch": 0.91, "learning_rate": 2.7601275540333056e-08, "logits/chosen": -1.6934857368469238, "logits/rejected": -1.8971480131149292, "logps/chosen": -397.91375732421875, "logps/rejected": -314.5960388183594, "loss": 0.2804, "rewards/accuracies": 0.875, "rewards/chosen": -0.541378378868103, "rewards/margins": 2.3073079586029053, "rewards/rejected": -2.8486862182617188, "step": 7817 }, { "epoch": 0.91, "learning_rate": 2.7565843864414784e-08, "logits/chosen": -2.966054916381836, "logits/rejected": -2.8723702430725098, "logps/chosen": -251.3617706298828, "logps/rejected": -263.105224609375, "loss": 0.4065, "rewards/accuracies": 0.875, "rewards/chosen": -0.9083907008171082, "rewards/margins": 2.338012456893921, "rewards/rejected": -3.246403217315674, "step": 7818 }, { "epoch": 0.91, "learning_rate": 2.7530412188496516e-08, "logits/chosen": -1.48484206199646, "logits/rejected": -2.0893795490264893, "logps/chosen": -265.0536804199219, "logps/rejected": -168.73045349121094, "loss": 0.3536, "rewards/accuracies": 0.875, "rewards/chosen": -1.069885015487671, "rewards/margins": 1.3715636730194092, "rewards/rejected": -2.44144868850708, "step": 7819 }, { "epoch": 0.91, "learning_rate": 2.7494980512578242e-08, "logits/chosen": -2.124945640563965, "logits/rejected": -2.0588135719299316, "logps/chosen": -311.458251953125, "logps/rejected": -314.96575927734375, "loss": 0.3767, "rewards/accuracies": 0.875, "rewards/chosen": -0.9676119089126587, "rewards/margins": 2.650390863418579, "rewards/rejected": -3.6180028915405273, "step": 7820 }, { "epoch": 0.91, "learning_rate": 2.7459548836659974e-08, "logits/chosen": -2.119208574295044, "logits/rejected": -2.069493532180786, "logps/chosen": -168.24563598632812, "logps/rejected": -172.00186157226562, "loss": 0.4305, "rewards/accuracies": 0.75, "rewards/chosen": -0.6223419904708862, "rewards/margins": 1.8465046882629395, "rewards/rejected": -2.4688467979431152, "step": 7821 }, { "epoch": 0.91, "learning_rate": 2.7424117160741703e-08, "logits/chosen": -2.313366413116455, "logits/rejected": -2.3397927284240723, "logps/chosen": -122.8309326171875, "logps/rejected": -242.44654846191406, "loss": 0.4431, "rewards/accuracies": 0.75, "rewards/chosen": -0.4786389172077179, "rewards/margins": 2.045037031173706, "rewards/rejected": -2.5236759185791016, "step": 7822 }, { "epoch": 0.91, "learning_rate": 2.7388685484823428e-08, "logits/chosen": -2.571547508239746, "logits/rejected": -2.645779609680176, "logps/chosen": -97.63256072998047, "logps/rejected": -190.81137084960938, "loss": 0.4926, "rewards/accuracies": 0.75, "rewards/chosen": -1.092734694480896, "rewards/margins": 1.6755324602127075, "rewards/rejected": -2.7682671546936035, "step": 7823 }, { "epoch": 0.91, "learning_rate": 2.735325380890516e-08, "logits/chosen": -2.3497262001037598, "logits/rejected": -1.9311044216156006, "logps/chosen": -192.95046997070312, "logps/rejected": -405.9814453125, "loss": 0.5422, "rewards/accuracies": 0.75, "rewards/chosen": -0.6494795083999634, "rewards/margins": 1.6202259063720703, "rewards/rejected": -2.269705295562744, "step": 7824 }, { "epoch": 0.91, "learning_rate": 2.731782213298689e-08, "logits/chosen": -2.5598957538604736, "logits/rejected": -2.765096664428711, "logps/chosen": -259.1525573730469, "logps/rejected": -137.61874389648438, "loss": 0.6124, "rewards/accuracies": 0.625, "rewards/chosen": -1.3501899242401123, "rewards/margins": 0.5727995038032532, "rewards/rejected": -1.9229896068572998, "step": 7825 }, { "epoch": 0.91, "learning_rate": 2.728239045706862e-08, "logits/chosen": -1.441624641418457, "logits/rejected": -1.8219115734100342, "logps/chosen": -413.56219482421875, "logps/rejected": -298.4718017578125, "loss": 0.6591, "rewards/accuracies": 0.5, "rewards/chosen": -1.6945997476577759, "rewards/margins": 1.2711715698242188, "rewards/rejected": -2.965771436691284, "step": 7826 }, { "epoch": 0.91, "learning_rate": 2.7246958781150346e-08, "logits/chosen": -2.6693060398101807, "logits/rejected": -2.8600101470947266, "logps/chosen": -208.62977600097656, "logps/rejected": -190.83123779296875, "loss": 0.3446, "rewards/accuracies": 0.875, "rewards/chosen": -0.3883078396320343, "rewards/margins": 2.26218318939209, "rewards/rejected": -2.6504909992218018, "step": 7827 }, { "epoch": 0.91, "learning_rate": 2.7211527105232075e-08, "logits/chosen": -2.2302088737487793, "logits/rejected": -2.34234619140625, "logps/chosen": -305.230224609375, "logps/rejected": -328.2203369140625, "loss": 0.3125, "rewards/accuracies": 0.875, "rewards/chosen": -0.7381885051727295, "rewards/margins": 3.1803858280181885, "rewards/rejected": -3.918574571609497, "step": 7828 }, { "epoch": 0.91, "learning_rate": 2.7176095429313807e-08, "logits/chosen": -2.4452567100524902, "logits/rejected": -2.346367835998535, "logps/chosen": -229.52728271484375, "logps/rejected": -211.13671875, "loss": 0.3219, "rewards/accuracies": 0.875, "rewards/chosen": -0.17147789895534515, "rewards/margins": 1.7991358041763306, "rewards/rejected": -1.970613718032837, "step": 7829 }, { "epoch": 0.91, "learning_rate": 2.7140663753395533e-08, "logits/chosen": -1.804085612297058, "logits/rejected": -2.008859395980835, "logps/chosen": -206.87290954589844, "logps/rejected": -311.7795104980469, "loss": 0.1602, "rewards/accuracies": 0.875, "rewards/chosen": -0.6475632190704346, "rewards/margins": 5.3357720375061035, "rewards/rejected": -5.983335494995117, "step": 7830 }, { "epoch": 0.91, "learning_rate": 2.7105232077477265e-08, "logits/chosen": -2.578428268432617, "logits/rejected": -2.469276189804077, "logps/chosen": -291.09326171875, "logps/rejected": -292.8094482421875, "loss": 0.6713, "rewards/accuracies": 0.5, "rewards/chosen": -1.1974966526031494, "rewards/margins": 0.9021276831626892, "rewards/rejected": -2.0996243953704834, "step": 7831 }, { "epoch": 0.91, "learning_rate": 2.7069800401558993e-08, "logits/chosen": -2.534238815307617, "logits/rejected": -2.2838048934936523, "logps/chosen": -308.0565490722656, "logps/rejected": -224.52523803710938, "loss": 0.4064, "rewards/accuracies": 0.75, "rewards/chosen": -1.4627076387405396, "rewards/margins": 1.1398911476135254, "rewards/rejected": -2.6025989055633545, "step": 7832 }, { "epoch": 0.91, "learning_rate": 2.703436872564072e-08, "logits/chosen": -2.111743927001953, "logits/rejected": -2.2112739086151123, "logps/chosen": -243.88580322265625, "logps/rejected": -284.9048767089844, "loss": 0.5281, "rewards/accuracies": 0.625, "rewards/chosen": -1.2588226795196533, "rewards/margins": 1.6194127798080444, "rewards/rejected": -2.878235340118408, "step": 7833 }, { "epoch": 0.91, "learning_rate": 2.699893704972245e-08, "logits/chosen": -2.6216118335723877, "logits/rejected": -2.5046045780181885, "logps/chosen": -143.48959350585938, "logps/rejected": -276.5705871582031, "loss": 0.3587, "rewards/accuracies": 0.75, "rewards/chosen": -0.7581494450569153, "rewards/margins": 2.0551400184631348, "rewards/rejected": -2.8132896423339844, "step": 7834 }, { "epoch": 0.91, "learning_rate": 2.696350537380418e-08, "logits/chosen": -2.6369690895080566, "logits/rejected": -2.858222007751465, "logps/chosen": -203.49349975585938, "logps/rejected": -157.95584106445312, "loss": 0.3212, "rewards/accuracies": 0.875, "rewards/chosen": -0.42449846863746643, "rewards/margins": 1.6568524837493896, "rewards/rejected": -2.0813510417938232, "step": 7835 }, { "epoch": 0.91, "learning_rate": 2.6928073697885908e-08, "logits/chosen": -2.608896017074585, "logits/rejected": -2.657409191131592, "logps/chosen": -280.3772888183594, "logps/rejected": -285.59381103515625, "loss": 0.3285, "rewards/accuracies": 0.875, "rewards/chosen": -1.0597807168960571, "rewards/margins": 2.1328701972961426, "rewards/rejected": -3.1926510334014893, "step": 7836 }, { "epoch": 0.91, "learning_rate": 2.6892642021967637e-08, "logits/chosen": -2.037472724914551, "logits/rejected": -2.4206342697143555, "logps/chosen": -298.85736083984375, "logps/rejected": -268.5363464355469, "loss": 0.1023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5951290726661682, "rewards/margins": 3.737485408782959, "rewards/rejected": -4.332614898681641, "step": 7837 }, { "epoch": 0.91, "learning_rate": 2.685721034604937e-08, "logits/chosen": -2.461611032485962, "logits/rejected": -2.3447670936584473, "logps/chosen": -422.7827453613281, "logps/rejected": -311.2020263671875, "loss": 0.8573, "rewards/accuracies": 0.625, "rewards/chosen": -1.2099802494049072, "rewards/margins": 0.30790793895721436, "rewards/rejected": -1.5178881883621216, "step": 7838 }, { "epoch": 0.91, "learning_rate": 2.6821778670131095e-08, "logits/chosen": -2.4233460426330566, "logits/rejected": -2.3633270263671875, "logps/chosen": -226.41748046875, "logps/rejected": -234.62802124023438, "loss": 0.2493, "rewards/accuracies": 0.875, "rewards/chosen": -0.2887444496154785, "rewards/margins": 2.8372323513031006, "rewards/rejected": -3.125976800918579, "step": 7839 }, { "epoch": 0.91, "learning_rate": 2.6786346994212823e-08, "logits/chosen": -2.2658069133758545, "logits/rejected": -2.4389095306396484, "logps/chosen": -357.0491027832031, "logps/rejected": -256.5008850097656, "loss": 0.2984, "rewards/accuracies": 0.875, "rewards/chosen": -0.5574584007263184, "rewards/margins": 1.8538297414779663, "rewards/rejected": -2.411288261413574, "step": 7840 }, { "epoch": 0.91, "learning_rate": 2.6750915318294555e-08, "logits/chosen": -1.9805827140808105, "logits/rejected": -2.27040433883667, "logps/chosen": -297.7172546386719, "logps/rejected": -224.51019287109375, "loss": 0.4636, "rewards/accuracies": 0.625, "rewards/chosen": -1.1512980461120605, "rewards/margins": 2.592268705368042, "rewards/rejected": -3.7435667514801025, "step": 7841 }, { "epoch": 0.91, "learning_rate": 2.6715483642376284e-08, "logits/chosen": -2.438206195831299, "logits/rejected": -2.7582108974456787, "logps/chosen": -293.32330322265625, "logps/rejected": -204.92874145507812, "loss": 0.6156, "rewards/accuracies": 0.75, "rewards/chosen": -1.8719755411148071, "rewards/margins": 2.208967447280884, "rewards/rejected": -4.0809431076049805, "step": 7842 }, { "epoch": 0.91, "learning_rate": 2.6680051966458013e-08, "logits/chosen": -2.54610538482666, "logits/rejected": -2.6854209899902344, "logps/chosen": -183.94552612304688, "logps/rejected": -186.38099670410156, "loss": 1.1229, "rewards/accuracies": 0.625, "rewards/chosen": -1.2942882776260376, "rewards/margins": 0.5130681991577148, "rewards/rejected": -1.807356595993042, "step": 7843 }, { "epoch": 0.91, "learning_rate": 2.664462029053974e-08, "logits/chosen": -2.4463138580322266, "logits/rejected": -2.250988245010376, "logps/chosen": -248.43740844726562, "logps/rejected": -282.69677734375, "loss": 0.5184, "rewards/accuracies": 0.875, "rewards/chosen": -1.1109504699707031, "rewards/margins": 2.3870322704315186, "rewards/rejected": -3.4979827404022217, "step": 7844 }, { "epoch": 0.91, "learning_rate": 2.6609188614621474e-08, "logits/chosen": -2.391791582107544, "logits/rejected": -2.3228251934051514, "logps/chosen": -309.38067626953125, "logps/rejected": -321.7048645019531, "loss": 0.3552, "rewards/accuracies": 0.875, "rewards/chosen": -0.7677709460258484, "rewards/margins": 1.2337822914123535, "rewards/rejected": -2.0015532970428467, "step": 7845 }, { "epoch": 0.91, "learning_rate": 2.65737569387032e-08, "logits/chosen": -2.736665725708008, "logits/rejected": -2.5029428005218506, "logps/chosen": -149.85308837890625, "logps/rejected": -221.61337280273438, "loss": 0.365, "rewards/accuracies": 0.875, "rewards/chosen": -0.8474173545837402, "rewards/margins": 2.9559707641601562, "rewards/rejected": -3.8033883571624756, "step": 7846 }, { "epoch": 0.91, "learning_rate": 2.6538325262784928e-08, "logits/chosen": -2.3299365043640137, "logits/rejected": -2.3816258907318115, "logps/chosen": -193.51112365722656, "logps/rejected": -329.27728271484375, "loss": 0.3366, "rewards/accuracies": 0.75, "rewards/chosen": -1.2804360389709473, "rewards/margins": 1.8022446632385254, "rewards/rejected": -3.0826807022094727, "step": 7847 }, { "epoch": 0.91, "learning_rate": 2.650289358686666e-08, "logits/chosen": -2.2955245971679688, "logits/rejected": -2.5322039127349854, "logps/chosen": -542.624755859375, "logps/rejected": -341.49090576171875, "loss": 0.1318, "rewards/accuracies": 1.0, "rewards/chosen": -0.07893425226211548, "rewards/margins": 3.1659882068634033, "rewards/rejected": -3.244922399520874, "step": 7848 }, { "epoch": 0.91, "learning_rate": 2.6467461910948385e-08, "logits/chosen": -2.2466304302215576, "logits/rejected": -2.341965675354004, "logps/chosen": -291.38641357421875, "logps/rejected": -277.0706787109375, "loss": 0.4509, "rewards/accuracies": 0.625, "rewards/chosen": -0.7700836658477783, "rewards/margins": 1.8395309448242188, "rewards/rejected": -2.609614372253418, "step": 7849 }, { "epoch": 0.91, "learning_rate": 2.6432030235030117e-08, "logits/chosen": -1.677561640739441, "logits/rejected": -2.24678373336792, "logps/chosen": -489.99658203125, "logps/rejected": -224.9002227783203, "loss": 0.2801, "rewards/accuracies": 0.875, "rewards/chosen": -1.0613090991973877, "rewards/margins": 2.1837997436523438, "rewards/rejected": -3.2451090812683105, "step": 7850 }, { "epoch": 0.91, "learning_rate": 2.6396598559111846e-08, "logits/chosen": -2.2706356048583984, "logits/rejected": -2.271763324737549, "logps/chosen": -182.10598754882812, "logps/rejected": -219.88475036621094, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": -0.9170463681221008, "rewards/margins": 0.2941240072250366, "rewards/rejected": -1.2111704349517822, "step": 7851 }, { "epoch": 0.91, "learning_rate": 2.636116688319357e-08, "logits/chosen": -2.800891876220703, "logits/rejected": -2.5645909309387207, "logps/chosen": -84.55953216552734, "logps/rejected": -135.32501220703125, "loss": 0.2098, "rewards/accuracies": 1.0, "rewards/chosen": -1.2609913349151611, "rewards/margins": 2.2511143684387207, "rewards/rejected": -3.5121054649353027, "step": 7852 }, { "epoch": 0.91, "learning_rate": 2.6325735207275304e-08, "logits/chosen": -1.9907047748565674, "logits/rejected": -2.37007737159729, "logps/chosen": -369.38580322265625, "logps/rejected": -335.4866943359375, "loss": 0.3763, "rewards/accuracies": 0.75, "rewards/chosen": -0.7452350854873657, "rewards/margins": 3.2299132347106934, "rewards/rejected": -3.9751482009887695, "step": 7853 }, { "epoch": 0.91, "learning_rate": 2.6290303531357032e-08, "logits/chosen": -2.104916572570801, "logits/rejected": -2.4474425315856934, "logps/chosen": -472.1258239746094, "logps/rejected": -225.3874969482422, "loss": 0.2113, "rewards/accuracies": 0.875, "rewards/chosen": -0.9740886688232422, "rewards/margins": 2.0205676555633545, "rewards/rejected": -2.9946563243865967, "step": 7854 }, { "epoch": 0.91, "learning_rate": 2.625487185543876e-08, "logits/chosen": -2.9697654247283936, "logits/rejected": -2.6111884117126465, "logps/chosen": -334.649169921875, "logps/rejected": -291.5124816894531, "loss": 0.319, "rewards/accuracies": 0.75, "rewards/chosen": -1.1983401775360107, "rewards/margins": 2.1195785999298096, "rewards/rejected": -3.3179187774658203, "step": 7855 }, { "epoch": 0.91, "learning_rate": 2.621944017952049e-08, "logits/chosen": -2.5467238426208496, "logits/rejected": -2.605901002883911, "logps/chosen": -269.2330017089844, "logps/rejected": -252.74472045898438, "loss": 0.6619, "rewards/accuracies": 0.625, "rewards/chosen": -1.3258498907089233, "rewards/margins": 0.25637009739875793, "rewards/rejected": -1.5822198390960693, "step": 7856 }, { "epoch": 0.91, "learning_rate": 2.618400850360222e-08, "logits/chosen": -2.4434361457824707, "logits/rejected": -2.2004780769348145, "logps/chosen": -305.7569580078125, "logps/rejected": -313.63214111328125, "loss": 0.4669, "rewards/accuracies": 0.625, "rewards/chosen": -1.1815699338912964, "rewards/margins": 1.5986425876617432, "rewards/rejected": -2.780212640762329, "step": 7857 }, { "epoch": 0.91, "learning_rate": 2.6148576827683947e-08, "logits/chosen": -2.390625, "logits/rejected": -2.212160110473633, "logps/chosen": -269.5479736328125, "logps/rejected": -392.1124267578125, "loss": 0.398, "rewards/accuracies": 0.75, "rewards/chosen": -1.238297700881958, "rewards/margins": 3.0376007556915283, "rewards/rejected": -4.2758989334106445, "step": 7858 }, { "epoch": 0.91, "learning_rate": 2.6113145151765676e-08, "logits/chosen": -2.5776987075805664, "logits/rejected": -2.9130423069000244, "logps/chosen": -379.9630432128906, "logps/rejected": -325.0814514160156, "loss": 0.5876, "rewards/accuracies": 0.625, "rewards/chosen": -0.7534393668174744, "rewards/margins": 2.1143686771392822, "rewards/rejected": -2.8678081035614014, "step": 7859 }, { "epoch": 0.91, "learning_rate": 2.6077713475847408e-08, "logits/chosen": -1.7909144163131714, "logits/rejected": -1.3509516716003418, "logps/chosen": -344.3521728515625, "logps/rejected": -504.03961181640625, "loss": 0.4573, "rewards/accuracies": 0.75, "rewards/chosen": -0.7292894721031189, "rewards/margins": 1.9575880765914917, "rewards/rejected": -2.686877727508545, "step": 7860 }, { "epoch": 0.91, "learning_rate": 2.6042281799929137e-08, "logits/chosen": -1.8095626831054688, "logits/rejected": -2.0864272117614746, "logps/chosen": -289.32952880859375, "logps/rejected": -219.05686950683594, "loss": 0.2058, "rewards/accuracies": 0.875, "rewards/chosen": -0.4356086850166321, "rewards/margins": 2.2327427864074707, "rewards/rejected": -2.668351411819458, "step": 7861 }, { "epoch": 0.91, "learning_rate": 2.6006850124010862e-08, "logits/chosen": -2.6336028575897217, "logits/rejected": -2.7273569107055664, "logps/chosen": -341.30694580078125, "logps/rejected": -314.98779296875, "loss": 0.4345, "rewards/accuracies": 0.75, "rewards/chosen": -1.703446388244629, "rewards/margins": 2.04669189453125, "rewards/rejected": -3.750138282775879, "step": 7862 }, { "epoch": 0.91, "learning_rate": 2.5971418448092594e-08, "logits/chosen": -2.6207571029663086, "logits/rejected": -2.545320510864258, "logps/chosen": -238.58065795898438, "logps/rejected": -284.02691650390625, "loss": 0.2685, "rewards/accuracies": 0.875, "rewards/chosen": -1.5596529245376587, "rewards/margins": 2.6254658699035645, "rewards/rejected": -4.185118675231934, "step": 7863 }, { "epoch": 0.91, "learning_rate": 2.5935986772174323e-08, "logits/chosen": -1.9117369651794434, "logits/rejected": -2.1039280891418457, "logps/chosen": -294.133544921875, "logps/rejected": -210.80856323242188, "loss": 0.4226, "rewards/accuracies": 0.75, "rewards/chosen": -0.6247460842132568, "rewards/margins": 1.4508183002471924, "rewards/rejected": -2.075564384460449, "step": 7864 }, { "epoch": 0.91, "learning_rate": 2.5900555096256052e-08, "logits/chosen": -2.1804206371307373, "logits/rejected": -2.276050329208374, "logps/chosen": -292.1705627441406, "logps/rejected": -344.1279296875, "loss": 0.481, "rewards/accuracies": 0.75, "rewards/chosen": -2.157456636428833, "rewards/margins": 2.2988193035125732, "rewards/rejected": -4.456275939941406, "step": 7865 }, { "epoch": 0.92, "learning_rate": 2.586512342033778e-08, "logits/chosen": -2.4031779766082764, "logits/rejected": -2.333453893661499, "logps/chosen": -198.59976196289062, "logps/rejected": -227.4092559814453, "loss": 0.6541, "rewards/accuracies": 0.75, "rewards/chosen": -0.8001508712768555, "rewards/margins": 4.353246688842773, "rewards/rejected": -5.153397560119629, "step": 7866 }, { "epoch": 0.92, "learning_rate": 2.5829691744419513e-08, "logits/chosen": -2.1582913398742676, "logits/rejected": -2.2558181285858154, "logps/chosen": -268.95159912109375, "logps/rejected": -213.8064422607422, "loss": 0.638, "rewards/accuracies": 0.5, "rewards/chosen": -0.7250502109527588, "rewards/margins": 0.8525638580322266, "rewards/rejected": -1.5776140689849854, "step": 7867 }, { "epoch": 0.92, "learning_rate": 2.5794260068501238e-08, "logits/chosen": -1.8762321472167969, "logits/rejected": -2.1637144088745117, "logps/chosen": -347.3463134765625, "logps/rejected": -271.4024658203125, "loss": 1.0038, "rewards/accuracies": 0.75, "rewards/chosen": -1.1601426601409912, "rewards/margins": 0.7902706861495972, "rewards/rejected": -1.950413465499878, "step": 7868 }, { "epoch": 0.92, "learning_rate": 2.5758828392582967e-08, "logits/chosen": -2.405323028564453, "logits/rejected": -2.5115509033203125, "logps/chosen": -179.38900756835938, "logps/rejected": -165.77456665039062, "loss": 0.3839, "rewards/accuracies": 0.625, "rewards/chosen": -0.7001006603240967, "rewards/margins": 2.566908121109009, "rewards/rejected": -3.2670090198516846, "step": 7869 }, { "epoch": 0.92, "learning_rate": 2.57233967166647e-08, "logits/chosen": -2.1263785362243652, "logits/rejected": -2.1821258068084717, "logps/chosen": -332.02496337890625, "logps/rejected": -378.09130859375, "loss": 0.7063, "rewards/accuracies": 0.75, "rewards/chosen": -0.8021727800369263, "rewards/margins": 1.4246606826782227, "rewards/rejected": -2.2268335819244385, "step": 7870 }, { "epoch": 0.92, "learning_rate": 2.5687965040746424e-08, "logits/chosen": -2.0861408710479736, "logits/rejected": -2.3406178951263428, "logps/chosen": -199.94827270507812, "logps/rejected": -175.6078643798828, "loss": 0.9285, "rewards/accuracies": 0.75, "rewards/chosen": -1.753055214881897, "rewards/margins": 0.40921223163604736, "rewards/rejected": -2.1622674465179443, "step": 7871 }, { "epoch": 0.92, "learning_rate": 2.5652533364828156e-08, "logits/chosen": -2.54176926612854, "logits/rejected": -2.354806423187256, "logps/chosen": -313.56414794921875, "logps/rejected": -309.02227783203125, "loss": 0.5369, "rewards/accuracies": 0.875, "rewards/chosen": -1.0835771560668945, "rewards/margins": 2.6893181800842285, "rewards/rejected": -3.772895336151123, "step": 7872 }, { "epoch": 0.92, "learning_rate": 2.5617101688909885e-08, "logits/chosen": -2.352356433868408, "logits/rejected": -2.162715196609497, "logps/chosen": -140.863037109375, "logps/rejected": -267.7840270996094, "loss": 0.5833, "rewards/accuracies": 0.75, "rewards/chosen": -1.4211351871490479, "rewards/margins": 1.231209635734558, "rewards/rejected": -2.6523447036743164, "step": 7873 }, { "epoch": 0.92, "learning_rate": 2.558167001299161e-08, "logits/chosen": -2.0563182830810547, "logits/rejected": -2.365208148956299, "logps/chosen": -537.2515869140625, "logps/rejected": -236.13711547851562, "loss": 0.4751, "rewards/accuracies": 0.75, "rewards/chosen": -0.9500283002853394, "rewards/margins": 0.7956160306930542, "rewards/rejected": -1.7456443309783936, "step": 7874 }, { "epoch": 0.92, "learning_rate": 2.5546238337073342e-08, "logits/chosen": -2.562055826187134, "logits/rejected": -2.339573860168457, "logps/chosen": -254.1559600830078, "logps/rejected": -185.94851684570312, "loss": 0.1555, "rewards/accuracies": 1.0, "rewards/chosen": -0.19967924058437347, "rewards/margins": 2.334592342376709, "rewards/rejected": -2.534271717071533, "step": 7875 }, { "epoch": 0.92, "learning_rate": 2.551080666115507e-08, "logits/chosen": -2.6980106830596924, "logits/rejected": -2.565164804458618, "logps/chosen": -253.7923583984375, "logps/rejected": -178.51881408691406, "loss": 0.3211, "rewards/accuracies": 0.75, "rewards/chosen": -0.48479902744293213, "rewards/margins": 1.976266860961914, "rewards/rejected": -2.4610657691955566, "step": 7876 }, { "epoch": 0.92, "learning_rate": 2.54753749852368e-08, "logits/chosen": -2.269768714904785, "logits/rejected": -2.257021903991699, "logps/chosen": -307.2211608886719, "logps/rejected": -270.6490173339844, "loss": 0.2759, "rewards/accuracies": 0.875, "rewards/chosen": -0.08433239907026291, "rewards/margins": 2.1422829627990723, "rewards/rejected": -2.2266151905059814, "step": 7877 }, { "epoch": 0.92, "learning_rate": 2.543994330931853e-08, "logits/chosen": -2.3993473052978516, "logits/rejected": -2.399404287338257, "logps/chosen": -241.12767028808594, "logps/rejected": -369.8497314453125, "loss": 0.8587, "rewards/accuracies": 0.625, "rewards/chosen": -1.1262191534042358, "rewards/margins": 1.065895915031433, "rewards/rejected": -2.192115068435669, "step": 7878 }, { "epoch": 0.92, "learning_rate": 2.540451163340026e-08, "logits/chosen": -1.9255352020263672, "logits/rejected": -2.0879273414611816, "logps/chosen": -240.8593292236328, "logps/rejected": -211.7167205810547, "loss": 0.5102, "rewards/accuracies": 0.625, "rewards/chosen": -2.0562353134155273, "rewards/margins": 0.9928839206695557, "rewards/rejected": -3.049118995666504, "step": 7879 }, { "epoch": 0.92, "learning_rate": 2.536907995748199e-08, "logits/chosen": -1.9334609508514404, "logits/rejected": -2.1450767517089844, "logps/chosen": -185.27565002441406, "logps/rejected": -209.24728393554688, "loss": 0.2414, "rewards/accuracies": 0.875, "rewards/chosen": -0.05446035414934158, "rewards/margins": 3.0589725971221924, "rewards/rejected": -3.1134328842163086, "step": 7880 }, { "epoch": 0.92, "learning_rate": 2.5333648281563715e-08, "logits/chosen": -1.8030915260314941, "logits/rejected": -2.298370361328125, "logps/chosen": -220.80929565429688, "logps/rejected": -179.35247802734375, "loss": 1.1568, "rewards/accuracies": 0.75, "rewards/chosen": -1.9548395872116089, "rewards/margins": 0.5012566447257996, "rewards/rejected": -2.4560964107513428, "step": 7881 }, { "epoch": 0.92, "learning_rate": 2.5298216605645447e-08, "logits/chosen": -2.3822197914123535, "logits/rejected": -2.631829023361206, "logps/chosen": -305.43963623046875, "logps/rejected": -293.9334716796875, "loss": 0.2053, "rewards/accuracies": 1.0, "rewards/chosen": -1.16841459274292, "rewards/margins": 3.2405471801757812, "rewards/rejected": -4.408962249755859, "step": 7882 }, { "epoch": 0.92, "learning_rate": 2.5262784929727176e-08, "logits/chosen": -2.955157995223999, "logits/rejected": -2.7164571285247803, "logps/chosen": -72.7317123413086, "logps/rejected": -238.9005126953125, "loss": 0.2788, "rewards/accuracies": 0.875, "rewards/chosen": -0.6832977533340454, "rewards/margins": 2.0410208702087402, "rewards/rejected": -2.724318504333496, "step": 7883 }, { "epoch": 0.92, "learning_rate": 2.5227353253808904e-08, "logits/chosen": -2.248410940170288, "logits/rejected": -2.110419750213623, "logps/chosen": -192.31283569335938, "logps/rejected": -270.1246337890625, "loss": 0.2419, "rewards/accuracies": 0.875, "rewards/chosen": -1.0254149436950684, "rewards/margins": 2.2936766147613525, "rewards/rejected": -3.319091796875, "step": 7884 }, { "epoch": 0.92, "learning_rate": 2.5191921577890633e-08, "logits/chosen": -2.5464251041412354, "logits/rejected": -2.5784854888916016, "logps/chosen": -209.05642700195312, "logps/rejected": -309.631591796875, "loss": 0.1645, "rewards/accuracies": 1.0, "rewards/chosen": -0.49593672156333923, "rewards/margins": 3.1813225746154785, "rewards/rejected": -3.6772589683532715, "step": 7885 }, { "epoch": 0.92, "learning_rate": 2.5156489901972365e-08, "logits/chosen": -2.0244247913360596, "logits/rejected": -2.161618232727051, "logps/chosen": -538.9692993164062, "logps/rejected": -466.417236328125, "loss": 0.1398, "rewards/accuracies": 1.0, "rewards/chosen": -0.4800707697868347, "rewards/margins": 3.5814552307128906, "rewards/rejected": -4.061526298522949, "step": 7886 }, { "epoch": 0.92, "learning_rate": 2.512105822605409e-08, "logits/chosen": -2.6560323238372803, "logits/rejected": -2.659830093383789, "logps/chosen": -123.50048065185547, "logps/rejected": -148.53062438964844, "loss": 0.3963, "rewards/accuracies": 0.625, "rewards/chosen": -0.9771144986152649, "rewards/margins": 1.6141901016235352, "rewards/rejected": -2.5913047790527344, "step": 7887 }, { "epoch": 0.92, "learning_rate": 2.508562655013582e-08, "logits/chosen": -2.6703877449035645, "logits/rejected": -2.612598419189453, "logps/chosen": -182.13963317871094, "logps/rejected": -213.39883422851562, "loss": 0.2974, "rewards/accuracies": 1.0, "rewards/chosen": -0.6449593305587769, "rewards/margins": 1.4322328567504883, "rewards/rejected": -2.0771920680999756, "step": 7888 }, { "epoch": 0.92, "learning_rate": 2.505019487421755e-08, "logits/chosen": -1.859920859336853, "logits/rejected": -2.127315044403076, "logps/chosen": -255.97686767578125, "logps/rejected": -285.4281005859375, "loss": 0.3438, "rewards/accuracies": 0.875, "rewards/chosen": -0.7993826866149902, "rewards/margins": 1.4470783472061157, "rewards/rejected": -2.2464611530303955, "step": 7889 }, { "epoch": 0.92, "learning_rate": 2.5014763198299277e-08, "logits/chosen": -2.2400808334350586, "logits/rejected": -2.503457546234131, "logps/chosen": -351.7080993652344, "logps/rejected": -190.5353240966797, "loss": 0.684, "rewards/accuracies": 0.625, "rewards/chosen": -0.4540064334869385, "rewards/margins": 1.0180299282073975, "rewards/rejected": -1.4720362424850464, "step": 7890 }, { "epoch": 0.92, "learning_rate": 2.4979331522381006e-08, "logits/chosen": -2.276614189147949, "logits/rejected": -2.057448387145996, "logps/chosen": -318.0711364746094, "logps/rejected": -394.2580871582031, "loss": 0.52, "rewards/accuracies": 0.75, "rewards/chosen": -0.638512372970581, "rewards/margins": 1.0929274559020996, "rewards/rejected": -1.7314398288726807, "step": 7891 }, { "epoch": 0.92, "learning_rate": 2.4943899846462738e-08, "logits/chosen": -2.2769975662231445, "logits/rejected": -2.121746063232422, "logps/chosen": -138.58917236328125, "logps/rejected": -267.1032409667969, "loss": 0.2992, "rewards/accuracies": 0.875, "rewards/chosen": -0.40122705698013306, "rewards/margins": 1.990156888961792, "rewards/rejected": -2.3913841247558594, "step": 7892 }, { "epoch": 0.92, "learning_rate": 2.4908468170544463e-08, "logits/chosen": -2.5578696727752686, "logits/rejected": -2.373919725418091, "logps/chosen": -105.67323303222656, "logps/rejected": -178.70294189453125, "loss": 0.5176, "rewards/accuracies": 0.75, "rewards/chosen": -0.7515531778335571, "rewards/margins": 1.3891359567642212, "rewards/rejected": -2.1406891345977783, "step": 7893 }, { "epoch": 0.92, "learning_rate": 2.4873036494626195e-08, "logits/chosen": -2.0119104385375977, "logits/rejected": -2.369147539138794, "logps/chosen": -380.342529296875, "logps/rejected": -281.191650390625, "loss": 0.7559, "rewards/accuracies": 0.875, "rewards/chosen": -0.8536868095397949, "rewards/margins": 1.5256433486938477, "rewards/rejected": -2.3793303966522217, "step": 7894 }, { "epoch": 0.92, "learning_rate": 2.4837604818707924e-08, "logits/chosen": -2.3726658821105957, "logits/rejected": -2.3749172687530518, "logps/chosen": -451.7352294921875, "logps/rejected": -594.7254638671875, "loss": 0.5616, "rewards/accuracies": 0.75, "rewards/chosen": -0.9132817387580872, "rewards/margins": 2.7010536193847656, "rewards/rejected": -3.614335536956787, "step": 7895 }, { "epoch": 0.92, "learning_rate": 2.480217314278965e-08, "logits/chosen": -2.298600196838379, "logits/rejected": -2.5209572315216064, "logps/chosen": -305.44720458984375, "logps/rejected": -177.74185180664062, "loss": 0.4003, "rewards/accuracies": 0.875, "rewards/chosen": -0.6948826313018799, "rewards/margins": 1.4177252054214478, "rewards/rejected": -2.112607955932617, "step": 7896 }, { "epoch": 0.92, "learning_rate": 2.476674146687138e-08, "logits/chosen": -2.823870897293091, "logits/rejected": -2.8776023387908936, "logps/chosen": -261.9993896484375, "logps/rejected": -267.5581359863281, "loss": 0.4103, "rewards/accuracies": 0.875, "rewards/chosen": -0.8336429595947266, "rewards/margins": 3.1213622093200684, "rewards/rejected": -3.955005168914795, "step": 7897 }, { "epoch": 0.92, "learning_rate": 2.473130979095311e-08, "logits/chosen": -2.432009696960449, "logits/rejected": -2.1000757217407227, "logps/chosen": -309.5743103027344, "logps/rejected": -447.2745361328125, "loss": 0.1689, "rewards/accuracies": 1.0, "rewards/chosen": -0.2706286311149597, "rewards/margins": 2.2744650840759277, "rewards/rejected": -2.5450940132141113, "step": 7898 }, { "epoch": 0.92, "learning_rate": 2.4695878115034842e-08, "logits/chosen": -2.00443696975708, "logits/rejected": -1.992392659187317, "logps/chosen": -358.02337646484375, "logps/rejected": -275.7724914550781, "loss": 0.4547, "rewards/accuracies": 0.625, "rewards/chosen": -1.3304870128631592, "rewards/margins": 1.0905416011810303, "rewards/rejected": -2.4210288524627686, "step": 7899 }, { "epoch": 0.92, "learning_rate": 2.4660446439116568e-08, "logits/chosen": -2.655320167541504, "logits/rejected": -2.6873185634613037, "logps/chosen": -206.15830993652344, "logps/rejected": -223.20849609375, "loss": 0.227, "rewards/accuracies": 1.0, "rewards/chosen": -0.9752264022827148, "rewards/margins": 1.9635382890701294, "rewards/rejected": -2.9387645721435547, "step": 7900 }, { "epoch": 0.92, "learning_rate": 2.46250147631983e-08, "logits/chosen": -2.717519760131836, "logits/rejected": -2.700474262237549, "logps/chosen": -127.8447265625, "logps/rejected": -188.705078125, "loss": 0.1321, "rewards/accuracies": 1.0, "rewards/chosen": -0.5006088018417358, "rewards/margins": 3.2391958236694336, "rewards/rejected": -3.73980450630188, "step": 7901 }, { "epoch": 0.92, "learning_rate": 2.458958308728003e-08, "logits/chosen": -1.9116932153701782, "logits/rejected": -2.222883701324463, "logps/chosen": -348.9131774902344, "logps/rejected": -233.498046875, "loss": 0.2679, "rewards/accuracies": 0.875, "rewards/chosen": -0.9342940449714661, "rewards/margins": 2.780888795852661, "rewards/rejected": -3.7151830196380615, "step": 7902 }, { "epoch": 0.92, "learning_rate": 2.4554151411361754e-08, "logits/chosen": -2.5011672973632812, "logits/rejected": -2.6458537578582764, "logps/chosen": -302.64996337890625, "logps/rejected": -250.1312713623047, "loss": 0.4905, "rewards/accuracies": 0.875, "rewards/chosen": -0.5306076407432556, "rewards/margins": 1.8706095218658447, "rewards/rejected": -2.401216983795166, "step": 7903 }, { "epoch": 0.92, "learning_rate": 2.4518719735443486e-08, "logits/chosen": -2.443427085876465, "logits/rejected": -2.36769700050354, "logps/chosen": -309.0700988769531, "logps/rejected": -181.25990295410156, "loss": 0.5333, "rewards/accuracies": 0.75, "rewards/chosen": -1.124724268913269, "rewards/margins": 0.9298417568206787, "rewards/rejected": -2.0545661449432373, "step": 7904 }, { "epoch": 0.92, "learning_rate": 2.4483288059525215e-08, "logits/chosen": -2.0128378868103027, "logits/rejected": -2.084937810897827, "logps/chosen": -332.10955810546875, "logps/rejected": -246.52999877929688, "loss": 0.7447, "rewards/accuracies": 0.625, "rewards/chosen": -2.134190797805786, "rewards/margins": 1.2977468967437744, "rewards/rejected": -3.4319374561309814, "step": 7905 }, { "epoch": 0.92, "learning_rate": 2.4447856383606943e-08, "logits/chosen": -2.3353376388549805, "logits/rejected": -2.271012306213379, "logps/chosen": -326.5997314453125, "logps/rejected": -344.6171569824219, "loss": 0.1638, "rewards/accuracies": 1.0, "rewards/chosen": -0.29515403509140015, "rewards/margins": 3.186164140701294, "rewards/rejected": -3.481318235397339, "step": 7906 }, { "epoch": 0.92, "learning_rate": 2.4412424707688672e-08, "logits/chosen": -2.6031789779663086, "logits/rejected": -2.447949171066284, "logps/chosen": -258.5350341796875, "logps/rejected": -226.427978515625, "loss": 0.706, "rewards/accuracies": 0.75, "rewards/chosen": -1.4374229907989502, "rewards/margins": 1.0430896282196045, "rewards/rejected": -2.4805126190185547, "step": 7907 }, { "epoch": 0.92, "learning_rate": 2.4376993031770404e-08, "logits/chosen": -2.8087270259857178, "logits/rejected": -2.790100574493408, "logps/chosen": -141.4102783203125, "logps/rejected": -155.72772216796875, "loss": 0.2193, "rewards/accuracies": 1.0, "rewards/chosen": -0.35797542333602905, "rewards/margins": 3.4364938735961914, "rewards/rejected": -3.7944693565368652, "step": 7908 }, { "epoch": 0.92, "learning_rate": 2.434156135585213e-08, "logits/chosen": -2.16719651222229, "logits/rejected": -2.4255967140197754, "logps/chosen": -244.84304809570312, "logps/rejected": -204.42742919921875, "loss": 0.172, "rewards/accuracies": 0.875, "rewards/chosen": -0.14899897575378418, "rewards/margins": 2.7660322189331055, "rewards/rejected": -2.9150309562683105, "step": 7909 }, { "epoch": 0.92, "learning_rate": 2.430612967993386e-08, "logits/chosen": -2.1378636360168457, "logits/rejected": -2.058483839035034, "logps/chosen": -251.0749053955078, "logps/rejected": -247.39463806152344, "loss": 0.2482, "rewards/accuracies": 0.875, "rewards/chosen": -0.4721890687942505, "rewards/margins": 2.135995864868164, "rewards/rejected": -2.608185052871704, "step": 7910 }, { "epoch": 0.92, "learning_rate": 2.427069800401559e-08, "logits/chosen": -2.499842643737793, "logits/rejected": -2.5841121673583984, "logps/chosen": -185.98577880859375, "logps/rejected": -337.7648010253906, "loss": 0.2465, "rewards/accuracies": 1.0, "rewards/chosen": -0.5679540038108826, "rewards/margins": 1.837892770767212, "rewards/rejected": -2.4058468341827393, "step": 7911 }, { "epoch": 0.92, "learning_rate": 2.4235266328097316e-08, "logits/chosen": -2.2288529872894287, "logits/rejected": -2.246342658996582, "logps/chosen": -360.1132507324219, "logps/rejected": -184.73468017578125, "loss": 0.8862, "rewards/accuracies": 0.625, "rewards/chosen": -1.4019582271575928, "rewards/margins": 1.603406548500061, "rewards/rejected": -3.0053648948669434, "step": 7912 }, { "epoch": 0.92, "learning_rate": 2.4199834652179048e-08, "logits/chosen": -2.1826891899108887, "logits/rejected": -2.414909601211548, "logps/chosen": -496.55963134765625, "logps/rejected": -357.4420471191406, "loss": 0.3885, "rewards/accuracies": 0.875, "rewards/chosen": -0.9701114892959595, "rewards/margins": 2.3873233795166016, "rewards/rejected": -3.3574347496032715, "step": 7913 }, { "epoch": 0.92, "learning_rate": 2.4164402976260777e-08, "logits/chosen": -2.1301865577697754, "logits/rejected": -2.2222049236297607, "logps/chosen": -283.1365966796875, "logps/rejected": -266.12677001953125, "loss": 0.3776, "rewards/accuracies": 0.875, "rewards/chosen": -0.9253539443016052, "rewards/margins": 1.5733819007873535, "rewards/rejected": -2.4987356662750244, "step": 7914 }, { "epoch": 0.92, "learning_rate": 2.412897130034251e-08, "logits/chosen": -2.443904399871826, "logits/rejected": -2.4630465507507324, "logps/chosen": -258.90826416015625, "logps/rejected": -293.7914123535156, "loss": 0.1962, "rewards/accuracies": 1.0, "rewards/chosen": -0.7137433290481567, "rewards/margins": 3.5591225624084473, "rewards/rejected": -4.2728657722473145, "step": 7915 }, { "epoch": 0.92, "learning_rate": 2.4093539624424234e-08, "logits/chosen": -2.2282724380493164, "logits/rejected": -2.0345242023468018, "logps/chosen": -399.265869140625, "logps/rejected": -387.71661376953125, "loss": 0.2353, "rewards/accuracies": 1.0, "rewards/chosen": -1.0928014516830444, "rewards/margins": 3.081101179122925, "rewards/rejected": -4.17390251159668, "step": 7916 }, { "epoch": 0.92, "learning_rate": 2.4058107948505963e-08, "logits/chosen": -3.0315189361572266, "logits/rejected": -2.989769697189331, "logps/chosen": -339.30767822265625, "logps/rejected": -260.9018859863281, "loss": 0.1741, "rewards/accuracies": 1.0, "rewards/chosen": -1.3835554122924805, "rewards/margins": 3.3323888778686523, "rewards/rejected": -4.715943813323975, "step": 7917 }, { "epoch": 0.92, "learning_rate": 2.4022676272587695e-08, "logits/chosen": -2.321711778640747, "logits/rejected": -2.415259599685669, "logps/chosen": -225.35968017578125, "logps/rejected": -277.0367431640625, "loss": 0.3222, "rewards/accuracies": 0.875, "rewards/chosen": -0.5182996988296509, "rewards/margins": 2.3043363094329834, "rewards/rejected": -2.8226358890533447, "step": 7918 }, { "epoch": 0.92, "learning_rate": 2.398724459666942e-08, "logits/chosen": -2.5577704906463623, "logits/rejected": -2.5920004844665527, "logps/chosen": -168.31614685058594, "logps/rejected": -161.8942108154297, "loss": 0.6798, "rewards/accuracies": 0.625, "rewards/chosen": -1.5825552940368652, "rewards/margins": 0.847343385219574, "rewards/rejected": -2.429898500442505, "step": 7919 }, { "epoch": 0.92, "learning_rate": 2.3951812920751152e-08, "logits/chosen": -1.8999004364013672, "logits/rejected": -1.6054641008377075, "logps/chosen": -272.4313049316406, "logps/rejected": -343.7594909667969, "loss": 0.3802, "rewards/accuracies": 0.75, "rewards/chosen": -0.9420961141586304, "rewards/margins": 1.3364864587783813, "rewards/rejected": -2.2785825729370117, "step": 7920 }, { "epoch": 0.92, "learning_rate": 2.391638124483288e-08, "logits/chosen": -2.4054789543151855, "logits/rejected": -2.3733601570129395, "logps/chosen": -291.7012023925781, "logps/rejected": -268.06207275390625, "loss": 0.4918, "rewards/accuracies": 0.75, "rewards/chosen": -0.37824904918670654, "rewards/margins": 1.5260368585586548, "rewards/rejected": -1.9042860269546509, "step": 7921 }, { "epoch": 0.92, "learning_rate": 2.3880949568914607e-08, "logits/chosen": -2.3120837211608887, "logits/rejected": -2.5042855739593506, "logps/chosen": -174.71795654296875, "logps/rejected": -267.41485595703125, "loss": 0.2148, "rewards/accuracies": 0.875, "rewards/chosen": -0.22401051223278046, "rewards/margins": 2.9427590370178223, "rewards/rejected": -3.1667697429656982, "step": 7922 }, { "epoch": 0.92, "learning_rate": 2.384551789299634e-08, "logits/chosen": -2.602632761001587, "logits/rejected": -2.6612229347229004, "logps/chosen": -156.10350036621094, "logps/rejected": -236.39727783203125, "loss": 0.1849, "rewards/accuracies": 0.875, "rewards/chosen": -1.2153866291046143, "rewards/margins": 3.019911766052246, "rewards/rejected": -4.235298156738281, "step": 7923 }, { "epoch": 0.92, "learning_rate": 2.3810086217078067e-08, "logits/chosen": -1.9638186693191528, "logits/rejected": -2.304720878601074, "logps/chosen": -260.29376220703125, "logps/rejected": -242.98907470703125, "loss": 0.3161, "rewards/accuracies": 0.875, "rewards/chosen": -0.22403855621814728, "rewards/margins": 2.5980265140533447, "rewards/rejected": -2.8220651149749756, "step": 7924 }, { "epoch": 0.92, "learning_rate": 2.3774654541159793e-08, "logits/chosen": -2.0644359588623047, "logits/rejected": -2.0033154487609863, "logps/chosen": -395.11846923828125, "logps/rejected": -372.88330078125, "loss": 0.4911, "rewards/accuracies": 0.75, "rewards/chosen": -1.0338728427886963, "rewards/margins": 1.3245233297348022, "rewards/rejected": -2.358396053314209, "step": 7925 }, { "epoch": 0.92, "learning_rate": 2.3739222865241525e-08, "logits/chosen": -2.635441303253174, "logits/rejected": -2.605717420578003, "logps/chosen": -284.25518798828125, "logps/rejected": -260.8153991699219, "loss": 0.265, "rewards/accuracies": 0.875, "rewards/chosen": -0.831153929233551, "rewards/margins": 2.0157456398010254, "rewards/rejected": -2.8468995094299316, "step": 7926 }, { "epoch": 0.92, "learning_rate": 2.3703791189323254e-08, "logits/chosen": -2.5409960746765137, "logits/rejected": -2.225231409072876, "logps/chosen": -269.84112548828125, "logps/rejected": -351.6633605957031, "loss": 0.4643, "rewards/accuracies": 0.75, "rewards/chosen": -1.4332435131072998, "rewards/margins": 2.1261606216430664, "rewards/rejected": -3.5594043731689453, "step": 7927 }, { "epoch": 0.92, "learning_rate": 2.3668359513404982e-08, "logits/chosen": -2.5507864952087402, "logits/rejected": -2.5258522033691406, "logps/chosen": -159.28123474121094, "logps/rejected": -188.44107055664062, "loss": 0.506, "rewards/accuracies": 0.625, "rewards/chosen": -0.8579781651496887, "rewards/margins": 2.154188394546509, "rewards/rejected": -3.0121665000915527, "step": 7928 }, { "epoch": 0.92, "learning_rate": 2.363292783748671e-08, "logits/chosen": -1.9448347091674805, "logits/rejected": -2.4302451610565186, "logps/chosen": -391.88494873046875, "logps/rejected": -270.1730041503906, "loss": 0.7114, "rewards/accuracies": 0.625, "rewards/chosen": -1.1585097312927246, "rewards/margins": 0.6377213597297668, "rewards/rejected": -1.7962310314178467, "step": 7929 }, { "epoch": 0.92, "learning_rate": 2.3597496161568443e-08, "logits/chosen": -2.4405508041381836, "logits/rejected": -2.615535259246826, "logps/chosen": -274.6668395996094, "logps/rejected": -146.52352905273438, "loss": 0.3987, "rewards/accuracies": 0.875, "rewards/chosen": -0.44402971863746643, "rewards/margins": 1.398289442062378, "rewards/rejected": -1.8423190116882324, "step": 7930 }, { "epoch": 0.92, "learning_rate": 2.356206448565017e-08, "logits/chosen": -2.7612464427948, "logits/rejected": -2.4905612468719482, "logps/chosen": -186.17674255371094, "logps/rejected": -233.54598999023438, "loss": 0.2086, "rewards/accuracies": 1.0, "rewards/chosen": -0.22131508588790894, "rewards/margins": 2.2624282836914062, "rewards/rejected": -2.483743667602539, "step": 7931 }, { "epoch": 0.92, "learning_rate": 2.3526632809731897e-08, "logits/chosen": -2.012943744659424, "logits/rejected": -2.272374391555786, "logps/chosen": -290.6260070800781, "logps/rejected": -288.4266662597656, "loss": 0.7443, "rewards/accuracies": 0.5, "rewards/chosen": -0.996061384677887, "rewards/margins": 1.260544776916504, "rewards/rejected": -2.256605863571167, "step": 7932 }, { "epoch": 0.92, "learning_rate": 2.349120113381363e-08, "logits/chosen": -2.3637595176696777, "logits/rejected": -2.1447930335998535, "logps/chosen": -299.8051452636719, "logps/rejected": -333.3448486328125, "loss": 0.8299, "rewards/accuracies": 0.625, "rewards/chosen": -1.295386552810669, "rewards/margins": 2.759638786315918, "rewards/rejected": -4.055025100708008, "step": 7933 }, { "epoch": 0.92, "learning_rate": 2.3455769457895358e-08, "logits/chosen": -2.504493474960327, "logits/rejected": -2.4831273555755615, "logps/chosen": -111.75584411621094, "logps/rejected": -254.7050323486328, "loss": 0.6642, "rewards/accuracies": 0.75, "rewards/chosen": -0.7831162214279175, "rewards/margins": 1.4520812034606934, "rewards/rejected": -2.2351975440979004, "step": 7934 }, { "epoch": 0.92, "learning_rate": 2.3420337781977087e-08, "logits/chosen": -2.5030336380004883, "logits/rejected": -2.653775215148926, "logps/chosen": -237.0608367919922, "logps/rejected": -316.8910827636719, "loss": 0.2737, "rewards/accuracies": 0.875, "rewards/chosen": -0.9311028122901917, "rewards/margins": 2.081559419631958, "rewards/rejected": -3.012662410736084, "step": 7935 }, { "epoch": 0.92, "learning_rate": 2.3384906106058816e-08, "logits/chosen": -2.766071081161499, "logits/rejected": -2.440854787826538, "logps/chosen": -128.44578552246094, "logps/rejected": -265.63031005859375, "loss": 0.5464, "rewards/accuracies": 0.625, "rewards/chosen": -0.9430932998657227, "rewards/margins": 0.9608340859413147, "rewards/rejected": -1.9039273262023926, "step": 7936 }, { "epoch": 0.92, "learning_rate": 2.3349474430140548e-08, "logits/chosen": -2.6547579765319824, "logits/rejected": -2.2129507064819336, "logps/chosen": -126.23860168457031, "logps/rejected": -276.2158203125, "loss": 0.329, "rewards/accuracies": 0.75, "rewards/chosen": -0.75138258934021, "rewards/margins": 1.5121071338653564, "rewards/rejected": -2.2634897232055664, "step": 7937 }, { "epoch": 0.92, "learning_rate": 2.3314042754222273e-08, "logits/chosen": -2.5367753505706787, "logits/rejected": -2.7656497955322266, "logps/chosen": -443.9340515136719, "logps/rejected": -247.61654663085938, "loss": 0.2357, "rewards/accuracies": 1.0, "rewards/chosen": -0.3303983509540558, "rewards/margins": 2.048977851867676, "rewards/rejected": -2.37937593460083, "step": 7938 }, { "epoch": 0.92, "learning_rate": 2.3278611078304002e-08, "logits/chosen": -2.0574212074279785, "logits/rejected": -2.099177837371826, "logps/chosen": -415.122314453125, "logps/rejected": -198.55160522460938, "loss": 0.592, "rewards/accuracies": 0.625, "rewards/chosen": -1.0559746026992798, "rewards/margins": 0.7258423566818237, "rewards/rejected": -1.7818169593811035, "step": 7939 }, { "epoch": 0.92, "learning_rate": 2.3243179402385734e-08, "logits/chosen": -2.9010469913482666, "logits/rejected": -2.8756604194641113, "logps/chosen": -332.96893310546875, "logps/rejected": -233.5495147705078, "loss": 0.1476, "rewards/accuracies": 0.875, "rewards/chosen": -0.3168620765209198, "rewards/margins": 4.073558330535889, "rewards/rejected": -4.390420436859131, "step": 7940 }, { "epoch": 0.92, "learning_rate": 2.320774772646746e-08, "logits/chosen": -2.3103156089782715, "logits/rejected": -2.54202938079834, "logps/chosen": -171.20233154296875, "logps/rejected": -238.95062255859375, "loss": 0.152, "rewards/accuracies": 1.0, "rewards/chosen": -0.24492357671260834, "rewards/margins": 2.239535331726074, "rewards/rejected": -2.4844589233398438, "step": 7941 }, { "epoch": 0.92, "learning_rate": 2.317231605054919e-08, "logits/chosen": -2.677290201187134, "logits/rejected": -2.575899839401245, "logps/chosen": -290.17620849609375, "logps/rejected": -363.729736328125, "loss": 0.1541, "rewards/accuracies": 1.0, "rewards/chosen": -0.9728565812110901, "rewards/margins": 2.401655673980713, "rewards/rejected": -3.374512195587158, "step": 7942 }, { "epoch": 0.92, "learning_rate": 2.313688437463092e-08, "logits/chosen": -2.1409707069396973, "logits/rejected": -2.2890095710754395, "logps/chosen": -201.75604248046875, "logps/rejected": -269.7251281738281, "loss": 1.8355, "rewards/accuracies": 0.625, "rewards/chosen": -2.644479751586914, "rewards/margins": 0.465520977973938, "rewards/rejected": -3.1100008487701416, "step": 7943 }, { "epoch": 0.92, "learning_rate": 2.3101452698712645e-08, "logits/chosen": -2.4376142024993896, "logits/rejected": -2.4085497856140137, "logps/chosen": -314.70452880859375, "logps/rejected": -279.63714599609375, "loss": 0.2087, "rewards/accuracies": 1.0, "rewards/chosen": -0.9749330282211304, "rewards/margins": 2.119748830795288, "rewards/rejected": -3.094681739807129, "step": 7944 }, { "epoch": 0.92, "learning_rate": 2.3066021022794378e-08, "logits/chosen": -2.178804874420166, "logits/rejected": -2.14040207862854, "logps/chosen": -337.7563781738281, "logps/rejected": -306.2362060546875, "loss": 0.0902, "rewards/accuracies": 1.0, "rewards/chosen": -0.30014145374298096, "rewards/margins": 3.6921420097351074, "rewards/rejected": -3.992283344268799, "step": 7945 }, { "epoch": 0.92, "learning_rate": 2.3030589346876106e-08, "logits/chosen": -1.9577772617340088, "logits/rejected": -2.1096670627593994, "logps/chosen": -212.10662841796875, "logps/rejected": -185.49472045898438, "loss": 0.4694, "rewards/accuracies": 0.75, "rewards/chosen": -0.5436193943023682, "rewards/margins": 1.1928845643997192, "rewards/rejected": -1.7365039587020874, "step": 7946 }, { "epoch": 0.92, "learning_rate": 2.2995157670957835e-08, "logits/chosen": -2.908970355987549, "logits/rejected": -2.829209566116333, "logps/chosen": -260.43890380859375, "logps/rejected": -295.95294189453125, "loss": 0.2402, "rewards/accuracies": 0.875, "rewards/chosen": -1.0091475248336792, "rewards/margins": 3.479346752166748, "rewards/rejected": -4.488494873046875, "step": 7947 }, { "epoch": 0.92, "learning_rate": 2.2959725995039564e-08, "logits/chosen": -2.686594009399414, "logits/rejected": -2.343799352645874, "logps/chosen": -158.93704223632812, "logps/rejected": -220.22451782226562, "loss": 0.3775, "rewards/accuracies": 0.625, "rewards/chosen": -1.3530195951461792, "rewards/margins": 1.897355079650879, "rewards/rejected": -3.2503745555877686, "step": 7948 }, { "epoch": 0.92, "learning_rate": 2.2924294319121296e-08, "logits/chosen": -2.818746328353882, "logits/rejected": -2.828338623046875, "logps/chosen": -259.7038269042969, "logps/rejected": -171.31246948242188, "loss": 0.5046, "rewards/accuracies": 0.5, "rewards/chosen": -0.615162193775177, "rewards/margins": 1.5230515003204346, "rewards/rejected": -2.138213634490967, "step": 7949 }, { "epoch": 0.92, "learning_rate": 2.288886264320302e-08, "logits/chosen": -1.772756576538086, "logits/rejected": -1.8191776275634766, "logps/chosen": -311.5915832519531, "logps/rejected": -282.1527404785156, "loss": 0.2818, "rewards/accuracies": 0.875, "rewards/chosen": -0.6765326857566833, "rewards/margins": 1.7645983695983887, "rewards/rejected": -2.441131114959717, "step": 7950 }, { "epoch": 0.92, "learning_rate": 2.285343096728475e-08, "logits/chosen": -1.9832357168197632, "logits/rejected": -2.0177032947540283, "logps/chosen": -334.6487731933594, "logps/rejected": -279.83563232421875, "loss": 0.3283, "rewards/accuracies": 0.875, "rewards/chosen": -1.7178694009780884, "rewards/margins": 1.4255225658416748, "rewards/rejected": -3.1433918476104736, "step": 7951 }, { "epoch": 0.93, "learning_rate": 2.2817999291366482e-08, "logits/chosen": -2.804170608520508, "logits/rejected": -2.8846681118011475, "logps/chosen": -261.3087463378906, "logps/rejected": -304.3049011230469, "loss": 0.5038, "rewards/accuracies": 0.75, "rewards/chosen": -0.6728501915931702, "rewards/margins": 2.2403061389923096, "rewards/rejected": -2.913156270980835, "step": 7952 }, { "epoch": 0.93, "learning_rate": 2.278256761544821e-08, "logits/chosen": -2.4191031455993652, "logits/rejected": -2.369968891143799, "logps/chosen": -200.87057495117188, "logps/rejected": -198.14102172851562, "loss": 0.615, "rewards/accuracies": 0.75, "rewards/chosen": -0.38278234004974365, "rewards/margins": 1.8026577234268188, "rewards/rejected": -2.1854400634765625, "step": 7953 }, { "epoch": 0.93, "learning_rate": 2.2747135939529936e-08, "logits/chosen": -2.8239660263061523, "logits/rejected": -2.689587354660034, "logps/chosen": -159.35874938964844, "logps/rejected": -267.359130859375, "loss": 0.2505, "rewards/accuracies": 1.0, "rewards/chosen": -0.4760451018810272, "rewards/margins": 3.2247085571289062, "rewards/rejected": -3.7007534503936768, "step": 7954 }, { "epoch": 0.93, "learning_rate": 2.2711704263611668e-08, "logits/chosen": -2.252699613571167, "logits/rejected": -2.1995182037353516, "logps/chosen": -131.16091918945312, "logps/rejected": -185.00538635253906, "loss": 0.414, "rewards/accuracies": 0.75, "rewards/chosen": -1.0721017122268677, "rewards/margins": 1.4404927492141724, "rewards/rejected": -2.51259446144104, "step": 7955 }, { "epoch": 0.93, "learning_rate": 2.2676272587693397e-08, "logits/chosen": -2.2172956466674805, "logits/rejected": -2.5874757766723633, "logps/chosen": -517.50537109375, "logps/rejected": -310.1287841796875, "loss": 0.2268, "rewards/accuracies": 1.0, "rewards/chosen": -0.18548892438411713, "rewards/margins": 2.702253818511963, "rewards/rejected": -2.887742757797241, "step": 7956 }, { "epoch": 0.93, "learning_rate": 2.2640840911775126e-08, "logits/chosen": -2.8744606971740723, "logits/rejected": -2.926292896270752, "logps/chosen": -83.27838134765625, "logps/rejected": -150.31118774414062, "loss": 0.397, "rewards/accuracies": 0.875, "rewards/chosen": -0.9619206786155701, "rewards/margins": 1.2763993740081787, "rewards/rejected": -2.2383198738098145, "step": 7957 }, { "epoch": 0.93, "learning_rate": 2.2605409235856854e-08, "logits/chosen": -2.554727554321289, "logits/rejected": -2.803117036819458, "logps/chosen": -243.24822998046875, "logps/rejected": -207.26861572265625, "loss": 0.3469, "rewards/accuracies": 0.875, "rewards/chosen": -0.6724598407745361, "rewards/margins": 1.8769890069961548, "rewards/rejected": -2.5494489669799805, "step": 7958 }, { "epoch": 0.93, "learning_rate": 2.2569977559938587e-08, "logits/chosen": -1.783987283706665, "logits/rejected": -1.9412462711334229, "logps/chosen": -454.9590759277344, "logps/rejected": -298.49041748046875, "loss": 0.4846, "rewards/accuracies": 0.625, "rewards/chosen": -0.5409785509109497, "rewards/margins": 1.830592155456543, "rewards/rejected": -2.3715708255767822, "step": 7959 }, { "epoch": 0.93, "learning_rate": 2.2534545884020312e-08, "logits/chosen": -1.743109941482544, "logits/rejected": -1.922055959701538, "logps/chosen": -283.1545104980469, "logps/rejected": -294.14691162109375, "loss": 0.8222, "rewards/accuracies": 0.75, "rewards/chosen": -0.8095879554748535, "rewards/margins": 2.9660778045654297, "rewards/rejected": -3.775665760040283, "step": 7960 }, { "epoch": 0.93, "learning_rate": 2.249911420810204e-08, "logits/chosen": -2.4052834510803223, "logits/rejected": -2.3537933826446533, "logps/chosen": -350.47259521484375, "logps/rejected": -266.0898742675781, "loss": 0.4271, "rewards/accuracies": 0.75, "rewards/chosen": -1.1431539058685303, "rewards/margins": 1.0197665691375732, "rewards/rejected": -2.1629204750061035, "step": 7961 }, { "epoch": 0.93, "learning_rate": 2.2463682532183773e-08, "logits/chosen": -2.596705675125122, "logits/rejected": -2.5372259616851807, "logps/chosen": -348.93829345703125, "logps/rejected": -418.7188415527344, "loss": 0.2252, "rewards/accuracies": 1.0, "rewards/chosen": -0.8061137795448303, "rewards/margins": 1.731471061706543, "rewards/rejected": -2.5375847816467285, "step": 7962 }, { "epoch": 0.93, "learning_rate": 2.2428250856265498e-08, "logits/chosen": -2.0659141540527344, "logits/rejected": -2.0213091373443604, "logps/chosen": -249.2177276611328, "logps/rejected": -263.25885009765625, "loss": 0.4614, "rewards/accuracies": 0.875, "rewards/chosen": -1.7510859966278076, "rewards/margins": 2.186274290084839, "rewards/rejected": -3.9373602867126465, "step": 7963 }, { "epoch": 0.93, "learning_rate": 2.239281918034723e-08, "logits/chosen": -2.4914402961730957, "logits/rejected": -2.4141383171081543, "logps/chosen": -208.51651000976562, "logps/rejected": -238.66949462890625, "loss": 0.3218, "rewards/accuracies": 0.75, "rewards/chosen": -0.8023133277893066, "rewards/margins": 2.325784206390381, "rewards/rejected": -3.1280975341796875, "step": 7964 }, { "epoch": 0.93, "learning_rate": 2.235738750442896e-08, "logits/chosen": -2.380258560180664, "logits/rejected": -2.4839227199554443, "logps/chosen": -253.82901000976562, "logps/rejected": -200.03713989257812, "loss": 0.4398, "rewards/accuracies": 0.875, "rewards/chosen": -0.15005724132061005, "rewards/margins": 2.073590040206909, "rewards/rejected": -2.223647117614746, "step": 7965 }, { "epoch": 0.93, "learning_rate": 2.2321955828510684e-08, "logits/chosen": -2.410712957382202, "logits/rejected": -2.053783416748047, "logps/chosen": -146.37596130371094, "logps/rejected": -260.8443298339844, "loss": 0.8249, "rewards/accuracies": 0.75, "rewards/chosen": -0.6185486316680908, "rewards/margins": 1.801001787185669, "rewards/rejected": -2.419550657272339, "step": 7966 }, { "epoch": 0.93, "learning_rate": 2.2286524152592416e-08, "logits/chosen": -2.784095287322998, "logits/rejected": -2.690244674682617, "logps/chosen": -246.8460693359375, "logps/rejected": -288.37200927734375, "loss": 0.3269, "rewards/accuracies": 0.75, "rewards/chosen": -1.0475585460662842, "rewards/margins": 1.824329137802124, "rewards/rejected": -2.871887683868408, "step": 7967 }, { "epoch": 0.93, "learning_rate": 2.2251092476674145e-08, "logits/chosen": -2.003535509109497, "logits/rejected": -1.7436556816101074, "logps/chosen": -365.09381103515625, "logps/rejected": -479.1763916015625, "loss": 0.0827, "rewards/accuracies": 1.0, "rewards/chosen": -0.37955570220947266, "rewards/margins": 4.792245388031006, "rewards/rejected": -5.17180061340332, "step": 7968 }, { "epoch": 0.93, "learning_rate": 2.2215660800755874e-08, "logits/chosen": -2.5739240646362305, "logits/rejected": -2.5221571922302246, "logps/chosen": -180.19772338867188, "logps/rejected": -221.1044921875, "loss": 0.338, "rewards/accuracies": 0.875, "rewards/chosen": -1.0150916576385498, "rewards/margins": 2.4401440620422363, "rewards/rejected": -3.455235481262207, "step": 7969 }, { "epoch": 0.93, "learning_rate": 2.2180229124837603e-08, "logits/chosen": -2.3289341926574707, "logits/rejected": -2.7526071071624756, "logps/chosen": -307.6087646484375, "logps/rejected": -287.57415771484375, "loss": 0.4436, "rewards/accuracies": 0.875, "rewards/chosen": -0.8485410809516907, "rewards/margins": 1.780888557434082, "rewards/rejected": -2.629429578781128, "step": 7970 }, { "epoch": 0.93, "learning_rate": 2.2144797448919335e-08, "logits/chosen": -2.5578601360321045, "logits/rejected": -2.614640951156616, "logps/chosen": -315.7478332519531, "logps/rejected": -285.97369384765625, "loss": 0.8171, "rewards/accuracies": 0.625, "rewards/chosen": -1.0711640119552612, "rewards/margins": 0.7397758960723877, "rewards/rejected": -1.810939908027649, "step": 7971 }, { "epoch": 0.93, "learning_rate": 2.2109365773001063e-08, "logits/chosen": -2.289844512939453, "logits/rejected": -2.2932121753692627, "logps/chosen": -341.7578125, "logps/rejected": -297.2193603515625, "loss": 0.2132, "rewards/accuracies": 0.875, "rewards/chosen": -0.33973026275634766, "rewards/margins": 3.5787386894226074, "rewards/rejected": -3.918468952178955, "step": 7972 }, { "epoch": 0.93, "learning_rate": 2.207393409708279e-08, "logits/chosen": -2.584620952606201, "logits/rejected": -2.5750203132629395, "logps/chosen": -237.507080078125, "logps/rejected": -319.0863037109375, "loss": 0.1713, "rewards/accuracies": 0.875, "rewards/chosen": -0.39611050486564636, "rewards/margins": 3.1721997261047363, "rewards/rejected": -3.568310022354126, "step": 7973 }, { "epoch": 0.93, "learning_rate": 2.203850242116452e-08, "logits/chosen": -2.3300206661224365, "logits/rejected": -2.408979654312134, "logps/chosen": -359.6336669921875, "logps/rejected": -347.79632568359375, "loss": 0.2009, "rewards/accuracies": 1.0, "rewards/chosen": -0.211295023560524, "rewards/margins": 3.5260958671569824, "rewards/rejected": -3.7373909950256348, "step": 7974 }, { "epoch": 0.93, "learning_rate": 2.200307074524625e-08, "logits/chosen": -2.739744186401367, "logits/rejected": -2.729660987854004, "logps/chosen": -265.20550537109375, "logps/rejected": -289.67852783203125, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": -0.16416095197200775, "rewards/margins": 4.302430152893066, "rewards/rejected": -4.466590881347656, "step": 7975 }, { "epoch": 0.93, "learning_rate": 2.196763906932798e-08, "logits/chosen": -2.1292455196380615, "logits/rejected": -2.473879337310791, "logps/chosen": -342.6792907714844, "logps/rejected": -270.7359313964844, "loss": 0.27, "rewards/accuracies": 0.75, "rewards/chosen": -1.1868382692337036, "rewards/margins": 2.839244842529297, "rewards/rejected": -4.026083469390869, "step": 7976 }, { "epoch": 0.93, "learning_rate": 2.1932207393409707e-08, "logits/chosen": -2.653928279876709, "logits/rejected": -2.7606658935546875, "logps/chosen": -372.07000732421875, "logps/rejected": -214.67076110839844, "loss": 0.1586, "rewards/accuracies": 0.875, "rewards/chosen": -0.451236367225647, "rewards/margins": 2.9641218185424805, "rewards/rejected": -3.415358543395996, "step": 7977 }, { "epoch": 0.93, "learning_rate": 2.189677571749144e-08, "logits/chosen": -2.7326440811157227, "logits/rejected": -2.905953884124756, "logps/chosen": -403.1810302734375, "logps/rejected": -224.84967041015625, "loss": 0.375, "rewards/accuracies": 0.875, "rewards/chosen": -1.2886511087417603, "rewards/margins": 1.4819252490997314, "rewards/rejected": -2.770576238632202, "step": 7978 }, { "epoch": 0.93, "learning_rate": 2.1861344041573165e-08, "logits/chosen": -2.035466194152832, "logits/rejected": -2.008164644241333, "logps/chosen": -304.0318908691406, "logps/rejected": -288.59332275390625, "loss": 0.7338, "rewards/accuracies": 0.75, "rewards/chosen": -1.893661618232727, "rewards/margins": 1.662966012954712, "rewards/rejected": -3.5566277503967285, "step": 7979 }, { "epoch": 0.93, "learning_rate": 2.1825912365654893e-08, "logits/chosen": -2.045381546020508, "logits/rejected": -2.4216904640197754, "logps/chosen": -414.32611083984375, "logps/rejected": -316.7781066894531, "loss": 0.3726, "rewards/accuracies": 0.875, "rewards/chosen": -0.05281589925289154, "rewards/margins": 1.7778620719909668, "rewards/rejected": -1.83067786693573, "step": 7980 }, { "epoch": 0.93, "learning_rate": 2.1790480689736625e-08, "logits/chosen": -2.1544790267944336, "logits/rejected": -2.2158472537994385, "logps/chosen": -194.3736114501953, "logps/rejected": -240.66116333007812, "loss": 1.3612, "rewards/accuracies": 0.375, "rewards/chosen": -2.400655746459961, "rewards/margins": -0.1296863555908203, "rewards/rejected": -2.2709696292877197, "step": 7981 }, { "epoch": 0.93, "learning_rate": 2.175504901381835e-08, "logits/chosen": -2.287816286087036, "logits/rejected": -2.358912706375122, "logps/chosen": -128.73123168945312, "logps/rejected": -283.630615234375, "loss": 0.7669, "rewards/accuracies": 0.625, "rewards/chosen": -1.3471447229385376, "rewards/margins": 1.949524164199829, "rewards/rejected": -3.296668767929077, "step": 7982 }, { "epoch": 0.93, "learning_rate": 2.1719617337900083e-08, "logits/chosen": -1.987858533859253, "logits/rejected": -2.1607489585876465, "logps/chosen": -418.91943359375, "logps/rejected": -358.042236328125, "loss": 0.1028, "rewards/accuracies": 1.0, "rewards/chosen": -0.08607292175292969, "rewards/margins": 3.7297887802124023, "rewards/rejected": -3.815861940383911, "step": 7983 }, { "epoch": 0.93, "learning_rate": 2.1684185661981812e-08, "logits/chosen": -2.2343132495880127, "logits/rejected": -2.245279550552368, "logps/chosen": -209.7129669189453, "logps/rejected": -276.41668701171875, "loss": 0.342, "rewards/accuracies": 0.875, "rewards/chosen": -0.8181359767913818, "rewards/margins": 2.3515584468841553, "rewards/rejected": -3.169694423675537, "step": 7984 }, { "epoch": 0.93, "learning_rate": 2.1648753986063537e-08, "logits/chosen": -2.4495882987976074, "logits/rejected": -2.008030652999878, "logps/chosen": -117.72089385986328, "logps/rejected": -338.3353576660156, "loss": 0.226, "rewards/accuracies": 0.875, "rewards/chosen": -0.9642826914787292, "rewards/margins": 3.2623400688171387, "rewards/rejected": -4.226623058319092, "step": 7985 }, { "epoch": 0.93, "learning_rate": 2.161332231014527e-08, "logits/chosen": -2.6385602951049805, "logits/rejected": -2.756490707397461, "logps/chosen": -356.8431091308594, "logps/rejected": -355.6638488769531, "loss": 0.2289, "rewards/accuracies": 1.0, "rewards/chosen": -0.7990865707397461, "rewards/margins": 2.8925833702087402, "rewards/rejected": -3.6916699409484863, "step": 7986 }, { "epoch": 0.93, "learning_rate": 2.1577890634226998e-08, "logits/chosen": -2.134584426879883, "logits/rejected": -2.2066569328308105, "logps/chosen": -280.8466796875, "logps/rejected": -295.53448486328125, "loss": 0.4146, "rewards/accuracies": 0.75, "rewards/chosen": -0.4595000743865967, "rewards/margins": 1.5076870918273926, "rewards/rejected": -1.9671871662139893, "step": 7987 }, { "epoch": 0.93, "learning_rate": 2.154245895830873e-08, "logits/chosen": -2.2359235286712646, "logits/rejected": -2.0676114559173584, "logps/chosen": -253.96841430664062, "logps/rejected": -346.63800048828125, "loss": 0.7587, "rewards/accuracies": 0.5, "rewards/chosen": -1.229308843612671, "rewards/margins": 1.6873306035995483, "rewards/rejected": -2.9166393280029297, "step": 7988 }, { "epoch": 0.93, "learning_rate": 2.1507027282390455e-08, "logits/chosen": -2.272461414337158, "logits/rejected": -2.136066198348999, "logps/chosen": -338.4999084472656, "logps/rejected": -309.7479248046875, "loss": 0.3871, "rewards/accuracies": 0.75, "rewards/chosen": -0.1676366627216339, "rewards/margins": 1.760820746421814, "rewards/rejected": -1.928457498550415, "step": 7989 }, { "epoch": 0.93, "learning_rate": 2.1471595606472184e-08, "logits/chosen": -2.375124216079712, "logits/rejected": -2.458726406097412, "logps/chosen": -241.36647033691406, "logps/rejected": -171.52743530273438, "loss": 0.4824, "rewards/accuracies": 0.625, "rewards/chosen": 0.031467944383621216, "rewards/margins": 1.833339810371399, "rewards/rejected": -1.8018720149993896, "step": 7990 }, { "epoch": 0.93, "learning_rate": 2.1436163930553916e-08, "logits/chosen": -2.494670867919922, "logits/rejected": -2.6911003589630127, "logps/chosen": -283.97003173828125, "logps/rejected": -270.91180419921875, "loss": 0.32, "rewards/accuracies": 0.875, "rewards/chosen": -0.31934550404548645, "rewards/margins": 1.9394376277923584, "rewards/rejected": -2.2587833404541016, "step": 7991 }, { "epoch": 0.93, "learning_rate": 2.140073225463564e-08, "logits/chosen": -2.3459160327911377, "logits/rejected": -2.311495304107666, "logps/chosen": -181.8314208984375, "logps/rejected": -226.3282470703125, "loss": 0.2661, "rewards/accuracies": 0.875, "rewards/chosen": -0.8728684782981873, "rewards/margins": 2.0685901641845703, "rewards/rejected": -2.9414589405059814, "step": 7992 }, { "epoch": 0.93, "learning_rate": 2.1365300578717374e-08, "logits/chosen": -2.6651415824890137, "logits/rejected": -2.580374002456665, "logps/chosen": -150.93251037597656, "logps/rejected": -222.8038330078125, "loss": 0.2834, "rewards/accuracies": 0.875, "rewards/chosen": -1.236695408821106, "rewards/margins": 2.008108139038086, "rewards/rejected": -3.2448036670684814, "step": 7993 }, { "epoch": 0.93, "learning_rate": 2.1329868902799102e-08, "logits/chosen": -2.3615214824676514, "logits/rejected": -2.3639745712280273, "logps/chosen": -149.2554931640625, "logps/rejected": -181.45912170410156, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7607493996620178, "rewards/margins": 3.8407835960388184, "rewards/rejected": -4.601532936096191, "step": 7994 }, { "epoch": 0.93, "learning_rate": 2.1294437226880828e-08, "logits/chosen": -2.4990766048431396, "logits/rejected": -2.559098958969116, "logps/chosen": -272.220947265625, "logps/rejected": -212.87466430664062, "loss": 0.5706, "rewards/accuracies": 0.75, "rewards/chosen": -0.9893308877944946, "rewards/margins": 1.2760288715362549, "rewards/rejected": -2.26535964012146, "step": 7995 }, { "epoch": 0.93, "learning_rate": 2.125900555096256e-08, "logits/chosen": -2.244805335998535, "logits/rejected": -1.9662052392959595, "logps/chosen": -243.71119689941406, "logps/rejected": -303.9477844238281, "loss": 0.3036, "rewards/accuracies": 0.875, "rewards/chosen": -1.2401723861694336, "rewards/margins": 2.5939159393310547, "rewards/rejected": -3.8340883255004883, "step": 7996 }, { "epoch": 0.93, "learning_rate": 2.122357387504429e-08, "logits/chosen": -2.760526180267334, "logits/rejected": -2.869185447692871, "logps/chosen": -172.59080505371094, "logps/rejected": -319.48590087890625, "loss": 0.2396, "rewards/accuracies": 1.0, "rewards/chosen": -1.2546253204345703, "rewards/margins": 4.089526176452637, "rewards/rejected": -5.344151496887207, "step": 7997 }, { "epoch": 0.93, "learning_rate": 2.1188142199126017e-08, "logits/chosen": -2.648529529571533, "logits/rejected": -2.6099541187286377, "logps/chosen": -201.4505615234375, "logps/rejected": -146.0668182373047, "loss": 0.3948, "rewards/accuracies": 0.875, "rewards/chosen": -1.1993567943572998, "rewards/margins": 1.903824806213379, "rewards/rejected": -3.1031813621520996, "step": 7998 }, { "epoch": 0.93, "learning_rate": 2.1152710523207746e-08, "logits/chosen": -2.3633294105529785, "logits/rejected": -2.564870834350586, "logps/chosen": -248.1110076904297, "logps/rejected": -243.41659545898438, "loss": 0.7302, "rewards/accuracies": 0.5, "rewards/chosen": -1.6178865432739258, "rewards/margins": 1.0867955684661865, "rewards/rejected": -2.7046823501586914, "step": 7999 }, { "epoch": 0.93, "learning_rate": 2.1117278847289478e-08, "logits/chosen": -1.9721966981887817, "logits/rejected": -2.3795511722564697, "logps/chosen": -238.2112579345703, "logps/rejected": -158.1881103515625, "loss": 0.3897, "rewards/accuracies": 0.75, "rewards/chosen": -0.844830334186554, "rewards/margins": 2.080979824066162, "rewards/rejected": -2.9258103370666504, "step": 8000 }, { "epoch": 0.93, "eval_logits/chosen": -1.7372090816497803, "eval_logits/rejected": -1.7367677688598633, "eval_logps/chosen": -278.76385498046875, "eval_logps/rejected": -279.25750732421875, "eval_loss": 0.3668655753135681, "eval_rewards/accuracies": 0.8491379022598267, "eval_rewards/chosen": -0.6598379015922546, "eval_rewards/margins": 2.1652562618255615, "eval_rewards/rejected": -2.825094223022461, "eval_runtime": 237.2001, "eval_samples_per_second": 2.93, "eval_steps_per_second": 1.467, "step": 8000 } ], "logging_steps": 1, "max_steps": 8596, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }