{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 1455, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.36986301369863e-08, "logits/chosen": -2.8295512199401855, "logits/rejected": -2.9639337062835693, "logps/chosen": -242.64569091796875, "logps/rejected": -75.87144470214844, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.36986301369863e-07, "logits/chosen": -2.778250217437744, "logits/rejected": -2.813397169113159, "logps/chosen": -292.17218017578125, "logps/rejected": -78.88499450683594, "loss": 0.6942, "rewards/accuracies": 0.4097222089767456, "rewards/chosen": -0.001105638686567545, "rewards/margins": -0.0026314095593988895, "rewards/rejected": 0.0015257701743394136, "step": 10 }, { "epoch": 0.04, "learning_rate": 2.73972602739726e-07, "logits/chosen": -2.76747465133667, "logits/rejected": -2.7585418224334717, "logps/chosen": -300.1101989746094, "logps/rejected": -81.14244079589844, "loss": 0.6932, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.005251293070614338, "rewards/margins": -0.003377618733793497, "rewards/rejected": -0.001873674220405519, "step": 20 }, { "epoch": 0.06, "learning_rate": 4.10958904109589e-07, "logits/chosen": -2.776379108428955, "logits/rejected": -2.7856156826019287, "logps/chosen": -290.00897216796875, "logps/rejected": -78.96559143066406, "loss": 0.6885, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.005190015770494938, "rewards/margins": 0.011348642408847809, "rewards/rejected": -0.006158626172691584, "step": 30 }, { "epoch": 0.08, "learning_rate": 5.47945205479452e-07, "logits/chosen": -2.7960293292999268, "logits/rejected": -2.814054250717163, "logps/chosen": -245.3889617919922, "logps/rejected": -84.38166046142578, "loss": 0.6823, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.008710930123925209, "rewards/margins": 0.027255941182374954, "rewards/rejected": -0.018545005470514297, "step": 40 }, { "epoch": 0.1, "learning_rate": 6.84931506849315e-07, "logits/chosen": -2.7724318504333496, "logits/rejected": -2.8249223232269287, "logps/chosen": -252.29598999023438, "logps/rejected": -80.92951965332031, "loss": 0.6751, "rewards/accuracies": 0.731249988079071, "rewards/chosen": 0.00529166916385293, "rewards/margins": 0.03346817195415497, "rewards/rejected": -0.028176506981253624, "step": 50 }, { "epoch": 0.12, "learning_rate": 8.21917808219178e-07, "logits/chosen": -2.8436636924743652, "logits/rejected": -2.7858223915100098, "logps/chosen": -281.6982421875, "logps/rejected": -78.57672119140625, "loss": 0.6656, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": 0.0231462512165308, "rewards/margins": 0.06612209975719452, "rewards/rejected": -0.04297585040330887, "step": 60 }, { "epoch": 0.14, "learning_rate": 9.58904109589041e-07, "logits/chosen": -2.8172953128814697, "logits/rejected": -2.813781261444092, "logps/chosen": -264.85235595703125, "logps/rejected": -74.86351013183594, "loss": 0.6541, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.026092741638422012, "rewards/margins": 0.0869915708899498, "rewards/rejected": -0.060898829251527786, "step": 70 }, { "epoch": 0.16, "learning_rate": 1.095890410958904e-06, "logits/chosen": -2.8075528144836426, "logits/rejected": -2.8162968158721924, "logps/chosen": -255.69869995117188, "logps/rejected": -73.08162689208984, "loss": 0.6355, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.0321514829993248, "rewards/margins": 0.11635198444128036, "rewards/rejected": -0.08420050889253616, "step": 80 }, { "epoch": 0.19, "learning_rate": 1.232876712328767e-06, "logits/chosen": -2.841773748397827, "logits/rejected": -2.825883388519287, "logps/chosen": -267.5022888183594, "logps/rejected": -85.21376037597656, "loss": 0.6119, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.030611341819167137, "rewards/margins": 0.16599974036216736, "rewards/rejected": -0.13538840413093567, "step": 90 }, { "epoch": 0.21, "learning_rate": 1.36986301369863e-06, "logits/chosen": -2.852410316467285, "logits/rejected": -2.8519997596740723, "logps/chosen": -286.9059143066406, "logps/rejected": -82.15430450439453, "loss": 0.5808, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": 0.057482797652482986, "rewards/margins": 0.26051220297813416, "rewards/rejected": -0.20302939414978027, "step": 100 }, { "epoch": 0.23, "learning_rate": 1.5068493150684932e-06, "logits/chosen": -2.7674930095672607, "logits/rejected": -2.7689127922058105, "logps/chosen": -277.5893249511719, "logps/rejected": -84.25486755371094, "loss": 0.5528, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.05910978466272354, "rewards/margins": 0.3146067261695862, "rewards/rejected": -0.25549691915512085, "step": 110 }, { "epoch": 0.25, "learning_rate": 1.643835616438356e-06, "logits/chosen": -2.7497572898864746, "logits/rejected": -2.808637857437134, "logps/chosen": -277.6904602050781, "logps/rejected": -82.65525817871094, "loss": 0.5245, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.08153042942285538, "rewards/margins": 0.4226166605949402, "rewards/rejected": -0.3410862386226654, "step": 120 }, { "epoch": 0.27, "learning_rate": 1.780821917808219e-06, "logits/chosen": -2.765390634536743, "logits/rejected": -2.7765676975250244, "logps/chosen": -284.7118835449219, "logps/rejected": -90.37391662597656, "loss": 0.5057, "rewards/accuracies": 0.9375, "rewards/chosen": 0.1007312536239624, "rewards/margins": 0.49337419867515564, "rewards/rejected": -0.39264291524887085, "step": 130 }, { "epoch": 0.29, "learning_rate": 1.917808219178082e-06, "logits/chosen": -2.8435187339782715, "logits/rejected": -2.848928213119507, "logps/chosen": -293.25323486328125, "logps/rejected": -88.73191833496094, "loss": 0.4691, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.09035232663154602, "rewards/margins": 0.6013891696929932, "rewards/rejected": -0.5110368132591248, "step": 140 }, { "epoch": 0.31, "learning_rate": 1.9938884644766997e-06, "logits/chosen": -2.788867235183716, "logits/rejected": -2.797130584716797, "logps/chosen": -290.54638671875, "logps/rejected": -82.95954895019531, "loss": 0.4398, "rewards/accuracies": 0.96875, "rewards/chosen": 0.09947594255208969, "rewards/margins": 0.6646188497543335, "rewards/rejected": -0.5651428699493408, "step": 150 }, { "epoch": 0.33, "learning_rate": 1.978609625668449e-06, "logits/chosen": -2.809487819671631, "logits/rejected": -2.8850138187408447, "logps/chosen": -271.88629150390625, "logps/rejected": -83.17223358154297, "loss": 0.4204, "rewards/accuracies": 0.96875, "rewards/chosen": 0.07071445882320404, "rewards/margins": 0.7117933034896851, "rewards/rejected": -0.6410789489746094, "step": 160 }, { "epoch": 0.35, "learning_rate": 1.9633307868601984e-06, "logits/chosen": -2.7891199588775635, "logits/rejected": -2.789405584335327, "logps/chosen": -288.9054870605469, "logps/rejected": -86.50032043457031, "loss": 0.4053, "rewards/accuracies": 0.96875, "rewards/chosen": 0.13311851024627686, "rewards/margins": 0.861064076423645, "rewards/rejected": -0.7279455065727234, "step": 170 }, { "epoch": 0.37, "learning_rate": 1.948051948051948e-06, "logits/chosen": -2.760305643081665, "logits/rejected": -2.7756900787353516, "logps/chosen": -271.7988586425781, "logps/rejected": -88.26414489746094, "loss": 0.3764, "rewards/accuracies": 0.981249988079071, "rewards/chosen": 0.1391439437866211, "rewards/margins": 0.9942502975463867, "rewards/rejected": -0.8551063537597656, "step": 180 }, { "epoch": 0.39, "learning_rate": 1.9327731092436974e-06, "logits/chosen": -2.7618346214294434, "logits/rejected": -2.7937939167022705, "logps/chosen": -274.2486877441406, "logps/rejected": -89.63475799560547, "loss": 0.3633, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.06488532572984695, "rewards/margins": 0.9964747428894043, "rewards/rejected": -0.9315894246101379, "step": 190 }, { "epoch": 0.41, "learning_rate": 1.917494270435447e-06, "logits/chosen": -2.834862232208252, "logits/rejected": -2.8097951412200928, "logps/chosen": -267.5804748535156, "logps/rejected": -88.04959869384766, "loss": 0.3405, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": 0.11280632019042969, "rewards/margins": 1.167088508605957, "rewards/rejected": -1.054282307624817, "step": 200 }, { "epoch": 0.43, "learning_rate": 1.9022154316271963e-06, "logits/chosen": -2.789674997329712, "logits/rejected": -2.7489635944366455, "logps/chosen": -271.354736328125, "logps/rejected": -101.02803039550781, "loss": 0.3406, "rewards/accuracies": 0.96875, "rewards/chosen": 0.08271731436252594, "rewards/margins": 1.1206178665161133, "rewards/rejected": -1.037900447845459, "step": 210 }, { "epoch": 0.45, "learning_rate": 1.8869365928189456e-06, "logits/chosen": -2.8190276622772217, "logits/rejected": -2.7801876068115234, "logps/chosen": -252.3684844970703, "logps/rejected": -83.58897399902344, "loss": 0.3366, "rewards/accuracies": 0.981249988079071, "rewards/chosen": 0.047989681363105774, "rewards/margins": 1.1506556272506714, "rewards/rejected": -1.102665901184082, "step": 220 }, { "epoch": 0.47, "learning_rate": 1.8716577540106951e-06, "logits/chosen": -2.7455031871795654, "logits/rejected": -2.7612051963806152, "logps/chosen": -296.0608825683594, "logps/rejected": -85.1841049194336, "loss": 0.3066, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.12809792160987854, "rewards/margins": 1.3899600505828857, "rewards/rejected": -1.2618623971939087, "step": 230 }, { "epoch": 0.49, "learning_rate": 1.8563789152024445e-06, "logits/chosen": -2.775570869445801, "logits/rejected": -2.819958209991455, "logps/chosen": -282.4068908691406, "logps/rejected": -89.96455383300781, "loss": 0.3055, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.09846373647451401, "rewards/margins": 1.4890722036361694, "rewards/rejected": -1.390608549118042, "step": 240 }, { "epoch": 0.52, "learning_rate": 1.841100076394194e-06, "logits/chosen": -2.799436092376709, "logits/rejected": -2.8345470428466797, "logps/chosen": -265.59735107421875, "logps/rejected": -96.5230941772461, "loss": 0.2986, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.011983467265963554, "rewards/margins": 1.4505140781402588, "rewards/rejected": -1.462497591972351, "step": 250 }, { "epoch": 0.54, "learning_rate": 1.8258212375859433e-06, "logits/chosen": -2.767268419265747, "logits/rejected": -2.7291500568389893, "logps/chosen": -284.69512939453125, "logps/rejected": -84.67759704589844, "loss": 0.2975, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": 0.13364484906196594, "rewards/margins": 1.5072095394134521, "rewards/rejected": -1.373564600944519, "step": 260 }, { "epoch": 0.56, "learning_rate": 1.8105423987776928e-06, "logits/chosen": -2.740736961364746, "logits/rejected": -2.7466208934783936, "logps/chosen": -276.2334899902344, "logps/rejected": -102.246826171875, "loss": 0.271, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.018421288579702377, "rewards/margins": 1.600907325744629, "rewards/rejected": -1.5824859142303467, "step": 270 }, { "epoch": 0.58, "learning_rate": 1.7952635599694424e-06, "logits/chosen": -2.763476610183716, "logits/rejected": -2.826669216156006, "logps/chosen": -256.71234130859375, "logps/rejected": -84.85465240478516, "loss": 0.2796, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.021867703646421432, "rewards/margins": 1.4527885913848877, "rewards/rejected": -1.474656343460083, "step": 280 }, { "epoch": 0.6, "learning_rate": 1.7799847211611915e-06, "logits/chosen": -2.778783082962036, "logits/rejected": -2.7868287563323975, "logps/chosen": -278.37847900390625, "logps/rejected": -101.5394287109375, "loss": 0.2617, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.06040378659963608, "rewards/margins": 1.8995882272720337, "rewards/rejected": -1.8391841650009155, "step": 290 }, { "epoch": 0.62, "learning_rate": 1.764705882352941e-06, "logits/chosen": -2.727238893508911, "logits/rejected": -2.723588466644287, "logps/chosen": -275.4689025878906, "logps/rejected": -93.48865509033203, "loss": 0.2612, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.02573985978960991, "rewards/margins": 1.606034517288208, "rewards/rejected": -1.5802944898605347, "step": 300 }, { "epoch": 0.64, "learning_rate": 1.7494270435446906e-06, "logits/chosen": -2.8035991191864014, "logits/rejected": -2.8192009925842285, "logps/chosen": -284.1933898925781, "logps/rejected": -100.13687896728516, "loss": 0.2521, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.07830613851547241, "rewards/margins": 1.9092552661895752, "rewards/rejected": -1.9875609874725342, "step": 310 }, { "epoch": 0.66, "learning_rate": 1.7341482047364399e-06, "logits/chosen": -2.8154501914978027, "logits/rejected": -2.810351848602295, "logps/chosen": -274.22906494140625, "logps/rejected": -105.29779052734375, "loss": 0.2418, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.09999797493219376, "rewards/margins": 1.8004556894302368, "rewards/rejected": -1.9004535675048828, "step": 320 }, { "epoch": 0.68, "learning_rate": 1.7188693659281894e-06, "logits/chosen": -2.7782671451568604, "logits/rejected": -2.7969181537628174, "logps/chosen": -253.5823516845703, "logps/rejected": -91.79356384277344, "loss": 0.238, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16337540745735168, "rewards/margins": 1.8200305700302124, "rewards/rejected": -1.9834058284759521, "step": 330 }, { "epoch": 0.7, "learning_rate": 1.703590527119939e-06, "logits/chosen": -2.7838528156280518, "logits/rejected": -2.7845559120178223, "logps/chosen": -276.6927795410156, "logps/rejected": -105.1791763305664, "loss": 0.2408, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.11859698593616486, "rewards/margins": 1.980875015258789, "rewards/rejected": -2.0994718074798584, "step": 340 }, { "epoch": 0.72, "learning_rate": 1.688311688311688e-06, "logits/chosen": -2.8013651371002197, "logits/rejected": -2.7973175048828125, "logps/chosen": -280.02337646484375, "logps/rejected": -108.97808837890625, "loss": 0.2366, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17565980553627014, "rewards/margins": 2.0567195415496826, "rewards/rejected": -2.23237943649292, "step": 350 }, { "epoch": 0.74, "learning_rate": 1.6730328495034376e-06, "logits/chosen": -2.7511441707611084, "logits/rejected": -2.79335355758667, "logps/chosen": -295.89630126953125, "logps/rejected": -102.7479476928711, "loss": 0.2329, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.09856131672859192, "rewards/margins": 2.122011184692383, "rewards/rejected": -2.2205727100372314, "step": 360 }, { "epoch": 0.76, "learning_rate": 1.6577540106951871e-06, "logits/chosen": -2.7538399696350098, "logits/rejected": -2.7933411598205566, "logps/chosen": -278.8785705566406, "logps/rejected": -95.23030090332031, "loss": 0.2309, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.1667167693376541, "rewards/margins": 2.049661159515381, "rewards/rejected": -2.2163777351379395, "step": 370 }, { "epoch": 0.78, "learning_rate": 1.6424751718869364e-06, "logits/chosen": -2.7607922554016113, "logits/rejected": -2.763124942779541, "logps/chosen": -270.15484619140625, "logps/rejected": -103.03666687011719, "loss": 0.2252, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.10847791284322739, "rewards/margins": 2.2359797954559326, "rewards/rejected": -2.3444576263427734, "step": 380 }, { "epoch": 0.8, "learning_rate": 1.627196333078686e-06, "logits/chosen": -2.7692110538482666, "logits/rejected": -2.7727513313293457, "logps/chosen": -284.1410217285156, "logps/rejected": -103.25160217285156, "loss": 0.2183, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.07571631669998169, "rewards/margins": 2.3671867847442627, "rewards/rejected": -2.4429030418395996, "step": 390 }, { "epoch": 0.82, "learning_rate": 1.6119174942704355e-06, "logits/chosen": -2.713252544403076, "logits/rejected": -2.695067882537842, "logps/chosen": -246.3933563232422, "logps/rejected": -98.11837768554688, "loss": 0.2068, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.18476447463035583, "rewards/margins": 2.0507025718688965, "rewards/rejected": -2.2354671955108643, "step": 400 }, { "epoch": 0.85, "learning_rate": 1.5966386554621848e-06, "logits/chosen": -2.788705348968506, "logits/rejected": -2.7501461505889893, "logps/chosen": -268.0991516113281, "logps/rejected": -102.09769439697266, "loss": 0.1976, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.12583544850349426, "rewards/margins": 2.4059669971466064, "rewards/rejected": -2.5318026542663574, "step": 410 }, { "epoch": 0.87, "learning_rate": 1.5813598166539341e-06, "logits/chosen": -2.728642702102661, "logits/rejected": -2.7183501720428467, "logps/chosen": -277.0406188964844, "logps/rejected": -108.7503890991211, "loss": 0.2093, "rewards/accuracies": 0.96875, "rewards/chosen": -0.2682625353336334, "rewards/margins": 2.486672878265381, "rewards/rejected": -2.7549355030059814, "step": 420 }, { "epoch": 0.89, "learning_rate": 1.5660809778456837e-06, "logits/chosen": -2.755733013153076, "logits/rejected": -2.794435977935791, "logps/chosen": -267.26983642578125, "logps/rejected": -103.68330383300781, "loss": 0.1918, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.2670840322971344, "rewards/margins": 2.529799461364746, "rewards/rejected": -2.7968833446502686, "step": 430 }, { "epoch": 0.91, "learning_rate": 1.550802139037433e-06, "logits/chosen": -2.722480058670044, "logits/rejected": -2.7511894702911377, "logps/chosen": -271.193359375, "logps/rejected": -101.18013000488281, "loss": 0.1901, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.22587260603904724, "rewards/margins": 2.3586864471435547, "rewards/rejected": -2.584559202194214, "step": 440 }, { "epoch": 0.93, "learning_rate": 1.5355233002291825e-06, "logits/chosen": -2.724370241165161, "logits/rejected": -2.754976511001587, "logps/chosen": -300.2513122558594, "logps/rejected": -107.4105224609375, "loss": 0.2001, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.2436836212873459, "rewards/margins": 2.5888543128967285, "rewards/rejected": -2.83253812789917, "step": 450 }, { "epoch": 0.95, "learning_rate": 1.520244461420932e-06, "logits/chosen": -2.7703804969787598, "logits/rejected": -2.743251323699951, "logps/chosen": -304.1031799316406, "logps/rejected": -113.75921630859375, "loss": 0.2065, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.15126582980155945, "rewards/margins": 2.703249931335449, "rewards/rejected": -2.854515552520752, "step": 460 }, { "epoch": 0.97, "learning_rate": 1.5049656226126814e-06, "logits/chosen": -2.7441253662109375, "logits/rejected": -2.7229297161102295, "logps/chosen": -240.0807342529297, "logps/rejected": -100.44654846191406, "loss": 0.2019, "rewards/accuracies": 0.96875, "rewards/chosen": -0.32086318731307983, "rewards/margins": 2.4921185970306396, "rewards/rejected": -2.8129820823669434, "step": 470 }, { "epoch": 0.99, "learning_rate": 1.4896867838044307e-06, "logits/chosen": -2.704709529876709, "logits/rejected": -2.7237319946289062, "logps/chosen": -255.7570343017578, "logps/rejected": -113.80975341796875, "loss": 0.2024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.40018701553344727, "rewards/margins": 2.4386959075927734, "rewards/rejected": -2.8388831615448, "step": 480 }, { "epoch": 1.0, "eval_logits/chosen": -2.7680206298828125, "eval_logits/rejected": -2.7271738052368164, "eval_logps/chosen": -277.8106689453125, "eval_logps/rejected": -191.96604919433594, "eval_loss": 0.4197174608707428, "eval_rewards/accuracies": 0.80859375, "eval_rewards/chosen": -0.3973674178123474, "eval_rewards/margins": 1.4956284761428833, "eval_rewards/rejected": -1.892995834350586, "eval_runtime": 259.9183, "eval_samples_per_second": 7.695, "eval_steps_per_second": 0.062, "step": 485 }, { "epoch": 1.01, "learning_rate": 1.4744079449961802e-06, "logits/chosen": -2.7513904571533203, "logits/rejected": -2.8016788959503174, "logps/chosen": -279.7330017089844, "logps/rejected": -109.1887435913086, "loss": 0.1934, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.32768136262893677, "rewards/margins": 2.722477436065674, "rewards/rejected": -3.0501585006713867, "step": 490 }, { "epoch": 1.03, "learning_rate": 1.4591291061879296e-06, "logits/chosen": -2.705814838409424, "logits/rejected": -2.6782376766204834, "logps/chosen": -274.86480712890625, "logps/rejected": -113.91358947753906, "loss": 0.1797, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.2557370066642761, "rewards/margins": 2.544098377227783, "rewards/rejected": -2.799835205078125, "step": 500 }, { "epoch": 1.05, "learning_rate": 1.443850267379679e-06, "logits/chosen": -2.7348856925964355, "logits/rejected": -2.7256247997283936, "logps/chosen": -260.6488342285156, "logps/rejected": -98.91087341308594, "loss": 0.1826, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.3437056541442871, "rewards/margins": 2.4448533058166504, "rewards/rejected": -2.7885591983795166, "step": 510 }, { "epoch": 1.07, "learning_rate": 1.4285714285714286e-06, "logits/chosen": -2.799448013305664, "logits/rejected": -2.778778076171875, "logps/chosen": -279.9732360839844, "logps/rejected": -124.17852783203125, "loss": 0.1678, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4281793236732483, "rewards/margins": 2.834944248199463, "rewards/rejected": -3.2631232738494873, "step": 520 }, { "epoch": 1.09, "learning_rate": 1.413292589763178e-06, "logits/chosen": -2.7671663761138916, "logits/rejected": -2.7472527027130127, "logps/chosen": -306.42181396484375, "logps/rejected": -113.77569580078125, "loss": 0.1743, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.31418782472610474, "rewards/margins": 2.8712990283966064, "rewards/rejected": -3.1854867935180664, "step": 530 }, { "epoch": 1.11, "learning_rate": 1.3980137509549275e-06, "logits/chosen": -2.7520015239715576, "logits/rejected": -2.772326946258545, "logps/chosen": -247.4855499267578, "logps/rejected": -101.20677185058594, "loss": 0.1779, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.4306362271308899, "rewards/margins": 2.6422040462493896, "rewards/rejected": -3.072840452194214, "step": 540 }, { "epoch": 1.13, "learning_rate": 1.3827349121466768e-06, "logits/chosen": -2.685715436935425, "logits/rejected": -2.705381155014038, "logps/chosen": -269.1335144042969, "logps/rejected": -111.51566314697266, "loss": 0.1742, "rewards/accuracies": 0.96875, "rewards/chosen": -0.4332028329372406, "rewards/margins": 2.756269693374634, "rewards/rejected": -3.1894726753234863, "step": 550 }, { "epoch": 1.15, "learning_rate": 1.3674560733384261e-06, "logits/chosen": -2.737140417098999, "logits/rejected": -2.7757163047790527, "logps/chosen": -294.99530029296875, "logps/rejected": -112.22420501708984, "loss": 0.163, "rewards/accuracies": 0.96875, "rewards/chosen": -0.3779616951942444, "rewards/margins": 3.002772331237793, "rewards/rejected": -3.3807339668273926, "step": 560 }, { "epoch": 1.18, "learning_rate": 1.3521772345301757e-06, "logits/chosen": -2.765671730041504, "logits/rejected": -2.776446580886841, "logps/chosen": -291.44732666015625, "logps/rejected": -112.7077865600586, "loss": 0.1604, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.1892886459827423, "rewards/margins": 3.2649528980255127, "rewards/rejected": -3.4542412757873535, "step": 570 }, { "epoch": 1.2, "learning_rate": 1.3368983957219252e-06, "logits/chosen": -2.711365222930908, "logits/rejected": -2.761141300201416, "logps/chosen": -246.60989379882812, "logps/rejected": -106.89306640625, "loss": 0.1662, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.5674314498901367, "rewards/margins": 2.7119510173797607, "rewards/rejected": -3.2793827056884766, "step": 580 }, { "epoch": 1.22, "learning_rate": 1.3216195569136745e-06, "logits/chosen": -2.706845283508301, "logits/rejected": -2.7259907722473145, "logps/chosen": -266.33453369140625, "logps/rejected": -109.5294189453125, "loss": 0.1638, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.49850577116012573, "rewards/margins": 2.929001569747925, "rewards/rejected": -3.4275078773498535, "step": 590 }, { "epoch": 1.24, "learning_rate": 1.306340718105424e-06, "logits/chosen": -2.7694671154022217, "logits/rejected": -2.7433857917785645, "logps/chosen": -279.09429931640625, "logps/rejected": -113.45039367675781, "loss": 0.1663, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5474687218666077, "rewards/margins": 2.8283703327178955, "rewards/rejected": -3.3758392333984375, "step": 600 }, { "epoch": 1.26, "learning_rate": 1.2910618792971732e-06, "logits/chosen": -2.724958896636963, "logits/rejected": -2.6607446670532227, "logps/chosen": -268.52862548828125, "logps/rejected": -99.50247955322266, "loss": 0.1663, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.39182907342910767, "rewards/margins": 2.8105075359344482, "rewards/rejected": -3.2023367881774902, "step": 610 }, { "epoch": 1.28, "learning_rate": 1.2757830404889227e-06, "logits/chosen": -2.7382755279541016, "logits/rejected": -2.747498035430908, "logps/chosen": -295.7679748535156, "logps/rejected": -118.82535552978516, "loss": 0.1486, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.37939247488975525, "rewards/margins": 3.1222548484802246, "rewards/rejected": -3.5016472339630127, "step": 620 }, { "epoch": 1.3, "learning_rate": 1.2605042016806722e-06, "logits/chosen": -2.7766032218933105, "logits/rejected": -2.7410993576049805, "logps/chosen": -282.5389099121094, "logps/rejected": -115.2748794555664, "loss": 0.153, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.4390658736228943, "rewards/margins": 3.1960551738739014, "rewards/rejected": -3.6351211071014404, "step": 630 }, { "epoch": 1.32, "learning_rate": 1.2452253628724215e-06, "logits/chosen": -2.7750675678253174, "logits/rejected": -2.781956195831299, "logps/chosen": -299.9912109375, "logps/rejected": -124.12471008300781, "loss": 0.1546, "rewards/accuracies": 0.96875, "rewards/chosen": -0.4881526827812195, "rewards/margins": 3.3200020790100098, "rewards/rejected": -3.808154582977295, "step": 640 }, { "epoch": 1.34, "learning_rate": 1.229946524064171e-06, "logits/chosen": -2.7297987937927246, "logits/rejected": -2.668787956237793, "logps/chosen": -285.3520202636719, "logps/rejected": -114.99342346191406, "loss": 0.1551, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.43964657187461853, "rewards/margins": 3.080749988555908, "rewards/rejected": -3.5203967094421387, "step": 650 }, { "epoch": 1.36, "learning_rate": 1.2146676852559206e-06, "logits/chosen": -2.70336651802063, "logits/rejected": -2.7211241722106934, "logps/chosen": -288.2897644042969, "logps/rejected": -117.46827697753906, "loss": 0.1598, "rewards/accuracies": 0.96875, "rewards/chosen": -0.5533924102783203, "rewards/margins": 3.033695697784424, "rewards/rejected": -3.587088108062744, "step": 660 }, { "epoch": 1.38, "learning_rate": 1.19938884644767e-06, "logits/chosen": -2.7294211387634277, "logits/rejected": -2.776686191558838, "logps/chosen": -279.3514709472656, "logps/rejected": -110.04444885253906, "loss": 0.1681, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.5076431035995483, "rewards/margins": 3.0213823318481445, "rewards/rejected": -3.5290253162384033, "step": 670 }, { "epoch": 1.4, "learning_rate": 1.1841100076394192e-06, "logits/chosen": -2.749238967895508, "logits/rejected": -2.7573282718658447, "logps/chosen": -270.27874755859375, "logps/rejected": -111.33082580566406, "loss": 0.1629, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4736942648887634, "rewards/margins": 3.030879259109497, "rewards/rejected": -3.5045738220214844, "step": 680 }, { "epoch": 1.42, "learning_rate": 1.1688311688311688e-06, "logits/chosen": -2.7193169593811035, "logits/rejected": -2.7363669872283936, "logps/chosen": -299.65753173828125, "logps/rejected": -120.58744812011719, "loss": 0.1436, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.4750828146934509, "rewards/margins": 3.319221019744873, "rewards/rejected": -3.794304370880127, "step": 690 }, { "epoch": 1.44, "learning_rate": 1.153552330022918e-06, "logits/chosen": -2.7201449871063232, "logits/rejected": -2.7599716186523438, "logps/chosen": -285.97869873046875, "logps/rejected": -111.79976654052734, "loss": 0.1637, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.46921300888061523, "rewards/margins": 3.1174368858337402, "rewards/rejected": -3.5866501331329346, "step": 700 }, { "epoch": 1.46, "learning_rate": 1.1382734912146676e-06, "logits/chosen": -2.683954954147339, "logits/rejected": -2.760031223297119, "logps/chosen": -285.8088073730469, "logps/rejected": -114.0859375, "loss": 0.1569, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.4851107597351074, "rewards/margins": 3.097794771194458, "rewards/rejected": -3.5829052925109863, "step": 710 }, { "epoch": 1.48, "learning_rate": 1.1229946524064172e-06, "logits/chosen": -2.695244550704956, "logits/rejected": -2.7127084732055664, "logps/chosen": -258.01812744140625, "logps/rejected": -116.52490234375, "loss": 0.148, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.48574456572532654, "rewards/margins": 3.0761725902557373, "rewards/rejected": -3.5619170665740967, "step": 720 }, { "epoch": 1.51, "learning_rate": 1.1077158135981665e-06, "logits/chosen": -2.6948394775390625, "logits/rejected": -2.7225587368011475, "logps/chosen": -247.1420440673828, "logps/rejected": -108.1529312133789, "loss": 0.1546, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6709108352661133, "rewards/margins": 2.9054205417633057, "rewards/rejected": -3.5763309001922607, "step": 730 }, { "epoch": 1.53, "learning_rate": 1.0924369747899158e-06, "logits/chosen": -2.776315450668335, "logits/rejected": -2.776728868484497, "logps/chosen": -270.6875915527344, "logps/rejected": -114.87939453125, "loss": 0.1503, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6296391487121582, "rewards/margins": 3.1651525497436523, "rewards/rejected": -3.7947916984558105, "step": 740 }, { "epoch": 1.55, "learning_rate": 1.0771581359816653e-06, "logits/chosen": -2.733975887298584, "logits/rejected": -2.6755785942077637, "logps/chosen": -253.8512420654297, "logps/rejected": -123.96826171875, "loss": 0.1483, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5190945267677307, "rewards/margins": 3.2409470081329346, "rewards/rejected": -3.7600414752960205, "step": 750 }, { "epoch": 1.57, "learning_rate": 1.0618792971734147e-06, "logits/chosen": -2.6686155796051025, "logits/rejected": -2.7576305866241455, "logps/chosen": -315.3510437011719, "logps/rejected": -116.48152160644531, "loss": 0.1401, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.4066368043422699, "rewards/margins": 3.404694080352783, "rewards/rejected": -3.811330795288086, "step": 760 }, { "epoch": 1.59, "learning_rate": 1.0466004583651642e-06, "logits/chosen": -2.7223763465881348, "logits/rejected": -2.7043228149414062, "logps/chosen": -285.30767822265625, "logps/rejected": -123.26036071777344, "loss": 0.1394, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.6819589734077454, "rewards/margins": 3.2477848529815674, "rewards/rejected": -3.929744005203247, "step": 770 }, { "epoch": 1.61, "learning_rate": 1.0313216195569137e-06, "logits/chosen": -2.710700511932373, "logits/rejected": -2.748891592025757, "logps/chosen": -267.1021423339844, "logps/rejected": -108.77484130859375, "loss": 0.15, "rewards/accuracies": 0.96875, "rewards/chosen": -0.7118343710899353, "rewards/margins": 3.120725154876709, "rewards/rejected": -3.832559585571289, "step": 780 }, { "epoch": 1.63, "learning_rate": 1.016042780748663e-06, "logits/chosen": -2.6949081420898438, "logits/rejected": -2.6823809146881104, "logps/chosen": -278.98101806640625, "logps/rejected": -121.89730072021484, "loss": 0.1449, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.5939213037490845, "rewards/margins": 3.3531200885772705, "rewards/rejected": -3.9470412731170654, "step": 790 }, { "epoch": 1.65, "learning_rate": 1.0007639419404126e-06, "logits/chosen": -2.710453748703003, "logits/rejected": -2.720715045928955, "logps/chosen": -277.281982421875, "logps/rejected": -114.69615173339844, "loss": 0.1473, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6140307188034058, "rewards/margins": 3.254765748977661, "rewards/rejected": -3.8687965869903564, "step": 800 }, { "epoch": 1.67, "learning_rate": 9.85485103132162e-07, "logits/chosen": -2.7251040935516357, "logits/rejected": -2.693974494934082, "logps/chosen": -286.97686767578125, "logps/rejected": -119.7470703125, "loss": 0.1369, "rewards/accuracies": 0.96875, "rewards/chosen": -0.5266898274421692, "rewards/margins": 3.49212384223938, "rewards/rejected": -4.018813133239746, "step": 810 }, { "epoch": 1.69, "learning_rate": 9.702062643239114e-07, "logits/chosen": -2.7554683685302734, "logits/rejected": -2.762106418609619, "logps/chosen": -289.8918762207031, "logps/rejected": -125.37986755371094, "loss": 0.1401, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.6475861668586731, "rewards/margins": 3.4364724159240723, "rewards/rejected": -4.08405876159668, "step": 820 }, { "epoch": 1.71, "learning_rate": 9.549274255156608e-07, "logits/chosen": -2.7048392295837402, "logits/rejected": -2.6878910064697266, "logps/chosen": -284.31219482421875, "logps/rejected": -122.46697998046875, "loss": 0.1402, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.6133157014846802, "rewards/margins": 3.361485242843628, "rewards/rejected": -3.9748013019561768, "step": 830 }, { "epoch": 1.73, "learning_rate": 9.396485867074102e-07, "logits/chosen": -2.6750662326812744, "logits/rejected": -2.696866750717163, "logps/chosen": -291.42938232421875, "logps/rejected": -113.9036865234375, "loss": 0.1509, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.6713167428970337, "rewards/margins": 3.037714719772339, "rewards/rejected": -3.709031581878662, "step": 840 }, { "epoch": 1.75, "learning_rate": 9.243697478991597e-07, "logits/chosen": -2.68489933013916, "logits/rejected": -2.6937882900238037, "logps/chosen": -254.6746063232422, "logps/rejected": -115.2020263671875, "loss": 0.1543, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8679746389389038, "rewards/margins": 3.083875894546509, "rewards/rejected": -3.951850414276123, "step": 850 }, { "epoch": 1.77, "learning_rate": 9.09090909090909e-07, "logits/chosen": -2.6926121711730957, "logits/rejected": -2.698666572570801, "logps/chosen": -280.2935485839844, "logps/rejected": -116.54022216796875, "loss": 0.1349, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.43773943185806274, "rewards/margins": 3.511465072631836, "rewards/rejected": -3.949204683303833, "step": 860 }, { "epoch": 1.79, "learning_rate": 8.938120702826585e-07, "logits/chosen": -2.740382671356201, "logits/rejected": -2.68933367729187, "logps/chosen": -276.387451171875, "logps/rejected": -115.27424621582031, "loss": 0.136, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5380962491035461, "rewards/margins": 3.400437593460083, "rewards/rejected": -3.9385337829589844, "step": 870 }, { "epoch": 1.81, "learning_rate": 8.785332314744079e-07, "logits/chosen": -2.7333688735961914, "logits/rejected": -2.771758794784546, "logps/chosen": -293.49578857421875, "logps/rejected": -126.72651672363281, "loss": 0.1379, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.6212050914764404, "rewards/margins": 3.3602237701416016, "rewards/rejected": -3.9814281463623047, "step": 880 }, { "epoch": 1.84, "learning_rate": 8.632543926661573e-07, "logits/chosen": -2.709484577178955, "logits/rejected": -2.7399744987487793, "logps/chosen": -309.5272521972656, "logps/rejected": -123.85481262207031, "loss": 0.1368, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5241509675979614, "rewards/margins": 3.5228843688964844, "rewards/rejected": -4.0470356941223145, "step": 890 }, { "epoch": 1.86, "learning_rate": 8.479755538579067e-07, "logits/chosen": -2.7088303565979004, "logits/rejected": -2.7025811672210693, "logps/chosen": -271.97784423828125, "logps/rejected": -118.300537109375, "loss": 0.1319, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.5483931303024292, "rewards/margins": 3.45857310295105, "rewards/rejected": -4.006965637207031, "step": 900 }, { "epoch": 1.88, "learning_rate": 8.326967150496562e-07, "logits/chosen": -2.747811794281006, "logits/rejected": -2.6834397315979004, "logps/chosen": -301.4414367675781, "logps/rejected": -123.6478271484375, "loss": 0.1264, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.5513515472412109, "rewards/margins": 3.684753894805908, "rewards/rejected": -4.236104965209961, "step": 910 }, { "epoch": 1.9, "learning_rate": 8.174178762414056e-07, "logits/chosen": -2.7275288105010986, "logits/rejected": -2.7216854095458984, "logps/chosen": -300.93328857421875, "logps/rejected": -133.4979248046875, "loss": 0.1301, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.5985573530197144, "rewards/margins": 3.8093056678771973, "rewards/rejected": -4.407863140106201, "step": 920 }, { "epoch": 1.92, "learning_rate": 8.02139037433155e-07, "logits/chosen": -2.7658352851867676, "logits/rejected": -2.738119602203369, "logps/chosen": -272.04449462890625, "logps/rejected": -120.6009521484375, "loss": 0.1252, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6955040693283081, "rewards/margins": 3.5386176109313965, "rewards/rejected": -4.234121799468994, "step": 930 }, { "epoch": 1.94, "learning_rate": 7.868601986249045e-07, "logits/chosen": -2.7340145111083984, "logits/rejected": -2.7205958366394043, "logps/chosen": -286.5509948730469, "logps/rejected": -126.727294921875, "loss": 0.1468, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8658866882324219, "rewards/margins": 3.4253039360046387, "rewards/rejected": -4.2911906242370605, "step": 940 }, { "epoch": 1.96, "learning_rate": 7.71581359816654e-07, "logits/chosen": -2.7004799842834473, "logits/rejected": -2.6438944339752197, "logps/chosen": -226.2438507080078, "logps/rejected": -113.9751205444336, "loss": 0.1347, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.913210391998291, "rewards/margins": 3.264578342437744, "rewards/rejected": -4.177788734436035, "step": 950 }, { "epoch": 1.98, "learning_rate": 7.563025210084033e-07, "logits/chosen": -2.7258245944976807, "logits/rejected": -2.792160987854004, "logps/chosen": -270.41314697265625, "logps/rejected": -115.57466125488281, "loss": 0.1153, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.7112643122673035, "rewards/margins": 3.4535746574401855, "rewards/rejected": -4.164839267730713, "step": 960 }, { "epoch": 2.0, "learning_rate": 7.410236822001527e-07, "logits/chosen": -2.682445526123047, "logits/rejected": -2.680621862411499, "logps/chosen": -300.4261474609375, "logps/rejected": -123.0173110961914, "loss": 0.1305, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.544654130935669, "rewards/margins": 3.4888763427734375, "rewards/rejected": -4.033530235290527, "step": 970 }, { "epoch": 2.0, "eval_logits/chosen": -2.730585813522339, "eval_logits/rejected": -2.6866092681884766, "eval_logps/chosen": -281.4207763671875, "eval_logps/rejected": -201.63304138183594, "eval_loss": 0.3693665862083435, "eval_rewards/accuracies": 0.82421875, "eval_rewards/chosen": -0.7583777904510498, "eval_rewards/margins": 2.1013174057006836, "eval_rewards/rejected": -2.8596951961517334, "eval_runtime": 259.2152, "eval_samples_per_second": 7.716, "eval_steps_per_second": 0.062, "step": 970 }, { "epoch": 2.02, "learning_rate": 7.257448433919023e-07, "logits/chosen": -2.7307159900665283, "logits/rejected": -2.760425090789795, "logps/chosen": -263.69964599609375, "logps/rejected": -121.00809478759766, "loss": 0.1237, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7474689483642578, "rewards/margins": 3.526792526245117, "rewards/rejected": -4.274261474609375, "step": 980 }, { "epoch": 2.04, "learning_rate": 7.104660045836516e-07, "logits/chosen": -2.688023090362549, "logits/rejected": -2.6981234550476074, "logps/chosen": -283.63311767578125, "logps/rejected": -119.1172866821289, "loss": 0.1167, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5560388565063477, "rewards/margins": 3.7879276275634766, "rewards/rejected": -4.343966484069824, "step": 990 }, { "epoch": 2.06, "learning_rate": 6.95187165775401e-07, "logits/chosen": -2.6954102516174316, "logits/rejected": -2.696408748626709, "logps/chosen": -277.02764892578125, "logps/rejected": -115.6692886352539, "loss": 0.1243, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6953684091567993, "rewards/margins": 3.4351089000701904, "rewards/rejected": -4.130476951599121, "step": 1000 }, { "epoch": 2.08, "learning_rate": 6.799083269671506e-07, "logits/chosen": -2.6869771480560303, "logits/rejected": -2.7037670612335205, "logps/chosen": -251.7620086669922, "logps/rejected": -117.48094177246094, "loss": 0.1317, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.8689507246017456, "rewards/margins": 3.475069046020508, "rewards/rejected": -4.344019889831543, "step": 1010 }, { "epoch": 2.1, "learning_rate": 6.646294881588999e-07, "logits/chosen": -2.6988840103149414, "logits/rejected": -2.6869451999664307, "logps/chosen": -287.7177734375, "logps/rejected": -123.6144790649414, "loss": 0.119, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7602699995040894, "rewards/margins": 3.628121852874756, "rewards/rejected": -4.388391971588135, "step": 1020 }, { "epoch": 2.12, "learning_rate": 6.493506493506493e-07, "logits/chosen": -2.7356371879577637, "logits/rejected": -2.8164889812469482, "logps/chosen": -305.6545715332031, "logps/rejected": -119.76715087890625, "loss": 0.1255, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7907172441482544, "rewards/margins": 3.6126227378845215, "rewards/rejected": -4.403339862823486, "step": 1030 }, { "epoch": 2.14, "learning_rate": 6.340718105423987e-07, "logits/chosen": -2.647362232208252, "logits/rejected": -2.6018130779266357, "logps/chosen": -281.76092529296875, "logps/rejected": -118.23252868652344, "loss": 0.121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.64565110206604, "rewards/margins": 3.727663516998291, "rewards/rejected": -4.37331485748291, "step": 1040 }, { "epoch": 2.16, "learning_rate": 6.187929717341482e-07, "logits/chosen": -2.7285995483398438, "logits/rejected": -2.67795467376709, "logps/chosen": -273.88555908203125, "logps/rejected": -112.9361801147461, "loss": 0.1223, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.5947860479354858, "rewards/margins": 3.604292392730713, "rewards/rejected": -4.199078559875488, "step": 1050 }, { "epoch": 2.19, "learning_rate": 6.035141329258976e-07, "logits/chosen": -2.718034505844116, "logits/rejected": -2.6936326026916504, "logps/chosen": -287.3531799316406, "logps/rejected": -123.18647766113281, "loss": 0.1196, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7472411394119263, "rewards/margins": 3.734687328338623, "rewards/rejected": -4.481928825378418, "step": 1060 }, { "epoch": 2.21, "learning_rate": 5.88235294117647e-07, "logits/chosen": -2.699462413787842, "logits/rejected": -2.667182683944702, "logps/chosen": -255.7397003173828, "logps/rejected": -114.04801177978516, "loss": 0.1287, "rewards/accuracies": 0.9375, "rewards/chosen": -0.706480860710144, "rewards/margins": 3.5075461864471436, "rewards/rejected": -4.214027404785156, "step": 1070 }, { "epoch": 2.23, "learning_rate": 5.729564553093965e-07, "logits/chosen": -2.719064712524414, "logits/rejected": -2.724663734436035, "logps/chosen": -283.41607666015625, "logps/rejected": -120.31791687011719, "loss": 0.1205, "rewards/accuracies": 0.96875, "rewards/chosen": -0.8808507919311523, "rewards/margins": 3.6230416297912598, "rewards/rejected": -4.503891944885254, "step": 1080 }, { "epoch": 2.25, "learning_rate": 5.576776165011459e-07, "logits/chosen": -2.6613950729370117, "logits/rejected": -2.669309377670288, "logps/chosen": -274.6718444824219, "logps/rejected": -123.7230453491211, "loss": 0.1295, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.8109513521194458, "rewards/margins": 3.535714626312256, "rewards/rejected": -4.346665859222412, "step": 1090 }, { "epoch": 2.27, "learning_rate": 5.423987776928953e-07, "logits/chosen": -2.752354860305786, "logits/rejected": -2.689492702484131, "logps/chosen": -297.46575927734375, "logps/rejected": -123.75872802734375, "loss": 0.1112, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.8303642272949219, "rewards/margins": 3.775070905685425, "rewards/rejected": -4.605435371398926, "step": 1100 }, { "epoch": 2.29, "learning_rate": 5.271199388846448e-07, "logits/chosen": -2.7163376808166504, "logits/rejected": -2.7011306285858154, "logps/chosen": -268.9468078613281, "logps/rejected": -129.1416473388672, "loss": 0.1147, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.7546231150627136, "rewards/margins": 3.6865787506103516, "rewards/rejected": -4.441201686859131, "step": 1110 }, { "epoch": 2.31, "learning_rate": 5.118411000763941e-07, "logits/chosen": -2.7277450561523438, "logits/rejected": -2.6973800659179688, "logps/chosen": -282.5606689453125, "logps/rejected": -124.7701416015625, "loss": 0.1192, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5854411125183105, "rewards/margins": 3.8541629314422607, "rewards/rejected": -4.439603805541992, "step": 1120 }, { "epoch": 2.33, "learning_rate": 4.965622612681436e-07, "logits/chosen": -2.6750576496124268, "logits/rejected": -2.7182953357696533, "logps/chosen": -274.384765625, "logps/rejected": -122.15213775634766, "loss": 0.1222, "rewards/accuracies": 0.96875, "rewards/chosen": -0.7071852684020996, "rewards/margins": 3.846698045730591, "rewards/rejected": -4.5538835525512695, "step": 1130 }, { "epoch": 2.35, "learning_rate": 4.81283422459893e-07, "logits/chosen": -2.676004648208618, "logits/rejected": -2.6993675231933594, "logps/chosen": -263.89752197265625, "logps/rejected": -116.71295166015625, "loss": 0.1107, "rewards/accuracies": 0.96875, "rewards/chosen": -0.7977498769760132, "rewards/margins": 3.523756742477417, "rewards/rejected": -4.321506500244141, "step": 1140 }, { "epoch": 2.37, "learning_rate": 4.660045836516425e-07, "logits/chosen": -2.7055160999298096, "logits/rejected": -2.7121243476867676, "logps/chosen": -289.3218994140625, "logps/rejected": -121.4267807006836, "loss": 0.1189, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6630896329879761, "rewards/margins": 3.811305522918701, "rewards/rejected": -4.474394798278809, "step": 1150 }, { "epoch": 2.39, "learning_rate": 4.5072574484339185e-07, "logits/chosen": -2.6670591831207275, "logits/rejected": -2.690974712371826, "logps/chosen": -278.7057189941406, "logps/rejected": -117.63764953613281, "loss": 0.1214, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.798798143863678, "rewards/margins": 3.643995761871338, "rewards/rejected": -4.442793846130371, "step": 1160 }, { "epoch": 2.41, "learning_rate": 4.3544690603514133e-07, "logits/chosen": -2.686856269836426, "logits/rejected": -2.6309545040130615, "logps/chosen": -271.1042785644531, "logps/rejected": -131.63259887695312, "loss": 0.1233, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.7242040038108826, "rewards/margins": 3.6964995861053467, "rewards/rejected": -4.420703411102295, "step": 1170 }, { "epoch": 2.43, "learning_rate": 4.2016806722689076e-07, "logits/chosen": -2.677506923675537, "logits/rejected": -2.7353405952453613, "logps/chosen": -304.0357360839844, "logps/rejected": -128.03981018066406, "loss": 0.1109, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6244128942489624, "rewards/margins": 3.888216733932495, "rewards/rejected": -4.512629985809326, "step": 1180 }, { "epoch": 2.45, "learning_rate": 4.0488922841864013e-07, "logits/chosen": -2.738661527633667, "logits/rejected": -2.696394443511963, "logps/chosen": -263.1926574707031, "logps/rejected": -117.48931884765625, "loss": 0.1266, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.903983473777771, "rewards/margins": 3.5497829914093018, "rewards/rejected": -4.453766345977783, "step": 1190 }, { "epoch": 2.47, "learning_rate": 3.896103896103896e-07, "logits/chosen": -2.721292018890381, "logits/rejected": -2.6752657890319824, "logps/chosen": -270.2406005859375, "logps/rejected": -121.4746322631836, "loss": 0.1118, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.7563194036483765, "rewards/margins": 3.8605034351348877, "rewards/rejected": -4.616823673248291, "step": 1200 }, { "epoch": 2.49, "learning_rate": 3.7433155080213904e-07, "logits/chosen": -2.6903328895568848, "logits/rejected": -2.7279880046844482, "logps/chosen": -263.3211975097656, "logps/rejected": -120.23514556884766, "loss": 0.1132, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7410348057746887, "rewards/margins": 3.5494637489318848, "rewards/rejected": -4.290497779846191, "step": 1210 }, { "epoch": 2.52, "learning_rate": 3.590527119938884e-07, "logits/chosen": -2.697645425796509, "logits/rejected": -2.676959753036499, "logps/chosen": -275.2843322753906, "logps/rejected": -126.24308776855469, "loss": 0.1157, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9119027256965637, "rewards/margins": 3.6408069133758545, "rewards/rejected": -4.552709102630615, "step": 1220 }, { "epoch": 2.54, "learning_rate": 3.437738731856379e-07, "logits/chosen": -2.704871654510498, "logits/rejected": -2.6901638507843018, "logps/chosen": -292.1809387207031, "logps/rejected": -129.85531616210938, "loss": 0.1134, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.8088245391845703, "rewards/margins": 3.841012477874756, "rewards/rejected": -4.649837493896484, "step": 1230 }, { "epoch": 2.56, "learning_rate": 3.2849503437738727e-07, "logits/chosen": -2.6897072792053223, "logits/rejected": -2.6975724697113037, "logps/chosen": -289.3966369628906, "logps/rejected": -130.11814880371094, "loss": 0.1215, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.8563445210456848, "rewards/margins": 3.8016419410705566, "rewards/rejected": -4.657986640930176, "step": 1240 }, { "epoch": 2.58, "learning_rate": 3.1321619556913675e-07, "logits/chosen": -2.6511547565460205, "logits/rejected": -2.6918509006500244, "logps/chosen": -304.6783447265625, "logps/rejected": -128.91067504882812, "loss": 0.1256, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.6992138028144836, "rewards/margins": 3.894549608230591, "rewards/rejected": -4.593764305114746, "step": 1250 }, { "epoch": 2.6, "learning_rate": 2.9793735676088617e-07, "logits/chosen": -2.7180018424987793, "logits/rejected": -2.73219633102417, "logps/chosen": -287.21697998046875, "logps/rejected": -128.64976501464844, "loss": 0.1276, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.9189088940620422, "rewards/margins": 3.821709394454956, "rewards/rejected": -4.7406182289123535, "step": 1260 }, { "epoch": 2.62, "learning_rate": 2.8265851795263555e-07, "logits/chosen": -2.6880316734313965, "logits/rejected": -2.6963746547698975, "logps/chosen": -270.5711364746094, "logps/rejected": -109.63471984863281, "loss": 0.1227, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.8319600820541382, "rewards/margins": 3.4531593322753906, "rewards/rejected": -4.285120010375977, "step": 1270 }, { "epoch": 2.64, "learning_rate": 2.6737967914438503e-07, "logits/chosen": -2.718834400177002, "logits/rejected": -2.730088472366333, "logps/chosen": -285.2646179199219, "logps/rejected": -129.10108947753906, "loss": 0.1177, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.7013461589813232, "rewards/margins": 4.131698131561279, "rewards/rejected": -4.833044052124023, "step": 1280 }, { "epoch": 2.66, "learning_rate": 2.5210084033613445e-07, "logits/chosen": -2.6872308254241943, "logits/rejected": -2.71856689453125, "logps/chosen": -264.56591796875, "logps/rejected": -117.89599609375, "loss": 0.115, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.6815407872200012, "rewards/margins": 3.6640231609344482, "rewards/rejected": -4.3455634117126465, "step": 1290 }, { "epoch": 2.68, "learning_rate": 2.3682200152788388e-07, "logits/chosen": -2.723666191101074, "logits/rejected": -2.7219574451446533, "logps/chosen": -317.8732604980469, "logps/rejected": -120.55049896240234, "loss": 0.1072, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.7161759734153748, "rewards/margins": 3.9293036460876465, "rewards/rejected": -4.645480155944824, "step": 1300 }, { "epoch": 2.7, "learning_rate": 2.215431627196333e-07, "logits/chosen": -2.6789979934692383, "logits/rejected": -2.6807262897491455, "logps/chosen": -260.33062744140625, "logps/rejected": -120.8504409790039, "loss": 0.1201, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.854164719581604, "rewards/margins": 3.6763319969177246, "rewards/rejected": -4.530496597290039, "step": 1310 }, { "epoch": 2.72, "learning_rate": 2.0626432391138274e-07, "logits/chosen": -2.703617572784424, "logits/rejected": -2.696929931640625, "logps/chosen": -250.5372772216797, "logps/rejected": -126.94450378417969, "loss": 0.1262, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0846295356750488, "rewards/margins": 3.540682554244995, "rewards/rejected": -4.625311374664307, "step": 1320 }, { "epoch": 2.74, "learning_rate": 1.9098548510313214e-07, "logits/chosen": -2.7046430110931396, "logits/rejected": -2.691488027572632, "logps/chosen": -267.9579162597656, "logps/rejected": -123.9790267944336, "loss": 0.1141, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.6386908292770386, "rewards/margins": 4.031415939331055, "rewards/rejected": -4.670106887817383, "step": 1330 }, { "epoch": 2.76, "learning_rate": 1.757066462948816e-07, "logits/chosen": -2.689009189605713, "logits/rejected": -2.7033090591430664, "logps/chosen": -266.8208312988281, "logps/rejected": -126.87858581542969, "loss": 0.1157, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6870437860488892, "rewards/margins": 3.979443073272705, "rewards/rejected": -4.666487216949463, "step": 1340 }, { "epoch": 2.78, "learning_rate": 1.6042780748663102e-07, "logits/chosen": -2.70473051071167, "logits/rejected": -2.6794614791870117, "logps/chosen": -278.357421875, "logps/rejected": -125.93829345703125, "loss": 0.1152, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.9934309124946594, "rewards/margins": 3.67195200920105, "rewards/rejected": -4.665382385253906, "step": 1350 }, { "epoch": 2.8, "learning_rate": 1.4514896867838044e-07, "logits/chosen": -2.6879210472106934, "logits/rejected": -2.7055411338806152, "logps/chosen": -301.1689147949219, "logps/rejected": -137.4368896484375, "loss": 0.1134, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.8130217790603638, "rewards/margins": 3.979782819747925, "rewards/rejected": -4.792804718017578, "step": 1360 }, { "epoch": 2.82, "learning_rate": 1.2987012987012984e-07, "logits/chosen": -2.6889195442199707, "logits/rejected": -2.677527666091919, "logps/chosen": -300.85272216796875, "logps/rejected": -131.1885986328125, "loss": 0.1181, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.8304030299186707, "rewards/margins": 4.027594089508057, "rewards/rejected": -4.857996940612793, "step": 1370 }, { "epoch": 2.85, "learning_rate": 1.1459129106187928e-07, "logits/chosen": -2.675020694732666, "logits/rejected": -2.6782567501068115, "logps/chosen": -329.69696044921875, "logps/rejected": -135.40345764160156, "loss": 0.1182, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.631290078163147, "rewards/margins": 3.959303379058838, "rewards/rejected": -4.5905938148498535, "step": 1380 }, { "epoch": 2.87, "learning_rate": 9.931245225362872e-08, "logits/chosen": -2.6736457347869873, "logits/rejected": -2.6810030937194824, "logps/chosen": -274.8864440917969, "logps/rejected": -124.03629302978516, "loss": 0.1258, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7999323606491089, "rewards/margins": 3.9392266273498535, "rewards/rejected": -4.73915958404541, "step": 1390 }, { "epoch": 2.89, "learning_rate": 8.403361344537815e-08, "logits/chosen": -2.699571132659912, "logits/rejected": -2.701141834259033, "logps/chosen": -283.51751708984375, "logps/rejected": -130.84814453125, "loss": 0.1212, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7443857192993164, "rewards/margins": 3.96293568611145, "rewards/rejected": -4.707321643829346, "step": 1400 }, { "epoch": 2.91, "learning_rate": 6.875477463712758e-08, "logits/chosen": -2.690229892730713, "logits/rejected": -2.7361502647399902, "logps/chosen": -310.0445556640625, "logps/rejected": -141.15748596191406, "loss": 0.1115, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.7573784589767456, "rewards/margins": 4.050500869750977, "rewards/rejected": -4.807879447937012, "step": 1410 }, { "epoch": 2.93, "learning_rate": 5.3475935828877e-08, "logits/chosen": -2.7038116455078125, "logits/rejected": -2.715642213821411, "logps/chosen": -294.73486328125, "logps/rejected": -127.1126480102539, "loss": 0.1176, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.720859944820404, "rewards/margins": 3.844661235809326, "rewards/rejected": -4.565520763397217, "step": 1420 }, { "epoch": 2.95, "learning_rate": 3.8197097020626426e-08, "logits/chosen": -2.695003032684326, "logits/rejected": -2.7012951374053955, "logps/chosen": -257.13287353515625, "logps/rejected": -120.28269958496094, "loss": 0.1259, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.8394849896430969, "rewards/margins": 3.808011531829834, "rewards/rejected": -4.647497177124023, "step": 1430 }, { "epoch": 2.97, "learning_rate": 2.291825821237586e-08, "logits/chosen": -2.7045774459838867, "logits/rejected": -2.679112672805786, "logps/chosen": -280.9797058105469, "logps/rejected": -127.39398193359375, "loss": 0.1096, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8950273394584656, "rewards/margins": 3.8749961853027344, "rewards/rejected": -4.770023345947266, "step": 1440 }, { "epoch": 2.99, "learning_rate": 7.639419404125286e-09, "logits/chosen": -2.7298693656921387, "logits/rejected": -2.673051357269287, "logps/chosen": -329.364990234375, "logps/rejected": -131.12486267089844, "loss": 0.109, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.5651420950889587, "rewards/margins": 4.195387840270996, "rewards/rejected": -4.760529518127441, "step": 1450 }, { "epoch": 3.0, "eval_logits/chosen": -2.715555429458618, "eval_logits/rejected": -2.6699323654174805, "eval_logps/chosen": -282.458740234375, "eval_logps/rejected": -204.27066040039062, "eval_loss": 0.3553008437156677, "eval_rewards/accuracies": 0.828125, "eval_rewards/chosen": -0.8621728420257568, "eval_rewards/margins": 2.261284112930298, "eval_rewards/rejected": -3.123457193374634, "eval_runtime": 259.2547, "eval_samples_per_second": 7.714, "eval_steps_per_second": 0.062, "step": 1455 }, { "epoch": 3.0, "step": 1455, "total_flos": 0.0, "train_loss": 0.21351368668972423, "train_runtime": 46913.4477, "train_samples_per_second": 3.969, "train_steps_per_second": 0.031 } ], "logging_steps": 10, "max_steps": 1455, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }