{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3333333333333334e-08, "logits/chosen": -1.4607182741165161, "logits/rejected": -1.0577633380889893, "logps/chosen": -377.1839599609375, "logps/rejected": -1292.140625, "loss": 0.3828, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.3333333333333336e-07, "logits/chosen": -1.422508716583252, "logits/rejected": -1.0050468444824219, "logps/chosen": -603.676025390625, "logps/rejected": -1303.1278076171875, "loss": 0.3726, "rewards/accuracies": 0.3888888955116272, "rewards/chosen": -0.00046346496674232185, "rewards/margins": -0.0005589782958850265, "rewards/rejected": 9.551318362355232e-05, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.666666666666667e-07, "logits/chosen": -1.469663381576538, "logits/rejected": -1.0887094736099243, "logps/chosen": -544.6007080078125, "logps/rejected": -1043.440673828125, "loss": 0.4111, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0005788623820990324, "rewards/margins": 0.001082524424418807, "rewards/rejected": -0.0005036621587350965, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -1.6168861389160156, "logits/rejected": -1.055768609046936, "logps/chosen": -653.9005737304688, "logps/rejected": -1314.4986572265625, "loss": 0.3835, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -8.300555782625452e-05, "rewards/margins": 0.0015174217987805605, "rewards/rejected": -0.001600427320227027, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.333333333333335e-07, "logits/chosen": -1.3431918621063232, "logits/rejected": -0.6968151926994324, "logps/chosen": -665.5634765625, "logps/rejected": -1349.5455322265625, "loss": 0.3624, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0007652758504264057, "rewards/margins": 0.004389578010886908, "rewards/rejected": -0.005154854152351618, "step": 40 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "logits/chosen": -1.36802077293396, "logits/rejected": -0.7910489439964294, "logps/chosen": -665.3347778320312, "logps/rejected": -1172.180419921875, "loss": 0.3291, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0013909535482525826, "rewards/margins": 0.00601952476426959, "rewards/rejected": -0.0074104792438447475, "step": 50 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -1.6208984851837158, "logits/rejected": -0.9291224479675293, "logps/chosen": -660.3453369140625, "logps/rejected": -1401.126220703125, "loss": 0.3343, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.00016163568943738937, "rewards/margins": 0.020959021523594856, "rewards/rejected": -0.02079738676548004, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.333333333333334e-07, "logits/chosen": -1.5982778072357178, "logits/rejected": -0.6397973299026489, "logps/chosen": -600.2814331054688, "logps/rejected": -1338.4075927734375, "loss": 0.3381, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0026822402141988277, "rewards/margins": 0.034768685698509216, "rewards/rejected": -0.032086439430713654, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.066666666666667e-06, "logits/chosen": -1.6199333667755127, "logits/rejected": -0.8920964002609253, "logps/chosen": -589.6707763671875, "logps/rejected": -1347.08837890625, "loss": 0.3062, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.005661836825311184, "rewards/margins": 0.04629923403263092, "rewards/rejected": -0.05196107178926468, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -1.1941367387771606, "logits/rejected": -0.7822949290275574, "logps/chosen": -618.1773681640625, "logps/rejected": -1214.458251953125, "loss": 0.3073, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.02980031445622444, "rewards/margins": 0.08387573063373566, "rewards/rejected": -0.1136760488152504, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.3333333333333334e-06, "logits/chosen": -1.1643040180206299, "logits/rejected": -0.3697466552257538, "logps/chosen": -628.5645141601562, "logps/rejected": -1501.861572265625, "loss": 0.2341, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11866208165884018, "rewards/margins": 0.1647382378578186, "rewards/rejected": -0.283400297164917, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.4666666666666669e-06, "logits/chosen": -1.1232091188430786, "logits/rejected": -0.17076462507247925, "logps/chosen": -800.4198608398438, "logps/rejected": -1697.21875, "loss": 0.1819, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23575441539287567, "rewards/margins": 0.2496374547481537, "rewards/rejected": -0.48539191484451294, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -1.1497989892959595, "logits/rejected": -0.4379865527153015, "logps/chosen": -914.0667114257812, "logps/rejected": -1844.274169921875, "loss": 0.2595, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29556211829185486, "rewards/margins": 0.2584637701511383, "rewards/rejected": -0.5540258288383484, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.7333333333333336e-06, "logits/chosen": -1.2714927196502686, "logits/rejected": -0.7818469405174255, "logps/chosen": -809.2772216796875, "logps/rejected": -1702.701171875, "loss": 0.252, "rewards/accuracies": 0.75, "rewards/chosen": -0.20852358639240265, "rewards/margins": 0.2036806344985962, "rewards/rejected": -0.4122042655944824, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.8666666666666669e-06, "logits/chosen": -1.4557206630706787, "logits/rejected": -0.5710101127624512, "logps/chosen": -727.69189453125, "logps/rejected": -1672.314208984375, "loss": 0.1792, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2208588570356369, "rewards/margins": 0.21561995148658752, "rewards/rejected": -0.436478853225708, "step": 140 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.2810709476470947, "logits/rejected": -0.3112773001194, "logps/chosen": -960.1036376953125, "logps/rejected": -1976.63671875, "loss": 0.2275, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3756103217601776, "rewards/margins": 0.28143054246902466, "rewards/rejected": -0.6570408344268799, "step": 150 }, { "epoch": 0.04, "learning_rate": 2.133333333333334e-06, "logits/chosen": -1.1071354150772095, "logits/rejected": -0.2948054373264313, "logps/chosen": -707.2420654296875, "logps/rejected": -1617.572509765625, "loss": 0.3027, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2601652145385742, "rewards/margins": 0.25246208906173706, "rewards/rejected": -0.5126273036003113, "step": 160 }, { "epoch": 0.05, "learning_rate": 2.266666666666667e-06, "logits/chosen": -1.1747493743896484, "logits/rejected": -0.516975998878479, "logps/chosen": -671.3464965820312, "logps/rejected": -1495.285400390625, "loss": 0.2483, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1535535305738449, "rewards/margins": 0.13311822712421417, "rewards/rejected": -0.2866717278957367, "step": 170 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -1.5080617666244507, "logits/rejected": -0.1595776528120041, "logps/chosen": -766.1541137695312, "logps/rejected": -1505.566162109375, "loss": 0.1896, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1698867678642273, "rewards/margins": 0.1528700590133667, "rewards/rejected": -0.322756826877594, "step": 180 }, { "epoch": 0.05, "learning_rate": 2.5333333333333338e-06, "logits/chosen": -1.4777127504348755, "logits/rejected": -0.1578340083360672, "logps/chosen": -883.9136962890625, "logps/rejected": -1729.061279296875, "loss": 0.2441, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2274136245250702, "rewards/margins": 0.1767813265323639, "rewards/rejected": -0.4041949212551117, "step": 190 }, { "epoch": 0.05, "learning_rate": 2.666666666666667e-06, "logits/chosen": -1.5583598613739014, "logits/rejected": -0.15428844094276428, "logps/chosen": -954.4393310546875, "logps/rejected": -1927.611328125, "loss": 0.1691, "rewards/accuracies": 0.875, "rewards/chosen": -0.2696084976196289, "rewards/margins": 0.28209298849105835, "rewards/rejected": -0.5517014861106873, "step": 200 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -1.148453950881958, "logits/rejected": 0.03259178251028061, "logps/chosen": -963.2941284179688, "logps/rejected": -1878.1683349609375, "loss": 0.2653, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29248175024986267, "rewards/margins": 0.2701197564601898, "rewards/rejected": -0.5626015067100525, "step": 210 }, { "epoch": 0.06, "learning_rate": 2.9333333333333338e-06, "logits/chosen": -1.4708116054534912, "logits/rejected": -0.4165799617767334, "logps/chosen": -663.6636352539062, "logps/rejected": -1678.5081787109375, "loss": 0.1756, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1571148931980133, "rewards/margins": 0.24838630855083466, "rewards/rejected": -0.40550118684768677, "step": 220 }, { "epoch": 0.06, "learning_rate": 3.066666666666667e-06, "logits/chosen": -1.17020583152771, "logits/rejected": -0.47575148940086365, "logps/chosen": -920.1783447265625, "logps/rejected": -1948.2427978515625, "loss": 0.1913, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2500799894332886, "rewards/margins": 0.24250411987304688, "rewards/rejected": -0.49258413910865784, "step": 230 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -1.615384817123413, "logits/rejected": -0.5452179312705994, "logps/chosen": -855.1349487304688, "logps/rejected": -1652.406494140625, "loss": 0.1915, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20125408470630646, "rewards/margins": 0.23299658298492432, "rewards/rejected": -0.4342506527900696, "step": 240 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -1.231403112411499, "logits/rejected": 0.04080945998430252, "logps/chosen": -830.4852294921875, "logps/rejected": -1568.180908203125, "loss": 0.2531, "rewards/accuracies": 0.75, "rewards/chosen": -0.16112910211086273, "rewards/margins": 0.1832246333360672, "rewards/rejected": -0.34435373544692993, "step": 250 }, { "epoch": 0.07, "learning_rate": 3.4666666666666672e-06, "logits/chosen": -1.4119511842727661, "logits/rejected": 0.3003128170967102, "logps/chosen": -697.4551391601562, "logps/rejected": -1790.427978515625, "loss": 0.1495, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12422380596399307, "rewards/margins": 0.2803717255592346, "rewards/rejected": -0.4045955538749695, "step": 260 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -1.4090235233306885, "logits/rejected": 0.20056810975074768, "logps/chosen": -781.8931884765625, "logps/rejected": -1546.812744140625, "loss": 0.2132, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18441830575466156, "rewards/margins": 0.21885094046592712, "rewards/rejected": -0.40326929092407227, "step": 270 }, { "epoch": 0.07, "learning_rate": 3.7333333333333337e-06, "logits/chosen": -1.3472883701324463, "logits/rejected": -0.16090384125709534, "logps/chosen": -674.9337158203125, "logps/rejected": -1524.169677734375, "loss": 0.2165, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11200114339590073, "rewards/margins": 0.18906202912330627, "rewards/rejected": -0.3010631799697876, "step": 280 }, { "epoch": 0.08, "learning_rate": 3.866666666666667e-06, "logits/chosen": -1.5704516172409058, "logits/rejected": -0.3292531967163086, "logps/chosen": -733.7753295898438, "logps/rejected": -1638.066162109375, "loss": 0.2036, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0726594403386116, "rewards/margins": 0.25331053137779236, "rewards/rejected": -0.32596996426582336, "step": 290 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.4241251945495605, "logits/rejected": -0.6696543097496033, "logps/chosen": -756.4315795898438, "logps/rejected": -1572.5772705078125, "loss": 0.1903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16283096373081207, "rewards/margins": 0.22412030398845673, "rewards/rejected": -0.3869512677192688, "step": 300 }, { "epoch": 0.08, "learning_rate": 4.133333333333333e-06, "logits/chosen": -1.368127465248108, "logits/rejected": -0.3331999182701111, "logps/chosen": -967.3336791992188, "logps/rejected": -1911.6470947265625, "loss": 0.1603, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2711753249168396, "rewards/margins": 0.3143623471260071, "rewards/rejected": -0.5855377316474915, "step": 310 }, { "epoch": 0.09, "learning_rate": 4.266666666666668e-06, "logits/chosen": -1.71210515499115, "logits/rejected": 0.0729786604642868, "logps/chosen": -1014.9069213867188, "logps/rejected": -1880.586669921875, "loss": 0.1866, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31470808386802673, "rewards/margins": 0.3131619393825531, "rewards/rejected": -0.6278700828552246, "step": 320 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -1.2539806365966797, "logits/rejected": -0.792563796043396, "logps/chosen": -697.0578002929688, "logps/rejected": -1576.9892578125, "loss": 0.1946, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15875259041786194, "rewards/margins": 0.22502513229846954, "rewards/rejected": -0.3837777078151703, "step": 330 }, { "epoch": 0.09, "learning_rate": 4.533333333333334e-06, "logits/chosen": -1.3388795852661133, "logits/rejected": -0.407992422580719, "logps/chosen": -761.8697509765625, "logps/rejected": -1639.280517578125, "loss": 1.8252, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2346605360507965, "rewards/margins": 0.4083401560783386, "rewards/rejected": -0.6430006623268127, "step": 340 }, { "epoch": 0.09, "learning_rate": 4.666666666666667e-06, "logits/chosen": -1.370415449142456, "logits/rejected": -0.36617863178253174, "logps/chosen": -647.6007080078125, "logps/rejected": -1585.802001953125, "loss": 0.2431, "rewards/accuracies": 0.75, "rewards/chosen": -0.1556210219860077, "rewards/margins": 0.14714348316192627, "rewards/rejected": -0.30276453495025635, "step": 350 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -1.4223321676254272, "logits/rejected": -0.7678893804550171, "logps/chosen": -862.3582763671875, "logps/rejected": -1554.5501708984375, "loss": 0.3499, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15241160988807678, "rewards/margins": 0.07054910808801651, "rewards/rejected": -0.22296074032783508, "step": 360 }, { "epoch": 0.1, "learning_rate": 4.933333333333334e-06, "logits/chosen": -1.6838983297348022, "logits/rejected": -0.8386624455451965, "logps/chosen": -854.6920776367188, "logps/rejected": -1489.2125244140625, "loss": 0.3149, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.12944355607032776, "rewards/margins": 0.06882871687412262, "rewards/rejected": -0.19827227294445038, "step": 370 }, { "epoch": 0.1, "learning_rate": 4.999972922944898e-06, "logits/chosen": -1.6282720565795898, "logits/rejected": -1.0739606618881226, "logps/chosen": -714.4268188476562, "logps/rejected": -1637.6285400390625, "loss": 0.1918, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07471625506877899, "rewards/margins": 0.17545118927955627, "rewards/rejected": -0.2501674294471741, "step": 380 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -1.5494694709777832, "logits/rejected": -0.6611472964286804, "logps/chosen": -666.4691162109375, "logps/rejected": -1401.627685546875, "loss": 0.2341, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.024204634130001068, "rewards/margins": 0.2136712521314621, "rewards/rejected": -0.23787586390972137, "step": 390 }, { "epoch": 0.11, "learning_rate": 4.999323102948655e-06, "logits/chosen": -1.4422892332077026, "logits/rejected": -0.2696318030357361, "logps/chosen": -627.1585083007812, "logps/rejected": -1432.5504150390625, "loss": 0.2485, "rewards/accuracies": 0.75, "rewards/chosen": -0.0546126663684845, "rewards/margins": 0.1983325034379959, "rewards/rejected": -0.2529451549053192, "step": 400 }, { "epoch": 0.11, "learning_rate": 4.998673339256785e-06, "logits/chosen": -1.4705862998962402, "logits/rejected": -0.8628055453300476, "logps/chosen": -736.0491333007812, "logps/rejected": -1735.3140869140625, "loss": 0.1994, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1055409163236618, "rewards/margins": 0.21688199043273926, "rewards/rejected": -0.32242292165756226, "step": 410 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -1.3044060468673706, "logits/rejected": -0.5804620981216431, "logps/chosen": -709.5520629882812, "logps/rejected": -1824.0869140625, "loss": 0.1758, "rewards/accuracies": 0.75, "rewards/chosen": -0.13376520574092865, "rewards/margins": 0.34297293424606323, "rewards/rejected": -0.4767381548881531, "step": 420 }, { "epoch": 0.11, "learning_rate": 4.996724385978142e-06, "logits/chosen": -1.5715945959091187, "logits/rejected": -0.49768322706222534, "logps/chosen": -752.0654907226562, "logps/rejected": -1800.0816650390625, "loss": 0.1937, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12571558356285095, "rewards/margins": 0.3276565372943878, "rewards/rejected": -0.45337215065956116, "step": 430 }, { "epoch": 0.12, "learning_rate": 4.995425365260585e-06, "logits/chosen": -1.4646804332733154, "logits/rejected": -0.6573309898376465, "logps/chosen": -604.1141357421875, "logps/rejected": -1509.289306640625, "loss": 0.1685, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08397423475980759, "rewards/margins": 0.22961445152759552, "rewards/rejected": -0.3135886788368225, "step": 440 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -1.58323073387146, "logits/rejected": -0.9330165982246399, "logps/chosen": -575.3523559570312, "logps/rejected": -1430.528564453125, "loss": 0.1947, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0727187842130661, "rewards/margins": 0.2145281583070755, "rewards/rejected": -0.2872469425201416, "step": 450 }, { "epoch": 0.12, "learning_rate": 4.992178798434684e-06, "logits/chosen": -1.5033951997756958, "logits/rejected": -0.5900696516036987, "logps/chosen": -897.4099731445312, "logps/rejected": -1658.7633056640625, "loss": 0.2771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17770914733409882, "rewards/margins": 0.19083838164806366, "rewards/rejected": -0.3685474991798401, "step": 460 }, { "epoch": 0.13, "learning_rate": 4.990231533628719e-06, "logits/chosen": -1.6505463123321533, "logits/rejected": -0.9378656148910522, "logps/chosen": -690.3605346679688, "logps/rejected": -1701.1597900390625, "loss": 0.2146, "rewards/accuracies": 0.875, "rewards/chosen": -0.13155733048915863, "rewards/margins": 0.2767486572265625, "rewards/rejected": -0.40830597281455994, "step": 470 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -1.555983543395996, "logits/rejected": -0.4679872393608093, "logps/chosen": -698.6531982421875, "logps/rejected": -1681.360595703125, "loss": 0.2585, "rewards/accuracies": 0.75, "rewards/chosen": -0.12316081672906876, "rewards/margins": 0.31332293152809143, "rewards/rejected": -0.436483770608902, "step": 480 }, { "epoch": 0.13, "learning_rate": 4.985689884830711e-06, "logits/chosen": -1.5021615028381348, "logits/rejected": -0.6322463750839233, "logps/chosen": -743.8492431640625, "logps/rejected": -1656.78515625, "loss": 0.1861, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09808465093374252, "rewards/margins": 0.18296462297439575, "rewards/rejected": -0.28104931116104126, "step": 490 }, { "epoch": 0.13, "learning_rate": 4.983095894354858e-06, "logits/chosen": -1.537832260131836, "logits/rejected": -0.5591806173324585, "logps/chosen": -771.895751953125, "logps/rejected": -1593.089111328125, "loss": 0.2529, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11263638734817505, "rewards/margins": 0.1450023353099823, "rewards/rejected": -0.25763875246047974, "step": 500 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -1.4696406126022339, "logits/rejected": -0.6640992164611816, "logps/chosen": -686.0765380859375, "logps/rejected": -1460.5648193359375, "loss": 0.1911, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12830252945423126, "rewards/margins": 0.16823554039001465, "rewards/rejected": -0.2965380549430847, "step": 510 }, { "epoch": 0.14, "learning_rate": 4.97726270502586e-06, "logits/chosen": -1.4664279222488403, "logits/rejected": -0.7313605546951294, "logps/chosen": -818.5562744140625, "logps/rejected": -1775.9241943359375, "loss": 0.1947, "rewards/accuracies": 0.75, "rewards/chosen": -0.20023846626281738, "rewards/margins": 0.24244621396064758, "rewards/rejected": -0.44268471002578735, "step": 520 }, { "epoch": 0.14, "learning_rate": 4.974024011595864e-06, "logits/chosen": -1.5485690832138062, "logits/rejected": -0.6978067755699158, "logps/chosen": -942.9921875, "logps/rejected": -1794.7066650390625, "loss": 0.3032, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21240389347076416, "rewards/margins": 0.2622910141944885, "rewards/rejected": -0.4746948778629303, "step": 530 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -1.4141438007354736, "logits/rejected": -0.4546588957309723, "logps/chosen": -668.2946166992188, "logps/rejected": -1650.0318603515625, "loss": 0.2265, "rewards/accuracies": 0.75, "rewards/chosen": -0.1157810240983963, "rewards/margins": 0.2432054728269577, "rewards/rejected": -0.3589865267276764, "step": 540 }, { "epoch": 0.15, "learning_rate": 4.966903830281449e-06, "logits/chosen": -1.8105099201202393, "logits/rejected": -0.36172086000442505, "logps/chosen": -677.1749267578125, "logps/rejected": -1432.0328369140625, "loss": 0.2364, "rewards/accuracies": 0.75, "rewards/chosen": -0.14466652274131775, "rewards/margins": 0.17044545710086823, "rewards/rejected": -0.3151119649410248, "step": 550 }, { "epoch": 0.15, "learning_rate": 4.9630229593330226e-06, "logits/chosen": -1.544588327407837, "logits/rejected": -0.8351410031318665, "logps/chosen": -834.5636596679688, "logps/rejected": -1683.013671875, "loss": 0.2298, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18364231288433075, "rewards/margins": 0.259293794631958, "rewards/rejected": -0.44293609261512756, "step": 560 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -1.6264123916625977, "logits/rejected": -0.5471200942993164, "logps/chosen": -858.4713134765625, "logps/rejected": -1681.949951171875, "loss": 0.2358, "rewards/accuracies": 0.75, "rewards/chosen": -0.2060559242963791, "rewards/margins": 0.24328169226646423, "rewards/rejected": -0.4493376612663269, "step": 570 }, { "epoch": 0.15, "learning_rate": 4.954621338136399e-06, "logits/chosen": -1.4879562854766846, "logits/rejected": -0.6239393949508667, "logps/chosen": -916.1363525390625, "logps/rejected": -1731.7279052734375, "loss": 0.2434, "rewards/accuracies": 0.75, "rewards/chosen": -0.1614031344652176, "rewards/margins": 0.23414401710033417, "rewards/rejected": -0.39554715156555176, "step": 580 }, { "epoch": 0.16, "learning_rate": 4.95010131585597e-06, "logits/chosen": -1.220314383506775, "logits/rejected": -0.44166284799575806, "logps/chosen": -772.2953491210938, "logps/rejected": -1575.3052978515625, "loss": 0.2662, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08703169226646423, "rewards/margins": 0.2424904853105545, "rewards/rejected": -0.32952219247817993, "step": 590 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.4573420286178589, "logits/rejected": -0.5127624273300171, "logps/chosen": -504.53057861328125, "logps/rejected": -1400.8695068359375, "loss": 0.1911, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05916530638933182, "rewards/margins": 0.23245540261268616, "rewards/rejected": -0.291620671749115, "step": 600 }, { "epoch": 0.16, "learning_rate": 4.940424806108619e-06, "logits/chosen": -1.6605002880096436, "logits/rejected": -0.6169177293777466, "logps/chosen": -778.3709716796875, "logps/rejected": -1566.796630859375, "loss": 0.1824, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08246553689241409, "rewards/margins": 0.2251826822757721, "rewards/rejected": -0.307648241519928, "step": 610 }, { "epoch": 0.17, "learning_rate": 4.935269157073597e-06, "logits/chosen": -1.6027615070343018, "logits/rejected": -1.1422998905181885, "logps/chosen": -592.0419921875, "logps/rejected": -1658.290283203125, "loss": 0.2057, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.048401519656181335, "rewards/margins": 0.334246426820755, "rewards/rejected": -0.38264790177345276, "step": 620 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -1.4095045328140259, "logits/rejected": -0.1883077323436737, "logps/chosen": -607.0613403320312, "logps/rejected": -1580.00341796875, "loss": 0.1792, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07415771484375, "rewards/margins": 0.2879538834095001, "rewards/rejected": -0.3621116280555725, "step": 630 }, { "epoch": 0.17, "learning_rate": 4.924325304226745e-06, "logits/chosen": -1.6297693252563477, "logits/rejected": -0.49017101526260376, "logps/chosen": -792.6488037109375, "logps/rejected": -1665.556396484375, "loss": 0.1603, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09000878036022186, "rewards/margins": 0.2915512025356293, "rewards/rejected": -0.38155999779701233, "step": 640 }, { "epoch": 0.17, "learning_rate": 4.91853804865716e-06, "logits/chosen": -1.1054975986480713, "logits/rejected": -0.42201828956604004, "logps/chosen": -713.8890991210938, "logps/rejected": -1576.742919921875, "loss": 0.1704, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11752257496118546, "rewards/margins": 0.2575463354587555, "rewards/rejected": -0.37506890296936035, "step": 650 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -1.3571802377700806, "logits/rejected": -0.6059755086898804, "logps/chosen": -884.0383911132812, "logps/rejected": -1731.5504150390625, "loss": 0.2647, "rewards/accuracies": 0.625, "rewards/chosen": -0.24475233256816864, "rewards/margins": 0.20648574829101562, "rewards/rejected": -0.45123806595802307, "step": 660 }, { "epoch": 0.18, "learning_rate": 4.9063353863980565e-06, "logits/chosen": -1.3680832386016846, "logits/rejected": -0.6090233325958252, "logps/chosen": -807.2792358398438, "logps/rejected": -1725.8609619140625, "loss": 0.1917, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19080020487308502, "rewards/margins": 0.28758734464645386, "rewards/rejected": -0.4783875346183777, "step": 670 }, { "epoch": 0.18, "learning_rate": 4.899921037021719e-06, "logits/chosen": -1.7092090845108032, "logits/rejected": -0.9691111445426941, "logps/chosen": -696.3843994140625, "logps/rejected": -1516.789794921875, "loss": 0.1585, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16210845112800598, "rewards/margins": 0.24107725918293, "rewards/rejected": -0.4031856954097748, "step": 680 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -1.375356674194336, "logits/rejected": -0.36455339193344116, "logps/chosen": -759.2677612304688, "logps/rejected": -1726.4449462890625, "loss": 0.1731, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.196892648935318, "rewards/margins": 0.2657999098300934, "rewards/rejected": -0.46269258856773376, "step": 690 }, { "epoch": 0.19, "learning_rate": 4.88646908061933e-06, "logits/chosen": -1.4417951107025146, "logits/rejected": -0.3565208911895752, "logps/chosen": -741.1653442382812, "logps/rejected": -1683.4515380859375, "loss": 0.2447, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18826603889465332, "rewards/margins": 0.2816776633262634, "rewards/rejected": -0.46994370222091675, "step": 700 }, { "epoch": 0.19, "learning_rate": 4.879432639152935e-06, "logits/chosen": -1.37065851688385, "logits/rejected": -0.2773549556732178, "logps/chosen": -838.9853515625, "logps/rejected": -1707.4296875, "loss": 0.2116, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12412204593420029, "rewards/margins": 0.23440977931022644, "rewards/rejected": -0.3585318624973297, "step": 710 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -1.580479621887207, "logits/rejected": -0.6059103608131409, "logps/chosen": -790.3890380859375, "logps/rejected": -1548.9473876953125, "loss": 0.2076, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11692629009485245, "rewards/margins": 0.20617632567882538, "rewards/rejected": -0.3231026232242584, "step": 720 }, { "epoch": 0.19, "learning_rate": 4.864741878038218e-06, "logits/chosen": -1.4246976375579834, "logits/rejected": -0.6282674670219421, "logps/chosen": -739.7086791992188, "logps/rejected": -1689.615234375, "loss": 0.1781, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11477355659008026, "rewards/margins": 0.28999191522598267, "rewards/rejected": -0.40476545691490173, "step": 730 }, { "epoch": 0.2, "learning_rate": 4.857088831287158e-06, "logits/chosen": -1.603228211402893, "logits/rejected": -0.13681410253047943, "logps/chosen": -734.5335693359375, "logps/rejected": -1756.3695068359375, "loss": 0.171, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16050152480602264, "rewards/margins": 0.3059554696083069, "rewards/rejected": -0.4664570391178131, "step": 740 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.3526674509048462, "logits/rejected": -0.1356133222579956, "logps/chosen": -775.5394897460938, "logps/rejected": -1612.2784423828125, "loss": 0.2257, "rewards/accuracies": 0.75, "rewards/chosen": -0.16132906079292297, "rewards/margins": 0.2226000726222992, "rewards/rejected": -0.38392913341522217, "step": 750 }, { "epoch": 0.2, "learning_rate": 4.841170720873723e-06, "logits/chosen": -1.466218113899231, "logits/rejected": -0.2698180377483368, "logps/chosen": -699.693359375, "logps/rejected": -1555.100341796875, "loss": 0.2182, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13787570595741272, "rewards/margins": 0.24788126349449158, "rewards/rejected": -0.3857569694519043, "step": 760 }, { "epoch": 0.21, "learning_rate": 4.832907036453647e-06, "logits/chosen": -1.4457144737243652, "logits/rejected": -0.35442107915878296, "logps/chosen": -820.4182739257812, "logps/rejected": -1727.771484375, "loss": 0.1765, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12979528307914734, "rewards/margins": 0.2619260847568512, "rewards/rejected": -0.39172136783599854, "step": 770 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -1.2430192232131958, "logits/rejected": -0.31560835242271423, "logps/chosen": -599.2308959960938, "logps/rejected": -1673.4691162109375, "loss": 0.1464, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11397616565227509, "rewards/margins": 0.341776579618454, "rewards/rejected": -0.45575276017189026, "step": 780 }, { "epoch": 0.21, "learning_rate": 4.815773989205165e-06, "logits/chosen": -1.5526068210601807, "logits/rejected": -0.6952439546585083, "logps/chosen": -760.5252685546875, "logps/rejected": -1961.7568359375, "loss": 0.1258, "rewards/accuracies": 0.875, "rewards/chosen": -0.16641755402088165, "rewards/margins": 0.44562679529190063, "rewards/rejected": -0.6120442748069763, "step": 790 }, { "epoch": 0.21, "learning_rate": 4.806906110888606e-06, "logits/chosen": -1.4445289373397827, "logits/rejected": -0.6352332234382629, "logps/chosen": -702.0392456054688, "logps/rejected": -1625.85302734375, "loss": 0.1976, "rewards/accuracies": 0.75, "rewards/chosen": -0.14837577939033508, "rewards/margins": 0.2570492923259735, "rewards/rejected": -0.4054250717163086, "step": 800 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -1.404865026473999, "logits/rejected": -0.4630287289619446, "logps/chosen": -645.7227172851562, "logps/rejected": -1415.0540771484375, "loss": 0.1912, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.13058145344257355, "rewards/margins": 0.18823085725307465, "rewards/rejected": -0.3188122808933258, "step": 810 }, { "epoch": 0.22, "learning_rate": 4.788571486639948e-06, "logits/chosen": -1.32173752784729, "logits/rejected": -0.5882034301757812, "logps/chosen": -841.7330932617188, "logps/rejected": -1966.875732421875, "loss": 0.1613, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14226606488227844, "rewards/margins": 0.33964481949806213, "rewards/rejected": -0.4819108545780182, "step": 820 }, { "epoch": 0.22, "learning_rate": 4.779106329331665e-06, "logits/chosen": -1.5085209608078003, "logits/rejected": -0.3688226044178009, "logps/chosen": -730.3353271484375, "logps/rejected": -1609.4810791015625, "loss": 0.1961, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14409688115119934, "rewards/margins": 0.268598735332489, "rewards/rejected": -0.41269564628601074, "step": 830 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -1.30078125, "logits/rejected": -0.17256946861743927, "logps/chosen": -978.0304565429688, "logps/rejected": -2078.25830078125, "loss": 0.1996, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.25719624757766724, "rewards/margins": 0.36804524064064026, "rewards/rejected": -0.6252414584159851, "step": 840 }, { "epoch": 0.23, "learning_rate": 4.759584424871302e-06, "logits/chosen": -1.2128162384033203, "logits/rejected": -0.2545672059059143, "logps/chosen": -833.3894653320312, "logps/rejected": -1909.220703125, "loss": 0.1221, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23837868869304657, "rewards/margins": 0.341843843460083, "rewards/rejected": -0.580222487449646, "step": 850 }, { "epoch": 0.23, "learning_rate": 4.749529369216246e-06, "logits/chosen": -1.505275845527649, "logits/rejected": -0.8203606605529785, "logps/chosen": -819.6467895507812, "logps/rejected": -1751.1910400390625, "loss": 0.2127, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19994454085826874, "rewards/margins": 0.27609339356422424, "rewards/rejected": -0.47603797912597656, "step": 860 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -1.4275624752044678, "logits/rejected": -0.5869510769844055, "logps/chosen": -676.9829711914062, "logps/rejected": -1566.9033203125, "loss": 0.1924, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13645704090595245, "rewards/margins": 0.26502570509910583, "rewards/rejected": -0.4014827311038971, "step": 870 }, { "epoch": 0.23, "learning_rate": 4.7288354071380415e-06, "logits/chosen": -1.2436352968215942, "logits/rejected": 0.3669028580188751, "logps/chosen": -650.0794067382812, "logps/rejected": -1452.456298828125, "loss": 0.1965, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1296919882297516, "rewards/margins": 0.22590501606464386, "rewards/rejected": -0.35559698939323425, "step": 880 }, { "epoch": 0.24, "learning_rate": 4.7181982937661485e-06, "logits/chosen": -1.2913326025009155, "logits/rejected": -0.41281813383102417, "logps/chosen": -856.18896484375, "logps/rejected": -1820.381591796875, "loss": 0.1497, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22433075308799744, "rewards/margins": 0.2952847480773926, "rewards/rejected": -0.5196155309677124, "step": 890 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.3868077993392944, "logits/rejected": -0.7847863435745239, "logps/chosen": -884.7540283203125, "logps/rejected": -1902.289306640625, "loss": 0.1453, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24957780539989471, "rewards/margins": 0.3298245370388031, "rewards/rejected": -0.5794023275375366, "step": 900 }, { "epoch": 0.24, "learning_rate": 4.696348410599244e-06, "logits/chosen": -1.3086035251617432, "logits/rejected": -0.8314957618713379, "logps/chosen": -689.6898803710938, "logps/rejected": -1640.506103515625, "loss": 0.1847, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20904815196990967, "rewards/margins": 0.30142712593078613, "rewards/rejected": -0.5104752779006958, "step": 910 }, { "epoch": 0.25, "learning_rate": 4.685137534011549e-06, "logits/chosen": -1.565288782119751, "logits/rejected": -0.27172648906707764, "logps/chosen": -844.7750244140625, "logps/rejected": -1699.747314453125, "loss": 0.2055, "rewards/accuracies": 0.75, "rewards/chosen": -0.2513663172721863, "rewards/margins": 0.26970070600509644, "rewards/rejected": -0.5210670232772827, "step": 920 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -1.2228829860687256, "logits/rejected": -0.5248723030090332, "logps/chosen": -664.1156616210938, "logps/rejected": -1562.0504150390625, "loss": 0.1669, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.162824347615242, "rewards/margins": 0.25564128160476685, "rewards/rejected": -0.41846561431884766, "step": 930 }, { "epoch": 0.25, "learning_rate": 4.662148767637578e-06, "logits/chosen": -1.2507654428482056, "logits/rejected": -0.5844524502754211, "logps/chosen": -839.5402221679688, "logps/rejected": -1746.392333984375, "loss": 0.2229, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18832561373710632, "rewards/margins": 0.2621099650859833, "rewards/rejected": -0.4504355788230896, "step": 940 }, { "epoch": 0.25, "learning_rate": 4.650372869738415e-06, "logits/chosen": -1.55803644657135, "logits/rejected": -0.7119165658950806, "logps/chosen": -736.268310546875, "logps/rejected": -1563.0191650390625, "loss": 0.2164, "rewards/accuracies": 0.75, "rewards/chosen": -0.10577349364757538, "rewards/margins": 0.2629134953022003, "rewards/rejected": -0.3686870038509369, "step": 950 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -1.3386160135269165, "logits/rejected": -0.11453273147344589, "logps/chosen": -630.7725830078125, "logps/rejected": -1448.709228515625, "loss": 0.2449, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09674180299043655, "rewards/margins": 0.18708564341068268, "rewards/rejected": -0.28382742404937744, "step": 960 }, { "epoch": 0.26, "learning_rate": 4.626263146105875e-06, "logits/chosen": -1.6188023090362549, "logits/rejected": -0.530141294002533, "logps/chosen": -792.281005859375, "logps/rejected": -1616.726806640625, "loss": 0.2387, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16821546852588654, "rewards/margins": 0.20957235991954803, "rewards/rejected": -0.37778785824775696, "step": 970 }, { "epoch": 0.26, "learning_rate": 4.613931409386196e-06, "logits/chosen": -1.4189367294311523, "logits/rejected": -0.2773023247718811, "logps/chosen": -763.1712646484375, "logps/rejected": -1585.35791015625, "loss": 0.2073, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15055999159812927, "rewards/margins": 0.23061557114124298, "rewards/rejected": -0.38117554783821106, "step": 980 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -1.4606117010116577, "logits/rejected": -0.6397336721420288, "logps/chosen": -748.8417358398438, "logps/rejected": -1670.744140625, "loss": 0.1761, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14542202651500702, "rewards/margins": 0.2664222717285156, "rewards/rejected": -0.4118443429470062, "step": 990 }, { "epoch": 0.27, "learning_rate": 4.588719528532342e-06, "logits/chosen": -1.324894666671753, "logits/rejected": -0.27193373441696167, "logps/chosen": -764.4343872070312, "logps/rejected": -1688.8870849609375, "loss": 0.2086, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13419455289840698, "rewards/margins": 0.2759809195995331, "rewards/rejected": -0.41017547249794006, "step": 1000 }, { "epoch": 0.27, "learning_rate": 4.575841568909494e-06, "logits/chosen": -1.6580641269683838, "logits/rejected": -0.3782033622264862, "logps/chosen": -732.3780517578125, "logps/rejected": -1696.1322021484375, "loss": 0.1872, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10972636938095093, "rewards/margins": 0.29507070779800415, "rewards/rejected": -0.4047970771789551, "step": 1010 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -1.503248691558838, "logits/rejected": 0.1558968424797058, "logps/chosen": -716.7626953125, "logps/rejected": -1444.2918701171875, "loss": 0.2014, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09443630278110504, "rewards/margins": 0.21701212227344513, "rewards/rejected": -0.31144842505455017, "step": 1020 }, { "epoch": 0.27, "learning_rate": 4.549547190300622e-06, "logits/chosen": -1.3067868947982788, "logits/rejected": -0.4560883641242981, "logps/chosen": -790.8909912109375, "logps/rejected": -1757.387451171875, "loss": 0.224, "rewards/accuracies": 0.75, "rewards/chosen": -0.1645580530166626, "rewards/margins": 0.325479656457901, "rewards/rejected": -0.490037739276886, "step": 1030 }, { "epoch": 0.28, "learning_rate": 4.536133049620143e-06, "logits/chosen": -1.140446662902832, "logits/rejected": -0.34088680148124695, "logps/chosen": -722.1959228515625, "logps/rejected": -1831.537353515625, "loss": 0.1268, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12402956187725067, "rewards/margins": 0.37322643399238586, "rewards/rejected": -0.49725598096847534, "step": 1040 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.475990891456604, "logits/rejected": -0.5005327463150024, "logps/chosen": -783.8773193359375, "logps/rejected": -1766.515869140625, "loss": 0.176, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15318922698497772, "rewards/margins": 0.31895914673805237, "rewards/rejected": -0.4721483290195465, "step": 1050 }, { "epoch": 0.28, "learning_rate": 4.508776676821739e-06, "logits/chosen": -1.4537649154663086, "logits/rejected": -0.6859838366508484, "logps/chosen": -839.9093627929688, "logps/rejected": -1558.96435546875, "loss": 0.1925, "rewards/accuracies": 0.75, "rewards/chosen": -0.18507587909698486, "rewards/margins": 0.21864044666290283, "rewards/rejected": -0.4037163257598877, "step": 1060 }, { "epoch": 0.29, "learning_rate": 4.494836815027022e-06, "logits/chosen": -1.3298923969268799, "logits/rejected": 0.0534161701798439, "logps/chosen": -754.2385864257812, "logps/rejected": -1705.55078125, "loss": 0.232, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1495281457901001, "rewards/margins": 0.2592945694923401, "rewards/rejected": -0.4088227152824402, "step": 1070 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -1.3468838930130005, "logits/rejected": -0.2606663107872009, "logps/chosen": -726.0997314453125, "logps/rejected": -1622.6500244140625, "loss": 0.203, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13949835300445557, "rewards/margins": 0.3188321590423584, "rewards/rejected": -0.45833054184913635, "step": 1080 }, { "epoch": 0.29, "learning_rate": 4.466439779715696e-06, "logits/chosen": -1.3387072086334229, "logits/rejected": -0.5179058313369751, "logps/chosen": -702.0155029296875, "logps/rejected": -1574.4173583984375, "loss": 0.1881, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08557260781526566, "rewards/margins": 0.29005369544029236, "rewards/rejected": -0.3756263256072998, "step": 1090 }, { "epoch": 0.29, "learning_rate": 4.451985066691649e-06, "logits/chosen": -1.3348770141601562, "logits/rejected": -0.6792385578155518, "logps/chosen": -666.2745361328125, "logps/rejected": -1547.283203125, "loss": 0.1755, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09519227594137192, "rewards/margins": 0.2866414785385132, "rewards/rejected": -0.3818337321281433, "step": 1100 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -1.5723166465759277, "logits/rejected": -0.519290566444397, "logps/chosen": -685.1051025390625, "logps/rejected": -1876.2593994140625, "loss": 0.1298, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10364224761724472, "rewards/margins": 0.37660035490989685, "rewards/rejected": -0.48024263978004456, "step": 1110 }, { "epoch": 0.3, "learning_rate": 4.422569512021332e-06, "logits/chosen": -1.4255059957504272, "logits/rejected": 0.1319495439529419, "logps/chosen": -696.9476318359375, "logps/rejected": -1619.9892578125, "loss": 0.2105, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0946447104215622, "rewards/margins": 0.2710058093070984, "rewards/rejected": -0.3656505048274994, "step": 1120 }, { "epoch": 0.3, "learning_rate": 4.407611219118363e-06, "logits/chosen": -1.3705596923828125, "logits/rejected": -0.5301432609558105, "logps/chosen": -698.9526977539062, "logps/rejected": -1700.2493896484375, "loss": 0.164, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1371428668498993, "rewards/margins": 0.29292064905166626, "rewards/rejected": -0.43006348609924316, "step": 1130 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -1.3947621583938599, "logits/rejected": -0.8983534574508667, "logps/chosen": -651.66162109375, "logps/rejected": -1657.3372802734375, "loss": 0.1776, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11623096466064453, "rewards/margins": 0.2917022407054901, "rewards/rejected": -0.40793323516845703, "step": 1140 }, { "epoch": 0.31, "learning_rate": 4.377200082453748e-06, "logits/chosen": -1.3964405059814453, "logits/rejected": -0.6084726452827454, "logps/chosen": -692.3182983398438, "logps/rejected": -1706.6009521484375, "loss": 0.4044, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12358059734106064, "rewards/margins": 0.3641052842140198, "rewards/rejected": -0.4876858592033386, "step": 1150 }, { "epoch": 0.31, "learning_rate": 4.361749873698707e-06, "logits/chosen": -1.4753860235214233, "logits/rejected": -0.7048450708389282, "logps/chosen": -698.3816528320312, "logps/rejected": -1705.034423828125, "loss": 0.124, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08291604369878769, "rewards/margins": 0.2811528742313385, "rewards/rejected": -0.3640689253807068, "step": 1160 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -1.4870970249176025, "logits/rejected": -0.9103308916091919, "logps/chosen": -671.7784423828125, "logps/rejected": -1529.8997802734375, "loss": 0.2008, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06387507170438766, "rewards/margins": 0.2635475695133209, "rewards/rejected": -0.3274226784706116, "step": 1170 }, { "epoch": 0.31, "learning_rate": 4.330366868729376e-06, "logits/chosen": -1.421744465827942, "logits/rejected": -0.6302933692932129, "logps/chosen": -724.4271240234375, "logps/rejected": -1651.8883056640625, "loss": 0.1881, "rewards/accuracies": 0.75, "rewards/chosen": -0.1005069762468338, "rewards/margins": 0.2800332307815552, "rewards/rejected": -0.3805401921272278, "step": 1180 }, { "epoch": 0.32, "learning_rate": 4.3144367917302964e-06, "logits/chosen": -1.408508062362671, "logits/rejected": -0.9906333088874817, "logps/chosen": -622.6488647460938, "logps/rejected": -1618.7296142578125, "loss": 0.1708, "rewards/accuracies": 0.875, "rewards/chosen": -0.08803695440292358, "rewards/margins": 0.27410316467285156, "rewards/rejected": -0.36214011907577515, "step": 1190 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.4352304935455322, "logits/rejected": -0.9279989004135132, "logps/chosen": -665.1091918945312, "logps/rejected": -1825.142333984375, "loss": 0.2047, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10474482923746109, "rewards/margins": 0.35141628980636597, "rewards/rejected": -0.45616111159324646, "step": 1200 }, { "epoch": 0.32, "learning_rate": 4.2821063899795015e-06, "logits/chosen": -1.563280463218689, "logits/rejected": -0.555804431438446, "logps/chosen": -646.5196533203125, "logps/rejected": -1623.5345458984375, "loss": 0.1522, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.055479682981967926, "rewards/margins": 0.325848251581192, "rewards/rejected": -0.38132789731025696, "step": 1210 }, { "epoch": 0.33, "learning_rate": 4.265708866531238e-06, "logits/chosen": -1.6181457042694092, "logits/rejected": -0.8855217695236206, "logps/chosen": -649.41455078125, "logps/rejected": -1402.5155029296875, "loss": 0.1827, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.051223646849393845, "rewards/margins": 0.2268896847963333, "rewards/rejected": -0.27811330556869507, "step": 1220 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -1.4349555969238281, "logits/rejected": -0.6732171773910522, "logps/chosen": -621.8024291992188, "logps/rejected": -1621.55908203125, "loss": 0.1949, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06152881309390068, "rewards/margins": 0.23424020409584045, "rewards/rejected": -0.2957690358161926, "step": 1230 }, { "epoch": 0.33, "learning_rate": 4.232456278273743e-06, "logits/chosen": -1.477827548980713, "logits/rejected": -1.140575647354126, "logps/chosen": -703.6815185546875, "logps/rejected": -1337.854248046875, "loss": 0.2197, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10893256962299347, "rewards/margins": 0.16516511142253876, "rewards/rejected": -0.2740976810455322, "step": 1240 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -1.6464779376983643, "logits/rejected": -0.990412712097168, "logps/chosen": -644.1171264648438, "logps/rejected": -1499.906005859375, "loss": 0.2661, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10046522319316864, "rewards/margins": 0.22357575595378876, "rewards/rejected": -0.3240409791469574, "step": 1250 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -1.5124647617340088, "logits/rejected": -0.45678481459617615, "logps/chosen": -716.1838989257812, "logps/rejected": -1518.812255859375, "loss": 0.165, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05440264195203781, "rewards/margins": 0.2616801857948303, "rewards/rejected": -0.3160828649997711, "step": 1260 }, { "epoch": 0.34, "learning_rate": 4.181455249275701e-06, "logits/chosen": -1.2498810291290283, "logits/rejected": -0.8803867101669312, "logps/chosen": -659.9884643554688, "logps/rejected": -1640.7095947265625, "loss": 0.2331, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08121126145124435, "rewards/margins": 0.27540498971939087, "rewards/rejected": -0.35661619901657104, "step": 1270 }, { "epoch": 0.34, "learning_rate": 4.1641615463459926e-06, "logits/chosen": -1.5280659198760986, "logits/rejected": -0.723167359828949, "logps/chosen": -665.0728759765625, "logps/rejected": -1953.181884765625, "loss": 0.15, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.034947071224451065, "rewards/margins": 0.38297995924949646, "rewards/rejected": -0.417927086353302, "step": 1280 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -1.5791549682617188, "logits/rejected": -0.7244309186935425, "logps/chosen": -609.816650390625, "logps/rejected": -1450.82080078125, "loss": 0.1426, "rewards/accuracies": 0.75, "rewards/chosen": -0.05617643520236015, "rewards/margins": 0.2224070131778717, "rewards/rejected": -0.2785834074020386, "step": 1290 }, { "epoch": 0.35, "learning_rate": 4.129143072053639e-06, "logits/chosen": -1.7151685953140259, "logits/rejected": -0.39171552658081055, "logps/chosen": -769.5672607421875, "logps/rejected": -1687.483154296875, "loss": 0.1444, "rewards/accuracies": 0.875, "rewards/chosen": -0.071159228682518, "rewards/margins": 0.29008185863494873, "rewards/rejected": -0.36124110221862793, "step": 1300 }, { "epoch": 0.35, "learning_rate": 4.111421334905468e-06, "logits/chosen": -1.49336838722229, "logits/rejected": -0.5438629984855652, "logps/chosen": -848.5822143554688, "logps/rejected": -1763.63671875, "loss": 0.2436, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10583722591400146, "rewards/margins": 0.27904340624809265, "rewards/rejected": -0.38488060235977173, "step": 1310 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -1.359708547592163, "logits/rejected": -0.6944249272346497, "logps/chosen": -919.8367919921875, "logps/rejected": -1900.1605224609375, "loss": 0.1501, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1508992612361908, "rewards/margins": 0.33068108558654785, "rewards/rejected": -0.48158034682273865, "step": 1320 }, { "epoch": 0.35, "learning_rate": 4.075560538069767e-06, "logits/chosen": -1.4843828678131104, "logits/rejected": -0.6843305826187134, "logps/chosen": -726.9734497070312, "logps/rejected": -1652.866943359375, "loss": 0.1679, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.08053059130907059, "rewards/margins": 0.38363346457481384, "rewards/rejected": -0.4641640782356262, "step": 1330 }, { "epoch": 0.36, "learning_rate": 4.05742458558068e-06, "logits/chosen": -1.5596168041229248, "logits/rejected": -0.7530814409255981, "logps/chosen": -646.64697265625, "logps/rejected": -1448.167724609375, "loss": 0.1954, "rewards/accuracies": 0.75, "rewards/chosen": -0.07448308169841766, "rewards/margins": 0.23775526881217957, "rewards/rejected": -0.3122383654117584, "step": 1340 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.5747339725494385, "logits/rejected": -0.38576704263687134, "logps/chosen": -659.3181762695312, "logps/rejected": -1664.3912353515625, "loss": 0.198, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.04063820466399193, "rewards/margins": 0.32130542397499084, "rewards/rejected": -0.36194363236427307, "step": 1350 }, { "epoch": 0.36, "learning_rate": 4.020749429372286e-06, "logits/chosen": -1.5057035684585571, "logits/rejected": -0.5099014043807983, "logps/chosen": -759.4317626953125, "logps/rejected": -1881.2880859375, "loss": 0.1902, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08269429206848145, "rewards/margins": 0.3579484820365906, "rewards/rejected": -0.440642774105072, "step": 1360 }, { "epoch": 0.37, "learning_rate": 4.002213403412492e-06, "logits/chosen": -1.440815806388855, "logits/rejected": -0.31881892681121826, "logps/chosen": -595.3434448242188, "logps/rejected": -1587.8076171875, "loss": 0.2562, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07667971402406693, "rewards/margins": 0.292102575302124, "rewards/rejected": -0.36878231167793274, "step": 1370 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -1.7196152210235596, "logits/rejected": -0.5447965264320374, "logps/chosen": -607.3961181640625, "logps/rejected": -1578.577880859375, "loss": 0.1834, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.04431430250406265, "rewards/margins": 0.25382199883461, "rewards/rejected": -0.29813629388809204, "step": 1380 }, { "epoch": 0.37, "learning_rate": 3.964752486015001e-06, "logits/chosen": -1.5760372877120972, "logits/rejected": -0.7092293500900269, "logps/chosen": -662.7578125, "logps/rejected": -1628.713623046875, "loss": 0.1779, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06009943410754204, "rewards/margins": 0.26038116216659546, "rewards/rejected": -0.320480614900589, "step": 1390 }, { "epoch": 0.37, "learning_rate": 3.945830840419966e-06, "logits/chosen": -1.5873006582260132, "logits/rejected": -0.4512631893157959, "logps/chosen": -656.304931640625, "logps/rejected": -1755.419677734375, "loss": 0.1451, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.07909057289361954, "rewards/margins": 0.36252719163894653, "rewards/rejected": -0.44161778688430786, "step": 1400 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -1.5479198694229126, "logits/rejected": -0.4876307547092438, "logps/chosen": -590.7452392578125, "logps/rejected": -1592.341064453125, "loss": 0.1647, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07574008405208588, "rewards/margins": 0.30887579917907715, "rewards/rejected": -0.38461586833000183, "step": 1410 }, { "epoch": 0.38, "learning_rate": 3.907613372729916e-06, "logits/chosen": -1.3529613018035889, "logits/rejected": -0.5033882260322571, "logps/chosen": -667.5601806640625, "logps/rejected": -1620.945556640625, "loss": 0.1869, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10878247022628784, "rewards/margins": 0.27404552698135376, "rewards/rejected": -0.3828279972076416, "step": 1420 }, { "epoch": 0.38, "learning_rate": 3.888320862029699e-06, "logits/chosen": -1.7743628025054932, "logits/rejected": -0.5587132573127747, "logps/chosen": -699.76318359375, "logps/rejected": -1693.8353271484375, "loss": 0.2028, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07925678789615631, "rewards/margins": 0.30291515588760376, "rewards/rejected": -0.38217195868492126, "step": 1430 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -1.607465386390686, "logits/rejected": -0.8050976991653442, "logps/chosen": -772.9467163085938, "logps/rejected": -1667.0992431640625, "loss": 0.1693, "rewards/accuracies": 0.875, "rewards/chosen": -0.05015742778778076, "rewards/margins": 0.27269458770751953, "rewards/rejected": -0.32285207509994507, "step": 1440 }, { "epoch": 0.39, "learning_rate": 3.849376644878783e-06, "logits/chosen": -1.4109113216400146, "logits/rejected": -0.8917434811592102, "logps/chosen": -581.6261596679688, "logps/rejected": -1721.5927734375, "loss": 0.1552, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.02441004104912281, "rewards/margins": 0.3415711224079132, "rewards/rejected": -0.36598116159439087, "step": 1450 }, { "epoch": 0.39, "learning_rate": 3.829728312792895e-06, "logits/chosen": -1.7239364385604858, "logits/rejected": -0.6429109573364258, "logps/chosen": -607.516357421875, "logps/rejected": -1509.8909912109375, "loss": 0.2134, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0776829943060875, "rewards/margins": 0.28966718912124634, "rewards/rejected": -0.36735019087791443, "step": 1460 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -1.3290464878082275, "logits/rejected": -0.5565693974494934, "logps/chosen": -806.9801025390625, "logps/rejected": -1813.9703369140625, "loss": 0.1661, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13428995013237, "rewards/margins": 0.3366580903530121, "rewards/rejected": -0.4709479808807373, "step": 1470 }, { "epoch": 0.39, "learning_rate": 3.790087713710179e-06, "logits/chosen": -1.167667031288147, "logits/rejected": -0.08988530933856964, "logps/chosen": -961.8670654296875, "logps/rejected": -1891.443115234375, "loss": 0.2308, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2256636619567871, "rewards/margins": 0.27781441807746887, "rewards/rejected": -0.5034780502319336, "step": 1480 }, { "epoch": 0.4, "learning_rate": 3.770098881416945e-06, "logits/chosen": -1.445077896118164, "logits/rejected": -0.7122173309326172, "logps/chosen": -851.2200927734375, "logps/rejected": -1791.288818359375, "loss": 0.194, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18502262234687805, "rewards/margins": 0.28681907057762146, "rewards/rejected": -0.4718416631221771, "step": 1490 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.6007522344589233, "logits/rejected": -0.3576156497001648, "logps/chosen": -716.0091552734375, "logps/rejected": -1709.4010009765625, "loss": 0.1703, "rewards/accuracies": 0.875, "rewards/chosen": -0.12502431869506836, "rewards/margins": 0.3000204563140869, "rewards/rejected": -0.4250447750091553, "step": 1500 }, { "epoch": 0.4, "learning_rate": 3.7297928109491765e-06, "logits/chosen": -1.4100292921066284, "logits/rejected": -0.429446280002594, "logps/chosen": -616.4102172851562, "logps/rejected": -1615.930908203125, "loss": 0.1452, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10738323628902435, "rewards/margins": 0.33697018027305603, "rewards/rejected": -0.4443534016609192, "step": 1510 }, { "epoch": 0.41, "learning_rate": 3.7094790651387414e-06, "logits/chosen": -1.6581932306289673, "logits/rejected": -0.812160849571228, "logps/chosen": -681.979736328125, "logps/rejected": -1556.7125244140625, "loss": 0.1798, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13339491188526154, "rewards/margins": 0.25554531812667847, "rewards/rejected": -0.3889401853084564, "step": 1520 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -1.450721025466919, "logits/rejected": -0.5784316062927246, "logps/chosen": -761.6971435546875, "logps/rejected": -1722.798583984375, "loss": 0.2113, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1309405416250229, "rewards/margins": 0.2867937386035919, "rewards/rejected": -0.417734295129776, "step": 1530 }, { "epoch": 0.41, "learning_rate": 3.668538952747236e-06, "logits/chosen": -1.659879446029663, "logits/rejected": -0.39928197860717773, "logps/chosen": -810.2103881835938, "logps/rejected": -1897.2626953125, "loss": 0.1285, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12557505071163177, "rewards/margins": 0.390308141708374, "rewards/rejected": -0.515883207321167, "step": 1540 }, { "epoch": 0.41, "learning_rate": 3.6479161334675294e-06, "logits/chosen": -1.4575773477554321, "logits/rejected": -0.07413512468338013, "logps/chosen": -820.2180786132812, "logps/rejected": -1623.4227294921875, "loss": 0.2012, "rewards/accuracies": 0.75, "rewards/chosen": -0.13560107350349426, "rewards/margins": 0.23566505312919617, "rewards/rejected": -0.37126606702804565, "step": 1550 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -1.3455625772476196, "logits/rejected": -0.9724159240722656, "logps/chosen": -803.569091796875, "logps/rejected": -1693.400390625, "loss": 0.2043, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13027095794677734, "rewards/margins": 0.29757434129714966, "rewards/rejected": -0.4278453290462494, "step": 1560 }, { "epoch": 0.42, "learning_rate": 3.6063739030204226e-06, "logits/chosen": -1.5993826389312744, "logits/rejected": -0.9014598727226257, "logps/chosen": -670.8797607421875, "logps/rejected": -1565.011474609375, "loss": 0.1955, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13589544594287872, "rewards/margins": 0.2834901213645935, "rewards/rejected": -0.4193855822086334, "step": 1570 }, { "epoch": 0.42, "learning_rate": 3.5854580913255706e-06, "logits/chosen": -1.425690770149231, "logits/rejected": -0.18355034291744232, "logps/chosen": -754.1428833007812, "logps/rejected": -1659.813720703125, "loss": 0.2144, "rewards/accuracies": 0.75, "rewards/chosen": -0.17976424098014832, "rewards/margins": 0.26915082335472107, "rewards/rejected": -0.4489150941371918, "step": 1580 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -1.5854262113571167, "logits/rejected": -0.6857632398605347, "logps/chosen": -745.6060791015625, "logps/rejected": -1763.920166015625, "loss": 0.1558, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12223930656909943, "rewards/margins": 0.31619927287101746, "rewards/rejected": -0.4384385943412781, "step": 1590 }, { "epoch": 0.43, "learning_rate": 3.543346136204545e-06, "logits/chosen": -1.2614418268203735, "logits/rejected": -0.3045172095298767, "logps/chosen": -896.9788818359375, "logps/rejected": -1856.7265625, "loss": 0.1537, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1813734471797943, "rewards/margins": 0.2908100187778473, "rewards/rejected": -0.4721834659576416, "step": 1600 }, { "epoch": 0.43, "learning_rate": 3.522153641615345e-06, "logits/chosen": -1.4344582557678223, "logits/rejected": -0.6459347009658813, "logps/chosen": -729.3267822265625, "logps/rejected": -1761.7333984375, "loss": 0.1404, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12001194804906845, "rewards/margins": 0.3599693179130554, "rewards/rejected": -0.47998133301734924, "step": 1610 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -1.4531586170196533, "logits/rejected": -0.5490698218345642, "logps/chosen": -856.87255859375, "logps/rejected": -1625.8597412109375, "loss": 0.1619, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10974371433258057, "rewards/margins": 0.3060295283794403, "rewards/rejected": -0.4157732427120209, "step": 1620 }, { "epoch": 0.43, "learning_rate": 3.4795047994562463e-06, "logits/chosen": -1.4780563116073608, "logits/rejected": -0.4708196222782135, "logps/chosen": -758.4917602539062, "logps/rejected": -1597.0938720703125, "loss": 0.2097, "rewards/accuracies": 0.75, "rewards/chosen": -0.12127669155597687, "rewards/margins": 0.24855844676494598, "rewards/rejected": -0.36983510851860046, "step": 1630 }, { "epoch": 0.44, "learning_rate": 3.458052147242494e-06, "logits/chosen": -1.7074508666992188, "logits/rejected": -0.8495294451713562, "logps/chosen": -809.2350463867188, "logps/rejected": -1472.79443359375, "loss": 0.186, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10575731843709946, "rewards/margins": 0.18694370985031128, "rewards/rejected": -0.29270103573799133, "step": 1640 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.6757071018218994, "logits/rejected": -0.4471355378627777, "logps/chosen": -738.4520874023438, "logps/rejected": -1533.0238037109375, "loss": 0.1949, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09687335044145584, "rewards/margins": 0.2056708037853241, "rewards/rejected": -0.30254414677619934, "step": 1650 }, { "epoch": 0.44, "learning_rate": 3.4148996743295305e-06, "logits/chosen": -1.704223394393921, "logits/rejected": -0.5917268991470337, "logps/chosen": -850.6807861328125, "logps/rejected": -1609.865478515625, "loss": 0.2066, "rewards/accuracies": 0.875, "rewards/chosen": -0.05562291666865349, "rewards/margins": 0.2252390831708908, "rewards/rejected": -0.2808619737625122, "step": 1660 }, { "epoch": 0.45, "learning_rate": 3.3932035926241103e-06, "logits/chosen": -1.5309293270111084, "logits/rejected": -1.2061899900436401, "logps/chosen": -625.7445068359375, "logps/rejected": -1734.443603515625, "loss": 0.1488, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05109323933720589, "rewards/margins": 0.34637781977653503, "rewards/rejected": -0.3974711000919342, "step": 1670 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -1.503655195236206, "logits/rejected": -0.6576262712478638, "logps/chosen": -691.9463500976562, "logps/rejected": -1632.8353271484375, "loss": 0.1711, "rewards/accuracies": 0.75, "rewards/chosen": -0.08215099573135376, "rewards/margins": 0.270992249250412, "rewards/rejected": -0.35314327478408813, "step": 1680 }, { "epoch": 0.45, "learning_rate": 3.349581137957604e-06, "logits/chosen": -1.7165721654891968, "logits/rejected": -0.6651977896690369, "logps/chosen": -709.5531005859375, "logps/rejected": -1576.0364990234375, "loss": 0.2304, "rewards/accuracies": 0.75, "rewards/chosen": -0.08810341358184814, "rewards/margins": 0.2739308476448059, "rewards/rejected": -0.36203423142433167, "step": 1690 }, { "epoch": 0.45, "learning_rate": 3.3276585447123957e-06, "logits/chosen": -1.232627034187317, "logits/rejected": -0.1004166379570961, "logps/chosen": -676.5740966796875, "logps/rejected": -1472.2635498046875, "loss": 0.2152, "rewards/accuracies": 0.75, "rewards/chosen": -0.10900652408599854, "rewards/margins": 0.22487536072731018, "rewards/rejected": -0.3338818848133087, "step": 1700 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -1.5267202854156494, "logits/rejected": -0.49788492918014526, "logps/chosen": -719.8569946289062, "logps/rejected": -1706.168212890625, "loss": 0.1489, "rewards/accuracies": 0.875, "rewards/chosen": -0.04703357070684433, "rewards/margins": 0.3542724847793579, "rewards/rejected": -0.40130606293678284, "step": 1710 }, { "epoch": 0.46, "learning_rate": 3.2836001237702993e-06, "logits/chosen": -1.6021124124526978, "logits/rejected": -0.44486141204833984, "logps/chosen": -737.7840576171875, "logps/rejected": -1778.464111328125, "loss": 0.1286, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.05765662342309952, "rewards/margins": 0.3132147192955017, "rewards/rejected": -0.370871365070343, "step": 1720 }, { "epoch": 0.46, "learning_rate": 3.2614681135640696e-06, "logits/chosen": -1.5471503734588623, "logits/rejected": -0.8976337313652039, "logps/chosen": -721.9874267578125, "logps/rejected": -1760.84765625, "loss": 0.161, "rewards/accuracies": 0.875, "rewards/chosen": -0.0736970454454422, "rewards/margins": 0.24696488678455353, "rewards/rejected": -0.3206619620323181, "step": 1730 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -1.4463790655136108, "logits/rejected": -0.8106206059455872, "logps/chosen": -668.1365356445312, "logps/rejected": -1573.209716796875, "loss": 0.141, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08170486986637115, "rewards/margins": 0.264334499835968, "rewards/rejected": -0.34603938460350037, "step": 1740 }, { "epoch": 0.47, "learning_rate": 3.217008081777726e-06, "logits/chosen": -1.5568472146987915, "logits/rejected": -0.6958287954330444, "logps/chosen": -861.89013671875, "logps/rejected": -1898.191162109375, "loss": 0.1875, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18643470108509064, "rewards/margins": 0.3724428713321686, "rewards/rejected": -0.5588775277137756, "step": 1750 }, { "epoch": 0.47, "learning_rate": 3.1946839124862873e-06, "logits/chosen": -1.314772367477417, "logits/rejected": -0.18543431162834167, "logps/chosen": -708.21923828125, "logps/rejected": -1773.858642578125, "loss": 0.1311, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12136778980493546, "rewards/margins": 0.3536931574344635, "rewards/rejected": -0.4750608801841736, "step": 1760 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -1.386833667755127, "logits/rejected": -0.44682741165161133, "logps/chosen": -727.7785034179688, "logps/rejected": -1677.187744140625, "loss": 0.2002, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10554149001836777, "rewards/margins": 0.328396201133728, "rewards/rejected": -0.4339376389980316, "step": 1770 }, { "epoch": 0.47, "learning_rate": 3.149856938451094e-06, "logits/chosen": -1.6613432168960571, "logits/rejected": 0.04711759090423584, "logps/chosen": -814.40380859375, "logps/rejected": -1579.5726318359375, "loss": 0.1541, "rewards/accuracies": 0.875, "rewards/chosen": -0.12790891528129578, "rewards/margins": 0.259776771068573, "rewards/rejected": -0.387685626745224, "step": 1780 }, { "epoch": 0.48, "learning_rate": 3.127358017790132e-06, "logits/chosen": -1.352160096168518, "logits/rejected": 0.36388832330703735, "logps/chosen": -850.2373046875, "logps/rejected": -1793.983154296875, "loss": 0.1351, "rewards/accuracies": 0.875, "rewards/chosen": -0.15040907263755798, "rewards/margins": 0.26560917496681213, "rewards/rejected": -0.4160182476043701, "step": 1790 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.3986834287643433, "logits/rejected": -0.2482280433177948, "logps/chosen": -609.4581298828125, "logps/rejected": -1647.325439453125, "loss": 0.1148, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05277906730771065, "rewards/margins": 0.2984154522418976, "rewards/rejected": -0.35119450092315674, "step": 1800 }, { "epoch": 0.48, "learning_rate": 3.082199056232015e-06, "logits/chosen": -1.453552007675171, "logits/rejected": -1.1569862365722656, "logps/chosen": -593.3402709960938, "logps/rejected": -1446.66943359375, "loss": 0.2248, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06173533946275711, "rewards/margins": 0.21113066375255585, "rewards/rejected": -0.27286598086357117, "step": 1810 }, { "epoch": 0.49, "learning_rate": 3.059542928183079e-06, "logits/chosen": -1.123428463935852, "logits/rejected": -0.1749972403049469, "logps/chosen": -752.8836669921875, "logps/rejected": -1819.65234375, "loss": 0.1707, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07862022519111633, "rewards/margins": 0.3714417815208435, "rewards/rejected": -0.4500620365142822, "step": 1820 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -1.3128252029418945, "logits/rejected": -0.5668991208076477, "logps/chosen": -749.9387817382812, "logps/rejected": -1737.239501953125, "loss": 0.1744, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1024591326713562, "rewards/margins": 0.33585435152053833, "rewards/rejected": -0.43831348419189453, "step": 1830 }, { "epoch": 0.49, "learning_rate": 3.0140871927018466e-06, "logits/chosen": -1.5394883155822754, "logits/rejected": -0.7085919380187988, "logps/chosen": -835.1018676757812, "logps/rejected": -1881.120361328125, "loss": 0.1541, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16260461509227753, "rewards/margins": 0.349585622549057, "rewards/rejected": -0.5121902227401733, "step": 1840 }, { "epoch": 0.49, "learning_rate": 2.9912915238320755e-06, "logits/chosen": -1.323406457901001, "logits/rejected": -0.5690110921859741, "logps/chosen": -649.0794677734375, "logps/rejected": -1694.8863525390625, "loss": 0.1603, "rewards/accuracies": 0.875, "rewards/chosen": -0.10500358045101166, "rewards/margins": 0.30655449628829956, "rewards/rejected": -0.41155806183815, "step": 1850 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -1.6879428625106812, "logits/rejected": -0.8558928370475769, "logps/chosen": -687.2265014648438, "logps/rejected": -1546.723876953125, "loss": 0.1733, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14953789114952087, "rewards/margins": 0.27107977867126465, "rewards/rejected": -0.4206176698207855, "step": 1860 }, { "epoch": 0.5, "learning_rate": 2.945574459442917e-06, "logits/chosen": -1.5647737979888916, "logits/rejected": -1.020084261894226, "logps/chosen": -731.6143798828125, "logps/rejected": -1668.8765869140625, "loss": 0.1614, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15018759667873383, "rewards/margins": 0.3018895983695984, "rewards/rejected": -0.4520772099494934, "step": 1870 }, { "epoch": 0.5, "learning_rate": 2.922657025129185e-06, "logits/chosen": -1.4925051927566528, "logits/rejected": -0.517613410949707, "logps/chosen": -759.2529296875, "logps/rejected": -1762.791748046875, "loss": 0.2552, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16193100810050964, "rewards/margins": 0.29394176602363586, "rewards/rejected": -0.4558727741241455, "step": 1880 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -1.5928837060928345, "logits/rejected": -0.7866376042366028, "logps/chosen": -774.6558837890625, "logps/rejected": -1785.3062744140625, "loss": 0.1509, "rewards/accuracies": 0.75, "rewards/chosen": -0.1210324764251709, "rewards/margins": 0.31033051013946533, "rewards/rejected": -0.43136295676231384, "step": 1890 }, { "epoch": 0.51, "learning_rate": 2.876714280623708e-06, "logits/chosen": -1.347715139389038, "logits/rejected": -0.3572938144207001, "logps/chosen": -739.8132934570312, "logps/rejected": -1673.948486328125, "loss": 0.1395, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0940694585442543, "rewards/margins": 0.3482546806335449, "rewards/rejected": -0.44232410192489624, "step": 1900 }, { "epoch": 0.51, "learning_rate": 2.8536929511919227e-06, "logits/chosen": -1.5348753929138184, "logits/rejected": -0.0369141586124897, "logps/chosen": -672.0413818359375, "logps/rejected": -1597.8165283203125, "loss": 0.1693, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09073442965745926, "rewards/margins": 0.2980082929134369, "rewards/rejected": -0.38874274492263794, "step": 1910 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -1.3153806924819946, "logits/rejected": -0.5097673535346985, "logps/chosen": -708.4708251953125, "logps/rejected": -1756.6849365234375, "loss": 0.1513, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13321314752101898, "rewards/margins": 0.31301358342170715, "rewards/rejected": -0.44622668623924255, "step": 1920 }, { "epoch": 0.51, "learning_rate": 2.807560351340302e-06, "logits/chosen": -1.4458258152008057, "logits/rejected": -0.42417654395103455, "logps/chosen": -671.51611328125, "logps/rejected": -1660.485107421875, "loss": 0.1559, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11455672979354858, "rewards/margins": 0.3709821105003357, "rewards/rejected": -0.48553887009620667, "step": 1930 }, { "epoch": 0.52, "learning_rate": 2.7844530781306544e-06, "logits/chosen": -1.3651716709136963, "logits/rejected": -0.4336569905281067, "logps/chosen": -840.4568481445312, "logps/rejected": -2070.00146484375, "loss": 0.1597, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19497820734977722, "rewards/margins": 0.396168977022171, "rewards/rejected": -0.5911471247673035, "step": 1940 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.573979377746582, "logits/rejected": -0.7406612038612366, "logps/chosen": -825.8062744140625, "logps/rejected": -1895.0921630859375, "loss": 0.1527, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.156173974275589, "rewards/margins": 0.32018476724624634, "rewards/rejected": -0.4763587415218353, "step": 1950 }, { "epoch": 0.52, "learning_rate": 2.738166595746554e-06, "logits/chosen": -1.6485137939453125, "logits/rejected": -0.31914329528808594, "logps/chosen": -834.5794677734375, "logps/rejected": -1904.170654296875, "loss": 0.1606, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.15797320008277893, "rewards/margins": 0.32314831018447876, "rewards/rejected": -0.4811214506626129, "step": 1960 }, { "epoch": 0.53, "learning_rate": 2.7149913971156105e-06, "logits/chosen": -1.5849123001098633, "logits/rejected": -0.718601644039154, "logps/chosen": -758.3035278320312, "logps/rejected": -1674.5123291015625, "loss": 0.1629, "rewards/accuracies": 0.75, "rewards/chosen": -0.15117041766643524, "rewards/margins": 0.3155497610569, "rewards/rejected": -0.4667201042175293, "step": 1970 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -1.2325398921966553, "logits/rejected": -0.8877838850021362, "logps/chosen": -891.3978271484375, "logps/rejected": -1764.385498046875, "loss": 0.149, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2210262268781662, "rewards/margins": 0.3151123523712158, "rewards/rejected": -0.536138653755188, "step": 1980 }, { "epoch": 0.53, "learning_rate": 2.668587125005663e-06, "logits/chosen": -1.5326184034347534, "logits/rejected": -0.6966463923454285, "logps/chosen": -733.0716552734375, "logps/rejected": -1570.1944580078125, "loss": 0.1168, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14229288697242737, "rewards/margins": 0.30633196234703064, "rewards/rejected": -0.44862478971481323, "step": 1990 }, { "epoch": 0.53, "learning_rate": 2.6453620722761897e-06, "logits/chosen": -1.5661652088165283, "logits/rejected": -0.7556155920028687, "logps/chosen": -820.5909423828125, "logps/rejected": -1868.76171875, "loss": 0.1276, "rewards/accuracies": 0.875, "rewards/chosen": -0.21517057716846466, "rewards/margins": 0.35130250453948975, "rewards/rejected": -0.566473126411438, "step": 2000 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -1.37876558303833, "logits/rejected": -0.1346241682767868, "logps/chosen": -989.1995239257812, "logps/rejected": -1853.1685791015625, "loss": 0.1738, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2625274062156677, "rewards/margins": 0.27383849024772644, "rewards/rejected": -0.5363659858703613, "step": 2010 }, { "epoch": 0.54, "learning_rate": 2.5988761950959133e-06, "logits/chosen": -1.4494779109954834, "logits/rejected": -0.5709508061408997, "logps/chosen": -801.075927734375, "logps/rejected": -1952.412841796875, "loss": 0.1589, "rewards/accuracies": 0.875, "rewards/chosen": -0.18583470582962036, "rewards/margins": 0.39585351943969727, "rewards/rejected": -0.5816881656646729, "step": 2020 }, { "epoch": 0.54, "learning_rate": 2.575619398465402e-06, "logits/chosen": -1.5121077299118042, "logits/rejected": -0.7577626705169678, "logps/chosen": -630.1747436523438, "logps/rejected": -1437.3203125, "loss": 0.2045, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12697990238666534, "rewards/margins": 0.22890718281269073, "rewards/rejected": -0.35588711500167847, "step": 2030 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -1.165709137916565, "logits/rejected": -0.6048153638839722, "logps/chosen": -719.2890625, "logps/rejected": -1616.2135009765625, "loss": 0.1598, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11988051235675812, "rewards/margins": 0.26138466596603394, "rewards/rejected": -0.38126516342163086, "step": 2040 }, { "epoch": 0.55, "learning_rate": 2.5290881645034932e-06, "logits/chosen": -1.6587855815887451, "logits/rejected": -0.6381639838218689, "logps/chosen": -765.1217041015625, "logps/rejected": -1829.5943603515625, "loss": 0.1613, "rewards/accuracies": 0.875, "rewards/chosen": -0.16699892282485962, "rewards/margins": 0.37708956003189087, "rewards/rejected": -0.5440884828567505, "step": 2050 }, { "epoch": 0.55, "learning_rate": 2.5058177589223766e-06, "logits/chosen": -1.379034399986267, "logits/rejected": -0.6342862248420715, "logps/chosen": -677.946533203125, "logps/rejected": -1782.3040771484375, "loss": 0.1529, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12249922752380371, "rewards/margins": 0.374000608921051, "rewards/rejected": -0.49649983644485474, "step": 2060 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -1.2877978086471558, "logits/rejected": -0.628576397895813, "logps/chosen": -799.4703369140625, "logps/rejected": -1640.319580078125, "loss": 0.1346, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15292036533355713, "rewards/margins": 0.2804059386253357, "rewards/rejected": -0.4333263039588928, "step": 2070 }, { "epoch": 0.55, "learning_rate": 2.4592774518353858e-06, "logits/chosen": -1.4938738346099854, "logits/rejected": -0.6422006487846375, "logps/chosen": -752.1285400390625, "logps/rejected": -1773.2620849609375, "loss": 0.1651, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16756217181682587, "rewards/margins": 0.3254481554031372, "rewards/rejected": -0.49301037192344666, "step": 2080 }, { "epoch": 0.56, "learning_rate": 2.436011582865945e-06, "logits/chosen": -1.5365828275680542, "logits/rejected": -0.6787657141685486, "logps/chosen": -893.0496215820312, "logps/rejected": -1954.225341796875, "loss": 0.1604, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2155316323041916, "rewards/margins": 0.3428398668766022, "rewards/rejected": -0.5583714842796326, "step": 2090 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.1174455881118774, "logits/rejected": 0.26067572832107544, "logps/chosen": -777.9120483398438, "logps/rejected": -1707.302001953125, "loss": 0.1845, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17711251974105835, "rewards/margins": 0.2716800570487976, "rewards/rejected": -0.44879254698753357, "step": 2100 }, { "epoch": 0.56, "learning_rate": 2.3894984933853734e-06, "logits/chosen": -1.5128055810928345, "logits/rejected": -0.5743480920791626, "logps/chosen": -856.6569213867188, "logps/rejected": -1764.4976806640625, "loss": 0.1695, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1948443353176117, "rewards/margins": 0.25961554050445557, "rewards/rejected": -0.4544598460197449, "step": 2110 }, { "epoch": 0.57, "learning_rate": 2.366255303052377e-06, "logits/chosen": -1.5328229665756226, "logits/rejected": -0.41585612297058105, "logps/chosen": -865.1114501953125, "logps/rejected": -1857.373046875, "loss": 0.1726, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16477537155151367, "rewards/margins": 0.2972187399864197, "rewards/rejected": -0.46199408173561096, "step": 2120 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -1.5353227853775024, "logits/rejected": 0.3234299123287201, "logps/chosen": -791.5120239257812, "logps/rejected": -2002.2496337890625, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/chosen": -0.15296730399131775, "rewards/margins": 0.41708940267562866, "rewards/rejected": -0.5700567960739136, "step": 2130 }, { "epoch": 0.57, "learning_rate": 2.319805700686257e-06, "logits/chosen": -1.2240071296691895, "logits/rejected": -0.7209154963493347, "logps/chosen": -790.4075927734375, "logps/rejected": -1767.944580078125, "loss": 0.1377, "rewards/accuracies": 0.875, "rewards/chosen": -0.12179949134588242, "rewards/margins": 0.326397180557251, "rewards/rejected": -0.4481966495513916, "step": 2140 }, { "epoch": 0.57, "learning_rate": 2.296603313330355e-06, "logits/chosen": -1.6259534358978271, "logits/rejected": -0.29775649309158325, "logps/chosen": -746.3842163085938, "logps/rejected": -1520.571533203125, "loss": 0.1711, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13973134756088257, "rewards/margins": 0.26066604256629944, "rewards/rejected": -0.400397390127182, "step": 2150 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -1.2232530117034912, "logits/rejected": -0.18962112069129944, "logps/chosen": -576.0113525390625, "logps/rejected": -1693.240966796875, "loss": 0.1384, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13496878743171692, "rewards/margins": 0.3316168785095215, "rewards/rejected": -0.466585636138916, "step": 2160 }, { "epoch": 0.58, "learning_rate": 2.250253418081373e-06, "logits/chosen": -1.4692437648773193, "logits/rejected": 0.04818035289645195, "logps/chosen": -899.2703857421875, "logps/rejected": -1808.1484375, "loss": 0.2009, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21512703597545624, "rewards/margins": 0.30080264806747437, "rewards/rejected": -0.5159296989440918, "step": 2170 }, { "epoch": 0.58, "learning_rate": 2.22710992622628e-06, "logits/chosen": -1.5507208108901978, "logits/rejected": 0.1027313843369484, "logps/chosen": -805.1966552734375, "logps/rejected": -1811.1793212890625, "loss": 0.1454, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13587817549705505, "rewards/margins": 0.39650699496269226, "rewards/rejected": -0.5323852300643921, "step": 2180 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -1.395817756652832, "logits/rejected": -0.45912352204322815, "logps/chosen": -768.6142578125, "logps/rejected": -1828.7073974609375, "loss": 0.1529, "rewards/accuracies": 0.875, "rewards/chosen": -0.1312076300382614, "rewards/margins": 0.3623288571834564, "rewards/rejected": -0.4935365319252014, "step": 2190 }, { "epoch": 0.59, "learning_rate": 2.1808958803485134e-06, "logits/chosen": -1.5901384353637695, "logits/rejected": -0.8347817659378052, "logps/chosen": -523.8443603515625, "logps/rejected": -1578.4923095703125, "loss": 0.1123, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07345361262559891, "rewards/margins": 0.34433066844940186, "rewards/rejected": -0.41778427362442017, "step": 2200 }, { "epoch": 0.59, "learning_rate": 2.157829330593008e-06, "logits/chosen": -1.5194613933563232, "logits/rejected": 0.044505536556243896, "logps/chosen": -846.5181884765625, "logps/rejected": -1842.278564453125, "loss": 0.1109, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18009065091609955, "rewards/margins": 0.37484800815582275, "rewards/rejected": -0.5549386739730835, "step": 2210 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -1.517464280128479, "logits/rejected": -0.3259011209011078, "logps/chosen": -696.5281982421875, "logps/rejected": -1474.2158203125, "loss": 0.1964, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13613803684711456, "rewards/margins": 0.25182804465293884, "rewards/rejected": -0.3879660964012146, "step": 2220 }, { "epoch": 0.59, "learning_rate": 2.1117871704092818e-06, "logits/chosen": -1.5486299991607666, "logits/rejected": -0.5334731936454773, "logps/chosen": -767.1392822265625, "logps/rejected": -1842.3082275390625, "loss": 0.121, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1175505518913269, "rewards/margins": 0.3775568902492523, "rewards/rejected": -0.4951074719429016, "step": 2230 }, { "epoch": 0.6, "learning_rate": 2.0888155493550027e-06, "logits/chosen": -1.5350594520568848, "logits/rejected": -0.2820148468017578, "logps/chosen": -626.4788208007812, "logps/rejected": -1641.033447265625, "loss": 0.1776, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07961525768041611, "rewards/margins": 0.3609825670719147, "rewards/rejected": -0.4405978322029114, "step": 2240 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.458655595779419, "logits/rejected": -0.2539186477661133, "logps/chosen": -764.8986206054688, "logps/rejected": -1802.0277099609375, "loss": 0.1842, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09169100970029831, "rewards/margins": 0.3511391580104828, "rewards/rejected": -0.4428301751613617, "step": 2250 }, { "epoch": 0.6, "learning_rate": 2.0429811771568468e-06, "logits/chosen": -1.5172240734100342, "logits/rejected": -0.4895601272583008, "logps/chosen": -801.1700439453125, "logps/rejected": -1634.53955078125, "loss": 0.165, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1120598167181015, "rewards/margins": 0.27336305379867554, "rewards/rejected": -0.38542285561561584, "step": 2260 }, { "epoch": 0.61, "learning_rate": 2.0201223973828917e-06, "logits/chosen": -1.627150535583496, "logits/rejected": -0.6142350435256958, "logps/chosen": -660.062744140625, "logps/rejected": -1752.922119140625, "loss": 0.1921, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.08810608088970184, "rewards/margins": 0.36186718940734863, "rewards/rejected": -0.44997328519821167, "step": 2270 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -1.5025413036346436, "logits/rejected": -0.59123295545578, "logps/chosen": -817.2398681640625, "logps/rejected": -1722.5006103515625, "loss": 0.1627, "rewards/accuracies": 0.75, "rewards/chosen": -0.10656937211751938, "rewards/margins": 0.2904852032661438, "rewards/rejected": -0.3970545828342438, "step": 2280 }, { "epoch": 0.61, "learning_rate": 1.9745315534350157e-06, "logits/chosen": -1.5288734436035156, "logits/rejected": -0.7481527328491211, "logps/chosen": -627.6370849609375, "logps/rejected": -1676.7398681640625, "loss": 0.1255, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09700825065374374, "rewards/margins": 0.31658482551574707, "rewards/rejected": -0.4135931134223938, "step": 2290 }, { "epoch": 0.61, "learning_rate": 1.9518034395302413e-06, "logits/chosen": -1.2082513570785522, "logits/rejected": -0.5326002836227417, "logps/chosen": -606.5856323242188, "logps/rejected": -1724.590087890625, "loss": 0.1711, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08526596426963806, "rewards/margins": 0.3748168349266052, "rewards/rejected": -0.4600828289985657, "step": 2300 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -1.349005103111267, "logits/rejected": -0.7115123867988586, "logps/chosen": -850.7883911132812, "logps/rejected": -1918.0609130859375, "loss": 0.1049, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1696614772081375, "rewards/margins": 0.3336087763309479, "rewards/rejected": -0.5032702684402466, "step": 2310 }, { "epoch": 0.62, "learning_rate": 1.9064916742013515e-06, "logits/chosen": -1.4064449071884155, "logits/rejected": -0.41218453645706177, "logps/chosen": -922.0281372070312, "logps/rejected": -1926.2939453125, "loss": 0.1868, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20819124579429626, "rewards/margins": 0.350273996591568, "rewards/rejected": -0.558465301990509, "step": 2320 }, { "epoch": 0.62, "learning_rate": 1.883911948865306e-06, "logits/chosen": -1.4412527084350586, "logits/rejected": -0.3628607392311096, "logps/chosen": -686.2496948242188, "logps/rejected": -1755.6363525390625, "loss": 0.1658, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1295745074748993, "rewards/margins": 0.372491717338562, "rewards/rejected": -0.5020662546157837, "step": 2330 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -1.6775572299957275, "logits/rejected": -0.6549306511878967, "logps/chosen": -597.9432373046875, "logps/rejected": -1609.9539794921875, "loss": 0.137, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10794492810964584, "rewards/margins": 0.3688591420650482, "rewards/rejected": -0.47680407762527466, "step": 2340 }, { "epoch": 0.63, "learning_rate": 1.8389145949069953e-06, "logits/chosen": -1.382621169090271, "logits/rejected": -0.4452172815799713, "logps/chosen": -683.6560668945312, "logps/rejected": -1507.298095703125, "loss": 0.2337, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.110257588326931, "rewards/margins": 0.25430378317832947, "rewards/rejected": -0.36456140875816345, "step": 2350 }, { "epoch": 0.63, "learning_rate": 1.816500865130279e-06, "logits/chosen": -1.467057228088379, "logits/rejected": -0.31321266293525696, "logps/chosen": -672.0931396484375, "logps/rejected": -1508.775634765625, "loss": 0.1942, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09393011033535004, "rewards/margins": 0.24138036370277405, "rewards/rejected": -0.33531051874160767, "step": 2360 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -1.6495239734649658, "logits/rejected": -0.6112552285194397, "logps/chosen": -657.7040405273438, "logps/rejected": -1789.280029296875, "loss": 0.149, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07512945681810379, "rewards/margins": 0.32172054052352905, "rewards/rejected": -0.39685001969337463, "step": 2370 }, { "epoch": 0.63, "learning_rate": 1.7718530101256115e-06, "logits/chosen": -1.4681954383850098, "logits/rejected": -0.752936840057373, "logps/chosen": -775.773193359375, "logps/rejected": -1576.1943359375, "loss": 0.2381, "rewards/accuracies": 0.75, "rewards/chosen": -0.12784716486930847, "rewards/margins": 0.20212575793266296, "rewards/rejected": -0.32997292280197144, "step": 2380 }, { "epoch": 0.64, "learning_rate": 1.7496227534604859e-06, "logits/chosen": -1.6714906692504883, "logits/rejected": -1.0251556634902954, "logps/chosen": -675.6273803710938, "logps/rejected": -1630.176025390625, "loss": 0.1692, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0969114676117897, "rewards/margins": 0.361731618642807, "rewards/rejected": -0.4586430490016937, "step": 2390 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.5120388269424438, "logits/rejected": -0.5438052415847778, "logps/chosen": -692.0360717773438, "logps/rejected": -1721.4326171875, "loss": 0.1631, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11012852191925049, "rewards/margins": 0.3708108365535736, "rewards/rejected": -0.4809393882751465, "step": 2400 }, { "epoch": 0.64, "learning_rate": 1.7053592124637557e-06, "logits/chosen": -1.6181414127349854, "logits/rejected": -0.6608393788337708, "logps/chosen": -747.2515869140625, "logps/rejected": -1643.6500244140625, "loss": 0.2218, "rewards/accuracies": 0.75, "rewards/chosen": -0.13988645374774933, "rewards/margins": 0.2650088965892792, "rewards/rejected": -0.4048953652381897, "step": 2410 }, { "epoch": 0.65, "learning_rate": 1.6833297633956647e-06, "logits/chosen": -1.3472005128860474, "logits/rejected": 0.12191818654537201, "logps/chosen": -693.1132202148438, "logps/rejected": -1696.821533203125, "loss": 0.1063, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07493006438016891, "rewards/margins": 0.32017913460731506, "rewards/rejected": -0.3951091766357422, "step": 2420 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -1.4592828750610352, "logits/rejected": -0.45236214995384216, "logps/chosen": -696.2838745117188, "logps/rejected": -1729.255126953125, "loss": 0.1603, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.061267025768756866, "rewards/margins": 0.35353952646255493, "rewards/rejected": -0.4148065447807312, "step": 2430 }, { "epoch": 0.65, "learning_rate": 1.6394850517846621e-06, "logits/chosen": -1.6494789123535156, "logits/rejected": -0.9741545915603638, "logps/chosen": -759.4490966796875, "logps/rejected": -1714.441162109375, "loss": 0.1556, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10241154581308365, "rewards/margins": 0.3450758159160614, "rewards/rejected": -0.44748735427856445, "step": 2440 }, { "epoch": 0.65, "learning_rate": 1.6176735882153284e-06, "logits/chosen": -1.468384027481079, "logits/rejected": -0.16317103803157806, "logps/chosen": -605.1905517578125, "logps/rejected": -1460.53955078125, "loss": 0.1345, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0676204189658165, "rewards/margins": 0.30662956833839417, "rewards/rejected": -0.37424999475479126, "step": 2450 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -1.4594337940216064, "logits/rejected": -0.8179009556770325, "logps/chosen": -606.8065185546875, "logps/rejected": -1657.3125, "loss": 0.1104, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06961175799369812, "rewards/margins": 0.3652622103691101, "rewards/rejected": -0.4348739683628082, "step": 2460 }, { "epoch": 0.66, "learning_rate": 1.5742818947772875e-06, "logits/chosen": -1.5698840618133545, "logits/rejected": 0.06299029290676117, "logps/chosen": -768.6961669921875, "logps/rejected": -1631.1295166015625, "loss": 0.137, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09274804592132568, "rewards/margins": 0.27431541681289673, "rewards/rejected": -0.3670634627342224, "step": 2470 }, { "epoch": 0.66, "learning_rate": 1.552705424629898e-06, "logits/chosen": -1.4378259181976318, "logits/rejected": -0.6328508257865906, "logps/chosen": -757.0548095703125, "logps/rejected": -1625.8509521484375, "loss": 0.1496, "rewards/accuracies": 0.875, "rewards/chosen": -0.09750431030988693, "rewards/margins": 0.25974932312965393, "rewards/rejected": -0.35725364089012146, "step": 2480 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -1.429025411605835, "logits/rejected": -1.0502439737319946, "logps/chosen": -620.97998046875, "logps/rejected": -1421.07470703125, "loss": 0.1467, "rewards/accuracies": 0.75, "rewards/chosen": -0.0735950618982315, "rewards/margins": 0.2558991312980652, "rewards/rejected": -0.3294941782951355, "step": 2490 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -1.2865421772003174, "logits/rejected": -0.21516099572181702, "logps/chosen": -712.9671630859375, "logps/rejected": -1577.719970703125, "loss": 0.1254, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08029041439294815, "rewards/margins": 0.2842417359352112, "rewards/rejected": -0.36453211307525635, "step": 2500 }, { "epoch": 0.67, "learning_rate": 1.4884759328590476e-06, "logits/chosen": -1.7966272830963135, "logits/rejected": -0.5356184244155884, "logps/chosen": -774.8032836914062, "logps/rejected": -1728.052490234375, "loss": 0.1569, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13011132180690765, "rewards/margins": 0.38214725255966187, "rewards/rejected": -0.5122585892677307, "step": 2510 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -1.6186519861221313, "logits/rejected": 0.41130122542381287, "logps/chosen": -872.7478637695312, "logps/rejected": -1909.5640869140625, "loss": 0.148, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1565207540988922, "rewards/margins": 0.3737161159515381, "rewards/rejected": -0.5302368402481079, "step": 2520 }, { "epoch": 0.67, "learning_rate": 1.446091402744923e-06, "logits/chosen": -1.2679738998413086, "logits/rejected": -0.5303937196731567, "logps/chosen": -702.9267578125, "logps/rejected": -1642.1829833984375, "loss": 0.1461, "rewards/accuracies": 0.875, "rewards/chosen": -0.1064765453338623, "rewards/margins": 0.34039077162742615, "rewards/rejected": -0.44686728715896606, "step": 2530 }, { "epoch": 0.68, "learning_rate": 1.4250351971283937e-06, "logits/chosen": -1.604107141494751, "logits/rejected": 0.4407750070095062, "logps/chosen": -770.2025146484375, "logps/rejected": -1616.8951416015625, "loss": 0.1425, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11423502117395401, "rewards/margins": 0.29226523637771606, "rewards/rejected": -0.4065002501010895, "step": 2540 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.4690310955047607, "logits/rejected": 0.40654927492141724, "logps/chosen": -752.0418090820312, "logps/rejected": -1654.997802734375, "loss": 0.1918, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11529115587472916, "rewards/margins": 0.2776246964931488, "rewards/rejected": -0.392915815114975, "step": 2550 }, { "epoch": 0.68, "learning_rate": 1.3832040268095589e-06, "logits/chosen": -1.2964470386505127, "logits/rejected": -0.35982269048690796, "logps/chosen": -707.73681640625, "logps/rejected": -1798.7490234375, "loss": 0.1272, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.09786094725131989, "rewards/margins": 0.3779224455356598, "rewards/rejected": -0.4757833480834961, "step": 2560 }, { "epoch": 0.69, "learning_rate": 1.362432686615316e-06, "logits/chosen": -1.4789488315582275, "logits/rejected": -0.4336570203304291, "logps/chosen": -560.9461669921875, "logps/rejected": -1716.0234375, "loss": 0.164, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.058935634791851044, "rewards/margins": 0.3738183081150055, "rewards/rejected": -0.43275389075279236, "step": 2570 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -1.5111268758773804, "logits/rejected": -0.40306025743484497, "logps/chosen": -659.0839233398438, "logps/rejected": -1699.853515625, "loss": 0.1041, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08692941069602966, "rewards/margins": 0.35781151056289673, "rewards/rejected": -0.4447408616542816, "step": 2580 }, { "epoch": 0.69, "learning_rate": 1.3211874947800747e-06, "logits/chosen": -1.552757978439331, "logits/rejected": -0.733113169670105, "logps/chosen": -668.4918212890625, "logps/rejected": -1597.004150390625, "loss": 0.1547, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.09527282416820526, "rewards/margins": 0.2590247094631195, "rewards/rejected": -0.35429757833480835, "step": 2590 }, { "epoch": 0.69, "learning_rate": 1.3007172168743854e-06, "logits/chosen": -1.4352308511734009, "logits/rejected": -0.04908560588955879, "logps/chosen": -665.5040893554688, "logps/rejected": -1593.777587890625, "loss": 0.1445, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.08846768736839294, "rewards/margins": 0.32587721943855286, "rewards/rejected": -0.4143448770046234, "step": 2600 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -1.3806612491607666, "logits/rejected": -0.20799100399017334, "logps/chosen": -762.9417724609375, "logps/rejected": -1701.4697265625, "loss": 0.148, "rewards/accuracies": 0.875, "rewards/chosen": -0.15475930273532867, "rewards/margins": 0.29663509130477905, "rewards/rejected": -0.45139437913894653, "step": 2610 }, { "epoch": 0.7, "learning_rate": 1.260090165282645e-06, "logits/chosen": -1.4364516735076904, "logits/rejected": 0.2683382034301758, "logps/chosen": -701.4534912109375, "logps/rejected": -1679.4847412109375, "loss": 0.1688, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19048824906349182, "rewards/margins": 0.27079007029533386, "rewards/rejected": -0.46127834916114807, "step": 2620 }, { "epoch": 0.7, "learning_rate": 1.2399369117724582e-06, "logits/chosen": -1.4400079250335693, "logits/rejected": -0.4409395158290863, "logps/chosen": -797.1912841796875, "logps/rejected": -1693.439453125, "loss": 0.1558, "rewards/accuracies": 0.875, "rewards/chosen": -0.12387750297784805, "rewards/margins": 0.3022800087928772, "rewards/rejected": -0.42615753412246704, "step": 2630 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -1.5424561500549316, "logits/rejected": 0.6223300695419312, "logps/chosen": -694.9400024414062, "logps/rejected": -1691.6669921875, "loss": 0.1657, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0836716815829277, "rewards/margins": 0.3352142870426178, "rewards/rejected": -0.4188859462738037, "step": 2640 }, { "epoch": 0.71, "learning_rate": 1.1999596801769617e-06, "logits/chosen": -1.6569328308105469, "logits/rejected": -0.4357103407382965, "logps/chosen": -693.4281005859375, "logps/rejected": -1608.7647705078125, "loss": 0.1473, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.08369255810976028, "rewards/margins": 0.2921767234802246, "rewards/rejected": -0.3758693337440491, "step": 2650 }, { "epoch": 0.71, "learning_rate": 1.1801391659631423e-06, "logits/chosen": -1.5653746128082275, "logits/rejected": 0.7312607169151306, "logps/chosen": -660.3027954101562, "logps/rejected": -1588.2919921875, "loss": 0.1214, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06633373349905014, "rewards/margins": 0.30183035135269165, "rewards/rejected": -0.3681640923023224, "step": 2660 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -1.5090980529785156, "logits/rejected": -0.8780859112739563, "logps/chosen": -695.6319580078125, "logps/rejected": -1500.2325439453125, "loss": 0.1882, "rewards/accuracies": 0.75, "rewards/chosen": -0.07338380068540573, "rewards/margins": 0.2535490393638611, "rewards/rejected": -0.326932817697525, "step": 2670 }, { "epoch": 0.71, "learning_rate": 1.1408429274065418e-06, "logits/chosen": -1.3488575220108032, "logits/rejected": -0.6483387351036072, "logps/chosen": -572.9019165039062, "logps/rejected": -1599.5225830078125, "loss": 0.1575, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07061342149972916, "rewards/margins": 0.2944713532924652, "rewards/rejected": -0.36508476734161377, "step": 2680 }, { "epoch": 0.72, "learning_rate": 1.1213706079298566e-06, "logits/chosen": -1.4909251928329468, "logits/rejected": -0.27080297470092773, "logps/chosen": -564.2769165039062, "logps/rejected": -1579.8958740234375, "loss": 0.1631, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.053073953837156296, "rewards/margins": 0.32997792959213257, "rewards/rejected": -0.38305193185806274, "step": 2690 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.5161458253860474, "logits/rejected": -0.2539205551147461, "logps/chosen": -698.4984741210938, "logps/rejected": -1621.921630859375, "loss": 0.1693, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09174446761608124, "rewards/margins": 0.29953649640083313, "rewards/rejected": -0.3912809491157532, "step": 2700 }, { "epoch": 0.72, "learning_rate": 1.0827860044369226e-06, "logits/chosen": -1.5986204147338867, "logits/rejected": -0.2638501226902008, "logps/chosen": -766.909423828125, "logps/rejected": -1732.082275390625, "loss": 0.1602, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12383983284235, "rewards/margins": 0.32002753019332886, "rewards/rejected": -0.44386744499206543, "step": 2710 }, { "epoch": 0.73, "learning_rate": 1.06367706362636e-06, "logits/chosen": -1.4342091083526611, "logits/rejected": -0.8792727589607239, "logps/chosen": -737.6398315429688, "logps/rejected": -1701.187255859375, "loss": 0.1727, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1248563751578331, "rewards/margins": 0.2940226197242737, "rewards/rejected": -0.41887903213500977, "step": 2720 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -1.3923488855361938, "logits/rejected": -0.20048478245735168, "logps/chosen": -803.1317138671875, "logps/rejected": -1872.0126953125, "loss": 0.1227, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1411004215478897, "rewards/margins": 0.36530831456184387, "rewards/rejected": -0.5064087510108948, "step": 2730 }, { "epoch": 0.73, "learning_rate": 1.0258341823102418e-06, "logits/chosen": -1.5749719142913818, "logits/rejected": -0.40622204542160034, "logps/chosen": -757.8853759765625, "logps/rejected": -1855.6015625, "loss": 0.1441, "rewards/accuracies": 0.875, "rewards/chosen": -0.1515471190214157, "rewards/margins": 0.4066466689109802, "rewards/rejected": -0.5581938624382019, "step": 2740 }, { "epoch": 0.73, "learning_rate": 1.0071035207430352e-06, "logits/chosen": -1.6754239797592163, "logits/rejected": -0.716029167175293, "logps/chosen": -787.7699584960938, "logps/rejected": -1843.915283203125, "loss": 0.1577, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1490515172481537, "rewards/margins": 0.3542669713497162, "rewards/rejected": -0.5033184885978699, "step": 2750 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -1.3399386405944824, "logits/rejected": -0.6510659456253052, "logps/chosen": -645.7489013671875, "logps/rejected": -1645.0814208984375, "loss": 0.1528, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12475794553756714, "rewards/margins": 0.3248399794101715, "rewards/rejected": -0.44959789514541626, "step": 2760 }, { "epoch": 0.74, "learning_rate": 9.700318703442437e-07, "logits/chosen": -1.5086935758590698, "logits/rejected": -0.44206541776657104, "logps/chosen": -751.8470458984375, "logps/rejected": -1835.744873046875, "loss": 0.1461, "rewards/accuracies": 0.875, "rewards/chosen": -0.0970626249909401, "rewards/margins": 0.4153032898902893, "rewards/rejected": -0.5123659372329712, "step": 2770 }, { "epoch": 0.74, "learning_rate": 9.516940936268504e-07, "logits/chosen": -1.4146547317504883, "logits/rejected": -0.39967912435531616, "logps/chosen": -655.8258666992188, "logps/rejected": -1597.619873046875, "loss": 0.1199, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10331164300441742, "rewards/margins": 0.29826727509498596, "rewards/rejected": -0.4015789031982422, "step": 2780 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -1.5796334743499756, "logits/rejected": -0.18904080986976624, "logps/chosen": -649.2426147460938, "logps/rejected": -1707.6396484375, "loss": 0.1682, "rewards/accuracies": 0.875, "rewards/chosen": -0.08159293234348297, "rewards/margins": 0.3529675602912903, "rewards/rejected": -0.43456047773361206, "step": 2790 }, { "epoch": 0.75, "learning_rate": 9.154225815032242e-07, "logits/chosen": -1.5274991989135742, "logits/rejected": -0.7836966514587402, "logps/chosen": -610.8500366210938, "logps/rejected": -1656.6763916015625, "loss": 0.1472, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06673729419708252, "rewards/margins": 0.36455440521240234, "rewards/rejected": -0.4312916696071625, "step": 2800 }, { "epoch": 0.75, "learning_rate": 8.974919888823164e-07, "logits/chosen": -1.4849748611450195, "logits/rejected": -0.4282095432281494, "logps/chosen": -743.9561767578125, "logps/rejected": -1814.4990234375, "loss": 0.1699, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09201905876398087, "rewards/margins": 0.36516329646110535, "rewards/rejected": -0.4571823179721832, "step": 2810 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -1.4566679000854492, "logits/rejected": -0.5152000188827515, "logps/chosen": -523.7813720703125, "logps/rejected": -1502.1715087890625, "loss": 0.1961, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.058007530868053436, "rewards/margins": 0.3089088201522827, "rewards/rejected": -0.36691635847091675, "step": 2820 }, { "epoch": 0.75, "learning_rate": 8.620488984679378e-07, "logits/chosen": -1.7398220300674438, "logits/rejected": -0.2614799737930298, "logps/chosen": -641.33447265625, "logps/rejected": -1628.4310302734375, "loss": 0.1416, "rewards/accuracies": 0.875, "rewards/chosen": -0.05575539544224739, "rewards/margins": 0.34235674142837524, "rewards/rejected": -0.39811214804649353, "step": 2830 }, { "epoch": 0.76, "learning_rate": 8.445394716802754e-07, "logits/chosen": -1.5343586206436157, "logits/rejected": -0.6991978883743286, "logps/chosen": -695.736328125, "logps/rejected": -1697.3258056640625, "loss": 0.17, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0863880068063736, "rewards/margins": 0.3482551872730255, "rewards/rejected": -0.4346431791782379, "step": 2840 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.3757431507110596, "logits/rejected": -0.4847453236579895, "logps/chosen": -624.9798583984375, "logps/rejected": -1600.3311767578125, "loss": 0.1704, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07361427694559097, "rewards/margins": 0.3176981210708618, "rewards/rejected": -0.3913124203681946, "step": 2850 }, { "epoch": 0.76, "learning_rate": 8.099524404308948e-07, "logits/chosen": -1.463273525238037, "logits/rejected": -0.44319063425064087, "logps/chosen": -733.7213745117188, "logps/rejected": -1909.9351806640625, "loss": 0.1014, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1007709726691246, "rewards/margins": 0.38669538497924805, "rewards/rejected": -0.487466424703598, "step": 2860 }, { "epoch": 0.77, "learning_rate": 7.928778328007918e-07, "logits/chosen": -1.7150824069976807, "logits/rejected": -0.10497765243053436, "logps/chosen": -676.3504028320312, "logps/rejected": -1732.474609375, "loss": 0.1293, "rewards/accuracies": 0.875, "rewards/chosen": -0.07949225604534149, "rewards/margins": 0.3401327431201935, "rewards/rejected": -0.4196249842643738, "step": 2870 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -1.4279053211212158, "logits/rejected": -0.10710684955120087, "logps/chosen": -716.6738891601562, "logps/rejected": -1643.4683837890625, "loss": 0.1605, "rewards/accuracies": 0.875, "rewards/chosen": -0.09047757089138031, "rewards/margins": 0.2613658308982849, "rewards/rejected": -0.35184338688850403, "step": 2880 }, { "epoch": 0.77, "learning_rate": 7.591738306429769e-07, "logits/chosen": -1.4157854318618774, "logits/rejected": -0.362983763217926, "logps/chosen": -813.336181640625, "logps/rejected": -1738.5064697265625, "loss": 0.1146, "rewards/accuracies": 0.875, "rewards/chosen": -0.08977197110652924, "rewards/margins": 0.333584189414978, "rewards/rejected": -0.42335623502731323, "step": 2890 }, { "epoch": 0.77, "learning_rate": 7.425473564358457e-07, "logits/chosen": -1.2446939945220947, "logits/rejected": -0.24595150351524353, "logps/chosen": -870.3673706054688, "logps/rejected": -1808.233642578125, "loss": 0.2744, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14074157178401947, "rewards/margins": 0.33063870668411255, "rewards/rejected": -0.4713803231716156, "step": 2900 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -1.6176745891571045, "logits/rejected": -0.5913767218589783, "logps/chosen": -506.3935546875, "logps/rejected": -1553.77099609375, "loss": 0.1245, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.04700837656855583, "rewards/margins": 0.33918124437332153, "rewards/rejected": -0.38618963956832886, "step": 2910 }, { "epoch": 0.78, "learning_rate": 7.097526647366379e-07, "logits/chosen": -1.7674744129180908, "logits/rejected": -0.6992005705833435, "logps/chosen": -651.4013061523438, "logps/rejected": -1570.8125, "loss": 0.1434, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.058572918176651, "rewards/margins": 0.3027827739715576, "rewards/rejected": -0.3613556921482086, "step": 2920 }, { "epoch": 0.78, "learning_rate": 6.935872887769299e-07, "logits/chosen": -1.6404222249984741, "logits/rejected": -0.85943204164505, "logps/chosen": -782.1417846679688, "logps/rejected": -1594.085205078125, "loss": 0.2041, "rewards/accuracies": 0.75, "rewards/chosen": -0.0960140973329544, "rewards/margins": 0.2894643545150757, "rewards/rejected": -0.3854784369468689, "step": 2930 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -1.4033766984939575, "logits/rejected": -0.9920269846916199, "logps/chosen": -629.6160888671875, "logps/rejected": -1653.112548828125, "loss": 0.1683, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07426479458808899, "rewards/margins": 0.29479536414146423, "rewards/rejected": -0.36906009912490845, "step": 2940 }, { "epoch": 0.79, "learning_rate": 6.617274798504286e-07, "logits/chosen": -1.6468874216079712, "logits/rejected": -0.37529969215393066, "logps/chosen": -776.8147583007812, "logps/rejected": -1830.4251708984375, "loss": 0.1382, "rewards/accuracies": 0.875, "rewards/chosen": -0.07489114999771118, "rewards/margins": 0.3607185184955597, "rewards/rejected": -0.4356096684932709, "step": 2950 }, { "epoch": 0.79, "learning_rate": 6.460358074120518e-07, "logits/chosen": -1.5551801919937134, "logits/rejected": -0.9461766481399536, "logps/chosen": -714.2229614257812, "logps/rejected": -1528.100341796875, "loss": 0.1998, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06713708490133286, "rewards/margins": 0.25901907682418823, "rewards/rejected": -0.3261561691761017, "step": 2960 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -1.5757367610931396, "logits/rejected": -1.0922707319259644, "logps/chosen": -647.8770751953125, "logps/rejected": -1738.0703125, "loss": 0.1411, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06151123717427254, "rewards/margins": 0.37969422340393066, "rewards/rejected": -0.4412055015563965, "step": 2970 }, { "epoch": 0.79, "learning_rate": 6.151357245788917e-07, "logits/chosen": -1.52052903175354, "logits/rejected": -0.5594236850738525, "logps/chosen": -708.1317138671875, "logps/rejected": -1907.6968994140625, "loss": 0.1327, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.08830620348453522, "rewards/margins": 0.3933480679988861, "rewards/rejected": -0.4816543161869049, "step": 2980 }, { "epoch": 0.8, "learning_rate": 5.999299915559956e-07, "logits/chosen": -1.422783374786377, "logits/rejected": -0.7664706110954285, "logps/chosen": -648.51171875, "logps/rejected": -1761.1148681640625, "loss": 0.1385, "rewards/accuracies": 0.875, "rewards/chosen": -0.07798387855291367, "rewards/margins": 0.35578176379203796, "rewards/rejected": -0.4337656497955322, "step": 2990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.4832850694656372, "logits/rejected": -0.704194962978363, "logps/chosen": -635.0929565429688, "logps/rejected": -1439.96630859375, "loss": 0.2039, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09270008653402328, "rewards/margins": 0.2593896985054016, "rewards/rejected": -0.3520897328853607, "step": 3000 }, { "epoch": 0.8, "learning_rate": 5.700137297712749e-07, "logits/chosen": -1.560736894607544, "logits/rejected": -0.25968214869499207, "logps/chosen": -725.5360107421875, "logps/rejected": -1755.325927734375, "loss": 0.1363, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08439795672893524, "rewards/margins": 0.36337295174598694, "rewards/rejected": -0.4477709233760834, "step": 3010 }, { "epoch": 0.81, "learning_rate": 5.553057931370729e-07, "logits/chosen": -1.666855812072754, "logits/rejected": -0.44792652130126953, "logps/chosen": -635.80419921875, "logps/rejected": -1666.586669921875, "loss": 0.1712, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07980332523584366, "rewards/margins": 0.3341625928878784, "rewards/rejected": -0.41396594047546387, "step": 3020 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -1.5879957675933838, "logits/rejected": -0.9095015525817871, "logps/chosen": -787.1989135742188, "logps/rejected": -1740.4361572265625, "loss": 0.1449, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08927465230226517, "rewards/margins": 0.3183870315551758, "rewards/rejected": -0.4076617360115051, "step": 3030 }, { "epoch": 0.81, "learning_rate": 5.263966802018275e-07, "logits/chosen": -1.6660913228988647, "logits/rejected": -0.7353159785270691, "logps/chosen": -805.4395751953125, "logps/rejected": -1653.5638427734375, "loss": 0.1664, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08214981853961945, "rewards/margins": 0.31050822138786316, "rewards/rejected": -0.3926580250263214, "step": 3040 }, { "epoch": 0.81, "learning_rate": 5.121980087628802e-07, "logits/chosen": -1.63559091091156, "logits/rejected": -0.620925784111023, "logps/chosen": -642.1688232421875, "logps/rejected": -1510.009521484375, "loss": 0.186, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.04546310380101204, "rewards/margins": 0.30619844794273376, "rewards/rejected": -0.3516615927219391, "step": 3050 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -1.423666000366211, "logits/rejected": -0.12835507094860077, "logps/chosen": -631.965576171875, "logps/rejected": -1562.4158935546875, "loss": 0.2021, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06688202172517776, "rewards/margins": 0.2849853038787842, "rewards/rejected": -0.35186734795570374, "step": 3060 }, { "epoch": 0.82, "learning_rate": 4.843185871337722e-07, "logits/chosen": -1.3471550941467285, "logits/rejected": -0.5594549179077148, "logps/chosen": -620.7034301757812, "logps/rejected": -1736.4898681640625, "loss": 0.1361, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05083579942584038, "rewards/margins": 0.33074623346328735, "rewards/rejected": -0.3815820515155792, "step": 3070 }, { "epoch": 0.82, "learning_rate": 4.706402525869633e-07, "logits/chosen": -1.2812891006469727, "logits/rejected": -0.4547126293182373, "logps/chosen": -562.1158447265625, "logps/rejected": -1769.2896728515625, "loss": 0.1329, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.04002733901143074, "rewards/margins": 0.383873850107193, "rewards/rejected": -0.423901230096817, "step": 3080 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -1.571263313293457, "logits/rejected": -0.40705451369285583, "logps/chosen": -605.5236206054688, "logps/rejected": -1484.4984130859375, "loss": 0.1846, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05240412801504135, "rewards/margins": 0.2970747649669647, "rewards/rejected": -0.34947890043258667, "step": 3090 }, { "epoch": 0.83, "learning_rate": 4.438122617983442e-07, "logits/chosen": -1.3903917074203491, "logits/rejected": -0.20577654242515564, "logps/chosen": -603.0784301757812, "logps/rejected": -1667.8463134765625, "loss": 0.1127, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0214361734688282, "rewards/margins": 0.37630894780158997, "rewards/rejected": -0.3977451026439667, "step": 3100 }, { "epoch": 0.83, "learning_rate": 4.3066493009749853e-07, "logits/chosen": -1.6927878856658936, "logits/rejected": -0.6562541723251343, "logps/chosen": -784.9993896484375, "logps/rejected": -1762.9515380859375, "loss": 0.19, "rewards/accuracies": 0.875, "rewards/chosen": -0.07724924385547638, "rewards/margins": 0.29403752088546753, "rewards/rejected": -0.3712867796421051, "step": 3110 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -1.457176923751831, "logits/rejected": -0.6649435758590698, "logps/chosen": -731.3934326171875, "logps/rejected": -1780.1842041015625, "loss": 0.1694, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06437239050865173, "rewards/margins": 0.33346226811408997, "rewards/rejected": -0.3978345990180969, "step": 3120 }, { "epoch": 0.83, "learning_rate": 4.049092898095816e-07, "logits/chosen": -1.2814868688583374, "logits/rejected": -0.6186938285827637, "logps/chosen": -650.4574584960938, "logps/rejected": -1637.374755859375, "loss": 0.1541, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06206426024436951, "rewards/margins": 0.26436498761177063, "rewards/rejected": -0.32642924785614014, "step": 3130 }, { "epoch": 0.84, "learning_rate": 3.9230321284847856e-07, "logits/chosen": -1.5192755460739136, "logits/rejected": -0.237101748585701, "logps/chosen": -517.3258666992188, "logps/rejected": -1354.155517578125, "loss": 0.1789, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.014683857560157776, "rewards/margins": 0.2717076539993286, "rewards/rejected": -0.2863914966583252, "step": 3140 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.514344573020935, "logits/rejected": -1.0580737590789795, "logps/chosen": -651.7825927734375, "logps/rejected": -1709.284423828125, "loss": 0.1427, "rewards/accuracies": 0.875, "rewards/chosen": -0.06703333556652069, "rewards/margins": 0.3800427317619324, "rewards/rejected": -0.4470759928226471, "step": 3150 }, { "epoch": 0.84, "learning_rate": 3.6764000653481263e-07, "logits/chosen": -1.5731451511383057, "logits/rejected": -0.5786491632461548, "logps/chosen": -680.1246337890625, "logps/rejected": -1564.331787109375, "loss": 0.1837, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08038660138845444, "rewards/margins": 0.2984033226966858, "rewards/rejected": -0.3787899315357208, "step": 3160 }, { "epoch": 0.85, "learning_rate": 3.555850141530659e-07, "logits/chosen": -1.620054006576538, "logits/rejected": -0.6548138856887817, "logps/chosen": -725.5187377929688, "logps/rejected": -1559.7091064453125, "loss": 0.1401, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.058062393218278885, "rewards/margins": 0.3103691041469574, "rewards/rejected": -0.3684315085411072, "step": 3170 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -1.4922726154327393, "logits/rejected": -0.5721961259841919, "logps/chosen": -574.6033935546875, "logps/rejected": -1700.106689453125, "loss": 0.1561, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.055240534245967865, "rewards/margins": 0.36047983169555664, "rewards/rejected": -0.4157203733921051, "step": 3180 }, { "epoch": 0.85, "learning_rate": 3.3203347344004737e-07, "logits/chosen": -1.260347604751587, "logits/rejected": -0.1747826784849167, "logps/chosen": -695.7683715820312, "logps/rejected": -1630.91064453125, "loss": 0.1731, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13098278641700745, "rewards/margins": 0.2832750380039215, "rewards/rejected": -0.41425782442092896, "step": 3190 }, { "epoch": 0.85, "learning_rate": 3.2053896575809426e-07, "logits/chosen": -1.39348566532135, "logits/rejected": -0.686271607875824, "logps/chosen": -608.1319580078125, "logps/rejected": -1700.7486572265625, "loss": 0.0992, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.0731215551495552, "rewards/margins": 0.36609500646591187, "rewards/rejected": -0.4392165243625641, "step": 3200 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -1.3719862699508667, "logits/rejected": -0.6818580031394958, "logps/chosen": -606.0661010742188, "logps/rejected": -1553.185791015625, "loss": 0.1497, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06806117296218872, "rewards/margins": 0.30099108815193176, "rewards/rejected": -0.3690522313117981, "step": 3210 }, { "epoch": 0.86, "learning_rate": 2.981174554287239e-07, "logits/chosen": -1.3921059370040894, "logits/rejected": -0.7532171010971069, "logps/chosen": -703.4324951171875, "logps/rejected": -1927.8841552734375, "loss": 0.1379, "rewards/accuracies": 0.875, "rewards/chosen": -0.07026232779026031, "rewards/margins": 0.38420066237449646, "rewards/rejected": -0.4544629156589508, "step": 3220 }, { "epoch": 0.86, "learning_rate": 2.871923955178918e-07, "logits/chosen": -1.2359908819198608, "logits/rejected": -0.5525861978530884, "logps/chosen": -692.30419921875, "logps/rejected": -1788.237060546875, "loss": 0.1208, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08661970496177673, "rewards/margins": 0.37772631645202637, "rewards/rejected": -0.4643460810184479, "step": 3230 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -1.4797961711883545, "logits/rejected": -0.3416746258735657, "logps/chosen": -617.2152099609375, "logps/rejected": -1625.399658203125, "loss": 0.1826, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0840892642736435, "rewards/margins": 0.2994067072868347, "rewards/rejected": -0.3834959864616394, "step": 3240 }, { "epoch": 0.87, "learning_rate": 2.6591839919146963e-07, "logits/chosen": -1.503493070602417, "logits/rejected": 0.3321295380592346, "logps/chosen": -688.2984619140625, "logps/rejected": -1707.145263671875, "loss": 0.1299, "rewards/accuracies": 0.875, "rewards/chosen": -0.08260496705770493, "rewards/margins": 0.3498184084892273, "rewards/rejected": -0.43242329359054565, "step": 3250 }, { "epoch": 0.87, "learning_rate": 2.555713060848433e-07, "logits/chosen": -1.3645676374435425, "logits/rejected": -0.04500112682580948, "logps/chosen": -643.5718994140625, "logps/rejected": -1641.6656494140625, "loss": 0.1557, "rewards/accuracies": 0.875, "rewards/chosen": -0.0737595409154892, "rewards/margins": 0.3242550790309906, "rewards/rejected": -0.3980146050453186, "step": 3260 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -1.6310592889785767, "logits/rejected": -0.6514387130737305, "logps/chosen": -804.0472412109375, "logps/rejected": -1859.6383056640625, "loss": 0.1372, "rewards/accuracies": 0.875, "rewards/chosen": -0.12043702602386475, "rewards/margins": 0.3875194489955902, "rewards/rejected": -0.5079564452171326, "step": 3270 }, { "epoch": 0.87, "learning_rate": 2.3546141258376786e-07, "logits/chosen": -1.3590515851974487, "logits/rejected": -0.43198928236961365, "logps/chosen": -733.1363525390625, "logps/rejected": -1653.435791015625, "loss": 0.1397, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11448194831609726, "rewards/margins": 0.33477213978767395, "rewards/rejected": -0.4492540955543518, "step": 3280 }, { "epoch": 0.88, "learning_rate": 2.257003546333042e-07, "logits/chosen": -1.4484094381332397, "logits/rejected": -0.5377854108810425, "logps/chosen": -742.8893432617188, "logps/rejected": -1920.5609130859375, "loss": 0.1697, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11689500510692596, "rewards/margins": 0.35355696082115173, "rewards/rejected": -0.4704520106315613, "step": 3290 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.6482737064361572, "logits/rejected": -0.8282219767570496, "logps/chosen": -565.1773681640625, "logps/rejected": -1599.753662109375, "loss": 0.1429, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.056347571313381195, "rewards/margins": 0.3239589333534241, "rewards/rejected": -0.38030651211738586, "step": 3300 }, { "epoch": 0.88, "learning_rate": 2.0677024504760752e-07, "logits/chosen": -1.502986192703247, "logits/rejected": -0.47991952300071716, "logps/chosen": -559.9749755859375, "logps/rejected": -1734.3677978515625, "loss": 0.1338, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.041349343955516815, "rewards/margins": 0.3722584545612335, "rewards/rejected": -0.41360777616500854, "step": 3310 }, { "epoch": 0.89, "learning_rate": 1.9760283363267684e-07, "logits/chosen": -1.6863027811050415, "logits/rejected": -0.2434253990650177, "logps/chosen": -727.5802612304688, "logps/rejected": -1704.158203125, "loss": 0.1468, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07457654923200607, "rewards/margins": 0.3438524007797241, "rewards/rejected": -0.4184289872646332, "step": 3320 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -1.3177156448364258, "logits/rejected": -0.1511184722185135, "logps/chosen": -603.4699096679688, "logps/rejected": -1553.7645263671875, "loss": 0.114, "rewards/accuracies": 0.75, "rewards/chosen": -0.07171906530857086, "rewards/margins": 0.3328208923339844, "rewards/rejected": -0.40453997254371643, "step": 3330 }, { "epoch": 0.89, "learning_rate": 1.798672690923828e-07, "logits/chosen": -1.5630302429199219, "logits/rejected": -0.8224193453788757, "logps/chosen": -620.0535888671875, "logps/rejected": -1780.6702880859375, "loss": 0.0976, "rewards/accuracies": 0.875, "rewards/chosen": -0.06969192624092102, "rewards/margins": 0.3709218502044678, "rewards/rejected": -0.4406138062477112, "step": 3340 }, { "epoch": 0.89, "learning_rate": 1.713006526846439e-07, "logits/chosen": -1.5444920063018799, "logits/rejected": -0.24943670630455017, "logps/chosen": -683.787841796875, "logps/rejected": -1977.3343505859375, "loss": 0.0802, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.06767871230840683, "rewards/margins": 0.4605562686920166, "rewards/rejected": -0.5282350778579712, "step": 3350 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -1.5846331119537354, "logits/rejected": -0.536185085773468, "logps/chosen": -703.1005859375, "logps/rejected": -1691.5517578125, "loss": 0.1945, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07973551005125046, "rewards/margins": 0.3327817916870117, "rewards/rejected": -0.4125173091888428, "step": 3360 }, { "epoch": 0.9, "learning_rate": 1.5477346284948292e-07, "logits/chosen": -1.572539210319519, "logits/rejected": -0.8882861137390137, "logps/chosen": -748.1613159179688, "logps/rejected": -1634.09521484375, "loss": 0.1691, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09525839984416962, "rewards/margins": 0.28481072187423706, "rewards/rejected": -0.38006913661956787, "step": 3370 }, { "epoch": 0.9, "learning_rate": 1.4681432143872133e-07, "logits/chosen": -1.2806552648544312, "logits/rejected": -0.47933030128479004, "logps/chosen": -767.8941650390625, "logps/rejected": -1739.5386962890625, "loss": 0.1768, "rewards/accuracies": 0.875, "rewards/chosen": -0.09690088033676147, "rewards/margins": 0.33204126358032227, "rewards/rejected": -0.42894211411476135, "step": 3380 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -1.619302749633789, "logits/rejected": -0.4543309211730957, "logps/chosen": -705.6532592773438, "logps/rejected": -1661.1187744140625, "loss": 0.2429, "rewards/accuracies": 0.75, "rewards/chosen": -0.09244809299707413, "rewards/margins": 0.27722662687301636, "rewards/rejected": -0.3696747124195099, "step": 3390 }, { "epoch": 0.91, "learning_rate": 1.31508393714177e-07, "logits/chosen": -1.576765537261963, "logits/rejected": -0.9673765301704407, "logps/chosen": -700.4979248046875, "logps/rejected": -1635.6195068359375, "loss": 0.1433, "rewards/accuracies": 0.75, "rewards/chosen": -0.08088205009698868, "rewards/margins": 0.33026668429374695, "rewards/rejected": -0.41114872694015503, "step": 3400 }, { "epoch": 0.91, "learning_rate": 1.241629335994471e-07, "logits/chosen": -1.3158491849899292, "logits/rejected": -0.2821682393550873, "logps/chosen": -567.6187133789062, "logps/rejected": -1448.58837890625, "loss": 0.2002, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06683605909347534, "rewards/margins": 0.2942168712615967, "rewards/rejected": -0.361052930355072, "step": 3410 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -1.630567193031311, "logits/rejected": -0.7469101548194885, "logps/chosen": -718.3992919921875, "logps/rejected": -1641.1156005859375, "loss": 0.1399, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10309512913227081, "rewards/margins": 0.3157398998737335, "rewards/rejected": -0.4188350737094879, "step": 3420 }, { "epoch": 0.91, "learning_rate": 1.1009020308754587e-07, "logits/chosen": -1.4223135709762573, "logits/rejected": -0.4289831519126892, "logps/chosen": -924.49609375, "logps/rejected": -1811.7252197265625, "loss": 0.2089, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13550476729869843, "rewards/margins": 0.3179660737514496, "rewards/rejected": -0.4534708857536316, "step": 3430 }, { "epoch": 0.92, "learning_rate": 1.0336415203768962e-07, "logits/chosen": -1.4638688564300537, "logits/rejected": 0.3213498592376709, "logps/chosen": -778.1163330078125, "logps/rejected": -1671.373779296875, "loss": 0.1553, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07180814445018768, "rewards/margins": 0.3324059545993805, "rewards/rejected": -0.404214084148407, "step": 3440 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.3937320709228516, "logits/rejected": -0.29455748200416565, "logps/chosen": -558.4961547851562, "logps/rejected": -1537.4754638671875, "loss": 0.1932, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07989932596683502, "rewards/margins": 0.2945837080478668, "rewards/rejected": -0.37448304891586304, "step": 3450 }, { "epoch": 0.92, "learning_rate": 9.053559223036746e-08, "logits/chosen": -1.3823366165161133, "logits/rejected": -0.34489625692367554, "logps/chosen": -750.284423828125, "logps/rejected": -1547.081298828125, "loss": 0.1481, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07611394673585892, "rewards/margins": 0.2931041121482849, "rewards/rejected": -0.36921799182891846, "step": 3460 }, { "epoch": 0.93, "learning_rate": 8.44341950176683e-08, "logits/chosen": -1.58815598487854, "logits/rejected": -0.0034358978737145662, "logps/chosen": -567.5117797851562, "logps/rejected": -1468.9776611328125, "loss": 0.1411, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.03928015008568764, "rewards/margins": 0.3018725514411926, "rewards/rejected": -0.34115269780158997, "step": 3470 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -1.5472205877304077, "logits/rejected": -0.9048103094100952, "logps/chosen": -627.4727783203125, "logps/rejected": -1713.844970703125, "loss": 0.0771, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.04558110982179642, "rewards/margins": 0.3975328207015991, "rewards/rejected": -0.44311389327049255, "step": 3480 }, { "epoch": 0.93, "learning_rate": 7.285980923996989e-08, "logits/chosen": -1.4914085865020752, "logits/rejected": -0.47641056776046753, "logps/chosen": -650.9934692382812, "logps/rejected": -1637.280029296875, "loss": 0.1166, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.054985035210847855, "rewards/margins": 0.3437821865081787, "rewards/rejected": -0.3987672030925751, "step": 3490 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "logits/chosen": -1.564391016960144, "logits/rejected": -0.30673253536224365, "logps/chosen": -597.55322265625, "logps/rejected": -1578.419677734375, "loss": 0.1353, "rewards/accuracies": 0.875, "rewards/chosen": -0.04587271437048912, "rewards/margins": 0.32063284516334534, "rewards/rejected": -0.36650553345680237, "step": 3500 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -1.298776626586914, "logits/rejected": -0.1061747819185257, "logps/chosen": -640.8598022460938, "logps/rejected": -1533.592529296875, "loss": 0.1815, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.05050724744796753, "rewards/margins": 0.27905362844467163, "rewards/rejected": -0.32956087589263916, "step": 3510 }, { "epoch": 0.94, "learning_rate": 5.707663716023021e-08, "logits/chosen": -1.5522761344909668, "logits/rejected": -0.26008373498916626, "logps/chosen": -703.5093994140625, "logps/rejected": -1482.5135498046875, "loss": 0.1582, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06401093304157257, "rewards/margins": 0.2837941348552704, "rewards/rejected": -0.34780508279800415, "step": 3520 }, { "epoch": 0.94, "learning_rate": 5.22383298837098e-08, "logits/chosen": -1.5810019969940186, "logits/rejected": 0.17494335770606995, "logps/chosen": -811.2449340820312, "logps/rejected": -1590.089599609375, "loss": 0.0992, "rewards/accuracies": 0.875, "rewards/chosen": -0.09410594403743744, "rewards/margins": 0.29233235120773315, "rewards/rejected": -0.3864383101463318, "step": 3530 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -1.484390139579773, "logits/rejected": -0.5827163457870483, "logps/chosen": -721.3407592773438, "logps/rejected": -1430.334716796875, "loss": 0.1909, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10846199095249176, "rewards/margins": 0.22209401428699493, "rewards/rejected": -0.3305560052394867, "step": 3540 }, { "epoch": 0.95, "learning_rate": 4.319838323396691e-08, "logits/chosen": -1.5092474222183228, "logits/rejected": -0.8641079068183899, "logps/chosen": -654.3946533203125, "logps/rejected": -1624.559326171875, "loss": 0.1749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07206736505031586, "rewards/margins": 0.3177274167537689, "rewards/rejected": -0.3897947669029236, "step": 3550 }, { "epoch": 0.95, "learning_rate": 3.8997527136930004e-08, "logits/chosen": -1.3939855098724365, "logits/rejected": -0.9460384249687195, "logps/chosen": -721.7215576171875, "logps/rejected": -1758.743408203125, "loss": 0.1378, "rewards/accuracies": 0.875, "rewards/chosen": -0.1156727522611618, "rewards/margins": 0.3109985291957855, "rewards/rejected": -0.4266713261604309, "step": 3560 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -1.390904188156128, "logits/rejected": -0.4405292570590973, "logps/chosen": -571.7150268554688, "logps/rejected": -1526.440673828125, "loss": 0.172, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04342179745435715, "rewards/margins": 0.2945147156715393, "rewards/rejected": -0.33793652057647705, "step": 3570 }, { "epoch": 0.95, "learning_rate": 3.1235869306123766e-08, "logits/chosen": -1.743300199508667, "logits/rejected": -0.7482790350914001, "logps/chosen": -623.4041748046875, "logps/rejected": -1765.6488037109375, "loss": 0.1102, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.057156067341566086, "rewards/margins": 0.3713191747665405, "rewards/rejected": -0.4284752309322357, "step": 3580 }, { "epoch": 0.96, "learning_rate": 2.767574008979007e-08, "logits/chosen": -1.62771475315094, "logits/rejected": -0.4970121383666992, "logps/chosen": -712.7774047851562, "logps/rejected": -1704.322021484375, "loss": 0.1109, "rewards/accuracies": 0.875, "rewards/chosen": -0.07325717061758041, "rewards/margins": 0.3189954161643982, "rewards/rejected": -0.3922525942325592, "step": 3590 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.5333154201507568, "logits/rejected": -0.363052636384964, "logps/chosen": -755.8592529296875, "logps/rejected": -1792.1357421875, "loss": 0.1142, "rewards/accuracies": 0.875, "rewards/chosen": -0.06991975009441376, "rewards/margins": 0.3443168103694916, "rewards/rejected": -0.41423654556274414, "step": 3600 }, { "epoch": 0.96, "learning_rate": 2.1198423385220822e-08, "logits/chosen": -1.5201735496520996, "logits/rejected": -0.7737599611282349, "logps/chosen": -648.6341552734375, "logps/rejected": -1637.8931884765625, "loss": 0.1757, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.046808890998363495, "rewards/margins": 0.346151202917099, "rewards/rejected": -0.3929601311683655, "step": 3610 }, { "epoch": 0.97, "learning_rate": 1.82817971312621e-08, "logits/chosen": -1.6378930807113647, "logits/rejected": -0.5780726671218872, "logps/chosen": -671.5693969726562, "logps/rejected": -1713.7747802734375, "loss": 0.1688, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07191529124975204, "rewards/margins": 0.33269262313842773, "rewards/rejected": -0.4046078622341156, "step": 3620 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -1.6032174825668335, "logits/rejected": -0.24984005093574524, "logps/chosen": -638.01123046875, "logps/rejected": -1515.741943359375, "loss": 0.1446, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06576866656541824, "rewards/margins": 0.31248709559440613, "rewards/rejected": -0.37825578451156616, "step": 3630 }, { "epoch": 0.97, "learning_rate": 1.3093872369654148e-08, "logits/chosen": -1.462215781211853, "logits/rejected": 0.025119613856077194, "logps/chosen": -680.1953125, "logps/rejected": -1622.0228271484375, "loss": 0.2123, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08275710791349411, "rewards/margins": 0.2952966094017029, "rewards/rejected": -0.3780537247657776, "step": 3640 }, { "epoch": 0.97, "learning_rate": 1.0823023375489128e-08, "logits/chosen": -1.6868362426757812, "logits/rejected": -0.5641859173774719, "logps/chosen": -618.9349975585938, "logps/rejected": -1718.207275390625, "loss": 0.107, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.05680794641375542, "rewards/margins": 0.3729218542575836, "rewards/rejected": -0.42972975969314575, "step": 3650 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -1.2520034313201904, "logits/rejected": -0.46264973282814026, "logps/chosen": -756.2525024414062, "logps/rejected": -1568.090576171875, "loss": 0.1772, "rewards/accuracies": 0.75, "rewards/chosen": -0.10075948387384415, "rewards/margins": 0.28606051206588745, "rewards/rejected": -0.3868200182914734, "step": 3660 }, { "epoch": 0.98, "learning_rate": 6.9285359445145366e-09, "logits/chosen": -1.6095876693725586, "logits/rejected": -0.49465712904930115, "logps/chosen": -636.206298828125, "logps/rejected": -1638.2972412109375, "loss": 0.1288, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03626566380262375, "rewards/margins": 0.34111684560775757, "rewards/rejected": -0.3773825466632843, "step": 3670 }, { "epoch": 0.98, "learning_rate": 5.305234949880001e-09, "logits/chosen": -1.4576680660247803, "logits/rejected": -0.8192172050476074, "logps/chosen": -602.4050903320312, "logps/rejected": -1541.21142578125, "loss": 0.2052, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.056397415697574615, "rewards/margins": 0.27618226408958435, "rewards/rejected": -0.33257967233657837, "step": 3680 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -1.4624910354614258, "logits/rejected": -0.06623925268650055, "logps/chosen": -615.46142578125, "logps/rejected": -1512.7928466796875, "loss": 0.1562, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.050837211310863495, "rewards/margins": 0.3221223056316376, "rewards/rejected": -0.37295952439308167, "step": 3690 }, { "epoch": 0.99, "learning_rate": 2.7072216536885855e-09, "logits/chosen": -1.576015591621399, "logits/rejected": -0.6331445574760437, "logps/chosen": -652.0988159179688, "logps/rejected": -1652.5101318359375, "loss": 0.0999, "rewards/accuracies": 0.875, "rewards/chosen": -0.0720575600862503, "rewards/margins": 0.32736578583717346, "rewards/rejected": -0.3994233012199402, "step": 3700 }, { "epoch": 0.99, "learning_rate": 1.7327344598702667e-09, "logits/chosen": -1.4119715690612793, "logits/rejected": -0.8666298985481262, "logps/chosen": -634.987548828125, "logps/rejected": -1896.078125, "loss": 0.0889, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.053931184113025665, "rewards/margins": 0.42121997475624084, "rewards/rejected": -0.4751511216163635, "step": 3710 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -1.3923088312149048, "logits/rejected": -0.4560534358024597, "logps/chosen": -703.5958862304688, "logps/rejected": -1692.0693359375, "loss": 0.1027, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07977847754955292, "rewards/margins": 0.34622496366500854, "rewards/rejected": -0.42600345611572266, "step": 3720 }, { "epoch": 0.99, "learning_rate": 4.332211510807427e-10, "logits/chosen": -1.3781083822250366, "logits/rejected": -0.20015454292297363, "logps/chosen": -547.2339477539062, "logps/rejected": -1573.963623046875, "loss": 0.1506, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0455051027238369, "rewards/margins": 0.3225332200527191, "rewards/rejected": -0.3680383563041687, "step": 3730 }, { "epoch": 1.0, "learning_rate": 1.0830763387897902e-10, "logits/chosen": -1.4161818027496338, "logits/rejected": 0.6542869806289673, "logps/chosen": -718.9110107421875, "logps/rejected": -1792.678955078125, "loss": 0.0937, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0775211825966835, "rewards/margins": 0.3633776307106018, "rewards/rejected": -0.4408988058567047, "step": 3740 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.728247046470642, "logits/rejected": -1.0278023481369019, "logps/chosen": -706.55908203125, "logps/rejected": -1668.5732421875, "loss": 0.1603, "rewards/accuracies": 0.875, "rewards/chosen": -0.07834981381893158, "rewards/margins": 0.3416889011859894, "rewards/rejected": -0.42003870010375977, "step": 3750 }, { "epoch": 1.0, "step": 3750, "total_flos": 0.0, "train_loss": 0.1829929338614146, "train_runtime": 17952.2095, "train_samples_per_second": 0.836, "train_steps_per_second": 0.209 } ], "logging_steps": 10, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }