{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 1000, "global_step": 22900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004366812227074236, "grad_norm": 0.49188803641260265, "learning_rate": 2.183406113537118e-09, "logits/chosen": -1.130352258682251, "logits/rejected": -0.9433857798576355, "logps/chosen": -272.3143005371094, "logps/rejected": -290.848388671875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.043668122270742356, "grad_norm": 0.5384921206391297, "learning_rate": 2.183406113537118e-08, "logits/chosen": -1.01466703414917, "logits/rejected": -1.0593312978744507, "logps/chosen": -300.28662109375, "logps/rejected": -261.3201904296875, "loss": 0.693, "rewards/accuracies": 0.5555555820465088, "rewards/chosen": 0.00028417169232852757, "rewards/margins": 0.0011761707719415426, "rewards/rejected": -0.0008919990505091846, "step": 10 }, { "epoch": 0.08733624454148471, "grad_norm": 0.5262758710755441, "learning_rate": 4.366812227074236e-08, "logits/chosen": -1.130258321762085, "logits/rejected": -1.0247228145599365, "logps/chosen": -283.00006103515625, "logps/rejected": -323.0061950683594, "loss": 0.6931, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": 0.0008177299168892205, "rewards/margins": -0.0003388174809515476, "rewards/rejected": 0.001156547456048429, "step": 20 }, { "epoch": 0.13100436681222707, "grad_norm": 0.6081401424073755, "learning_rate": 6.550218340611354e-08, "logits/chosen": -1.0656241178512573, "logits/rejected": -1.1066025495529175, "logps/chosen": -290.0464782714844, "logps/rejected": -269.2597961425781, "loss": 0.6933, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.0012347830925136805, "rewards/margins": -0.0003626748512033373, "rewards/rejected": 0.0015974579146131873, "step": 30 }, { "epoch": 0.17467248908296942, "grad_norm": 0.5592981791158257, "learning_rate": 8.733624454148472e-08, "logits/chosen": -1.0917942523956299, "logits/rejected": -1.114440679550171, "logps/chosen": -287.10650634765625, "logps/rejected": -276.1549072265625, "loss": 0.6932, "rewards/accuracies": 0.4375, "rewards/chosen": -0.000830154400318861, "rewards/margins": -0.0005117322434671223, "rewards/rejected": -0.00031842233147472143, "step": 40 }, { "epoch": 0.2183406113537118, "grad_norm": 0.4838614129210499, "learning_rate": 1.091703056768559e-07, "logits/chosen": -1.126975655555725, "logits/rejected": -1.0191268920898438, "logps/chosen": -264.54522705078125, "logps/rejected": -307.33428955078125, "loss": 0.6932, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0006964053027331829, "rewards/margins": 0.0005411204183474183, "rewards/rejected": -0.001237525837495923, "step": 50 }, { "epoch": 0.26200873362445415, "grad_norm": 0.4951444348722871, "learning_rate": 1.3100436681222707e-07, "logits/chosen": -1.0614988803863525, "logits/rejected": -1.0865339040756226, "logps/chosen": -291.81732177734375, "logps/rejected": -258.95758056640625, "loss": 0.6927, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.000319135666359216, "rewards/margins": 0.0020177382975816727, "rewards/rejected": -0.0016986025730147958, "step": 60 }, { "epoch": 0.3056768558951965, "grad_norm": 0.5141672640552767, "learning_rate": 1.5283842794759825e-07, "logits/chosen": -1.0970603227615356, "logits/rejected": -1.1135071516036987, "logps/chosen": -282.20440673828125, "logps/rejected": -267.5580139160156, "loss": 0.6922, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0014477157965302467, "rewards/margins": 0.0014881754759699106, "rewards/rejected": -0.0029358912725001574, "step": 70 }, { "epoch": 0.34934497816593885, "grad_norm": 0.5265721592349695, "learning_rate": 1.7467248908296944e-07, "logits/chosen": -1.117579698562622, "logits/rejected": -1.0172007083892822, "logps/chosen": -269.8170471191406, "logps/rejected": -304.8489074707031, "loss": 0.6919, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.000573660887312144, "rewards/margins": 0.0027795713394880295, "rewards/rejected": -0.0033532320521771908, "step": 80 }, { "epoch": 0.3930131004366812, "grad_norm": 0.551979026980402, "learning_rate": 1.9650655021834065e-07, "logits/chosen": -1.0540940761566162, "logits/rejected": -1.0708531141281128, "logps/chosen": -288.2409362792969, "logps/rejected": -264.5169677734375, "loss": 0.6915, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0029809358529746532, "rewards/margins": 0.0026885909028351307, "rewards/rejected": -0.005669526755809784, "step": 90 }, { "epoch": 0.4366812227074236, "grad_norm": 0.5360744300228703, "learning_rate": 2.183406113537118e-07, "logits/chosen": -1.0659748315811157, "logits/rejected": -1.0245542526245117, "logps/chosen": -260.5068359375, "logps/rejected": -279.3101501464844, "loss": 0.6913, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.002847515046596527, "rewards/margins": 0.0039843120612204075, "rewards/rejected": -0.006831827107816935, "step": 100 }, { "epoch": 0.48034934497816595, "grad_norm": 0.5340749550170745, "learning_rate": 2.40174672489083e-07, "logits/chosen": -1.0229836702346802, "logits/rejected": -1.060808539390564, "logps/chosen": -298.8360900878906, "logps/rejected": -270.68377685546875, "loss": 0.6905, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.004396949894726276, "rewards/margins": 0.006306762341409922, "rewards/rejected": -0.010703710839152336, "step": 110 }, { "epoch": 0.5240174672489083, "grad_norm": 0.4631249752278202, "learning_rate": 2.6200873362445414e-07, "logits/chosen": -1.1011121273040771, "logits/rejected": -1.1093685626983643, "logps/chosen": -297.49761962890625, "logps/rejected": -272.11505126953125, "loss": 0.6892, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.004122347570955753, "rewards/margins": 0.00819461140781641, "rewards/rejected": -0.012316958978772163, "step": 120 }, { "epoch": 0.5676855895196506, "grad_norm": 0.5170243645187762, "learning_rate": 2.8384279475982535e-07, "logits/chosen": -1.0676429271697998, "logits/rejected": -1.0455729961395264, "logps/chosen": -291.5346374511719, "logps/rejected": -276.01702880859375, "loss": 0.6881, "rewards/accuracies": 0.875, "rewards/chosen": -0.005611717235296965, "rewards/margins": 0.011267783120274544, "rewards/rejected": -0.016879498958587646, "step": 130 }, { "epoch": 0.611353711790393, "grad_norm": 0.5308132297458138, "learning_rate": 3.056768558951965e-07, "logits/chosen": -1.1365429162979126, "logits/rejected": -1.032840371131897, "logps/chosen": -270.54168701171875, "logps/rejected": -312.09295654296875, "loss": 0.6865, "rewards/accuracies": 0.875, "rewards/chosen": -0.00891136098653078, "rewards/margins": 0.011615408584475517, "rewards/rejected": -0.020526772364974022, "step": 140 }, { "epoch": 0.6550218340611353, "grad_norm": 0.5185968648830942, "learning_rate": 3.275109170305677e-07, "logits/chosen": -1.09221613407135, "logits/rejected": -1.1050293445587158, "logps/chosen": -292.47735595703125, "logps/rejected": -249.26559448242188, "loss": 0.6846, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.010011271573603153, "rewards/margins": 0.018784288316965103, "rewards/rejected": -0.028795558959245682, "step": 150 }, { "epoch": 0.6986899563318777, "grad_norm": 0.5518270160747941, "learning_rate": 3.4934497816593887e-07, "logits/chosen": -1.1190649271011353, "logits/rejected": -1.0740318298339844, "logps/chosen": -284.9859924316406, "logps/rejected": -318.15667724609375, "loss": 0.6817, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.01166083849966526, "rewards/margins": 0.022031528875231743, "rewards/rejected": -0.033692367374897, "step": 160 }, { "epoch": 0.74235807860262, "grad_norm": 0.53818564398806, "learning_rate": 3.711790393013101e-07, "logits/chosen": -1.0375069379806519, "logits/rejected": -1.081610918045044, "logps/chosen": -298.1676025390625, "logps/rejected": -260.79656982421875, "loss": 0.6782, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.017723847180604935, "rewards/margins": 0.02901584841310978, "rewards/rejected": -0.04673969745635986, "step": 170 }, { "epoch": 0.7860262008733624, "grad_norm": 0.566886533779199, "learning_rate": 3.930131004366813e-07, "logits/chosen": -1.0714492797851562, "logits/rejected": -1.0663259029388428, "logps/chosen": -279.3046569824219, "logps/rejected": -291.1760559082031, "loss": 0.6743, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.022340741008520126, "rewards/margins": 0.03869527205824852, "rewards/rejected": -0.06103602796792984, "step": 180 }, { "epoch": 0.8296943231441049, "grad_norm": 0.5670669454829728, "learning_rate": 4.1484716157205245e-07, "logits/chosen": -1.0951682329177856, "logits/rejected": -1.0681599378585815, "logps/chosen": -289.987060546875, "logps/rejected": -305.0567321777344, "loss": 0.6704, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.029982540756464005, "rewards/margins": 0.040330782532691956, "rewards/rejected": -0.07031331956386566, "step": 190 }, { "epoch": 0.8733624454148472, "grad_norm": 0.5802431830094139, "learning_rate": 4.366812227074236e-07, "logits/chosen": -1.0347440242767334, "logits/rejected": -0.9882251620292664, "logps/chosen": -274.33551025390625, "logps/rejected": -288.6924743652344, "loss": 0.6641, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.03922588378190994, "rewards/margins": 0.05691875144839287, "rewards/rejected": -0.09614463150501251, "step": 200 }, { "epoch": 0.9170305676855895, "grad_norm": 0.5984833719725586, "learning_rate": 4.585152838427948e-07, "logits/chosen": -1.1263666152954102, "logits/rejected": -1.0688358545303345, "logps/chosen": -286.6152648925781, "logps/rejected": -313.4473571777344, "loss": 0.659, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.047683339565992355, "rewards/margins": 0.0797087624669075, "rewards/rejected": -0.12739209830760956, "step": 210 }, { "epoch": 0.9606986899563319, "grad_norm": 0.602540597992744, "learning_rate": 4.80349344978166e-07, "logits/chosen": -1.0834460258483887, "logits/rejected": -1.0848705768585205, "logps/chosen": -299.30023193359375, "logps/rejected": -273.4922180175781, "loss": 0.6489, "rewards/accuracies": 0.9375, "rewards/chosen": -0.06221509724855423, "rewards/margins": 0.09258067607879639, "rewards/rejected": -0.15479575097560883, "step": 220 }, { "epoch": 1.0043668122270741, "grad_norm": 0.7219083775494632, "learning_rate": 5.021834061135371e-07, "logits/chosen": -1.1306490898132324, "logits/rejected": -1.0700007677078247, "logps/chosen": -261.66949462890625, "logps/rejected": -308.7687683105469, "loss": 0.6378, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.07242451608181, "rewards/margins": 0.1035647839307785, "rewards/rejected": -0.1759893000125885, "step": 230 }, { "epoch": 1.0480349344978166, "grad_norm": 0.7569049134200976, "learning_rate": 5.240174672489083e-07, "logits/chosen": -1.0631163120269775, "logits/rejected": -1.0471646785736084, "logps/chosen": -269.5180969238281, "logps/rejected": -286.3121337890625, "loss": 0.6243, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.0763176828622818, "rewards/margins": 0.14215071499347687, "rewards/rejected": -0.21846839785575867, "step": 240 }, { "epoch": 1.091703056768559, "grad_norm": 0.8107294185907971, "learning_rate": 5.458515283842795e-07, "logits/chosen": -1.1501646041870117, "logits/rejected": -1.1274811029434204, "logps/chosen": -301.20294189453125, "logps/rejected": -309.4885559082031, "loss": 0.6084, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.06277679651975632, "rewards/margins": 0.17938895523548126, "rewards/rejected": -0.24216575920581818, "step": 250 }, { "epoch": 1.1353711790393013, "grad_norm": 0.9104593786044158, "learning_rate": 5.676855895196507e-07, "logits/chosen": -1.099898338317871, "logits/rejected": -1.0906788110733032, "logps/chosen": -283.1834411621094, "logps/rejected": -300.26434326171875, "loss": 0.5824, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.05037436634302139, "rewards/margins": 0.23359432816505432, "rewards/rejected": -0.2839687168598175, "step": 260 }, { "epoch": 1.1790393013100438, "grad_norm": 0.9269209080069352, "learning_rate": 5.895196506550219e-07, "logits/chosen": -1.1154959201812744, "logits/rejected": -1.048327088356018, "logps/chosen": -267.4495544433594, "logps/rejected": -330.5212097167969, "loss": 0.5517, "rewards/accuracies": 1.0, "rewards/chosen": -0.04196222871541977, "rewards/margins": 0.3174445331096649, "rewards/rejected": -0.3594067394733429, "step": 270 }, { "epoch": 1.222707423580786, "grad_norm": 1.0252693931428478, "learning_rate": 6.11353711790393e-07, "logits/chosen": -1.1187620162963867, "logits/rejected": -1.0685856342315674, "logps/chosen": -268.8069152832031, "logps/rejected": -334.1720275878906, "loss": 0.5074, "rewards/accuracies": 1.0, "rewards/chosen": -0.048852380365133286, "rewards/margins": 0.399895578622818, "rewards/rejected": -0.4487478733062744, "step": 280 }, { "epoch": 1.2663755458515285, "grad_norm": 0.9616692736825927, "learning_rate": 6.331877729257642e-07, "logits/chosen": -1.131340742111206, "logits/rejected": -1.1003035306930542, "logps/chosen": -284.1922302246094, "logps/rejected": -353.3479309082031, "loss": 0.4642, "rewards/accuracies": 1.0, "rewards/chosen": -0.02227436937391758, "rewards/margins": 0.539787769317627, "rewards/rejected": -0.56206214427948, "step": 290 }, { "epoch": 1.3100436681222707, "grad_norm": 1.0849768403448883, "learning_rate": 6.550218340611354e-07, "logits/chosen": -1.1526134014129639, "logits/rejected": -1.044960618019104, "logps/chosen": -256.9180603027344, "logps/rejected": -365.6823425292969, "loss": 0.4066, "rewards/accuracies": 1.0, "rewards/chosen": -0.027581509202718735, "rewards/margins": 0.6985922455787659, "rewards/rejected": -0.7261737585067749, "step": 300 }, { "epoch": 1.3537117903930131, "grad_norm": 1.209749711651595, "learning_rate": 6.768558951965067e-07, "logits/chosen": -1.1041508913040161, "logits/rejected": -1.1133819818496704, "logps/chosen": -274.03350830078125, "logps/rejected": -357.8449401855469, "loss": 0.3405, "rewards/accuracies": 1.0, "rewards/chosen": -0.004090812988579273, "rewards/margins": 0.9213520288467407, "rewards/rejected": -0.9254428148269653, "step": 310 }, { "epoch": 1.3973799126637554, "grad_norm": 1.1341331098338738, "learning_rate": 6.986899563318777e-07, "logits/chosen": -1.1097848415374756, "logits/rejected": -1.1284868717193604, "logps/chosen": -286.14593505859375, "logps/rejected": -384.2344055175781, "loss": 0.2676, "rewards/accuracies": 1.0, "rewards/chosen": 0.028942784294486046, "rewards/margins": 1.2329121828079224, "rewards/rejected": -1.2039692401885986, "step": 320 }, { "epoch": 1.4410480349344978, "grad_norm": 1.085199706874221, "learning_rate": 7.20524017467249e-07, "logits/chosen": -1.1121447086334229, "logits/rejected": -1.1302967071533203, "logps/chosen": -289.8392028808594, "logps/rejected": -429.4609375, "loss": 0.2024, "rewards/accuracies": 1.0, "rewards/chosen": 0.03365617245435715, "rewards/margins": 1.63966965675354, "rewards/rejected": -1.606013298034668, "step": 330 }, { "epoch": 1.48471615720524, "grad_norm": 0.8650979811246479, "learning_rate": 7.423580786026202e-07, "logits/chosen": -1.1862926483154297, "logits/rejected": -1.0054594278335571, "logps/chosen": -234.2965545654297, "logps/rejected": -510.729736328125, "loss": 0.1454, "rewards/accuracies": 1.0, "rewards/chosen": 0.07937570661306381, "rewards/margins": 1.9606221914291382, "rewards/rejected": -1.881246566772461, "step": 340 }, { "epoch": 1.5283842794759825, "grad_norm": 0.6295130610440852, "learning_rate": 7.641921397379913e-07, "logits/chosen": -1.1614099740982056, "logits/rejected": -1.2081804275512695, "logps/chosen": -304.82794189453125, "logps/rejected": -507.919677734375, "loss": 0.0896, "rewards/accuracies": 1.0, "rewards/chosen": 0.061865758150815964, "rewards/margins": 2.527204990386963, "rewards/rejected": -2.465339422225952, "step": 350 }, { "epoch": 1.572052401746725, "grad_norm": 0.45834875798839025, "learning_rate": 7.860262008733626e-07, "logits/chosen": -1.1362165212631226, "logits/rejected": -1.0453773736953735, "logps/chosen": -270.7845764160156, "logps/rejected": -570.6972045898438, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": 0.030342798680067062, "rewards/margins": 2.9477365016937256, "rewards/rejected": -2.917393684387207, "step": 360 }, { "epoch": 1.6157205240174672, "grad_norm": 0.3233741766922913, "learning_rate": 8.078602620087336e-07, "logits/chosen": -1.104885458946228, "logits/rejected": -1.0617434978485107, "logps/chosen": -283.15118408203125, "logps/rejected": -601.6704711914062, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": 0.01112963818013668, "rewards/margins": 3.4309744834899902, "rewards/rejected": -3.4198451042175293, "step": 370 }, { "epoch": 1.6593886462882095, "grad_norm": 0.2745452331259542, "learning_rate": 8.296943231441049e-07, "logits/chosen": -1.1352978944778442, "logits/rejected": -1.0484027862548828, "logps/chosen": -266.3657531738281, "logps/rejected": -715.82470703125, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": 0.04947192966938019, "rewards/margins": 4.168959140777588, "rewards/rejected": -4.1194868087768555, "step": 380 }, { "epoch": 1.703056768558952, "grad_norm": 0.18579561423748703, "learning_rate": 8.51528384279476e-07, "logits/chosen": -1.132371425628662, "logits/rejected": -1.0741881132125854, "logps/chosen": -283.3558654785156, "logps/rejected": -719.0808715820312, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 0.008087699301540852, "rewards/margins": 4.502425193786621, "rewards/rejected": -4.494337558746338, "step": 390 }, { "epoch": 1.7467248908296944, "grad_norm": 0.16438075275062816, "learning_rate": 8.733624454148472e-07, "logits/chosen": -1.150843620300293, "logits/rejected": -1.0869419574737549, "logps/chosen": -277.7554626464844, "logps/rejected": -752.481201171875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 0.07087412476539612, "rewards/margins": 4.746480464935303, "rewards/rejected": -4.675605773925781, "step": 400 }, { "epoch": 1.7903930131004366, "grad_norm": 0.17523293988364988, "learning_rate": 8.951965065502185e-07, "logits/chosen": -1.1717755794525146, "logits/rejected": -0.9909049272537231, "logps/chosen": -236.96115112304688, "logps/rejected": -849.7864990234375, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 0.08163632452487946, "rewards/margins": 5.277564525604248, "rewards/rejected": -5.195928573608398, "step": 410 }, { "epoch": 1.8340611353711789, "grad_norm": 0.12209207881380703, "learning_rate": 9.170305676855896e-07, "logits/chosen": -1.1437294483184814, "logits/rejected": -0.9581422805786133, "logps/chosen": -237.2731475830078, "logps/rejected": -819.6331176757812, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 0.10294657945632935, "rewards/margins": 5.307999134063721, "rewards/rejected": -5.205053329467773, "step": 420 }, { "epoch": 1.8777292576419216, "grad_norm": 0.10837677494037731, "learning_rate": 9.388646288209608e-07, "logits/chosen": -1.124634027481079, "logits/rejected": -1.0568747520446777, "logps/chosen": -294.863525390625, "logps/rejected": -865.0472412109375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.013736343011260033, "rewards/margins": 5.884243965148926, "rewards/rejected": -5.870507717132568, "step": 430 }, { "epoch": 1.9213973799126638, "grad_norm": 0.08602245511943761, "learning_rate": 9.60698689956332e-07, "logits/chosen": -1.109368085861206, "logits/rejected": -1.0497287511825562, "logps/chosen": -295.2526550292969, "logps/rejected": -844.66357421875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 0.040230732411146164, "rewards/margins": 5.94547176361084, "rewards/rejected": -5.905241012573242, "step": 440 }, { "epoch": 1.965065502183406, "grad_norm": 1.1904483662590828, "learning_rate": 9.82532751091703e-07, "logits/chosen": -1.1221883296966553, "logits/rejected": -1.011887788772583, "logps/chosen": -269.97125244140625, "logps/rejected": -900.7482299804688, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.06182447075843811, "rewards/margins": 6.251893043518066, "rewards/rejected": -6.190068244934082, "step": 450 }, { "epoch": 2.0087336244541483, "grad_norm": 0.17372270907102078, "learning_rate": 1.0043668122270742e-06, "logits/chosen": -1.1051892042160034, "logits/rejected": -0.9787055850028992, "logps/chosen": -252.77682495117188, "logps/rejected": -902.7976684570312, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.1315845102071762, "rewards/margins": 6.355137825012207, "rewards/rejected": -6.22355318069458, "step": 460 }, { "epoch": 2.052401746724891, "grad_norm": 0.08383454316608682, "learning_rate": 1.0262008733624455e-06, "logits/chosen": -1.1377214193344116, "logits/rejected": -1.060013771057129, "logps/chosen": -295.84112548828125, "logps/rejected": -931.5189208984375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.11537136137485504, "rewards/margins": 6.716187953948975, "rewards/rejected": -6.600817680358887, "step": 470 }, { "epoch": 2.096069868995633, "grad_norm": 0.2602098513840363, "learning_rate": 1.0480349344978166e-06, "logits/chosen": -1.1318188905715942, "logits/rejected": -0.9394232034683228, "logps/chosen": -250.13623046875, "logps/rejected": -1024.8939208984375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 0.16733641922473907, "rewards/margins": 7.342118263244629, "rewards/rejected": -7.174781799316406, "step": 480 }, { "epoch": 2.1397379912663754, "grad_norm": 0.0507621263373976, "learning_rate": 1.0698689956331878e-06, "logits/chosen": -1.1444717645645142, "logits/rejected": -0.9872462153434753, "logps/chosen": -244.42514038085938, "logps/rejected": -1034.5931396484375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.16595785319805145, "rewards/margins": 7.343172550201416, "rewards/rejected": -7.177214622497559, "step": 490 }, { "epoch": 2.183406113537118, "grad_norm": 0.0776627599218249, "learning_rate": 1.091703056768559e-06, "logits/chosen": -1.1139017343521118, "logits/rejected": -0.9806615710258484, "logps/chosen": -256.0354919433594, "logps/rejected": -996.4794921875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.056664906442165375, "rewards/margins": 7.301563262939453, "rewards/rejected": -7.244898319244385, "step": 500 }, { "epoch": 2.2270742358078603, "grad_norm": 0.06571719845158422, "learning_rate": 1.1135371179039301e-06, "logits/chosen": -1.0170562267303467, "logits/rejected": -0.8638440370559692, "logps/chosen": -263.1883239746094, "logps/rejected": -1026.7711181640625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.05126028135418892, "rewards/margins": 7.609605312347412, "rewards/rejected": -7.558345794677734, "step": 510 }, { "epoch": 2.2707423580786026, "grad_norm": 0.04914763295186775, "learning_rate": 1.1353711790393014e-06, "logits/chosen": -1.091996669769287, "logits/rejected": -0.9857221841812134, "logps/chosen": -284.37921142578125, "logps/rejected": -1055.7904052734375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.09198157489299774, "rewards/margins": 7.852242469787598, "rewards/rejected": -7.760260581970215, "step": 520 }, { "epoch": 2.314410480349345, "grad_norm": 0.026273579184085377, "learning_rate": 1.1572052401746727e-06, "logits/chosen": -1.1082035303115845, "logits/rejected": -0.9229341745376587, "logps/chosen": -254.1974639892578, "logps/rejected": -1084.651123046875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.05680902674794197, "rewards/margins": 7.9635491371154785, "rewards/rejected": -7.906739711761475, "step": 530 }, { "epoch": 2.3580786026200875, "grad_norm": 0.025098421029790034, "learning_rate": 1.1790393013100437e-06, "logits/chosen": -1.0478992462158203, "logits/rejected": -0.890853226184845, "logps/chosen": -251.1920623779297, "logps/rejected": -1053.2734375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.2013663947582245, "rewards/margins": 7.940675258636475, "rewards/rejected": -7.7393083572387695, "step": 540 }, { "epoch": 2.4017467248908297, "grad_norm": 0.0887382802977298, "learning_rate": 1.200873362445415e-06, "logits/chosen": -1.0803998708724976, "logits/rejected": -0.9325240850448608, "logps/chosen": -245.40225219726562, "logps/rejected": -1055.2822265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.20704713463783264, "rewards/margins": 7.924256324768066, "rewards/rejected": -7.7172088623046875, "step": 550 }, { "epoch": 2.445414847161572, "grad_norm": 0.024423044887301376, "learning_rate": 1.222707423580786e-06, "logits/chosen": -1.0573489665985107, "logits/rejected": -0.9667012095451355, "logps/chosen": -290.100830078125, "logps/rejected": -1040.1114501953125, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.12762892246246338, "rewards/margins": 7.949681758880615, "rewards/rejected": -7.8220534324646, "step": 560 }, { "epoch": 2.489082969432314, "grad_norm": 0.02575418353459741, "learning_rate": 1.2445414847161573e-06, "logits/chosen": -1.0772401094436646, "logits/rejected": -0.9403718113899231, "logps/chosen": -274.24102783203125, "logps/rejected": -1135.19677734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.17034362256526947, "rewards/margins": 8.53914737701416, "rewards/rejected": -8.368803024291992, "step": 570 }, { "epoch": 2.532751091703057, "grad_norm": 0.03780559722884256, "learning_rate": 1.2663755458515283e-06, "logits/chosen": -1.0533926486968994, "logits/rejected": -0.9145461916923523, "logps/chosen": -259.5504455566406, "logps/rejected": -1115.683349609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.13043391704559326, "rewards/margins": 8.546215057373047, "rewards/rejected": -8.415781021118164, "step": 580 }, { "epoch": 2.576419213973799, "grad_norm": 0.02426042899748694, "learning_rate": 1.2882096069868996e-06, "logits/chosen": -1.152941107749939, "logits/rejected": -0.9298733472824097, "logps/chosen": -255.7036895751953, "logps/rejected": -1222.91552734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.10184156894683838, "rewards/margins": 9.185845375061035, "rewards/rejected": -9.084003448486328, "step": 590 }, { "epoch": 2.6200873362445414, "grad_norm": 0.03967025206790384, "learning_rate": 1.3100436681222709e-06, "logits/chosen": -1.0600261688232422, "logits/rejected": -0.898908793926239, "logps/chosen": -260.12152099609375, "logps/rejected": -1146.6922607421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.10553276538848877, "rewards/margins": 8.844304084777832, "rewards/rejected": -8.738770484924316, "step": 600 }, { "epoch": 2.6637554585152836, "grad_norm": 0.023357637450160187, "learning_rate": 1.3318777292576421e-06, "logits/chosen": -1.0672093629837036, "logits/rejected": -0.9305359721183777, "logps/chosen": -273.31585693359375, "logps/rejected": -1183.632568359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.07606213539838791, "rewards/margins": 9.117072105407715, "rewards/rejected": -9.041009902954102, "step": 610 }, { "epoch": 2.7074235807860263, "grad_norm": 0.019478428280254357, "learning_rate": 1.3537117903930134e-06, "logits/chosen": -1.0217585563659668, "logits/rejected": -0.8505188226699829, "logps/chosen": -266.7040710449219, "logps/rejected": -1220.8331298828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.07492303103208542, "rewards/margins": 9.350958824157715, "rewards/rejected": -9.276036262512207, "step": 620 }, { "epoch": 2.7510917030567685, "grad_norm": 0.337233399285637, "learning_rate": 1.3755458515283842e-06, "logits/chosen": -1.05553138256073, "logits/rejected": -0.9395098686218262, "logps/chosen": -280.6686096191406, "logps/rejected": -1183.9263916015625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.0074261934496462345, "rewards/margins": 9.025650024414062, "rewards/rejected": -9.018223762512207, "step": 630 }, { "epoch": 2.7947598253275108, "grad_norm": 0.011216478629922396, "learning_rate": 1.3973799126637555e-06, "logits/chosen": -1.034529209136963, "logits/rejected": -0.8385549783706665, "logps/chosen": -251.7305450439453, "logps/rejected": -1212.007080078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.1309480369091034, "rewards/margins": 9.386816024780273, "rewards/rejected": -9.255867004394531, "step": 640 }, { "epoch": 2.8384279475982535, "grad_norm": 0.021230674208718586, "learning_rate": 1.4192139737991267e-06, "logits/chosen": -1.0360944271087646, "logits/rejected": -0.8138653039932251, "logps/chosen": -246.37820434570312, "logps/rejected": -1229.614013671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.028237273916602135, "rewards/margins": 9.502202987670898, "rewards/rejected": -9.473965644836426, "step": 650 }, { "epoch": 2.8820960698689957, "grad_norm": 0.011954124501318894, "learning_rate": 1.441048034934498e-06, "logits/chosen": -1.0472056865692139, "logits/rejected": -0.9029546976089478, "logps/chosen": -284.7785339355469, "logps/rejected": -1231.779296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.003605033503845334, "rewards/margins": 9.536118507385254, "rewards/rejected": -9.539722442626953, "step": 660 }, { "epoch": 2.925764192139738, "grad_norm": 0.6101316992313331, "learning_rate": 1.4628820960698693e-06, "logits/chosen": -1.1043437719345093, "logits/rejected": -0.92271888256073, "logps/chosen": -299.22222900390625, "logps/rejected": -1308.3040771484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.010920705273747444, "rewards/margins": 10.173480033874512, "rewards/rejected": -10.16256046295166, "step": 670 }, { "epoch": 2.96943231441048, "grad_norm": 0.016495782615525163, "learning_rate": 1.4847161572052403e-06, "logits/chosen": -1.0588533878326416, "logits/rejected": -0.9275693893432617, "logps/chosen": -287.153564453125, "logps/rejected": -1251.1766357421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.09794937074184418, "rewards/margins": 9.84514331817627, "rewards/rejected": -9.747194290161133, "step": 680 }, { "epoch": 3.013100436681223, "grad_norm": 0.020168549540481716, "learning_rate": 1.5065502183406114e-06, "logits/chosen": -1.1314984560012817, "logits/rejected": -0.9320915341377258, "logps/chosen": -265.8308410644531, "logps/rejected": -1311.848876953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.09155896306037903, "rewards/margins": 10.220664978027344, "rewards/rejected": -10.129104614257812, "step": 690 }, { "epoch": 3.056768558951965, "grad_norm": 0.027910275891126074, "learning_rate": 1.5283842794759826e-06, "logits/chosen": -1.0776145458221436, "logits/rejected": -0.8800934553146362, "logps/chosen": -255.56607055664062, "logps/rejected": -1255.459228515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.15623870491981506, "rewards/margins": 9.873703956604004, "rewards/rejected": -9.717466354370117, "step": 700 }, { "epoch": 3.1004366812227073, "grad_norm": 0.0070801188884802285, "learning_rate": 1.550218340611354e-06, "logits/chosen": -1.109588861465454, "logits/rejected": -0.9777756929397583, "logps/chosen": -307.5519104003906, "logps/rejected": -1268.3970947265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.08143647760152817, "rewards/margins": 10.04019832611084, "rewards/rejected": -9.958761215209961, "step": 710 }, { "epoch": 3.14410480349345, "grad_norm": 0.012532076949021014, "learning_rate": 1.5720524017467252e-06, "logits/chosen": -1.0879167318344116, "logits/rejected": -0.874890148639679, "logps/chosen": -269.1459655761719, "logps/rejected": -1333.4073486328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.04594755545258522, "rewards/margins": 10.501376152038574, "rewards/rejected": -10.455428123474121, "step": 720 }, { "epoch": 3.1877729257641922, "grad_norm": 0.03724281939895254, "learning_rate": 1.5938864628820962e-06, "logits/chosen": -1.0068161487579346, "logits/rejected": -0.7874962687492371, "logps/chosen": -257.151123046875, "logps/rejected": -1280.5509033203125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.14801672101020813, "rewards/margins": 10.215049743652344, "rewards/rejected": -10.067033767700195, "step": 730 }, { "epoch": 3.2314410480349345, "grad_norm": 0.01520014718852352, "learning_rate": 1.6157205240174673e-06, "logits/chosen": -1.0501787662506104, "logits/rejected": -0.8563941717147827, "logps/chosen": -272.47662353515625, "logps/rejected": -1382.72216796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.04600369185209274, "rewards/margins": 10.934247970581055, "rewards/rejected": -10.88824462890625, "step": 740 }, { "epoch": 3.2751091703056767, "grad_norm": 0.005054590869181455, "learning_rate": 1.6375545851528385e-06, "logits/chosen": -1.087894082069397, "logits/rejected": -0.8847143054008484, "logps/chosen": -277.385009765625, "logps/rejected": -1364.647216796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.043006978929042816, "rewards/margins": 10.669514656066895, "rewards/rejected": -10.626508712768555, "step": 750 }, { "epoch": 3.3187772925764194, "grad_norm": 0.027831701449727037, "learning_rate": 1.6593886462882098e-06, "logits/chosen": -1.0353024005889893, "logits/rejected": -0.8677101135253906, "logps/chosen": -288.21343994140625, "logps/rejected": -1296.6470947265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.007609467953443527, "rewards/margins": 10.307931900024414, "rewards/rejected": -10.315540313720703, "step": 760 }, { "epoch": 3.3624454148471616, "grad_norm": 0.0064908347036250515, "learning_rate": 1.681222707423581e-06, "logits/chosen": -1.0759131908416748, "logits/rejected": -0.9375082850456238, "logps/chosen": -301.4073181152344, "logps/rejected": -1328.899658203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.12093822658061981, "rewards/margins": 10.704856872558594, "rewards/rejected": -10.583918571472168, "step": 770 }, { "epoch": 3.406113537117904, "grad_norm": 0.01913931309829449, "learning_rate": 1.703056768558952e-06, "logits/chosen": -1.0479494333267212, "logits/rejected": -0.8500002026557922, "logps/chosen": -272.5672912597656, "logps/rejected": -1366.413330078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.11578051745891571, "rewards/margins": 10.844417572021484, "rewards/rejected": -10.728635787963867, "step": 780 }, { "epoch": 3.449781659388646, "grad_norm": 0.01352654177371944, "learning_rate": 1.7248908296943234e-06, "logits/chosen": -1.0406018495559692, "logits/rejected": -0.8509318232536316, "logps/chosen": -256.7829895019531, "logps/rejected": -1329.2520751953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.026228416711091995, "rewards/margins": 10.573269844055176, "rewards/rejected": -10.547040939331055, "step": 790 }, { "epoch": 3.493449781659389, "grad_norm": 0.013365741607665689, "learning_rate": 1.7467248908296944e-06, "logits/chosen": -1.0522687435150146, "logits/rejected": -0.8280200958251953, "logps/chosen": -270.0743408203125, "logps/rejected": -1426.38720703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.09368885308504105, "rewards/margins": 11.388612747192383, "rewards/rejected": -11.294923782348633, "step": 800 }, { "epoch": 3.537117903930131, "grad_norm": 0.03307042715507647, "learning_rate": 1.7685589519650657e-06, "logits/chosen": -1.008664608001709, "logits/rejected": -0.7871328592300415, "logps/chosen": -266.83843994140625, "logps/rejected": -1440.132568359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.08421434462070465, "rewards/margins": 11.51657485961914, "rewards/rejected": -11.43235969543457, "step": 810 }, { "epoch": 3.5807860262008733, "grad_norm": 0.05082513402252974, "learning_rate": 1.790393013100437e-06, "logits/chosen": -0.9665921926498413, "logits/rejected": -0.7286010980606079, "logps/chosen": -266.60504150390625, "logps/rejected": -1375.111572265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.012148882262408733, "rewards/margins": 10.996332168579102, "rewards/rejected": -11.008480072021484, "step": 820 }, { "epoch": 3.6244541484716155, "grad_norm": 0.032219014027810286, "learning_rate": 1.812227074235808e-06, "logits/chosen": -1.0228643417358398, "logits/rejected": -0.7778269052505493, "logps/chosen": -274.4452209472656, "logps/rejected": -1396.7493896484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.012340235523879528, "rewards/margins": 11.289251327514648, "rewards/rejected": -11.276910781860352, "step": 830 }, { "epoch": 3.668122270742358, "grad_norm": 0.005309464056793855, "learning_rate": 1.8340611353711792e-06, "logits/chosen": -1.0205094814300537, "logits/rejected": -0.8055097460746765, "logps/chosen": -292.59735107421875, "logps/rejected": -1460.403564453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.014723243191838264, "rewards/margins": 11.74005126953125, "rewards/rejected": -11.725327491760254, "step": 840 }, { "epoch": 3.7117903930131004, "grad_norm": 0.0075442896315395655, "learning_rate": 1.8558951965065503e-06, "logits/chosen": -1.071067214012146, "logits/rejected": -0.8644933700561523, "logps/chosen": -273.3328857421875, "logps/rejected": -1398.2947998046875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.04406129941344261, "rewards/margins": 11.203367233276367, "rewards/rejected": -11.159306526184082, "step": 850 }, { "epoch": 3.7554585152838427, "grad_norm": 0.03400158157872599, "learning_rate": 1.8777292576419216e-06, "logits/chosen": -1.0760852098464966, "logits/rejected": -0.8295665979385376, "logps/chosen": -264.36480712890625, "logps/rejected": -1412.6258544921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.08237462490797043, "rewards/margins": 11.298498153686523, "rewards/rejected": -11.216123580932617, "step": 860 }, { "epoch": 3.7991266375545854, "grad_norm": 0.02518299974558395, "learning_rate": 1.8995633187772928e-06, "logits/chosen": -1.0864571332931519, "logits/rejected": -0.8393993377685547, "logps/chosen": -258.7384948730469, "logps/rejected": -1467.83349609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.059117209166288376, "rewards/margins": 11.719305038452148, "rewards/rejected": -11.660186767578125, "step": 870 }, { "epoch": 3.8427947598253276, "grad_norm": 0.05507238043515042, "learning_rate": 1.921397379912664e-06, "logits/chosen": -1.0308911800384521, "logits/rejected": -0.8647695779800415, "logps/chosen": -297.03692626953125, "logps/rejected": -1390.5535888671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.03780413419008255, "rewards/margins": 11.256277084350586, "rewards/rejected": -11.218473434448242, "step": 880 }, { "epoch": 3.88646288209607, "grad_norm": 0.01369672540846241, "learning_rate": 1.943231441048035e-06, "logits/chosen": -0.9989644885063171, "logits/rejected": -0.7607526183128357, "logps/chosen": -257.9919128417969, "logps/rejected": -1388.3541259765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.08867905288934708, "rewards/margins": 11.300336837768555, "rewards/rejected": -11.211658477783203, "step": 890 }, { "epoch": 3.930131004366812, "grad_norm": 0.0063467021653359984, "learning_rate": 1.965065502183406e-06, "logits/chosen": -1.0111680030822754, "logits/rejected": -0.9323729276657104, "logps/chosen": -320.0979919433594, "logps/rejected": -1287.162353515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.09835447371006012, "rewards/margins": 10.564353942871094, "rewards/rejected": -10.662707328796387, "step": 900 }, { "epoch": 3.9737991266375547, "grad_norm": 0.008364983799534854, "learning_rate": 1.9868995633187772e-06, "logits/chosen": -1.040015459060669, "logits/rejected": -0.8308218121528625, "logps/chosen": -281.2102966308594, "logps/rejected": -1494.9273681640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.012609638273715973, "rewards/margins": 12.146071434020996, "rewards/rejected": -12.133462905883789, "step": 910 }, { "epoch": 4.0174672489082965, "grad_norm": 0.01041720302913254, "learning_rate": 2.0087336244541485e-06, "logits/chosen": -1.029693365097046, "logits/rejected": -0.7959299087524414, "logps/chosen": -284.14288330078125, "logps/rejected": -1473.79296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.034885674715042114, "rewards/margins": 11.989426612854004, "rewards/rejected": -12.024312019348145, "step": 920 }, { "epoch": 4.06113537117904, "grad_norm": 0.06812518524476187, "learning_rate": 2.0305676855895198e-06, "logits/chosen": -1.036238431930542, "logits/rejected": -0.8368526697158813, "logps/chosen": -282.412109375, "logps/rejected": -1530.1536865234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.024571606889367104, "rewards/margins": 12.484519004821777, "rewards/rejected": -12.509092330932617, "step": 930 }, { "epoch": 4.104803493449782, "grad_norm": 0.011334160266513337, "learning_rate": 2.052401746724891e-06, "logits/chosen": -1.0046826601028442, "logits/rejected": -0.8335267901420593, "logps/chosen": -299.3295593261719, "logps/rejected": -1532.0443115234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.08639846742153168, "rewards/margins": 12.485013008117676, "rewards/rejected": -12.571412086486816, "step": 940 }, { "epoch": 4.148471615720524, "grad_norm": 0.007293258625842997, "learning_rate": 2.0742358078602623e-06, "logits/chosen": -1.0440804958343506, "logits/rejected": -0.8221460580825806, "logps/chosen": -271.626953125, "logps/rejected": -1615.7877197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.021669497713446617, "rewards/margins": 13.2772216796875, "rewards/rejected": -13.255552291870117, "step": 950 }, { "epoch": 4.192139737991266, "grad_norm": 0.0036293425606275364, "learning_rate": 2.096069868995633e-06, "logits/chosen": -0.9749218821525574, "logits/rejected": -0.762458086013794, "logps/chosen": -287.54425048828125, "logps/rejected": -1501.0009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.10403977334499359, "rewards/margins": 12.288002014160156, "rewards/rejected": -12.39204216003418, "step": 960 }, { "epoch": 4.235807860262009, "grad_norm": 0.01657616839583468, "learning_rate": 2.1179039301310044e-06, "logits/chosen": -1.0912045240402222, "logits/rejected": -0.8178413510322571, "logps/chosen": -273.39495849609375, "logps/rejected": -1674.9853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.032420337200164795, "rewards/margins": 13.737632751464844, "rewards/rejected": -13.770054817199707, "step": 970 }, { "epoch": 4.279475982532751, "grad_norm": 0.004372947814429197, "learning_rate": 2.1397379912663756e-06, "logits/chosen": -1.075265645980835, "logits/rejected": -0.9077399373054504, "logps/chosen": -314.0255126953125, "logps/rejected": -1494.762939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.1669284552335739, "rewards/margins": 12.16824722290039, "rewards/rejected": -12.335174560546875, "step": 980 }, { "epoch": 4.323144104803493, "grad_norm": 0.006389101804840946, "learning_rate": 2.161572052401747e-06, "logits/chosen": -1.0283783674240112, "logits/rejected": -0.8057458996772766, "logps/chosen": -298.6373596191406, "logps/rejected": -1542.0701904296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.12250169366598129, "rewards/margins": 12.555821418762207, "rewards/rejected": -12.678323745727539, "step": 990 }, { "epoch": 4.366812227074236, "grad_norm": 0.011293858107384805, "learning_rate": 2.183406113537118e-06, "logits/chosen": -1.0529570579528809, "logits/rejected": -0.9011169672012329, "logps/chosen": -311.1553039550781, "logps/rejected": -1506.985107421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.04453587904572487, "rewards/margins": 12.396936416625977, "rewards/rejected": -12.441473007202148, "step": 1000 }, { "epoch": 4.4104803493449785, "grad_norm": 0.023483470090715674, "learning_rate": 2.205240174672489e-06, "logits/chosen": -0.9941900968551636, "logits/rejected": -0.7742090821266174, "logps/chosen": -296.84356689453125, "logps/rejected": -1588.9974365234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.018021302297711372, "rewards/margins": 13.055638313293457, "rewards/rejected": -13.07365894317627, "step": 1010 }, { "epoch": 4.454148471615721, "grad_norm": 0.0016337437068619145, "learning_rate": 2.2270742358078603e-06, "logits/chosen": -1.028407335281372, "logits/rejected": -0.8429180979728699, "logps/chosen": -291.589111328125, "logps/rejected": -1522.6380615234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.11200809478759766, "rewards/margins": 12.524651527404785, "rewards/rejected": -12.636659622192383, "step": 1020 }, { "epoch": 4.497816593886463, "grad_norm": 0.03025450441278019, "learning_rate": 2.2489082969432315e-06, "logits/chosen": -0.9966410398483276, "logits/rejected": -0.7716367244720459, "logps/chosen": -304.3818359375, "logps/rejected": -1604.009033203125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.06505052745342255, "rewards/margins": 13.266500473022461, "rewards/rejected": -13.331552505493164, "step": 1030 }, { "epoch": 4.541484716157205, "grad_norm": 0.0023995939415842075, "learning_rate": 2.270742358078603e-06, "logits/chosen": -1.0893805027008057, "logits/rejected": -0.8689071536064148, "logps/chosen": -301.534912109375, "logps/rejected": -1548.4366455078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.048710450530052185, "rewards/margins": 12.690649032592773, "rewards/rejected": -12.739359855651855, "step": 1040 }, { "epoch": 4.585152838427947, "grad_norm": 0.0027864250818994464, "learning_rate": 2.292576419213974e-06, "logits/chosen": -1.0893011093139648, "logits/rejected": -0.8222888708114624, "logps/chosen": -288.8246765136719, "logps/rejected": -1636.8326416015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.010482832789421082, "rewards/margins": 13.304609298706055, "rewards/rejected": -13.315092086791992, "step": 1050 }, { "epoch": 4.62882096069869, "grad_norm": 0.04336832084611945, "learning_rate": 2.3144104803493453e-06, "logits/chosen": -1.0463380813598633, "logits/rejected": -0.7753561735153198, "logps/chosen": -290.43927001953125, "logps/rejected": -1660.827392578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.03375621885061264, "rewards/margins": 13.665536880493164, "rewards/rejected": -13.69929313659668, "step": 1060 }, { "epoch": 4.672489082969433, "grad_norm": 0.001699638900515744, "learning_rate": 2.336244541484716e-06, "logits/chosen": -1.0077494382858276, "logits/rejected": -0.8233901262283325, "logps/chosen": -319.26568603515625, "logps/rejected": -1522.940673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.05182263255119324, "rewards/margins": 12.62773609161377, "rewards/rejected": -12.679557800292969, "step": 1070 }, { "epoch": 4.716157205240175, "grad_norm": 0.005876223689202358, "learning_rate": 2.3580786026200874e-06, "logits/chosen": -1.0393884181976318, "logits/rejected": -0.8210234642028809, "logps/chosen": -306.80389404296875, "logps/rejected": -1692.9970703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.10459591448307037, "rewards/margins": 13.898351669311523, "rewards/rejected": -14.002946853637695, "step": 1080 }, { "epoch": 4.759825327510917, "grad_norm": 0.0020800187761925385, "learning_rate": 2.3799126637554587e-06, "logits/chosen": -1.060529351234436, "logits/rejected": -0.7672148942947388, "logps/chosen": -277.88665771484375, "logps/rejected": -1748.0386962890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.14030799269676208, "rewards/margins": 14.338586807250977, "rewards/rejected": -14.478894233703613, "step": 1090 }, { "epoch": 4.8034934497816595, "grad_norm": 0.005665591488119185, "learning_rate": 2.40174672489083e-06, "logits/chosen": -0.9569341540336609, "logits/rejected": -0.7119146585464478, "logps/chosen": -284.10693359375, "logps/rejected": -1582.0555419921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.15636774897575378, "rewards/margins": 13.032443046569824, "rewards/rejected": -13.188810348510742, "step": 1100 }, { "epoch": 4.847161572052402, "grad_norm": 0.002167736266370639, "learning_rate": 2.423580786026201e-06, "logits/chosen": -1.017564058303833, "logits/rejected": -0.7689334154129028, "logps/chosen": -290.9665832519531, "logps/rejected": -1671.2760009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.11698714643716812, "rewards/margins": 13.874715805053711, "rewards/rejected": -13.991703987121582, "step": 1110 }, { "epoch": 4.890829694323144, "grad_norm": 0.001022919900326056, "learning_rate": 2.445414847161572e-06, "logits/chosen": -0.9738149642944336, "logits/rejected": -0.6934366226196289, "logps/chosen": -271.05157470703125, "logps/rejected": -1697.1956787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.08079036325216293, "rewards/margins": 14.049688339233398, "rewards/rejected": -14.130477905273438, "step": 1120 }, { "epoch": 4.934497816593886, "grad_norm": 0.026298589716650264, "learning_rate": 2.4672489082969433e-06, "logits/chosen": -0.9869601130485535, "logits/rejected": -0.7643391489982605, "logps/chosen": -299.12017822265625, "logps/rejected": -1608.2149658203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.06933470815420151, "rewards/margins": 13.391583442687988, "rewards/rejected": -13.460917472839355, "step": 1130 }, { "epoch": 4.978165938864628, "grad_norm": 0.0008784318629078149, "learning_rate": 2.4890829694323146e-06, "logits/chosen": -1.055574655532837, "logits/rejected": -0.8952911496162415, "logps/chosen": -353.84344482421875, "logps/rejected": -1719.22265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2365751713514328, "rewards/margins": 14.235481262207031, "rewards/rejected": -14.472058296203613, "step": 1140 }, { "epoch": 5.021834061135372, "grad_norm": 0.004673714651830638, "learning_rate": 2.5109170305676854e-06, "logits/chosen": -1.0722620487213135, "logits/rejected": -0.7664593458175659, "logps/chosen": -274.0126953125, "logps/rejected": -1748.3970947265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.1063443273305893, "rewards/margins": 14.473123550415039, "rewards/rejected": -14.579465866088867, "step": 1150 }, { "epoch": 5.065502183406114, "grad_norm": 0.0011991739375122957, "learning_rate": 2.5327510917030567e-06, "logits/chosen": -1.0603504180908203, "logits/rejected": -0.8141652941703796, "logps/chosen": -310.4181823730469, "logps/rejected": -1731.353271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.1581873893737793, "rewards/margins": 14.344244003295898, "rewards/rejected": -14.50243091583252, "step": 1160 }, { "epoch": 5.109170305676856, "grad_norm": 0.002909320284115034, "learning_rate": 2.554585152838428e-06, "logits/chosen": -1.0466502904891968, "logits/rejected": -0.6875467300415039, "logps/chosen": -277.3587951660156, "logps/rejected": -1920.210205078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.10880859196186066, "rewards/margins": 15.945564270019531, "rewards/rejected": -16.054372787475586, "step": 1170 }, { "epoch": 5.152838427947598, "grad_norm": 0.005217614521220853, "learning_rate": 2.576419213973799e-06, "logits/chosen": -1.0007134675979614, "logits/rejected": -0.7871596217155457, "logps/chosen": -318.60723876953125, "logps/rejected": -1735.515380859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.0955287516117096, "rewards/margins": 14.574140548706055, "rewards/rejected": -14.669670104980469, "step": 1180 }, { "epoch": 5.1965065502183405, "grad_norm": 0.0026108297225441923, "learning_rate": 2.5982532751091705e-06, "logits/chosen": -1.0639702081680298, "logits/rejected": -0.8198869824409485, "logps/chosen": -280.21771240234375, "logps/rejected": -1786.991455078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.12469766288995743, "rewards/margins": 14.866384506225586, "rewards/rejected": -14.991083145141602, "step": 1190 }, { "epoch": 5.240174672489083, "grad_norm": 0.002262060286049751, "learning_rate": 2.6200873362445417e-06, "logits/chosen": -1.0841073989868164, "logits/rejected": -0.9098466634750366, "logps/chosen": -334.71795654296875, "logps/rejected": -1755.4521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.290884405374527, "rewards/margins": 14.646441459655762, "rewards/rejected": -14.937326431274414, "step": 1200 }, { "epoch": 5.283842794759825, "grad_norm": 0.0036575093356928565, "learning_rate": 2.641921397379913e-06, "logits/chosen": -1.0276038646697998, "logits/rejected": -0.7886050939559937, "logps/chosen": -299.50921630859375, "logps/rejected": -1759.7216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.17530933022499084, "rewards/margins": 14.672393798828125, "rewards/rejected": -14.847702026367188, "step": 1210 }, { "epoch": 5.327510917030567, "grad_norm": 0.001924787541735357, "learning_rate": 2.6637554585152842e-06, "logits/chosen": -1.0151865482330322, "logits/rejected": -0.758797824382782, "logps/chosen": -291.7287902832031, "logps/rejected": -1784.828857421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.1621318757534027, "rewards/margins": 14.981706619262695, "rewards/rejected": -15.143839836120605, "step": 1220 }, { "epoch": 5.37117903930131, "grad_norm": 0.004721617344330927, "learning_rate": 2.6855895196506555e-06, "logits/chosen": -1.0915393829345703, "logits/rejected": -0.8418958783149719, "logps/chosen": -301.97857666015625, "logps/rejected": -1854.0426025390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.09268920868635178, "rewards/margins": 15.58464527130127, "rewards/rejected": -15.677332878112793, "step": 1230 }, { "epoch": 5.414847161572053, "grad_norm": 0.0026247376263118893, "learning_rate": 2.7074235807860268e-06, "logits/chosen": -1.0333033800125122, "logits/rejected": -0.8034351468086243, "logps/chosen": -298.00738525390625, "logps/rejected": -1792.6214599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.17875468730926514, "rewards/margins": 14.989255905151367, "rewards/rejected": -15.168009757995605, "step": 1240 }, { "epoch": 5.458515283842795, "grad_norm": 0.0060233003051233585, "learning_rate": 2.729257641921398e-06, "logits/chosen": -1.0356143712997437, "logits/rejected": -0.774070143699646, "logps/chosen": -288.4607849121094, "logps/rejected": -1893.7113037109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.1110936775803566, "rewards/margins": 15.786157608032227, "rewards/rejected": -15.897252082824707, "step": 1250 }, { "epoch": 5.502183406113537, "grad_norm": 0.0017693554979092755, "learning_rate": 2.7510917030567684e-06, "logits/chosen": -1.0685635805130005, "logits/rejected": -0.746213972568512, "logps/chosen": -277.7631530761719, "logps/rejected": -1987.2008056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.07833601534366608, "rewards/margins": 16.780868530273438, "rewards/rejected": -16.859203338623047, "step": 1260 }, { "epoch": 5.545851528384279, "grad_norm": 0.015278209897670383, "learning_rate": 2.7729257641921397e-06, "logits/chosen": -1.038823127746582, "logits/rejected": -0.7490204572677612, "logps/chosen": -271.64654541015625, "logps/rejected": -1775.4935302734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.09689785540103912, "rewards/margins": 14.988189697265625, "rewards/rejected": -15.085088729858398, "step": 1270 }, { "epoch": 5.5895196506550215, "grad_norm": 0.013179593576006645, "learning_rate": 2.794759825327511e-06, "logits/chosen": -1.0802757740020752, "logits/rejected": -0.7876217365264893, "logps/chosen": -299.06817626953125, "logps/rejected": -1958.2044677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.10966118425130844, "rewards/margins": 16.43094253540039, "rewards/rejected": -16.540599822998047, "step": 1280 }, { "epoch": 5.633187772925764, "grad_norm": 0.00463183578204575, "learning_rate": 2.8165938864628822e-06, "logits/chosen": -0.9886074066162109, "logits/rejected": -0.7129583358764648, "logps/chosen": -283.6441955566406, "logps/rejected": -1866.8072509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.07100297510623932, "rewards/margins": 15.749720573425293, "rewards/rejected": -15.820724487304688, "step": 1290 }, { "epoch": 5.676855895196507, "grad_norm": 0.0020575515565097605, "learning_rate": 2.8384279475982535e-06, "logits/chosen": -0.9916365742683411, "logits/rejected": -0.7142736911773682, "logps/chosen": -272.6620178222656, "logps/rejected": -1800.623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.0888563022017479, "rewards/margins": 15.213602066040039, "rewards/rejected": -15.302457809448242, "step": 1300 }, { "epoch": 5.720524017467249, "grad_norm": 0.001406773828181454, "learning_rate": 2.8602620087336248e-06, "logits/chosen": -0.987761378288269, "logits/rejected": -0.7714183926582336, "logps/chosen": -298.6223449707031, "logps/rejected": -1822.839111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.16619773209095, "rewards/margins": 15.492383003234863, "rewards/rejected": -15.658578872680664, "step": 1310 }, { "epoch": 5.764192139737991, "grad_norm": 0.004190364330280941, "learning_rate": 2.882096069868996e-06, "logits/chosen": -0.9624581336975098, "logits/rejected": -0.7134844064712524, "logps/chosen": -311.7620544433594, "logps/rejected": -1870.9615478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2308339774608612, "rewards/margins": 15.71532917022705, "rewards/rejected": -15.94616413116455, "step": 1320 }, { "epoch": 5.807860262008734, "grad_norm": 0.0008699247843495859, "learning_rate": 2.9039301310043673e-06, "logits/chosen": -1.0361287593841553, "logits/rejected": -0.7062743902206421, "logps/chosen": -280.97174072265625, "logps/rejected": -1963.219482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.209587961435318, "rewards/margins": 16.44705581665039, "rewards/rejected": -16.65664291381836, "step": 1330 }, { "epoch": 5.851528384279476, "grad_norm": 0.0032422913945994425, "learning_rate": 2.9257641921397385e-06, "logits/chosen": -0.9603463411331177, "logits/rejected": -0.8057414293289185, "logps/chosen": -327.0865478515625, "logps/rejected": -1724.1302490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.21416297554969788, "rewards/margins": 14.60496711730957, "rewards/rejected": -14.819129943847656, "step": 1340 }, { "epoch": 5.895196506550218, "grad_norm": 0.003714111333321006, "learning_rate": 2.94759825327511e-06, "logits/chosen": -1.047400712966919, "logits/rejected": -0.7240643501281738, "logps/chosen": -296.6412048339844, "logps/rejected": -2103.31005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.24377670884132385, "rewards/margins": 17.869279861450195, "rewards/rejected": -18.113056182861328, "step": 1350 }, { "epoch": 5.93886462882096, "grad_norm": 0.036055093591937176, "learning_rate": 2.9694323144104806e-06, "logits/chosen": -1.0455726385116577, "logits/rejected": -0.7555108666419983, "logps/chosen": -292.35418701171875, "logps/rejected": -2078.142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2266191691160202, "rewards/margins": 17.647899627685547, "rewards/rejected": -17.87451934814453, "step": 1360 }, { "epoch": 5.9825327510917035, "grad_norm": 0.0009681794304194634, "learning_rate": 2.9912663755458515e-06, "logits/chosen": -1.0807570219039917, "logits/rejected": -0.7965426445007324, "logps/chosen": -277.0798034667969, "logps/rejected": -1930.9554443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.07557226717472076, "rewards/margins": 16.501859664916992, "rewards/rejected": -16.577430725097656, "step": 1370 }, { "epoch": 6.026200873362446, "grad_norm": 0.0006217230204094578, "learning_rate": 3.0131004366812227e-06, "logits/chosen": -0.9675992727279663, "logits/rejected": -0.6970279812812805, "logps/chosen": -277.9351501464844, "logps/rejected": -1834.124267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.12211129814386368, "rewards/margins": 15.519429206848145, "rewards/rejected": -15.641542434692383, "step": 1380 }, { "epoch": 6.069868995633188, "grad_norm": 0.000590236430831123, "learning_rate": 3.034934497816594e-06, "logits/chosen": -1.0724908113479614, "logits/rejected": -0.8346108198165894, "logps/chosen": -296.20635986328125, "logps/rejected": -1908.966064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.11752160638570786, "rewards/margins": 16.306102752685547, "rewards/rejected": -16.423625946044922, "step": 1390 }, { "epoch": 6.11353711790393, "grad_norm": 0.0010789600472434582, "learning_rate": 3.0567685589519653e-06, "logits/chosen": -1.011857271194458, "logits/rejected": -0.6882113218307495, "logps/chosen": -271.4835510253906, "logps/rejected": -2021.258544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.19267281889915466, "rewards/margins": 17.091182708740234, "rewards/rejected": -17.283855438232422, "step": 1400 }, { "epoch": 6.157205240174672, "grad_norm": 0.021162229163187857, "learning_rate": 3.0786026200873365e-06, "logits/chosen": -1.0202292203903198, "logits/rejected": -0.8551548719406128, "logps/chosen": -327.4979553222656, "logps/rejected": -1939.2249755859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.1539764553308487, "rewards/margins": 16.620744705200195, "rewards/rejected": -16.774723052978516, "step": 1410 }, { "epoch": 6.200873362445415, "grad_norm": 0.0005342017818896306, "learning_rate": 3.100436681222708e-06, "logits/chosen": -1.03046452999115, "logits/rejected": -0.8124139904975891, "logps/chosen": -297.50396728515625, "logps/rejected": -1918.8931884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.15192511677742004, "rewards/margins": 16.413522720336914, "rewards/rejected": -16.565446853637695, "step": 1420 }, { "epoch": 6.244541484716157, "grad_norm": 0.00038738226302852504, "learning_rate": 3.122270742358079e-06, "logits/chosen": -1.054620385169983, "logits/rejected": -0.8183094263076782, "logps/chosen": -322.5667724609375, "logps/rejected": -1964.677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.16739939153194427, "rewards/margins": 16.75040054321289, "rewards/rejected": -16.917800903320312, "step": 1430 }, { "epoch": 6.2882096069869, "grad_norm": 0.0003899969239690314, "learning_rate": 3.1441048034934503e-06, "logits/chosen": -0.9781708717346191, "logits/rejected": -0.7461929321289062, "logps/chosen": -318.11480712890625, "logps/rejected": -2061.38916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.21302548050880432, "rewards/margins": 17.599262237548828, "rewards/rejected": -17.81229019165039, "step": 1440 }, { "epoch": 6.331877729257642, "grad_norm": 0.0017794021918756387, "learning_rate": 3.1659388646288216e-06, "logits/chosen": -0.992697536945343, "logits/rejected": -0.6795254945755005, "logps/chosen": -284.16131591796875, "logps/rejected": -2077.75830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.11765768378973007, "rewards/margins": 17.877195358276367, "rewards/rejected": -17.994853973388672, "step": 1450 }, { "epoch": 6.3755458515283845, "grad_norm": 0.0005848625217021266, "learning_rate": 3.1877729257641924e-06, "logits/chosen": -1.0637353658676147, "logits/rejected": -0.8648301959037781, "logps/chosen": -315.26336669921875, "logps/rejected": -1947.400146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2903938889503479, "rewards/margins": 16.5468692779541, "rewards/rejected": -16.837261199951172, "step": 1460 }, { "epoch": 6.419213973799127, "grad_norm": 0.0011193716581220827, "learning_rate": 3.2096069868995637e-06, "logits/chosen": -1.0862258672714233, "logits/rejected": -0.8526533842086792, "logps/chosen": -297.92730712890625, "logps/rejected": -1971.458740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2016836404800415, "rewards/margins": 16.951295852661133, "rewards/rejected": -17.152978897094727, "step": 1470 }, { "epoch": 6.462882096069869, "grad_norm": 0.02198274149575796, "learning_rate": 3.2314410480349345e-06, "logits/chosen": -1.0508402585983276, "logits/rejected": -0.9318809509277344, "logps/chosen": -342.34625244140625, "logps/rejected": -1788.6312255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2576822340488434, "rewards/margins": 15.318506240844727, "rewards/rejected": -15.576187133789062, "step": 1480 }, { "epoch": 6.506550218340611, "grad_norm": 0.0005850987137598556, "learning_rate": 3.2532751091703058e-06, "logits/chosen": -1.0742746591567993, "logits/rejected": -0.8181574940681458, "logps/chosen": -314.84027099609375, "logps/rejected": -2078.405029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.22380796074867249, "rewards/margins": 17.66670799255371, "rewards/rejected": -17.89051628112793, "step": 1490 }, { "epoch": 6.550218340611353, "grad_norm": 0.00028653981336086145, "learning_rate": 3.275109170305677e-06, "logits/chosen": -0.9984280467033386, "logits/rejected": -0.7892557978630066, "logps/chosen": -330.1844482421875, "logps/rejected": -2021.5736083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2903163433074951, "rewards/margins": 17.275798797607422, "rewards/rejected": -17.566116333007812, "step": 1500 }, { "epoch": 6.593886462882097, "grad_norm": 0.00033440333898832037, "learning_rate": 3.2969432314410483e-06, "logits/chosen": -1.073981523513794, "logits/rejected": -0.8415883183479309, "logps/chosen": -298.2444152832031, "logps/rejected": -2117.98974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2284885197877884, "rewards/margins": 18.054271697998047, "rewards/rejected": -18.282764434814453, "step": 1510 }, { "epoch": 6.637554585152839, "grad_norm": 0.0005527793368517275, "learning_rate": 3.3187772925764196e-06, "logits/chosen": -1.0744001865386963, "logits/rejected": -0.8150192499160767, "logps/chosen": -308.9847717285156, "logps/rejected": -2074.09326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.21661508083343506, "rewards/margins": 17.730154037475586, "rewards/rejected": -17.946767807006836, "step": 1520 }, { "epoch": 6.681222707423581, "grad_norm": 0.00033207105434677, "learning_rate": 3.340611353711791e-06, "logits/chosen": -1.08194899559021, "logits/rejected": -0.8752437829971313, "logps/chosen": -339.40020751953125, "logps/rejected": -2004.1890869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.33242908120155334, "rewards/margins": 17.010128021240234, "rewards/rejected": -17.34255599975586, "step": 1530 }, { "epoch": 6.724890829694323, "grad_norm": 0.022039497881533077, "learning_rate": 3.362445414847162e-06, "logits/chosen": -1.0350559949874878, "logits/rejected": -0.7454198598861694, "logps/chosen": -295.46429443359375, "logps/rejected": -2007.7119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2251167744398117, "rewards/margins": 17.17304229736328, "rewards/rejected": -17.39815902709961, "step": 1540 }, { "epoch": 6.7685589519650655, "grad_norm": 0.0008590802170732335, "learning_rate": 3.384279475982533e-06, "logits/chosen": -1.013310194015503, "logits/rejected": -0.8459439277648926, "logps/chosen": -316.28485107421875, "logps/rejected": -1971.481689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.1864933967590332, "rewards/margins": 17.023170471191406, "rewards/rejected": -17.20966339111328, "step": 1550 }, { "epoch": 6.812227074235808, "grad_norm": 0.00021966058827162038, "learning_rate": 3.406113537117904e-06, "logits/chosen": -1.1017309427261353, "logits/rejected": -0.809949517250061, "logps/chosen": -300.4569396972656, "logps/rejected": -2362.65673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.19471998512744904, "rewards/margins": 20.213533401489258, "rewards/rejected": -20.408252716064453, "step": 1560 }, { "epoch": 6.85589519650655, "grad_norm": 0.018179725423769777, "learning_rate": 3.4279475982532755e-06, "logits/chosen": -1.0191181898117065, "logits/rejected": -0.8013290166854858, "logps/chosen": -347.39837646484375, "logps/rejected": -2084.37109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.37629953026771545, "rewards/margins": 17.81521987915039, "rewards/rejected": -18.191518783569336, "step": 1570 }, { "epoch": 6.899563318777292, "grad_norm": 0.0006533337684519735, "learning_rate": 3.4497816593886467e-06, "logits/chosen": -1.0635725259780884, "logits/rejected": -0.8008363842964172, "logps/chosen": -281.1112060546875, "logps/rejected": -2110.783447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.17015284299850464, "rewards/margins": 18.185012817382812, "rewards/rejected": -18.35516357421875, "step": 1580 }, { "epoch": 6.9432314410480345, "grad_norm": 0.0023444223544918093, "learning_rate": 3.4716157205240176e-06, "logits/chosen": -1.06436026096344, "logits/rejected": -0.9005461931228638, "logps/chosen": -321.1396179199219, "logps/rejected": -1934.8779296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.1610509306192398, "rewards/margins": 16.5369873046875, "rewards/rejected": -16.698040008544922, "step": 1590 }, { "epoch": 6.986899563318778, "grad_norm": 0.0005317660437839775, "learning_rate": 3.493449781659389e-06, "logits/chosen": -1.066354513168335, "logits/rejected": -0.8196098208427429, "logps/chosen": -310.8006896972656, "logps/rejected": -1988.5816650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.14243224263191223, "rewards/margins": 17.098468780517578, "rewards/rejected": -17.24090576171875, "step": 1600 }, { "epoch": 7.03056768558952, "grad_norm": 0.008576159687200002, "learning_rate": 3.51528384279476e-06, "logits/chosen": -1.0608209371566772, "logits/rejected": -0.752027690410614, "logps/chosen": -275.2513122558594, "logps/rejected": -2143.815185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.16940443217754364, "rewards/margins": 18.423215866088867, "rewards/rejected": -18.59262466430664, "step": 1610 }, { "epoch": 7.074235807860262, "grad_norm": 0.0020199018897183348, "learning_rate": 3.5371179039301313e-06, "logits/chosen": -1.0418813228607178, "logits/rejected": -0.7335854768753052, "logps/chosen": -278.68255615234375, "logps/rejected": -2300.03466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.10322427749633789, "rewards/margins": 19.8756103515625, "rewards/rejected": -19.978836059570312, "step": 1620 }, { "epoch": 7.117903930131004, "grad_norm": 0.0011908446383915752, "learning_rate": 3.5589519650655026e-06, "logits/chosen": -1.0441044569015503, "logits/rejected": -0.7531333565711975, "logps/chosen": -276.1479797363281, "logps/rejected": -2161.417724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.17423273622989655, "rewards/margins": 18.689699172973633, "rewards/rejected": -18.863929748535156, "step": 1630 }, { "epoch": 7.1615720524017465, "grad_norm": 0.0032115848749195057, "learning_rate": 3.580786026200874e-06, "logits/chosen": -1.0433049201965332, "logits/rejected": -0.7512766718864441, "logps/chosen": -307.67633056640625, "logps/rejected": -2292.62646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.25965648889541626, "rewards/margins": 19.89341163635254, "rewards/rejected": -20.153064727783203, "step": 1640 }, { "epoch": 7.205240174672489, "grad_norm": 0.0007045445619620613, "learning_rate": 3.6026200873362447e-06, "logits/chosen": -1.0814634561538696, "logits/rejected": -0.8248990178108215, "logps/chosen": -292.111083984375, "logps/rejected": -2221.50537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.20753392577171326, "rewards/margins": 19.260774612426758, "rewards/rejected": -19.468307495117188, "step": 1650 }, { "epoch": 7.248908296943231, "grad_norm": 0.00243561205609059, "learning_rate": 3.624454148471616e-06, "logits/chosen": -1.1132924556732178, "logits/rejected": -0.8150212168693542, "logps/chosen": -286.9057922363281, "logps/rejected": -2308.690185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.26080751419067383, "rewards/margins": 19.81418800354004, "rewards/rejected": -20.074996948242188, "step": 1660 }, { "epoch": 7.292576419213974, "grad_norm": 0.0003185349859103909, "learning_rate": 3.6462882096069872e-06, "logits/chosen": -1.0473792552947998, "logits/rejected": -0.8249233961105347, "logps/chosen": -307.81280517578125, "logps/rejected": -2155.051025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.23299212753772736, "rewards/margins": 18.573911666870117, "rewards/rejected": -18.806903839111328, "step": 1670 }, { "epoch": 7.336244541484716, "grad_norm": 0.0009157760930102551, "learning_rate": 3.6681222707423585e-06, "logits/chosen": -0.9469618797302246, "logits/rejected": -0.7034221887588501, "logps/chosen": -294.6734924316406, "logps/rejected": -2237.697021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.21215610206127167, "rewards/margins": 19.365325927734375, "rewards/rejected": -19.577482223510742, "step": 1680 }, { "epoch": 7.379912663755459, "grad_norm": 0.00021148377256095528, "learning_rate": 3.6899563318777298e-06, "logits/chosen": -1.0770208835601807, "logits/rejected": -0.8840249180793762, "logps/chosen": -331.5302734375, "logps/rejected": -2178.466064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.32442861795425415, "rewards/margins": 18.76183319091797, "rewards/rejected": -19.08626365661621, "step": 1690 }, { "epoch": 7.423580786026201, "grad_norm": 0.007977878275722012, "learning_rate": 3.7117903930131006e-06, "logits/chosen": -1.0482943058013916, "logits/rejected": -0.826102614402771, "logps/chosen": -313.11163330078125, "logps/rejected": -2334.31298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2753329575061798, "rewards/margins": 20.11834716796875, "rewards/rejected": -20.393680572509766, "step": 1700 }, { "epoch": 7.467248908296943, "grad_norm": 0.007341002757910789, "learning_rate": 3.733624454148472e-06, "logits/chosen": -0.9559406042098999, "logits/rejected": -0.7573748826980591, "logps/chosen": -334.9615478515625, "logps/rejected": -2205.76123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.40518322587013245, "rewards/margins": 19.009510040283203, "rewards/rejected": -19.41469383239746, "step": 1710 }, { "epoch": 7.510917030567685, "grad_norm": 0.004867882357371039, "learning_rate": 3.755458515283843e-06, "logits/chosen": -1.1482198238372803, "logits/rejected": -0.9220088720321655, "logps/chosen": -318.21588134765625, "logps/rejected": -2348.99658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.319120854139328, "rewards/margins": 20.20169448852539, "rewards/rejected": -20.520814895629883, "step": 1720 }, { "epoch": 7.554585152838428, "grad_norm": 0.00021659811824570313, "learning_rate": 3.7772925764192144e-06, "logits/chosen": -1.044281005859375, "logits/rejected": -0.7464413642883301, "logps/chosen": -303.97198486328125, "logps/rejected": -2425.71728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2265804558992386, "rewards/margins": 21.094236373901367, "rewards/rejected": -21.320816040039062, "step": 1730 }, { "epoch": 7.598253275109171, "grad_norm": 0.0004549943124664391, "learning_rate": 3.7991266375545856e-06, "logits/chosen": -1.070457100868225, "logits/rejected": -0.7991530299186707, "logps/chosen": -314.72271728515625, "logps/rejected": -2301.8134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.31152811646461487, "rewards/margins": 19.908206939697266, "rewards/rejected": -20.219736099243164, "step": 1740 }, { "epoch": 7.641921397379913, "grad_norm": 0.000762199793715595, "learning_rate": 3.8209606986899565e-06, "logits/chosen": -1.1053345203399658, "logits/rejected": -0.8186489939689636, "logps/chosen": -293.10906982421875, "logps/rejected": -2498.965087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.16857507824897766, "rewards/margins": 21.851232528686523, "rewards/rejected": -22.019807815551758, "step": 1750 }, { "epoch": 7.685589519650655, "grad_norm": 0.0051639086582035585, "learning_rate": 3.842794759825328e-06, "logits/chosen": -1.0405030250549316, "logits/rejected": -0.8000315427780151, "logps/chosen": -300.1859436035156, "logps/rejected": -2385.20947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.24915000796318054, "rewards/margins": 20.66004753112793, "rewards/rejected": -20.909196853637695, "step": 1760 }, { "epoch": 7.729257641921397, "grad_norm": 0.001118186712657208, "learning_rate": 3.864628820960699e-06, "logits/chosen": -1.1220393180847168, "logits/rejected": -0.8782588839530945, "logps/chosen": -295.68121337890625, "logps/rejected": -2247.967041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2505021393299103, "rewards/margins": 19.498477935791016, "rewards/rejected": -19.748979568481445, "step": 1770 }, { "epoch": 7.77292576419214, "grad_norm": 0.00038937863589788736, "learning_rate": 3.88646288209607e-06, "logits/chosen": -1.0430997610092163, "logits/rejected": -0.7787622213363647, "logps/chosen": -292.9209289550781, "logps/rejected": -2202.70263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.15622052550315857, "rewards/margins": 19.106483459472656, "rewards/rejected": -19.262704849243164, "step": 1780 }, { "epoch": 7.816593886462882, "grad_norm": 0.0005447963133061583, "learning_rate": 3.9082969432314415e-06, "logits/chosen": -1.0804011821746826, "logits/rejected": -0.8173781633377075, "logps/chosen": -296.8170471191406, "logps/rejected": -2275.04150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.20234327018260956, "rewards/margins": 19.779094696044922, "rewards/rejected": -19.981435775756836, "step": 1790 }, { "epoch": 7.860262008733624, "grad_norm": 0.0006135625466790888, "learning_rate": 3.930131004366812e-06, "logits/chosen": -1.102577805519104, "logits/rejected": -0.8985908627510071, "logps/chosen": -321.27117919921875, "logps/rejected": -2241.82568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.23137608170509338, "rewards/margins": 19.502399444580078, "rewards/rejected": -19.733776092529297, "step": 1800 }, { "epoch": 7.903930131004367, "grad_norm": 0.00032446191104664894, "learning_rate": 3.951965065502183e-06, "logits/chosen": -1.0766944885253906, "logits/rejected": -0.873723030090332, "logps/chosen": -325.87109375, "logps/rejected": -2231.47119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2940670847892761, "rewards/margins": 19.385669708251953, "rewards/rejected": -19.679737091064453, "step": 1810 }, { "epoch": 7.9475982532751095, "grad_norm": 0.001574974324187702, "learning_rate": 3.9737991266375545e-06, "logits/chosen": -1.0221084356307983, "logits/rejected": -0.8618911504745483, "logps/chosen": -316.0317077636719, "logps/rejected": -2163.2421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.206668883562088, "rewards/margins": 18.93549156188965, "rewards/rejected": -19.142160415649414, "step": 1820 }, { "epoch": 7.991266375545852, "grad_norm": 0.00022419678896042116, "learning_rate": 3.995633187772926e-06, "logits/chosen": -1.0767072439193726, "logits/rejected": -0.8297048807144165, "logps/chosen": -307.5361328125, "logps/rejected": -2416.227294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.26914912462234497, "rewards/margins": 21.10599708557129, "rewards/rejected": -21.375144958496094, "step": 1830 }, { "epoch": 8.034934497816593, "grad_norm": 0.010963634132515956, "learning_rate": 4.017467248908297e-06, "logits/chosen": -1.08732271194458, "logits/rejected": -0.8851385116577148, "logps/chosen": -338.46710205078125, "logps/rejected": -2333.35888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3260396718978882, "rewards/margins": 20.212635040283203, "rewards/rejected": -20.53867530822754, "step": 1840 }, { "epoch": 8.078602620087336, "grad_norm": 0.000381010407384511, "learning_rate": 4.039301310043668e-06, "logits/chosen": -1.1183444261550903, "logits/rejected": -0.8612543344497681, "logps/chosen": -315.2994689941406, "logps/rejected": -2491.36328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2610783576965332, "rewards/margins": 21.668811798095703, "rewards/rejected": -21.929889678955078, "step": 1850 }, { "epoch": 8.12227074235808, "grad_norm": 0.0022042959611507694, "learning_rate": 4.0611353711790395e-06, "logits/chosen": -1.1090279817581177, "logits/rejected": -0.9295045733451843, "logps/chosen": -330.27386474609375, "logps/rejected": -2329.194091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.26038649678230286, "rewards/margins": 20.347036361694336, "rewards/rejected": -20.607421875, "step": 1860 }, { "epoch": 8.16593886462882, "grad_norm": 0.0004920801342588386, "learning_rate": 4.082969432314411e-06, "logits/chosen": -1.0943841934204102, "logits/rejected": -0.8274461627006531, "logps/chosen": -318.8438720703125, "logps/rejected": -2458.033447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3523164391517639, "rewards/margins": 21.382781982421875, "rewards/rejected": -21.735097885131836, "step": 1870 }, { "epoch": 8.209606986899564, "grad_norm": 0.0009447134000736758, "learning_rate": 4.104803493449782e-06, "logits/chosen": -1.098156213760376, "logits/rejected": -0.8561164736747742, "logps/chosen": -311.258056640625, "logps/rejected": -2481.31298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3264026939868927, "rewards/margins": 21.64284896850586, "rewards/rejected": -21.96925163269043, "step": 1880 }, { "epoch": 8.253275109170305, "grad_norm": 0.00023644058574262168, "learning_rate": 4.126637554585153e-06, "logits/chosen": -1.087120532989502, "logits/rejected": -0.8407844305038452, "logps/chosen": -323.6820373535156, "logps/rejected": -2449.951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.32164669036865234, "rewards/margins": 21.341232299804688, "rewards/rejected": -21.662879943847656, "step": 1890 }, { "epoch": 8.296943231441048, "grad_norm": 0.0012447155643288718, "learning_rate": 4.1484716157205246e-06, "logits/chosen": -1.1435941457748413, "logits/rejected": -1.009029746055603, "logps/chosen": -356.09906005859375, "logps/rejected": -2268.1494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3430097997188568, "rewards/margins": 19.656940460205078, "rewards/rejected": -19.999950408935547, "step": 1900 }, { "epoch": 8.34061135371179, "grad_norm": 0.009259501540196833, "learning_rate": 4.170305676855895e-06, "logits/chosen": -1.080102562904358, "logits/rejected": -0.9438093304634094, "logps/chosen": -329.1517333984375, "logps/rejected": -2090.31591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.26612958312034607, "rewards/margins": 18.181472778320312, "rewards/rejected": -18.447603225708008, "step": 1910 }, { "epoch": 8.384279475982533, "grad_norm": 0.00011445202410234254, "learning_rate": 4.192139737991266e-06, "logits/chosen": -1.098240613937378, "logits/rejected": -0.907220721244812, "logps/chosen": -330.1543884277344, "logps/rejected": -2240.404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.25336986780166626, "rewards/margins": 19.61066436767578, "rewards/rejected": -19.86403465270996, "step": 1920 }, { "epoch": 8.427947598253276, "grad_norm": 0.0003855809296457614, "learning_rate": 4.2139737991266375e-06, "logits/chosen": -1.0787742137908936, "logits/rejected": -0.8587201237678528, "logps/chosen": -284.99200439453125, "logps/rejected": -2388.51611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.14136537909507751, "rewards/margins": 20.929805755615234, "rewards/rejected": -21.071168899536133, "step": 1930 }, { "epoch": 8.471615720524017, "grad_norm": 0.00013697952997553833, "learning_rate": 4.235807860262009e-06, "logits/chosen": -1.054368257522583, "logits/rejected": -0.7648724317550659, "logps/chosen": -276.61810302734375, "logps/rejected": -2521.792236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.1677180528640747, "rewards/margins": 22.203210830688477, "rewards/rejected": -22.370929718017578, "step": 1940 }, { "epoch": 8.51528384279476, "grad_norm": 0.0002293560743829052, "learning_rate": 4.25764192139738e-06, "logits/chosen": -1.1435911655426025, "logits/rejected": -0.8438609838485718, "logps/chosen": -301.98040771484375, "logps/rejected": -2672.17431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.20891527831554413, "rewards/margins": 23.413738250732422, "rewards/rejected": -23.62265396118164, "step": 1950 }, { "epoch": 8.558951965065502, "grad_norm": 0.00019322620168389897, "learning_rate": 4.279475982532751e-06, "logits/chosen": -1.1010676622390747, "logits/rejected": -0.8481037020683289, "logps/chosen": -307.30902099609375, "logps/rejected": -2521.453857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3874928951263428, "rewards/margins": 21.796045303344727, "rewards/rejected": -22.18354034423828, "step": 1960 }, { "epoch": 8.602620087336245, "grad_norm": 0.001175360995308118, "learning_rate": 4.3013100436681226e-06, "logits/chosen": -1.0651524066925049, "logits/rejected": -0.834464430809021, "logps/chosen": -301.6846923828125, "logps/rejected": -2450.784912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.23863554000854492, "rewards/margins": 21.526622772216797, "rewards/rejected": -21.765254974365234, "step": 1970 }, { "epoch": 8.646288209606986, "grad_norm": 0.00016373989132588334, "learning_rate": 4.323144104803494e-06, "logits/chosen": -1.1545393466949463, "logits/rejected": -0.909826934337616, "logps/chosen": -294.3090515136719, "logps/rejected": -2600.486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.21417172253131866, "rewards/margins": 22.84284782409668, "rewards/rejected": -23.057018280029297, "step": 1980 }, { "epoch": 8.68995633187773, "grad_norm": 0.004616209196268066, "learning_rate": 4.344978165938865e-06, "logits/chosen": -1.0946321487426758, "logits/rejected": -0.8385046124458313, "logps/chosen": -302.0052185058594, "logps/rejected": -2754.963134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.27731961011886597, "rewards/margins": 24.13459587097168, "rewards/rejected": -24.411914825439453, "step": 1990 }, { "epoch": 8.733624454148472, "grad_norm": 0.001220925865522623, "learning_rate": 4.366812227074236e-06, "logits/chosen": -1.0913336277008057, "logits/rejected": -0.9391576051712036, "logps/chosen": -349.30535888671875, "logps/rejected": -2253.70263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4201628565788269, "rewards/margins": 19.57293128967285, "rewards/rejected": -19.99309539794922, "step": 2000 }, { "epoch": 8.777292576419214, "grad_norm": 0.000695607193787943, "learning_rate": 4.388646288209608e-06, "logits/chosen": -1.0733985900878906, "logits/rejected": -0.8656112551689148, "logps/chosen": -315.151611328125, "logps/rejected": -2626.15673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3108351230621338, "rewards/margins": 22.997827529907227, "rewards/rejected": -23.308664321899414, "step": 2010 }, { "epoch": 8.820960698689957, "grad_norm": 0.0015031070051841988, "learning_rate": 4.410480349344978e-06, "logits/chosen": -1.0539964437484741, "logits/rejected": -0.7356933355331421, "logps/chosen": -273.33905029296875, "logps/rejected": -2734.50927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.15408843755722046, "rewards/margins": 24.043834686279297, "rewards/rejected": -24.197925567626953, "step": 2020 }, { "epoch": 8.864628820960698, "grad_norm": 0.0010344020904449584, "learning_rate": 4.432314410480349e-06, "logits/chosen": -1.1003235578536987, "logits/rejected": -0.9438535571098328, "logps/chosen": -346.1417236328125, "logps/rejected": -2279.29296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.30353760719299316, "rewards/margins": 19.837757110595703, "rewards/rejected": -20.141292572021484, "step": 2030 }, { "epoch": 8.908296943231441, "grad_norm": 0.002536555693613462, "learning_rate": 4.4541484716157205e-06, "logits/chosen": -1.1146800518035889, "logits/rejected": -0.8929511904716492, "logps/chosen": -318.5375061035156, "logps/rejected": -2508.639892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2395947277545929, "rewards/margins": 22.114337921142578, "rewards/rejected": -22.353931427001953, "step": 2040 }, { "epoch": 8.951965065502183, "grad_norm": 0.00031818519118804995, "learning_rate": 4.475982532751092e-06, "logits/chosen": -1.0807757377624512, "logits/rejected": -0.8622077703475952, "logps/chosen": -301.75091552734375, "logps/rejected": -2419.93212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2995307445526123, "rewards/margins": 21.251846313476562, "rewards/rejected": -21.551380157470703, "step": 2050 }, { "epoch": 8.995633187772926, "grad_norm": 0.00014811678667134488, "learning_rate": 4.497816593886463e-06, "logits/chosen": -1.0796912908554077, "logits/rejected": -0.815513014793396, "logps/chosen": -298.62469482421875, "logps/rejected": -2701.087646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2749957740306854, "rewards/margins": 23.87444305419922, "rewards/rejected": -24.14944076538086, "step": 2060 }, { "epoch": 9.039301310043669, "grad_norm": 8.28809435594768e-05, "learning_rate": 4.519650655021834e-06, "logits/chosen": -1.1091666221618652, "logits/rejected": -0.8657588958740234, "logps/chosen": -305.13397216796875, "logps/rejected": -2696.66259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.30476102232933044, "rewards/margins": 23.822467803955078, "rewards/rejected": -24.127227783203125, "step": 2070 }, { "epoch": 9.08296943231441, "grad_norm": 0.0014778275220867132, "learning_rate": 4.541484716157206e-06, "logits/chosen": -1.0045219659805298, "logits/rejected": -0.7398616671562195, "logps/chosen": -272.2821350097656, "logps/rejected": -2668.02001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.25276488065719604, "rewards/margins": 23.64044952392578, "rewards/rejected": -23.89321517944336, "step": 2080 }, { "epoch": 9.126637554585153, "grad_norm": 0.0017921797725820345, "learning_rate": 4.563318777292577e-06, "logits/chosen": -1.0982134342193604, "logits/rejected": -0.8968244791030884, "logps/chosen": -335.67437744140625, "logps/rejected": -2413.66357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39835888147354126, "rewards/margins": 21.05763053894043, "rewards/rejected": -21.455989837646484, "step": 2090 }, { "epoch": 9.170305676855895, "grad_norm": 0.0003727675634257327, "learning_rate": 4.585152838427948e-06, "logits/chosen": -1.0919723510742188, "logits/rejected": -0.9493483304977417, "logps/chosen": -347.2327880859375, "logps/rejected": -2420.51123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2742713391780853, "rewards/margins": 21.276058197021484, "rewards/rejected": -21.550331115722656, "step": 2100 }, { "epoch": 9.213973799126638, "grad_norm": 9.979894792196152e-05, "learning_rate": 4.606986899563319e-06, "logits/chosen": -1.084930181503296, "logits/rejected": -0.8425978422164917, "logps/chosen": -299.5055847167969, "logps/rejected": -2611.77294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3116936683654785, "rewards/margins": 23.056970596313477, "rewards/rejected": -23.368663787841797, "step": 2110 }, { "epoch": 9.25764192139738, "grad_norm": 0.00016221331088207686, "learning_rate": 4.628820960698691e-06, "logits/chosen": -1.0726209878921509, "logits/rejected": -0.8374358415603638, "logps/chosen": -301.31640625, "logps/rejected": -2483.377197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43355685472488403, "rewards/margins": 21.787792205810547, "rewards/rejected": -22.221351623535156, "step": 2120 }, { "epoch": 9.301310043668122, "grad_norm": 0.00032812959097582007, "learning_rate": 4.650655021834061e-06, "logits/chosen": -1.0997345447540283, "logits/rejected": -0.8660569190979004, "logps/chosen": -314.3016662597656, "logps/rejected": -2684.4375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.26993027329444885, "rewards/margins": 23.734806060791016, "rewards/rejected": -24.004735946655273, "step": 2130 }, { "epoch": 9.344978165938866, "grad_norm": 0.00012302533731621353, "learning_rate": 4.672489082969432e-06, "logits/chosen": -1.0587929487228394, "logits/rejected": -0.8265043497085571, "logps/chosen": -278.526611328125, "logps/rejected": -2497.25927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3267934024333954, "rewards/margins": 21.941801071166992, "rewards/rejected": -22.26859474182129, "step": 2140 }, { "epoch": 9.388646288209607, "grad_norm": 0.00206151158278852, "learning_rate": 4.694323144104804e-06, "logits/chosen": -1.038883090019226, "logits/rejected": -0.7537696957588196, "logps/chosen": -304.181396484375, "logps/rejected": -2489.750732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.31951871514320374, "rewards/margins": 21.82440757751465, "rewards/rejected": -22.1439266204834, "step": 2150 }, { "epoch": 9.43231441048035, "grad_norm": 0.00010559442322448935, "learning_rate": 4.716157205240175e-06, "logits/chosen": -1.1295164823532104, "logits/rejected": -0.9322168231010437, "logps/chosen": -335.68646240234375, "logps/rejected": -2629.016357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43311238288879395, "rewards/margins": 23.092985153198242, "rewards/rejected": -23.526098251342773, "step": 2160 }, { "epoch": 9.475982532751091, "grad_norm": 0.0009937278512192375, "learning_rate": 4.737991266375546e-06, "logits/chosen": -1.1352860927581787, "logits/rejected": -0.9589874148368835, "logps/chosen": -318.0342712402344, "logps/rejected": -2602.77490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2743387818336487, "rewards/margins": 22.960086822509766, "rewards/rejected": -23.234424591064453, "step": 2170 }, { "epoch": 9.519650655021834, "grad_norm": 8.692179780263475e-05, "learning_rate": 4.759825327510917e-06, "logits/chosen": -1.1043927669525146, "logits/rejected": -0.8881224393844604, "logps/chosen": -323.64324951171875, "logps/rejected": -2472.098388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3210279047489166, "rewards/margins": 21.80451774597168, "rewards/rejected": -22.125547409057617, "step": 2180 }, { "epoch": 9.563318777292576, "grad_norm": 0.0016253249475624012, "learning_rate": 4.781659388646289e-06, "logits/chosen": -1.0375983715057373, "logits/rejected": -0.8884676694869995, "logps/chosen": -322.7120056152344, "logps/rejected": -2405.239013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.34034115076065063, "rewards/margins": 21.272212982177734, "rewards/rejected": -21.612552642822266, "step": 2190 }, { "epoch": 9.606986899563319, "grad_norm": 7.589461146118197e-05, "learning_rate": 4.80349344978166e-06, "logits/chosen": -1.0912082195281982, "logits/rejected": -0.8776786923408508, "logps/chosen": -304.48406982421875, "logps/rejected": -2726.15966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2850083112716675, "rewards/margins": 24.10079002380371, "rewards/rejected": -24.385799407958984, "step": 2200 }, { "epoch": 9.65065502183406, "grad_norm": 0.0011910658934673494, "learning_rate": 4.825327510917031e-06, "logits/chosen": -1.0419622659683228, "logits/rejected": -0.8294679522514343, "logps/chosen": -314.42608642578125, "logps/rejected": -2751.25, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33546608686447144, "rewards/margins": 24.362581253051758, "rewards/rejected": -24.698043823242188, "step": 2210 }, { "epoch": 9.694323144104803, "grad_norm": 0.0001253341192265355, "learning_rate": 4.847161572052402e-06, "logits/chosen": -1.1454894542694092, "logits/rejected": -0.896079421043396, "logps/chosen": -296.33636474609375, "logps/rejected": -3006.023681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.23516850173473358, "rewards/margins": 26.779285430908203, "rewards/rejected": -27.014446258544922, "step": 2220 }, { "epoch": 9.737991266375547, "grad_norm": 0.00042607865634575827, "learning_rate": 4.868995633187774e-06, "logits/chosen": -1.0135618448257446, "logits/rejected": -0.763437807559967, "logps/chosen": -281.0157775878906, "logps/rejected": -2703.194580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.25311702489852905, "rewards/margins": 23.934215545654297, "rewards/rejected": -24.187332153320312, "step": 2230 }, { "epoch": 9.781659388646288, "grad_norm": 0.000666340684893977, "learning_rate": 4.890829694323144e-06, "logits/chosen": -1.1140217781066895, "logits/rejected": -0.8749205470085144, "logps/chosen": -304.79278564453125, "logps/rejected": -2709.57958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2531175911426544, "rewards/margins": 23.879039764404297, "rewards/rejected": -24.132152557373047, "step": 2240 }, { "epoch": 9.825327510917031, "grad_norm": 0.0013889873915955593, "learning_rate": 4.912663755458515e-06, "logits/chosen": -1.1442655324935913, "logits/rejected": -0.8886277079582214, "logps/chosen": -309.7623291015625, "logps/rejected": -2886.984619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3217666745185852, "rewards/margins": 25.48569107055664, "rewards/rejected": -25.807458877563477, "step": 2250 }, { "epoch": 9.868995633187772, "grad_norm": 0.0004272361037956331, "learning_rate": 4.934497816593887e-06, "logits/chosen": -1.060805082321167, "logits/rejected": -0.8273374438285828, "logps/chosen": -282.62774658203125, "logps/rejected": -2716.838623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.20850077271461487, "rewards/margins": 23.999853134155273, "rewards/rejected": -24.20835304260254, "step": 2260 }, { "epoch": 9.912663755458516, "grad_norm": 0.0001424062926954326, "learning_rate": 4.956331877729258e-06, "logits/chosen": -1.11200749874115, "logits/rejected": -0.8911072611808777, "logps/chosen": -304.57489013671875, "logps/rejected": -2679.58544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.24182936549186707, "rewards/margins": 23.679983139038086, "rewards/rejected": -23.921812057495117, "step": 2270 }, { "epoch": 9.956331877729257, "grad_norm": 0.0003809319344929784, "learning_rate": 4.978165938864629e-06, "logits/chosen": -1.0894124507904053, "logits/rejected": -0.8800792694091797, "logps/chosen": -304.31048583984375, "logps/rejected": -2669.032470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3616107106208801, "rewards/margins": 23.49026107788086, "rewards/rejected": -23.85187339782715, "step": 2280 }, { "epoch": 10.0, "grad_norm": 0.0005298305648743474, "learning_rate": 5e-06, "logits/chosen": -1.0010058879852295, "logits/rejected": -0.8166346549987793, "logps/chosen": -316.7028503417969, "logps/rejected": -2720.515380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2724928557872772, "rewards/margins": 24.23125457763672, "rewards/rejected": -24.503747940063477, "step": 2290 }, { "epoch": 10.043668122270743, "grad_norm": 9.502307044558266e-05, "learning_rate": 4.999997095618307e-06, "logits/chosen": -1.059705376625061, "logits/rejected": -0.8408017158508301, "logps/chosen": -314.2841491699219, "logps/rejected": -2868.2021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.19375145435333252, "rewards/margins": 25.497318267822266, "rewards/rejected": -25.69107437133789, "step": 2300 }, { "epoch": 10.087336244541484, "grad_norm": 0.00013875764634502013, "learning_rate": 4.999988382479973e-06, "logits/chosen": -1.1216895580291748, "logits/rejected": -0.908464789390564, "logps/chosen": -291.9554138183594, "logps/rejected": -2772.5439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2752940058708191, "rewards/margins": 24.54482650756836, "rewards/rejected": -24.82012367248535, "step": 2310 }, { "epoch": 10.131004366812228, "grad_norm": 0.00010785359758994678, "learning_rate": 4.9999738606052466e-06, "logits/chosen": -1.0612740516662598, "logits/rejected": -0.808624267578125, "logps/chosen": -306.6039123535156, "logps/rejected": -2690.087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3543640971183777, "rewards/margins": 23.66415786743164, "rewards/rejected": -24.01852035522461, "step": 2320 }, { "epoch": 10.174672489082969, "grad_norm": 0.00016260641250961353, "learning_rate": 4.999953530027867e-06, "logits/chosen": -1.1292903423309326, "logits/rejected": -0.9621719121932983, "logps/chosen": -332.2584228515625, "logps/rejected": -2570.88525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3879398703575134, "rewards/margins": 22.550615310668945, "rewards/rejected": -22.938556671142578, "step": 2330 }, { "epoch": 10.218340611353712, "grad_norm": 8.899641263311713e-05, "learning_rate": 4.999927390795073e-06, "logits/chosen": -1.1047399044036865, "logits/rejected": -0.9063766598701477, "logps/chosen": -302.2047424316406, "logps/rejected": -2779.56640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3003840744495392, "rewards/margins": 24.615142822265625, "rewards/rejected": -24.91552734375, "step": 2340 }, { "epoch": 10.262008733624453, "grad_norm": 0.00021250935956454827, "learning_rate": 4.999895442967599e-06, "logits/chosen": -1.0956833362579346, "logits/rejected": -0.8695257306098938, "logps/chosen": -300.86199951171875, "logps/rejected": -2924.010986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.31274479627609253, "rewards/margins": 26.021503448486328, "rewards/rejected": -26.33424949645996, "step": 2350 }, { "epoch": 10.305676855895197, "grad_norm": 0.0006459060661590984, "learning_rate": 4.999857686619678e-06, "logits/chosen": -1.1102837324142456, "logits/rejected": -0.9224529266357422, "logps/chosen": -326.1658020019531, "logps/rejected": -2522.81689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33068835735321045, "rewards/margins": 22.204904556274414, "rewards/rejected": -22.53559112548828, "step": 2360 }, { "epoch": 10.34934497816594, "grad_norm": 6.93640801699775e-05, "learning_rate": 4.999814121839034e-06, "logits/chosen": -1.1283729076385498, "logits/rejected": -0.9152158498764038, "logps/chosen": -314.626953125, "logps/rejected": -2684.21533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3617323040962219, "rewards/margins": 23.75537109375, "rewards/rejected": -24.117101669311523, "step": 2370 }, { "epoch": 10.393013100436681, "grad_norm": 0.00011194702440933472, "learning_rate": 4.999764748726891e-06, "logits/chosen": -1.1031768321990967, "logits/rejected": -0.8428797721862793, "logps/chosen": -291.234375, "logps/rejected": -2960.113525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.29078221321105957, "rewards/margins": 26.247400283813477, "rewards/rejected": -26.53818130493164, "step": 2380 }, { "epoch": 10.436681222707424, "grad_norm": 0.0003858244879838093, "learning_rate": 4.999709567397969e-06, "logits/chosen": -1.0553863048553467, "logits/rejected": -0.8277440071105957, "logps/chosen": -289.7860107421875, "logps/rejected": -2691.2724609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3296220302581787, "rewards/margins": 23.805683135986328, "rewards/rejected": -24.13530731201172, "step": 2390 }, { "epoch": 10.480349344978166, "grad_norm": 0.0025862560270072544, "learning_rate": 4.99964857798048e-06, "logits/chosen": -1.0866535902023315, "logits/rejected": -0.8926572799682617, "logps/chosen": -313.35565185546875, "logps/rejected": -2940.981689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.36553871631622314, "rewards/margins": 26.058080673217773, "rewards/rejected": -26.42361831665039, "step": 2400 }, { "epoch": 10.524017467248909, "grad_norm": 0.013620018645021726, "learning_rate": 4.999581780616136e-06, "logits/chosen": -1.0215613842010498, "logits/rejected": -0.8445848226547241, "logps/chosen": -290.8216552734375, "logps/rejected": -2692.339599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.31041836738586426, "rewards/margins": 23.91866111755371, "rewards/rejected": -24.229080200195312, "step": 2410 }, { "epoch": 10.56768558951965, "grad_norm": 8.672134167390889e-05, "learning_rate": 4.999509175460139e-06, "logits/chosen": -1.121306300163269, "logits/rejected": -0.9640787839889526, "logps/chosen": -342.5458679199219, "logps/rejected": -2835.445068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.31585580110549927, "rewards/margins": 25.218027114868164, "rewards/rejected": -25.53388214111328, "step": 2420 }, { "epoch": 10.611353711790393, "grad_norm": 0.0001296915312977456, "learning_rate": 4.999430762681187e-06, "logits/chosen": -1.0996657609939575, "logits/rejected": -0.9175011515617371, "logps/chosen": -314.4928283691406, "logps/rejected": -2759.690673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2822341322898865, "rewards/margins": 24.620983123779297, "rewards/rejected": -24.903217315673828, "step": 2430 }, { "epoch": 10.655021834061134, "grad_norm": 0.00010970618675861519, "learning_rate": 4.999346542461474e-06, "logits/chosen": -1.1289622783660889, "logits/rejected": -0.9295376539230347, "logps/chosen": -306.9939270019531, "logps/rejected": -2745.009521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3311535716056824, "rewards/margins": 24.338151931762695, "rewards/rejected": -24.66930389404297, "step": 2440 }, { "epoch": 10.698689956331878, "grad_norm": 0.0008124997426073802, "learning_rate": 4.999256514996685e-06, "logits/chosen": -1.14092218875885, "logits/rejected": -1.0120009183883667, "logps/chosen": -329.1638488769531, "logps/rejected": -2698.9013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2779102921485901, "rewards/margins": 24.011775970458984, "rewards/rejected": -24.28968620300293, "step": 2450 }, { "epoch": 10.74235807860262, "grad_norm": 6.635513677389761e-05, "learning_rate": 4.999160680496001e-06, "logits/chosen": -1.1566135883331299, "logits/rejected": -0.915454089641571, "logps/chosen": -318.4544982910156, "logps/rejected": -3026.39013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.26705268025398254, "rewards/margins": 27.010784149169922, "rewards/rejected": -27.2778377532959, "step": 2460 }, { "epoch": 10.786026200873362, "grad_norm": 9.36374892785933e-05, "learning_rate": 4.999059039182093e-06, "logits/chosen": -1.0942569971084595, "logits/rejected": -0.8922575116157532, "logps/chosen": -308.99224853515625, "logps/rejected": -2908.498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3325221538543701, "rewards/margins": 25.895248413085938, "rewards/rejected": -26.227771759033203, "step": 2470 }, { "epoch": 10.829694323144105, "grad_norm": 0.0004386471146101754, "learning_rate": 4.998951591291124e-06, "logits/chosen": -1.0898687839508057, "logits/rejected": -0.9148825407028198, "logps/chosen": -335.49365234375, "logps/rejected": -2514.185302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39506062865257263, "rewards/margins": 22.180789947509766, "rewards/rejected": -22.575849533081055, "step": 2480 }, { "epoch": 10.873362445414847, "grad_norm": 5.8289373699954544e-05, "learning_rate": 4.998838337072751e-06, "logits/chosen": -1.0535749197006226, "logits/rejected": -0.8138370513916016, "logps/chosen": -287.956298828125, "logps/rejected": -3005.851318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33099812269210815, "rewards/margins": 26.77446937561035, "rewards/rejected": -27.10546875, "step": 2490 }, { "epoch": 10.91703056768559, "grad_norm": 0.00043680280933135516, "learning_rate": 4.99871927679012e-06, "logits/chosen": -1.0648771524429321, "logits/rejected": -0.8667116165161133, "logps/chosen": -298.7175598144531, "logps/rejected": -2803.779052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.285023033618927, "rewards/margins": 24.986709594726562, "rewards/rejected": -25.271732330322266, "step": 2500 }, { "epoch": 10.960698689956331, "grad_norm": 0.0024412450181767577, "learning_rate": 4.998594410719869e-06, "logits/chosen": -1.1069650650024414, "logits/rejected": -0.947489857673645, "logps/chosen": -327.1798095703125, "logps/rejected": -2762.65478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2967913746833801, "rewards/margins": 24.569475173950195, "rewards/rejected": -24.86626625061035, "step": 2510 }, { "epoch": 11.004366812227074, "grad_norm": 0.00043919052987548323, "learning_rate": 4.998463739152125e-06, "logits/chosen": -1.152633786201477, "logits/rejected": -0.9726517796516418, "logps/chosen": -322.6573791503906, "logps/rejected": -2827.310302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4075029492378235, "rewards/margins": 25.041593551635742, "rewards/rejected": -25.4490966796875, "step": 2520 }, { "epoch": 11.048034934497817, "grad_norm": 0.00012518293757398738, "learning_rate": 4.998327262390504e-06, "logits/chosen": -1.0643621683120728, "logits/rejected": -0.9224281311035156, "logps/chosen": -342.7349548339844, "logps/rejected": -2585.510498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.35563695430755615, "rewards/margins": 22.941457748413086, "rewards/rejected": -23.297096252441406, "step": 2530 }, { "epoch": 11.091703056768559, "grad_norm": 4.50382606609815e-05, "learning_rate": 4.99818498075211e-06, "logits/chosen": -1.0665757656097412, "logits/rejected": -0.9093521237373352, "logps/chosen": -323.2288513183594, "logps/rejected": -2687.112548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3864873945713043, "rewards/margins": 23.865331649780273, "rewards/rejected": -24.251819610595703, "step": 2540 }, { "epoch": 11.135371179039302, "grad_norm": 0.00025437858976813985, "learning_rate": 4.998036894567535e-06, "logits/chosen": -1.1533187627792358, "logits/rejected": -0.9765650629997253, "logps/chosen": -315.88671875, "logps/rejected": -2871.84423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2905447483062744, "rewards/margins": 25.592538833618164, "rewards/rejected": -25.883087158203125, "step": 2550 }, { "epoch": 11.179039301310043, "grad_norm": 0.0002302680370696195, "learning_rate": 4.99788300418086e-06, "logits/chosen": -1.159891963005066, "logits/rejected": -0.9434417486190796, "logps/chosen": -295.9383850097656, "logps/rejected": -2986.77490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.29570674896240234, "rewards/margins": 26.734477996826172, "rewards/rejected": -27.03018569946289, "step": 2560 }, { "epoch": 11.222707423580786, "grad_norm": 8.967578683102709e-05, "learning_rate": 4.997723309949647e-06, "logits/chosen": -1.1251469850540161, "logits/rejected": -0.8709354400634766, "logps/chosen": -291.3172302246094, "logps/rejected": -3376.958251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3410993218421936, "rewards/margins": 30.170913696289062, "rewards/rejected": -30.51201820373535, "step": 2570 }, { "epoch": 11.266375545851528, "grad_norm": 0.0001258376333171559, "learning_rate": 4.9975578122449485e-06, "logits/chosen": -1.1270411014556885, "logits/rejected": -0.9374577403068542, "logps/chosen": -308.79559326171875, "logps/rejected": -2984.7734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3923688530921936, "rewards/margins": 26.503246307373047, "rewards/rejected": -26.895614624023438, "step": 2580 }, { "epoch": 11.31004366812227, "grad_norm": 0.00012873638272641846, "learning_rate": 4.997386511451299e-06, "logits/chosen": -1.057881474494934, "logits/rejected": -0.8555137515068054, "logps/chosen": -292.6627197265625, "logps/rejected": -2832.09716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2993658185005188, "rewards/margins": 25.28803825378418, "rewards/rejected": -25.587406158447266, "step": 2590 }, { "epoch": 11.353711790393014, "grad_norm": 0.0005287205608781294, "learning_rate": 4.997209407966716e-06, "logits/chosen": -1.1015608310699463, "logits/rejected": -0.9238055348396301, "logps/chosen": -316.93328857421875, "logps/rejected": -2949.811279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.359840452671051, "rewards/margins": 26.31802749633789, "rewards/rejected": -26.677867889404297, "step": 2600 }, { "epoch": 11.397379912663755, "grad_norm": 0.0003824245654473126, "learning_rate": 4.997026502202701e-06, "logits/chosen": -1.106353521347046, "logits/rejected": -0.9592373967170715, "logps/chosen": -331.7537841796875, "logps/rejected": -2798.820556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.44703930616378784, "rewards/margins": 24.845905303955078, "rewards/rejected": -25.29294776916504, "step": 2610 }, { "epoch": 11.441048034934498, "grad_norm": 0.0009935211680188653, "learning_rate": 4.996837794584237e-06, "logits/chosen": -1.0957896709442139, "logits/rejected": -0.8783075213432312, "logps/chosen": -303.096923828125, "logps/rejected": -2881.987060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.38049525022506714, "rewards/margins": 25.57606315612793, "rewards/rejected": -25.956558227539062, "step": 2620 }, { "epoch": 11.48471615720524, "grad_norm": 0.0005245935283035879, "learning_rate": 4.996643285549787e-06, "logits/chosen": -1.1115862131118774, "logits/rejected": -0.9044306874275208, "logps/chosen": -305.20513916015625, "logps/rejected": -2940.23291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4065364897251129, "rewards/margins": 25.98793601989746, "rewards/rejected": -26.394474029541016, "step": 2630 }, { "epoch": 11.528384279475983, "grad_norm": 8.969155176585141e-05, "learning_rate": 4.996442975551293e-06, "logits/chosen": -1.028293490409851, "logits/rejected": -0.8865048289299011, "logps/chosen": -298.83209228515625, "logps/rejected": -2812.693603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.28327518701553345, "rewards/margins": 25.139461517333984, "rewards/rejected": -25.4227352142334, "step": 2640 }, { "epoch": 11.572052401746724, "grad_norm": 7.067142170596469e-05, "learning_rate": 4.996236865054177e-06, "logits/chosen": -1.088174819946289, "logits/rejected": -0.870819091796875, "logps/chosen": -303.1543884277344, "logps/rejected": -2951.463134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.30142346024513245, "rewards/margins": 26.289321899414062, "rewards/rejected": -26.590744018554688, "step": 2650 }, { "epoch": 11.615720524017467, "grad_norm": 6.059175954117323e-05, "learning_rate": 4.996024954537338e-06, "logits/chosen": -1.1067641973495483, "logits/rejected": -0.9639085531234741, "logps/chosen": -321.2157287597656, "logps/rejected": -2799.444091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33343327045440674, "rewards/margins": 24.9471378326416, "rewards/rejected": -25.28057098388672, "step": 2660 }, { "epoch": 11.65938864628821, "grad_norm": 0.00045436593113178515, "learning_rate": 4.99580724449315e-06, "logits/chosen": -1.063227653503418, "logits/rejected": -0.8533226251602173, "logps/chosen": -282.34771728515625, "logps/rejected": -3035.23193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.29411283135414124, "rewards/margins": 27.094533920288086, "rewards/rejected": -27.38864517211914, "step": 2670 }, { "epoch": 11.703056768558952, "grad_norm": 0.002516443569347564, "learning_rate": 4.995583735427465e-06, "logits/chosen": -1.076380968093872, "logits/rejected": -0.9121101498603821, "logps/chosen": -308.04010009765625, "logps/rejected": -2828.23193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3284699022769928, "rewards/margins": 25.136049270629883, "rewards/rejected": -25.464519500732422, "step": 2680 }, { "epoch": 11.746724890829695, "grad_norm": 0.000808639880092657, "learning_rate": 4.995354427859607e-06, "logits/chosen": -1.1447213888168335, "logits/rejected": -0.9705305099487305, "logps/chosen": -319.5296325683594, "logps/rejected": -2855.38720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.35538482666015625, "rewards/margins": 25.42190933227539, "rewards/rejected": -25.777292251586914, "step": 2690 }, { "epoch": 11.790393013100436, "grad_norm": 7.916928072891249e-05, "learning_rate": 4.9951193223223725e-06, "logits/chosen": -1.130434274673462, "logits/rejected": -0.9243596196174622, "logps/chosen": -299.07568359375, "logps/rejected": -3190.71142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.26369255781173706, "rewards/margins": 28.553375244140625, "rewards/rejected": -28.81707191467285, "step": 2700 }, { "epoch": 11.83406113537118, "grad_norm": 7.947672965459901e-05, "learning_rate": 4.994878419362033e-06, "logits/chosen": -1.161321997642517, "logits/rejected": -0.9849316477775574, "logps/chosen": -351.2088317871094, "logps/rejected": -2890.69482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.51836758852005, "rewards/margins": 25.640155792236328, "rewards/rejected": -26.158523559570312, "step": 2710 }, { "epoch": 11.87772925764192, "grad_norm": 0.0001624936140912016, "learning_rate": 4.994631719538324e-06, "logits/chosen": -1.131820559501648, "logits/rejected": -1.0258337259292603, "logps/chosen": -355.3280334472656, "logps/rejected": -2690.21435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4593381881713867, "rewards/margins": 23.847755432128906, "rewards/rejected": -24.307092666625977, "step": 2720 }, { "epoch": 11.921397379912664, "grad_norm": 0.0002499404836585868, "learning_rate": 4.994379223424456e-06, "logits/chosen": -1.0243899822235107, "logits/rejected": -0.8986269235610962, "logps/chosen": -334.2411193847656, "logps/rejected": -2708.81689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33162373304367065, "rewards/margins": 24.148536682128906, "rewards/rejected": -24.480161666870117, "step": 2730 }, { "epoch": 11.965065502183407, "grad_norm": 0.0002130476239842584, "learning_rate": 4.994120931607106e-06, "logits/chosen": -1.181944727897644, "logits/rejected": -1.0450904369354248, "logps/chosen": -330.5018005371094, "logps/rejected": -2692.25537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.37503618001937866, "rewards/margins": 23.86376953125, "rewards/rejected": -24.238805770874023, "step": 2740 }, { "epoch": 12.008733624454148, "grad_norm": 3.5038745925365026e-05, "learning_rate": 4.993856844686415e-06, "logits/chosen": -1.130826711654663, "logits/rejected": -1.0476330518722534, "logps/chosen": -359.94012451171875, "logps/rejected": -2829.598388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.48388925194740295, "rewards/margins": 25.12630844116211, "rewards/rejected": -25.61019515991211, "step": 2750 }, { "epoch": 12.052401746724891, "grad_norm": 5.5887411655828805e-05, "learning_rate": 4.993586963275991e-06, "logits/chosen": -1.121129035949707, "logits/rejected": -0.8797319531440735, "logps/chosen": -296.2095947265625, "logps/rejected": -3119.46630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3120737671852112, "rewards/margins": 27.831619262695312, "rewards/rejected": -28.143692016601562, "step": 2760 }, { "epoch": 12.096069868995633, "grad_norm": 5.1560913848112344e-05, "learning_rate": 4.993311288002903e-06, "logits/chosen": -1.0414012670516968, "logits/rejected": -0.9296094179153442, "logps/chosen": -321.9031066894531, "logps/rejected": -2583.19873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.27469685673713684, "rewards/margins": 23.078269958496094, "rewards/rejected": -23.352970123291016, "step": 2770 }, { "epoch": 12.139737991266376, "grad_norm": 5.850092226636487e-05, "learning_rate": 4.993029819507688e-06, "logits/chosen": -1.0835192203521729, "logits/rejected": -0.9777547121047974, "logps/chosen": -352.7610168457031, "logps/rejected": -2736.876220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3963974416255951, "rewards/margins": 24.404220581054688, "rewards/rejected": -24.800617218017578, "step": 2780 }, { "epoch": 12.183406113537117, "grad_norm": 0.0007195127735056144, "learning_rate": 4.992742558444336e-06, "logits/chosen": -1.077323079109192, "logits/rejected": -0.9440869092941284, "logps/chosen": -325.9335021972656, "logps/rejected": -2858.013916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4020709991455078, "rewards/margins": 25.41234588623047, "rewards/rejected": -25.814416885375977, "step": 2790 }, { "epoch": 12.22707423580786, "grad_norm": 8.237932967297351e-05, "learning_rate": 4.992449505480301e-06, "logits/chosen": -1.1771314144134521, "logits/rejected": -1.0133082866668701, "logps/chosen": -321.5072326660156, "logps/rejected": -2960.59228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3793460726737976, "rewards/margins": 26.393447875976562, "rewards/rejected": -26.77279281616211, "step": 2800 }, { "epoch": 12.270742358078603, "grad_norm": 0.001561962235945413, "learning_rate": 4.992150661296492e-06, "logits/chosen": -1.075110673904419, "logits/rejected": -0.9350481033325195, "logps/chosen": -321.9504699707031, "logps/rejected": -2844.488037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4548390805721283, "rewards/margins": 25.357879638671875, "rewards/rejected": -25.81271743774414, "step": 2810 }, { "epoch": 12.314410480349345, "grad_norm": 3.06644126006848e-05, "learning_rate": 4.991846026587277e-06, "logits/chosen": -1.110081434249878, "logits/rejected": -0.9424473643302917, "logps/chosen": -317.28765869140625, "logps/rejected": -3035.10693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.29364582896232605, "rewards/margins": 27.215213775634766, "rewards/rejected": -27.50885581970215, "step": 2820 }, { "epoch": 12.358078602620088, "grad_norm": 7.295418439091079e-05, "learning_rate": 4.991535602060475e-06, "logits/chosen": -1.1370012760162354, "logits/rejected": -0.973003089427948, "logps/chosen": -325.49560546875, "logps/rejected": -2862.74755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39572882652282715, "rewards/margins": 25.362043380737305, "rewards/rejected": -25.757776260375977, "step": 2830 }, { "epoch": 12.40174672489083, "grad_norm": 2.8113230424828313e-05, "learning_rate": 4.99121938843736e-06, "logits/chosen": -1.076501488685608, "logits/rejected": -0.9277998208999634, "logps/chosen": -319.3334045410156, "logps/rejected": -2853.37841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3571220338344574, "rewards/margins": 25.54103660583496, "rewards/rejected": -25.898162841796875, "step": 2840 }, { "epoch": 12.445414847161572, "grad_norm": 5.6078603284990514e-05, "learning_rate": 4.990897386452655e-06, "logits/chosen": -1.1316912174224854, "logits/rejected": -0.8830701112747192, "logps/chosen": -300.985107421875, "logps/rejected": -3297.991455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39625170826911926, "rewards/margins": 29.481121063232422, "rewards/rejected": -29.877368927001953, "step": 2850 }, { "epoch": 12.489082969432314, "grad_norm": 3.3683684215327085e-05, "learning_rate": 4.990569596854534e-06, "logits/chosen": -1.155340552330017, "logits/rejected": -0.9636089205741882, "logps/chosen": -324.57659912109375, "logps/rejected": -3283.81201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33842915296554565, "rewards/margins": 29.40009117126465, "rewards/rejected": -29.738521575927734, "step": 2860 }, { "epoch": 12.532751091703057, "grad_norm": 4.9440092424344966e-05, "learning_rate": 4.990236020404617e-06, "logits/chosen": -1.0930402278900146, "logits/rejected": -0.931657612323761, "logps/chosen": -312.0984191894531, "logps/rejected": -2995.5634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39104461669921875, "rewards/margins": 26.79543113708496, "rewards/rejected": -27.186477661132812, "step": 2870 }, { "epoch": 12.5764192139738, "grad_norm": 8.371673728935154e-05, "learning_rate": 4.989896657877972e-06, "logits/chosen": -1.106121301651001, "logits/rejected": -0.9602048993110657, "logps/chosen": -344.65386962890625, "logps/rejected": -2775.35498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4401662349700928, "rewards/margins": 24.687816619873047, "rewards/rejected": -25.12798500061035, "step": 2880 }, { "epoch": 12.620087336244541, "grad_norm": 2.5622372520915082e-05, "learning_rate": 4.989551510063109e-06, "logits/chosen": -1.1259839534759521, "logits/rejected": -0.9986206889152527, "logps/chosen": -330.0254211425781, "logps/rejected": -2962.854248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4482661187648773, "rewards/margins": 26.43106460571289, "rewards/rejected": -26.879329681396484, "step": 2890 }, { "epoch": 12.663755458515285, "grad_norm": 2.4644092793742745e-05, "learning_rate": 4.989200577761981e-06, "logits/chosen": -1.1529628038406372, "logits/rejected": -1.0000560283660889, "logps/chosen": -331.71929931640625, "logps/rejected": -2981.440185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3644999861717224, "rewards/margins": 26.5638370513916, "rewards/rejected": -26.928335189819336, "step": 2900 }, { "epoch": 12.707423580786026, "grad_norm": 0.00010189946582248487, "learning_rate": 4.98884386178998e-06, "logits/chosen": -1.11433744430542, "logits/rejected": -0.9641532897949219, "logps/chosen": -318.1783142089844, "logps/rejected": -2845.221923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39715301990509033, "rewards/margins": 25.42275619506836, "rewards/rejected": -25.819910049438477, "step": 2910 }, { "epoch": 12.751091703056769, "grad_norm": 5.529239647632391e-05, "learning_rate": 4.988481362975939e-06, "logits/chosen": -1.1477248668670654, "logits/rejected": -0.9422198534011841, "logps/chosen": -331.994384765625, "logps/rejected": -3064.42333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43580150604248047, "rewards/margins": 27.142736434936523, "rewards/rejected": -27.578536987304688, "step": 2920 }, { "epoch": 12.79475982532751, "grad_norm": 4.733194903358665e-05, "learning_rate": 4.988113082162125e-06, "logits/chosen": -1.1675961017608643, "logits/rejected": -0.9901081323623657, "logps/chosen": -325.6370544433594, "logps/rejected": -3007.281005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.47110024094581604, "rewards/margins": 26.696359634399414, "rewards/rejected": -27.16745948791504, "step": 2930 }, { "epoch": 12.838427947598253, "grad_norm": 0.0068073164459546915, "learning_rate": 4.987739020204241e-06, "logits/chosen": -1.0840141773223877, "logits/rejected": -0.9486913681030273, "logps/chosen": -345.7938537597656, "logps/rejected": -2944.817626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.38392677903175354, "rewards/margins": 26.250402450561523, "rewards/rejected": -26.63433265686035, "step": 2940 }, { "epoch": 12.882096069868995, "grad_norm": 0.00047687203676708816, "learning_rate": 4.987359177971422e-06, "logits/chosen": -1.1535725593566895, "logits/rejected": -1.001331090927124, "logps/chosen": -323.1007080078125, "logps/rejected": -3271.829833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39721858501434326, "rewards/margins": 29.270217895507812, "rewards/rejected": -29.667438507080078, "step": 2950 }, { "epoch": 12.925764192139738, "grad_norm": 0.0009555054552839034, "learning_rate": 4.986973556346233e-06, "logits/chosen": -1.0954362154006958, "logits/rejected": -0.9317017793655396, "logps/chosen": -321.0083923339844, "logps/rejected": -3069.73095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3032970428466797, "rewards/margins": 27.618587493896484, "rewards/rejected": -27.921884536743164, "step": 2960 }, { "epoch": 12.969432314410481, "grad_norm": 0.0001310722172221118, "learning_rate": 4.986582156224668e-06, "logits/chosen": -1.0976073741912842, "logits/rejected": -0.9277912378311157, "logps/chosen": -282.45721435546875, "logps/rejected": -3046.34814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.28533655405044556, "rewards/margins": 27.280715942382812, "rewards/rejected": -27.566055297851562, "step": 2970 }, { "epoch": 13.013100436681222, "grad_norm": 0.00011699739498765499, "learning_rate": 4.9861849785161465e-06, "logits/chosen": -1.1590147018432617, "logits/rejected": -0.9749581217765808, "logps/chosen": -314.11602783203125, "logps/rejected": -3206.437744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39968690276145935, "rewards/margins": 28.719919204711914, "rewards/rejected": -29.119604110717773, "step": 2980 }, { "epoch": 13.056768558951966, "grad_norm": 0.0007032362156381278, "learning_rate": 4.985782024143515e-06, "logits/chosen": -1.1264410018920898, "logits/rejected": -1.0057713985443115, "logps/chosen": -319.2245788574219, "logps/rejected": -3140.044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3631485104560852, "rewards/margins": 28.083587646484375, "rewards/rejected": -28.44673728942871, "step": 2990 }, { "epoch": 13.100436681222707, "grad_norm": 7.755546104427199e-05, "learning_rate": 4.985373294043039e-06, "logits/chosen": -1.1231440305709839, "logits/rejected": -0.9806516766548157, "logps/chosen": -318.1066589355469, "logps/rejected": -2958.92919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.37212032079696655, "rewards/margins": 26.31635093688965, "rewards/rejected": -26.688472747802734, "step": 3000 }, { "epoch": 13.14410480349345, "grad_norm": 0.005518523952293167, "learning_rate": 4.984958789164404e-06, "logits/chosen": -1.1687438488006592, "logits/rejected": -0.9604102969169617, "logps/chosen": -296.4488830566406, "logps/rejected": -3047.497802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.31522127985954285, "rewards/margins": 27.206308364868164, "rewards/rejected": -27.521526336669922, "step": 3010 }, { "epoch": 13.187772925764191, "grad_norm": 4.039439563168484e-05, "learning_rate": 4.984538510470716e-06, "logits/chosen": -1.0936968326568604, "logits/rejected": -0.9103947877883911, "logps/chosen": -332.6950378417969, "logps/rejected": -3194.95751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.36437344551086426, "rewards/margins": 28.491436004638672, "rewards/rejected": -28.855810165405273, "step": 3020 }, { "epoch": 13.231441048034934, "grad_norm": 0.0002571596682583939, "learning_rate": 4.984112458938495e-06, "logits/chosen": -1.0798451900482178, "logits/rejected": -0.9091817736625671, "logps/chosen": -329.77874755859375, "logps/rejected": -2898.86376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.40596023201942444, "rewards/margins": 25.923965454101562, "rewards/rejected": -26.329925537109375, "step": 3030 }, { "epoch": 13.275109170305678, "grad_norm": 4.0614594551893505e-05, "learning_rate": 4.983680635557672e-06, "logits/chosen": -1.0896583795547485, "logits/rejected": -0.9338849186897278, "logps/chosen": -329.37615966796875, "logps/rejected": -2920.2412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.452263206243515, "rewards/margins": 26.064960479736328, "rewards/rejected": -26.517223358154297, "step": 3040 }, { "epoch": 13.318777292576419, "grad_norm": 6.655875001097535e-05, "learning_rate": 4.983243041331593e-06, "logits/chosen": -1.1233129501342773, "logits/rejected": -0.9823464155197144, "logps/chosen": -317.7225646972656, "logps/rejected": -3043.6015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.28171008825302124, "rewards/margins": 27.250438690185547, "rewards/rejected": -27.532150268554688, "step": 3050 }, { "epoch": 13.362445414847162, "grad_norm": 0.00023054661561033022, "learning_rate": 4.982799677277009e-06, "logits/chosen": -1.1208397150039673, "logits/rejected": -1.0094242095947266, "logps/chosen": -342.1285400390625, "logps/rejected": -2789.668212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.41740551590919495, "rewards/margins": 24.908782958984375, "rewards/rejected": -25.326187133789062, "step": 3060 }, { "epoch": 13.406113537117903, "grad_norm": 4.6851946527426664e-05, "learning_rate": 4.982350544424079e-06, "logits/chosen": -1.1174852848052979, "logits/rejected": -0.9555479288101196, "logps/chosen": -334.228515625, "logps/rejected": -2934.4833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.44750815629959106, "rewards/margins": 26.186742782592773, "rewards/rejected": -26.63425064086914, "step": 3070 }, { "epoch": 13.449781659388647, "grad_norm": 2.1657154694059757e-05, "learning_rate": 4.981895643816367e-06, "logits/chosen": -1.0953834056854248, "logits/rejected": -0.9312434196472168, "logps/chosen": -311.4670104980469, "logps/rejected": -3135.5078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.28477320075035095, "rewards/margins": 28.162607192993164, "rewards/rejected": -28.447378158569336, "step": 3080 }, { "epoch": 13.493449781659388, "grad_norm": 1.938672988572627e-05, "learning_rate": 4.981434976510836e-06, "logits/chosen": -1.0891046524047852, "logits/rejected": -0.9568377733230591, "logps/chosen": -335.26361083984375, "logps/rejected": -2909.133544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.41791146993637085, "rewards/margins": 26.031396865844727, "rewards/rejected": -26.44930648803711, "step": 3090 }, { "epoch": 13.537117903930131, "grad_norm": 3.891574476520327e-05, "learning_rate": 4.980968543577849e-06, "logits/chosen": -1.0729163885116577, "logits/rejected": -0.9937694668769836, "logps/chosen": -370.64898681640625, "logps/rejected": -2695.956298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.537264883518219, "rewards/margins": 23.949588775634766, "rewards/rejected": -24.486852645874023, "step": 3100 }, { "epoch": 13.580786026200874, "grad_norm": 0.00015556630381330445, "learning_rate": 4.9804963461011655e-06, "logits/chosen": -1.1139771938323975, "logits/rejected": -0.9619326591491699, "logps/chosen": -316.3323669433594, "logps/rejected": -3098.95556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.38797301054000854, "rewards/margins": 27.713973999023438, "rewards/rejected": -28.101947784423828, "step": 3110 }, { "epoch": 13.624454148471616, "grad_norm": 0.0005695423015835967, "learning_rate": 4.980018385177939e-06, "logits/chosen": -1.1247018575668335, "logits/rejected": -0.9576985239982605, "logps/chosen": -310.6287841796875, "logps/rejected": -3101.469970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.41714707016944885, "rewards/margins": 27.771535873413086, "rewards/rejected": -28.18868064880371, "step": 3120 }, { "epoch": 13.668122270742359, "grad_norm": 3.894742367000321e-05, "learning_rate": 4.979534661918712e-06, "logits/chosen": -1.100508689880371, "logits/rejected": -0.9362314939498901, "logps/chosen": -307.0045471191406, "logps/rejected": -3263.732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.44445866346359253, "rewards/margins": 29.389080047607422, "rewards/rejected": -29.833538055419922, "step": 3130 }, { "epoch": 13.7117903930131, "grad_norm": 2.4791752279789068e-05, "learning_rate": 4.979045177447422e-06, "logits/chosen": -1.1537821292877197, "logits/rejected": -0.9653547406196594, "logps/chosen": -313.8179626464844, "logps/rejected": -3078.514892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.35584598779678345, "rewards/margins": 27.592708587646484, "rewards/rejected": -27.94855308532715, "step": 3140 }, { "epoch": 13.755458515283843, "grad_norm": 5.3464839063624246e-05, "learning_rate": 4.978549932901386e-06, "logits/chosen": -1.1732168197631836, "logits/rejected": -1.038177251815796, "logps/chosen": -321.1747131347656, "logps/rejected": -2790.35302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.36780187487602234, "rewards/margins": 24.962379455566406, "rewards/rejected": -25.330181121826172, "step": 3150 }, { "epoch": 13.799126637554584, "grad_norm": 5.853959216296712e-05, "learning_rate": 4.9780489294313096e-06, "logits/chosen": -1.1954654455184937, "logits/rejected": -1.0792608261108398, "logps/chosen": -331.8520812988281, "logps/rejected": -3073.00048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4389861524105072, "rewards/margins": 27.481094360351562, "rewards/rejected": -27.920080184936523, "step": 3160 }, { "epoch": 13.842794759825328, "grad_norm": 0.00017828882432045824, "learning_rate": 4.977542168201274e-06, "logits/chosen": -1.130323052406311, "logits/rejected": -0.9194025993347168, "logps/chosen": -298.2406921386719, "logps/rejected": -3417.50830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4074931740760803, "rewards/margins": 30.682113647460938, "rewards/rejected": -31.0896053314209, "step": 3170 }, { "epoch": 13.886462882096069, "grad_norm": 4.749831621596701e-05, "learning_rate": 4.977029650388745e-06, "logits/chosen": -1.1183439493179321, "logits/rejected": -0.917373776435852, "logps/chosen": -314.4237365722656, "logps/rejected": -3210.526611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39576855301856995, "rewards/margins": 28.85988998413086, "rewards/rejected": -29.255657196044922, "step": 3180 }, { "epoch": 13.930131004366812, "grad_norm": 2.9902813057700677e-05, "learning_rate": 4.976511377184557e-06, "logits/chosen": -1.0921794176101685, "logits/rejected": -1.0002024173736572, "logps/chosen": -325.649658203125, "logps/rejected": -3153.773193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4225030541419983, "rewards/margins": 28.225093841552734, "rewards/rejected": -28.647594451904297, "step": 3190 }, { "epoch": 13.973799126637555, "grad_norm": 5.9669890719715346e-05, "learning_rate": 4.975987349792924e-06, "logits/chosen": -1.1480635404586792, "logits/rejected": -0.9901136159896851, "logps/chosen": -317.46600341796875, "logps/rejected": -3405.23974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3470613956451416, "rewards/margins": 30.68514633178711, "rewards/rejected": -31.03220558166504, "step": 3200 }, { "epoch": 14.017467248908297, "grad_norm": 3.469555220602485e-05, "learning_rate": 4.975457569431423e-06, "logits/chosen": -1.1169124841690063, "logits/rejected": -1.0211803913116455, "logps/chosen": -346.75531005859375, "logps/rejected": -2988.75048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5267428755760193, "rewards/margins": 26.640056610107422, "rewards/rejected": -27.166799545288086, "step": 3210 }, { "epoch": 14.06113537117904, "grad_norm": 4.161058050805713e-05, "learning_rate": 4.974922037331005e-06, "logits/chosen": -1.1310818195343018, "logits/rejected": -1.0481311082839966, "logps/chosen": -354.9120178222656, "logps/rejected": -2971.907470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33484718203544617, "rewards/margins": 26.665878295898438, "rewards/rejected": -27.000722885131836, "step": 3220 }, { "epoch": 14.104803493449781, "grad_norm": 3.4346266162556655e-05, "learning_rate": 4.974380754735979e-06, "logits/chosen": -1.1221065521240234, "logits/rejected": -1.0070679187774658, "logps/chosen": -338.353759765625, "logps/rejected": -2768.11083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4178987443447113, "rewards/margins": 24.67993927001953, "rewards/rejected": -25.097835540771484, "step": 3230 }, { "epoch": 14.148471615720524, "grad_norm": 5.0052233388692244e-05, "learning_rate": 4.97383372290402e-06, "logits/chosen": -1.1397979259490967, "logits/rejected": -0.9661018252372742, "logps/chosen": -302.76214599609375, "logps/rejected": -3077.73974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3510505259037018, "rewards/margins": 27.597915649414062, "rewards/rejected": -27.948963165283203, "step": 3240 }, { "epoch": 14.192139737991265, "grad_norm": 0.00020914677529233487, "learning_rate": 4.973280943106158e-06, "logits/chosen": -1.119979977607727, "logits/rejected": -0.9806697964668274, "logps/chosen": -317.0598449707031, "logps/rejected": -3107.838134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.35738855600357056, "rewards/margins": 27.952205657958984, "rewards/rejected": -28.309595108032227, "step": 3250 }, { "epoch": 14.235807860262009, "grad_norm": 5.308923223023793e-05, "learning_rate": 4.97272241662678e-06, "logits/chosen": -1.083249568939209, "logits/rejected": -0.9504167437553406, "logps/chosen": -336.28094482421875, "logps/rejected": -2965.785888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4796178936958313, "rewards/margins": 26.511402130126953, "rewards/rejected": -26.991018295288086, "step": 3260 }, { "epoch": 14.279475982532752, "grad_norm": 7.89405460651185e-05, "learning_rate": 4.972158144763626e-06, "logits/chosen": -1.0542396306991577, "logits/rejected": -0.8799861669540405, "logps/chosen": -285.6694641113281, "logps/rejected": -3246.745361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.38802337646484375, "rewards/margins": 29.186420440673828, "rewards/rejected": -29.57444190979004, "step": 3270 }, { "epoch": 14.323144104803493, "grad_norm": 3.62048099217192e-05, "learning_rate": 4.971588128827783e-06, "logits/chosen": -1.1689660549163818, "logits/rejected": -0.9910618662834167, "logps/chosen": -324.143310546875, "logps/rejected": -3142.4326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4564129710197449, "rewards/margins": 27.979778289794922, "rewards/rejected": -28.43619728088379, "step": 3280 }, { "epoch": 14.366812227074236, "grad_norm": 0.0003352046552715148, "learning_rate": 4.971012370143688e-06, "logits/chosen": -1.133650541305542, "logits/rejected": -0.9623467326164246, "logps/chosen": -301.4905700683594, "logps/rejected": -3189.938232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.348239004611969, "rewards/margins": 28.654205322265625, "rewards/rejected": -29.00244140625, "step": 3290 }, { "epoch": 14.410480349344978, "grad_norm": 0.00017650503197072758, "learning_rate": 4.97043087004912e-06, "logits/chosen": -1.1480653285980225, "logits/rejected": -1.0099231004714966, "logps/chosen": -315.5040588378906, "logps/rejected": -3001.952880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.33763304352760315, "rewards/margins": 26.90639305114746, "rewards/rejected": -27.244028091430664, "step": 3300 }, { "epoch": 14.45414847161572, "grad_norm": 4.3834095242780867e-05, "learning_rate": 4.969843629895194e-06, "logits/chosen": -1.104004144668579, "logits/rejected": -1.0966970920562744, "logps/chosen": -376.7830810546875, "logps/rejected": -2760.520751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.48942869901657104, "rewards/margins": 24.51194953918457, "rewards/rejected": -25.00137710571289, "step": 3310 }, { "epoch": 14.497816593886462, "grad_norm": 6.275025934727566e-05, "learning_rate": 4.96925065104637e-06, "logits/chosen": -1.1053178310394287, "logits/rejected": -0.9917346239089966, "logps/chosen": -356.30718994140625, "logps/rejected": -2961.248779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5198858976364136, "rewards/margins": 26.36159324645996, "rewards/rejected": -26.881479263305664, "step": 3320 }, { "epoch": 14.541484716157205, "grad_norm": 2.3870189533648643e-05, "learning_rate": 4.968651934880434e-06, "logits/chosen": -1.106123685836792, "logits/rejected": -0.9908558130264282, "logps/chosen": -331.75811767578125, "logps/rejected": -3096.50830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4729200303554535, "rewards/margins": 27.82613754272461, "rewards/rejected": -28.299057006835938, "step": 3330 }, { "epoch": 14.585152838427948, "grad_norm": 2.2409613858728956e-05, "learning_rate": 4.968047482788509e-06, "logits/chosen": -1.1777316331863403, "logits/rejected": -1.0554368495941162, "logps/chosen": -329.5940856933594, "logps/rejected": -3071.034423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4204697012901306, "rewards/margins": 27.531116485595703, "rewards/rejected": -27.951589584350586, "step": 3340 }, { "epoch": 14.62882096069869, "grad_norm": 0.000282578435837645, "learning_rate": 4.967437296175039e-06, "logits/chosen": -1.1779831647872925, "logits/rejected": -1.0305255651474, "logps/chosen": -311.205078125, "logps/rejected": -3111.081298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.41430091857910156, "rewards/margins": 27.994836807250977, "rewards/rejected": -28.409137725830078, "step": 3350 }, { "epoch": 14.672489082969433, "grad_norm": 0.00019625123464222139, "learning_rate": 4.9668213764578e-06, "logits/chosen": -1.1637232303619385, "logits/rejected": -1.016692876815796, "logps/chosen": -303.552734375, "logps/rejected": -3199.99853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3768828213214874, "rewards/margins": 28.76849365234375, "rewards/rejected": -29.145376205444336, "step": 3360 }, { "epoch": 14.716157205240174, "grad_norm": 7.731671606795399e-05, "learning_rate": 4.966199725067883e-06, "logits/chosen": -1.085688829421997, "logits/rejected": -0.927503764629364, "logps/chosen": -320.1886291503906, "logps/rejected": -3268.97998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5604182481765747, "rewards/margins": 29.33699607849121, "rewards/rejected": -29.897415161132812, "step": 3370 }, { "epoch": 14.759825327510917, "grad_norm": 2.850385307810135e-05, "learning_rate": 4.965572343449698e-06, "logits/chosen": -1.1587575674057007, "logits/rejected": -0.9828616976737976, "logps/chosen": -299.13568115234375, "logps/rejected": -3318.52392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4206467270851135, "rewards/margins": 29.794824600219727, "rewards/rejected": -30.215473175048828, "step": 3380 }, { "epoch": 14.803493449781659, "grad_norm": 4.197212828757521e-05, "learning_rate": 4.9649392330609694e-06, "logits/chosen": -1.1253366470336914, "logits/rejected": -0.9869173169136047, "logps/chosen": -321.40142822265625, "logps/rejected": -3413.561279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4263404905796051, "rewards/margins": 30.72646713256836, "rewards/rejected": -31.152807235717773, "step": 3390 }, { "epoch": 14.847161572052402, "grad_norm": 2.967004344836155e-05, "learning_rate": 4.964300395372733e-06, "logits/chosen": -1.0858619213104248, "logits/rejected": -0.9103706479072571, "logps/chosen": -296.4745788574219, "logps/rejected": -3219.877197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3777235448360443, "rewards/margins": 29.033939361572266, "rewards/rejected": -29.41166114807129, "step": 3400 }, { "epoch": 14.890829694323145, "grad_norm": 1.743779335085282e-05, "learning_rate": 4.963655831869333e-06, "logits/chosen": -1.1168148517608643, "logits/rejected": -0.9260746836662292, "logps/chosen": -299.701171875, "logps/rejected": -3745.58935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4384227395057678, "rewards/margins": 33.845359802246094, "rewards/rejected": -34.283782958984375, "step": 3410 }, { "epoch": 14.934497816593886, "grad_norm": 3.822474822292129e-05, "learning_rate": 4.963005544048413e-06, "logits/chosen": -1.1266635656356812, "logits/rejected": -0.9543396830558777, "logps/chosen": -313.56964111328125, "logps/rejected": -3218.460693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4068237245082855, "rewards/margins": 28.93178367614746, "rewards/rejected": -29.338607788085938, "step": 3420 }, { "epoch": 14.97816593886463, "grad_norm": 2.891680490052555e-05, "learning_rate": 4.962349533420923e-06, "logits/chosen": -1.1413296461105347, "logits/rejected": -0.9780750274658203, "logps/chosen": -294.94989013671875, "logps/rejected": -3372.01708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4003075957298279, "rewards/margins": 30.471202850341797, "rewards/rejected": -30.871509552001953, "step": 3430 }, { "epoch": 15.02183406113537, "grad_norm": 0.0002608607049431017, "learning_rate": 4.961687801511106e-06, "logits/chosen": -1.121800184249878, "logits/rejected": -0.984615683555603, "logps/chosen": -319.7433776855469, "logps/rejected": -3409.421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.37255755066871643, "rewards/margins": 30.77910804748535, "rewards/rejected": -31.15166664123535, "step": 3440 }, { "epoch": 15.065502183406114, "grad_norm": 0.00010852732291705434, "learning_rate": 4.961020349856499e-06, "logits/chosen": -1.1798295974731445, "logits/rejected": -1.050295352935791, "logps/chosen": -335.9691162109375, "logps/rejected": -3093.149169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4382821023464203, "rewards/margins": 27.682235717773438, "rewards/rejected": -28.12051773071289, "step": 3450 }, { "epoch": 15.109170305676855, "grad_norm": 0.00040580181425651916, "learning_rate": 4.960347180007932e-06, "logits/chosen": -1.1646438837051392, "logits/rejected": -1.0399707555770874, "logps/chosen": -321.16314697265625, "logps/rejected": -3284.846435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5213509798049927, "rewards/margins": 29.380725860595703, "rewards/rejected": -29.902074813842773, "step": 3460 }, { "epoch": 15.152838427947598, "grad_norm": 2.5263914422425114e-05, "learning_rate": 4.959668293529515e-06, "logits/chosen": -1.1246106624603271, "logits/rejected": -0.9972552061080933, "logps/chosen": -314.498291015625, "logps/rejected": -3424.91064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46771669387817383, "rewards/margins": 30.863412857055664, "rewards/rejected": -31.331127166748047, "step": 3470 }, { "epoch": 15.196506550218341, "grad_norm": 3.975058025009487e-05, "learning_rate": 4.958983691998648e-06, "logits/chosen": -1.135549783706665, "logits/rejected": -0.9480360746383667, "logps/chosen": -289.4554138183594, "logps/rejected": -3391.704345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.468071848154068, "rewards/margins": 30.665706634521484, "rewards/rejected": -31.1337833404541, "step": 3480 }, { "epoch": 15.240174672489083, "grad_norm": 0.0001335358335707131, "learning_rate": 4.958293377006004e-06, "logits/chosen": -1.0902478694915771, "logits/rejected": -1.0097167491912842, "logps/chosen": -318.3880920410156, "logps/rejected": -3137.8681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4787222445011139, "rewards/margins": 28.220760345458984, "rewards/rejected": -28.69948387145996, "step": 3490 }, { "epoch": 15.283842794759826, "grad_norm": 8.339401063321844e-05, "learning_rate": 4.957597350155535e-06, "logits/chosen": -1.1262617111206055, "logits/rejected": -1.0091626644134521, "logps/chosen": -346.4816589355469, "logps/rejected": -3121.384033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3818811774253845, "rewards/margins": 28.12932777404785, "rewards/rejected": -28.511205673217773, "step": 3500 }, { "epoch": 15.327510917030567, "grad_norm": 6.640078231119713e-05, "learning_rate": 4.956895613064462e-06, "logits/chosen": -1.1125268936157227, "logits/rejected": -1.0103546380996704, "logps/chosen": -329.9185485839844, "logps/rejected": -3075.994384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4164176881313324, "rewards/margins": 27.707605361938477, "rewards/rejected": -28.1240234375, "step": 3510 }, { "epoch": 15.37117903930131, "grad_norm": 2.33299265990675e-05, "learning_rate": 4.9561881673632755e-06, "logits/chosen": -1.0947158336639404, "logits/rejected": -0.965274453163147, "logps/chosen": -313.5068054199219, "logps/rejected": -3542.890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43247103691101074, "rewards/margins": 31.945270538330078, "rewards/rejected": -32.37774658203125, "step": 3520 }, { "epoch": 15.414847161572052, "grad_norm": 0.00013795592349731477, "learning_rate": 4.95547501469573e-06, "logits/chosen": -1.1844799518585205, "logits/rejected": -1.053222894668579, "logps/chosen": -328.7720947265625, "logps/rejected": -3496.594970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5666501522064209, "rewards/margins": 31.396570205688477, "rewards/rejected": -31.963220596313477, "step": 3530 }, { "epoch": 15.458515283842795, "grad_norm": 4.148881760769271e-05, "learning_rate": 4.954756156718837e-06, "logits/chosen": -1.1761152744293213, "logits/rejected": -1.0391234159469604, "logps/chosen": -313.72662353515625, "logps/rejected": -3580.991455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3487403094768524, "rewards/margins": 32.39946365356445, "rewards/rejected": -32.74820327758789, "step": 3540 }, { "epoch": 15.502183406113538, "grad_norm": 2.9491002593660844e-05, "learning_rate": 4.9540315951028695e-06, "logits/chosen": -1.0973246097564697, "logits/rejected": -0.9990777969360352, "logps/chosen": -310.24749755859375, "logps/rejected": -3190.515869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.44979652762413025, "rewards/margins": 28.75104331970215, "rewards/rejected": -29.200836181640625, "step": 3550 }, { "epoch": 15.54585152838428, "grad_norm": 9.065410964793806e-05, "learning_rate": 4.953301331531349e-06, "logits/chosen": -1.1606998443603516, "logits/rejected": -1.0485261678695679, "logps/chosen": -309.6778564453125, "logps/rejected": -3299.484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46658220887184143, "rewards/margins": 29.77468490600586, "rewards/rejected": -30.24127197265625, "step": 3560 }, { "epoch": 15.589519650655022, "grad_norm": 1.627783181244959e-05, "learning_rate": 4.952565367701049e-06, "logits/chosen": -1.1102514266967773, "logits/rejected": -0.9487846493721008, "logps/chosen": -311.99383544921875, "logps/rejected": -3246.78076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.42269882559776306, "rewards/margins": 29.22957992553711, "rewards/rejected": -29.65227699279785, "step": 3570 }, { "epoch": 15.633187772925764, "grad_norm": 0.002582146849455525, "learning_rate": 4.951823705321982e-06, "logits/chosen": -1.1452381610870361, "logits/rejected": -1.0031293630599976, "logps/chosen": -306.9367980957031, "logps/rejected": -3265.298583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4511648118495941, "rewards/margins": 29.310504913330078, "rewards/rejected": -29.761669158935547, "step": 3580 }, { "epoch": 15.676855895196507, "grad_norm": 3.715425216151903e-05, "learning_rate": 4.951076346117406e-06, "logits/chosen": -1.135014295578003, "logits/rejected": -0.9789797067642212, "logps/chosen": -301.3087463378906, "logps/rejected": -3542.776611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3856702446937561, "rewards/margins": 31.91843032836914, "rewards/rejected": -32.304100036621094, "step": 3590 }, { "epoch": 15.720524017467248, "grad_norm": 1.93723756387267e-05, "learning_rate": 4.950323291823815e-06, "logits/chosen": -1.1468144655227661, "logits/rejected": -1.074881911277771, "logps/chosen": -340.77496337890625, "logps/rejected": -2894.4501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.40337735414505005, "rewards/margins": 26.051223754882812, "rewards/rejected": -26.454599380493164, "step": 3600 }, { "epoch": 15.764192139737991, "grad_norm": 3.3377631867243426e-05, "learning_rate": 4.949564544190933e-06, "logits/chosen": -1.1933954954147339, "logits/rejected": -1.0347158908843994, "logps/chosen": -312.40728759765625, "logps/rejected": -3490.343017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.401611864566803, "rewards/margins": 31.492568969726562, "rewards/rejected": -31.894180297851562, "step": 3610 }, { "epoch": 15.807860262008735, "grad_norm": 3.41874311835694e-05, "learning_rate": 4.948800104981717e-06, "logits/chosen": -1.1035406589508057, "logits/rejected": -1.0227168798446655, "logps/chosen": -345.2730407714844, "logps/rejected": -2980.923583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5233593583106995, "rewards/margins": 26.693506240844727, "rewards/rejected": -27.21686363220215, "step": 3620 }, { "epoch": 15.851528384279476, "grad_norm": 0.0005156870083053058, "learning_rate": 4.948029975972341e-06, "logits/chosen": -1.1249781847000122, "logits/rejected": -0.99785315990448, "logps/chosen": -313.8624267578125, "logps/rejected": -3253.545166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.38006657361984253, "rewards/margins": 29.389690399169922, "rewards/rejected": -29.769756317138672, "step": 3630 }, { "epoch": 15.895196506550219, "grad_norm": 0.0001571654215579797, "learning_rate": 4.947254158952209e-06, "logits/chosen": -1.123586893081665, "logits/rejected": -1.0006965398788452, "logps/chosen": -326.8757019042969, "logps/rejected": -3329.916748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.41692590713500977, "rewards/margins": 29.9765682220459, "rewards/rejected": -30.393497467041016, "step": 3640 }, { "epoch": 15.93886462882096, "grad_norm": 7.332772712325647e-05, "learning_rate": 4.946472655723933e-06, "logits/chosen": -1.1349538564682007, "logits/rejected": -1.016745924949646, "logps/chosen": -323.1857604980469, "logps/rejected": -3553.739013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.48081135749816895, "rewards/margins": 31.99978256225586, "rewards/rejected": -32.480594635009766, "step": 3650 }, { "epoch": 15.982532751091703, "grad_norm": 0.0004092983937667233, "learning_rate": 4.94568546810334e-06, "logits/chosen": -1.1088311672210693, "logits/rejected": -1.0159518718719482, "logps/chosen": -337.761962890625, "logps/rejected": -3129.992919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.44719547033309937, "rewards/margins": 28.252328872680664, "rewards/rejected": -28.699520111083984, "step": 3660 }, { "epoch": 16.026200873362445, "grad_norm": 0.0001630644520760899, "learning_rate": 4.944892597919465e-06, "logits/chosen": -1.1675245761871338, "logits/rejected": -1.0837774276733398, "logps/chosen": -343.91265869140625, "logps/rejected": -3221.513916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.41530531644821167, "rewards/margins": 29.050067901611328, "rewards/rejected": -29.46537208557129, "step": 3670 }, { "epoch": 16.069868995633186, "grad_norm": 5.791239872754001e-05, "learning_rate": 4.944094047014547e-06, "logits/chosen": -1.1170625686645508, "logits/rejected": -1.0719279050827026, "logps/chosen": -351.76519775390625, "logps/rejected": -2881.38818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5203040838241577, "rewards/margins": 25.817806243896484, "rewards/rejected": -26.338109970092773, "step": 3680 }, { "epoch": 16.11353711790393, "grad_norm": 1.8113681911818808e-05, "learning_rate": 4.943289817244022e-06, "logits/chosen": -1.1215661764144897, "logits/rejected": -1.0044279098510742, "logps/chosen": -326.2577819824219, "logps/rejected": -3493.57470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.49273037910461426, "rewards/margins": 31.45066261291504, "rewards/rejected": -31.94339370727539, "step": 3690 }, { "epoch": 16.157205240174672, "grad_norm": 7.232326309766577e-05, "learning_rate": 4.9424799104765245e-06, "logits/chosen": -1.109752893447876, "logits/rejected": -1.0333116054534912, "logps/chosen": -319.9769287109375, "logps/rejected": -3354.11376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.37346333265304565, "rewards/margins": 30.400888442993164, "rewards/rejected": -30.77435302734375, "step": 3700 }, { "epoch": 16.200873362445414, "grad_norm": 3.143285730006901e-05, "learning_rate": 4.941664328593874e-06, "logits/chosen": -1.1209245920181274, "logits/rejected": -1.017314076423645, "logps/chosen": -331.42401123046875, "logps/rejected": -3216.626708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.551834225654602, "rewards/margins": 28.88298988342285, "rewards/rejected": -29.434825897216797, "step": 3710 }, { "epoch": 16.24454148471616, "grad_norm": 0.00015395744395876393, "learning_rate": 4.940843073491081e-06, "logits/chosen": -1.1136215925216675, "logits/rejected": -1.0153545141220093, "logps/chosen": -332.05267333984375, "logps/rejected": -3111.82861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43680715560913086, "rewards/margins": 28.076257705688477, "rewards/rejected": -28.513065338134766, "step": 3720 }, { "epoch": 16.2882096069869, "grad_norm": 1.9766385077334416e-05, "learning_rate": 4.940016147076337e-06, "logits/chosen": -1.0676990747451782, "logits/rejected": -1.0181289911270142, "logps/chosen": -343.3837890625, "logps/rejected": -3036.989013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5392339825630188, "rewards/margins": 27.27492332458496, "rewards/rejected": -27.814157485961914, "step": 3730 }, { "epoch": 16.33187772925764, "grad_norm": 0.00036004913978153556, "learning_rate": 4.9391835512710076e-06, "logits/chosen": -1.1183878183364868, "logits/rejected": -1.0447802543640137, "logps/chosen": -321.18157958984375, "logps/rejected": -3191.349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.364040732383728, "rewards/margins": 28.94754981994629, "rewards/rejected": -29.31159019470215, "step": 3740 }, { "epoch": 16.375545851528383, "grad_norm": 0.003062787836373958, "learning_rate": 4.938345288009635e-06, "logits/chosen": -1.133833885192871, "logits/rejected": -1.0187727212905884, "logps/chosen": -339.9685974121094, "logps/rejected": -3422.89892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.592016339302063, "rewards/margins": 30.696975708007812, "rewards/rejected": -31.288991928100586, "step": 3750 }, { "epoch": 16.419213973799128, "grad_norm": 5.4187282461029624e-05, "learning_rate": 4.937501359239929e-06, "logits/chosen": -1.1545732021331787, "logits/rejected": -1.0319663286209106, "logps/chosen": -338.25518798828125, "logps/rejected": -3590.64697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5983640551567078, "rewards/margins": 32.35780334472656, "rewards/rejected": -32.9561653137207, "step": 3760 }, { "epoch": 16.46288209606987, "grad_norm": 9.28764559381397e-05, "learning_rate": 4.936651766922761e-06, "logits/chosen": -1.1296498775482178, "logits/rejected": -1.0092902183532715, "logps/chosen": -319.0171813964844, "logps/rejected": -3521.265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.40619784593582153, "rewards/margins": 31.8883056640625, "rewards/rejected": -32.29450225830078, "step": 3770 }, { "epoch": 16.50655021834061, "grad_norm": 1.383814075405317e-05, "learning_rate": 4.935796513032166e-06, "logits/chosen": -1.1477552652359009, "logits/rejected": -1.108064889907837, "logps/chosen": -337.96136474609375, "logps/rejected": -3042.583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.42939719557762146, "rewards/margins": 27.395029067993164, "rewards/rejected": -27.82442283630371, "step": 3780 }, { "epoch": 16.550218340611355, "grad_norm": 2.3329404864495915e-05, "learning_rate": 4.934935599555328e-06, "logits/chosen": -1.1188223361968994, "logits/rejected": -0.9543914794921875, "logps/chosen": -307.6717834472656, "logps/rejected": -3763.51123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.515353798866272, "rewards/margins": 34.04850769042969, "rewards/rejected": -34.563865661621094, "step": 3790 }, { "epoch": 16.593886462882097, "grad_norm": 0.00012255156912911803, "learning_rate": 4.934069028492585e-06, "logits/chosen": -1.2481359243392944, "logits/rejected": -1.1331324577331543, "logps/chosen": -356.5239562988281, "logps/rejected": -3417.83740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5794954895973206, "rewards/margins": 30.64496421813965, "rewards/rejected": -31.224462509155273, "step": 3800 }, { "epoch": 16.637554585152838, "grad_norm": 0.0016003875935358875, "learning_rate": 4.933196801857421e-06, "logits/chosen": -1.1708513498306274, "logits/rejected": -1.0747720003128052, "logps/chosen": -327.0431213378906, "logps/rejected": -3485.60302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5570095777511597, "rewards/margins": 31.426258087158203, "rewards/rejected": -31.983266830444336, "step": 3810 }, { "epoch": 16.68122270742358, "grad_norm": 3.735607693538163e-05, "learning_rate": 4.932318921676458e-06, "logits/chosen": -1.1617597341537476, "logits/rejected": -1.0822246074676514, "logps/chosen": -327.4127502441406, "logps/rejected": -3330.252685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4529361128807068, "rewards/margins": 30.15089988708496, "rewards/rejected": -30.603836059570312, "step": 3820 }, { "epoch": 16.724890829694324, "grad_norm": 2.697485190412666e-05, "learning_rate": 4.931435389989454e-06, "logits/chosen": -1.1004424095153809, "logits/rejected": -0.9821289777755737, "logps/chosen": -307.050537109375, "logps/rejected": -3247.325439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4190834164619446, "rewards/margins": 29.243762969970703, "rewards/rejected": -29.6628475189209, "step": 3830 }, { "epoch": 16.768558951965066, "grad_norm": 4.453988694437276e-05, "learning_rate": 4.9305462088493025e-06, "logits/chosen": -1.1091995239257812, "logits/rejected": -1.0518004894256592, "logps/chosen": -339.5408935546875, "logps/rejected": -3209.39892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4095698297023773, "rewards/margins": 29.017324447631836, "rewards/rejected": -29.426895141601562, "step": 3840 }, { "epoch": 16.812227074235807, "grad_norm": 6.129811903685132e-05, "learning_rate": 4.929651380322019e-06, "logits/chosen": -1.1370481252670288, "logits/rejected": -1.079980731010437, "logps/chosen": -339.10260009765625, "logps/rejected": -3224.82568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5071977376937866, "rewards/margins": 29.138818740844727, "rewards/rejected": -29.64601707458496, "step": 3850 }, { "epoch": 16.85589519650655, "grad_norm": 0.0018049239659001656, "learning_rate": 4.928750906486742e-06, "logits/chosen": -1.137498140335083, "logits/rejected": -1.060060977935791, "logps/chosen": -341.060791015625, "logps/rejected": -3252.63037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.42179933190345764, "rewards/margins": 29.38961410522461, "rewards/rejected": -29.811412811279297, "step": 3860 }, { "epoch": 16.899563318777293, "grad_norm": 2.9181991921435886e-05, "learning_rate": 4.9278447894357275e-06, "logits/chosen": -1.1304962635040283, "logits/rejected": -1.0462381839752197, "logps/chosen": -330.43695068359375, "logps/rejected": -3383.629638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4681795537471771, "rewards/margins": 30.582666397094727, "rewards/rejected": -31.05084228515625, "step": 3870 }, { "epoch": 16.943231441048034, "grad_norm": 2.0533047669525308e-05, "learning_rate": 4.926933031274344e-06, "logits/chosen": -1.143153429031372, "logits/rejected": -1.0271610021591187, "logps/chosen": -305.24249267578125, "logps/rejected": -3566.444091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3812311291694641, "rewards/margins": 32.29736328125, "rewards/rejected": -32.67859649658203, "step": 3880 }, { "epoch": 16.986899563318776, "grad_norm": 2.496144609552051e-05, "learning_rate": 4.926015634121066e-06, "logits/chosen": -1.1124765872955322, "logits/rejected": -0.9955824613571167, "logps/chosen": -291.5870666503906, "logps/rejected": -3321.592529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39970818161964417, "rewards/margins": 30.02327537536621, "rewards/rejected": -30.422983169555664, "step": 3890 }, { "epoch": 17.03056768558952, "grad_norm": 8.450186973979079e-06, "learning_rate": 4.9250926001074715e-06, "logits/chosen": -1.1628497838974, "logits/rejected": -1.0290435552597046, "logps/chosen": -303.8916320800781, "logps/rejected": -3843.08056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4818456172943115, "rewards/margins": 34.791996002197266, "rewards/rejected": -35.273841857910156, "step": 3900 }, { "epoch": 17.074235807860262, "grad_norm": 1.5365968873514945e-05, "learning_rate": 4.924163931378233e-06, "logits/chosen": -1.1487669944763184, "logits/rejected": -1.1195704936981201, "logps/chosen": -345.8258361816406, "logps/rejected": -3011.80517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3902854919433594, "rewards/margins": 27.157238006591797, "rewards/rejected": -27.54752540588379, "step": 3910 }, { "epoch": 17.117903930131003, "grad_norm": 1.0086654684948946e-05, "learning_rate": 4.923229630091119e-06, "logits/chosen": -1.153738260269165, "logits/rejected": -1.105128288269043, "logps/chosen": -341.00787353515625, "logps/rejected": -3182.510498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4065428674221039, "rewards/margins": 28.787464141845703, "rewards/rejected": -29.194005966186523, "step": 3920 }, { "epoch": 17.16157205240175, "grad_norm": 1.9412391718534037e-05, "learning_rate": 4.922289698416984e-06, "logits/chosen": -1.1639859676361084, "logits/rejected": -1.1170685291290283, "logps/chosen": -357.11102294921875, "logps/rejected": -3130.220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4469476640224457, "rewards/margins": 28.1563720703125, "rewards/rejected": -28.603313446044922, "step": 3930 }, { "epoch": 17.20524017467249, "grad_norm": 4.747466466977344e-05, "learning_rate": 4.921344138539762e-06, "logits/chosen": -1.1433078050613403, "logits/rejected": -1.0781642198562622, "logps/chosen": -342.82208251953125, "logps/rejected": -3211.58984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5045635104179382, "rewards/margins": 28.926904678344727, "rewards/rejected": -29.431472778320312, "step": 3940 }, { "epoch": 17.24890829694323, "grad_norm": 0.00012283973534246497, "learning_rate": 4.9203929526564685e-06, "logits/chosen": -1.1169419288635254, "logits/rejected": -1.0343997478485107, "logps/chosen": -328.0079345703125, "logps/rejected": -3388.42626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.507234513759613, "rewards/margins": 30.511001586914062, "rewards/rejected": -31.018230438232422, "step": 3950 }, { "epoch": 17.292576419213972, "grad_norm": 2.0724787653029147e-05, "learning_rate": 4.919436142977189e-06, "logits/chosen": -1.1591315269470215, "logits/rejected": -1.0877256393432617, "logps/chosen": -323.301513671875, "logps/rejected": -3384.90966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3750132918357849, "rewards/margins": 30.743488311767578, "rewards/rejected": -31.11850357055664, "step": 3960 }, { "epoch": 17.336244541484717, "grad_norm": 1.3688406032165706e-05, "learning_rate": 4.918473711725073e-06, "logits/chosen": -1.1693027019500732, "logits/rejected": -1.049323320388794, "logps/chosen": -317.8309020996094, "logps/rejected": -3555.369873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.40930408239364624, "rewards/margins": 32.1721305847168, "rewards/rejected": -32.58142852783203, "step": 3970 }, { "epoch": 17.37991266375546, "grad_norm": 1.994767088859885e-05, "learning_rate": 4.917505661136339e-06, "logits/chosen": -1.1444370746612549, "logits/rejected": -1.0138721466064453, "logps/chosen": -309.7357482910156, "logps/rejected": -3247.480712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.45539507269859314, "rewards/margins": 29.26839828491211, "rewards/rejected": -29.723791122436523, "step": 3980 }, { "epoch": 17.4235807860262, "grad_norm": 3.227428963854314e-05, "learning_rate": 4.9165319934602554e-06, "logits/chosen": -1.1727807521820068, "logits/rejected": -1.0450091361999512, "logps/chosen": -325.8970031738281, "logps/rejected": -3341.020263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.49693864583969116, "rewards/margins": 30.0976505279541, "rewards/rejected": -30.594593048095703, "step": 3990 }, { "epoch": 17.467248908296945, "grad_norm": 0.0004737608599616994, "learning_rate": 4.9155527109591435e-06, "logits/chosen": -1.1041510105133057, "logits/rejected": -1.0500736236572266, "logps/chosen": -335.0669250488281, "logps/rejected": -3442.716064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.577714741230011, "rewards/margins": 31.06437110900879, "rewards/rejected": -31.642086029052734, "step": 4000 }, { "epoch": 17.510917030567686, "grad_norm": 3.197549470750915e-05, "learning_rate": 4.914567815908372e-06, "logits/chosen": -1.1245784759521484, "logits/rejected": -1.07236909866333, "logps/chosen": -346.0580139160156, "logps/rejected": -3095.834716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46649423241615295, "rewards/margins": 27.907657623291016, "rewards/rejected": -28.374149322509766, "step": 4010 }, { "epoch": 17.554585152838428, "grad_norm": 1.6826210347893622e-05, "learning_rate": 4.913577310596352e-06, "logits/chosen": -1.205352544784546, "logits/rejected": -1.081782579421997, "logps/chosen": -337.6910400390625, "logps/rejected": -3610.080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5932260751724243, "rewards/margins": 32.52745819091797, "rewards/rejected": -33.12068557739258, "step": 4020 }, { "epoch": 17.59825327510917, "grad_norm": 0.0010192544105716776, "learning_rate": 4.912581197324524e-06, "logits/chosen": -1.1449798345565796, "logits/rejected": -1.0751088857650757, "logps/chosen": -325.3279724121094, "logps/rejected": -3371.946533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.49171143770217896, "rewards/margins": 30.461755752563477, "rewards/rejected": -30.953466415405273, "step": 4030 }, { "epoch": 17.641921397379914, "grad_norm": 2.5479450736235602e-05, "learning_rate": 4.911579478407366e-06, "logits/chosen": -1.1218159198760986, "logits/rejected": -1.0081137418746948, "logps/chosen": -313.460693359375, "logps/rejected": -3325.471435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5428140759468079, "rewards/margins": 30.010705947875977, "rewards/rejected": -30.553516387939453, "step": 4040 }, { "epoch": 17.685589519650655, "grad_norm": 0.00024055494747686818, "learning_rate": 4.910572156172376e-06, "logits/chosen": -1.127424955368042, "logits/rejected": -1.0409348011016846, "logps/chosen": -317.4436950683594, "logps/rejected": -3782.73681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4776168465614319, "rewards/margins": 34.38016891479492, "rewards/rejected": -34.85778045654297, "step": 4050 }, { "epoch": 17.729257641921397, "grad_norm": 0.00011633486979649426, "learning_rate": 4.909559232960072e-06, "logits/chosen": -1.1059181690216064, "logits/rejected": -1.0248090028762817, "logps/chosen": -320.9773864746094, "logps/rejected": -3514.39599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5334897637367249, "rewards/margins": 31.819217681884766, "rewards/rejected": -32.35271072387695, "step": 4060 }, { "epoch": 17.77292576419214, "grad_norm": 3.3630562396321565e-05, "learning_rate": 4.908540711123987e-06, "logits/chosen": -1.1159532070159912, "logits/rejected": -1.0286428928375244, "logps/chosen": -299.9259948730469, "logps/rejected": -3737.41748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43787580728530884, "rewards/margins": 33.970130920410156, "rewards/rejected": -34.40800857543945, "step": 4070 }, { "epoch": 17.816593886462883, "grad_norm": 1.9369968797099876e-05, "learning_rate": 4.907516593030662e-06, "logits/chosen": -1.0767626762390137, "logits/rejected": -1.009580373764038, "logps/chosen": -322.88409423828125, "logps/rejected": -3564.00537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.543941855430603, "rewards/margins": 32.272884368896484, "rewards/rejected": -32.81682586669922, "step": 4080 }, { "epoch": 17.860262008733624, "grad_norm": 4.9548027547552713e-05, "learning_rate": 4.906486881059641e-06, "logits/chosen": -1.1635464429855347, "logits/rejected": -1.0860925912857056, "logps/chosen": -333.0545349121094, "logps/rejected": -3698.25537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.47473448514938354, "rewards/margins": 33.59881591796875, "rewards/rejected": -34.07355499267578, "step": 4090 }, { "epoch": 17.903930131004365, "grad_norm": 0.00012884412016440588, "learning_rate": 4.905451577603464e-06, "logits/chosen": -1.142629623413086, "logits/rejected": -1.0459332466125488, "logps/chosen": -311.8257141113281, "logps/rejected": -3500.55859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4201423227787018, "rewards/margins": 31.6556339263916, "rewards/rejected": -32.075775146484375, "step": 4100 }, { "epoch": 17.94759825327511, "grad_norm": 1.5943453238846976e-05, "learning_rate": 4.904410685067667e-06, "logits/chosen": -1.1546052694320679, "logits/rejected": -1.1035434007644653, "logps/chosen": -322.1121826171875, "logps/rejected": -3454.404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46164217591285706, "rewards/margins": 31.261560440063477, "rewards/rejected": -31.723201751708984, "step": 4110 }, { "epoch": 17.99126637554585, "grad_norm": 1.3140473644355343e-05, "learning_rate": 4.903364205870767e-06, "logits/chosen": -1.1395351886749268, "logits/rejected": -1.0910227298736572, "logps/chosen": -328.20281982421875, "logps/rejected": -3396.68212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.413543164730072, "rewards/margins": 30.85115623474121, "rewards/rejected": -31.264698028564453, "step": 4120 }, { "epoch": 18.034934497816593, "grad_norm": 3.743081326184781e-05, "learning_rate": 4.9023121424442635e-06, "logits/chosen": -1.1138339042663574, "logits/rejected": -1.0392777919769287, "logps/chosen": -320.9319763183594, "logps/rejected": -3264.773681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43754786252975464, "rewards/margins": 29.633270263671875, "rewards/rejected": -30.070816040039062, "step": 4130 }, { "epoch": 18.078602620087338, "grad_norm": 1.2888457880648452e-05, "learning_rate": 4.901254497232634e-06, "logits/chosen": -1.110581398010254, "logits/rejected": -1.0459425449371338, "logps/chosen": -318.1273498535156, "logps/rejected": -3378.25048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.42392539978027344, "rewards/margins": 30.64789390563965, "rewards/rejected": -31.071813583374023, "step": 4140 }, { "epoch": 18.12227074235808, "grad_norm": 0.0005496783104714753, "learning_rate": 4.900191272693321e-06, "logits/chosen": -1.1952486038208008, "logits/rejected": -1.0962164402008057, "logps/chosen": -322.04290771484375, "logps/rejected": -3703.51025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.48779740929603577, "rewards/margins": 33.57780838012695, "rewards/rejected": -34.06560516357422, "step": 4150 }, { "epoch": 18.16593886462882, "grad_norm": 0.00018669797468284324, "learning_rate": 4.899122471296732e-06, "logits/chosen": -1.153424859046936, "logits/rejected": -1.08345627784729, "logps/chosen": -348.58404541015625, "logps/rejected": -3448.867919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6153877973556519, "rewards/margins": 31.032846450805664, "rewards/rejected": -31.648229598999023, "step": 4160 }, { "epoch": 18.209606986899562, "grad_norm": 0.00011778559420148418, "learning_rate": 4.8980480955262345e-06, "logits/chosen": -1.1245146989822388, "logits/rejected": -1.0184351205825806, "logps/chosen": -297.7503967285156, "logps/rejected": -3630.43212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5069342851638794, "rewards/margins": 32.93379211425781, "rewards/rejected": -33.44072723388672, "step": 4170 }, { "epoch": 18.253275109170307, "grad_norm": 0.0007103629773826935, "learning_rate": 4.896968147878146e-06, "logits/chosen": -1.0974441766738892, "logits/rejected": -1.028035283088684, "logps/chosen": -317.8393249511719, "logps/rejected": -3954.100830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.48531293869018555, "rewards/margins": 36.03862380981445, "rewards/rejected": -36.5239372253418, "step": 4180 }, { "epoch": 18.29694323144105, "grad_norm": 3.697095115666922e-05, "learning_rate": 4.895882630861729e-06, "logits/chosen": -1.0902379751205444, "logits/rejected": -1.0530601739883423, "logps/chosen": -336.3571472167969, "logps/rejected": -3052.12451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5630185008049011, "rewards/margins": 27.394550323486328, "rewards/rejected": -27.957569122314453, "step": 4190 }, { "epoch": 18.34061135371179, "grad_norm": 2.151463027154945e-05, "learning_rate": 4.89479154699919e-06, "logits/chosen": -1.1285381317138672, "logits/rejected": -1.0621665716171265, "logps/chosen": -345.0508728027344, "logps/rejected": -3253.394775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5569182634353638, "rewards/margins": 29.411029815673828, "rewards/rejected": -29.967947006225586, "step": 4200 }, { "epoch": 18.38427947598253, "grad_norm": 1.53996537161246e-05, "learning_rate": 4.8936948988256676e-06, "logits/chosen": -1.1448523998260498, "logits/rejected": -1.0577276945114136, "logps/chosen": -303.9554138183594, "logps/rejected": -3777.521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43593186140060425, "rewards/margins": 34.30093765258789, "rewards/rejected": -34.73686599731445, "step": 4210 }, { "epoch": 18.427947598253276, "grad_norm": 1.4237909874875272e-05, "learning_rate": 4.892592688889228e-06, "logits/chosen": -1.1407477855682373, "logits/rejected": -1.0862990617752075, "logps/chosen": -317.7071228027344, "logps/rejected": -3433.000732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4100712239742279, "rewards/margins": 31.16695213317871, "rewards/rejected": -31.577022552490234, "step": 4220 }, { "epoch": 18.471615720524017, "grad_norm": 9.077784089405918e-06, "learning_rate": 4.891484919750865e-06, "logits/chosen": -1.1042588949203491, "logits/rejected": -1.0645740032196045, "logps/chosen": -319.26904296875, "logps/rejected": -3362.60498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4405575692653656, "rewards/margins": 30.533294677734375, "rewards/rejected": -30.973852157592773, "step": 4230 }, { "epoch": 18.51528384279476, "grad_norm": 2.1602611518000494e-05, "learning_rate": 4.890371593984484e-06, "logits/chosen": -1.1135419607162476, "logits/rejected": -1.0949660539627075, "logps/chosen": -323.7941589355469, "logps/rejected": -3239.308349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46456852555274963, "rewards/margins": 29.322580337524414, "rewards/rejected": -29.787145614624023, "step": 4240 }, { "epoch": 18.558951965065503, "grad_norm": 1.9413125154085112e-05, "learning_rate": 4.889252714176904e-06, "logits/chosen": -1.1354076862335205, "logits/rejected": -1.034610629081726, "logps/chosen": -317.03436279296875, "logps/rejected": -3687.397216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5214956402778625, "rewards/margins": 33.46592712402344, "rewards/rejected": -33.98741912841797, "step": 4250 }, { "epoch": 18.602620087336245, "grad_norm": 0.00145899178044647, "learning_rate": 4.888128282927848e-06, "logits/chosen": -1.118719458580017, "logits/rejected": -1.0295454263687134, "logps/chosen": -318.9069519042969, "logps/rejected": -3426.868408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5242400169372559, "rewards/margins": 30.954761505126953, "rewards/rejected": -31.4789981842041, "step": 4260 }, { "epoch": 18.646288209606986, "grad_norm": 2.5078249067589452e-05, "learning_rate": 4.886998302849938e-06, "logits/chosen": -1.1832207441329956, "logits/rejected": -1.0703109502792358, "logps/chosen": -296.9598693847656, "logps/rejected": -3818.676513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5194085240364075, "rewards/margins": 34.478736877441406, "rewards/rejected": -34.998146057128906, "step": 4270 }, { "epoch": 18.68995633187773, "grad_norm": 2.195300548922644e-05, "learning_rate": 4.885862776568689e-06, "logits/chosen": -1.0904655456542969, "logits/rejected": -1.0628913640975952, "logps/chosen": -344.2882385253906, "logps/rejected": -3247.393798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5065072774887085, "rewards/margins": 29.438961029052734, "rewards/rejected": -29.945465087890625, "step": 4280 }, { "epoch": 18.733624454148472, "grad_norm": 6.872811933622979e-05, "learning_rate": 4.884721706722503e-06, "logits/chosen": -1.1493566036224365, "logits/rejected": -1.0568716526031494, "logps/chosen": -314.49176025390625, "logps/rejected": -3740.268798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5079344511032104, "rewards/margins": 33.9514045715332, "rewards/rejected": -34.4593391418457, "step": 4290 }, { "epoch": 18.777292576419214, "grad_norm": 5.349621563841042e-05, "learning_rate": 4.883575095962661e-06, "logits/chosen": -1.14870023727417, "logits/rejected": -1.0901598930358887, "logps/chosen": -315.00140380859375, "logps/rejected": -3349.930908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4733281135559082, "rewards/margins": 30.293853759765625, "rewards/rejected": -30.767187118530273, "step": 4300 }, { "epoch": 18.820960698689955, "grad_norm": 1.4964080003440922e-05, "learning_rate": 4.882422946953319e-06, "logits/chosen": -1.1954948902130127, "logits/rejected": -1.108110785484314, "logps/chosen": -306.07073974609375, "logps/rejected": -3901.727294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4934866428375244, "rewards/margins": 35.278785705566406, "rewards/rejected": -35.772274017333984, "step": 4310 }, { "epoch": 18.8646288209607, "grad_norm": 3.178336827612571e-05, "learning_rate": 4.881265262371502e-06, "logits/chosen": -1.134602665901184, "logits/rejected": -1.0922831296920776, "logps/chosen": -364.52154541015625, "logps/rejected": -3446.02734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5254496335983276, "rewards/margins": 31.16427230834961, "rewards/rejected": -31.689722061157227, "step": 4320 }, { "epoch": 18.90829694323144, "grad_norm": 1.2547832946295366e-05, "learning_rate": 4.880102044907096e-06, "logits/chosen": -1.1952992677688599, "logits/rejected": -1.1215534210205078, "logps/chosen": -348.40679931640625, "logps/rejected": -3753.240966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6474721431732178, "rewards/margins": 33.852760314941406, "rewards/rejected": -34.50022506713867, "step": 4330 }, { "epoch": 18.951965065502183, "grad_norm": 2.0751533303412646e-05, "learning_rate": 4.878933297262844e-06, "logits/chosen": -1.1321710348129272, "logits/rejected": -1.1369760036468506, "logps/chosen": -348.44317626953125, "logps/rejected": -3253.1328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4914029538631439, "rewards/margins": 29.383411407470703, "rewards/rejected": -29.87481689453125, "step": 4340 }, { "epoch": 18.995633187772924, "grad_norm": 9.879852659757375e-06, "learning_rate": 4.877759022154336e-06, "logits/chosen": -1.0686160326004028, "logits/rejected": -1.0604947805404663, "logps/chosen": -350.994384765625, "logps/rejected": -3052.02685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5563459992408752, "rewards/margins": 27.558452606201172, "rewards/rejected": -28.11480140686035, "step": 4350 }, { "epoch": 19.03930131004367, "grad_norm": 0.0010120357929915874, "learning_rate": 4.876579222310007e-06, "logits/chosen": -1.2167795896530151, "logits/rejected": -1.1614450216293335, "logps/chosen": -332.29119873046875, "logps/rejected": -3335.987548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.37571436166763306, "rewards/margins": 30.18075942993164, "rewards/rejected": -30.556472778320312, "step": 4360 }, { "epoch": 19.08296943231441, "grad_norm": 4.983699662084309e-05, "learning_rate": 4.8753939004711285e-06, "logits/chosen": -1.0870156288146973, "logits/rejected": -1.0149248838424683, "logps/chosen": -305.3759765625, "logps/rejected": -3440.75341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5442352294921875, "rewards/margins": 31.114116668701172, "rewards/rejected": -31.658355712890625, "step": 4370 }, { "epoch": 19.12663755458515, "grad_norm": 0.00035988141231727477, "learning_rate": 4.874203059391802e-06, "logits/chosen": -1.1654994487762451, "logits/rejected": -1.0290499925613403, "logps/chosen": -292.1133728027344, "logps/rejected": -4068.719970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4951747953891754, "rewards/margins": 36.898841857910156, "rewards/rejected": -37.394012451171875, "step": 4380 }, { "epoch": 19.170305676855897, "grad_norm": 2.0985131757861162e-05, "learning_rate": 4.8730067018389525e-06, "logits/chosen": -1.1595220565795898, "logits/rejected": -1.168709397315979, "logps/chosen": -365.0734558105469, "logps/rejected": -3042.059814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5054175853729248, "rewards/margins": 27.42752456665039, "rewards/rejected": -27.932937622070312, "step": 4390 }, { "epoch": 19.213973799126638, "grad_norm": 1.4299603067682152e-05, "learning_rate": 4.871804830592325e-06, "logits/chosen": -1.1092866659164429, "logits/rejected": -1.029784917831421, "logps/chosen": -322.2218933105469, "logps/rejected": -3298.25390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4878770411014557, "rewards/margins": 29.829702377319336, "rewards/rejected": -30.317581176757812, "step": 4400 }, { "epoch": 19.25764192139738, "grad_norm": 3.3238146048236875e-05, "learning_rate": 4.870597448444472e-06, "logits/chosen": -1.1111412048339844, "logits/rejected": -1.119275450706482, "logps/chosen": -360.60101318359375, "logps/rejected": -3181.72119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.49439746141433716, "rewards/margins": 28.760456085205078, "rewards/rejected": -29.25485610961914, "step": 4410 }, { "epoch": 19.30131004366812, "grad_norm": 1.3390517355432285e-05, "learning_rate": 4.869384558200752e-06, "logits/chosen": -1.1895005702972412, "logits/rejected": -1.1252131462097168, "logps/chosen": -327.73077392578125, "logps/rejected": -3685.09912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4286181926727295, "rewards/margins": 33.5189323425293, "rewards/rejected": -33.94755172729492, "step": 4420 }, { "epoch": 19.344978165938866, "grad_norm": 9.267269005605614e-05, "learning_rate": 4.868166162679325e-06, "logits/chosen": -1.1652871370315552, "logits/rejected": -1.0859495401382446, "logps/chosen": -318.52960205078125, "logps/rejected": -3420.489501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46613436937332153, "rewards/margins": 30.984729766845703, "rewards/rejected": -31.450862884521484, "step": 4430 }, { "epoch": 19.388646288209607, "grad_norm": 1.2692326606603438e-05, "learning_rate": 4.866942264711137e-06, "logits/chosen": -1.145635962486267, "logits/rejected": -1.1299865245819092, "logps/chosen": -366.2757873535156, "logps/rejected": -3465.24951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6407070159912109, "rewards/margins": 31.252248764038086, "rewards/rejected": -31.892953872680664, "step": 4440 }, { "epoch": 19.43231441048035, "grad_norm": 1.2531177051976203e-05, "learning_rate": 4.86571286713992e-06, "logits/chosen": -1.15012788772583, "logits/rejected": -1.0910217761993408, "logps/chosen": -332.09002685546875, "logps/rejected": -3805.06494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5769345164299011, "rewards/margins": 34.524742126464844, "rewards/rejected": -35.10167694091797, "step": 4450 }, { "epoch": 19.475982532751093, "grad_norm": 7.628267026660176e-05, "learning_rate": 4.864477972822189e-06, "logits/chosen": -1.1845647096633911, "logits/rejected": -1.139517068862915, "logps/chosen": -369.7764587402344, "logps/rejected": -3600.803955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5838127732276917, "rewards/margins": 32.58751678466797, "rewards/rejected": -33.17132568359375, "step": 4460 }, { "epoch": 19.519650655021834, "grad_norm": 1.6971514375146766e-05, "learning_rate": 4.863237584627227e-06, "logits/chosen": -1.1459400653839111, "logits/rejected": -1.037487506866455, "logps/chosen": -285.5187072753906, "logps/rejected": -4059.26513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4510183334350586, "rewards/margins": 36.96085739135742, "rewards/rejected": -37.41187286376953, "step": 4470 }, { "epoch": 19.563318777292576, "grad_norm": 8.2986837439332e-05, "learning_rate": 4.861991705437081e-06, "logits/chosen": -1.111714243888855, "logits/rejected": -1.0694177150726318, "logps/chosen": -304.5822448730469, "logps/rejected": -3279.451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.40021371841430664, "rewards/margins": 29.72808265686035, "rewards/rejected": -30.1282958984375, "step": 4480 }, { "epoch": 19.606986899563317, "grad_norm": 0.0001542881316066953, "learning_rate": 4.86074033814656e-06, "logits/chosen": -1.2057130336761475, "logits/rejected": -1.1539714336395264, "logps/chosen": -344.5001525878906, "logps/rejected": -3657.910888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5012696981430054, "rewards/margins": 33.233219146728516, "rewards/rejected": -33.73448944091797, "step": 4490 }, { "epoch": 19.650655021834062, "grad_norm": 0.0006983345120658676, "learning_rate": 4.859483485663221e-06, "logits/chosen": -1.163888692855835, "logits/rejected": -1.1058037281036377, "logps/chosen": -323.5475158691406, "logps/rejected": -3482.66455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4909515976905823, "rewards/margins": 31.5389347076416, "rewards/rejected": -32.029884338378906, "step": 4500 }, { "epoch": 19.694323144104803, "grad_norm": 1.5208301131560091e-05, "learning_rate": 4.858221150907367e-06, "logits/chosen": -1.1469285488128662, "logits/rejected": -1.1638261079788208, "logps/chosen": -375.8388671875, "logps/rejected": -2955.94873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4721356928348541, "rewards/margins": 26.665386199951172, "rewards/rejected": -27.137523651123047, "step": 4510 }, { "epoch": 19.737991266375545, "grad_norm": 1.0021641273689027e-05, "learning_rate": 4.856953336812042e-06, "logits/chosen": -1.2184836864471436, "logits/rejected": -1.1637601852416992, "logps/chosen": -331.0744323730469, "logps/rejected": -3669.41748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.39051195979118347, "rewards/margins": 33.318260192871094, "rewards/rejected": -33.70876693725586, "step": 4520 }, { "epoch": 19.78165938864629, "grad_norm": 2.255596102111583e-05, "learning_rate": 4.855680046323017e-06, "logits/chosen": -1.1629506349563599, "logits/rejected": -1.124191164970398, "logps/chosen": -337.5697937011719, "logps/rejected": -3675.216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.445138543844223, "rewards/margins": 33.438194274902344, "rewards/rejected": -33.883331298828125, "step": 4530 }, { "epoch": 19.82532751091703, "grad_norm": 3.0371449413431756e-05, "learning_rate": 4.85440128239879e-06, "logits/chosen": -1.2129652500152588, "logits/rejected": -1.1384363174438477, "logps/chosen": -309.870849609375, "logps/rejected": -3642.1015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.46469545364379883, "rewards/margins": 33.03997802734375, "rewards/rejected": -33.504676818847656, "step": 4540 }, { "epoch": 19.868995633187772, "grad_norm": 0.0010522542060161842, "learning_rate": 4.8531170480105745e-06, "logits/chosen": -1.1784234046936035, "logits/rejected": -1.1131733655929565, "logps/chosen": -292.21502685546875, "logps/rejected": -3801.67724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43407154083251953, "rewards/margins": 34.60039138793945, "rewards/rejected": -35.034461975097656, "step": 4550 }, { "epoch": 19.912663755458514, "grad_norm": 7.888787589709507e-06, "learning_rate": 4.851827346142298e-06, "logits/chosen": -1.1274186372756958, "logits/rejected": -1.0735470056533813, "logps/chosen": -328.57293701171875, "logps/rejected": -3437.56201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5079654455184937, "rewards/margins": 31.097942352294922, "rewards/rejected": -31.605907440185547, "step": 4560 }, { "epoch": 19.95633187772926, "grad_norm": 1.2255777705115412e-05, "learning_rate": 4.850532179790588e-06, "logits/chosen": -1.2284926176071167, "logits/rejected": -1.1402772665023804, "logps/chosen": -332.41510009765625, "logps/rejected": -3774.760986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3992405831813812, "rewards/margins": 34.265525817871094, "rewards/rejected": -34.66476058959961, "step": 4570 }, { "epoch": 20.0, "grad_norm": 1.617746330204415e-05, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.1308724880218506, "logits/rejected": -1.0818077325820923, "logps/chosen": -343.4400939941406, "logps/rejected": -3302.94775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5690902471542358, "rewards/margins": 29.767465591430664, "rewards/rejected": -30.336559295654297, "step": 4580 }, { "epoch": 20.04366812227074, "grad_norm": 1.2045535070818525e-05, "learning_rate": 4.847925465686863e-06, "logits/chosen": -1.1798384189605713, "logits/rejected": -1.1167576313018799, "logps/chosen": -339.0193786621094, "logps/rejected": -3471.95556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.497072696685791, "rewards/margins": 31.453067779541016, "rewards/rejected": -31.95013999938965, "step": 4590 }, { "epoch": 20.087336244541486, "grad_norm": 2.4216405776917435e-05, "learning_rate": 4.846613923991563e-06, "logits/chosen": -1.1243852376937866, "logits/rejected": -1.052752137184143, "logps/chosen": -314.41363525390625, "logps/rejected": -3343.721435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.42517510056495667, "rewards/margins": 30.31539535522461, "rewards/rejected": -30.740570068359375, "step": 4600 }, { "epoch": 20.131004366812228, "grad_norm": 0.0003798780707461403, "learning_rate": 4.845296929926244e-06, "logits/chosen": -1.1615965366363525, "logits/rejected": -1.1042793989181519, "logps/chosen": -320.6006164550781, "logps/rejected": -3548.18798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.47495537996292114, "rewards/margins": 32.245849609375, "rewards/rejected": -32.720802307128906, "step": 4610 }, { "epoch": 20.17467248908297, "grad_norm": 2.8889955546965603e-05, "learning_rate": 4.84397448655095e-06, "logits/chosen": -1.1615049839019775, "logits/rejected": -1.138461947441101, "logps/chosen": -331.223876953125, "logps/rejected": -3461.274169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.48463934659957886, "rewards/margins": 31.346776962280273, "rewards/rejected": -31.8314151763916, "step": 4620 }, { "epoch": 20.21834061135371, "grad_norm": 1.8214343103698903e-05, "learning_rate": 4.842646596938383e-06, "logits/chosen": -1.1390700340270996, "logits/rejected": -1.1209309101104736, "logps/chosen": -360.5704345703125, "logps/rejected": -3173.415771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5692693591117859, "rewards/margins": 28.65630531311035, "rewards/rejected": -29.225570678710938, "step": 4630 }, { "epoch": 20.262008733624455, "grad_norm": 1.1549863857147882e-05, "learning_rate": 4.841313264173904e-06, "logits/chosen": -1.1586918830871582, "logits/rejected": -1.0691490173339844, "logps/chosen": -324.5858459472656, "logps/rejected": -3574.45703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5821192264556885, "rewards/margins": 32.275875091552734, "rewards/rejected": -32.85799789428711, "step": 4640 }, { "epoch": 20.305676855895197, "grad_norm": 1.0195484243678322e-05, "learning_rate": 4.839974491355518e-06, "logits/chosen": -1.1114661693572998, "logits/rejected": -1.0694513320922852, "logps/chosen": -307.7980041503906, "logps/rejected": -3690.50146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4843238890171051, "rewards/margins": 33.5147590637207, "rewards/rejected": -33.999080657958984, "step": 4650 }, { "epoch": 20.349344978165938, "grad_norm": 0.00020056614755387646, "learning_rate": 4.83863028159387e-06, "logits/chosen": -1.139801263809204, "logits/rejected": -1.1526947021484375, "logps/chosen": -356.6407470703125, "logps/rejected": -3352.86962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5204558372497559, "rewards/margins": 30.354633331298828, "rewards/rejected": -30.875085830688477, "step": 4660 }, { "epoch": 20.393013100436683, "grad_norm": 2.08780946243617e-05, "learning_rate": 4.83728063801224e-06, "logits/chosen": -1.1585522890090942, "logits/rejected": -1.0815932750701904, "logps/chosen": -323.8712158203125, "logps/rejected": -3306.311767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.49321016669273376, "rewards/margins": 29.785247802734375, "rewards/rejected": -30.278457641601562, "step": 4670 }, { "epoch": 20.436681222707424, "grad_norm": 5.33446740956672e-05, "learning_rate": 4.835925563746532e-06, "logits/chosen": -1.1651889085769653, "logits/rejected": -1.1132709980010986, "logps/chosen": -309.3209533691406, "logps/rejected": -3611.532470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.41438618302345276, "rewards/margins": 32.790409088134766, "rewards/rejected": -33.204795837402344, "step": 4680 }, { "epoch": 20.480349344978166, "grad_norm": 1.3180692834357935e-05, "learning_rate": 4.834565061945266e-06, "logits/chosen": -1.185333490371704, "logits/rejected": -1.1461973190307617, "logps/chosen": -345.1185607910156, "logps/rejected": -3514.208251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6306765079498291, "rewards/margins": 31.78683090209961, "rewards/rejected": -32.41750717163086, "step": 4690 }, { "epoch": 20.524017467248907, "grad_norm": 9.00230470806936e-05, "learning_rate": 4.833199135769578e-06, "logits/chosen": -1.1430447101593018, "logits/rejected": -1.103878140449524, "logps/chosen": -341.6057434082031, "logps/rejected": -3409.125732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5214470028877258, "rewards/margins": 30.89845848083496, "rewards/rejected": -31.419902801513672, "step": 4700 }, { "epoch": 20.56768558951965, "grad_norm": 0.00011338971088427446, "learning_rate": 4.831827788393204e-06, "logits/chosen": -1.2147678136825562, "logits/rejected": -1.1625593900680542, "logps/chosen": -335.0191345214844, "logps/rejected": -3736.02197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5082991719245911, "rewards/margins": 33.83005142211914, "rewards/rejected": -34.338348388671875, "step": 4710 }, { "epoch": 20.611353711790393, "grad_norm": 8.881698723054869e-06, "learning_rate": 4.830451023002477e-06, "logits/chosen": -1.165520429611206, "logits/rejected": -1.1837494373321533, "logps/chosen": -365.5120544433594, "logps/rejected": -3130.66943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.48915404081344604, "rewards/margins": 28.233957290649414, "rewards/rejected": -28.723108291625977, "step": 4720 }, { "epoch": 20.655021834061134, "grad_norm": 1.2838937967295774e-05, "learning_rate": 4.829068842796318e-06, "logits/chosen": -1.212476134300232, "logits/rejected": -1.141273856163025, "logps/chosen": -320.68365478515625, "logps/rejected": -3859.639892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5358672142028809, "rewards/margins": 35.117733001708984, "rewards/rejected": -35.653602600097656, "step": 4730 }, { "epoch": 20.69868995633188, "grad_norm": 0.00022463569250120568, "learning_rate": 4.82768125098623e-06, "logits/chosen": -1.2090340852737427, "logits/rejected": -1.1524584293365479, "logps/chosen": -336.2873840332031, "logps/rejected": -3794.604736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5407024621963501, "rewards/margins": 34.44828414916992, "rewards/rejected": -34.988990783691406, "step": 4740 }, { "epoch": 20.74235807860262, "grad_norm": 1.863698026159917e-05, "learning_rate": 4.826288250796292e-06, "logits/chosen": -1.1500909328460693, "logits/rejected": -1.107428789138794, "logps/chosen": -335.83380126953125, "logps/rejected": -3784.31689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5552411079406738, "rewards/margins": 34.40128707885742, "rewards/rejected": -34.9565315246582, "step": 4750 }, { "epoch": 20.786026200873362, "grad_norm": 0.0001594021606323451, "learning_rate": 4.8248898454631446e-06, "logits/chosen": -1.1276651620864868, "logits/rejected": -1.1198830604553223, "logps/chosen": -342.6922302246094, "logps/rejected": -3491.555908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.45239338278770447, "rewards/margins": 31.761402130126953, "rewards/rejected": -32.21379089355469, "step": 4760 }, { "epoch": 20.829694323144103, "grad_norm": 0.00010613572698773685, "learning_rate": 4.823486038235992e-06, "logits/chosen": -1.1735106706619263, "logits/rejected": -1.1203631162643433, "logps/chosen": -324.93548583984375, "logps/rejected": -3656.13037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5344123244285583, "rewards/margins": 33.123207092285156, "rewards/rejected": -33.65761947631836, "step": 4770 }, { "epoch": 20.87336244541485, "grad_norm": 1.6947833735812677e-05, "learning_rate": 4.822076832376586e-06, "logits/chosen": -1.1215283870697021, "logits/rejected": -1.101292371749878, "logps/chosen": -337.6686706542969, "logps/rejected": -3533.298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5330354571342468, "rewards/margins": 32.140525817871094, "rewards/rejected": -32.673561096191406, "step": 4780 }, { "epoch": 20.91703056768559, "grad_norm": 5.864009159168384e-06, "learning_rate": 4.820662231159227e-06, "logits/chosen": -1.171013355255127, "logits/rejected": -1.161433458328247, "logps/chosen": -343.4436950683594, "logps/rejected": -3636.563720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5450960397720337, "rewards/margins": 32.90643310546875, "rewards/rejected": -33.4515266418457, "step": 4790 }, { "epoch": 20.96069868995633, "grad_norm": 2.232731278479145e-05, "learning_rate": 4.819242237870747e-06, "logits/chosen": -1.2285172939300537, "logits/rejected": -1.1988708972930908, "logps/chosen": -334.37762451171875, "logps/rejected": -3467.204345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6062466502189636, "rewards/margins": 31.332910537719727, "rewards/rejected": -31.93915367126465, "step": 4800 }, { "epoch": 21.004366812227076, "grad_norm": 4.160666077214364e-05, "learning_rate": 4.817816855810507e-06, "logits/chosen": -1.1675870418548584, "logits/rejected": -1.132495403289795, "logps/chosen": -321.3605041503906, "logps/rejected": -3694.97021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.47740644216537476, "rewards/margins": 33.72022247314453, "rewards/rejected": -34.19762420654297, "step": 4810 }, { "epoch": 21.048034934497817, "grad_norm": 1.3099083516389357e-05, "learning_rate": 4.8163860882903905e-06, "logits/chosen": -1.1508872509002686, "logits/rejected": -1.1542279720306396, "logps/chosen": -355.41473388671875, "logps/rejected": -3261.349853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5990090370178223, "rewards/margins": 29.51511001586914, "rewards/rejected": -30.114120483398438, "step": 4820 }, { "epoch": 21.09170305676856, "grad_norm": 0.00018574896641162717, "learning_rate": 4.814949938634793e-06, "logits/chosen": -1.1984652280807495, "logits/rejected": -1.1829990148544312, "logps/chosen": -312.2044677734375, "logps/rejected": -3832.900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.43997693061828613, "rewards/margins": 34.97722625732422, "rewards/rejected": -35.41720199584961, "step": 4830 }, { "epoch": 21.1353711790393, "grad_norm": 5.630652367372845e-06, "learning_rate": 4.8135084101806175e-06, "logits/chosen": -1.1663898229599, "logits/rejected": -1.147510051727295, "logps/chosen": -342.10955810546875, "logps/rejected": -3537.811279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5823013186454773, "rewards/margins": 32.02116012573242, "rewards/rejected": -32.60346221923828, "step": 4840 }, { "epoch": 21.179039301310045, "grad_norm": 9.367114219888739e-06, "learning_rate": 4.812061506277261e-06, "logits/chosen": -1.1109991073608398, "logits/rejected": -1.104315161705017, "logps/chosen": -334.7662048339844, "logps/rejected": -3576.123779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6271835565567017, "rewards/margins": 32.37742614746094, "rewards/rejected": -33.004615783691406, "step": 4850 }, { "epoch": 21.222707423580786, "grad_norm": 1.0264802778817858e-05, "learning_rate": 4.810609230286615e-06, "logits/chosen": -1.187462568283081, "logits/rejected": -1.1187187433242798, "logps/chosen": -313.11016845703125, "logps/rejected": -4105.03759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5988539457321167, "rewards/margins": 37.35902404785156, "rewards/rejected": -37.95787811279297, "step": 4860 }, { "epoch": 21.266375545851528, "grad_norm": 1.6774271416864033e-05, "learning_rate": 4.809151585583047e-06, "logits/chosen": -1.0699975490570068, "logits/rejected": -1.0909768342971802, "logps/chosen": -334.33941650390625, "logps/rejected": -3260.60546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4777081608772278, "rewards/margins": 29.566730499267578, "rewards/rejected": -30.044437408447266, "step": 4870 }, { "epoch": 21.310043668122272, "grad_norm": 1.7713558824069926e-05, "learning_rate": 4.807688575553407e-06, "logits/chosen": -1.1606693267822266, "logits/rejected": -1.1691370010375977, "logps/chosen": -322.00439453125, "logps/rejected": -3637.606201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.45699548721313477, "rewards/margins": 33.16028594970703, "rewards/rejected": -33.61728286743164, "step": 4880 }, { "epoch": 21.353711790393014, "grad_norm": 1.8739925087711575e-05, "learning_rate": 4.806220203597002e-06, "logits/chosen": -1.1839911937713623, "logits/rejected": -1.1947388648986816, "logps/chosen": -354.5254821777344, "logps/rejected": -3928.440673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5998550653457642, "rewards/margins": 35.69218826293945, "rewards/rejected": -36.29204559326172, "step": 4890 }, { "epoch": 21.397379912663755, "grad_norm": 6.89666928901079e-06, "learning_rate": 4.804746473125605e-06, "logits/chosen": -1.1374919414520264, "logits/rejected": -1.0877058506011963, "logps/chosen": -325.6962585449219, "logps/rejected": -3417.20947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5434260368347168, "rewards/margins": 30.867321014404297, "rewards/rejected": -31.410747528076172, "step": 4900 }, { "epoch": 21.441048034934497, "grad_norm": 1.5160115226621392e-05, "learning_rate": 4.803267387563436e-06, "logits/chosen": -1.15541672706604, "logits/rejected": -1.168862223625183, "logps/chosen": -342.7537536621094, "logps/rejected": -3288.41455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.44718432426452637, "rewards/margins": 29.866296768188477, "rewards/rejected": -30.3134822845459, "step": 4910 }, { "epoch": 21.48471615720524, "grad_norm": 1.8299381323240903e-05, "learning_rate": 4.801782950347157e-06, "logits/chosen": -1.1465941667556763, "logits/rejected": -1.116109013557434, "logps/chosen": -316.3350830078125, "logps/rejected": -3864.499267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5349293947219849, "rewards/margins": 35.12157440185547, "rewards/rejected": -35.6565055847168, "step": 4920 }, { "epoch": 21.528384279475983, "grad_norm": 1.854092176161704e-05, "learning_rate": 4.800293164925868e-06, "logits/chosen": -1.1735517978668213, "logits/rejected": -1.136613130569458, "logps/chosen": -330.24957275390625, "logps/rejected": -3567.553466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5768197774887085, "rewards/margins": 32.369659423828125, "rewards/rejected": -32.94647979736328, "step": 4930 }, { "epoch": 21.572052401746724, "grad_norm": 6.038507539301255e-06, "learning_rate": 4.79879803476109e-06, "logits/chosen": -1.200615644454956, "logits/rejected": -1.184861183166504, "logps/chosen": -321.4820251464844, "logps/rejected": -3792.06103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4444403648376465, "rewards/margins": 34.540531158447266, "rewards/rejected": -34.98497009277344, "step": 4940 }, { "epoch": 21.61572052401747, "grad_norm": 7.270777479267665e-05, "learning_rate": 4.7972975633267704e-06, "logits/chosen": -1.213582158088684, "logits/rejected": -1.2132920026779175, "logps/chosen": -351.9986877441406, "logps/rejected": -3908.578857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5547899007797241, "rewards/margins": 35.59600830078125, "rewards/rejected": -36.15079879760742, "step": 4950 }, { "epoch": 21.65938864628821, "grad_norm": 0.00023869519625726546, "learning_rate": 4.79579175410926e-06, "logits/chosen": -1.2039103507995605, "logits/rejected": -1.1435776948928833, "logps/chosen": -315.9364318847656, "logps/rejected": -3548.989013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.49167051911354065, "rewards/margins": 32.29452896118164, "rewards/rejected": -32.786197662353516, "step": 4960 }, { "epoch": 21.70305676855895, "grad_norm": 0.00011188140395387157, "learning_rate": 4.794280610607315e-06, "logits/chosen": -1.1936290264129639, "logits/rejected": -1.1613142490386963, "logps/chosen": -331.2950744628906, "logps/rejected": -3664.207763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5162336230278015, "rewards/margins": 33.248374938964844, "rewards/rejected": -33.76460647583008, "step": 4970 }, { "epoch": 21.746724890829693, "grad_norm": 7.763631821697857e-05, "learning_rate": 4.792764136332084e-06, "logits/chosen": -1.1623071432113647, "logits/rejected": -1.16731858253479, "logps/chosen": -349.23211669921875, "logps/rejected": -3719.03955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5776728391647339, "rewards/margins": 33.86302947998047, "rewards/rejected": -34.44070053100586, "step": 4980 }, { "epoch": 21.790393013100438, "grad_norm": 5.982238652356127e-06, "learning_rate": 4.791242334807106e-06, "logits/chosen": -1.181596279144287, "logits/rejected": -1.1417335271835327, "logps/chosen": -353.41912841796875, "logps/rejected": -3065.015380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5770040154457092, "rewards/margins": 27.596572875976562, "rewards/rejected": -28.1735782623291, "step": 4990 }, { "epoch": 21.83406113537118, "grad_norm": 0.0002785206869794105, "learning_rate": 4.789715209568293e-06, "logits/chosen": -1.1407253742218018, "logits/rejected": -1.1522927284240723, "logps/chosen": -341.58416748046875, "logps/rejected": -3290.83056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5424796342849731, "rewards/margins": 29.758697509765625, "rewards/rejected": -30.301177978515625, "step": 5000 }, { "epoch": 21.87772925764192, "grad_norm": 5.387123269669475e-06, "learning_rate": 4.788182764163929e-06, "logits/chosen": -1.1411224603652954, "logits/rejected": -1.1322163343429565, "logps/chosen": -332.45782470703125, "logps/rejected": -3780.522216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5603969693183899, "rewards/margins": 34.37720489501953, "rewards/rejected": -34.937599182128906, "step": 5010 }, { "epoch": 21.921397379912662, "grad_norm": 8.544052630969717e-06, "learning_rate": 4.786645002154659e-06, "logits/chosen": -1.16183602809906, "logits/rejected": -1.1453773975372314, "logps/chosen": -336.44158935546875, "logps/rejected": -3686.133544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5428224802017212, "rewards/margins": 33.531455993652344, "rewards/rejected": -34.07427978515625, "step": 5020 }, { "epoch": 21.965065502183407, "grad_norm": 1.0744073383773688e-05, "learning_rate": 4.785101927113482e-06, "logits/chosen": -1.2007582187652588, "logits/rejected": -1.1853846311569214, "logps/chosen": -354.03961181640625, "logps/rejected": -3725.884033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5243078470230103, "rewards/margins": 33.855506896972656, "rewards/rejected": -34.379817962646484, "step": 5030 }, { "epoch": 22.00873362445415, "grad_norm": 1.1832047622120583e-05, "learning_rate": 4.78355354262574e-06, "logits/chosen": -1.1806683540344238, "logits/rejected": -1.175394058227539, "logps/chosen": -355.33074951171875, "logps/rejected": -3529.278076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5747621059417725, "rewards/margins": 32.03532791137695, "rewards/rejected": -32.61008834838867, "step": 5040 }, { "epoch": 22.05240174672489, "grad_norm": 1.0939325016676783e-05, "learning_rate": 4.7819998522891135e-06, "logits/chosen": -1.1558133363723755, "logits/rejected": -1.146919846534729, "logps/chosen": -353.82470703125, "logps/rejected": -3433.798095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4612000584602356, "rewards/margins": 31.137622833251953, "rewards/rejected": -31.59882164001465, "step": 5050 }, { "epoch": 22.096069868995635, "grad_norm": 1.327772635903205e-05, "learning_rate": 4.78044085971361e-06, "logits/chosen": -1.1896289587020874, "logits/rejected": -1.1733863353729248, "logps/chosen": -326.35296630859375, "logps/rejected": -3560.141845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.529158353805542, "rewards/margins": 32.26811981201172, "rewards/rejected": -32.79727554321289, "step": 5060 }, { "epoch": 22.139737991266376, "grad_norm": 1.6301251854012565e-05, "learning_rate": 4.778876568521558e-06, "logits/chosen": -1.1893714666366577, "logits/rejected": -1.1777160167694092, "logps/chosen": -344.6730041503906, "logps/rejected": -3567.914794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5853402614593506, "rewards/margins": 32.206722259521484, "rewards/rejected": -32.79206466674805, "step": 5070 }, { "epoch": 22.183406113537117, "grad_norm": 8.554706663148225e-06, "learning_rate": 4.7773069823475945e-06, "logits/chosen": -1.175362467765808, "logits/rejected": -1.1709082126617432, "logps/chosen": -356.6847839355469, "logps/rejected": -3312.40478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6665069460868835, "rewards/margins": 29.8397159576416, "rewards/rejected": -30.5062198638916, "step": 5080 }, { "epoch": 22.22707423580786, "grad_norm": 5.123469841444092e-05, "learning_rate": 4.775732104838664e-06, "logits/chosen": -1.181365728378296, "logits/rejected": -1.2213958501815796, "logps/chosen": -333.45770263671875, "logps/rejected": -3795.768798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.499134361743927, "rewards/margins": 34.552513122558594, "rewards/rejected": -35.0516471862793, "step": 5090 }, { "epoch": 22.270742358078603, "grad_norm": 0.00012720626292512074, "learning_rate": 4.7741519396539994e-06, "logits/chosen": -1.1625845432281494, "logits/rejected": -1.1709480285644531, "logps/chosen": -335.0519104003906, "logps/rejected": -3764.443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5992254018783569, "rewards/margins": 34.18537521362305, "rewards/rejected": -34.78459930419922, "step": 5100 }, { "epoch": 22.314410480349345, "grad_norm": 0.0003971406178454676, "learning_rate": 4.772566490465126e-06, "logits/chosen": -1.177323579788208, "logits/rejected": -1.179137945175171, "logps/chosen": -344.6827697753906, "logps/rejected": -3270.716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5871642231941223, "rewards/margins": 29.564428329467773, "rewards/rejected": -30.151592254638672, "step": 5110 }, { "epoch": 22.358078602620086, "grad_norm": 1.6404808937977937e-05, "learning_rate": 4.770975760955843e-06, "logits/chosen": -1.1265596151351929, "logits/rejected": -1.1253873109817505, "logps/chosen": -319.7091979980469, "logps/rejected": -3483.42138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5686477422714233, "rewards/margins": 31.559856414794922, "rewards/rejected": -32.128501892089844, "step": 5120 }, { "epoch": 22.40174672489083, "grad_norm": 5.952534959192227e-06, "learning_rate": 4.769379754822217e-06, "logits/chosen": -1.204649567604065, "logits/rejected": -1.158025860786438, "logps/chosen": -314.44384765625, "logps/rejected": -3824.059326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5417857766151428, "rewards/margins": 34.65578079223633, "rewards/rejected": -35.19757080078125, "step": 5130 }, { "epoch": 22.445414847161572, "grad_norm": 5.2090731187166686e-05, "learning_rate": 4.767778475772579e-06, "logits/chosen": -1.2511235475540161, "logits/rejected": -1.223476529121399, "logps/chosen": -348.49761962890625, "logps/rejected": -3647.022705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5987604856491089, "rewards/margins": 33.016510009765625, "rewards/rejected": -33.615272521972656, "step": 5140 }, { "epoch": 22.489082969432314, "grad_norm": 0.0004439860909750503, "learning_rate": 4.766171927527507e-06, "logits/chosen": -1.1405409574508667, "logits/rejected": -1.1782681941986084, "logps/chosen": -348.523681640625, "logps/rejected": -3738.28515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5726685523986816, "rewards/margins": 34.02510452270508, "rewards/rejected": -34.5977668762207, "step": 5150 }, { "epoch": 22.532751091703055, "grad_norm": 1.7682188462501174e-05, "learning_rate": 4.7645601138198285e-06, "logits/chosen": -1.1560076475143433, "logits/rejected": -1.137841820716858, "logps/chosen": -338.44219970703125, "logps/rejected": -3432.39599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.613092303276062, "rewards/margins": 30.99191665649414, "rewards/rejected": -31.605005264282227, "step": 5160 }, { "epoch": 22.5764192139738, "grad_norm": 1.2535145245106618e-05, "learning_rate": 4.762943038394597e-06, "logits/chosen": -1.1902356147766113, "logits/rejected": -1.2059876918792725, "logps/chosen": -362.10968017578125, "logps/rejected": -3555.572998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5854187607765198, "rewards/margins": 32.18910217285156, "rewards/rejected": -32.7745246887207, "step": 5170 }, { "epoch": 22.62008733624454, "grad_norm": 5.489295308919052e-06, "learning_rate": 4.7613207050090985e-06, "logits/chosen": -1.2004698514938354, "logits/rejected": -1.2428547143936157, "logps/chosen": -356.294921875, "logps/rejected": -3396.828857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5816095471382141, "rewards/margins": 30.722665786743164, "rewards/rejected": -31.304271697998047, "step": 5180 }, { "epoch": 22.663755458515283, "grad_norm": 3.6247674866474004e-05, "learning_rate": 4.759693117432833e-06, "logits/chosen": -1.1823723316192627, "logits/rejected": -1.2008742094039917, "logps/chosen": -336.72088623046875, "logps/rejected": -3620.20458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4258580803871155, "rewards/margins": 32.96714401245117, "rewards/rejected": -33.39300537109375, "step": 5190 }, { "epoch": 22.707423580786028, "grad_norm": 9.55008306617646e-06, "learning_rate": 4.758060279447508e-06, "logits/chosen": -1.1943024396896362, "logits/rejected": -1.2000106573104858, "logps/chosen": -345.08624267578125, "logps/rejected": -3646.25634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.622309148311615, "rewards/margins": 33.02737045288086, "rewards/rejected": -33.64967727661133, "step": 5200 }, { "epoch": 22.75109170305677, "grad_norm": 1.230446675802398e-05, "learning_rate": 4.756422194847031e-06, "logits/chosen": -1.147827386856079, "logits/rejected": -1.1270458698272705, "logps/chosen": -326.57855224609375, "logps/rejected": -3452.399169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5300008654594421, "rewards/margins": 31.309926986694336, "rewards/rejected": -31.83992576599121, "step": 5210 }, { "epoch": 22.79475982532751, "grad_norm": 5.482302121045582e-05, "learning_rate": 4.754778867437502e-06, "logits/chosen": -1.1962954998016357, "logits/rejected": -1.1927706003189087, "logps/chosen": -339.1246643066406, "logps/rejected": -3379.016845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.481696754693985, "rewards/margins": 30.644372940063477, "rewards/rejected": -31.126068115234375, "step": 5220 }, { "epoch": 22.83842794759825, "grad_norm": 9.144813248667047e-06, "learning_rate": 4.753130301037199e-06, "logits/chosen": -1.1919358968734741, "logits/rejected": -1.2118369340896606, "logps/chosen": -384.41009521484375, "logps/rejected": -3640.44580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7896652221679688, "rewards/margins": 32.852516174316406, "rewards/rejected": -33.642181396484375, "step": 5230 }, { "epoch": 22.882096069868997, "grad_norm": 1.095162731207215e-05, "learning_rate": 4.751476499476577e-06, "logits/chosen": -1.1878939867019653, "logits/rejected": -1.1885377168655396, "logps/chosen": -338.36212158203125, "logps/rejected": -3351.45751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.49743857979774475, "rewards/margins": 30.399438858032227, "rewards/rejected": -30.896875381469727, "step": 5240 }, { "epoch": 22.925764192139738, "grad_norm": 2.0965563240662914e-05, "learning_rate": 4.749817466598251e-06, "logits/chosen": -1.2136931419372559, "logits/rejected": -1.2077834606170654, "logps/chosen": -337.93609619140625, "logps/rejected": -3972.67041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.554345965385437, "rewards/margins": 36.08985137939453, "rewards/rejected": -36.64419937133789, "step": 5250 }, { "epoch": 22.96943231441048, "grad_norm": 5.793331307372156e-06, "learning_rate": 4.7481532062569945e-06, "logits/chosen": -1.2103259563446045, "logits/rejected": -1.1791229248046875, "logps/chosen": -338.73431396484375, "logps/rejected": -3702.942626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6903024911880493, "rewards/margins": 33.49357986450195, "rewards/rejected": -34.18387985229492, "step": 5260 }, { "epoch": 23.013100436681224, "grad_norm": 1.3529649141442024e-05, "learning_rate": 4.746483722319725e-06, "logits/chosen": -1.163828730583191, "logits/rejected": -1.1817286014556885, "logps/chosen": -361.0147399902344, "logps/rejected": -3357.92236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6160452961921692, "rewards/margins": 30.43954086303711, "rewards/rejected": -31.055583953857422, "step": 5270 }, { "epoch": 23.056768558951966, "grad_norm": 7.283739014665074e-05, "learning_rate": 4.744809018665495e-06, "logits/chosen": -1.1772675514221191, "logits/rejected": -1.2004387378692627, "logps/chosen": -346.380126953125, "logps/rejected": -3317.434326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5523272156715393, "rewards/margins": 29.951831817626953, "rewards/rejected": -30.504159927368164, "step": 5280 }, { "epoch": 23.100436681222707, "grad_norm": 1.3592709980074482e-05, "learning_rate": 4.74312909918549e-06, "logits/chosen": -1.1495859622955322, "logits/rejected": -1.1598405838012695, "logps/chosen": -328.3020935058594, "logps/rejected": -3912.474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5835208296775818, "rewards/margins": 35.662025451660156, "rewards/rejected": -36.24554443359375, "step": 5290 }, { "epoch": 23.14410480349345, "grad_norm": 0.00017729550396225303, "learning_rate": 4.741443967783012e-06, "logits/chosen": -1.1583377122879028, "logits/rejected": -1.1028149127960205, "logps/chosen": -316.6402282714844, "logps/rejected": -3534.177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5304518938064575, "rewards/margins": 32.13536834716797, "rewards/rejected": -32.66582489013672, "step": 5300 }, { "epoch": 23.187772925764193, "grad_norm": 8.689754807746458e-06, "learning_rate": 4.73975362837347e-06, "logits/chosen": -1.0910910367965698, "logits/rejected": -1.1470059156417847, "logps/chosen": -337.69085693359375, "logps/rejected": -2999.5009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5945495367050171, "rewards/margins": 27.09366798400879, "rewards/rejected": -27.688217163085938, "step": 5310 }, { "epoch": 23.231441048034934, "grad_norm": 6.216526579113494e-05, "learning_rate": 4.73805808488438e-06, "logits/chosen": -1.181781530380249, "logits/rejected": -1.1338871717453003, "logps/chosen": -327.038818359375, "logps/rejected": -4064.689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6169982552528381, "rewards/margins": 37.006160736083984, "rewards/rejected": -37.62316131591797, "step": 5320 }, { "epoch": 23.275109170305676, "grad_norm": 5.250686645288864e-06, "learning_rate": 4.736357341255344e-06, "logits/chosen": -1.1395585536956787, "logits/rejected": -1.1871337890625, "logps/chosen": -341.4068298339844, "logps/rejected": -3313.369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5225416421890259, "rewards/margins": 30.0237979888916, "rewards/rejected": -30.546340942382812, "step": 5330 }, { "epoch": 23.31877729257642, "grad_norm": 1.3376286271600509e-05, "learning_rate": 4.734651401438051e-06, "logits/chosen": -1.1439473628997803, "logits/rejected": -1.0917606353759766, "logps/chosen": -326.62774658203125, "logps/rejected": -3936.00146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7137805223464966, "rewards/margins": 35.76108169555664, "rewards/rejected": -36.47486114501953, "step": 5340 }, { "epoch": 23.362445414847162, "grad_norm": 7.452198210200867e-06, "learning_rate": 4.732940269396259e-06, "logits/chosen": -1.2386348247528076, "logits/rejected": -1.2199093103408813, "logps/chosen": -345.75042724609375, "logps/rejected": -3687.701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6081947088241577, "rewards/margins": 33.42036819458008, "rewards/rejected": -34.028564453125, "step": 5350 }, { "epoch": 23.406113537117903, "grad_norm": 5.787494650977723e-06, "learning_rate": 4.7312239491057945e-06, "logits/chosen": -1.2203105688095093, "logits/rejected": -1.2129942178726196, "logps/chosen": -338.14239501953125, "logps/rejected": -3429.232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4903899133205414, "rewards/margins": 31.1561222076416, "rewards/rejected": -31.64651107788086, "step": 5360 }, { "epoch": 23.449781659388645, "grad_norm": 1.062829653584861e-05, "learning_rate": 4.729502444554536e-06, "logits/chosen": -1.1721292734146118, "logits/rejected": -1.1832144260406494, "logps/chosen": -364.0677185058594, "logps/rejected": -3912.80712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7553869485855103, "rewards/margins": 35.46536636352539, "rewards/rejected": -36.22075653076172, "step": 5370 }, { "epoch": 23.49344978165939, "grad_norm": 0.0022221701333103453, "learning_rate": 4.727775759742408e-06, "logits/chosen": -1.1589229106903076, "logits/rejected": -1.185834288597107, "logps/chosen": -339.85980224609375, "logps/rejected": -3703.610107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4898103177547455, "rewards/margins": 33.740814208984375, "rewards/rejected": -34.230628967285156, "step": 5380 }, { "epoch": 23.53711790393013, "grad_norm": 6.068177745955297e-06, "learning_rate": 4.726043898681373e-06, "logits/chosen": -1.1486760377883911, "logits/rejected": -1.1275224685668945, "logps/chosen": -342.111572265625, "logps/rejected": -3755.27587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.579861044883728, "rewards/margins": 34.16009521484375, "rewards/rejected": -34.73995590209961, "step": 5390 }, { "epoch": 23.580786026200872, "grad_norm": 2.600809399945345e-05, "learning_rate": 4.724306865395418e-06, "logits/chosen": -1.138237714767456, "logits/rejected": -1.2077096700668335, "logps/chosen": -381.5221862792969, "logps/rejected": -3059.859130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5791900157928467, "rewards/margins": 27.665889739990234, "rewards/rejected": -28.24508285522461, "step": 5400 }, { "epoch": 23.624454148471617, "grad_norm": 9.922690351317344e-06, "learning_rate": 4.722564663920552e-06, "logits/chosen": -1.1656674146652222, "logits/rejected": -1.1801152229309082, "logps/chosen": -371.138427734375, "logps/rejected": -3470.68408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5501183271408081, "rewards/margins": 31.46108627319336, "rewards/rejected": -32.01120376586914, "step": 5410 }, { "epoch": 23.66812227074236, "grad_norm": 1.4054501439124265e-05, "learning_rate": 4.720817298304785e-06, "logits/chosen": -1.1546108722686768, "logits/rejected": -1.1713950634002686, "logps/chosen": -339.54498291015625, "logps/rejected": -3837.18505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7105956673622131, "rewards/margins": 34.823402404785156, "rewards/rejected": -35.53400421142578, "step": 5420 }, { "epoch": 23.7117903930131, "grad_norm": 7.950144081103607e-06, "learning_rate": 4.719064772608135e-06, "logits/chosen": -1.1722958087921143, "logits/rejected": -1.1946077346801758, "logps/chosen": -354.00091552734375, "logps/rejected": -3430.56201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6521971225738525, "rewards/margins": 31.03391456604004, "rewards/rejected": -31.686107635498047, "step": 5430 }, { "epoch": 23.75545851528384, "grad_norm": 2.841076343163688e-05, "learning_rate": 4.7173070909026015e-06, "logits/chosen": -1.2080512046813965, "logits/rejected": -1.2361419200897217, "logps/chosen": -365.52178955078125, "logps/rejected": -3655.32421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5256883502006531, "rewards/margins": 33.294342041015625, "rewards/rejected": -33.820030212402344, "step": 5440 }, { "epoch": 23.799126637554586, "grad_norm": 1.3333286720038966e-05, "learning_rate": 4.7155442572721685e-06, "logits/chosen": -1.185363531112671, "logits/rejected": -1.2312899827957153, "logps/chosen": -363.8575134277344, "logps/rejected": -3777.408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6116653680801392, "rewards/margins": 34.323768615722656, "rewards/rejected": -34.93543243408203, "step": 5450 }, { "epoch": 23.842794759825328, "grad_norm": 2.1952996859455204e-05, "learning_rate": 4.71377627581279e-06, "logits/chosen": -1.1482139825820923, "logits/rejected": -1.1361101865768433, "logps/chosen": -345.80645751953125, "logps/rejected": -3663.700439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6855758428573608, "rewards/margins": 33.121437072753906, "rewards/rejected": -33.80701446533203, "step": 5460 }, { "epoch": 23.88646288209607, "grad_norm": 0.0001295068563311481, "learning_rate": 4.71200315063238e-06, "logits/chosen": -1.223639965057373, "logits/rejected": -1.2426375150680542, "logps/chosen": -356.92041015625, "logps/rejected": -3842.46533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5386031866073608, "rewards/margins": 34.952659606933594, "rewards/rejected": -35.49126052856445, "step": 5470 }, { "epoch": 23.930131004366814, "grad_norm": 3.972588428168292e-06, "learning_rate": 4.7102248858508046e-06, "logits/chosen": -1.2066301107406616, "logits/rejected": -1.2001632452011108, "logps/chosen": -333.0633239746094, "logps/rejected": -3786.424560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5815504193305969, "rewards/margins": 34.436561584472656, "rewards/rejected": -35.01811599731445, "step": 5480 }, { "epoch": 23.973799126637555, "grad_norm": 1.5583472305756818e-05, "learning_rate": 4.7084414855998706e-06, "logits/chosen": -1.1604702472686768, "logits/rejected": -1.133777141571045, "logps/chosen": -334.98956298828125, "logps/rejected": -3875.18359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5021079778671265, "rewards/margins": 35.26410675048828, "rewards/rejected": -35.766212463378906, "step": 5490 }, { "epoch": 24.017467248908297, "grad_norm": 4.7816555074284834e-06, "learning_rate": 4.706652954023318e-06, "logits/chosen": -1.1618189811706543, "logits/rejected": -1.1846325397491455, "logps/chosen": -344.52825927734375, "logps/rejected": -3578.780029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5689977407455444, "rewards/margins": 32.44840621948242, "rewards/rejected": -33.01741027832031, "step": 5500 }, { "epoch": 24.061135371179038, "grad_norm": 0.0002548094827832198, "learning_rate": 4.704859295276811e-06, "logits/chosen": -1.1679669618606567, "logits/rejected": -1.174860954284668, "logps/chosen": -324.2937316894531, "logps/rejected": -3851.452392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6078824996948242, "rewards/margins": 35.06720733642578, "rewards/rejected": -35.67509460449219, "step": 5510 }, { "epoch": 24.104803493449783, "grad_norm": 0.00012458358244240645, "learning_rate": 4.703060513527924e-06, "logits/chosen": -1.1603645086288452, "logits/rejected": -1.2004879713058472, "logps/chosen": -353.1865234375, "logps/rejected": -3979.815673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8269935846328735, "rewards/margins": 35.98107147216797, "rewards/rejected": -36.80806350708008, "step": 5520 }, { "epoch": 24.148471615720524, "grad_norm": 7.550353756671909e-06, "learning_rate": 4.701256612956137e-06, "logits/chosen": -1.2048131227493286, "logits/rejected": -1.2525293827056885, "logps/chosen": -366.7615966796875, "logps/rejected": -3455.44677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5672973394393921, "rewards/margins": 31.38223648071289, "rewards/rejected": -31.949535369873047, "step": 5530 }, { "epoch": 24.192139737991265, "grad_norm": 1.6303475936690628e-05, "learning_rate": 4.699447597752821e-06, "logits/chosen": -1.2191699743270874, "logits/rejected": -1.2105731964111328, "logps/chosen": -336.58453369140625, "logps/rejected": -3879.042236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6434630155563354, "rewards/margins": 35.22203826904297, "rewards/rejected": -35.865501403808594, "step": 5540 }, { "epoch": 24.23580786026201, "grad_norm": 8.444455922934454e-06, "learning_rate": 4.697633472121234e-06, "logits/chosen": -1.1988381147384644, "logits/rejected": -1.2352583408355713, "logps/chosen": -327.34014892578125, "logps/rejected": -3351.124267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5131651163101196, "rewards/margins": 30.396068572998047, "rewards/rejected": -30.909236907958984, "step": 5550 }, { "epoch": 24.27947598253275, "grad_norm": 6.364955959839307e-06, "learning_rate": 4.695814240276506e-06, "logits/chosen": -1.2134555578231812, "logits/rejected": -1.1846438646316528, "logps/chosen": -302.1916809082031, "logps/rejected": -4011.93798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.49501147866249084, "rewards/margins": 36.606056213378906, "rewards/rejected": -37.1010627746582, "step": 5560 }, { "epoch": 24.323144104803493, "grad_norm": 2.8750755005730604e-05, "learning_rate": 4.693989906445633e-06, "logits/chosen": -1.1893084049224854, "logits/rejected": -1.2186152935028076, "logps/chosen": -352.44744873046875, "logps/rejected": -3327.18359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6274372935295105, "rewards/margins": 30.057254791259766, "rewards/rejected": -30.6846923828125, "step": 5570 }, { "epoch": 24.366812227074234, "grad_norm": 0.00035610013899056936, "learning_rate": 4.692160474867463e-06, "logits/chosen": -1.1822307109832764, "logits/rejected": -1.1546939611434937, "logps/chosen": -322.09259033203125, "logps/rejected": -3625.3984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4860486090183258, "rewards/margins": 32.95624542236328, "rewards/rejected": -33.442298889160156, "step": 5580 }, { "epoch": 24.41048034934498, "grad_norm": 0.00027559186814083645, "learning_rate": 4.69032594979269e-06, "logits/chosen": -1.2139981985092163, "logits/rejected": -1.2369755506515503, "logps/chosen": -336.81829833984375, "logps/rejected": -3898.530517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5284819006919861, "rewards/margins": 35.51451873779297, "rewards/rejected": -36.043006896972656, "step": 5590 }, { "epoch": 24.45414847161572, "grad_norm": 1.9177691187399185e-05, "learning_rate": 4.688486335483844e-06, "logits/chosen": -1.1505731344223022, "logits/rejected": -1.2071573734283447, "logps/chosen": -360.30181884765625, "logps/rejected": -3491.240966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5969746708869934, "rewards/margins": 31.74094581604004, "rewards/rejected": -32.33791732788086, "step": 5600 }, { "epoch": 24.497816593886462, "grad_norm": 0.0007187276404929076, "learning_rate": 4.686641636215278e-06, "logits/chosen": -1.174551248550415, "logits/rejected": -1.161385178565979, "logps/chosen": -306.07916259765625, "logps/rejected": -4234.64306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6073982119560242, "rewards/margins": 38.68224334716797, "rewards/rejected": -39.28964614868164, "step": 5610 }, { "epoch": 24.541484716157207, "grad_norm": 3.996170656059125e-05, "learning_rate": 4.684791856273161e-06, "logits/chosen": -1.1499732732772827, "logits/rejected": -1.1893564462661743, "logps/chosen": -352.17578125, "logps/rejected": -3382.14453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.589980959892273, "rewards/margins": 30.65545654296875, "rewards/rejected": -31.245433807373047, "step": 5620 }, { "epoch": 24.58515283842795, "grad_norm": 2.234622425315211e-05, "learning_rate": 4.6829369999554664e-06, "logits/chosen": -1.1470940113067627, "logits/rejected": -1.169297218322754, "logps/chosen": -353.0755920410156, "logps/rejected": -3377.192138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5694118738174438, "rewards/margins": 30.589651107788086, "rewards/rejected": -31.1590633392334, "step": 5630 }, { "epoch": 24.62882096069869, "grad_norm": 7.495922818711074e-06, "learning_rate": 4.681077071571962e-06, "logits/chosen": -1.1436012983322144, "logits/rejected": -1.1843605041503906, "logps/chosen": -338.5530090332031, "logps/rejected": -3808.97265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5436632037162781, "rewards/margins": 34.81926727294922, "rewards/rejected": -35.3629264831543, "step": 5640 }, { "epoch": 24.67248908296943, "grad_norm": 9.002428499162918e-06, "learning_rate": 4.679212075444203e-06, "logits/chosen": -1.1863281726837158, "logits/rejected": -1.1874008178710938, "logps/chosen": -350.5816345214844, "logps/rejected": -3671.00048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5568342208862305, "rewards/margins": 33.271766662597656, "rewards/rejected": -33.8286018371582, "step": 5650 }, { "epoch": 24.716157205240176, "grad_norm": 7.348937532312367e-05, "learning_rate": 4.677342015905517e-06, "logits/chosen": -1.1536147594451904, "logits/rejected": -1.183640956878662, "logps/chosen": -340.2001037597656, "logps/rejected": -3680.763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5578964948654175, "rewards/margins": 33.371192932128906, "rewards/rejected": -33.92908477783203, "step": 5660 }, { "epoch": 24.759825327510917, "grad_norm": 1.3041560855972846e-05, "learning_rate": 4.675466897300997e-06, "logits/chosen": -1.2502837181091309, "logits/rejected": -1.2474950551986694, "logps/chosen": -333.3052062988281, "logps/rejected": -3852.69775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5783816576004028, "rewards/margins": 34.959381103515625, "rewards/rejected": -35.53776168823242, "step": 5670 }, { "epoch": 24.80349344978166, "grad_norm": 1.9653383356513112e-05, "learning_rate": 4.673586723987491e-06, "logits/chosen": -1.1702178716659546, "logits/rejected": -1.2026498317718506, "logps/chosen": -352.0107727050781, "logps/rejected": -3458.15234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5394114255905151, "rewards/margins": 31.435211181640625, "rewards/rejected": -31.974620819091797, "step": 5680 }, { "epoch": 24.8471615720524, "grad_norm": 0.00013541118617798267, "learning_rate": 4.671701500333593e-06, "logits/chosen": -1.169298529624939, "logits/rejected": -1.2534996271133423, "logps/chosen": -363.94268798828125, "logps/rejected": -3387.233642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6432844400405884, "rewards/margins": 30.584192276000977, "rewards/rejected": -31.22747230529785, "step": 5690 }, { "epoch": 24.890829694323145, "grad_norm": 1.1599130658792923e-05, "learning_rate": 4.669811230719629e-06, "logits/chosen": -1.189867377281189, "logits/rejected": -1.2226089239120483, "logps/chosen": -324.47637939453125, "logps/rejected": -3469.52734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5224462747573853, "rewards/margins": 31.559017181396484, "rewards/rejected": -32.08146286010742, "step": 5700 }, { "epoch": 24.934497816593886, "grad_norm": 3.7927207447877186e-05, "learning_rate": 4.667915919537651e-06, "logits/chosen": -1.182998776435852, "logits/rejected": -1.2182395458221436, "logps/chosen": -330.25787353515625, "logps/rejected": -3697.58203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6528623700141907, "rewards/margins": 33.543968200683594, "rewards/rejected": -34.19682693481445, "step": 5710 }, { "epoch": 24.978165938864628, "grad_norm": 4.745936908923507e-06, "learning_rate": 4.666015571191426e-06, "logits/chosen": -1.16318678855896, "logits/rejected": -1.2107188701629639, "logps/chosen": -347.2037353515625, "logps/rejected": -3399.56884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5119761228561401, "rewards/margins": 30.962108612060547, "rewards/rejected": -31.474084854125977, "step": 5720 }, { "epoch": 25.021834061135372, "grad_norm": 3.1918338656925596e-06, "learning_rate": 4.664110190096421e-06, "logits/chosen": -1.2142025232315063, "logits/rejected": -1.2603459358215332, "logps/chosen": -343.06658935546875, "logps/rejected": -3930.35205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5947311520576477, "rewards/margins": 35.788475036621094, "rewards/rejected": -36.383209228515625, "step": 5730 }, { "epoch": 25.065502183406114, "grad_norm": 4.701037840684077e-06, "learning_rate": 4.662199780679801e-06, "logits/chosen": -1.1945098638534546, "logits/rejected": -1.1927108764648438, "logps/chosen": -328.9498596191406, "logps/rejected": -3917.04248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6507487893104553, "rewards/margins": 35.592628479003906, "rewards/rejected": -36.243385314941406, "step": 5740 }, { "epoch": 25.109170305676855, "grad_norm": 1.067071342834299e-05, "learning_rate": 4.660284347380412e-06, "logits/chosen": -1.1958644390106201, "logits/rejected": -1.2502437829971313, "logps/chosen": -355.8912353515625, "logps/rejected": -3753.93408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5611786842346191, "rewards/margins": 34.238006591796875, "rewards/rejected": -34.7991828918457, "step": 5750 }, { "epoch": 25.152838427947597, "grad_norm": 1.1972606736233825e-05, "learning_rate": 4.658363894648773e-06, "logits/chosen": -1.1793768405914307, "logits/rejected": -1.2097008228302002, "logps/chosen": -347.1600646972656, "logps/rejected": -3595.151611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6493815183639526, "rewards/margins": 32.68815231323242, "rewards/rejected": -33.3375358581543, "step": 5760 }, { "epoch": 25.19650655021834, "grad_norm": 1.437525284493936e-05, "learning_rate": 4.656438426947068e-06, "logits/chosen": -1.228337287902832, "logits/rejected": -1.2976044416427612, "logps/chosen": -350.86346435546875, "logps/rejected": -4147.1240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5565715432167053, "rewards/margins": 37.88048553466797, "rewards/rejected": -38.43705749511719, "step": 5770 }, { "epoch": 25.240174672489083, "grad_norm": 0.00013246179081171974, "learning_rate": 4.654507948749128e-06, "logits/chosen": -1.2199633121490479, "logits/rejected": -1.2382429838180542, "logps/chosen": -356.3660888671875, "logps/rejected": -3607.341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7019289135932922, "rewards/margins": 32.7265625, "rewards/rejected": -33.428489685058594, "step": 5780 }, { "epoch": 25.283842794759824, "grad_norm": 2.734999771134448e-05, "learning_rate": 4.6525724645404335e-06, "logits/chosen": -1.2151048183441162, "logits/rejected": -1.2174996137619019, "logps/chosen": -308.8096618652344, "logps/rejected": -3866.04052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5744065046310425, "rewards/margins": 35.203731536865234, "rewards/rejected": -35.77813720703125, "step": 5790 }, { "epoch": 25.32751091703057, "grad_norm": 0.000386345211112999, "learning_rate": 4.65063197881809e-06, "logits/chosen": -1.1578586101531982, "logits/rejected": -1.2143924236297607, "logps/chosen": -327.99200439453125, "logps/rejected": -3551.16259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5650061368942261, "rewards/margins": 32.28638458251953, "rewards/rejected": -32.85139083862305, "step": 5800 }, { "epoch": 25.37117903930131, "grad_norm": 0.00022320668690464904, "learning_rate": 4.6486864960908275e-06, "logits/chosen": -1.206616759300232, "logits/rejected": -1.2628695964813232, "logps/chosen": -338.5455322265625, "logps/rejected": -3658.835205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5334581136703491, "rewards/margins": 33.34501647949219, "rewards/rejected": -33.878475189208984, "step": 5810 }, { "epoch": 25.41484716157205, "grad_norm": 5.31029683204418e-06, "learning_rate": 4.6467360208789844e-06, "logits/chosen": -1.1855831146240234, "logits/rejected": -1.202300786972046, "logps/chosen": -323.9513244628906, "logps/rejected": -3815.50244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5840621590614319, "rewards/margins": 34.638999938964844, "rewards/rejected": -35.223060607910156, "step": 5820 }, { "epoch": 25.458515283842793, "grad_norm": 9.892009044467745e-06, "learning_rate": 4.6447805577145024e-06, "logits/chosen": -1.2463500499725342, "logits/rejected": -1.2694003582000732, "logps/chosen": -356.9418029785156, "logps/rejected": -3695.594482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5711425542831421, "rewards/margins": 33.641353607177734, "rewards/rejected": -34.212501525878906, "step": 5830 }, { "epoch": 25.502183406113538, "grad_norm": 4.8178089828511585e-06, "learning_rate": 4.642820111140908e-06, "logits/chosen": -1.216831922531128, "logits/rejected": -1.2665334939956665, "logps/chosen": -332.75189208984375, "logps/rejected": -3706.40185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5888631343841553, "rewards/margins": 33.745304107666016, "rewards/rejected": -34.33416748046875, "step": 5840 }, { "epoch": 25.54585152838428, "grad_norm": 3.1891834016884524e-06, "learning_rate": 4.64085468571331e-06, "logits/chosen": -1.2207882404327393, "logits/rejected": -1.230215311050415, "logps/chosen": -350.1354675292969, "logps/rejected": -3511.860595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.71825110912323, "rewards/margins": 31.722265243530273, "rewards/rejected": -32.440513610839844, "step": 5850 }, { "epoch": 25.58951965065502, "grad_norm": 2.8261634909829975e-06, "learning_rate": 4.638884285998386e-06, "logits/chosen": -1.1995891332626343, "logits/rejected": -1.2632849216461182, "logps/chosen": -361.7134094238281, "logps/rejected": -3524.52783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.638340413570404, "rewards/margins": 31.8640079498291, "rewards/rejected": -32.502349853515625, "step": 5860 }, { "epoch": 25.633187772925766, "grad_norm": 1.610684777388983e-05, "learning_rate": 4.636908916574371e-06, "logits/chosen": -1.213470458984375, "logits/rejected": -1.2568113803863525, "logps/chosen": -340.677001953125, "logps/rejected": -3888.95263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5540992021560669, "rewards/margins": 35.39733123779297, "rewards/rejected": -35.95143127441406, "step": 5870 }, { "epoch": 25.676855895196507, "grad_norm": 0.00010854910956281505, "learning_rate": 4.634928582031044e-06, "logits/chosen": -1.1762501001358032, "logits/rejected": -1.2134779691696167, "logps/chosen": -328.57794189453125, "logps/rejected": -4069.12939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6370394229888916, "rewards/margins": 37.16755294799805, "rewards/rejected": -37.804588317871094, "step": 5880 }, { "epoch": 25.72052401746725, "grad_norm": 7.099275890766825e-06, "learning_rate": 4.632943286969724e-06, "logits/chosen": -1.1902116537094116, "logits/rejected": -1.2327381372451782, "logps/chosen": -323.7455139160156, "logps/rejected": -3875.51904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5179435014724731, "rewards/margins": 35.377960205078125, "rewards/rejected": -35.89590072631836, "step": 5890 }, { "epoch": 25.76419213973799, "grad_norm": 5.7618141057584116e-06, "learning_rate": 4.630953036003255e-06, "logits/chosen": -1.2115107774734497, "logits/rejected": -1.2682751417160034, "logps/chosen": -359.8846740722656, "logps/rejected": -3579.13525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6358590126037598, "rewards/margins": 32.40993118286133, "rewards/rejected": -33.0457878112793, "step": 5900 }, { "epoch": 25.807860262008735, "grad_norm": 3.626550244239798e-06, "learning_rate": 4.6289578337559954e-06, "logits/chosen": -1.1771113872528076, "logits/rejected": -1.192408561706543, "logps/chosen": -324.5118408203125, "logps/rejected": -3773.791748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5997554063796997, "rewards/margins": 34.40519332885742, "rewards/rejected": -35.00495147705078, "step": 5910 }, { "epoch": 25.851528384279476, "grad_norm": 5.75986492989502e-06, "learning_rate": 4.626957684863809e-06, "logits/chosen": -1.1866337060928345, "logits/rejected": -1.237210988998413, "logps/chosen": -341.6529541015625, "logps/rejected": -3776.612548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.552283763885498, "rewards/margins": 34.38853454589844, "rewards/rejected": -34.940818786621094, "step": 5920 }, { "epoch": 25.895196506550217, "grad_norm": 4.572379207824682e-06, "learning_rate": 4.6249525939740515e-06, "logits/chosen": -1.1704292297363281, "logits/rejected": -1.2452142238616943, "logps/chosen": -342.6253662109375, "logps/rejected": -3602.39501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6068056225776672, "rewards/margins": 32.75109100341797, "rewards/rejected": -33.3578987121582, "step": 5930 }, { "epoch": 25.938864628820962, "grad_norm": 9.251388769310343e-05, "learning_rate": 4.622942565745563e-06, "logits/chosen": -1.1445882320404053, "logits/rejected": -1.2267838716506958, "logps/chosen": -374.96893310546875, "logps/rejected": -3379.029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7226255536079407, "rewards/margins": 30.5255184173584, "rewards/rejected": -31.24814796447754, "step": 5940 }, { "epoch": 25.982532751091703, "grad_norm": 0.0002520495382107043, "learning_rate": 4.620927604848654e-06, "logits/chosen": -1.1936110258102417, "logits/rejected": -1.2144155502319336, "logps/chosen": -336.4920959472656, "logps/rejected": -4022.440185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.710148811340332, "rewards/margins": 36.68120193481445, "rewards/rejected": -37.39134979248047, "step": 5950 }, { "epoch": 26.026200873362445, "grad_norm": 0.00025030665964490906, "learning_rate": 4.618907715965098e-06, "logits/chosen": -1.2468277215957642, "logits/rejected": -1.2972233295440674, "logps/chosen": -359.492919921875, "logps/rejected": -4176.11962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6437286138534546, "rewards/margins": 38.08871078491211, "rewards/rejected": -38.732444763183594, "step": 5960 }, { "epoch": 26.069868995633186, "grad_norm": 0.00016849158458711843, "learning_rate": 4.616882903788118e-06, "logits/chosen": -1.1778112649917603, "logits/rejected": -1.305408000946045, "logps/chosen": -365.6324157714844, "logps/rejected": -3646.240966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5428453683853149, "rewards/margins": 33.270896911621094, "rewards/rejected": -33.813743591308594, "step": 5970 }, { "epoch": 26.11353711790393, "grad_norm": 1.9606119282896716e-05, "learning_rate": 4.614853173022374e-06, "logits/chosen": -1.1677038669586182, "logits/rejected": -1.1694481372833252, "logps/chosen": -333.0330810546875, "logps/rejected": -3706.8515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6710373759269714, "rewards/margins": 33.59252166748047, "rewards/rejected": -34.26355743408203, "step": 5980 }, { "epoch": 26.157205240174672, "grad_norm": 4.39142640534814e-06, "learning_rate": 4.612818528383958e-06, "logits/chosen": -1.1849294900894165, "logits/rejected": -1.2365798950195312, "logps/chosen": -324.75775146484375, "logps/rejected": -3989.778564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.499702513217926, "rewards/margins": 36.47930145263672, "rewards/rejected": -36.97901153564453, "step": 5990 }, { "epoch": 26.200873362445414, "grad_norm": 5.356954847223359e-06, "learning_rate": 4.610778974600376e-06, "logits/chosen": -1.1747087240219116, "logits/rejected": -1.2212879657745361, "logps/chosen": -343.0284423828125, "logps/rejected": -4003.79345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7759420275688171, "rewards/margins": 36.362770080566406, "rewards/rejected": -37.138710021972656, "step": 6000 }, { "epoch": 26.24454148471616, "grad_norm": 3.227557261364749e-06, "learning_rate": 4.608734516410545e-06, "logits/chosen": -1.1490503549575806, "logits/rejected": -1.2559362649917603, "logps/chosen": -374.9457092285156, "logps/rejected": -3344.158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6012547016143799, "rewards/margins": 30.29276466369629, "rewards/rejected": -30.894018173217773, "step": 6010 }, { "epoch": 26.2882096069869, "grad_norm": 2.0208626668687886e-05, "learning_rate": 4.606685158564771e-06, "logits/chosen": -1.1859310865402222, "logits/rejected": -1.2168041467666626, "logps/chosen": -344.1734313964844, "logps/rejected": -3884.586669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6659940481185913, "rewards/margins": 35.428443908691406, "rewards/rejected": -36.09444046020508, "step": 6020 }, { "epoch": 26.33187772925764, "grad_norm": 3.0955476896403433e-06, "learning_rate": 4.6046309058247514e-06, "logits/chosen": -1.2538869380950928, "logits/rejected": -1.265487790107727, "logps/chosen": -322.70721435546875, "logps/rejected": -4133.52880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6576775312423706, "rewards/margins": 37.61731719970703, "rewards/rejected": -38.27499008178711, "step": 6030 }, { "epoch": 26.375545851528383, "grad_norm": 0.00016246509751651717, "learning_rate": 4.602571762963551e-06, "logits/chosen": -1.2326607704162598, "logits/rejected": -1.2989423274993896, "logps/chosen": -348.9615478515625, "logps/rejected": -4011.755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7420942783355713, "rewards/margins": 36.50444412231445, "rewards/rejected": -37.24653625488281, "step": 6040 }, { "epoch": 26.419213973799128, "grad_norm": 9.373049408458929e-06, "learning_rate": 4.600507734765601e-06, "logits/chosen": -1.2301523685455322, "logits/rejected": -1.2718207836151123, "logps/chosen": -326.7671203613281, "logps/rejected": -4194.9462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6003162264823914, "rewards/margins": 38.35254669189453, "rewards/rejected": -38.95286560058594, "step": 6050 }, { "epoch": 26.46288209606987, "grad_norm": 3.201802991793056e-06, "learning_rate": 4.59843882602668e-06, "logits/chosen": -1.166656255722046, "logits/rejected": -1.2260305881500244, "logps/chosen": -348.787353515625, "logps/rejected": -3704.285888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6500599384307861, "rewards/margins": 33.64190673828125, "rewards/rejected": -34.291969299316406, "step": 6060 }, { "epoch": 26.50655021834061, "grad_norm": 0.0002253804363396325, "learning_rate": 4.59636504155391e-06, "logits/chosen": -1.2623107433319092, "logits/rejected": -1.3015730381011963, "logps/chosen": -352.83648681640625, "logps/rejected": -3751.524169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6245170831680298, "rewards/margins": 34.055728912353516, "rewards/rejected": -34.68024444580078, "step": 6070 }, { "epoch": 26.550218340611355, "grad_norm": 7.303510225693688e-05, "learning_rate": 4.5942863861657395e-06, "logits/chosen": -1.1905882358551025, "logits/rejected": -1.2030316591262817, "logps/chosen": -336.8035583496094, "logps/rejected": -3974.161376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6531264185905457, "rewards/margins": 36.1732063293457, "rewards/rejected": -36.826332092285156, "step": 6080 }, { "epoch": 26.593886462882097, "grad_norm": 0.00012049002771926831, "learning_rate": 4.592202864691936e-06, "logits/chosen": -1.177721381187439, "logits/rejected": -1.2369654178619385, "logps/chosen": -346.5216064453125, "logps/rejected": -3547.76025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5420833230018616, "rewards/margins": 32.302024841308594, "rewards/rejected": -32.84410858154297, "step": 6090 }, { "epoch": 26.637554585152838, "grad_norm": 1.5356971700295646e-05, "learning_rate": 4.590114481973572e-06, "logits/chosen": -1.2318854331970215, "logits/rejected": -1.3231500387191772, "logps/chosen": -359.9068908691406, "logps/rejected": -3927.59130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6314152479171753, "rewards/margins": 35.77869415283203, "rewards/rejected": -36.41010665893555, "step": 6100 }, { "epoch": 26.68122270742358, "grad_norm": 2.253966786364089e-05, "learning_rate": 4.588021242863018e-06, "logits/chosen": -1.1995757818222046, "logits/rejected": -1.2825086116790771, "logps/chosen": -346.27081298828125, "logps/rejected": -4196.1630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.721103310585022, "rewards/margins": 38.27753448486328, "rewards/rejected": -38.99863815307617, "step": 6110 }, { "epoch": 26.724890829694324, "grad_norm": 7.256422611407141e-05, "learning_rate": 4.585923152223923e-06, "logits/chosen": -1.1994619369506836, "logits/rejected": -1.2455025911331177, "logps/chosen": -328.4881896972656, "logps/rejected": -4281.453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8125243186950684, "rewards/margins": 38.885032653808594, "rewards/rejected": -39.69755554199219, "step": 6120 }, { "epoch": 26.768558951965066, "grad_norm": 0.0005094251683652847, "learning_rate": 4.583820214931215e-06, "logits/chosen": -1.2053035497665405, "logits/rejected": -1.2844603061676025, "logps/chosen": -344.43731689453125, "logps/rejected": -3718.52197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6048862338066101, "rewards/margins": 33.7392578125, "rewards/rejected": -34.344146728515625, "step": 6130 }, { "epoch": 26.812227074235807, "grad_norm": 4.053497238198217e-06, "learning_rate": 4.5817124358710785e-06, "logits/chosen": -1.1980295181274414, "logits/rejected": -1.250301718711853, "logps/chosen": -322.24847412109375, "logps/rejected": -3983.12255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5829660892486572, "rewards/margins": 36.32219696044922, "rewards/rejected": -36.90515899658203, "step": 6140 }, { "epoch": 26.85589519650655, "grad_norm": 3.991444457820592e-06, "learning_rate": 4.57959981994095e-06, "logits/chosen": -1.1612200736999512, "logits/rejected": -1.2130215167999268, "logps/chosen": -340.02288818359375, "logps/rejected": -3816.36962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5776423811912537, "rewards/margins": 34.79607009887695, "rewards/rejected": -35.37371063232422, "step": 6150 }, { "epoch": 26.899563318777293, "grad_norm": 0.0010656181169293542, "learning_rate": 4.577482372049503e-06, "logits/chosen": -1.1703450679779053, "logits/rejected": -1.257323145866394, "logps/chosen": -374.87310791015625, "logps/rejected": -3351.001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6644852757453918, "rewards/margins": 30.26698875427246, "rewards/rejected": -30.931476593017578, "step": 6160 }, { "epoch": 26.943231441048034, "grad_norm": 4.947068392900371e-06, "learning_rate": 4.5753600971166405e-06, "logits/chosen": -1.215075135231018, "logits/rejected": -1.2708227634429932, "logps/chosen": -346.0373229980469, "logps/rejected": -3841.34033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7310583591461182, "rewards/margins": 34.807735443115234, "rewards/rejected": -35.538795471191406, "step": 6170 }, { "epoch": 26.986899563318776, "grad_norm": 1.0550127492868556e-05, "learning_rate": 4.5732330000734775e-06, "logits/chosen": -1.2469532489776611, "logits/rejected": -1.2964259386062622, "logps/chosen": -349.2544860839844, "logps/rejected": -3959.052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6902323961257935, "rewards/margins": 35.9727897644043, "rewards/rejected": -36.66302490234375, "step": 6180 }, { "epoch": 27.03056768558952, "grad_norm": 3.0279738616691027e-06, "learning_rate": 4.571101085862337e-06, "logits/chosen": -1.1892614364624023, "logits/rejected": -1.2594196796417236, "logps/chosen": -356.35223388671875, "logps/rejected": -3688.96875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6138612627983093, "rewards/margins": 33.467323303222656, "rewards/rejected": -34.08118438720703, "step": 6190 }, { "epoch": 27.074235807860262, "grad_norm": 1.0568966279249495e-05, "learning_rate": 4.568964359436733e-06, "logits/chosen": -1.2050278186798096, "logits/rejected": -1.2900995016098022, "logps/chosen": -348.0448913574219, "logps/rejected": -4022.92822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6057989001274109, "rewards/margins": 36.60253143310547, "rewards/rejected": -37.20833206176758, "step": 6200 }, { "epoch": 27.117903930131003, "grad_norm": 1.547374355215032e-05, "learning_rate": 4.566822825761361e-06, "logits/chosen": -1.1956651210784912, "logits/rejected": -1.2512781620025635, "logps/chosen": -336.28369140625, "logps/rejected": -3659.53125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5371262431144714, "rewards/margins": 33.44087600708008, "rewards/rejected": -33.978004455566406, "step": 6210 }, { "epoch": 27.16157205240175, "grad_norm": 1.8408490123561346e-05, "learning_rate": 4.564676489812085e-06, "logits/chosen": -1.1955808401107788, "logits/rejected": -1.2535998821258545, "logps/chosen": -339.2187194824219, "logps/rejected": -3696.171142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5834928750991821, "rewards/margins": 33.65995788574219, "rewards/rejected": -34.24345016479492, "step": 6220 }, { "epoch": 27.20524017467249, "grad_norm": 6.1151805803838726e-06, "learning_rate": 4.562525356575929e-06, "logits/chosen": -1.2104030847549438, "logits/rejected": -1.304868459701538, "logps/chosen": -376.6513671875, "logps/rejected": -3534.90478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6713887453079224, "rewards/margins": 32.126346588134766, "rewards/rejected": -32.79773712158203, "step": 6230 }, { "epoch": 27.24890829694323, "grad_norm": 3.0098528090458558e-05, "learning_rate": 4.560369431051061e-06, "logits/chosen": -1.1694926023483276, "logits/rejected": -1.2512973546981812, "logps/chosen": -338.8404846191406, "logps/rejected": -3948.95556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6337717175483704, "rewards/margins": 35.98173522949219, "rewards/rejected": -36.615501403808594, "step": 6240 }, { "epoch": 27.292576419213972, "grad_norm": 1.0705939586919785e-05, "learning_rate": 4.558208718246787e-06, "logits/chosen": -1.201108455657959, "logits/rejected": -1.2966219186782837, "logps/chosen": -369.9951477050781, "logps/rejected": -4005.88134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6608028411865234, "rewards/margins": 36.47580337524414, "rewards/rejected": -37.1366081237793, "step": 6250 }, { "epoch": 27.336244541484717, "grad_norm": 6.822354932091061e-06, "learning_rate": 4.556043223183535e-06, "logits/chosen": -1.1200233697891235, "logits/rejected": -1.2021821737289429, "logps/chosen": -366.8541564941406, "logps/rejected": -3078.883544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6270990967750549, "rewards/margins": 27.771408081054688, "rewards/rejected": -28.39850425720215, "step": 6260 }, { "epoch": 27.37991266375546, "grad_norm": 9.937912632008366e-05, "learning_rate": 4.553872950892844e-06, "logits/chosen": -1.161240816116333, "logits/rejected": -1.2388025522232056, "logps/chosen": -340.4908447265625, "logps/rejected": -3540.42626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.59084552526474, "rewards/margins": 32.185157775878906, "rewards/rejected": -32.7760009765625, "step": 6270 }, { "epoch": 27.4235807860262, "grad_norm": 2.1295183329255534e-06, "learning_rate": 4.551697906417352e-06, "logits/chosen": -1.1849721670150757, "logits/rejected": -1.2829124927520752, "logps/chosen": -351.3813781738281, "logps/rejected": -3694.90869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5667170286178589, "rewards/margins": 33.72881317138672, "rewards/rejected": -34.29552459716797, "step": 6280 }, { "epoch": 27.467248908296945, "grad_norm": 2.038019148434869e-06, "learning_rate": 4.549518094810788e-06, "logits/chosen": -1.2296960353851318, "logits/rejected": -1.3069924116134644, "logps/chosen": -332.9064025878906, "logps/rejected": -4046.23291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6155154705047607, "rewards/margins": 36.8662109375, "rewards/rejected": -37.48172378540039, "step": 6290 }, { "epoch": 27.510917030567686, "grad_norm": 0.00016304178720669417, "learning_rate": 4.547333521137957e-06, "logits/chosen": -1.197097897529602, "logits/rejected": -1.222382664680481, "logps/chosen": -314.75653076171875, "logps/rejected": -4156.0869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6366162300109863, "rewards/margins": 37.93803787231445, "rewards/rejected": -38.57465362548828, "step": 6300 }, { "epoch": 27.554585152838428, "grad_norm": 8.660829694126949e-06, "learning_rate": 4.545144190474725e-06, "logits/chosen": -1.2068290710449219, "logits/rejected": -1.2659984827041626, "logps/chosen": -346.095458984375, "logps/rejected": -4001.08837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.652156412601471, "rewards/margins": 36.358558654785156, "rewards/rejected": -37.010719299316406, "step": 6310 }, { "epoch": 27.59825327510917, "grad_norm": 0.00022778749490770636, "learning_rate": 4.542950107908014e-06, "logits/chosen": -1.1950746774673462, "logits/rejected": -1.315103530883789, "logps/chosen": -377.8918762207031, "logps/rejected": -3640.803955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.732780933380127, "rewards/margins": 33.08643341064453, "rewards/rejected": -33.8192138671875, "step": 6320 }, { "epoch": 27.641921397379914, "grad_norm": 4.143711978350706e-05, "learning_rate": 4.540751278535788e-06, "logits/chosen": -1.1719586849212646, "logits/rejected": -1.2257734537124634, "logps/chosen": -349.5539245605469, "logps/rejected": -3487.203857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6975764036178589, "rewards/margins": 31.5290470123291, "rewards/rejected": -32.226619720458984, "step": 6330 }, { "epoch": 27.685589519650655, "grad_norm": 0.0009473969398251267, "learning_rate": 4.538547707467038e-06, "logits/chosen": -1.1495733261108398, "logits/rejected": -1.2581409215927124, "logps/chosen": -369.26080322265625, "logps/rejected": -3389.045654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7852225303649902, "rewards/margins": 30.62836265563965, "rewards/rejected": -31.413583755493164, "step": 6340 }, { "epoch": 27.729257641921397, "grad_norm": 3.942201321454867e-05, "learning_rate": 4.536339399821774e-06, "logits/chosen": -1.1845242977142334, "logits/rejected": -1.270341157913208, "logps/chosen": -366.49615478515625, "logps/rejected": -3725.896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6837798357009888, "rewards/margins": 33.811668395996094, "rewards/rejected": -34.495445251464844, "step": 6350 }, { "epoch": 27.77292576419214, "grad_norm": 0.00039409726389704776, "learning_rate": 4.534126360731008e-06, "logits/chosen": -1.1896231174468994, "logits/rejected": -1.2397558689117432, "logps/chosen": -342.6216125488281, "logps/rejected": -3692.81396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6790193319320679, "rewards/margins": 33.5132942199707, "rewards/rejected": -34.19231414794922, "step": 6360 }, { "epoch": 27.816593886462883, "grad_norm": 0.00036906361081928246, "learning_rate": 4.531908595336751e-06, "logits/chosen": -1.2246049642562866, "logits/rejected": -1.281428337097168, "logps/chosen": -351.899169921875, "logps/rejected": -3678.625732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6724026799201965, "rewards/margins": 33.35068130493164, "rewards/rejected": -34.02307891845703, "step": 6370 }, { "epoch": 27.860262008733624, "grad_norm": 4.186986188646539e-06, "learning_rate": 4.529686108791993e-06, "logits/chosen": -1.2221566438674927, "logits/rejected": -1.2765302658081055, "logps/chosen": -329.31646728515625, "logps/rejected": -4310.43310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7161601781845093, "rewards/margins": 39.42743682861328, "rewards/rejected": -40.14360046386719, "step": 6380 }, { "epoch": 27.903930131004365, "grad_norm": 8.400556458160027e-05, "learning_rate": 4.527458906260691e-06, "logits/chosen": -1.2102869749069214, "logits/rejected": -1.2734768390655518, "logps/chosen": -323.6438293457031, "logps/rejected": -4088.4609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6107405424118042, "rewards/margins": 37.21936798095703, "rewards/rejected": -37.830108642578125, "step": 6390 }, { "epoch": 27.94759825327511, "grad_norm": 3.508295772931688e-06, "learning_rate": 4.5252269929177636e-06, "logits/chosen": -1.2708090543746948, "logits/rejected": -1.337214708328247, "logps/chosen": -349.8399353027344, "logps/rejected": -3790.40234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5828974843025208, "rewards/margins": 34.62218475341797, "rewards/rejected": -35.205078125, "step": 6400 }, { "epoch": 27.99126637554585, "grad_norm": 3.315811670110605e-06, "learning_rate": 4.522990373949074e-06, "logits/chosen": -1.1719506978988647, "logits/rejected": -1.2608619928359985, "logps/chosen": -332.529296875, "logps/rejected": -4165.5283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6080942153930664, "rewards/margins": 38.140235900878906, "rewards/rejected": -38.74833297729492, "step": 6410 }, { "epoch": 28.034934497816593, "grad_norm": 4.135266016044662e-06, "learning_rate": 4.520749054551415e-06, "logits/chosen": -1.2567886114120483, "logits/rejected": -1.3020005226135254, "logps/chosen": -359.54400634765625, "logps/rejected": -3899.10400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7726985812187195, "rewards/margins": 35.28397750854492, "rewards/rejected": -36.056678771972656, "step": 6420 }, { "epoch": 28.078602620087338, "grad_norm": 6.436749166914806e-05, "learning_rate": 4.5185030399325085e-06, "logits/chosen": -1.1975791454315186, "logits/rejected": -1.3045963048934937, "logps/chosen": -351.5902099609375, "logps/rejected": -3505.1796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5393074154853821, "rewards/margins": 31.866241455078125, "rewards/rejected": -32.40555191040039, "step": 6430 }, { "epoch": 28.12227074235808, "grad_norm": 2.9637661141224305e-05, "learning_rate": 4.516252335310979e-06, "logits/chosen": -1.1609550714492798, "logits/rejected": -1.287247896194458, "logps/chosen": -363.1451110839844, "logps/rejected": -3390.631591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5948449373245239, "rewards/margins": 30.80404281616211, "rewards/rejected": -31.398889541625977, "step": 6440 }, { "epoch": 28.16593886462882, "grad_norm": 3.7433341528392098e-06, "learning_rate": 4.513996945916351e-06, "logits/chosen": -1.2187420129776, "logits/rejected": -1.2895851135253906, "logps/chosen": -351.6308898925781, "logps/rejected": -4119.08056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7037245035171509, "rewards/margins": 37.52363204956055, "rewards/rejected": -38.227359771728516, "step": 6450 }, { "epoch": 28.209606986899562, "grad_norm": 5.615333271581704e-05, "learning_rate": 4.5117368769890345e-06, "logits/chosen": -1.2351938486099243, "logits/rejected": -1.2931249141693115, "logps/chosen": -362.2550354003906, "logps/rejected": -3570.184814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7066178917884827, "rewards/margins": 32.286861419677734, "rewards/rejected": -32.99347686767578, "step": 6460 }, { "epoch": 28.253275109170307, "grad_norm": 8.595704656304528e-05, "learning_rate": 4.5094721337803115e-06, "logits/chosen": -1.1598010063171387, "logits/rejected": -1.2440314292907715, "logps/chosen": -338.21875, "logps/rejected": -3619.125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5564351081848145, "rewards/margins": 32.97862243652344, "rewards/rejected": -33.535057067871094, "step": 6470 }, { "epoch": 28.29694323144105, "grad_norm": 4.434453669472402e-06, "learning_rate": 4.5072027215523255e-06, "logits/chosen": -1.1866906881332397, "logits/rejected": -1.2654625177383423, "logps/chosen": -329.12469482421875, "logps/rejected": -3603.483642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6421784162521362, "rewards/margins": 32.72524642944336, "rewards/rejected": -33.36742401123047, "step": 6480 }, { "epoch": 28.34061135371179, "grad_norm": 2.4100053817752047e-06, "learning_rate": 4.504928645578067e-06, "logits/chosen": -1.1717290878295898, "logits/rejected": -1.3028185367584229, "logps/chosen": -329.47210693359375, "logps/rejected": -4009.012451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6242690682411194, "rewards/margins": 36.683349609375, "rewards/rejected": -37.3076171875, "step": 6490 }, { "epoch": 28.38427947598253, "grad_norm": 7.687013416207738e-05, "learning_rate": 4.502649911141364e-06, "logits/chosen": -1.2187925577163696, "logits/rejected": -1.302990198135376, "logps/chosen": -344.14788818359375, "logps/rejected": -4077.943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7111617922782898, "rewards/margins": 37.2096061706543, "rewards/rejected": -37.92076873779297, "step": 6500 }, { "epoch": 28.427947598253276, "grad_norm": 8.820245867836195e-06, "learning_rate": 4.500366523536868e-06, "logits/chosen": -1.2147890329360962, "logits/rejected": -1.3252639770507812, "logps/chosen": -336.9293518066406, "logps/rejected": -4053.52783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6244794130325317, "rewards/margins": 37.120506286621094, "rewards/rejected": -37.74498748779297, "step": 6510 }, { "epoch": 28.471615720524017, "grad_norm": 4.727303573948493e-06, "learning_rate": 4.498078488070044e-06, "logits/chosen": -1.257491111755371, "logits/rejected": -1.3280233144760132, "logps/chosen": -346.58624267578125, "logps/rejected": -4023.212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6859235167503357, "rewards/margins": 36.639015197753906, "rewards/rejected": -37.3249397277832, "step": 6520 }, { "epoch": 28.51528384279476, "grad_norm": 4.408785845160207e-06, "learning_rate": 4.495785810057151e-06, "logits/chosen": -1.2447090148925781, "logits/rejected": -1.3117156028747559, "logps/chosen": -325.54595947265625, "logps/rejected": -3990.737060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.613930344581604, "rewards/margins": 36.37934494018555, "rewards/rejected": -36.9932746887207, "step": 6530 }, { "epoch": 28.558951965065503, "grad_norm": 3.4299694811311397e-06, "learning_rate": 4.493488494825242e-06, "logits/chosen": -1.1705399751663208, "logits/rejected": -1.2480192184448242, "logps/chosen": -326.5218811035156, "logps/rejected": -4148.61962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6895171999931335, "rewards/margins": 37.82258987426758, "rewards/rejected": -38.512107849121094, "step": 6540 }, { "epoch": 28.602620087336245, "grad_norm": 4.443849042618299e-06, "learning_rate": 4.49118654771214e-06, "logits/chosen": -1.2565722465515137, "logits/rejected": -1.3302333354949951, "logps/chosen": -346.45147705078125, "logps/rejected": -4132.744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7319974899291992, "rewards/margins": 37.6983528137207, "rewards/rejected": -38.43034744262695, "step": 6550 }, { "epoch": 28.646288209606986, "grad_norm": 7.5120246762787945e-06, "learning_rate": 4.488879974066432e-06, "logits/chosen": -1.1917669773101807, "logits/rejected": -1.2832090854644775, "logps/chosen": -338.3736267089844, "logps/rejected": -3860.391845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7066457867622375, "rewards/margins": 35.13323974609375, "rewards/rejected": -35.83988952636719, "step": 6560 }, { "epoch": 28.68995633187773, "grad_norm": 3.6336622673381195e-06, "learning_rate": 4.486568779247453e-06, "logits/chosen": -1.1952917575836182, "logits/rejected": -1.343418836593628, "logps/chosen": -389.01995849609375, "logps/rejected": -3822.05029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9244363903999329, "rewards/margins": 34.559974670410156, "rewards/rejected": -35.48440933227539, "step": 6570 }, { "epoch": 28.733624454148472, "grad_norm": 3.027643250190629e-06, "learning_rate": 4.484252968625278e-06, "logits/chosen": -1.2196810245513916, "logits/rejected": -1.2729055881500244, "logps/chosen": -317.8720703125, "logps/rejected": -3899.75634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6793792843818665, "rewards/margins": 35.463523864746094, "rewards/rejected": -36.14290237426758, "step": 6580 }, { "epoch": 28.777292576419214, "grad_norm": 2.3809084596901343e-06, "learning_rate": 4.4819325475807044e-06, "logits/chosen": -1.2499805688858032, "logits/rejected": -1.3246351480484009, "logps/chosen": -330.0541687011719, "logps/rejected": -4067.4140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6103534698486328, "rewards/margins": 37.065059661865234, "rewards/rejected": -37.6754150390625, "step": 6590 }, { "epoch": 28.820960698689955, "grad_norm": 6.953602798491296e-05, "learning_rate": 4.479607521505244e-06, "logits/chosen": -1.2406384944915771, "logits/rejected": -1.343422293663025, "logps/chosen": -366.58221435546875, "logps/rejected": -3920.35888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6509696245193481, "rewards/margins": 35.793617248535156, "rewards/rejected": -36.4445915222168, "step": 6600 }, { "epoch": 28.8646288209607, "grad_norm": 4.296640729648712e-06, "learning_rate": 4.477277895801105e-06, "logits/chosen": -1.1967227458953857, "logits/rejected": -1.304254174232483, "logps/chosen": -358.6907653808594, "logps/rejected": -3613.76318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6536988019943237, "rewards/margins": 32.76278305053711, "rewards/rejected": -33.41648483276367, "step": 6610 }, { "epoch": 28.90829694323144, "grad_norm": 3.7006596766183326e-06, "learning_rate": 4.474943675881188e-06, "logits/chosen": -1.1875131130218506, "logits/rejected": -1.3219764232635498, "logps/chosen": -391.54522705078125, "logps/rejected": -3599.59130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8740888833999634, "rewards/margins": 32.50812530517578, "rewards/rejected": -33.38221740722656, "step": 6620 }, { "epoch": 28.951965065502183, "grad_norm": 2.8741706511292656e-06, "learning_rate": 4.472604867169064e-06, "logits/chosen": -1.2427794933319092, "logits/rejected": -1.294601321220398, "logps/chosen": -359.63494873046875, "logps/rejected": -4122.30859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9206622838973999, "rewards/margins": 37.2613410949707, "rewards/rejected": -38.18199920654297, "step": 6630 }, { "epoch": 28.995633187772924, "grad_norm": 5.927848242223293e-06, "learning_rate": 4.470261475098968e-06, "logits/chosen": -1.2327418327331543, "logits/rejected": -1.3017427921295166, "logps/chosen": -339.7654724121094, "logps/rejected": -3850.817138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.673488199710846, "rewards/margins": 34.93653106689453, "rewards/rejected": -35.61001968383789, "step": 6640 }, { "epoch": 29.03930131004367, "grad_norm": 3.4820861383014395e-06, "learning_rate": 4.467913505115783e-06, "logits/chosen": -1.1883196830749512, "logits/rejected": -1.3068063259124756, "logps/chosen": -384.0401916503906, "logps/rejected": -3483.91796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7840637564659119, "rewards/margins": 31.477447509765625, "rewards/rejected": -32.261512756347656, "step": 6650 }, { "epoch": 29.08296943231441, "grad_norm": 3.1068728375993545e-06, "learning_rate": 4.465560962675032e-06, "logits/chosen": -1.2310645580291748, "logits/rejected": -1.3424257040023804, "logps/chosen": -368.6910705566406, "logps/rejected": -4043.99169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7317205667495728, "rewards/margins": 36.71860122680664, "rewards/rejected": -37.45032501220703, "step": 6660 }, { "epoch": 29.12663755458515, "grad_norm": 2.2428225434078614e-05, "learning_rate": 4.463203853242856e-06, "logits/chosen": -1.2232245206832886, "logits/rejected": -1.3527734279632568, "logps/chosen": -369.6368103027344, "logps/rejected": -3980.14697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7559410929679871, "rewards/margins": 36.15554428100586, "rewards/rejected": -36.9114875793457, "step": 6670 }, { "epoch": 29.170305676855897, "grad_norm": 2.1814565766633238e-05, "learning_rate": 4.4608421822960165e-06, "logits/chosen": -1.2020597457885742, "logits/rejected": -1.3471509218215942, "logps/chosen": -375.37530517578125, "logps/rejected": -3585.99072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7296799421310425, "rewards/margins": 32.468196868896484, "rewards/rejected": -33.1978759765625, "step": 6680 }, { "epoch": 29.213973799126638, "grad_norm": 0.0005843818705083514, "learning_rate": 4.458475955321866e-06, "logits/chosen": -1.1909959316253662, "logits/rejected": -1.2938063144683838, "logps/chosen": -330.92987060546875, "logps/rejected": -3749.315673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6502922773361206, "rewards/margins": 34.139976501464844, "rewards/rejected": -34.79026412963867, "step": 6690 }, { "epoch": 29.25764192139738, "grad_norm": 9.01407403388976e-06, "learning_rate": 4.456105177818345e-06, "logits/chosen": -1.1818839311599731, "logits/rejected": -1.2639294862747192, "logps/chosen": -360.8274841308594, "logps/rejected": -3544.2109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.760307252407074, "rewards/margins": 32.10175323486328, "rewards/rejected": -32.862056732177734, "step": 6700 }, { "epoch": 29.30131004366812, "grad_norm": 3.2153701781611114e-06, "learning_rate": 4.453729855293969e-06, "logits/chosen": -1.1813013553619385, "logits/rejected": -1.2713524103164673, "logps/chosen": -329.0753173828125, "logps/rejected": -3509.03955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6735775470733643, "rewards/margins": 31.865427017211914, "rewards/rejected": -32.539005279541016, "step": 6710 }, { "epoch": 29.344978165938866, "grad_norm": 6.576924537734445e-06, "learning_rate": 4.451349993267811e-06, "logits/chosen": -1.1899216175079346, "logits/rejected": -1.2975318431854248, "logps/chosen": -341.01953125, "logps/rejected": -4107.47412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6692463755607605, "rewards/margins": 37.47783660888672, "rewards/rejected": -38.14708709716797, "step": 6720 }, { "epoch": 29.388646288209607, "grad_norm": 5.051361003253018e-05, "learning_rate": 4.448965597269494e-06, "logits/chosen": -1.1687352657318115, "logits/rejected": -1.2947901487350464, "logps/chosen": -367.46258544921875, "logps/rejected": -3687.01708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6861888766288757, "rewards/margins": 33.625335693359375, "rewards/rejected": -34.311527252197266, "step": 6730 }, { "epoch": 29.43231441048035, "grad_norm": 1.5375841596042802e-05, "learning_rate": 4.446576672839177e-06, "logits/chosen": -1.243691325187683, "logits/rejected": -1.3231743574142456, "logps/chosen": -344.26666259765625, "logps/rejected": -3898.09716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7920023202896118, "rewards/margins": 35.398014068603516, "rewards/rejected": -36.19001388549805, "step": 6740 }, { "epoch": 29.475982532751093, "grad_norm": 5.405824678865568e-05, "learning_rate": 4.444183225527535e-06, "logits/chosen": -1.2016828060150146, "logits/rejected": -1.2968212366104126, "logps/chosen": -353.92083740234375, "logps/rejected": -4005.07275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7011070847511292, "rewards/margins": 36.546714782714844, "rewards/rejected": -37.24781799316406, "step": 6750 }, { "epoch": 29.519650655021834, "grad_norm": 5.697564055145964e-05, "learning_rate": 4.4417852608957575e-06, "logits/chosen": -1.2285094261169434, "logits/rejected": -1.298283576965332, "logps/chosen": -337.43524169921875, "logps/rejected": -3864.6328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6319738626480103, "rewards/margins": 35.136959075927734, "rewards/rejected": -35.7689323425293, "step": 6760 }, { "epoch": 29.563318777292576, "grad_norm": 3.9283115887079335e-06, "learning_rate": 4.4393827845155276e-06, "logits/chosen": -1.1762274503707886, "logits/rejected": -1.2887800931930542, "logps/chosen": -330.07757568359375, "logps/rejected": -3927.9453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6922167539596558, "rewards/margins": 35.781089782714844, "rewards/rejected": -36.47330093383789, "step": 6770 }, { "epoch": 29.606986899563317, "grad_norm": 1.5808708247699377e-06, "learning_rate": 4.436975801969013e-06, "logits/chosen": -1.1711474657058716, "logits/rejected": -1.286702036857605, "logps/chosen": -353.86944580078125, "logps/rejected": -3729.39453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.72574782371521, "rewards/margins": 33.807613372802734, "rewards/rejected": -34.533363342285156, "step": 6780 }, { "epoch": 29.650655021834062, "grad_norm": 0.00024652503877420877, "learning_rate": 4.434564318848851e-06, "logits/chosen": -1.2296160459518433, "logits/rejected": -1.3437385559082031, "logps/chosen": -364.03594970703125, "logps/rejected": -3771.51806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7494003176689148, "rewards/margins": 34.26659393310547, "rewards/rejected": -35.015995025634766, "step": 6790 }, { "epoch": 29.694323144104803, "grad_norm": 9.761318724090971e-05, "learning_rate": 4.432148340758133e-06, "logits/chosen": -1.1842491626739502, "logits/rejected": -1.26802396774292, "logps/chosen": -326.4446105957031, "logps/rejected": -3955.057861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6560534834861755, "rewards/margins": 36.0023307800293, "rewards/rejected": -36.658382415771484, "step": 6800 }, { "epoch": 29.737991266375545, "grad_norm": 0.00015195729952832026, "learning_rate": 4.4297278733104e-06, "logits/chosen": -1.1978609561920166, "logits/rejected": -1.2960799932479858, "logps/chosen": -353.6824645996094, "logps/rejected": -4027.200439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6822935342788696, "rewards/margins": 36.81673812866211, "rewards/rejected": -37.49903106689453, "step": 6810 }, { "epoch": 29.78165938864629, "grad_norm": 1.8434749575500788e-06, "learning_rate": 4.427302922129619e-06, "logits/chosen": -1.206969976425171, "logits/rejected": -1.3128225803375244, "logps/chosen": -350.79730224609375, "logps/rejected": -3770.208251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8272565007209778, "rewards/margins": 34.08159255981445, "rewards/rejected": -34.908851623535156, "step": 6820 }, { "epoch": 29.82532751091703, "grad_norm": 7.673221277671734e-06, "learning_rate": 4.424873492850178e-06, "logits/chosen": -1.2087414264678955, "logits/rejected": -1.3603012561798096, "logps/chosen": -364.53839111328125, "logps/rejected": -4016.885986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6677767038345337, "rewards/margins": 36.70444107055664, "rewards/rejected": -37.372215270996094, "step": 6830 }, { "epoch": 29.868995633187772, "grad_norm": 8.362628206363511e-06, "learning_rate": 4.422439591116868e-06, "logits/chosen": -1.2271770238876343, "logits/rejected": -1.2756683826446533, "logps/chosen": -315.6665954589844, "logps/rejected": -4129.67138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7213646173477173, "rewards/margins": 37.68602752685547, "rewards/rejected": -38.40739059448242, "step": 6840 }, { "epoch": 29.912663755458514, "grad_norm": 4.7494668047110634e-05, "learning_rate": 4.420001222584873e-06, "logits/chosen": -1.2111371755599976, "logits/rejected": -1.295401692390442, "logps/chosen": -328.9146423339844, "logps/rejected": -3598.290283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6469680070877075, "rewards/margins": 32.789398193359375, "rewards/rejected": -33.436363220214844, "step": 6850 }, { "epoch": 29.95633187772926, "grad_norm": 3.532618312889144e-06, "learning_rate": 4.417558392919756e-06, "logits/chosen": -1.2380740642547607, "logits/rejected": -1.3121895790100098, "logps/chosen": -351.9100341796875, "logps/rejected": -3605.67236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6836426854133606, "rewards/margins": 32.699039459228516, "rewards/rejected": -33.38268280029297, "step": 6860 }, { "epoch": 30.0, "grad_norm": 4.870801122849432e-06, "learning_rate": 4.415111107797445e-06, "logits/chosen": -1.318587064743042, "logits/rejected": -1.4070709943771362, "logps/chosen": -344.5884704589844, "logps/rejected": -4298.25, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7322229743003845, "rewards/margins": 39.20935821533203, "rewards/rejected": -39.94158935546875, "step": 6870 }, { "epoch": 30.04366812227074, "grad_norm": 2.07072898564373e-06, "learning_rate": 4.41265937290422e-06, "logits/chosen": -1.1875745058059692, "logits/rejected": -1.3184025287628174, "logps/chosen": -338.2386474609375, "logps/rejected": -4220.68212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6797592639923096, "rewards/margins": 38.63063430786133, "rewards/rejected": -39.31039047241211, "step": 6880 }, { "epoch": 30.087336244541486, "grad_norm": 4.61563179134839e-06, "learning_rate": 4.4102031939367e-06, "logits/chosen": -1.2607146501541138, "logits/rejected": -1.3638458251953125, "logps/chosen": -338.7166442871094, "logps/rejected": -3929.283935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7182987332344055, "rewards/margins": 35.8665657043457, "rewards/rejected": -36.584861755371094, "step": 6890 }, { "epoch": 30.131004366812228, "grad_norm": 3.684471535847073e-06, "learning_rate": 4.4077425766018275e-06, "logits/chosen": -1.2352960109710693, "logits/rejected": -1.347505807876587, "logps/chosen": -337.3502197265625, "logps/rejected": -4069.38525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6523352265357971, "rewards/margins": 37.188560485839844, "rewards/rejected": -37.84089660644531, "step": 6900 }, { "epoch": 30.17467248908297, "grad_norm": 9.097003647840641e-05, "learning_rate": 4.4052775266168644e-06, "logits/chosen": -1.247682809829712, "logits/rejected": -1.3527987003326416, "logps/chosen": -375.19287109375, "logps/rejected": -3558.280029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6567533612251282, "rewards/margins": 32.37522506713867, "rewards/rejected": -33.03197479248047, "step": 6910 }, { "epoch": 30.21834061135371, "grad_norm": 1.3600853732883447e-05, "learning_rate": 4.402808049709364e-06, "logits/chosen": -1.2129266262054443, "logits/rejected": -1.3165056705474854, "logps/chosen": -342.6649475097656, "logps/rejected": -4096.96044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7987712621688843, "rewards/margins": 37.332923889160156, "rewards/rejected": -38.13169860839844, "step": 6920 }, { "epoch": 30.262008733624455, "grad_norm": 3.302535359783884e-06, "learning_rate": 4.400334151617172e-06, "logits/chosen": -1.2042826414108276, "logits/rejected": -1.3475866317749023, "logps/chosen": -361.0517272949219, "logps/rejected": -3988.577392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8421779870986938, "rewards/margins": 36.29026794433594, "rewards/rejected": -37.132442474365234, "step": 6930 }, { "epoch": 30.305676855895197, "grad_norm": 5.094722249678035e-06, "learning_rate": 4.397855838088401e-06, "logits/chosen": -1.2498356103897095, "logits/rejected": -1.364078164100647, "logps/chosen": -347.0533752441406, "logps/rejected": -4096.27978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8025282621383667, "rewards/margins": 37.21556854248047, "rewards/rejected": -38.01809310913086, "step": 6940 }, { "epoch": 30.349344978165938, "grad_norm": 5.331598323055378e-05, "learning_rate": 4.395373114881428e-06, "logits/chosen": -1.2239893674850464, "logits/rejected": -1.3106361627578735, "logps/chosen": -324.57586669921875, "logps/rejected": -4390.04931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6198574304580688, "rewards/margins": 40.26310348510742, "rewards/rejected": -40.88296127319336, "step": 6950 }, { "epoch": 30.393013100436683, "grad_norm": 2.700932832638928e-06, "learning_rate": 4.392885987764873e-06, "logits/chosen": -1.2898457050323486, "logits/rejected": -1.4077084064483643, "logps/chosen": -356.7659912109375, "logps/rejected": -4188.57177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7548850178718567, "rewards/margins": 38.095176696777344, "rewards/rejected": -38.850059509277344, "step": 6960 }, { "epoch": 30.436681222707424, "grad_norm": 1.4383676009888399e-06, "learning_rate": 4.390394462517589e-06, "logits/chosen": -1.22822904586792, "logits/rejected": -1.3500728607177734, "logps/chosen": -363.6156921386719, "logps/rejected": -4044.32568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8103289604187012, "rewards/margins": 36.70494079589844, "rewards/rejected": -37.51527404785156, "step": 6970 }, { "epoch": 30.480349344978166, "grad_norm": 6.274743657424448e-05, "learning_rate": 4.3878985449286486e-06, "logits/chosen": -1.2332261800765991, "logits/rejected": -1.331549882888794, "logps/chosen": -339.37579345703125, "logps/rejected": -3742.109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.637725293636322, "rewards/margins": 34.00209045410156, "rewards/rejected": -34.63981246948242, "step": 6980 }, { "epoch": 30.524017467248907, "grad_norm": 2.112923005057182e-06, "learning_rate": 4.385398240797328e-06, "logits/chosen": -1.2470782995224, "logits/rejected": -1.3770020008087158, "logps/chosen": -376.8738098144531, "logps/rejected": -4115.0107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.878453254699707, "rewards/margins": 37.43146514892578, "rewards/rejected": -38.309913635253906, "step": 6990 }, { "epoch": 30.56768558951965, "grad_norm": 3.6748781061302823e-06, "learning_rate": 4.3828935559330996e-06, "logits/chosen": -1.2508854866027832, "logits/rejected": -1.3899260759353638, "logps/chosen": -377.57464599609375, "logps/rejected": -4288.84619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7160466909408569, "rewards/margins": 39.110809326171875, "rewards/rejected": -39.82685470581055, "step": 7000 }, { "epoch": 30.611353711790393, "grad_norm": 1.792348871200839e-06, "learning_rate": 4.380384496155611e-06, "logits/chosen": -1.2108663320541382, "logits/rejected": -1.3126128911972046, "logps/chosen": -345.9971618652344, "logps/rejected": -4181.5927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6658647060394287, "rewards/margins": 38.207122802734375, "rewards/rejected": -38.872989654541016, "step": 7010 }, { "epoch": 30.655021834061134, "grad_norm": 8.575761417087059e-05, "learning_rate": 4.377871067294675e-06, "logits/chosen": -1.2038843631744385, "logits/rejected": -1.3174092769622803, "logps/chosen": -341.6082458496094, "logps/rejected": -3676.588623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7021277546882629, "rewards/margins": 33.41625213623047, "rewards/rejected": -34.118385314941406, "step": 7020 }, { "epoch": 30.69868995633188, "grad_norm": 2.156744534374734e-06, "learning_rate": 4.375353275190259e-06, "logits/chosen": -1.2745883464813232, "logits/rejected": -1.381510853767395, "logps/chosen": -360.0860290527344, "logps/rejected": -4364.2265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7939866185188293, "rewards/margins": 39.789894104003906, "rewards/rejected": -40.58388137817383, "step": 7030 }, { "epoch": 30.74235807860262, "grad_norm": 0.0007671156107619268, "learning_rate": 4.372831125692466e-06, "logits/chosen": -1.2264814376831055, "logits/rejected": -1.300046682357788, "logps/chosen": -352.18304443359375, "logps/rejected": -3849.25927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7468708753585815, "rewards/margins": 34.994163513183594, "rewards/rejected": -35.74103546142578, "step": 7040 }, { "epoch": 30.786026200873362, "grad_norm": 7.361956767056248e-05, "learning_rate": 4.370304624661523e-06, "logits/chosen": -1.1931875944137573, "logits/rejected": -1.3577913045883179, "logps/chosen": -375.79742431640625, "logps/rejected": -3688.157470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.720754861831665, "rewards/margins": 33.65407943725586, "rewards/rejected": -34.37483596801758, "step": 7050 }, { "epoch": 30.829694323144103, "grad_norm": 0.00015480064458032487, "learning_rate": 4.367773777967769e-06, "logits/chosen": -1.2653008699417114, "logits/rejected": -1.3640549182891846, "logps/chosen": -369.0059814453125, "logps/rejected": -4272.9375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7573091387748718, "rewards/margins": 38.971046447753906, "rewards/rejected": -39.728355407714844, "step": 7060 }, { "epoch": 30.87336244541485, "grad_norm": 0.00022070834102001045, "learning_rate": 4.36523859149164e-06, "logits/chosen": -1.3018602132797241, "logits/rejected": -1.4329675436019897, "logps/chosen": -336.05133056640625, "logps/rejected": -4569.8681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7025894522666931, "rewards/margins": 41.80839920043945, "rewards/rejected": -42.510990142822266, "step": 7070 }, { "epoch": 30.91703056768559, "grad_norm": 2.7098862996538604e-06, "learning_rate": 4.362699071123655e-06, "logits/chosen": -1.215356707572937, "logits/rejected": -1.3080543279647827, "logps/chosen": -348.70001220703125, "logps/rejected": -3633.80126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5964152812957764, "rewards/margins": 33.0515251159668, "rewards/rejected": -33.64794158935547, "step": 7080 }, { "epoch": 30.96069868995633, "grad_norm": 4.617829766904715e-06, "learning_rate": 4.360155222764404e-06, "logits/chosen": -1.261244535446167, "logits/rejected": -1.3663008213043213, "logps/chosen": -352.2098693847656, "logps/rejected": -4237.82958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.707321047782898, "rewards/margins": 38.61404800415039, "rewards/rejected": -39.32136917114258, "step": 7090 }, { "epoch": 31.004366812227076, "grad_norm": 1.7086592824497624e-05, "learning_rate": 4.3576070523245315e-06, "logits/chosen": -1.2053104639053345, "logits/rejected": -1.3605915307998657, "logps/chosen": -357.6734619140625, "logps/rejected": -4097.60009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8176485300064087, "rewards/margins": 37.39099884033203, "rewards/rejected": -38.208641052246094, "step": 7100 }, { "epoch": 31.048034934497817, "grad_norm": 8.568638841891928e-06, "learning_rate": 4.355054565724726e-06, "logits/chosen": -1.2114616632461548, "logits/rejected": -1.320845365524292, "logps/chosen": -339.17779541015625, "logps/rejected": -4114.9814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7545214891433716, "rewards/margins": 37.452056884765625, "rewards/rejected": -38.206581115722656, "step": 7110 }, { "epoch": 31.09170305676856, "grad_norm": 9.662508844342137e-06, "learning_rate": 4.352497768895702e-06, "logits/chosen": -1.1899452209472656, "logits/rejected": -1.3518033027648926, "logps/chosen": -361.21026611328125, "logps/rejected": -3738.10693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6881293058395386, "rewards/margins": 34.012168884277344, "rewards/rejected": -34.70029830932617, "step": 7120 }, { "epoch": 31.1353711790393, "grad_norm": 2.3527771596664396e-06, "learning_rate": 4.349936667778193e-06, "logits/chosen": -1.207578420639038, "logits/rejected": -1.3407763242721558, "logps/chosen": -376.87884521484375, "logps/rejected": -3918.35888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7781400680541992, "rewards/margins": 35.48493194580078, "rewards/rejected": -36.26306915283203, "step": 7130 }, { "epoch": 31.179039301310045, "grad_norm": 3.5769352979718767e-06, "learning_rate": 4.347371268322929e-06, "logits/chosen": -1.2195560932159424, "logits/rejected": -1.334215760231018, "logps/chosen": -371.56707763671875, "logps/rejected": -3564.625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6947908401489258, "rewards/margins": 32.38021469116211, "rewards/rejected": -33.07500457763672, "step": 7140 }, { "epoch": 31.222707423580786, "grad_norm": 0.00018777142649174891, "learning_rate": 4.344801576490631e-06, "logits/chosen": -1.277087926864624, "logits/rejected": -1.3954721689224243, "logps/chosen": -345.8432922363281, "logps/rejected": -4176.779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.769226610660553, "rewards/margins": 38.029747009277344, "rewards/rejected": -38.798973083496094, "step": 7150 }, { "epoch": 31.266375545851528, "grad_norm": 2.0459998543112783e-06, "learning_rate": 4.3422275982519915e-06, "logits/chosen": -1.2126305103302002, "logits/rejected": -1.3569484949111938, "logps/chosen": -356.0064697265625, "logps/rejected": -3731.30810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7122482061386108, "rewards/margins": 33.928375244140625, "rewards/rejected": -34.640625, "step": 7160 }, { "epoch": 31.310043668122272, "grad_norm": 1.2118386356142432e-05, "learning_rate": 4.3396493395876616e-06, "logits/chosen": -1.2246806621551514, "logits/rejected": -1.403018832206726, "logps/chosen": -345.6824951171875, "logps/rejected": -4311.69384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6512702703475952, "rewards/margins": 39.50627517700195, "rewards/rejected": -40.15754699707031, "step": 7170 }, { "epoch": 31.353711790393014, "grad_norm": 2.7338740003130192e-06, "learning_rate": 4.33706680648824e-06, "logits/chosen": -1.1962507963180542, "logits/rejected": -1.3303263187408447, "logps/chosen": -362.041259765625, "logps/rejected": -3290.303955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6760124564170837, "rewards/margins": 29.80277442932129, "rewards/rejected": -30.478784561157227, "step": 7180 }, { "epoch": 31.397379912663755, "grad_norm": 5.679263991947387e-06, "learning_rate": 4.334480004954256e-06, "logits/chosen": -1.22044837474823, "logits/rejected": -1.3614122867584229, "logps/chosen": -335.80841064453125, "logps/rejected": -3878.887939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6820683479309082, "rewards/margins": 35.411033630371094, "rewards/rejected": -36.093101501464844, "step": 7190 }, { "epoch": 31.441048034934497, "grad_norm": 3.694662557284848e-06, "learning_rate": 4.3318889409961574e-06, "logits/chosen": -1.2268167734146118, "logits/rejected": -1.360828161239624, "logps/chosen": -343.05047607421875, "logps/rejected": -3893.788330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6865259408950806, "rewards/margins": 35.42948913574219, "rewards/rejected": -36.11601638793945, "step": 7200 }, { "epoch": 31.48471615720524, "grad_norm": 2.724973772701078e-05, "learning_rate": 4.329293620634294e-06, "logits/chosen": -1.2480148077011108, "logits/rejected": -1.4153028726577759, "logps/chosen": -334.2532958984375, "logps/rejected": -4467.47705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.750078558921814, "rewards/margins": 40.9984245300293, "rewards/rejected": -41.74850082397461, "step": 7210 }, { "epoch": 31.528384279475983, "grad_norm": 4.2271325785552367e-05, "learning_rate": 4.326694049898907e-06, "logits/chosen": -1.2383965253829956, "logits/rejected": -1.3806605339050293, "logps/chosen": -354.7417297363281, "logps/rejected": -4090.72021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8497894406318665, "rewards/margins": 37.241729736328125, "rewards/rejected": -38.09151840209961, "step": 7220 }, { "epoch": 31.572052401746724, "grad_norm": 2.7316466036243255e-06, "learning_rate": 4.324090234830113e-06, "logits/chosen": -1.2950143814086914, "logits/rejected": -1.3985669612884521, "logps/chosen": -342.30206298828125, "logps/rejected": -4477.26025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.787964940071106, "rewards/margins": 40.89179229736328, "rewards/rejected": -41.67975997924805, "step": 7230 }, { "epoch": 31.61572052401747, "grad_norm": 2.128798416198738e-06, "learning_rate": 4.321482181477891e-06, "logits/chosen": -1.2052218914031982, "logits/rejected": -1.3491132259368896, "logps/chosen": -372.20562744140625, "logps/rejected": -3985.75634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7558681964874268, "rewards/margins": 36.21317672729492, "rewards/rejected": -36.96904754638672, "step": 7240 }, { "epoch": 31.65938864628821, "grad_norm": 2.9148334357682152e-05, "learning_rate": 4.3188698959020655e-06, "logits/chosen": -1.2039515972137451, "logits/rejected": -1.3129243850708008, "logps/chosen": -367.0706787109375, "logps/rejected": -3223.102783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7878437042236328, "rewards/margins": 29.098758697509766, "rewards/rejected": -29.886606216430664, "step": 7250 }, { "epoch": 31.70305676855895, "grad_norm": 5.468142859008707e-05, "learning_rate": 4.3162533841722975e-06, "logits/chosen": -1.2583099603652954, "logits/rejected": -1.4027049541473389, "logps/chosen": -357.67303466796875, "logps/rejected": -3991.359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7164020538330078, "rewards/margins": 36.41591262817383, "rewards/rejected": -37.1323127746582, "step": 7260 }, { "epoch": 31.746724890829693, "grad_norm": 6.895106325242244e-05, "learning_rate": 4.313632652368065e-06, "logits/chosen": -1.1786898374557495, "logits/rejected": -1.344584345817566, "logps/chosen": -341.8398132324219, "logps/rejected": -3944.74267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7479851841926575, "rewards/margins": 35.973182678222656, "rewards/rejected": -36.72116470336914, "step": 7270 }, { "epoch": 31.790393013100438, "grad_norm": 2.8772731169311288e-06, "learning_rate": 4.311007706578654e-06, "logits/chosen": -1.2024098634719849, "logits/rejected": -1.3799335956573486, "logps/chosen": -352.6366882324219, "logps/rejected": -4028.465576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7399144768714905, "rewards/margins": 36.849266052246094, "rewards/rejected": -37.58917999267578, "step": 7280 }, { "epoch": 31.83406113537118, "grad_norm": 3.3484541774773347e-06, "learning_rate": 4.308378552903137e-06, "logits/chosen": -1.210569143295288, "logits/rejected": -1.3320887088775635, "logps/chosen": -381.4495544433594, "logps/rejected": -3605.823486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8926548957824707, "rewards/margins": 32.52997970581055, "rewards/rejected": -33.42264175415039, "step": 7290 }, { "epoch": 31.87772925764192, "grad_norm": 1.3699317153658765e-06, "learning_rate": 4.30574519745037e-06, "logits/chosen": -1.2799617052078247, "logits/rejected": -1.422360897064209, "logps/chosen": -334.28887939453125, "logps/rejected": -4399.7109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7796693444252014, "rewards/margins": 40.208099365234375, "rewards/rejected": -40.987770080566406, "step": 7300 }, { "epoch": 31.921397379912662, "grad_norm": 3.14174420529728e-05, "learning_rate": 4.303107646338965e-06, "logits/chosen": -1.2544456720352173, "logits/rejected": -1.3935215473175049, "logps/chosen": -350.8890686035156, "logps/rejected": -3946.733154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7067967057228088, "rewards/margins": 35.86093521118164, "rewards/rejected": -36.56772994995117, "step": 7310 }, { "epoch": 31.965065502183407, "grad_norm": 3.990719305011866e-06, "learning_rate": 4.300465905697289e-06, "logits/chosen": -1.24556565284729, "logits/rejected": -1.3790409564971924, "logps/chosen": -351.58441162109375, "logps/rejected": -3952.813232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7560165524482727, "rewards/margins": 36.04497146606445, "rewards/rejected": -36.800987243652344, "step": 7320 }, { "epoch": 32.00873362445415, "grad_norm": 6.363365481936426e-06, "learning_rate": 4.29781998166344e-06, "logits/chosen": -1.213643193244934, "logits/rejected": -1.3460938930511475, "logps/chosen": -351.1808166503906, "logps/rejected": -3778.42578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6883854866027832, "rewards/margins": 34.452613830566406, "rewards/rejected": -35.14099884033203, "step": 7330 }, { "epoch": 32.05240174672489, "grad_norm": 2.8853311560260353e-06, "learning_rate": 4.295169880385236e-06, "logits/chosen": -1.2248460054397583, "logits/rejected": -1.3701550960540771, "logps/chosen": -365.95172119140625, "logps/rejected": -3724.58935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7085824012756348, "rewards/margins": 33.87826919555664, "rewards/rejected": -34.58685302734375, "step": 7340 }, { "epoch": 32.096069868995635, "grad_norm": 4.30458954416486e-05, "learning_rate": 4.292515608020202e-06, "logits/chosen": -1.274010181427002, "logits/rejected": -1.392160415649414, "logps/chosen": -364.72747802734375, "logps/rejected": -3986.03564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7659487128257751, "rewards/margins": 36.271297454833984, "rewards/rejected": -37.03724670410156, "step": 7350 }, { "epoch": 32.13973799126637, "grad_norm": 1.4059403345865256e-06, "learning_rate": 4.289857170735553e-06, "logits/chosen": -1.2283358573913574, "logits/rejected": -1.3779528141021729, "logps/chosen": -352.3643493652344, "logps/rejected": -3788.1796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6499203443527222, "rewards/margins": 34.52995681762695, "rewards/rejected": -35.179874420166016, "step": 7360 }, { "epoch": 32.18340611353712, "grad_norm": 9.384727936162484e-05, "learning_rate": 4.287194574708184e-06, "logits/chosen": -1.2526211738586426, "logits/rejected": -1.3693801164627075, "logps/chosen": -341.404541015625, "logps/rejected": -4066.45654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7596755623817444, "rewards/margins": 36.98579025268555, "rewards/rejected": -37.7454719543457, "step": 7370 }, { "epoch": 32.22707423580786, "grad_norm": 1.8582588163603534e-06, "learning_rate": 4.2845278261246495e-06, "logits/chosen": -1.251461386680603, "logits/rejected": -1.3726623058319092, "logps/chosen": -354.71173095703125, "logps/rejected": -4015.11669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7755028605461121, "rewards/margins": 36.59066390991211, "rewards/rejected": -37.36616897583008, "step": 7380 }, { "epoch": 32.2707423580786, "grad_norm": 2.156399556801771e-06, "learning_rate": 4.281856931181155e-06, "logits/chosen": -1.2513954639434814, "logits/rejected": -1.3614658117294312, "logps/chosen": -346.15234375, "logps/rejected": -3787.856689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7673665285110474, "rewards/margins": 34.36830520629883, "rewards/rejected": -35.13566589355469, "step": 7390 }, { "epoch": 32.314410480349345, "grad_norm": 1.6895300083767214e-06, "learning_rate": 4.2791818960835395e-06, "logits/chosen": -1.2622281312942505, "logits/rejected": -1.4122951030731201, "logps/chosen": -326.8980712890625, "logps/rejected": -4494.28857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7493070363998413, "rewards/margins": 41.07365417480469, "rewards/rejected": -41.82296371459961, "step": 7400 }, { "epoch": 32.35807860262009, "grad_norm": 3.0597016611059555e-06, "learning_rate": 4.276502727047261e-06, "logits/chosen": -1.2417489290237427, "logits/rejected": -1.4095876216888428, "logps/chosen": -360.5323181152344, "logps/rejected": -4004.661376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7397840619087219, "rewards/margins": 36.55213928222656, "rewards/rejected": -37.29192352294922, "step": 7410 }, { "epoch": 32.40174672489083, "grad_norm": 4.68673179468639e-06, "learning_rate": 4.273819430297382e-06, "logits/chosen": -1.2121702432632446, "logits/rejected": -1.3947778940200806, "logps/chosen": -388.80523681640625, "logps/rejected": -3661.530517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8232305645942688, "rewards/margins": 33.287662506103516, "rewards/rejected": -34.11089324951172, "step": 7420 }, { "epoch": 32.44541484716157, "grad_norm": 2.6926670912729384e-06, "learning_rate": 4.27113201206856e-06, "logits/chosen": -1.1733366250991821, "logits/rejected": -1.34346604347229, "logps/chosen": -393.52532958984375, "logps/rejected": -3377.947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7137469053268433, "rewards/margins": 30.618606567382812, "rewards/rejected": -31.332351684570312, "step": 7430 }, { "epoch": 32.48908296943232, "grad_norm": 3.510629021143971e-06, "learning_rate": 4.268440478605021e-06, "logits/chosen": -1.2562296390533447, "logits/rejected": -1.4219210147857666, "logps/chosen": -326.5792541503906, "logps/rejected": -4294.56982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6983938813209534, "rewards/margins": 39.378868103027344, "rewards/rejected": -40.07726287841797, "step": 7440 }, { "epoch": 32.532751091703055, "grad_norm": 3.87597252732317e-05, "learning_rate": 4.265744836160561e-06, "logits/chosen": -1.2644939422607422, "logits/rejected": -1.3922584056854248, "logps/chosen": -301.2896728515625, "logps/rejected": -4445.53076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7385694980621338, "rewards/margins": 40.761871337890625, "rewards/rejected": -41.50043869018555, "step": 7450 }, { "epoch": 32.5764192139738, "grad_norm": 5.6627402662649356e-05, "learning_rate": 4.263045090998519e-06, "logits/chosen": -1.2203431129455566, "logits/rejected": -1.3533806800842285, "logps/chosen": -365.28668212890625, "logps/rejected": -3761.39599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7248810529708862, "rewards/margins": 34.22005081176758, "rewards/rejected": -34.9449348449707, "step": 7460 }, { "epoch": 32.620087336244545, "grad_norm": 0.00015432166475292874, "learning_rate": 4.260341249391766e-06, "logits/chosen": -1.2123044729232788, "logits/rejected": -1.361824631690979, "logps/chosen": -353.8623046875, "logps/rejected": -3920.897216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7701870799064636, "rewards/margins": 35.71456527709961, "rewards/rejected": -36.4847526550293, "step": 7470 }, { "epoch": 32.66375545851528, "grad_norm": 4.615128784040472e-06, "learning_rate": 4.257633317622695e-06, "logits/chosen": -1.2644091844558716, "logits/rejected": -1.3746843338012695, "logps/chosen": -355.29205322265625, "logps/rejected": -3821.918701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6982828974723816, "rewards/margins": 34.78622055053711, "rewards/rejected": -35.48450469970703, "step": 7480 }, { "epoch": 32.70742358078603, "grad_norm": 1.4778326586478052e-06, "learning_rate": 4.254921301983197e-06, "logits/chosen": -1.2252840995788574, "logits/rejected": -1.389187216758728, "logps/chosen": -353.55560302734375, "logps/rejected": -4103.9970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7494048476219177, "rewards/margins": 37.417423248291016, "rewards/rejected": -38.16682434082031, "step": 7490 }, { "epoch": 32.751091703056765, "grad_norm": 2.102254561754873e-06, "learning_rate": 4.2522052087746565e-06, "logits/chosen": -1.2499911785125732, "logits/rejected": -1.376103401184082, "logps/chosen": -345.08642578125, "logps/rejected": -4235.85498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7850403785705566, "rewards/margins": 38.70276641845703, "rewards/rejected": -39.4878044128418, "step": 7500 }, { "epoch": 32.79475982532751, "grad_norm": 1.1554599123580064e-06, "learning_rate": 4.2494850443079305e-06, "logits/chosen": -1.2307202816009521, "logits/rejected": -1.367653250694275, "logps/chosen": -354.97344970703125, "logps/rejected": -4037.10595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9074983596801758, "rewards/margins": 36.67849349975586, "rewards/rejected": -37.58599090576172, "step": 7510 }, { "epoch": 32.838427947598255, "grad_norm": 1.6832499219258525e-06, "learning_rate": 4.246760814903335e-06, "logits/chosen": -1.2738640308380127, "logits/rejected": -1.3784154653549194, "logps/chosen": -356.9599609375, "logps/rejected": -4133.33203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9600456357002258, "rewards/margins": 37.4758415222168, "rewards/rejected": -38.43588638305664, "step": 7520 }, { "epoch": 32.88209606986899, "grad_norm": 0.00026134145312580755, "learning_rate": 4.244032526890633e-06, "logits/chosen": -1.285121202468872, "logits/rejected": -1.44183349609375, "logps/chosen": -355.669677734375, "logps/rejected": -4111.4619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.762496829032898, "rewards/margins": 37.50836944580078, "rewards/rejected": -38.27085876464844, "step": 7530 }, { "epoch": 32.92576419213974, "grad_norm": 3.0694294266078563e-06, "learning_rate": 4.241300186609015e-06, "logits/chosen": -1.2284746170043945, "logits/rejected": -1.4103200435638428, "logps/chosen": -393.52154541015625, "logps/rejected": -3493.884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8149396777153015, "rewards/margins": 31.69277000427246, "rewards/rejected": -32.50770950317383, "step": 7540 }, { "epoch": 32.96943231441048, "grad_norm": 2.3217029071795267e-06, "learning_rate": 4.2385638004070895e-06, "logits/chosen": -1.204528570175171, "logits/rejected": -1.3612297773361206, "logps/chosen": -357.392578125, "logps/rejected": -3822.856689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9371939897537231, "rewards/margins": 34.665016174316406, "rewards/rejected": -35.602210998535156, "step": 7550 }, { "epoch": 33.01310043668122, "grad_norm": 2.884633469154507e-06, "learning_rate": 4.235823374642863e-06, "logits/chosen": -1.2591968774795532, "logits/rejected": -1.4091991186141968, "logps/chosen": -369.5581970214844, "logps/rejected": -4147.099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7272570133209229, "rewards/margins": 37.778770446777344, "rewards/rejected": -38.50602722167969, "step": 7560 }, { "epoch": 33.056768558951966, "grad_norm": 5.200453079979777e-06, "learning_rate": 4.23307891568373e-06, "logits/chosen": -1.2729848623275757, "logits/rejected": -1.4061335325241089, "logps/chosen": -350.02862548828125, "logps/rejected": -4090.792236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8273415565490723, "rewards/margins": 37.30099105834961, "rewards/rejected": -38.128334045410156, "step": 7570 }, { "epoch": 33.10043668122271, "grad_norm": 2.1654925688077908e-06, "learning_rate": 4.230330429906457e-06, "logits/chosen": -1.2340761423110962, "logits/rejected": -1.3774033784866333, "logps/chosen": -352.74200439453125, "logps/rejected": -3955.417236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7767871022224426, "rewards/margins": 36.06307601928711, "rewards/rejected": -36.83986282348633, "step": 7580 }, { "epoch": 33.14410480349345, "grad_norm": 2.1447700894250606e-06, "learning_rate": 4.227577923697162e-06, "logits/chosen": -1.19509756565094, "logits/rejected": -1.3899462223052979, "logps/chosen": -355.56219482421875, "logps/rejected": -3764.258544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7352873682975769, "rewards/margins": 34.3662109375, "rewards/rejected": -35.101497650146484, "step": 7590 }, { "epoch": 33.18777292576419, "grad_norm": 1.9487440812794245e-06, "learning_rate": 4.2248214034513114e-06, "logits/chosen": -1.1854406595230103, "logits/rejected": -1.3556092977523804, "logps/chosen": -366.70867919921875, "logps/rejected": -3844.30517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8353007435798645, "rewards/margins": 34.80233383178711, "rewards/rejected": -35.637638092041016, "step": 7600 }, { "epoch": 33.23144104803494, "grad_norm": 3.4082239657480536e-06, "learning_rate": 4.222060875573694e-06, "logits/chosen": -1.2410470247268677, "logits/rejected": -1.4221525192260742, "logps/chosen": -403.26837158203125, "logps/rejected": -3834.374267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8527250289916992, "rewards/margins": 34.78178405761719, "rewards/rejected": -35.6345100402832, "step": 7610 }, { "epoch": 33.275109170305676, "grad_norm": 3.919090202887854e-05, "learning_rate": 4.219296346478409e-06, "logits/chosen": -1.2760426998138428, "logits/rejected": -1.4161678552627563, "logps/chosen": -365.36468505859375, "logps/rejected": -3796.38671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7951620221138, "rewards/margins": 34.47158432006836, "rewards/rejected": -35.266746520996094, "step": 7620 }, { "epoch": 33.31877729257642, "grad_norm": 1.7631186645671085e-06, "learning_rate": 4.216527822588857e-06, "logits/chosen": -1.2264050245285034, "logits/rejected": -1.4113860130310059, "logps/chosen": -366.811279296875, "logps/rejected": -3853.918701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8015092015266418, "rewards/margins": 35.06343078613281, "rewards/rejected": -35.86494064331055, "step": 7630 }, { "epoch": 33.36244541484716, "grad_norm": 1.5389748385384555e-06, "learning_rate": 4.213755310337717e-06, "logits/chosen": -1.2222731113433838, "logits/rejected": -1.4038515090942383, "logps/chosen": -381.5045471191406, "logps/rejected": -4545.6064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8840065002441406, "rewards/margins": 41.475440979003906, "rewards/rejected": -42.35944366455078, "step": 7640 }, { "epoch": 33.4061135371179, "grad_norm": 2.226890847489939e-06, "learning_rate": 4.210978816166937e-06, "logits/chosen": -1.2522802352905273, "logits/rejected": -1.399115800857544, "logps/chosen": -377.13702392578125, "logps/rejected": -3872.77001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.75348961353302, "rewards/margins": 35.2464714050293, "rewards/rejected": -35.999961853027344, "step": 7650 }, { "epoch": 33.44978165938865, "grad_norm": 1.5599739256237314e-06, "learning_rate": 4.208198346527715e-06, "logits/chosen": -1.3109664916992188, "logits/rejected": -1.4332094192504883, "logps/chosen": -351.0946350097656, "logps/rejected": -4294.6875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8502732515335083, "rewards/margins": 39.127296447753906, "rewards/rejected": -39.977569580078125, "step": 7660 }, { "epoch": 33.493449781659386, "grad_norm": 3.3653669202930796e-06, "learning_rate": 4.205413907880487e-06, "logits/chosen": -1.2800263166427612, "logits/rejected": -1.3989861011505127, "logps/chosen": -348.6878356933594, "logps/rejected": -4487.2666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7702924609184265, "rewards/margins": 40.90445327758789, "rewards/rejected": -41.674739837646484, "step": 7670 }, { "epoch": 33.53711790393013, "grad_norm": 2.66820843762326e-06, "learning_rate": 4.202625506694911e-06, "logits/chosen": -1.2840039730072021, "logits/rejected": -1.4187307357788086, "logps/chosen": -336.16632080078125, "logps/rejected": -4453.56640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8968279957771301, "rewards/margins": 40.48770523071289, "rewards/rejected": -41.3845329284668, "step": 7680 }, { "epoch": 33.580786026200876, "grad_norm": 4.1446378048667244e-05, "learning_rate": 4.199833149449853e-06, "logits/chosen": -1.2043997049331665, "logits/rejected": -1.3633426427841187, "logps/chosen": -374.8032531738281, "logps/rejected": -4000.13818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7675725221633911, "rewards/margins": 36.43967056274414, "rewards/rejected": -37.207244873046875, "step": 7690 }, { "epoch": 33.624454148471614, "grad_norm": 5.9104295898745554e-06, "learning_rate": 4.197036842633371e-06, "logits/chosen": -1.283716082572937, "logits/rejected": -1.4483458995819092, "logps/chosen": -354.49151611328125, "logps/rejected": -4572.85546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7972037196159363, "rewards/margins": 41.78077697753906, "rewards/rejected": -42.57798385620117, "step": 7700 }, { "epoch": 33.66812227074236, "grad_norm": 1.927050766431394e-05, "learning_rate": 4.194236592742696e-06, "logits/chosen": -1.2654062509536743, "logits/rejected": -1.4586834907531738, "logps/chosen": -325.3616027832031, "logps/rejected": -4648.4794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6731284260749817, "rewards/margins": 42.62810134887695, "rewards/rejected": -43.301231384277344, "step": 7710 }, { "epoch": 33.7117903930131, "grad_norm": 0.00015700413281446998, "learning_rate": 4.191432406284226e-06, "logits/chosen": -1.1688849925994873, "logits/rejected": -1.3615785837173462, "logps/chosen": -361.1780700683594, "logps/rejected": -3412.759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8125074505805969, "rewards/margins": 30.86538314819336, "rewards/rejected": -31.677892684936523, "step": 7720 }, { "epoch": 33.75545851528384, "grad_norm": 4.834371995817511e-06, "learning_rate": 4.1886242897735015e-06, "logits/chosen": -1.2965962886810303, "logits/rejected": -1.462483525276184, "logps/chosen": -340.96087646484375, "logps/rejected": -4114.88623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7840489149093628, "rewards/margins": 37.52985763549805, "rewards/rejected": -38.31391143798828, "step": 7730 }, { "epoch": 33.799126637554586, "grad_norm": 2.1611832193288986e-06, "learning_rate": 4.185812249735198e-06, "logits/chosen": -1.2512245178222656, "logits/rejected": -1.4087626934051514, "logps/chosen": -357.2834777832031, "logps/rejected": -3632.99072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7678031921386719, "rewards/margins": 32.975067138671875, "rewards/rejected": -33.74286651611328, "step": 7740 }, { "epoch": 33.842794759825324, "grad_norm": 3.5219554394548687e-06, "learning_rate": 4.1829962927031035e-06, "logits/chosen": -1.2741248607635498, "logits/rejected": -1.4147870540618896, "logps/chosen": -366.05877685546875, "logps/rejected": -4028.452392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8099355697631836, "rewards/margins": 36.59396743774414, "rewards/rejected": -37.403907775878906, "step": 7750 }, { "epoch": 33.88646288209607, "grad_norm": 3.304968336473043e-05, "learning_rate": 4.180176425220111e-06, "logits/chosen": -1.2810531854629517, "logits/rejected": -1.459830403327942, "logps/chosen": -359.25592041015625, "logps/rejected": -3996.86572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7372865676879883, "rewards/margins": 36.537620544433594, "rewards/rejected": -37.27490997314453, "step": 7760 }, { "epoch": 33.930131004366814, "grad_norm": 1.3326466137424193e-06, "learning_rate": 4.1773526538381985e-06, "logits/chosen": -1.218811273574829, "logits/rejected": -1.4801099300384521, "logps/chosen": -392.5830993652344, "logps/rejected": -3863.12646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7709678411483765, "rewards/margins": 35.27763366699219, "rewards/rejected": -36.04860305786133, "step": 7770 }, { "epoch": 33.97379912663755, "grad_norm": 1.0322840930301714e-06, "learning_rate": 4.174524985118411e-06, "logits/chosen": -1.319282054901123, "logits/rejected": -1.4507997035980225, "logps/chosen": -364.3254699707031, "logps/rejected": -3997.526611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8087431788444519, "rewards/margins": 36.29662322998047, "rewards/rejected": -37.10536575317383, "step": 7780 }, { "epoch": 34.0174672489083, "grad_norm": 2.4227709263954044e-06, "learning_rate": 4.171693425630854e-06, "logits/chosen": -1.2554786205291748, "logits/rejected": -1.433192491531372, "logps/chosen": -354.80670166015625, "logps/rejected": -4518.404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9121116399765015, "rewards/margins": 41.210533142089844, "rewards/rejected": -42.122642517089844, "step": 7790 }, { "epoch": 34.06113537117904, "grad_norm": 1.8705226781622543e-06, "learning_rate": 4.16885798195467e-06, "logits/chosen": -1.3113257884979248, "logits/rejected": -1.5079411268234253, "logps/chosen": -339.63043212890625, "logps/rejected": -4592.2353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7810433506965637, "rewards/margins": 42.12697219848633, "rewards/rejected": -42.90801239013672, "step": 7800 }, { "epoch": 34.10480349344978, "grad_norm": 3.4493124887197338e-06, "learning_rate": 4.166018660678029e-06, "logits/chosen": -1.2827914953231812, "logits/rejected": -1.4444687366485596, "logps/chosen": -356.8995056152344, "logps/rejected": -3692.758544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8508466482162476, "rewards/margins": 33.48554229736328, "rewards/rejected": -34.33639144897461, "step": 7810 }, { "epoch": 34.148471615720524, "grad_norm": 2.0841485677031756e-06, "learning_rate": 4.163175468398108e-06, "logits/chosen": -1.263437271118164, "logits/rejected": -1.4561338424682617, "logps/chosen": -374.9819030761719, "logps/rejected": -3720.340576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7984741926193237, "rewards/margins": 33.81209182739258, "rewards/rejected": -34.610565185546875, "step": 7820 }, { "epoch": 34.19213973799127, "grad_norm": 4.5477848193319785e-06, "learning_rate": 4.16032841172108e-06, "logits/chosen": -1.2381798028945923, "logits/rejected": -1.4538114070892334, "logps/chosen": -351.0126953125, "logps/rejected": -4120.42529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8728067278862, "rewards/margins": 37.46091842651367, "rewards/rejected": -38.33372116088867, "step": 7830 }, { "epoch": 34.23580786026201, "grad_norm": 8.204137425089234e-05, "learning_rate": 4.157477497262095e-06, "logits/chosen": -1.2343039512634277, "logits/rejected": -1.4122045040130615, "logps/chosen": -385.85906982421875, "logps/rejected": -3572.92431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8313088417053223, "rewards/margins": 32.305137634277344, "rewards/rejected": -33.13644790649414, "step": 7840 }, { "epoch": 34.27947598253275, "grad_norm": 4.276503868454824e-06, "learning_rate": 4.154622731645272e-06, "logits/chosen": -1.290832757949829, "logits/rejected": -1.481961965560913, "logps/chosen": -366.4110412597656, "logps/rejected": -4632.33984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8144643902778625, "rewards/margins": 42.27952575683594, "rewards/rejected": -43.093994140625, "step": 7850 }, { "epoch": 34.3231441048035, "grad_norm": 9.025552980287805e-06, "learning_rate": 4.15176412150367e-06, "logits/chosen": -1.2428648471832275, "logits/rejected": -1.4519894123077393, "logps/chosen": -369.04669189453125, "logps/rejected": -4084.190185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.783187747001648, "rewards/margins": 37.195003509521484, "rewards/rejected": -37.97819900512695, "step": 7860 }, { "epoch": 34.366812227074234, "grad_norm": 9.488913985847851e-06, "learning_rate": 4.148901673479285e-06, "logits/chosen": -1.2578856945037842, "logits/rejected": -1.4205833673477173, "logps/chosen": -347.26788330078125, "logps/rejected": -4288.62255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8104890584945679, "rewards/margins": 39.07762908935547, "rewards/rejected": -39.88811492919922, "step": 7870 }, { "epoch": 34.41048034934498, "grad_norm": 0.00021986364802146352, "learning_rate": 4.146035394223034e-06, "logits/chosen": -1.2532271146774292, "logits/rejected": -1.452984094619751, "logps/chosen": -360.5004577636719, "logps/rejected": -3687.955810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7545972466468811, "rewards/margins": 33.52155303955078, "rewards/rejected": -34.27614974975586, "step": 7880 }, { "epoch": 34.45414847161572, "grad_norm": 1.419368059174851e-06, "learning_rate": 4.14316529039473e-06, "logits/chosen": -1.2847908735275269, "logits/rejected": -1.4299938678741455, "logps/chosen": -359.38726806640625, "logps/rejected": -3979.274658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8164394497871399, "rewards/margins": 36.25011444091797, "rewards/rejected": -37.066551208496094, "step": 7890 }, { "epoch": 34.49781659388646, "grad_norm": 2.0941884833386942e-06, "learning_rate": 4.140291368663073e-06, "logits/chosen": -1.2830313444137573, "logits/rejected": -1.4495540857315063, "logps/chosen": -373.39581298828125, "logps/rejected": -3915.74462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9469231367111206, "rewards/margins": 35.384769439697266, "rewards/rejected": -36.331687927246094, "step": 7900 }, { "epoch": 34.54148471615721, "grad_norm": 1.9770737814288263e-06, "learning_rate": 4.137413635705639e-06, "logits/chosen": -1.2805912494659424, "logits/rejected": -1.477117896080017, "logps/chosen": -354.39227294921875, "logps/rejected": -4345.2470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9356716871261597, "rewards/margins": 39.59673309326172, "rewards/rejected": -40.53240966796875, "step": 7910 }, { "epoch": 34.585152838427945, "grad_norm": 2.3759892144047537e-06, "learning_rate": 4.134532098208852e-06, "logits/chosen": -1.2573152780532837, "logits/rejected": -1.4514788389205933, "logps/chosen": -394.76123046875, "logps/rejected": -3708.734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7607026100158691, "rewards/margins": 33.73967361450195, "rewards/rejected": -34.50037384033203, "step": 7920 }, { "epoch": 34.62882096069869, "grad_norm": 2.4308889506837638e-05, "learning_rate": 4.131646762867984e-06, "logits/chosen": -1.2707178592681885, "logits/rejected": -1.4722950458526611, "logps/chosen": -357.1829528808594, "logps/rejected": -4148.14306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7245969772338867, "rewards/margins": 37.938270568847656, "rewards/rejected": -38.662872314453125, "step": 7930 }, { "epoch": 34.672489082969435, "grad_norm": 8.440823018886822e-05, "learning_rate": 4.128757636387123e-06, "logits/chosen": -1.2849804162979126, "logits/rejected": -1.4860570430755615, "logps/chosen": -384.27093505859375, "logps/rejected": -4063.54296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.916800320148468, "rewards/margins": 36.90120315551758, "rewards/rejected": -37.8180046081543, "step": 7940 }, { "epoch": 34.71615720524017, "grad_norm": 1.2543942703617618e-06, "learning_rate": 4.125864725479173e-06, "logits/chosen": -1.292121171951294, "logits/rejected": -1.4840447902679443, "logps/chosen": -353.4554443359375, "logps/rejected": -4112.63916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7546106576919556, "rewards/margins": 37.55231475830078, "rewards/rejected": -38.306922912597656, "step": 7950 }, { "epoch": 34.75982532751092, "grad_norm": 1.993539533194436e-06, "learning_rate": 4.122968036865827e-06, "logits/chosen": -1.2924474477767944, "logits/rejected": -1.456191062927246, "logps/chosen": -350.36614990234375, "logps/rejected": -4488.60400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9632463455200195, "rewards/margins": 40.81956481933594, "rewards/rejected": -41.782806396484375, "step": 7960 }, { "epoch": 34.80349344978166, "grad_norm": 4.6834119798176505e-05, "learning_rate": 4.120067577277556e-06, "logits/chosen": -1.2570297718048096, "logits/rejected": -1.442458152770996, "logps/chosen": -373.92333984375, "logps/rejected": -4007.459716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9284345507621765, "rewards/margins": 36.28133773803711, "rewards/rejected": -37.20977020263672, "step": 7970 }, { "epoch": 34.8471615720524, "grad_norm": 2.3140993618944047e-06, "learning_rate": 4.1171633534535934e-06, "logits/chosen": -1.2596558332443237, "logits/rejected": -1.483156681060791, "logps/chosen": -339.53839111328125, "logps/rejected": -4554.5927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7306458353996277, "rewards/margins": 41.79889678955078, "rewards/rejected": -42.529537200927734, "step": 7980 }, { "epoch": 34.890829694323145, "grad_norm": 2.681377176309134e-06, "learning_rate": 4.114255372141919e-06, "logits/chosen": -1.2837038040161133, "logits/rejected": -1.4738796949386597, "logps/chosen": -373.79168701171875, "logps/rejected": -3771.08349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9235410690307617, "rewards/margins": 34.18318557739258, "rewards/rejected": -35.106727600097656, "step": 7990 }, { "epoch": 34.93449781659389, "grad_norm": 2.8926194137201216e-05, "learning_rate": 4.111343640099243e-06, "logits/chosen": -1.2614266872406006, "logits/rejected": -1.4271290302276611, "logps/chosen": -343.24481201171875, "logps/rejected": -3933.530029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8164898157119751, "rewards/margins": 35.81548309326172, "rewards/rejected": -36.6319694519043, "step": 8000 }, { "epoch": 34.97816593886463, "grad_norm": 3.4106947750976845e-06, "learning_rate": 4.108428164090992e-06, "logits/chosen": -1.25531005859375, "logits/rejected": -1.486797571182251, "logps/chosen": -375.9328308105469, "logps/rejected": -4134.6298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8012577295303345, "rewards/margins": 37.807655334472656, "rewards/rejected": -38.608909606933594, "step": 8010 }, { "epoch": 35.02183406113537, "grad_norm": 8.94421052266161e-07, "learning_rate": 4.1055089508912874e-06, "logits/chosen": -1.205916404724121, "logits/rejected": -1.4006872177124023, "logps/chosen": -375.0865173339844, "logps/rejected": -3988.31689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9585872888565063, "rewards/margins": 36.198970794677734, "rewards/rejected": -37.157562255859375, "step": 8020 }, { "epoch": 35.06550218340611, "grad_norm": 1.58779658277359e-06, "learning_rate": 4.102586007282938e-06, "logits/chosen": -1.2351295948028564, "logits/rejected": -1.4398813247680664, "logps/chosen": -363.1708984375, "logps/rejected": -4182.375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9136743545532227, "rewards/margins": 38.125770568847656, "rewards/rejected": -39.03944396972656, "step": 8030 }, { "epoch": 35.109170305676855, "grad_norm": 1.0674722406947742e-05, "learning_rate": 4.099659340057418e-06, "logits/chosen": -1.2434629201889038, "logits/rejected": -1.4664900302886963, "logps/chosen": -364.34918212890625, "logps/rejected": -3830.77001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8164265751838684, "rewards/margins": 34.8953742980957, "rewards/rejected": -35.71179962158203, "step": 8040 }, { "epoch": 35.1528384279476, "grad_norm": 0.0001105829254932205, "learning_rate": 4.096728956014857e-06, "logits/chosen": -1.2731273174285889, "logits/rejected": -1.469321608543396, "logps/chosen": -371.5960998535156, "logps/rejected": -4317.51904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8098027110099792, "rewards/margins": 39.54979705810547, "rewards/rejected": -40.35960006713867, "step": 8050 }, { "epoch": 35.19650655021834, "grad_norm": 3.676623338840386e-06, "learning_rate": 4.0937948619640145e-06, "logits/chosen": -1.289573073387146, "logits/rejected": -1.4733197689056396, "logps/chosen": -380.588623046875, "logps/rejected": -4177.57763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8976618647575378, "rewards/margins": 37.97679901123047, "rewards/rejected": -38.87446212768555, "step": 8060 }, { "epoch": 35.24017467248908, "grad_norm": 2.1318860422318363e-05, "learning_rate": 4.090857064722277e-06, "logits/chosen": -1.2636449337005615, "logits/rejected": -1.4504867792129517, "logps/chosen": -368.8955993652344, "logps/rejected": -4062.15625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7642645835876465, "rewards/margins": 36.99481201171875, "rewards/rejected": -37.759071350097656, "step": 8070 }, { "epoch": 35.28384279475983, "grad_norm": 1.3692182608694979e-06, "learning_rate": 4.0879155711156294e-06, "logits/chosen": -1.2404592037200928, "logits/rejected": -1.408039927482605, "logps/chosen": -366.6426086425781, "logps/rejected": -3800.624267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7961522936820984, "rewards/margins": 34.58699035644531, "rewards/rejected": -35.38314437866211, "step": 8080 }, { "epoch": 35.327510917030565, "grad_norm": 2.353236024645166e-06, "learning_rate": 4.084970387978649e-06, "logits/chosen": -1.2661757469177246, "logits/rejected": -1.4472781419754028, "logps/chosen": -383.9104919433594, "logps/rejected": -3854.43896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7786067724227905, "rewards/margins": 35.02387237548828, "rewards/rejected": -35.80248260498047, "step": 8090 }, { "epoch": 35.37117903930131, "grad_norm": 2.8391581144359103e-05, "learning_rate": 4.082021522154485e-06, "logits/chosen": -1.2751699686050415, "logits/rejected": -1.4673973321914673, "logps/chosen": -343.1568298339844, "logps/rejected": -4416.3125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7561488747596741, "rewards/margins": 40.427391052246094, "rewards/rejected": -41.18354797363281, "step": 8100 }, { "epoch": 35.414847161572055, "grad_norm": 2.2912186861451793e-06, "learning_rate": 4.079068980494843e-06, "logits/chosen": -1.2952275276184082, "logits/rejected": -1.4531446695327759, "logps/chosen": -372.14349365234375, "logps/rejected": -3759.913330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8922691345214844, "rewards/margins": 34.06416320800781, "rewards/rejected": -34.9564323425293, "step": 8110 }, { "epoch": 35.45851528384279, "grad_norm": 1.953133162558778e-06, "learning_rate": 4.076112769859968e-06, "logits/chosen": -1.259953260421753, "logits/rejected": -1.4520666599273682, "logps/chosen": -366.9216003417969, "logps/rejected": -4085.288330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7262275815010071, "rewards/margins": 37.330169677734375, "rewards/rejected": -38.056400299072266, "step": 8120 }, { "epoch": 35.50218340611354, "grad_norm": 0.0001209290556416558, "learning_rate": 4.073152897118632e-06, "logits/chosen": -1.2531660795211792, "logits/rejected": -1.4731621742248535, "logps/chosen": -363.76336669921875, "logps/rejected": -4223.5185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.899738609790802, "rewards/margins": 38.4730339050293, "rewards/rejected": -39.372772216796875, "step": 8130 }, { "epoch": 35.54585152838428, "grad_norm": 2.1832466251132044e-06, "learning_rate": 4.070189369148117e-06, "logits/chosen": -1.280074954032898, "logits/rejected": -1.45194411277771, "logps/chosen": -345.72271728515625, "logps/rejected": -4048.967529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7918404936790466, "rewards/margins": 36.92405700683594, "rewards/rejected": -37.71589660644531, "step": 8140 }, { "epoch": 35.58951965065502, "grad_norm": 8.87431882942518e-06, "learning_rate": 4.067222192834193e-06, "logits/chosen": -1.274084448814392, "logits/rejected": -1.5277670621871948, "logps/chosen": -372.4093017578125, "logps/rejected": -4261.22509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8330062627792358, "rewards/margins": 39.00176239013672, "rewards/rejected": -39.83476638793945, "step": 8150 }, { "epoch": 35.633187772925766, "grad_norm": 1.2497056721191145e-06, "learning_rate": 4.064251375071111e-06, "logits/chosen": -1.3223257064819336, "logits/rejected": -1.5193936824798584, "logps/chosen": -365.64739990234375, "logps/rejected": -4453.9794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9507187604904175, "rewards/margins": 40.61537551879883, "rewards/rejected": -41.56609344482422, "step": 8160 }, { "epoch": 35.6768558951965, "grad_norm": 1.8566872439142862e-06, "learning_rate": 4.061276922761584e-06, "logits/chosen": -1.2184852361679077, "logits/rejected": -1.4656927585601807, "logps/chosen": -332.93511962890625, "logps/rejected": -4184.84375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8003745079040527, "rewards/margins": 38.185081481933594, "rewards/rejected": -38.98545455932617, "step": 8170 }, { "epoch": 35.72052401746725, "grad_norm": 4.052500435995991e-06, "learning_rate": 4.058298842816765e-06, "logits/chosen": -1.301485300064087, "logits/rejected": -1.5347225666046143, "logps/chosen": -378.15216064453125, "logps/rejected": -4631.4580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8901048898696899, "rewards/margins": 42.3327522277832, "rewards/rejected": -43.22285842895508, "step": 8180 }, { "epoch": 35.76419213973799, "grad_norm": 1.2267096107063052e-06, "learning_rate": 4.05531714215624e-06, "logits/chosen": -1.2725465297698975, "logits/rejected": -1.4488544464111328, "logps/chosen": -364.22784423828125, "logps/rejected": -4287.580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8839645385742188, "rewards/margins": 38.99976348876953, "rewards/rejected": -39.88372802734375, "step": 8190 }, { "epoch": 35.80786026200873, "grad_norm": 4.9644131712675666e-05, "learning_rate": 4.052331827708007e-06, "logits/chosen": -1.2489145994186401, "logits/rejected": -1.4570090770721436, "logps/chosen": -334.21160888671875, "logps/rejected": -4336.80712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7905577421188354, "rewards/margins": 39.69462203979492, "rewards/rejected": -40.48517990112305, "step": 8200 }, { "epoch": 35.851528384279476, "grad_norm": 1.2506288263883963e-06, "learning_rate": 4.0493429064084595e-06, "logits/chosen": -1.291191816329956, "logits/rejected": -1.5343716144561768, "logps/chosen": -367.7167663574219, "logps/rejected": -4188.669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.743448793888092, "rewards/margins": 38.38983154296875, "rewards/rejected": -39.13328170776367, "step": 8210 }, { "epoch": 35.89519650655022, "grad_norm": 4.979756194345233e-05, "learning_rate": 4.046350385202372e-06, "logits/chosen": -1.3197550773620605, "logits/rejected": -1.4915510416030884, "logps/chosen": -368.2541809082031, "logps/rejected": -4188.7021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8044946789741516, "rewards/margins": 38.14137649536133, "rewards/rejected": -38.94586944580078, "step": 8220 }, { "epoch": 35.93886462882096, "grad_norm": 1.867194579845303e-06, "learning_rate": 4.043354271042884e-06, "logits/chosen": -1.2852863073349, "logits/rejected": -1.5241138935089111, "logps/chosen": -346.3177185058594, "logps/rejected": -4519.8720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8104537725448608, "rewards/margins": 41.40593338012695, "rewards/rejected": -42.216392517089844, "step": 8230 }, { "epoch": 35.9825327510917, "grad_norm": 3.1852614275826564e-06, "learning_rate": 4.040354570891482e-06, "logits/chosen": -1.2768213748931885, "logits/rejected": -1.4855470657348633, "logps/chosen": -362.0581359863281, "logps/rejected": -4264.5126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9877797961235046, "rewards/margins": 38.751060485839844, "rewards/rejected": -39.73883819580078, "step": 8240 }, { "epoch": 36.02620087336245, "grad_norm": 1.2834651654612012e-06, "learning_rate": 4.037351291717987e-06, "logits/chosen": -1.3059508800506592, "logits/rejected": -1.5228240489959717, "logps/chosen": -386.0630798339844, "logps/rejected": -4195.634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9298911094665527, "rewards/margins": 38.1965446472168, "rewards/rejected": -39.126441955566406, "step": 8250 }, { "epoch": 36.069868995633186, "grad_norm": 6.809335056285464e-05, "learning_rate": 4.034344440500533e-06, "logits/chosen": -1.261996865272522, "logits/rejected": -1.4884618520736694, "logps/chosen": -358.3743896484375, "logps/rejected": -4439.9306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.876456618309021, "rewards/margins": 40.64379119873047, "rewards/rejected": -41.52024841308594, "step": 8260 }, { "epoch": 36.11353711790393, "grad_norm": 3.845102642302632e-06, "learning_rate": 4.0313340242255535e-06, "logits/chosen": -1.2770905494689941, "logits/rejected": -1.4864108562469482, "logps/chosen": -361.0494079589844, "logps/rejected": -4175.3076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8074965476989746, "rewards/margins": 38.08392333984375, "rewards/rejected": -38.89141845703125, "step": 8270 }, { "epoch": 36.157205240174676, "grad_norm": 1.5117835666990807e-05, "learning_rate": 4.02832004988777e-06, "logits/chosen": -1.19482421875, "logits/rejected": -1.431931495666504, "logps/chosen": -356.92205810546875, "logps/rejected": -3907.173095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8605692982673645, "rewards/margins": 35.585933685302734, "rewards/rejected": -36.446502685546875, "step": 8280 }, { "epoch": 36.200873362445414, "grad_norm": 2.7730451221512263e-06, "learning_rate": 4.025302524490167e-06, "logits/chosen": -1.2386246919631958, "logits/rejected": -1.452494502067566, "logps/chosen": -381.3476867675781, "logps/rejected": -3794.58447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8285929560661316, "rewards/margins": 34.59008026123047, "rewards/rejected": -35.418678283691406, "step": 8290 }, { "epoch": 36.24454148471616, "grad_norm": 5.017604354634339e-05, "learning_rate": 4.0222814550439786e-06, "logits/chosen": -1.2447218894958496, "logits/rejected": -1.4836983680725098, "logps/chosen": -413.9576110839844, "logps/rejected": -3616.723388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8967299461364746, "rewards/margins": 32.823387145996094, "rewards/rejected": -33.720115661621094, "step": 8300 }, { "epoch": 36.2882096069869, "grad_norm": 1.4580723963017556e-06, "learning_rate": 4.019256848568678e-06, "logits/chosen": -1.2627780437469482, "logits/rejected": -1.4541466236114502, "logps/chosen": -342.7860107421875, "logps/rejected": -3792.457763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8432623147964478, "rewards/margins": 34.508750915527344, "rewards/rejected": -35.35201644897461, "step": 8310 }, { "epoch": 36.33187772925764, "grad_norm": 5.8413730631148604e-05, "learning_rate": 4.0162287120919545e-06, "logits/chosen": -1.1899281740188599, "logits/rejected": -1.4658597707748413, "logps/chosen": -375.6962890625, "logps/rejected": -3683.526123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8303133845329285, "rewards/margins": 33.526573181152344, "rewards/rejected": -34.35688400268555, "step": 8320 }, { "epoch": 36.375545851528386, "grad_norm": 1.300577312026599e-06, "learning_rate": 4.013197052649699e-06, "logits/chosen": -1.3058115243911743, "logits/rejected": -1.4893893003463745, "logps/chosen": -376.71710205078125, "logps/rejected": -3907.63525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8828374147415161, "rewards/margins": 35.46186065673828, "rewards/rejected": -36.344696044921875, "step": 8330 }, { "epoch": 36.419213973799124, "grad_norm": 1.4034530726964178e-05, "learning_rate": 4.010161877285989e-06, "logits/chosen": -1.328046202659607, "logits/rejected": -1.5178980827331543, "logps/chosen": -377.2076110839844, "logps/rejected": -3945.94189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8607738614082336, "rewards/margins": 35.91310119628906, "rewards/rejected": -36.77387237548828, "step": 8340 }, { "epoch": 36.46288209606987, "grad_norm": 9.257197444859085e-06, "learning_rate": 4.007123193053069e-06, "logits/chosen": -1.2991983890533447, "logits/rejected": -1.4766249656677246, "logps/chosen": -366.9497375488281, "logps/rejected": -4212.14794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9412742853164673, "rewards/margins": 38.303977966308594, "rewards/rejected": -39.24525451660156, "step": 8350 }, { "epoch": 36.506550218340614, "grad_norm": 7.735155352109147e-06, "learning_rate": 4.00408100701134e-06, "logits/chosen": -1.2711068391799927, "logits/rejected": -1.4645365476608276, "logps/chosen": -376.45098876953125, "logps/rejected": -3865.23291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8541182279586792, "rewards/margins": 35.17475509643555, "rewards/rejected": -36.028873443603516, "step": 8360 }, { "epoch": 36.55021834061135, "grad_norm": 2.2010055068834205e-05, "learning_rate": 4.001035326229337e-06, "logits/chosen": -1.2497336864471436, "logits/rejected": -1.452214002609253, "logps/chosen": -364.1743469238281, "logps/rejected": -3892.98876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9148198962211609, "rewards/margins": 35.39236068725586, "rewards/rejected": -36.30717849731445, "step": 8370 }, { "epoch": 36.5938864628821, "grad_norm": 1.430247002540311e-05, "learning_rate": 3.997986157783716e-06, "logits/chosen": -1.224894642829895, "logits/rejected": -1.4743419885635376, "logps/chosen": -375.1416015625, "logps/rejected": -3986.40625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9187698364257812, "rewards/margins": 36.27477264404297, "rewards/rejected": -37.19354248046875, "step": 8380 }, { "epoch": 36.63755458515284, "grad_norm": 1.920115452707829e-06, "learning_rate": 3.994933508759233e-06, "logits/chosen": -1.2990379333496094, "logits/rejected": -1.4403307437896729, "logps/chosen": -373.45904541015625, "logps/rejected": -3851.64697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8696298599243164, "rewards/margins": 34.863197326660156, "rewards/rejected": -35.732826232910156, "step": 8390 }, { "epoch": 36.68122270742358, "grad_norm": 1.0573376588961174e-05, "learning_rate": 3.9918773862487395e-06, "logits/chosen": -1.2444075345993042, "logits/rejected": -1.4741010665893555, "logps/chosen": -382.08013916015625, "logps/rejected": -4277.72412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9270259737968445, "rewards/margins": 39.00202178955078, "rewards/rejected": -39.92904281616211, "step": 8400 }, { "epoch": 36.724890829694324, "grad_norm": 2.4443600397872352e-05, "learning_rate": 3.988817797353149e-06, "logits/chosen": -1.2405115365982056, "logits/rejected": -1.4574792385101318, "logps/chosen": -378.0492248535156, "logps/rejected": -4176.64306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8636399507522583, "rewards/margins": 38.023494720458984, "rewards/rejected": -38.88713455200195, "step": 8410 }, { "epoch": 36.76855895196506, "grad_norm": 2.7626604522256703e-06, "learning_rate": 3.9857547491814346e-06, "logits/chosen": -1.2496955394744873, "logits/rejected": -1.475364089012146, "logps/chosen": -364.7618713378906, "logps/rejected": -4037.78466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8029076457023621, "rewards/margins": 36.850460052490234, "rewards/rejected": -37.65337371826172, "step": 8420 }, { "epoch": 36.81222707423581, "grad_norm": 9.749950352404266e-07, "learning_rate": 3.982688248850604e-06, "logits/chosen": -1.2350388765335083, "logits/rejected": -1.4774335622787476, "logps/chosen": -371.94830322265625, "logps/rejected": -3858.34912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9182599782943726, "rewards/margins": 35.0130729675293, "rewards/rejected": -35.931339263916016, "step": 8430 }, { "epoch": 36.85589519650655, "grad_norm": 1.6134477181224886e-06, "learning_rate": 3.979618303485687e-06, "logits/chosen": -1.2864190340042114, "logits/rejected": -1.4858605861663818, "logps/chosen": -358.26531982421875, "logps/rejected": -4463.33740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9473125338554382, "rewards/margins": 40.739227294921875, "rewards/rejected": -41.68654251098633, "step": 8440 }, { "epoch": 36.89956331877729, "grad_norm": 6.324227175023377e-05, "learning_rate": 3.976544920219719e-06, "logits/chosen": -1.3171360492706299, "logits/rejected": -1.520315408706665, "logps/chosen": -378.01324462890625, "logps/rejected": -4375.93212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9804149866104126, "rewards/margins": 39.85712432861328, "rewards/rejected": -40.8375358581543, "step": 8450 }, { "epoch": 36.943231441048034, "grad_norm": 6.957755123901619e-06, "learning_rate": 3.973468106193721e-06, "logits/chosen": -1.2447974681854248, "logits/rejected": -1.5325813293457031, "logps/chosen": -352.47821044921875, "logps/rejected": -4376.66357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9180800318717957, "rewards/margins": 40.035484313964844, "rewards/rejected": -40.95356750488281, "step": 8460 }, { "epoch": 36.98689956331878, "grad_norm": 2.4690381929553294e-05, "learning_rate": 3.9703878685566884e-06, "logits/chosen": -1.398642659187317, "logits/rejected": -1.6071094274520874, "logps/chosen": -368.21051025390625, "logps/rejected": -4836.3037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9465948939323425, "rewards/margins": 44.14708709716797, "rewards/rejected": -45.09368133544922, "step": 8470 }, { "epoch": 37.03056768558952, "grad_norm": 1.1458770651612732e-05, "learning_rate": 3.9673042144655695e-06, "logits/chosen": -1.321022629737854, "logits/rejected": -1.4912296533584595, "logps/chosen": -351.0570373535156, "logps/rejected": -4085.235595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.87464439868927, "rewards/margins": 37.115272521972656, "rewards/rejected": -37.98991775512695, "step": 8480 }, { "epoch": 37.07423580786026, "grad_norm": 7.594541240260953e-07, "learning_rate": 3.9642171510852515e-06, "logits/chosen": -1.263014554977417, "logits/rejected": -1.5429760217666626, "logps/chosen": -363.82708740234375, "logps/rejected": -4462.46826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9278360605239868, "rewards/margins": 40.776031494140625, "rewards/rejected": -41.70386505126953, "step": 8490 }, { "epoch": 37.11790393013101, "grad_norm": 3.2622867909326055e-06, "learning_rate": 3.961126685588541e-06, "logits/chosen": -1.3128348588943481, "logits/rejected": -1.5292164087295532, "logps/chosen": -334.3698425292969, "logps/rejected": -4697.0830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8679884672164917, "rewards/margins": 43.079708099365234, "rewards/rejected": -43.94770050048828, "step": 8500 }, { "epoch": 37.161572052401745, "grad_norm": 8.810947397466707e-07, "learning_rate": 3.958032825156151e-06, "logits/chosen": -1.2548977136611938, "logits/rejected": -1.5069243907928467, "logps/chosen": -377.1316833496094, "logps/rejected": -3787.854736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8033713102340698, "rewards/margins": 34.494232177734375, "rewards/rejected": -35.297607421875, "step": 8510 }, { "epoch": 37.20524017467249, "grad_norm": 2.641274263174291e-05, "learning_rate": 3.954935576976686e-06, "logits/chosen": -1.2951362133026123, "logits/rejected": -1.5279395580291748, "logps/chosen": -371.1959228515625, "logps/rejected": -3971.164794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8472470045089722, "rewards/margins": 36.19519805908203, "rewards/rejected": -37.04244613647461, "step": 8520 }, { "epoch": 37.248908296943235, "grad_norm": 1.6590909467354063e-05, "learning_rate": 3.951834948246616e-06, "logits/chosen": -1.2823026180267334, "logits/rejected": -1.506593942642212, "logps/chosen": -400.2193298339844, "logps/rejected": -4292.3076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0212695598602295, "rewards/margins": 39.00067901611328, "rewards/rejected": -40.021942138671875, "step": 8530 }, { "epoch": 37.29257641921397, "grad_norm": 4.066528807119335e-05, "learning_rate": 3.94873094617027e-06, "logits/chosen": -1.2602485418319702, "logits/rejected": -1.4909348487854004, "logps/chosen": -342.1995544433594, "logps/rejected": -4414.2509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8062237501144409, "rewards/margins": 40.350181579589844, "rewards/rejected": -41.156402587890625, "step": 8540 }, { "epoch": 37.33624454148472, "grad_norm": 6.13756817410232e-07, "learning_rate": 3.945623577959811e-06, "logits/chosen": -1.2777163982391357, "logits/rejected": -1.5300451517105103, "logps/chosen": -370.8778076171875, "logps/rejected": -4143.759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9008724093437195, "rewards/margins": 37.8154411315918, "rewards/rejected": -38.716312408447266, "step": 8550 }, { "epoch": 37.379912663755455, "grad_norm": 2.289223241012274e-06, "learning_rate": 3.9425128508352285e-06, "logits/chosen": -1.2403351068496704, "logits/rejected": -1.4969021081924438, "logps/chosen": -356.4953918457031, "logps/rejected": -4206.9208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8501344919204712, "rewards/margins": 38.49115753173828, "rewards/rejected": -39.34128952026367, "step": 8560 }, { "epoch": 37.4235807860262, "grad_norm": 1.7323265729684714e-06, "learning_rate": 3.9393987720243125e-06, "logits/chosen": -1.302018404006958, "logits/rejected": -1.5231168270111084, "logps/chosen": -372.74688720703125, "logps/rejected": -4394.33447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9877273440361023, "rewards/margins": 40.01799774169922, "rewards/rejected": -41.0057258605957, "step": 8570 }, { "epoch": 37.467248908296945, "grad_norm": 7.707659300669289e-07, "learning_rate": 3.936281348762641e-06, "logits/chosen": -1.3374603986740112, "logits/rejected": -1.5808278322219849, "logps/chosen": -374.2947998046875, "logps/rejected": -4430.23583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8598998785018921, "rewards/margins": 40.48351287841797, "rewards/rejected": -41.34341049194336, "step": 8580 }, { "epoch": 37.51091703056768, "grad_norm": 0.0003548046660585427, "learning_rate": 3.933160588293564e-06, "logits/chosen": -1.305577039718628, "logits/rejected": -1.5214755535125732, "logps/chosen": -363.62078857421875, "logps/rejected": -4144.85791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9322733879089355, "rewards/margins": 37.69120407104492, "rewards/rejected": -38.623470306396484, "step": 8590 }, { "epoch": 37.55458515283843, "grad_norm": 2.8263036379879194e-06, "learning_rate": 3.930036497868187e-06, "logits/chosen": -1.2695825099945068, "logits/rejected": -1.4696696996688843, "logps/chosen": -391.30999755859375, "logps/rejected": -3527.353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8936063051223755, "rewards/margins": 31.836780548095703, "rewards/rejected": -32.730384826660156, "step": 8600 }, { "epoch": 37.59825327510917, "grad_norm": 9.419503564635353e-07, "learning_rate": 3.926909084745348e-06, "logits/chosen": -1.267364263534546, "logits/rejected": -1.5346320867538452, "logps/chosen": -354.5287170410156, "logps/rejected": -4552.849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8574118614196777, "rewards/margins": 41.77190017700195, "rewards/rejected": -42.62931442260742, "step": 8610 }, { "epoch": 37.64192139737991, "grad_norm": 1.62190120800596e-06, "learning_rate": 3.92377835619161e-06, "logits/chosen": -1.3194657564163208, "logits/rejected": -1.5265874862670898, "logps/chosen": -362.43585205078125, "logps/rejected": -4220.35693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8938363194465637, "rewards/margins": 38.48442077636719, "rewards/rejected": -39.378257751464844, "step": 8620 }, { "epoch": 37.685589519650655, "grad_norm": 6.978634738472216e-07, "learning_rate": 3.920644319481237e-06, "logits/chosen": -1.2647504806518555, "logits/rejected": -1.488782525062561, "logps/chosen": -355.9391174316406, "logps/rejected": -4142.671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9257093667984009, "rewards/margins": 37.64998245239258, "rewards/rejected": -38.57569122314453, "step": 8630 }, { "epoch": 37.7292576419214, "grad_norm": 1.3328399913098587e-06, "learning_rate": 3.91750698189618e-06, "logits/chosen": -1.3126425743103027, "logits/rejected": -1.5582592487335205, "logps/chosen": -366.73834228515625, "logps/rejected": -4385.4833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9004396200180054, "rewards/margins": 39.990013122558594, "rewards/rejected": -40.89044952392578, "step": 8640 }, { "epoch": 37.77292576419214, "grad_norm": 1.5183425483214133e-06, "learning_rate": 3.91436635072606e-06, "logits/chosen": -1.269518256187439, "logits/rejected": -1.5475237369537354, "logps/chosen": -416.378173828125, "logps/rejected": -3943.85107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8651858568191528, "rewards/margins": 35.93024444580078, "rewards/rejected": -36.79542541503906, "step": 8650 }, { "epoch": 37.81659388646288, "grad_norm": 8.776228036518987e-07, "learning_rate": 3.911222433268151e-06, "logits/chosen": -1.3252003192901611, "logits/rejected": -1.545840859413147, "logps/chosen": -360.0669250488281, "logps/rejected": -4320.3857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9643014073371887, "rewards/margins": 39.39952850341797, "rewards/rejected": -40.36383056640625, "step": 8660 }, { "epoch": 37.86026200873363, "grad_norm": 2.8053671233988616e-06, "learning_rate": 3.908075236827359e-06, "logits/chosen": -1.3269729614257812, "logits/rejected": -1.5506505966186523, "logps/chosen": -390.158935546875, "logps/rejected": -3743.85693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9027523994445801, "rewards/margins": 33.975486755371094, "rewards/rejected": -34.878238677978516, "step": 8670 }, { "epoch": 37.903930131004365, "grad_norm": 0.0004218435423349042, "learning_rate": 3.904924768716216e-06, "logits/chosen": -1.2615175247192383, "logits/rejected": -1.5205676555633545, "logps/chosen": -365.1324768066406, "logps/rejected": -4106.765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8933040499687195, "rewards/margins": 37.41657257080078, "rewards/rejected": -38.309871673583984, "step": 8680 }, { "epoch": 37.94759825327511, "grad_norm": 7.072473047095029e-07, "learning_rate": 3.9017710362548485e-06, "logits/chosen": -1.2653504610061646, "logits/rejected": -1.4756276607513428, "logps/chosen": -407.92413330078125, "logps/rejected": -3484.876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9070475697517395, "rewards/margins": 31.579833984375, "rewards/rejected": -32.48688507080078, "step": 8690 }, { "epoch": 37.99126637554585, "grad_norm": 3.808723226001289e-05, "learning_rate": 3.8986140467709725e-06, "logits/chosen": -1.3297382593154907, "logits/rejected": -1.565420389175415, "logps/chosen": -347.8296813964844, "logps/rejected": -4577.328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.984015166759491, "rewards/margins": 41.73925018310547, "rewards/rejected": -42.7232666015625, "step": 8700 }, { "epoch": 38.03493449781659, "grad_norm": 3.0664523878302125e-06, "learning_rate": 3.895453807599868e-06, "logits/chosen": -1.2983287572860718, "logits/rejected": -1.5491101741790771, "logps/chosen": -343.27117919921875, "logps/rejected": -4365.8701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.939549446105957, "rewards/margins": 39.81465148925781, "rewards/rejected": -40.75419998168945, "step": 8710 }, { "epoch": 38.07860262008734, "grad_norm": 1.0350735780810493e-06, "learning_rate": 3.89229032608437e-06, "logits/chosen": -1.2627127170562744, "logits/rejected": -1.4925731420516968, "logps/chosen": -380.84100341796875, "logps/rejected": -3971.869873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9292028546333313, "rewards/margins": 36.115047454833984, "rewards/rejected": -37.044246673583984, "step": 8720 }, { "epoch": 38.122270742358076, "grad_norm": 1.027944460525276e-06, "learning_rate": 3.889123609574843e-06, "logits/chosen": -1.2860000133514404, "logits/rejected": -1.5092341899871826, "logps/chosen": -352.6324462890625, "logps/rejected": -4173.85595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9909518957138062, "rewards/margins": 37.978126525878906, "rewards/rejected": -38.969078063964844, "step": 8730 }, { "epoch": 38.16593886462882, "grad_norm": 1.5566538187747142e-06, "learning_rate": 3.885953665429171e-06, "logits/chosen": -1.2721836566925049, "logits/rejected": -1.506331205368042, "logps/chosen": -398.68414306640625, "logps/rejected": -3768.78515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.028712511062622, "rewards/margins": 34.07883071899414, "rewards/rejected": -35.1075439453125, "step": 8740 }, { "epoch": 38.209606986899566, "grad_norm": 4.986004978122997e-05, "learning_rate": 3.882780501012735e-06, "logits/chosen": -1.3364943265914917, "logits/rejected": -1.5714950561523438, "logps/chosen": -376.7752380371094, "logps/rejected": -4691.4580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1380622386932373, "rewards/margins": 42.70343780517578, "rewards/rejected": -43.841495513916016, "step": 8750 }, { "epoch": 38.2532751091703, "grad_norm": 8.372438673803884e-07, "learning_rate": 3.879604123698401e-06, "logits/chosen": -1.3135626316070557, "logits/rejected": -1.55580472946167, "logps/chosen": -386.78094482421875, "logps/rejected": -4130.44873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0205503702163696, "rewards/margins": 37.553401947021484, "rewards/rejected": -38.57395553588867, "step": 8760 }, { "epoch": 38.29694323144105, "grad_norm": 1.3791561856269928e-05, "learning_rate": 3.8764245408664964e-06, "logits/chosen": -1.3296626806259155, "logits/rejected": -1.5713169574737549, "logps/chosen": -345.3514709472656, "logps/rejected": -4548.26953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9728398323059082, "rewards/margins": 41.49724197387695, "rewards/rejected": -42.4700813293457, "step": 8770 }, { "epoch": 38.34061135371179, "grad_norm": 5.1967014032536735e-06, "learning_rate": 3.8732417599048014e-06, "logits/chosen": -1.2762205600738525, "logits/rejected": -1.5727962255477905, "logps/chosen": -411.03192138671875, "logps/rejected": -4136.67724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9585390090942383, "rewards/margins": 37.81079864501953, "rewards/rejected": -38.76933670043945, "step": 8780 }, { "epoch": 38.38427947598253, "grad_norm": 1.0883809323610172e-06, "learning_rate": 3.870055788208522e-06, "logits/chosen": -1.2851665019989014, "logits/rejected": -1.5188406705856323, "logps/chosen": -358.5579528808594, "logps/rejected": -4171.0810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9552133679389954, "rewards/margins": 38.07263946533203, "rewards/rejected": -39.027854919433594, "step": 8790 }, { "epoch": 38.427947598253276, "grad_norm": 2.4675475336192916e-05, "learning_rate": 3.866866633180284e-06, "logits/chosen": -1.2963489294052124, "logits/rejected": -1.5515542030334473, "logps/chosen": -364.52569580078125, "logps/rejected": -4298.46435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8826769590377808, "rewards/margins": 39.1487922668457, "rewards/rejected": -40.031471252441406, "step": 8800 }, { "epoch": 38.47161572052402, "grad_norm": 1.055047909503872e-06, "learning_rate": 3.863674302230103e-06, "logits/chosen": -1.2640933990478516, "logits/rejected": -1.4895274639129639, "logps/chosen": -365.31610107421875, "logps/rejected": -4071.52978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9108362197875977, "rewards/margins": 37.05625534057617, "rewards/rejected": -37.96709442138672, "step": 8810 }, { "epoch": 38.51528384279476, "grad_norm": 1.0577224782703687e-06, "learning_rate": 3.8604788027753796e-06, "logits/chosen": -1.2709076404571533, "logits/rejected": -1.522012710571289, "logps/chosen": -379.449462890625, "logps/rejected": -4099.84765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8359874486923218, "rewards/margins": 37.34709167480469, "rewards/rejected": -38.183074951171875, "step": 8820 }, { "epoch": 38.5589519650655, "grad_norm": 2.4271520057728386e-05, "learning_rate": 3.857280142240871e-06, "logits/chosen": -1.301532506942749, "logits/rejected": -1.5289556980133057, "logps/chosen": -370.88262939453125, "logps/rejected": -4082.718017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8449046015739441, "rewards/margins": 37.213348388671875, "rewards/rejected": -38.058250427246094, "step": 8830 }, { "epoch": 38.60262008733624, "grad_norm": 5.525741707667018e-06, "learning_rate": 3.854078328058684e-06, "logits/chosen": -1.287628412246704, "logits/rejected": -1.5352412462234497, "logps/chosen": -375.08770751953125, "logps/rejected": -4378.07421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9756516218185425, "rewards/margins": 39.994712829589844, "rewards/rejected": -40.970367431640625, "step": 8840 }, { "epoch": 38.646288209606986, "grad_norm": 1.6024437979869489e-06, "learning_rate": 3.850873367668252e-06, "logits/chosen": -1.3263193368911743, "logits/rejected": -1.5758168697357178, "logps/chosen": -355.0450439453125, "logps/rejected": -4549.40771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8428980708122253, "rewards/margins": 41.52554702758789, "rewards/rejected": -42.368446350097656, "step": 8850 }, { "epoch": 38.68995633187773, "grad_norm": 3.3391035126656364e-05, "learning_rate": 3.847665268516314e-06, "logits/chosen": -1.308389663696289, "logits/rejected": -1.5709537267684937, "logps/chosen": -376.54656982421875, "logps/rejected": -4211.47412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9804501533508301, "rewards/margins": 38.33263397216797, "rewards/rejected": -39.313087463378906, "step": 8860 }, { "epoch": 38.73362445414847, "grad_norm": 3.3797590858143535e-05, "learning_rate": 3.84445403805691e-06, "logits/chosen": -1.2351878881454468, "logits/rejected": -1.5961521863937378, "logps/chosen": -393.4942321777344, "logps/rejected": -4280.9287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9171779751777649, "rewards/margins": 39.11894607543945, "rewards/rejected": -40.0361213684082, "step": 8870 }, { "epoch": 38.777292576419214, "grad_norm": 9.746701247065955e-05, "learning_rate": 3.8412396837513485e-06, "logits/chosen": -1.3461339473724365, "logits/rejected": -1.636967420578003, "logps/chosen": -370.76824951171875, "logps/rejected": -4678.26513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9515541195869446, "rewards/margins": 42.779083251953125, "rewards/rejected": -43.73064041137695, "step": 8880 }, { "epoch": 38.82096069868996, "grad_norm": 7.34254901758173e-07, "learning_rate": 3.838022213068198e-06, "logits/chosen": -1.3378393650054932, "logits/rejected": -1.5441609621047974, "logps/chosen": -372.0064392089844, "logps/rejected": -4209.33740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9463123083114624, "rewards/margins": 38.26377487182617, "rewards/rejected": -39.21009063720703, "step": 8890 }, { "epoch": 38.8646288209607, "grad_norm": 1.8280753640498366e-06, "learning_rate": 3.834801633483272e-06, "logits/chosen": -1.3054711818695068, "logits/rejected": -1.5378170013427734, "logps/chosen": -379.9939270019531, "logps/rejected": -4031.48486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9216340184211731, "rewards/margins": 36.673133850097656, "rewards/rejected": -37.59476852416992, "step": 8900 }, { "epoch": 38.90829694323144, "grad_norm": 1.2709170056701178e-06, "learning_rate": 3.831577952479602e-06, "logits/chosen": -1.3023784160614014, "logits/rejected": -1.585871934890747, "logps/chosen": -365.1500244140625, "logps/rejected": -4193.5771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8456674814224243, "rewards/margins": 38.210289001464844, "rewards/rejected": -39.05595397949219, "step": 8910 }, { "epoch": 38.951965065502186, "grad_norm": 1.4195528800699604e-06, "learning_rate": 3.828351177547429e-06, "logits/chosen": -1.3404892683029175, "logits/rejected": -1.5838011503219604, "logps/chosen": -371.6647033691406, "logps/rejected": -4262.3037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.904767632484436, "rewards/margins": 38.94096755981445, "rewards/rejected": -39.845733642578125, "step": 8920 }, { "epoch": 38.995633187772924, "grad_norm": 1.6332639151886975e-06, "learning_rate": 3.825121316184181e-06, "logits/chosen": -1.3539316654205322, "logits/rejected": -1.5789486169815063, "logps/chosen": -386.35260009765625, "logps/rejected": -4327.02685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0464568138122559, "rewards/margins": 39.18194580078125, "rewards/rejected": -40.22840118408203, "step": 8930 }, { "epoch": 39.03930131004367, "grad_norm": 3.95826877559971e-06, "learning_rate": 3.8218883758944605e-06, "logits/chosen": -1.2870447635650635, "logits/rejected": -1.5198304653167725, "logps/chosen": -390.56011962890625, "logps/rejected": -3861.60400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8890445828437805, "rewards/margins": 35.03717803955078, "rewards/rejected": -35.92621994018555, "step": 8940 }, { "epoch": 39.082969432314414, "grad_norm": 1.900883777085347e-06, "learning_rate": 3.818652364190018e-06, "logits/chosen": -1.3307517766952515, "logits/rejected": -1.5789480209350586, "logps/chosen": -346.2577819824219, "logps/rejected": -4581.26416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0449010133743286, "rewards/margins": 41.77682113647461, "rewards/rejected": -42.82172393798828, "step": 8950 }, { "epoch": 39.12663755458515, "grad_norm": 1.567655618696389e-05, "learning_rate": 3.815413288589747e-06, "logits/chosen": -1.236490249633789, "logits/rejected": -1.5435079336166382, "logps/chosen": -363.6553649902344, "logps/rejected": -4357.740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8715760111808777, "rewards/margins": 39.86055374145508, "rewards/rejected": -40.73213195800781, "step": 8960 }, { "epoch": 39.1703056768559, "grad_norm": 3.2230735299664636e-05, "learning_rate": 3.8121711566196555e-06, "logits/chosen": -1.290111780166626, "logits/rejected": -1.5625643730163574, "logps/chosen": -372.43585205078125, "logps/rejected": -4196.45849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9808707237243652, "rewards/margins": 38.275352478027344, "rewards/rejected": -39.25621795654297, "step": 8970 }, { "epoch": 39.213973799126634, "grad_norm": 1.7155058654943806e-06, "learning_rate": 3.808925975812855e-06, "logits/chosen": -1.2670830488204956, "logits/rejected": -1.5813658237457275, "logps/chosen": -393.8588562011719, "logps/rejected": -4114.86376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0175015926361084, "rewards/margins": 37.47612380981445, "rewards/rejected": -38.49362564086914, "step": 8980 }, { "epoch": 39.25764192139738, "grad_norm": 5.995829777794405e-07, "learning_rate": 3.8056777537095404e-06, "logits/chosen": -1.3153183460235596, "logits/rejected": -1.5816380977630615, "logps/chosen": -368.6558532714844, "logps/rejected": -4233.6181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9159805178642273, "rewards/margins": 38.57996368408203, "rewards/rejected": -39.49594497680664, "step": 8990 }, { "epoch": 39.301310043668124, "grad_norm": 4.813128562270042e-07, "learning_rate": 3.8024264978569725e-06, "logits/chosen": -1.2909296751022339, "logits/rejected": -1.5808959007263184, "logps/chosen": -376.54730224609375, "logps/rejected": -4134.01513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9996463656425476, "rewards/margins": 37.598182678222656, "rewards/rejected": -38.59783172607422, "step": 9000 }, { "epoch": 39.34497816593886, "grad_norm": 1.201232735746156e-06, "learning_rate": 3.7991722158094633e-06, "logits/chosen": -1.3102375268936157, "logits/rejected": -1.595304012298584, "logps/chosen": -388.81329345703125, "logps/rejected": -4518.7783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9009407162666321, "rewards/margins": 41.2156867980957, "rewards/rejected": -42.11661911010742, "step": 9010 }, { "epoch": 39.38864628820961, "grad_norm": 2.342923472347187e-05, "learning_rate": 3.795914915128354e-06, "logits/chosen": -1.2928831577301025, "logits/rejected": -1.5376642942428589, "logps/chosen": -375.17413330078125, "logps/rejected": -3883.670654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9585100412368774, "rewards/margins": 35.30365753173828, "rewards/rejected": -36.26216125488281, "step": 9020 }, { "epoch": 39.43231441048035, "grad_norm": 9.954608215379416e-07, "learning_rate": 3.792654603381998e-06, "logits/chosen": -1.3206737041473389, "logits/rejected": -1.5609687566757202, "logps/chosen": -372.9518737792969, "logps/rejected": -4483.7470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9040133357048035, "rewards/margins": 40.97189712524414, "rewards/rejected": -41.87590789794922, "step": 9030 }, { "epoch": 39.47598253275109, "grad_norm": 9.314674164160895e-07, "learning_rate": 3.7893912881457505e-06, "logits/chosen": -1.2649128437042236, "logits/rejected": -1.5502212047576904, "logps/chosen": -353.4836120605469, "logps/rejected": -4228.91552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9618204832077026, "rewards/margins": 38.65827178955078, "rewards/rejected": -39.620094299316406, "step": 9040 }, { "epoch": 39.519650655021834, "grad_norm": 3.1906082038121837e-06, "learning_rate": 3.786124977001939e-06, "logits/chosen": -1.3194923400878906, "logits/rejected": -1.6076900959014893, "logps/chosen": -386.73077392578125, "logps/rejected": -4492.583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9689961671829224, "rewards/margins": 41.01674270629883, "rewards/rejected": -41.985740661621094, "step": 9050 }, { "epoch": 39.56331877729258, "grad_norm": 7.065267669579584e-07, "learning_rate": 3.782855677539857e-06, "logits/chosen": -1.2701666355133057, "logits/rejected": -1.5824873447418213, "logps/chosen": -418.947998046875, "logps/rejected": -3976.94140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.013678789138794, "rewards/margins": 36.201011657714844, "rewards/rejected": -37.214691162109375, "step": 9060 }, { "epoch": 39.60698689956332, "grad_norm": 8.514951775382008e-06, "learning_rate": 3.779583397355738e-06, "logits/chosen": -1.3138396739959717, "logits/rejected": -1.5344326496124268, "logps/chosen": -390.2968444824219, "logps/rejected": -3859.887939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9775360226631165, "rewards/margins": 35.05876541137695, "rewards/rejected": -36.03629684448242, "step": 9070 }, { "epoch": 39.65065502183406, "grad_norm": 5.563437796731094e-05, "learning_rate": 3.776308144052744e-06, "logits/chosen": -1.299718976020813, "logits/rejected": -1.590745449066162, "logps/chosen": -370.12249755859375, "logps/rejected": -4360.19482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9657918810844421, "rewards/margins": 39.755645751953125, "rewards/rejected": -40.721435546875, "step": 9080 }, { "epoch": 39.6943231441048, "grad_norm": 7.289395192904075e-07, "learning_rate": 3.7730299252409425e-06, "logits/chosen": -1.3786529302597046, "logits/rejected": -1.6483328342437744, "logps/chosen": -375.71429443359375, "logps/rejected": -4637.45947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1066172122955322, "rewards/margins": 42.24767303466797, "rewards/rejected": -43.354286193847656, "step": 9090 }, { "epoch": 39.737991266375545, "grad_norm": 1.1805597094893627e-06, "learning_rate": 3.769748748537292e-06, "logits/chosen": -1.3585875034332275, "logits/rejected": -1.6568772792816162, "logps/chosen": -363.0855407714844, "logps/rejected": -4643.22607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9808639287948608, "rewards/margins": 42.52168655395508, "rewards/rejected": -43.5025520324707, "step": 9100 }, { "epoch": 39.78165938864629, "grad_norm": 2.178828409339166e-06, "learning_rate": 3.7664646215656246e-06, "logits/chosen": -1.325937032699585, "logits/rejected": -1.607234001159668, "logps/chosen": -370.36614990234375, "logps/rejected": -4255.4560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0521587133407593, "rewards/margins": 38.6592903137207, "rewards/rejected": -39.711448669433594, "step": 9110 }, { "epoch": 39.82532751091703, "grad_norm": 1.910743469880245e-06, "learning_rate": 3.763177551956628e-06, "logits/chosen": -1.319101095199585, "logits/rejected": -1.568381667137146, "logps/chosen": -382.67022705078125, "logps/rejected": -4284.37451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9642934799194336, "rewards/margins": 38.888160705566406, "rewards/rejected": -39.852455139160156, "step": 9120 }, { "epoch": 39.86899563318777, "grad_norm": 0.00010160184137286352, "learning_rate": 3.759887547347825e-06, "logits/chosen": -1.3148198127746582, "logits/rejected": -1.6021192073822021, "logps/chosen": -355.0747375488281, "logps/rejected": -4576.71435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9754033088684082, "rewards/margins": 41.829307556152344, "rewards/rejected": -42.804710388183594, "step": 9130 }, { "epoch": 39.91266375545852, "grad_norm": 1.562693649424651e-06, "learning_rate": 3.756594615383558e-06, "logits/chosen": -1.3231070041656494, "logits/rejected": -1.5753613710403442, "logps/chosen": -405.4352722167969, "logps/rejected": -3986.524169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0097378492355347, "rewards/margins": 36.22113037109375, "rewards/rejected": -37.23086929321289, "step": 9140 }, { "epoch": 39.956331877729255, "grad_norm": 9.822526229400252e-05, "learning_rate": 3.7532987637149737e-06, "logits/chosen": -1.3632420301437378, "logits/rejected": -1.6234657764434814, "logps/chosen": -381.80120849609375, "logps/rejected": -4463.4345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9549969434738159, "rewards/margins": 40.83608627319336, "rewards/rejected": -41.791080474853516, "step": 9150 }, { "epoch": 40.0, "grad_norm": 1.9799248108761924e-05, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.308091402053833, "logits/rejected": -1.5948963165283203, "logps/chosen": -348.35357666015625, "logps/rejected": -4453.64501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9283116459846497, "rewards/margins": 40.74823760986328, "rewards/rejected": -41.676551818847656, "step": 9160 }, { "epoch": 40.043668122270745, "grad_norm": 2.049780229132679e-05, "learning_rate": 3.7466983319033336e-06, "logits/chosen": -1.2877956628799438, "logits/rejected": -1.546494483947754, "logps/chosen": -392.9704895019531, "logps/rejected": -3890.57275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0581237077713013, "rewards/margins": 35.19221878051758, "rewards/rejected": -36.25033950805664, "step": 9170 }, { "epoch": 40.08733624454148, "grad_norm": 3.7959927942144254e-05, "learning_rate": 3.7433937670964165e-06, "logits/chosen": -1.346783995628357, "logits/rejected": -1.644317865371704, "logps/chosen": -369.94903564453125, "logps/rejected": -4607.47705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0526387691497803, "rewards/margins": 41.99651336669922, "rewards/rejected": -43.04914093017578, "step": 9180 }, { "epoch": 40.13100436681223, "grad_norm": 1.7190742616468647e-05, "learning_rate": 3.740086313257423e-06, "logits/chosen": -1.3864589929580688, "logits/rejected": -1.6973679065704346, "logps/chosen": -363.55535888671875, "logps/rejected": -4829.54296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.096238374710083, "rewards/margins": 44.092918395996094, "rewards/rejected": -45.18915939331055, "step": 9190 }, { "epoch": 40.17467248908297, "grad_norm": 3.151801025284215e-05, "learning_rate": 3.73677597807124e-06, "logits/chosen": -1.3663101196289062, "logits/rejected": -1.6068041324615479, "logps/chosen": -368.82989501953125, "logps/rejected": -4573.36962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9873367547988892, "rewards/margins": 41.66905212402344, "rewards/rejected": -42.65639114379883, "step": 9200 }, { "epoch": 40.21834061135371, "grad_norm": 1.0073831839869386e-06, "learning_rate": 3.733462769229449e-06, "logits/chosen": -1.367348551750183, "logits/rejected": -1.640655279159546, "logps/chosen": -367.8431701660156, "logps/rejected": -4452.48779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0304609537124634, "rewards/margins": 40.5499267578125, "rewards/rejected": -41.58039093017578, "step": 9210 }, { "epoch": 40.262008733624455, "grad_norm": 3.048615237463046e-06, "learning_rate": 3.730146694430308e-06, "logits/chosen": -1.2766882181167603, "logits/rejected": -1.6422916650772095, "logps/chosen": -391.190185546875, "logps/rejected": -4331.01708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0173022747039795, "rewards/margins": 39.56862258911133, "rewards/rejected": -40.58592224121094, "step": 9220 }, { "epoch": 40.30567685589519, "grad_norm": 2.4877839143277362e-05, "learning_rate": 3.726827761378736e-06, "logits/chosen": -1.310097336769104, "logits/rejected": -1.6333191394805908, "logps/chosen": -408.95111083984375, "logps/rejected": -4188.64501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0147953033447266, "rewards/margins": 38.114356994628906, "rewards/rejected": -39.12915802001953, "step": 9230 }, { "epoch": 40.34934497816594, "grad_norm": 9.169428134694984e-07, "learning_rate": 3.7235059777862897e-06, "logits/chosen": -1.3244006633758545, "logits/rejected": -1.609675645828247, "logps/chosen": -376.8053894042969, "logps/rejected": -4068.971435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9597539901733398, "rewards/margins": 36.98383712768555, "rewards/rejected": -37.9435920715332, "step": 9240 }, { "epoch": 40.39301310043668, "grad_norm": 1.0131191131208168e-06, "learning_rate": 3.7201813513711527e-06, "logits/chosen": -1.363229751586914, "logits/rejected": -1.6585853099822998, "logps/chosen": -388.77838134765625, "logps/rejected": -4434.75244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9851490259170532, "rewards/margins": 40.3338737487793, "rewards/rejected": -41.31902313232422, "step": 9250 }, { "epoch": 40.43668122270742, "grad_norm": 2.140689639303902e-05, "learning_rate": 3.716853889858112e-06, "logits/chosen": -1.3647382259368896, "logits/rejected": -1.6405503749847412, "logps/chosen": -388.22821044921875, "logps/rejected": -4334.65625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0464738607406616, "rewards/margins": 39.51506042480469, "rewards/rejected": -40.5615348815918, "step": 9260 }, { "epoch": 40.480349344978166, "grad_norm": 4.5600212723077906e-05, "learning_rate": 3.7135236009785418e-06, "logits/chosen": -1.347532868385315, "logits/rejected": -1.6370713710784912, "logps/chosen": -377.4234619140625, "logps/rejected": -4385.8369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0198001861572266, "rewards/margins": 40.011383056640625, "rewards/rejected": -41.03118133544922, "step": 9270 }, { "epoch": 40.52401746724891, "grad_norm": 1.2426187387548892e-05, "learning_rate": 3.7101904924703865e-06, "logits/chosen": -1.3562287092208862, "logits/rejected": -1.6691890954971313, "logps/chosen": -373.2101135253906, "logps/rejected": -4288.1396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0231094360351562, "rewards/margins": 39.03974151611328, "rewards/rejected": -40.062843322753906, "step": 9280 }, { "epoch": 40.56768558951965, "grad_norm": 3.6586146681023574e-05, "learning_rate": 3.70685457207814e-06, "logits/chosen": -1.3494410514831543, "logits/rejected": -1.6498397588729858, "logps/chosen": -364.51849365234375, "logps/rejected": -4698.755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9119510650634766, "rewards/margins": 42.96495056152344, "rewards/rejected": -43.87689971923828, "step": 9290 }, { "epoch": 40.61135371179039, "grad_norm": 1.0488010851121758e-06, "learning_rate": 3.7035158475528336e-06, "logits/chosen": -1.3082332611083984, "logits/rejected": -1.5963305234909058, "logps/chosen": -375.81890869140625, "logps/rejected": -4234.8857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9943358302116394, "rewards/margins": 38.500892639160156, "rewards/rejected": -39.4952278137207, "step": 9300 }, { "epoch": 40.65502183406114, "grad_norm": 1.1692547729778134e-06, "learning_rate": 3.700174326652011e-06, "logits/chosen": -1.322517991065979, "logits/rejected": -1.6410577297210693, "logps/chosen": -389.6023254394531, "logps/rejected": -4299.630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9837114214897156, "rewards/margins": 39.26232147216797, "rewards/rejected": -40.24603271484375, "step": 9310 }, { "epoch": 40.698689956331876, "grad_norm": 7.498789783832521e-07, "learning_rate": 3.6968300171397133e-06, "logits/chosen": -1.3303494453430176, "logits/rejected": -1.6040623188018799, "logps/chosen": -397.545166015625, "logps/rejected": -4115.07666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0146939754486084, "rewards/margins": 37.35481262207031, "rewards/rejected": -38.36951446533203, "step": 9320 }, { "epoch": 40.74235807860262, "grad_norm": 7.378584395762033e-07, "learning_rate": 3.693482926786461e-06, "logits/chosen": -1.2946218252182007, "logits/rejected": -1.5950735807418823, "logps/chosen": -389.87322998046875, "logps/rejected": -4121.91259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1800825595855713, "rewards/margins": 37.339088439941406, "rewards/rejected": -38.5191764831543, "step": 9330 }, { "epoch": 40.786026200873366, "grad_norm": 7.220417143905248e-07, "learning_rate": 3.690133063369238e-06, "logits/chosen": -1.3175197839736938, "logits/rejected": -1.5949375629425049, "logps/chosen": -377.42901611328125, "logps/rejected": -4158.1025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9556092023849487, "rewards/margins": 37.967079162597656, "rewards/rejected": -38.92268753051758, "step": 9340 }, { "epoch": 40.8296943231441, "grad_norm": 9.310655851636271e-07, "learning_rate": 3.686780434671469e-06, "logits/chosen": -1.2830168008804321, "logits/rejected": -1.5934183597564697, "logps/chosen": -408.8470153808594, "logps/rejected": -3817.72021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.137746810913086, "rewards/margins": 34.50638961791992, "rewards/rejected": -35.64413833618164, "step": 9350 }, { "epoch": 40.87336244541485, "grad_norm": 7.915656898693529e-07, "learning_rate": 3.683425048483005e-06, "logits/chosen": -1.2918862104415894, "logits/rejected": -1.5725178718566895, "logps/chosen": -379.4170837402344, "logps/rejected": -4104.1875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9740253686904907, "rewards/margins": 37.380794525146484, "rewards/rejected": -38.354820251464844, "step": 9360 }, { "epoch": 40.917030567685586, "grad_norm": 2.6018898545287314e-06, "learning_rate": 3.6800669126001025e-06, "logits/chosen": -1.2841399908065796, "logits/rejected": -1.6071150302886963, "logps/chosen": -404.85687255859375, "logps/rejected": -4156.13720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1108410358428955, "rewards/margins": 37.85605239868164, "rewards/rejected": -38.966896057128906, "step": 9370 }, { "epoch": 40.96069868995633, "grad_norm": 1.6057828296539685e-06, "learning_rate": 3.67670603482541e-06, "logits/chosen": -1.2817529439926147, "logits/rejected": -1.5495535135269165, "logps/chosen": -374.9440612792969, "logps/rejected": -3625.626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9936618804931641, "rewards/margins": 32.87001419067383, "rewards/rejected": -33.86367416381836, "step": 9380 }, { "epoch": 41.004366812227076, "grad_norm": 4.300974959520239e-05, "learning_rate": 3.6733424229679443e-06, "logits/chosen": -1.2805943489074707, "logits/rejected": -1.60415518283844, "logps/chosen": -375.54656982421875, "logps/rejected": -4110.77099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0495401620864868, "rewards/margins": 37.39307403564453, "rewards/rejected": -38.4426155090332, "step": 9390 }, { "epoch": 41.048034934497814, "grad_norm": 2.966148727140021e-05, "learning_rate": 3.6699760848430753e-06, "logits/chosen": -1.38267982006073, "logits/rejected": -1.7030357122421265, "logps/chosen": -340.0284729003906, "logps/rejected": -5153.42333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0094029903411865, "rewards/margins": 47.218360900878906, "rewards/rejected": -48.22776794433594, "step": 9400 }, { "epoch": 41.09170305676856, "grad_norm": 8.575376768271156e-07, "learning_rate": 3.6666070282725086e-06, "logits/chosen": -1.315995216369629, "logits/rejected": -1.6730855703353882, "logps/chosen": -388.5723876953125, "logps/rejected": -4297.4619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9974080920219421, "rewards/margins": 39.255516052246094, "rewards/rejected": -40.25292205810547, "step": 9410 }, { "epoch": 41.1353711790393, "grad_norm": 1.4852388178136824e-06, "learning_rate": 3.6632352610842644e-06, "logits/chosen": -1.3689250946044922, "logits/rejected": -1.7120097875595093, "logps/chosen": -343.6511535644531, "logps/rejected": -4891.5068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.994952380657196, "rewards/margins": 44.82396697998047, "rewards/rejected": -45.81892013549805, "step": 9420 }, { "epoch": 41.17903930131004, "grad_norm": 1.624799494916388e-06, "learning_rate": 3.6598607911126614e-06, "logits/chosen": -1.294793725013733, "logits/rejected": -1.6227142810821533, "logps/chosen": -379.1533203125, "logps/rejected": -3798.707763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0019090175628662, "rewards/margins": 34.44221115112305, "rewards/rejected": -35.444122314453125, "step": 9430 }, { "epoch": 41.222707423580786, "grad_norm": 5.6832219971837e-06, "learning_rate": 3.6564836261982995e-06, "logits/chosen": -1.344571590423584, "logits/rejected": -1.6758205890655518, "logps/chosen": -384.2198791503906, "logps/rejected": -4496.39306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0048933029174805, "rewards/margins": 41.04423141479492, "rewards/rejected": -42.04912567138672, "step": 9440 }, { "epoch": 41.26637554585153, "grad_norm": 8.736157676772306e-07, "learning_rate": 3.653103774188039e-06, "logits/chosen": -1.3209130764007568, "logits/rejected": -1.6458994150161743, "logps/chosen": -398.1841735839844, "logps/rejected": -4113.4267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.985622227191925, "rewards/margins": 37.50380325317383, "rewards/rejected": -38.48942947387695, "step": 9450 }, { "epoch": 41.31004366812227, "grad_norm": 2.344119481234071e-06, "learning_rate": 3.6497212429349848e-06, "logits/chosen": -1.3522450923919678, "logits/rejected": -1.6724870204925537, "logps/chosen": -404.16326904296875, "logps/rejected": -4281.1494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.242368221282959, "rewards/margins": 38.81561279296875, "rewards/rejected": -40.05797576904297, "step": 9460 }, { "epoch": 41.353711790393014, "grad_norm": 7.610605822799029e-07, "learning_rate": 3.6463360402984656e-06, "logits/chosen": -1.3361843824386597, "logits/rejected": -1.671330451965332, "logps/chosen": -390.9928283691406, "logps/rejected": -4120.796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1189265251159668, "rewards/margins": 37.55131912231445, "rewards/rejected": -38.67024230957031, "step": 9470 }, { "epoch": 41.39737991266376, "grad_norm": 2.528323472307873e-05, "learning_rate": 3.6429481741440176e-06, "logits/chosen": -1.3114118576049805, "logits/rejected": -1.6378971338272095, "logps/chosen": -413.9805603027344, "logps/rejected": -4068.239013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0521118640899658, "rewards/margins": 36.972694396972656, "rewards/rejected": -38.024803161621094, "step": 9480 }, { "epoch": 41.4410480349345, "grad_norm": 2.4709720091062743e-05, "learning_rate": 3.6395576523433672e-06, "logits/chosen": -1.4078172445297241, "logits/rejected": -1.7284799814224243, "logps/chosen": -366.4231262207031, "logps/rejected": -5007.3291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9632472991943359, "rewards/margins": 46.00090789794922, "rewards/rejected": -46.964149475097656, "step": 9490 }, { "epoch": 41.48471615720524, "grad_norm": 1.189864546633928e-06, "learning_rate": 3.636164482774408e-06, "logits/chosen": -1.3363869190216064, "logits/rejected": -1.6449304819107056, "logps/chosen": -373.4593505859375, "logps/rejected": -4386.07177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9970817565917969, "rewards/margins": 39.992149353027344, "rewards/rejected": -40.98923110961914, "step": 9500 }, { "epoch": 41.52838427947598, "grad_norm": 1.5607362995963916e-05, "learning_rate": 3.63276867332119e-06, "logits/chosen": -1.3613418340682983, "logits/rejected": -1.6661970615386963, "logps/chosen": -393.5556945800781, "logps/rejected": -4296.607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.145193338394165, "rewards/margins": 38.93695068359375, "rewards/rejected": -40.0821418762207, "step": 9510 }, { "epoch": 41.572052401746724, "grad_norm": 1.1394035955397752e-05, "learning_rate": 3.6293702318738937e-06, "logits/chosen": -1.3366461992263794, "logits/rejected": -1.7024898529052734, "logps/chosen": -433.05975341796875, "logps/rejected": -4267.68115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1567285060882568, "rewards/margins": 38.76183319091797, "rewards/rejected": -39.91857147216797, "step": 9520 }, { "epoch": 41.61572052401747, "grad_norm": 7.545912750668724e-07, "learning_rate": 3.6259691663288155e-06, "logits/chosen": -1.3896945714950562, "logits/rejected": -1.7162030935287476, "logps/chosen": -362.23931884765625, "logps/rejected": -4740.39501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0602957010269165, "rewards/margins": 43.3348274230957, "rewards/rejected": -44.39512252807617, "step": 9530 }, { "epoch": 41.65938864628821, "grad_norm": 1.9705187754403325e-05, "learning_rate": 3.6225654845883495e-06, "logits/chosen": -1.4116536378860474, "logits/rejected": -1.705548882484436, "logps/chosen": -342.31866455078125, "logps/rejected": -4952.13232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9969393610954285, "rewards/margins": 45.298179626464844, "rewards/rejected": -46.29512023925781, "step": 9540 }, { "epoch": 41.70305676855895, "grad_norm": 2.55697334481764e-05, "learning_rate": 3.6191591945609696e-06, "logits/chosen": -1.3825212717056274, "logits/rejected": -1.7188020944595337, "logps/chosen": -395.1722717285156, "logps/rejected": -4522.142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1515785455703735, "rewards/margins": 41.19541931152344, "rewards/rejected": -42.34699630737305, "step": 9550 }, { "epoch": 41.7467248908297, "grad_norm": 3.0296573821259747e-05, "learning_rate": 3.6157503041612076e-06, "logits/chosen": -1.398061990737915, "logits/rejected": -1.730523705482483, "logps/chosen": -387.11260986328125, "logps/rejected": -4446.51416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9647854566574097, "rewards/margins": 40.57984924316406, "rewards/rejected": -41.54462814331055, "step": 9560 }, { "epoch": 41.790393013100434, "grad_norm": 1.2814925281484547e-06, "learning_rate": 3.612338821309638e-06, "logits/chosen": -1.336963415145874, "logits/rejected": -1.6244964599609375, "logps/chosen": -389.1455383300781, "logps/rejected": -4110.26904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1122369766235352, "rewards/margins": 37.31626510620117, "rewards/rejected": -38.42850112915039, "step": 9570 }, { "epoch": 41.83406113537118, "grad_norm": 5.17390155694307e-07, "learning_rate": 3.608924753932862e-06, "logits/chosen": -1.3756074905395508, "logits/rejected": -1.6809616088867188, "logps/chosen": -364.39813232421875, "logps/rejected": -4416.2724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.076537847518921, "rewards/margins": 40.22077178955078, "rewards/rejected": -41.297306060791016, "step": 9580 }, { "epoch": 41.877729257641924, "grad_norm": 8.704988024451888e-07, "learning_rate": 3.605508109963481e-06, "logits/chosen": -1.3122217655181885, "logits/rejected": -1.7101329565048218, "logps/chosen": -382.8075866699219, "logps/rejected": -4317.5400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9974538087844849, "rewards/margins": 39.46940231323242, "rewards/rejected": -40.46685791015625, "step": 9590 }, { "epoch": 41.92139737991266, "grad_norm": 1.8020401404102188e-05, "learning_rate": 3.6020888973400868e-06, "logits/chosen": -1.3132412433624268, "logits/rejected": -1.7016029357910156, "logps/chosen": -419.1852111816406, "logps/rejected": -4255.73583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1158881187438965, "rewards/margins": 38.72046661376953, "rewards/rejected": -39.83635711669922, "step": 9600 }, { "epoch": 41.96506550218341, "grad_norm": 1.164313790446808e-06, "learning_rate": 3.5986671240072384e-06, "logits/chosen": -1.317927598953247, "logits/rejected": -1.642204999923706, "logps/chosen": -393.88922119140625, "logps/rejected": -4220.80029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0782744884490967, "rewards/margins": 38.412513732910156, "rewards/rejected": -39.490787506103516, "step": 9610 }, { "epoch": 42.00873362445415, "grad_norm": 5.875014047854254e-07, "learning_rate": 3.595242797915443e-06, "logits/chosen": -1.3573808670043945, "logits/rejected": -1.7188720703125, "logps/chosen": -388.267333984375, "logps/rejected": -4618.0302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1883097887039185, "rewards/margins": 42.08388900756836, "rewards/rejected": -43.272193908691406, "step": 9620 }, { "epoch": 42.05240174672489, "grad_norm": 2.0305330760517435e-06, "learning_rate": 3.5918159270211423e-06, "logits/chosen": -1.2999887466430664, "logits/rejected": -1.6522296667099, "logps/chosen": -392.66131591796875, "logps/rejected": -4116.19580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0831577777862549, "rewards/margins": 37.43233871459961, "rewards/rejected": -38.51549530029297, "step": 9630 }, { "epoch": 42.096069868995635, "grad_norm": 1.500470187886747e-06, "learning_rate": 3.5883865192866886e-06, "logits/chosen": -1.3732726573944092, "logits/rejected": -1.7327327728271484, "logps/chosen": -367.05987548828125, "logps/rejected": -5020.39111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0310848951339722, "rewards/margins": 45.92085266113281, "rewards/rejected": -46.95193862915039, "step": 9640 }, { "epoch": 42.13973799126637, "grad_norm": 0.0007868412373261899, "learning_rate": 3.584954582680328e-06, "logits/chosen": -1.341403603553772, "logits/rejected": -1.699082374572754, "logps/chosen": -381.9629211425781, "logps/rejected": -4411.27880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.003710150718689, "rewards/margins": 40.25922393798828, "rewards/rejected": -41.26293182373047, "step": 9650 }, { "epoch": 42.18340611353712, "grad_norm": 2.0399815422025587e-06, "learning_rate": 3.5815201251761854e-06, "logits/chosen": -1.3105579614639282, "logits/rejected": -1.6366218328475952, "logps/chosen": -395.5363464355469, "logps/rejected": -4102.27587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2344188690185547, "rewards/margins": 37.12092590332031, "rewards/rejected": -38.355342864990234, "step": 9660 }, { "epoch": 42.22707423580786, "grad_norm": 1.155343485579417e-06, "learning_rate": 3.578083154754241e-06, "logits/chosen": -1.3653831481933594, "logits/rejected": -1.7198009490966797, "logps/chosen": -413.01458740234375, "logps/rejected": -4392.9150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0841282606124878, "rewards/margins": 40.03726577758789, "rewards/rejected": -41.121395111083984, "step": 9670 }, { "epoch": 42.2707423580786, "grad_norm": 8.864513250523484e-06, "learning_rate": 3.5746436794003126e-06, "logits/chosen": -1.336261510848999, "logits/rejected": -1.7072563171386719, "logps/chosen": -404.1436767578125, "logps/rejected": -4123.3720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0847523212432861, "rewards/margins": 37.49838638305664, "rewards/rejected": -38.58313751220703, "step": 9680 }, { "epoch": 42.314410480349345, "grad_norm": 4.465687626547072e-06, "learning_rate": 3.5712017071060402e-06, "logits/chosen": -1.3405824899673462, "logits/rejected": -1.6441398859024048, "logps/chosen": -402.46038818359375, "logps/rejected": -4081.43505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.015092372894287, "rewards/margins": 36.968849182128906, "rewards/rejected": -37.98394012451172, "step": 9690 }, { "epoch": 42.35807860262009, "grad_norm": 1.9098748913958842e-05, "learning_rate": 3.5677572458688654e-06, "logits/chosen": -1.3944706916809082, "logits/rejected": -1.7504844665527344, "logps/chosen": -371.8915100097656, "logps/rejected": -4638.8916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.160615086555481, "rewards/margins": 42.20024490356445, "rewards/rejected": -43.36085510253906, "step": 9700 }, { "epoch": 42.40174672489083, "grad_norm": 2.032977388949193e-06, "learning_rate": 3.5643103036920116e-06, "logits/chosen": -1.3630824089050293, "logits/rejected": -1.7028506994247437, "logps/chosen": -385.071533203125, "logps/rejected": -4309.3720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1147198677062988, "rewards/margins": 39.1944465637207, "rewards/rejected": -40.30916976928711, "step": 9710 }, { "epoch": 42.44541484716157, "grad_norm": 9.566788122987791e-07, "learning_rate": 3.5608608885844685e-06, "logits/chosen": -1.3659809827804565, "logits/rejected": -1.7487550973892212, "logps/chosen": -380.18157958984375, "logps/rejected": -4770.0517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1642860174179077, "rewards/margins": 43.46075439453125, "rewards/rejected": -44.62504196166992, "step": 9720 }, { "epoch": 42.48908296943232, "grad_norm": 2.2524028335453694e-05, "learning_rate": 3.5574090085609697e-06, "logits/chosen": -1.3858458995819092, "logits/rejected": -1.721029281616211, "logps/chosen": -384.6856994628906, "logps/rejected": -4528.953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.046578049659729, "rewards/margins": 41.24367141723633, "rewards/rejected": -42.290245056152344, "step": 9730 }, { "epoch": 42.532751091703055, "grad_norm": 4.848097875925506e-07, "learning_rate": 3.553954671641977e-06, "logits/chosen": -1.2812707424163818, "logits/rejected": -1.5786880254745483, "logps/chosen": -388.11578369140625, "logps/rejected": -3920.97265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.042118787765503, "rewards/margins": 35.59456253051758, "rewards/rejected": -36.636680603027344, "step": 9740 }, { "epoch": 42.5764192139738, "grad_norm": 1.7054606954960342e-06, "learning_rate": 3.5504978858536605e-06, "logits/chosen": -1.3445061445236206, "logits/rejected": -1.6400690078735352, "logps/chosen": -386.8675231933594, "logps/rejected": -3886.678955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0679134130477905, "rewards/margins": 35.232208251953125, "rewards/rejected": -36.30012130737305, "step": 9750 }, { "epoch": 42.620087336244545, "grad_norm": 2.3020559855150854e-07, "learning_rate": 3.547038659227881e-06, "logits/chosen": -1.347333550453186, "logits/rejected": -1.702291488647461, "logps/chosen": -422.40509033203125, "logps/rejected": -4160.51220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2350947856903076, "rewards/margins": 37.71281051635742, "rewards/rejected": -38.947898864746094, "step": 9760 }, { "epoch": 42.66375545851528, "grad_norm": 2.1202104821992582e-05, "learning_rate": 3.5435769998021698e-06, "logits/chosen": -1.289858102798462, "logits/rejected": -1.6328601837158203, "logps/chosen": -392.7647399902344, "logps/rejected": -3864.47705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0662152767181396, "rewards/margins": 35.05681610107422, "rewards/rejected": -36.1230354309082, "step": 9770 }, { "epoch": 42.70742358078603, "grad_norm": 2.4755999037630312e-06, "learning_rate": 3.5401129156197106e-06, "logits/chosen": -1.3601348400115967, "logits/rejected": -1.6638247966766357, "logps/chosen": -384.0453796386719, "logps/rejected": -4030.60986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.069248080253601, "rewards/margins": 36.55609130859375, "rewards/rejected": -37.62533950805664, "step": 9780 }, { "epoch": 42.751091703056765, "grad_norm": 8.080112846234131e-05, "learning_rate": 3.5366464147293226e-06, "logits/chosen": -1.355118989944458, "logits/rejected": -1.6913963556289673, "logps/chosen": -375.6574401855469, "logps/rejected": -4136.6689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0870273113250732, "rewards/margins": 37.54057693481445, "rewards/rejected": -38.62760543823242, "step": 9790 }, { "epoch": 42.79475982532751, "grad_norm": 2.6858114214686438e-05, "learning_rate": 3.53317750518544e-06, "logits/chosen": -1.4139697551727295, "logits/rejected": -1.7556474208831787, "logps/chosen": -381.36480712890625, "logps/rejected": -4974.4169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.14626145362854, "rewards/margins": 45.3977165222168, "rewards/rejected": -46.543975830078125, "step": 9800 }, { "epoch": 42.838427947598255, "grad_norm": 8.302202190438729e-07, "learning_rate": 3.5297061950480894e-06, "logits/chosen": -1.3429772853851318, "logits/rejected": -1.6831077337265015, "logps/chosen": -420.67974853515625, "logps/rejected": -4222.52490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1440439224243164, "rewards/margins": 38.29603576660156, "rewards/rejected": -39.44007873535156, "step": 9810 }, { "epoch": 42.88209606986899, "grad_norm": 1.5369833379127793e-05, "learning_rate": 3.5262324923828817e-06, "logits/chosen": -1.34055757522583, "logits/rejected": -1.6671956777572632, "logps/chosen": -417.053955078125, "logps/rejected": -4268.79931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1098707914352417, "rewards/margins": 38.76665115356445, "rewards/rejected": -39.87651824951172, "step": 9820 }, { "epoch": 42.92576419213974, "grad_norm": 6.760043586404803e-07, "learning_rate": 3.522756405260982e-06, "logits/chosen": -1.3364053964614868, "logits/rejected": -1.687121033668518, "logps/chosen": -387.3602600097656, "logps/rejected": -4151.9541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0655826330184937, "rewards/margins": 37.762813568115234, "rewards/rejected": -38.82839584350586, "step": 9830 }, { "epoch": 42.96943231441048, "grad_norm": 5.15471665919242e-07, "learning_rate": 3.5192779417590985e-06, "logits/chosen": -1.288830041885376, "logits/rejected": -1.612707495689392, "logps/chosen": -405.6219482421875, "logps/rejected": -3875.890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.068671703338623, "rewards/margins": 35.15284729003906, "rewards/rejected": -36.221519470214844, "step": 9840 }, { "epoch": 43.01310043668122, "grad_norm": 9.686348409350374e-07, "learning_rate": 3.515797109959458e-06, "logits/chosen": -1.3536776304244995, "logits/rejected": -1.7191011905670166, "logps/chosen": -377.4076232910156, "logps/rejected": -4569.4453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0548603534698486, "rewards/margins": 41.9017448425293, "rewards/rejected": -42.95660400390625, "step": 9850 }, { "epoch": 43.056768558951966, "grad_norm": 1.6734791691199222e-06, "learning_rate": 3.512313917949794e-06, "logits/chosen": -1.3230475187301636, "logits/rejected": -1.6758642196655273, "logps/chosen": -406.12518310546875, "logps/rejected": -4069.673095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.151188850402832, "rewards/margins": 36.89063262939453, "rewards/rejected": -38.04182052612305, "step": 9860 }, { "epoch": 43.10043668122271, "grad_norm": 2.659980955693576e-05, "learning_rate": 3.5088283738233203e-06, "logits/chosen": -1.4128930568695068, "logits/rejected": -1.7546329498291016, "logps/chosen": -362.01458740234375, "logps/rejected": -4649.29296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.027788758277893, "rewards/margins": 42.50461196899414, "rewards/rejected": -43.53240203857422, "step": 9870 }, { "epoch": 43.14410480349345, "grad_norm": 9.685758882261596e-07, "learning_rate": 3.5053404856787173e-06, "logits/chosen": -1.3537724018096924, "logits/rejected": -1.6933492422103882, "logps/chosen": -382.95806884765625, "logps/rejected": -4239.00439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2560462951660156, "rewards/margins": 38.38239288330078, "rewards/rejected": -39.6384391784668, "step": 9880 }, { "epoch": 43.18777292576419, "grad_norm": 8.428535192122119e-05, "learning_rate": 3.5018502616201126e-06, "logits/chosen": -1.343145728111267, "logits/rejected": -1.691876769065857, "logps/chosen": -393.8322448730469, "logps/rejected": -4583.52294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0739840269088745, "rewards/margins": 41.828182220458984, "rewards/rejected": -42.902164459228516, "step": 9890 }, { "epoch": 43.23144104803494, "grad_norm": 2.0053113539373488e-06, "learning_rate": 3.49835770975706e-06, "logits/chosen": -1.3461253643035889, "logits/rejected": -1.7063783407211304, "logps/chosen": -407.04345703125, "logps/rejected": -4163.43115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2161248922348022, "rewards/margins": 37.79737091064453, "rewards/rejected": -39.01349639892578, "step": 9900 }, { "epoch": 43.275109170305676, "grad_norm": 7.622613412495772e-07, "learning_rate": 3.4948628382045223e-06, "logits/chosen": -1.3326677083969116, "logits/rejected": -1.6896264553070068, "logps/chosen": -417.3880920410156, "logps/rejected": -4176.4833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.105574131011963, "rewards/margins": 38.00785827636719, "rewards/rejected": -39.113433837890625, "step": 9910 }, { "epoch": 43.31877729257642, "grad_norm": 6.399014341275019e-07, "learning_rate": 3.4913656550828536e-06, "logits/chosen": -1.3193622827529907, "logits/rejected": -1.6390635967254639, "logps/chosen": -431.0810546875, "logps/rejected": -3883.212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.171741247177124, "rewards/margins": 35.11399459838867, "rewards/rejected": -36.285736083984375, "step": 9920 }, { "epoch": 43.36244541484716, "grad_norm": 6.036133509005336e-07, "learning_rate": 3.4878661685177758e-06, "logits/chosen": -1.354440450668335, "logits/rejected": -1.7160295248031616, "logps/chosen": -386.71099853515625, "logps/rejected": -4294.80859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1392443180084229, "rewards/margins": 39.065826416015625, "rewards/rejected": -40.205074310302734, "step": 9930 }, { "epoch": 43.4061135371179, "grad_norm": 3.658932993824413e-05, "learning_rate": 3.484364386640365e-06, "logits/chosen": -1.3927581310272217, "logits/rejected": -1.72794508934021, "logps/chosen": -405.55340576171875, "logps/rejected": -4356.72314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.166751742362976, "rewards/margins": 39.513336181640625, "rewards/rejected": -40.68008804321289, "step": 9940 }, { "epoch": 43.44978165938865, "grad_norm": 1.1688839470503074e-05, "learning_rate": 3.480860317587031e-06, "logits/chosen": -1.3918007612228394, "logits/rejected": -1.7656338214874268, "logps/chosen": -431.0546875, "logps/rejected": -4345.57373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.065185546875, "rewards/margins": 39.6394157409668, "rewards/rejected": -40.7046012878418, "step": 9950 }, { "epoch": 43.493449781659386, "grad_norm": 1.0480136319259071e-06, "learning_rate": 3.4773539694994967e-06, "logits/chosen": -1.389481544494629, "logits/rejected": -1.7611109018325806, "logps/chosen": -411.77923583984375, "logps/rejected": -4382.0595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1962316036224365, "rewards/margins": 39.83917236328125, "rewards/rejected": -41.03540802001953, "step": 9960 }, { "epoch": 43.53711790393013, "grad_norm": 1.5714509875554994e-05, "learning_rate": 3.4738453505247798e-06, "logits/chosen": -1.3482438325881958, "logits/rejected": -1.693687081336975, "logps/chosen": -419.69952392578125, "logps/rejected": -4343.8125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1498534679412842, "rewards/margins": 39.4481315612793, "rewards/rejected": -40.597984313964844, "step": 9970 }, { "epoch": 43.580786026200876, "grad_norm": 1.033889925195335e-06, "learning_rate": 3.4703344688151774e-06, "logits/chosen": -1.340855360031128, "logits/rejected": -1.7502307891845703, "logps/chosen": -420.59588623046875, "logps/rejected": -4299.8544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1930384635925293, "rewards/margins": 39.074859619140625, "rewards/rejected": -40.26789474487305, "step": 9980 }, { "epoch": 43.624454148471614, "grad_norm": 7.62552224412631e-07, "learning_rate": 3.466821332528239e-06, "logits/chosen": -1.3510949611663818, "logits/rejected": -1.683932900428772, "logps/chosen": -409.2018127441406, "logps/rejected": -4131.03076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0480659008026123, "rewards/margins": 37.536964416503906, "rewards/rejected": -38.58502960205078, "step": 9990 }, { "epoch": 43.66812227074236, "grad_norm": 6.377308323521132e-07, "learning_rate": 3.4633059498267578e-06, "logits/chosen": -1.3731515407562256, "logits/rejected": -1.740764856338501, "logps/chosen": -394.413330078125, "logps/rejected": -4584.37060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2510764598846436, "rewards/margins": 41.65357208251953, "rewards/rejected": -42.9046516418457, "step": 10000 }, { "epoch": 43.7117903930131, "grad_norm": 8.489424500612415e-07, "learning_rate": 3.459788328878745e-06, "logits/chosen": -1.35395085811615, "logits/rejected": -1.7164628505706787, "logps/chosen": -408.74517822265625, "logps/rejected": -4247.50244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.154498815536499, "rewards/margins": 38.52543258666992, "rewards/rejected": -39.679931640625, "step": 10010 }, { "epoch": 43.75545851528384, "grad_norm": 4.167921764153041e-06, "learning_rate": 3.4562684778574084e-06, "logits/chosen": -1.3131883144378662, "logits/rejected": -1.7074098587036133, "logps/chosen": -412.73040771484375, "logps/rejected": -4018.10791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1578483581542969, "rewards/margins": 36.521202087402344, "rewards/rejected": -37.67905044555664, "step": 10020 }, { "epoch": 43.799126637554586, "grad_norm": 1.3338784183750697e-06, "learning_rate": 3.452746404941143e-06, "logits/chosen": -1.3259856700897217, "logits/rejected": -1.7117664813995361, "logps/chosen": -372.3418273925781, "logps/rejected": -4388.97265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1277135610580444, "rewards/margins": 39.96883010864258, "rewards/rejected": -41.09654235839844, "step": 10030 }, { "epoch": 43.842794759825324, "grad_norm": 9.064581966512643e-07, "learning_rate": 3.4492221183135046e-06, "logits/chosen": -1.3576804399490356, "logits/rejected": -1.789417028427124, "logps/chosen": -401.9754943847656, "logps/rejected": -4723.1982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1766053438186646, "rewards/margins": 43.17119598388672, "rewards/rejected": -44.347801208496094, "step": 10040 }, { "epoch": 43.88646288209607, "grad_norm": 5.846170743975175e-05, "learning_rate": 3.4456956261631905e-06, "logits/chosen": -1.4087094068527222, "logits/rejected": -1.8174680471420288, "logps/chosen": -373.9812316894531, "logps/rejected": -4578.9248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0842944383621216, "rewards/margins": 41.72551345825195, "rewards/rejected": -42.80980682373047, "step": 10050 }, { "epoch": 43.930131004366814, "grad_norm": 8.845670670830723e-07, "learning_rate": 3.442166936684025e-06, "logits/chosen": -1.3852112293243408, "logits/rejected": -1.7716983556747437, "logps/chosen": -389.8218688964844, "logps/rejected": -4682.640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1639251708984375, "rewards/margins": 42.7164421081543, "rewards/rejected": -43.880367279052734, "step": 10060 }, { "epoch": 43.97379912663755, "grad_norm": 0.00023874511137359696, "learning_rate": 3.438636058074937e-06, "logits/chosen": -1.3813189268112183, "logits/rejected": -1.7580143213272095, "logps/chosen": -403.8226318359375, "logps/rejected": -4291.29150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2328920364379883, "rewards/margins": 38.92688751220703, "rewards/rejected": -40.15978240966797, "step": 10070 }, { "epoch": 44.0174672489083, "grad_norm": 4.028547613960159e-06, "learning_rate": 3.435102998539942e-06, "logits/chosen": -1.3671469688415527, "logits/rejected": -1.7491929531097412, "logps/chosen": -401.95416259765625, "logps/rejected": -4386.04052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2222349643707275, "rewards/margins": 39.782135009765625, "rewards/rejected": -41.004371643066406, "step": 10080 }, { "epoch": 44.06113537117904, "grad_norm": 2.9260616263210027e-07, "learning_rate": 3.431567766288121e-06, "logits/chosen": -1.4285004138946533, "logits/rejected": -1.7389991283416748, "logps/chosen": -381.2070617675781, "logps/rejected": -4371.68408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1176648139953613, "rewards/margins": 39.71975326538086, "rewards/rejected": -40.837425231933594, "step": 10090 }, { "epoch": 44.10480349344978, "grad_norm": 1.6536639089250109e-06, "learning_rate": 3.4280303695336075e-06, "logits/chosen": -1.370891809463501, "logits/rejected": -1.7495081424713135, "logps/chosen": -421.15496826171875, "logps/rejected": -4531.6474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.244384527206421, "rewards/margins": 41.22222900390625, "rewards/rejected": -42.46661376953125, "step": 10100 }, { "epoch": 44.148471615720524, "grad_norm": 1.1403402477559736e-06, "learning_rate": 3.424490816495561e-06, "logits/chosen": -1.3528438806533813, "logits/rejected": -1.7395824193954468, "logps/chosen": -402.16497802734375, "logps/rejected": -4206.625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1188350915908813, "rewards/margins": 38.3264274597168, "rewards/rejected": -39.44526672363281, "step": 10110 }, { "epoch": 44.19213973799127, "grad_norm": 6.140333865372878e-05, "learning_rate": 3.420949115398151e-06, "logits/chosen": -1.3121669292449951, "logits/rejected": -1.7043691873550415, "logps/chosen": -421.0325622558594, "logps/rejected": -4179.5419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1930298805236816, "rewards/margins": 37.98099136352539, "rewards/rejected": -39.17401885986328, "step": 10120 }, { "epoch": 44.23580786026201, "grad_norm": 2.7888378431061684e-06, "learning_rate": 3.4174052744705403e-06, "logits/chosen": -1.3599159717559814, "logits/rejected": -1.7418292760849, "logps/chosen": -411.99310302734375, "logps/rejected": -4014.5, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1587941646575928, "rewards/margins": 36.458953857421875, "rewards/rejected": -37.61774826049805, "step": 10130 }, { "epoch": 44.27947598253275, "grad_norm": 6.778148190132217e-07, "learning_rate": 3.4138593019468614e-06, "logits/chosen": -1.331404685974121, "logits/rejected": -1.6814260482788086, "logps/chosen": -411.3203125, "logps/rejected": -4345.07275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.176973819732666, "rewards/margins": 39.40439987182617, "rewards/rejected": -40.58137512207031, "step": 10140 }, { "epoch": 44.3231441048035, "grad_norm": 1.6788764252811686e-06, "learning_rate": 3.4103112060662007e-06, "logits/chosen": -1.3514149188995361, "logits/rejected": -1.7453300952911377, "logps/chosen": -390.8112487792969, "logps/rejected": -4506.3115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1501107215881348, "rewards/margins": 41.108829498291016, "rewards/rejected": -42.25893783569336, "step": 10150 }, { "epoch": 44.366812227074234, "grad_norm": 7.893590744093491e-07, "learning_rate": 3.4067609950725782e-06, "logits/chosen": -1.375327229499817, "logits/rejected": -1.7379487752914429, "logps/chosen": -384.24462890625, "logps/rejected": -4226.33203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1467337608337402, "rewards/margins": 38.39972686767578, "rewards/rejected": -39.54645919799805, "step": 10160 }, { "epoch": 44.41048034934498, "grad_norm": 5.020019049670884e-07, "learning_rate": 3.4032086772149277e-06, "logits/chosen": -1.3207664489746094, "logits/rejected": -1.7077081203460693, "logps/chosen": -420.2806701660156, "logps/rejected": -4597.068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1284292936325073, "rewards/margins": 41.96434020996094, "rewards/rejected": -43.09276580810547, "step": 10170 }, { "epoch": 44.45414847161572, "grad_norm": 5.763030445097194e-07, "learning_rate": 3.3996542607470783e-06, "logits/chosen": -1.394010305404663, "logits/rejected": -1.811926245689392, "logps/chosen": -421.37060546875, "logps/rejected": -4663.32470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2397516965866089, "rewards/margins": 42.383399963378906, "rewards/rejected": -43.623146057128906, "step": 10180 }, { "epoch": 44.49781659388646, "grad_norm": 1.0455047058950397e-05, "learning_rate": 3.396097753927737e-06, "logits/chosen": -1.3419760465621948, "logits/rejected": -1.7270441055297852, "logps/chosen": -383.8836364746094, "logps/rejected": -4272.65087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1206862926483154, "rewards/margins": 38.92300796508789, "rewards/rejected": -40.04369354248047, "step": 10190 }, { "epoch": 44.54148471615721, "grad_norm": 2.4369677106974107e-05, "learning_rate": 3.392539165020467e-06, "logits/chosen": -1.4073222875595093, "logits/rejected": -1.7826799154281616, "logps/chosen": -411.7093200683594, "logps/rejected": -4512.2626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2483583688735962, "rewards/margins": 41.0316047668457, "rewards/rejected": -42.279964447021484, "step": 10200 }, { "epoch": 44.585152838427945, "grad_norm": 5.5475967505827e-07, "learning_rate": 3.388978502293666e-06, "logits/chosen": -1.3286583423614502, "logits/rejected": -1.7183862924575806, "logps/chosen": -411.6001892089844, "logps/rejected": -4466.162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3602625131607056, "rewards/margins": 40.47590637207031, "rewards/rejected": -41.83616638183594, "step": 10210 }, { "epoch": 44.62882096069869, "grad_norm": 3.5314092449990647e-07, "learning_rate": 3.385415774020555e-06, "logits/chosen": -1.4229495525360107, "logits/rejected": -1.7866308689117432, "logps/chosen": -358.42706298828125, "logps/rejected": -4811.869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1229143142700195, "rewards/margins": 43.95830535888672, "rewards/rejected": -45.08121871948242, "step": 10220 }, { "epoch": 44.672489082969435, "grad_norm": 9.405922916338096e-07, "learning_rate": 3.3818509884791516e-06, "logits/chosen": -1.390855073928833, "logits/rejected": -1.7348378896713257, "logps/chosen": -386.336669921875, "logps/rejected": -4677.09423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1942698955535889, "rewards/margins": 42.627349853515625, "rewards/rejected": -43.82161331176758, "step": 10230 }, { "epoch": 44.71615720524017, "grad_norm": 2.617281293919681e-07, "learning_rate": 3.3782841539522543e-06, "logits/chosen": -1.3607529401779175, "logits/rejected": -1.7218878269195557, "logps/chosen": -403.84600830078125, "logps/rejected": -4067.33349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2684179544448853, "rewards/margins": 36.78803634643555, "rewards/rejected": -38.056453704833984, "step": 10240 }, { "epoch": 44.75982532751092, "grad_norm": 3.717260118635141e-05, "learning_rate": 3.374715278727422e-06, "logits/chosen": -1.3596925735473633, "logits/rejected": -1.7105789184570312, "logps/chosen": -425.7826232910156, "logps/rejected": -3763.48486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0989494323730469, "rewards/margins": 34.09178161621094, "rewards/rejected": -35.19072723388672, "step": 10250 }, { "epoch": 44.80349344978166, "grad_norm": 5.400647791369984e-07, "learning_rate": 3.3711443710969555e-06, "logits/chosen": -1.3859121799468994, "logits/rejected": -1.7672879695892334, "logps/chosen": -405.68878173828125, "logps/rejected": -4602.482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.245996356010437, "rewards/margins": 41.78668975830078, "rewards/rejected": -43.032684326171875, "step": 10260 }, { "epoch": 44.8471615720524, "grad_norm": 3.5869344206258915e-07, "learning_rate": 3.3675714393578774e-06, "logits/chosen": -1.398563265800476, "logits/rejected": -1.734908103942871, "logps/chosen": -369.6419982910156, "logps/rejected": -4312.19775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.115853190422058, "rewards/margins": 39.2096061706543, "rewards/rejected": -40.32545852661133, "step": 10270 }, { "epoch": 44.890829694323145, "grad_norm": 1.2841212272271205e-05, "learning_rate": 3.363996491811914e-06, "logits/chosen": -1.374906301498413, "logits/rejected": -1.7830969095230103, "logps/chosen": -384.4949645996094, "logps/rejected": -4568.5791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1517751216888428, "rewards/margins": 41.7067985534668, "rewards/rejected": -42.85858154296875, "step": 10280 }, { "epoch": 44.93449781659389, "grad_norm": 1.182156963319789e-06, "learning_rate": 3.360419536765476e-06, "logits/chosen": -1.3722296953201294, "logits/rejected": -1.7343568801879883, "logps/chosen": -376.9317626953125, "logps/rejected": -4607.6123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.117580771446228, "rewards/margins": 42.06940460205078, "rewards/rejected": -43.186988830566406, "step": 10290 }, { "epoch": 44.97816593886463, "grad_norm": 1.4697628356156867e-06, "learning_rate": 3.3568405825296355e-06, "logits/chosen": -1.37960684299469, "logits/rejected": -1.7585010528564453, "logps/chosen": -399.1564636230469, "logps/rejected": -4611.67333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1395695209503174, "rewards/margins": 42.09226608276367, "rewards/rejected": -43.23183059692383, "step": 10300 }, { "epoch": 45.02183406113537, "grad_norm": 1.2114629624914258e-05, "learning_rate": 3.353259637420114e-06, "logits/chosen": -1.4126414060592651, "logits/rejected": -1.8024911880493164, "logps/chosen": -371.880859375, "logps/rejected": -4880.7255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1427991390228271, "rewards/margins": 44.54473876953125, "rewards/rejected": -45.687538146972656, "step": 10310 }, { "epoch": 45.06550218340611, "grad_norm": 8.886871736028585e-06, "learning_rate": 3.349676709757256e-06, "logits/chosen": -1.4335720539093018, "logits/rejected": -1.7800161838531494, "logps/chosen": -394.34222412109375, "logps/rejected": -4749.19140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2390401363372803, "rewards/margins": 43.259056091308594, "rewards/rejected": -44.49809265136719, "step": 10320 }, { "epoch": 45.109170305676855, "grad_norm": 3.0630702479903614e-07, "learning_rate": 3.3460918078660125e-06, "logits/chosen": -1.3800654411315918, "logits/rejected": -1.7831666469573975, "logps/chosen": -407.2474365234375, "logps/rejected": -4555.2109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2593872547149658, "rewards/margins": 41.522605895996094, "rewards/rejected": -42.78199005126953, "step": 10330 }, { "epoch": 45.1528384279476, "grad_norm": 6.670134104676362e-07, "learning_rate": 3.342504940075923e-06, "logits/chosen": -1.3499623537063599, "logits/rejected": -1.7187232971191406, "logps/chosen": -417.66119384765625, "logps/rejected": -4202.669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1346014738082886, "rewards/margins": 38.336585998535156, "rewards/rejected": -39.471187591552734, "step": 10340 }, { "epoch": 45.19650655021834, "grad_norm": 8.077686140973376e-06, "learning_rate": 3.338916114721093e-06, "logits/chosen": -1.4057743549346924, "logits/rejected": -1.8121535778045654, "logps/chosen": -382.49627685546875, "logps/rejected": -4885.45458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1620566844940186, "rewards/margins": 44.694557189941406, "rewards/rejected": -45.856605529785156, "step": 10350 }, { "epoch": 45.24017467248908, "grad_norm": 9.276669886028359e-07, "learning_rate": 3.3353253401401785e-06, "logits/chosen": -1.3884166479110718, "logits/rejected": -1.7994188070297241, "logps/chosen": -410.3199157714844, "logps/rejected": -4697.4091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.238932490348816, "rewards/margins": 42.816078186035156, "rewards/rejected": -44.05501174926758, "step": 10360 }, { "epoch": 45.28384279475983, "grad_norm": 8.133698640189356e-07, "learning_rate": 3.331732624676362e-06, "logits/chosen": -1.4448190927505493, "logits/rejected": -1.8772016763687134, "logps/chosen": -397.41180419921875, "logps/rejected": -4996.38330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2196567058563232, "rewards/margins": 45.658573150634766, "rewards/rejected": -46.878231048583984, "step": 10370 }, { "epoch": 45.327510917030565, "grad_norm": 5.3317798560936405e-06, "learning_rate": 3.3281379766773393e-06, "logits/chosen": -1.4233481884002686, "logits/rejected": -1.8470436334609985, "logps/chosen": -395.67340087890625, "logps/rejected": -4328.98828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1442735195159912, "rewards/margins": 39.30529022216797, "rewards/rejected": -40.449562072753906, "step": 10380 }, { "epoch": 45.37117903930131, "grad_norm": 6.284040906887842e-07, "learning_rate": 3.3245414044952927e-06, "logits/chosen": -1.3830525875091553, "logits/rejected": -1.7943652868270874, "logps/chosen": -380.0361328125, "logps/rejected": -4663.4228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1573587656021118, "rewards/margins": 42.552635192871094, "rewards/rejected": -43.70999526977539, "step": 10390 }, { "epoch": 45.414847161572055, "grad_norm": 3.322333798483657e-05, "learning_rate": 3.3209429164868767e-06, "logits/chosen": -1.3460676670074463, "logits/rejected": -1.7364248037338257, "logps/chosen": -409.4378662109375, "logps/rejected": -4065.288330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2479137182235718, "rewards/margins": 36.835166931152344, "rewards/rejected": -38.08308792114258, "step": 10400 }, { "epoch": 45.45851528384279, "grad_norm": 4.4078393204455544e-07, "learning_rate": 3.3173425210131993e-06, "logits/chosen": -1.3443505764007568, "logits/rejected": -1.7015998363494873, "logps/chosen": -402.3202819824219, "logps/rejected": -3996.460205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2032039165496826, "rewards/margins": 36.08274841308594, "rewards/rejected": -37.28594970703125, "step": 10410 }, { "epoch": 45.50218340611354, "grad_norm": 1.1220755915278983e-06, "learning_rate": 3.3137402264397966e-06, "logits/chosen": -1.3806217908859253, "logits/rejected": -1.7652909755706787, "logps/chosen": -405.7267150878906, "logps/rejected": -4455.7666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2579119205474854, "rewards/margins": 40.4453239440918, "rewards/rejected": -41.7032356262207, "step": 10420 }, { "epoch": 45.54585152838428, "grad_norm": 3.875678394743264e-07, "learning_rate": 3.3101360411366205e-06, "logits/chosen": -1.3943403959274292, "logits/rejected": -1.7433305978775024, "logps/chosen": -408.0453186035156, "logps/rejected": -4122.35546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1458485126495361, "rewards/margins": 37.5346565246582, "rewards/rejected": -38.680503845214844, "step": 10430 }, { "epoch": 45.58951965065502, "grad_norm": 5.609458568266076e-07, "learning_rate": 3.3065299734780144e-06, "logits/chosen": -1.3998591899871826, "logits/rejected": -1.8221791982650757, "logps/chosen": -383.2408752441406, "logps/rejected": -4755.77001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2490184307098389, "rewards/margins": 43.36774444580078, "rewards/rejected": -44.61676025390625, "step": 10440 }, { "epoch": 45.633187772925766, "grad_norm": 1.1751409047845354e-06, "learning_rate": 3.3029220318426963e-06, "logits/chosen": -1.3772990703582764, "logits/rejected": -1.767221450805664, "logps/chosen": -389.1448059082031, "logps/rejected": -4617.28271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2342216968536377, "rewards/margins": 42.045387268066406, "rewards/rejected": -43.27960968017578, "step": 10450 }, { "epoch": 45.6768558951965, "grad_norm": 2.0364976489419933e-05, "learning_rate": 3.299312224613737e-06, "logits/chosen": -1.3806982040405273, "logits/rejected": -1.795692801475525, "logps/chosen": -419.7334899902344, "logps/rejected": -4612.93017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2780383825302124, "rewards/margins": 41.952579498291016, "rewards/rejected": -43.230621337890625, "step": 10460 }, { "epoch": 45.72052401746725, "grad_norm": 4.879308441041861e-07, "learning_rate": 3.295700560178544e-06, "logits/chosen": -1.351818323135376, "logits/rejected": -1.7922741174697876, "logps/chosen": -438.92364501953125, "logps/rejected": -4083.72021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2414909601211548, "rewards/margins": 37.0820426940918, "rewards/rejected": -38.32353591918945, "step": 10470 }, { "epoch": 45.76419213973799, "grad_norm": 0.0001974725278748776, "learning_rate": 3.292087046928838e-06, "logits/chosen": -1.3788955211639404, "logits/rejected": -1.7980461120605469, "logps/chosen": -400.58380126953125, "logps/rejected": -4448.44384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2288258075714111, "rewards/margins": 40.45349884033203, "rewards/rejected": -41.68232345581055, "step": 10480 }, { "epoch": 45.80786026200873, "grad_norm": 9.87620389999291e-07, "learning_rate": 3.288471693260637e-06, "logits/chosen": -1.4244482517242432, "logits/rejected": -1.8892666101455688, "logps/chosen": -410.2581481933594, "logps/rejected": -4850.3037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3503166437149048, "rewards/margins": 44.14502716064453, "rewards/rejected": -45.4953498840332, "step": 10490 }, { "epoch": 45.851528384279476, "grad_norm": 6.937327120334261e-06, "learning_rate": 3.2848545075742345e-06, "logits/chosen": -1.363517165184021, "logits/rejected": -1.6945298910140991, "logps/chosen": -384.7757873535156, "logps/rejected": -4269.34619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2428109645843506, "rewards/margins": 38.79538345336914, "rewards/rejected": -40.03819274902344, "step": 10500 }, { "epoch": 45.89519650655022, "grad_norm": 4.420682058694499e-05, "learning_rate": 3.2812354982741806e-06, "logits/chosen": -1.3630712032318115, "logits/rejected": -1.6892133951187134, "logps/chosen": -402.3846130371094, "logps/rejected": -3879.63330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.182901382446289, "rewards/margins": 35.129276275634766, "rewards/rejected": -36.31217575073242, "step": 10510 }, { "epoch": 45.93886462882096, "grad_norm": 4.048278594902854e-06, "learning_rate": 3.2776146737692634e-06, "logits/chosen": -1.3364295959472656, "logits/rejected": -1.7105249166488647, "logps/chosen": -442.2484436035156, "logps/rejected": -4137.375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.329344630241394, "rewards/margins": 37.43069839477539, "rewards/rejected": -38.760047912597656, "step": 10520 }, { "epoch": 45.9825327510917, "grad_norm": 1.999569624261799e-05, "learning_rate": 3.273992042472487e-06, "logits/chosen": -1.3931812047958374, "logits/rejected": -1.7952091693878174, "logps/chosen": -403.8631286621094, "logps/rejected": -4621.529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2187483310699463, "rewards/margins": 42.118385314941406, "rewards/rejected": -43.33713150024414, "step": 10530 }, { "epoch": 46.02620087336245, "grad_norm": 3.94854859338777e-06, "learning_rate": 3.2703676128010555e-06, "logits/chosen": -1.4237862825393677, "logits/rejected": -1.8443622589111328, "logps/chosen": -425.3539123535156, "logps/rejected": -4697.72900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2794783115386963, "rewards/margins": 42.721466064453125, "rewards/rejected": -44.00094223022461, "step": 10540 }, { "epoch": 46.069868995633186, "grad_norm": 9.357661761451613e-07, "learning_rate": 3.2667413931763503e-06, "logits/chosen": -1.3659241199493408, "logits/rejected": -1.7774235010147095, "logps/chosen": -386.91436767578125, "logps/rejected": -4709.5146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.159396767616272, "rewards/margins": 42.959381103515625, "rewards/rejected": -44.1187744140625, "step": 10550 }, { "epoch": 46.11353711790393, "grad_norm": 6.858830465209747e-07, "learning_rate": 3.263113392023912e-06, "logits/chosen": -1.3463222980499268, "logits/rejected": -1.7132351398468018, "logps/chosen": -423.0672912597656, "logps/rejected": -4011.83154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2003682851791382, "rewards/margins": 36.25753402709961, "rewards/rejected": -37.4578971862793, "step": 10560 }, { "epoch": 46.157205240174676, "grad_norm": 1.0377726707716553e-06, "learning_rate": 3.2594836177734208e-06, "logits/chosen": -1.3867466449737549, "logits/rejected": -1.852745771408081, "logps/chosen": -405.38616943359375, "logps/rejected": -4730.71044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2114057540893555, "rewards/margins": 43.200618743896484, "rewards/rejected": -44.412025451660156, "step": 10570 }, { "epoch": 46.200873362445414, "grad_norm": 7.877383509334556e-07, "learning_rate": 3.2558520788586767e-06, "logits/chosen": -1.396272897720337, "logits/rejected": -1.7966006994247437, "logps/chosen": -407.67633056640625, "logps/rejected": -4276.31298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2040294408798218, "rewards/margins": 38.8544921875, "rewards/rejected": -40.05852127075195, "step": 10580 }, { "epoch": 46.24454148471616, "grad_norm": 8.675207189818416e-05, "learning_rate": 3.2522187837175797e-06, "logits/chosen": -1.365479826927185, "logits/rejected": -1.799335241317749, "logps/chosen": -379.4496154785156, "logps/rejected": -4552.9580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1237720251083374, "rewards/margins": 41.51537322998047, "rewards/rejected": -42.6391487121582, "step": 10590 }, { "epoch": 46.2882096069869, "grad_norm": 1.3055428361966273e-05, "learning_rate": 3.2485837407921113e-06, "logits/chosen": -1.3797428607940674, "logits/rejected": -1.8190038204193115, "logps/chosen": -430.3252868652344, "logps/rejected": -4754.0419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4866551160812378, "rewards/margins": 43.24980163574219, "rewards/rejected": -44.73645782470703, "step": 10600 }, { "epoch": 46.33187772925764, "grad_norm": 7.692305106127273e-06, "learning_rate": 3.244946958528311e-06, "logits/chosen": -1.4005193710327148, "logits/rejected": -1.7856683731079102, "logps/chosen": -405.75823974609375, "logps/rejected": -4379.5439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3076971769332886, "rewards/margins": 39.62186050415039, "rewards/rejected": -40.92955017089844, "step": 10610 }, { "epoch": 46.375545851528386, "grad_norm": 3.4590652723021567e-07, "learning_rate": 3.2413084453762646e-06, "logits/chosen": -1.3963826894760132, "logits/rejected": -1.7774333953857422, "logps/chosen": -372.45416259765625, "logps/rejected": -4794.8173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.173656702041626, "rewards/margins": 43.781551361083984, "rewards/rejected": -44.95520782470703, "step": 10620 }, { "epoch": 46.419213973799124, "grad_norm": 4.766029788891117e-05, "learning_rate": 3.2376682097900756e-06, "logits/chosen": -1.4343469142913818, "logits/rejected": -1.8373019695281982, "logps/chosen": -380.4017639160156, "logps/rejected": -4609.95556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0967497825622559, "rewards/margins": 42.1701774597168, "rewards/rejected": -43.26692581176758, "step": 10630 }, { "epoch": 46.46288209606987, "grad_norm": 1.3197853952522655e-05, "learning_rate": 3.2340262602278504e-06, "logits/chosen": -1.3419541120529175, "logits/rejected": -1.7301502227783203, "logps/chosen": -415.35296630859375, "logps/rejected": -3890.49951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1761597394943237, "rewards/margins": 35.25223922729492, "rewards/rejected": -36.42839813232422, "step": 10640 }, { "epoch": 46.506550218340614, "grad_norm": 1.3898058668710786e-06, "learning_rate": 3.230382605151679e-06, "logits/chosen": -1.3752063512802124, "logits/rejected": -1.7662899494171143, "logps/chosen": -403.3805847167969, "logps/rejected": -4491.5087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1535508632659912, "rewards/margins": 40.91805648803711, "rewards/rejected": -42.07160186767578, "step": 10650 }, { "epoch": 46.55021834061135, "grad_norm": 8.289906619232985e-06, "learning_rate": 3.226737253027614e-06, "logits/chosen": -1.3890917301177979, "logits/rejected": -1.8021529912948608, "logps/chosen": -410.1346740722656, "logps/rejected": -4402.5166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2489585876464844, "rewards/margins": 39.97555160522461, "rewards/rejected": -41.224510192871094, "step": 10660 }, { "epoch": 46.5938864628821, "grad_norm": 7.165291783316483e-06, "learning_rate": 3.2230902123256496e-06, "logits/chosen": -1.370481252670288, "logits/rejected": -1.7661945819854736, "logps/chosen": -375.8973693847656, "logps/rejected": -4625.453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1563092470169067, "rewards/margins": 42.06373977661133, "rewards/rejected": -43.22004699707031, "step": 10670 }, { "epoch": 46.63755458515284, "grad_norm": 1.6699206130164214e-07, "learning_rate": 3.2194414915197037e-06, "logits/chosen": -1.3899073600769043, "logits/rejected": -1.8100261688232422, "logps/chosen": -412.5758361816406, "logps/rejected": -4461.32177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.302864670753479, "rewards/margins": 40.63837814331055, "rewards/rejected": -41.941246032714844, "step": 10680 }, { "epoch": 46.68122270742358, "grad_norm": 2.297367938355847e-07, "learning_rate": 3.215791099087601e-06, "logits/chosen": -1.3992958068847656, "logits/rejected": -1.8064950704574585, "logps/chosen": -401.0730895996094, "logps/rejected": -4816.40380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3175275325775146, "rewards/margins": 43.877479553222656, "rewards/rejected": -45.19499969482422, "step": 10690 }, { "epoch": 46.724890829694324, "grad_norm": 5.281527241980945e-06, "learning_rate": 3.2121390435110455e-06, "logits/chosen": -1.3493802547454834, "logits/rejected": -1.7354366779327393, "logps/chosen": -384.78936767578125, "logps/rejected": -3978.9453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2070457935333252, "rewards/margins": 36.091087341308594, "rewards/rejected": -37.298133850097656, "step": 10700 }, { "epoch": 46.76855895196506, "grad_norm": 3.0652693152917344e-05, "learning_rate": 3.2084853332756096e-06, "logits/chosen": -1.3139514923095703, "logits/rejected": -1.7270982265472412, "logps/chosen": -407.09417724609375, "logps/rejected": -4026.63134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2832118272781372, "rewards/margins": 36.52423095703125, "rewards/rejected": -37.80744171142578, "step": 10710 }, { "epoch": 46.81222707423581, "grad_norm": 6.244786807900662e-05, "learning_rate": 3.204829976870708e-06, "logits/chosen": -1.3370983600616455, "logits/rejected": -1.7235578298568726, "logps/chosen": -425.46630859375, "logps/rejected": -4137.18505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2813835144042969, "rewards/margins": 37.5063591003418, "rewards/rejected": -38.787742614746094, "step": 10720 }, { "epoch": 46.85589519650655, "grad_norm": 7.226130643849631e-07, "learning_rate": 3.20117298278958e-06, "logits/chosen": -1.3502867221832275, "logits/rejected": -1.749647855758667, "logps/chosen": -404.54791259765625, "logps/rejected": -4511.5673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1329259872436523, "rewards/margins": 41.1273193359375, "rewards/rejected": -42.2602424621582, "step": 10730 }, { "epoch": 46.89956331877729, "grad_norm": 2.839797756921121e-05, "learning_rate": 3.1975143595292723e-06, "logits/chosen": -1.3949912786483765, "logits/rejected": -1.8140928745269775, "logps/chosen": -403.8334655761719, "logps/rejected": -4564.83447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.149863839149475, "rewards/margins": 41.65277862548828, "rewards/rejected": -42.802642822265625, "step": 10740 }, { "epoch": 46.943231441048034, "grad_norm": 0.0005083777195886681, "learning_rate": 3.1938541155906146e-06, "logits/chosen": -1.4352613687515259, "logits/rejected": -1.817281723022461, "logps/chosen": -401.11212158203125, "logps/rejected": -4714.7822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1942691802978516, "rewards/margins": 42.93071746826172, "rewards/rejected": -44.12498474121094, "step": 10750 }, { "epoch": 46.98689956331878, "grad_norm": 5.753532032130776e-05, "learning_rate": 3.1901922594782043e-06, "logits/chosen": -1.3744256496429443, "logits/rejected": -1.7822338342666626, "logps/chosen": -394.8987731933594, "logps/rejected": -4634.62109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1678352355957031, "rewards/margins": 42.28645706176758, "rewards/rejected": -43.45429229736328, "step": 10760 }, { "epoch": 47.03056768558952, "grad_norm": 3.9467005993289586e-07, "learning_rate": 3.1865287997003818e-06, "logits/chosen": -1.393357515335083, "logits/rejected": -1.8570953607559204, "logps/chosen": -370.9501953125, "logps/rejected": -4774.43603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1275359392166138, "rewards/margins": 43.70524978637695, "rewards/rejected": -44.83278274536133, "step": 10770 }, { "epoch": 47.07423580786026, "grad_norm": 1.8633693832018332e-06, "learning_rate": 3.1828637447692184e-06, "logits/chosen": -1.3874621391296387, "logits/rejected": -1.7776412963867188, "logps/chosen": -411.1365661621094, "logps/rejected": -4594.22509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.258317470550537, "rewards/margins": 41.775901794433594, "rewards/rejected": -43.034217834472656, "step": 10780 }, { "epoch": 47.11790393013101, "grad_norm": 5.851359721876494e-07, "learning_rate": 3.179197103200487e-06, "logits/chosen": -1.346521019935608, "logits/rejected": -1.7808557748794556, "logps/chosen": -400.1255798339844, "logps/rejected": -4530.7978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2208030223846436, "rewards/margins": 41.361610412597656, "rewards/rejected": -42.58241653442383, "step": 10790 }, { "epoch": 47.161572052401745, "grad_norm": 3.7164721093569654e-05, "learning_rate": 3.175528883513648e-06, "logits/chosen": -1.4286129474639893, "logits/rejected": -1.8220106363296509, "logps/chosen": -394.84454345703125, "logps/rejected": -4558.3681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2174818515777588, "rewards/margins": 41.51200485229492, "rewards/rejected": -42.72948455810547, "step": 10800 }, { "epoch": 47.20524017467249, "grad_norm": 2.0280646555790867e-07, "learning_rate": 3.1718590942318318e-06, "logits/chosen": -1.3405344486236572, "logits/rejected": -1.7304245233535767, "logps/chosen": -400.4102478027344, "logps/rejected": -4215.6103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.222116231918335, "rewards/margins": 38.33971405029297, "rewards/rejected": -39.56183624267578, "step": 10810 }, { "epoch": 47.248908296943235, "grad_norm": 4.759771643959212e-05, "learning_rate": 3.1681877438818122e-06, "logits/chosen": -1.431320309638977, "logits/rejected": -1.8066316843032837, "logps/chosen": -407.80133056640625, "logps/rejected": -4447.8974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.372309923171997, "rewards/margins": 40.34308624267578, "rewards/rejected": -41.715396881103516, "step": 10820 }, { "epoch": 47.29257641921397, "grad_norm": 1.975529677167965e-05, "learning_rate": 3.164514840993992e-06, "logits/chosen": -1.4002450704574585, "logits/rejected": -1.7882678508758545, "logps/chosen": -410.72247314453125, "logps/rejected": -4319.58544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2559051513671875, "rewards/margins": 39.238807678222656, "rewards/rejected": -40.494712829589844, "step": 10830 }, { "epoch": 47.33624454148472, "grad_norm": 1.7637218101197078e-07, "learning_rate": 3.1608403941023793e-06, "logits/chosen": -1.3926503658294678, "logits/rejected": -1.852829933166504, "logps/chosen": -399.18597412109375, "logps/rejected": -4589.1337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.195719838142395, "rewards/margins": 41.983543395996094, "rewards/rejected": -43.179264068603516, "step": 10840 }, { "epoch": 47.379912663755455, "grad_norm": 3.6605718532486697e-05, "learning_rate": 3.1571644117445727e-06, "logits/chosen": -1.4062283039093018, "logits/rejected": -1.7677205801010132, "logps/chosen": -414.2830505371094, "logps/rejected": -4392.8251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3171026706695557, "rewards/margins": 39.75764083862305, "rewards/rejected": -41.074745178222656, "step": 10850 }, { "epoch": 47.4235807860262, "grad_norm": 1.4843568458025105e-06, "learning_rate": 3.153486902461736e-06, "logits/chosen": -1.3898890018463135, "logits/rejected": -1.78806471824646, "logps/chosen": -453.154296875, "logps/rejected": -4253.7626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.365558385848999, "rewards/margins": 38.464881896972656, "rewards/rejected": -39.830440521240234, "step": 10860 }, { "epoch": 47.467248908296945, "grad_norm": 3.006609795789659e-07, "learning_rate": 3.149807874798582e-06, "logits/chosen": -1.4113681316375732, "logits/rejected": -1.7861335277557373, "logps/chosen": -394.3350524902344, "logps/rejected": -4657.5869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.227648377418518, "rewards/margins": 42.410682678222656, "rewards/rejected": -43.638328552246094, "step": 10870 }, { "epoch": 47.51091703056768, "grad_norm": 3.243915636808925e-07, "learning_rate": 3.146127337303352e-06, "logits/chosen": -1.402493953704834, "logits/rejected": -1.873373031616211, "logps/chosen": -389.9505920410156, "logps/rejected": -5093.0126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1972397565841675, "rewards/margins": 46.82252502441406, "rewards/rejected": -48.01976776123047, "step": 10880 }, { "epoch": 47.55458515283843, "grad_norm": 4.0347276049471897e-07, "learning_rate": 3.142445298527792e-06, "logits/chosen": -1.432251214981079, "logits/rejected": -1.8160873651504517, "logps/chosen": -409.5646667480469, "logps/rejected": -4823.90283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1896945238113403, "rewards/margins": 44.06748580932617, "rewards/rejected": -45.25718688964844, "step": 10890 }, { "epoch": 47.59825327510917, "grad_norm": 1.1018590722714202e-06, "learning_rate": 3.138761767027142e-06, "logits/chosen": -1.3965373039245605, "logits/rejected": -1.8246952295303345, "logps/chosen": -406.433349609375, "logps/rejected": -4557.6650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2625348567962646, "rewards/margins": 41.45463943481445, "rewards/rejected": -42.7171745300293, "step": 10900 }, { "epoch": 47.64192139737991, "grad_norm": 7.327660457761411e-07, "learning_rate": 3.1350767513601054e-06, "logits/chosen": -1.3690853118896484, "logits/rejected": -1.8008549213409424, "logps/chosen": -397.60858154296875, "logps/rejected": -4580.87255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3501002788543701, "rewards/margins": 41.635066986083984, "rewards/rejected": -42.985164642333984, "step": 10910 }, { "epoch": 47.685589519650655, "grad_norm": 2.965681166192494e-07, "learning_rate": 3.1313902600888356e-06, "logits/chosen": -1.3795956373214722, "logits/rejected": -1.7829620838165283, "logps/chosen": -402.0404357910156, "logps/rejected": -4508.37255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2307424545288086, "rewards/margins": 41.06523132324219, "rewards/rejected": -42.29597473144531, "step": 10920 }, { "epoch": 47.7292576419214, "grad_norm": 2.425558144247191e-07, "learning_rate": 3.1277023017789166e-06, "logits/chosen": -1.396227240562439, "logits/rejected": -1.8848644495010376, "logps/chosen": -399.66693115234375, "logps/rejected": -4633.79443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.289345622062683, "rewards/margins": 42.325767517089844, "rewards/rejected": -43.6151123046875, "step": 10930 }, { "epoch": 47.77292576419214, "grad_norm": 4.5299571370586766e-07, "learning_rate": 3.1240128849993373e-06, "logits/chosen": -1.3876161575317383, "logits/rejected": -1.8431835174560547, "logps/chosen": -385.1604919433594, "logps/rejected": -4884.857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2063974142074585, "rewards/margins": 44.79460525512695, "rewards/rejected": -46.000999450683594, "step": 10940 }, { "epoch": 47.81659388646288, "grad_norm": 0.00017887167416478754, "learning_rate": 3.120322018322478e-06, "logits/chosen": -1.367973804473877, "logits/rejected": -1.7528064250946045, "logps/chosen": -416.6319274902344, "logps/rejected": -4048.934814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2438271045684814, "rewards/margins": 36.687557220458984, "rewards/rejected": -37.9313850402832, "step": 10950 }, { "epoch": 47.86026200873363, "grad_norm": 0.0004932454767369104, "learning_rate": 3.1166297103240874e-06, "logits/chosen": -1.4292126893997192, "logits/rejected": -1.871154546737671, "logps/chosen": -410.5572814941406, "logps/rejected": -4784.49951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2368704080581665, "rewards/margins": 43.66730499267578, "rewards/rejected": -44.90416717529297, "step": 10960 }, { "epoch": 47.903930131004365, "grad_norm": 4.6664002227778983e-07, "learning_rate": 3.1129359695832633e-06, "logits/chosen": -1.3873735666275024, "logits/rejected": -1.8453731536865234, "logps/chosen": -413.40057373046875, "logps/rejected": -4591.2509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2083112001419067, "rewards/margins": 41.93821334838867, "rewards/rejected": -43.14652633666992, "step": 10970 }, { "epoch": 47.94759825327511, "grad_norm": 5.123112175085271e-07, "learning_rate": 3.1092408046824307e-06, "logits/chosen": -1.4436784982681274, "logits/rejected": -1.882990837097168, "logps/chosen": -410.65838623046875, "logps/rejected": -4745.9609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3872015476226807, "rewards/margins": 43.121910095214844, "rewards/rejected": -44.50911331176758, "step": 10980 }, { "epoch": 47.99126637554585, "grad_norm": 7.280056856012072e-07, "learning_rate": 3.1055442242073262e-06, "logits/chosen": -1.4225454330444336, "logits/rejected": -1.8773943185806274, "logps/chosen": -414.12237548828125, "logps/rejected": -4347.29248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3946038484573364, "rewards/margins": 39.33219909667969, "rewards/rejected": -40.726802825927734, "step": 10990 }, { "epoch": 48.03493449781659, "grad_norm": 7.084144432318337e-07, "learning_rate": 3.1018462367469747e-06, "logits/chosen": -1.3430352210998535, "logits/rejected": -1.8062217235565186, "logps/chosen": -416.2109375, "logps/rejected": -4272.0625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3813296556472778, "rewards/margins": 38.673362731933594, "rewards/rejected": -40.054691314697266, "step": 11000 }, { "epoch": 48.07860262008734, "grad_norm": 2.1485267198502516e-06, "learning_rate": 3.0981468508936684e-06, "logits/chosen": -1.357295036315918, "logits/rejected": -1.736742377281189, "logps/chosen": -393.2538757324219, "logps/rejected": -4084.68603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3510786294937134, "rewards/margins": 36.93269348144531, "rewards/rejected": -38.28376770019531, "step": 11010 }, { "epoch": 48.122270742358076, "grad_norm": 0.00015833624725205384, "learning_rate": 3.094446075242952e-06, "logits/chosen": -1.4150750637054443, "logits/rejected": -1.8282406330108643, "logps/chosen": -441.31878662109375, "logps/rejected": -4495.7861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4495718479156494, "rewards/margins": 40.665775299072266, "rewards/rejected": -42.1153450012207, "step": 11020 }, { "epoch": 48.16593886462882, "grad_norm": 4.129101390370251e-07, "learning_rate": 3.0907439183935956e-06, "logits/chosen": -1.4240882396697998, "logits/rejected": -1.855318307876587, "logps/chosen": -436.061767578125, "logps/rejected": -4708.3447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4400360584259033, "rewards/margins": 42.70572280883789, "rewards/rejected": -44.14575958251953, "step": 11030 }, { "epoch": 48.209606986899566, "grad_norm": 1.0227537689027444e-05, "learning_rate": 3.087040388947582e-06, "logits/chosen": -1.3906071186065674, "logits/rejected": -1.8532291650772095, "logps/chosen": -427.451416015625, "logps/rejected": -4485.8994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3130886554718018, "rewards/margins": 40.69761657714844, "rewards/rejected": -42.010704040527344, "step": 11040 }, { "epoch": 48.2532751091703, "grad_norm": 3.9922829912847577e-07, "learning_rate": 3.083335495510081e-06, "logits/chosen": -1.410715103149414, "logits/rejected": -1.832498550415039, "logps/chosen": -426.54345703125, "logps/rejected": -4543.70068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3162181377410889, "rewards/margins": 41.372314453125, "rewards/rejected": -42.688533782958984, "step": 11050 }, { "epoch": 48.29694323144105, "grad_norm": 6.553169433560628e-07, "learning_rate": 3.0796292466894316e-06, "logits/chosen": -1.3889362812042236, "logits/rejected": -1.8371778726577759, "logps/chosen": -435.1189880371094, "logps/rejected": -4508.85693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2174841165542603, "rewards/margins": 41.067298889160156, "rewards/rejected": -42.28478240966797, "step": 11060 }, { "epoch": 48.34061135371179, "grad_norm": 1.4763889298520763e-06, "learning_rate": 3.0759216510971245e-06, "logits/chosen": -1.3442376852035522, "logits/rejected": -1.7494621276855469, "logps/chosen": -440.20037841796875, "logps/rejected": -3966.883544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.372406244277954, "rewards/margins": 35.794769287109375, "rewards/rejected": -37.16716766357422, "step": 11070 }, { "epoch": 48.38427947598253, "grad_norm": 2.715487248698434e-07, "learning_rate": 3.0722127173477763e-06, "logits/chosen": -1.4374278783798218, "logits/rejected": -1.8064085245132446, "logps/chosen": -410.712646484375, "logps/rejected": -4508.6455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2590901851654053, "rewards/margins": 40.925270080566406, "rewards/rejected": -42.18436050415039, "step": 11080 }, { "epoch": 48.427947598253276, "grad_norm": 1.0145887876573955e-06, "learning_rate": 3.068502454059116e-06, "logits/chosen": -1.4264492988586426, "logits/rejected": -1.838805913925171, "logps/chosen": -403.0926208496094, "logps/rejected": -4364.8515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2929006814956665, "rewards/margins": 39.61579132080078, "rewards/rejected": -40.90869140625, "step": 11090 }, { "epoch": 48.47161572052402, "grad_norm": 4.3415746347017796e-07, "learning_rate": 3.064790869851958e-06, "logits/chosen": -1.414261817932129, "logits/rejected": -1.8745664358139038, "logps/chosen": -406.9413146972656, "logps/rejected": -4426.33740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2979176044464111, "rewards/margins": 40.269004821777344, "rewards/rejected": -41.566917419433594, "step": 11100 }, { "epoch": 48.51528384279476, "grad_norm": 7.988632756373374e-06, "learning_rate": 3.0610779733501904e-06, "logits/chosen": -1.3947796821594238, "logits/rejected": -1.8513259887695312, "logps/chosen": -422.62030029296875, "logps/rejected": -4473.68408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4269639253616333, "rewards/margins": 40.488224029541016, "rewards/rejected": -41.915184020996094, "step": 11110 }, { "epoch": 48.5589519650655, "grad_norm": 5.342219486034708e-05, "learning_rate": 3.0573637731807474e-06, "logits/chosen": -1.4179494380950928, "logits/rejected": -1.8848965167999268, "logps/chosen": -430.37493896484375, "logps/rejected": -4648.6005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4151923656463623, "rewards/margins": 42.18524932861328, "rewards/rejected": -43.600440979003906, "step": 11120 }, { "epoch": 48.60262008733624, "grad_norm": 8.4275542562926e-05, "learning_rate": 3.053648277973592e-06, "logits/chosen": -1.406171202659607, "logits/rejected": -1.900665044784546, "logps/chosen": -408.80963134765625, "logps/rejected": -4708.5595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.322922945022583, "rewards/margins": 42.97440719604492, "rewards/rejected": -44.29732894897461, "step": 11130 }, { "epoch": 48.646288209606986, "grad_norm": 7.295255833162134e-07, "learning_rate": 3.049931496361699e-06, "logits/chosen": -1.4063786268234253, "logits/rejected": -1.7733685970306396, "logps/chosen": -408.3210144042969, "logps/rejected": -4100.3095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3319447040557861, "rewards/margins": 36.95833969116211, "rewards/rejected": -38.290283203125, "step": 11140 }, { "epoch": 48.68995633187773, "grad_norm": 5.657244649802234e-07, "learning_rate": 3.0462134369810282e-06, "logits/chosen": -1.382938265800476, "logits/rejected": -1.83868408203125, "logps/chosen": -453.14569091796875, "logps/rejected": -4288.203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.34300696849823, "rewards/margins": 38.90888214111328, "rewards/rejected": -40.25188446044922, "step": 11150 }, { "epoch": 48.73362445414847, "grad_norm": 6.28027345481339e-07, "learning_rate": 3.0424941084705123e-06, "logits/chosen": -1.3873058557510376, "logits/rejected": -1.8266464471817017, "logps/chosen": -419.35394287109375, "logps/rejected": -4350.89794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2744665145874023, "rewards/margins": 39.531463623046875, "rewards/rejected": -40.805931091308594, "step": 11160 }, { "epoch": 48.777292576419214, "grad_norm": 1.4085038951742275e-06, "learning_rate": 3.0387735194720292e-06, "logits/chosen": -1.3802697658538818, "logits/rejected": -1.8067271709442139, "logps/chosen": -441.87841796875, "logps/rejected": -4297.7607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3609120845794678, "rewards/margins": 38.97858810424805, "rewards/rejected": -40.339500427246094, "step": 11170 }, { "epoch": 48.82096069868996, "grad_norm": 6.963378811470645e-07, "learning_rate": 3.03505167863039e-06, "logits/chosen": -1.3283495903015137, "logits/rejected": -1.8015985488891602, "logps/chosen": -426.64569091796875, "logps/rejected": -4218.18603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3315439224243164, "rewards/margins": 38.28562927246094, "rewards/rejected": -39.61717224121094, "step": 11180 }, { "epoch": 48.8646288209607, "grad_norm": 6.182041501167854e-07, "learning_rate": 3.0313285945933096e-06, "logits/chosen": -1.4417561292648315, "logits/rejected": -1.961974859237671, "logps/chosen": -384.7183837890625, "logps/rejected": -4765.72705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2965569496154785, "rewards/margins": 43.520164489746094, "rewards/rejected": -44.81672286987305, "step": 11190 }, { "epoch": 48.90829694323144, "grad_norm": 4.5477031147648595e-07, "learning_rate": 3.0276042760113937e-06, "logits/chosen": -1.4702789783477783, "logits/rejected": -2.0071845054626465, "logps/chosen": -378.67645263671875, "logps/rejected": -5214.224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2985308170318604, "rewards/margins": 47.7647819519043, "rewards/rejected": -49.063316345214844, "step": 11200 }, { "epoch": 48.951965065502186, "grad_norm": 7.308127699006411e-07, "learning_rate": 3.023878731538118e-06, "logits/chosen": -1.4617054462432861, "logits/rejected": -1.9077354669570923, "logps/chosen": -427.74786376953125, "logps/rejected": -4648.87744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4316655397415161, "rewards/margins": 42.177452087402344, "rewards/rejected": -43.6091194152832, "step": 11210 }, { "epoch": 48.995633187772924, "grad_norm": 4.354101741237377e-06, "learning_rate": 3.0201519698298043e-06, "logits/chosen": -1.3973712921142578, "logits/rejected": -1.8319671154022217, "logps/chosen": -404.59185791015625, "logps/rejected": -4589.82958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2914159297943115, "rewards/margins": 41.821678161621094, "rewards/rejected": -43.11309051513672, "step": 11220 }, { "epoch": 49.03930131004367, "grad_norm": 5.715855838891383e-07, "learning_rate": 3.016423999545603e-06, "logits/chosen": -1.3847930431365967, "logits/rejected": -1.8353850841522217, "logps/chosen": -420.69512939453125, "logps/rejected": -4411.9775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3484470844268799, "rewards/margins": 40.05206298828125, "rewards/rejected": -41.4005126953125, "step": 11230 }, { "epoch": 49.082969432314414, "grad_norm": 2.7235432153949516e-06, "learning_rate": 3.0126948293474734e-06, "logits/chosen": -1.4562828540802002, "logits/rejected": -1.8926239013671875, "logps/chosen": -395.037841796875, "logps/rejected": -4678.4814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2384940385818481, "rewards/margins": 42.61274337768555, "rewards/rejected": -43.85123825073242, "step": 11240 }, { "epoch": 49.12663755458515, "grad_norm": 1.5100646209911627e-06, "learning_rate": 3.008964467900162e-06, "logits/chosen": -1.353941559791565, "logits/rejected": -1.7977060079574585, "logps/chosen": -421.994140625, "logps/rejected": -4049.274658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.277696967124939, "rewards/margins": 36.76131057739258, "rewards/rejected": -38.03900909423828, "step": 11250 }, { "epoch": 49.1703056768559, "grad_norm": 1.2149785587748388e-06, "learning_rate": 3.005232923871184e-06, "logits/chosen": -1.409439206123352, "logits/rejected": -1.8968251943588257, "logps/chosen": -429.127197265625, "logps/rejected": -4699.02099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.450608730316162, "rewards/margins": 42.64386749267578, "rewards/rejected": -44.09447479248047, "step": 11260 }, { "epoch": 49.213973799126634, "grad_norm": 5.820964884118951e-06, "learning_rate": 3.0015002059308026e-06, "logits/chosen": -1.413524866104126, "logits/rejected": -1.880393624305725, "logps/chosen": -436.67657470703125, "logps/rejected": -4707.0146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3022913932800293, "rewards/margins": 42.89249801635742, "rewards/rejected": -44.19478988647461, "step": 11270 }, { "epoch": 49.25764192139738, "grad_norm": 1.2392441374165565e-05, "learning_rate": 2.9977663227520064e-06, "logits/chosen": -1.3649797439575195, "logits/rejected": -1.7981386184692383, "logps/chosen": -419.3418884277344, "logps/rejected": -4241.5205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3110096454620361, "rewards/margins": 38.515228271484375, "rewards/rejected": -39.82624053955078, "step": 11280 }, { "epoch": 49.301310043668124, "grad_norm": 7.623725946547479e-06, "learning_rate": 2.9940312830104936e-06, "logits/chosen": -1.3781460523605347, "logits/rejected": -1.862309455871582, "logps/chosen": -403.4757385253906, "logps/rejected": -4522.3466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.424699306488037, "rewards/margins": 40.996891021728516, "rewards/rejected": -42.42158889770508, "step": 11290 }, { "epoch": 49.34497816593886, "grad_norm": 4.670763068687707e-07, "learning_rate": 2.99029509538465e-06, "logits/chosen": -1.4145021438598633, "logits/rejected": -1.8751720190048218, "logps/chosen": -422.41790771484375, "logps/rejected": -4575.1376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2530657052993774, "rewards/margins": 41.685264587402344, "rewards/rejected": -42.938331604003906, "step": 11300 }, { "epoch": 49.38864628820961, "grad_norm": 5.047218878705051e-07, "learning_rate": 2.9865577685555257e-06, "logits/chosen": -1.3750988245010376, "logits/rejected": -1.8614997863769531, "logps/chosen": -452.240478515625, "logps/rejected": -4360.068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2976758480072021, "rewards/margins": 39.66754150390625, "rewards/rejected": -40.96521759033203, "step": 11310 }, { "epoch": 49.43231441048035, "grad_norm": 2.66468117252821e-07, "learning_rate": 2.9828193112068216e-06, "logits/chosen": -1.463989019393921, "logits/rejected": -1.8528127670288086, "logps/chosen": -406.7374267578125, "logps/rejected": -4911.125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3568040132522583, "rewards/margins": 44.64378356933594, "rewards/rejected": -46.000587463378906, "step": 11320 }, { "epoch": 49.47598253275109, "grad_norm": 2.9588252963077e-05, "learning_rate": 2.9790797320248623e-06, "logits/chosen": -1.3481703996658325, "logits/rejected": -1.8587560653686523, "logps/chosen": -435.787841796875, "logps/rejected": -4497.10009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.326608419418335, "rewards/margins": 40.95994567871094, "rewards/rejected": -42.286556243896484, "step": 11330 }, { "epoch": 49.519650655021834, "grad_norm": 1.1099196048046952e-06, "learning_rate": 2.97533903969858e-06, "logits/chosen": -1.4520350694656372, "logits/rejected": -1.861669898033142, "logps/chosen": -413.2173767089844, "logps/rejected": -4965.2197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3094843626022339, "rewards/margins": 45.33491134643555, "rewards/rejected": -46.64440155029297, "step": 11340 }, { "epoch": 49.56331877729258, "grad_norm": 1.6714421675199342e-05, "learning_rate": 2.971597242919493e-06, "logits/chosen": -1.4381921291351318, "logits/rejected": -1.8997688293457031, "logps/chosen": -411.54498291015625, "logps/rejected": -4726.2158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2488638162612915, "rewards/margins": 43.080631256103516, "rewards/rejected": -44.32949447631836, "step": 11350 }, { "epoch": 49.60698689956332, "grad_norm": 1.070014169255123e-05, "learning_rate": 2.9678543503816864e-06, "logits/chosen": -1.4057319164276123, "logits/rejected": -1.8659067153930664, "logps/chosen": -422.0342712402344, "logps/rejected": -4469.64013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3455655574798584, "rewards/margins": 40.58135986328125, "rewards/rejected": -41.92692184448242, "step": 11360 }, { "epoch": 49.65065502183406, "grad_norm": 2.496583365919428e-07, "learning_rate": 2.964110370781793e-06, "logits/chosen": -1.4002052545547485, "logits/rejected": -1.8479645252227783, "logps/chosen": -408.40667724609375, "logps/rejected": -4528.95458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2938238382339478, "rewards/margins": 41.213897705078125, "rewards/rejected": -42.50771713256836, "step": 11370 }, { "epoch": 49.6943231441048, "grad_norm": 2.0222424483846445e-07, "learning_rate": 2.960365312818967e-06, "logits/chosen": -1.4562594890594482, "logits/rejected": -1.8610591888427734, "logps/chosen": -462.44927978515625, "logps/rejected": -4730.67822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.614996314048767, "rewards/margins": 42.725093841552734, "rewards/rejected": -44.340087890625, "step": 11380 }, { "epoch": 49.737991266375545, "grad_norm": 4.904291478181921e-06, "learning_rate": 2.9566191851948708e-06, "logits/chosen": -1.4650170803070068, "logits/rejected": -1.9388306140899658, "logps/chosen": -434.56903076171875, "logps/rejected": -5115.23388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5988574028015137, "rewards/margins": 46.44672775268555, "rewards/rejected": -48.04559326171875, "step": 11390 }, { "epoch": 49.78165938864629, "grad_norm": 6.955786603844933e-05, "learning_rate": 2.952871996613653e-06, "logits/chosen": -1.411449909210205, "logits/rejected": -1.8830375671386719, "logps/chosen": -434.8711853027344, "logps/rejected": -4272.06787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3096542358398438, "rewards/margins": 38.87152862548828, "rewards/rejected": -40.18118667602539, "step": 11400 }, { "epoch": 49.82532751091703, "grad_norm": 4.1359789491857995e-07, "learning_rate": 2.949123755781926e-06, "logits/chosen": -1.4674546718597412, "logits/rejected": -1.9549589157104492, "logps/chosen": -428.17510986328125, "logps/rejected": -4840.49658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4111722707748413, "rewards/margins": 43.978031158447266, "rewards/rejected": -45.38920211791992, "step": 11410 }, { "epoch": 49.86899563318777, "grad_norm": 4.1600782630978e-06, "learning_rate": 2.945374471408747e-06, "logits/chosen": -1.4209109544754028, "logits/rejected": -1.9124295711517334, "logps/chosen": -410.4835510253906, "logps/rejected": -4698.64111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3660989999771118, "rewards/margins": 42.7663459777832, "rewards/rejected": -44.13243865966797, "step": 11420 }, { "epoch": 49.91266375545852, "grad_norm": 1.092675668422943e-06, "learning_rate": 2.941624152205599e-06, "logits/chosen": -1.425105333328247, "logits/rejected": -1.8751394748687744, "logps/chosen": -409.1556091308594, "logps/rejected": -4831.74169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3904914855957031, "rewards/margins": 43.97521209716797, "rewards/rejected": -45.365699768066406, "step": 11430 }, { "epoch": 49.956331877729255, "grad_norm": 7.702821787002326e-07, "learning_rate": 2.9378728068863684e-06, "logits/chosen": -1.4980199337005615, "logits/rejected": -1.9806476831436157, "logps/chosen": -395.78790283203125, "logps/rejected": -5230.70751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4329802989959717, "rewards/margins": 47.71369552612305, "rewards/rejected": -49.14667892456055, "step": 11440 }, { "epoch": 50.0, "grad_norm": 2.1071812505401145e-07, "learning_rate": 2.9341204441673267e-06, "logits/chosen": -1.4100054502487183, "logits/rejected": -1.8648090362548828, "logps/chosen": -400.8707275390625, "logps/rejected": -4573.208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3827979564666748, "rewards/margins": 41.52434539794922, "rewards/rejected": -42.907142639160156, "step": 11450 }, { "epoch": 50.043668122270745, "grad_norm": 9.227253557759899e-07, "learning_rate": 2.9303670727671073e-06, "logits/chosen": -1.3844249248504639, "logits/rejected": -1.8297662734985352, "logps/chosen": -419.6665954589844, "logps/rejected": -4126.87890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5281853675842285, "rewards/margins": 37.192176818847656, "rewards/rejected": -38.720359802246094, "step": 11460 }, { "epoch": 50.08733624454148, "grad_norm": 4.631596615382973e-07, "learning_rate": 2.9266127014066905e-06, "logits/chosen": -1.3811687231063843, "logits/rejected": -1.8588778972625732, "logps/chosen": -402.51470947265625, "logps/rejected": -4510.3125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3457276821136475, "rewards/margins": 41.06476593017578, "rewards/rejected": -42.410491943359375, "step": 11470 }, { "epoch": 50.13100436681223, "grad_norm": 6.075477557248573e-07, "learning_rate": 2.922857338809377e-06, "logits/chosen": -1.4790194034576416, "logits/rejected": -1.9513494968414307, "logps/chosen": -399.2284851074219, "logps/rejected": -5496.4638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3993923664093018, "rewards/margins": 50.33414077758789, "rewards/rejected": -51.7335319519043, "step": 11480 }, { "epoch": 50.17467248908297, "grad_norm": 2.9146463940425133e-07, "learning_rate": 2.9191009937007726e-06, "logits/chosen": -1.4115791320800781, "logits/rejected": -1.9769102334976196, "logps/chosen": -401.402587890625, "logps/rejected": -4840.6298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3613418340682983, "rewards/margins": 44.14234161376953, "rewards/rejected": -45.503684997558594, "step": 11490 }, { "epoch": 50.21834061135371, "grad_norm": 3.621925699653942e-07, "learning_rate": 2.9153436748087645e-06, "logits/chosen": -1.3723957538604736, "logits/rejected": -1.8498615026474, "logps/chosen": -448.29052734375, "logps/rejected": -4126.5166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.392869234085083, "rewards/margins": 37.38011169433594, "rewards/rejected": -38.772979736328125, "step": 11500 }, { "epoch": 50.262008733624455, "grad_norm": 4.215368228096858e-07, "learning_rate": 2.9115853908635043e-06, "logits/chosen": -1.3573665618896484, "logits/rejected": -1.8230279684066772, "logps/chosen": -462.8094787597656, "logps/rejected": -4068.860107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3719489574432373, "rewards/margins": 36.828956604003906, "rewards/rejected": -38.20090866088867, "step": 11510 }, { "epoch": 50.30567685589519, "grad_norm": 2.2206321380456309e-07, "learning_rate": 2.907826150597385e-06, "logits/chosen": -1.4836715459823608, "logits/rejected": -1.918662667274475, "logps/chosen": -430.6395568847656, "logps/rejected": -5177.6357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3640648126602173, "rewards/margins": 47.259681701660156, "rewards/rejected": -48.62374496459961, "step": 11520 }, { "epoch": 50.34934497816594, "grad_norm": 3.602937427444545e-07, "learning_rate": 2.9040659627450195e-06, "logits/chosen": -1.4574429988861084, "logits/rejected": -1.9176616668701172, "logps/chosen": -387.16766357421875, "logps/rejected": -5299.09765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2866007089614868, "rewards/margins": 48.53866195678711, "rewards/rejected": -49.82526397705078, "step": 11530 }, { "epoch": 50.39301310043668, "grad_norm": 3.513206728077858e-07, "learning_rate": 2.900304836043227e-06, "logits/chosen": -1.4589877128601074, "logits/rejected": -1.950282335281372, "logps/chosen": -406.19134521484375, "logps/rejected": -4975.58251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.323740005493164, "rewards/margins": 45.38785934448242, "rewards/rejected": -46.71160125732422, "step": 11540 }, { "epoch": 50.43668122270742, "grad_norm": 8.684301138097563e-07, "learning_rate": 2.8965427792310037e-06, "logits/chosen": -1.4431586265563965, "logits/rejected": -1.8709194660186768, "logps/chosen": -418.30450439453125, "logps/rejected": -4626.2392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3766530752182007, "rewards/margins": 42.08913803100586, "rewards/rejected": -43.46579360961914, "step": 11550 }, { "epoch": 50.480349344978166, "grad_norm": 7.528136760818865e-07, "learning_rate": 2.8927798010495095e-06, "logits/chosen": -1.393633246421814, "logits/rejected": -1.9000160694122314, "logps/chosen": -447.1377868652344, "logps/rejected": -4396.2900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4732013940811157, "rewards/margins": 39.79701232910156, "rewards/rejected": -41.27021789550781, "step": 11560 }, { "epoch": 50.52401746724891, "grad_norm": 8.925215122593385e-07, "learning_rate": 2.889015910242044e-06, "logits/chosen": -1.480027198791504, "logits/rejected": -2.0118744373321533, "logps/chosen": -384.8761291503906, "logps/rejected": -5260.95068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3498141765594482, "rewards/margins": 48.10286331176758, "rewards/rejected": -49.452674865722656, "step": 11570 }, { "epoch": 50.56768558951965, "grad_norm": 5.721643499590388e-07, "learning_rate": 2.8852511155540285e-06, "logits/chosen": -1.44820237159729, "logits/rejected": -1.8883453607559204, "logps/chosen": -418.22222900390625, "logps/rejected": -4423.46728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3491524457931519, "rewards/margins": 40.26726531982422, "rewards/rejected": -41.61641311645508, "step": 11580 }, { "epoch": 50.61135371179039, "grad_norm": 2.8633897751280667e-07, "learning_rate": 2.8814854257329827e-06, "logits/chosen": -1.4883731603622437, "logits/rejected": -1.9450820684432983, "logps/chosen": -420.6214294433594, "logps/rejected": -4742.57373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.442225694656372, "rewards/margins": 43.10006332397461, "rewards/rejected": -44.542293548583984, "step": 11590 }, { "epoch": 50.65502183406114, "grad_norm": 5.751357569582872e-07, "learning_rate": 2.8777188495285067e-06, "logits/chosen": -1.457509160041809, "logits/rejected": -1.9498488903045654, "logps/chosen": -403.174560546875, "logps/rejected": -4920.77001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3820688724517822, "rewards/margins": 44.8012580871582, "rewards/rejected": -46.183326721191406, "step": 11600 }, { "epoch": 50.698689956331876, "grad_norm": 1.3548037300933442e-06, "learning_rate": 2.8739513956922617e-06, "logits/chosen": -1.4163249731063843, "logits/rejected": -1.8991458415985107, "logps/chosen": -408.615234375, "logps/rejected": -4786.26953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3358452320098877, "rewards/margins": 43.787384033203125, "rewards/rejected": -45.123233795166016, "step": 11610 }, { "epoch": 50.74235807860262, "grad_norm": 2.7431382235875664e-07, "learning_rate": 2.8701830729779466e-06, "logits/chosen": -1.4605716466903687, "logits/rejected": -1.9454498291015625, "logps/chosen": -402.98193359375, "logps/rejected": -4998.86083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3840198516845703, "rewards/margins": 45.633209228515625, "rewards/rejected": -47.01722717285156, "step": 11620 }, { "epoch": 50.786026200873366, "grad_norm": 5.5522781076258964e-05, "learning_rate": 2.8664138901412787e-06, "logits/chosen": -1.4320846796035767, "logits/rejected": -1.90786612033844, "logps/chosen": -425.20697021484375, "logps/rejected": -4491.3232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.380642056465149, "rewards/margins": 40.743003845214844, "rewards/rejected": -42.12364959716797, "step": 11630 }, { "epoch": 50.8296943231441, "grad_norm": 8.888179816687109e-07, "learning_rate": 2.8626438559399748e-06, "logits/chosen": -1.4737457036972046, "logits/rejected": -1.9757951498031616, "logps/chosen": -429.5225524902344, "logps/rejected": -5096.97998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3174866437911987, "rewards/margins": 46.65302276611328, "rewards/rejected": -47.97050857543945, "step": 11640 }, { "epoch": 50.87336244541485, "grad_norm": 6.162293477148386e-07, "learning_rate": 2.8588729791337298e-06, "logits/chosen": -1.4774963855743408, "logits/rejected": -1.9671319723129272, "logps/chosen": -418.3829040527344, "logps/rejected": -4827.25390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4777649641036987, "rewards/margins": 43.90654754638672, "rewards/rejected": -45.384315490722656, "step": 11650 }, { "epoch": 50.917030567685586, "grad_norm": 6.607145462335421e-07, "learning_rate": 2.8551012684841966e-06, "logits/chosen": -1.4359300136566162, "logits/rejected": -1.898771047592163, "logps/chosen": -437.10870361328125, "logps/rejected": -4596.99072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3905290365219116, "rewards/margins": 41.91053009033203, "rewards/rejected": -43.30105972290039, "step": 11660 }, { "epoch": 50.96069868995633, "grad_norm": 3.5911456436834284e-06, "learning_rate": 2.8513287327549636e-06, "logits/chosen": -1.4378215074539185, "logits/rejected": -1.9443515539169312, "logps/chosen": -429.74591064453125, "logps/rejected": -4617.17626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3698599338531494, "rewards/margins": 42.0704231262207, "rewards/rejected": -43.440284729003906, "step": 11670 }, { "epoch": 51.004366812227076, "grad_norm": 8.599665407697538e-07, "learning_rate": 2.847555380711539e-06, "logits/chosen": -1.419886827468872, "logits/rejected": -1.862084984779358, "logps/chosen": -412.0184631347656, "logps/rejected": -4904.6689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4619414806365967, "rewards/margins": 44.55397415161133, "rewards/rejected": -46.01591873168945, "step": 11680 }, { "epoch": 51.048034934497814, "grad_norm": 2.730261793606099e-07, "learning_rate": 2.843781221121326e-06, "logits/chosen": -1.4328210353851318, "logits/rejected": -1.919994592666626, "logps/chosen": -452.95904541015625, "logps/rejected": -4773.349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.481802225112915, "rewards/margins": 43.40754318237305, "rewards/rejected": -44.88933563232422, "step": 11690 }, { "epoch": 51.09170305676856, "grad_norm": 2.134260782537464e-05, "learning_rate": 2.840006262753605e-06, "logits/chosen": -1.4670488834381104, "logits/rejected": -1.946480393409729, "logps/chosen": -415.45831298828125, "logps/rejected": -4803.19677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4249324798583984, "rewards/margins": 43.72990417480469, "rewards/rejected": -45.15483474731445, "step": 11700 }, { "epoch": 51.1353711790393, "grad_norm": 1.2277649185675328e-07, "learning_rate": 2.8362305143795123e-06, "logits/chosen": -1.4134185314178467, "logits/rejected": -1.853081464767456, "logps/chosen": -427.0337829589844, "logps/rejected": -4195.59228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.375425934791565, "rewards/margins": 38.0695686340332, "rewards/rejected": -39.44499588012695, "step": 11710 }, { "epoch": 51.17903930131004, "grad_norm": 2.210059525296815e-07, "learning_rate": 2.832453984772018e-06, "logits/chosen": -1.3534570932388306, "logits/rejected": -1.7638343572616577, "logps/chosen": -473.0602111816406, "logps/rejected": -3669.872314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4788639545440674, "rewards/margins": 32.879024505615234, "rewards/rejected": -34.357887268066406, "step": 11720 }, { "epoch": 51.222707423580786, "grad_norm": 3.1021239211423755e-07, "learning_rate": 2.8286766827059105e-06, "logits/chosen": -1.4361910820007324, "logits/rejected": -1.9386169910430908, "logps/chosen": -438.0216369628906, "logps/rejected": -4579.845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4160758256912231, "rewards/margins": 41.6064567565918, "rewards/rejected": -43.02253341674805, "step": 11730 }, { "epoch": 51.26637554585153, "grad_norm": 9.433962391078157e-07, "learning_rate": 2.8248986169577697e-06, "logits/chosen": -1.4424687623977661, "logits/rejected": -1.9419724941253662, "logps/chosen": -425.31658935546875, "logps/rejected": -4584.62255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5284219980239868, "rewards/margins": 41.557125091552734, "rewards/rejected": -43.08554458618164, "step": 11740 }, { "epoch": 51.31004366812227, "grad_norm": 6.610359979377283e-07, "learning_rate": 2.821119796305953e-06, "logits/chosen": -1.376015067100525, "logits/rejected": -1.8304592370986938, "logps/chosen": -440.4830627441406, "logps/rejected": -4062.50732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4048079252243042, "rewards/margins": 36.772544860839844, "rewards/rejected": -38.1773567199707, "step": 11750 }, { "epoch": 51.353711790393014, "grad_norm": 1.7265935353312882e-06, "learning_rate": 2.8173402295305703e-06, "logits/chosen": -1.4377520084381104, "logits/rejected": -1.910498857498169, "logps/chosen": -425.01904296875, "logps/rejected": -4769.33544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4898247718811035, "rewards/margins": 43.254844665527344, "rewards/rejected": -44.744667053222656, "step": 11760 }, { "epoch": 51.39737991266376, "grad_norm": 2.891095031867824e-07, "learning_rate": 2.8135599254134654e-06, "logits/chosen": -1.4256517887115479, "logits/rejected": -2.012678861618042, "logps/chosen": -399.3770751953125, "logps/rejected": -4973.0546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.42516028881073, "rewards/margins": 45.36750030517578, "rewards/rejected": -46.792659759521484, "step": 11770 }, { "epoch": 51.4410480349345, "grad_norm": 3.1695989915658066e-07, "learning_rate": 2.809778892738194e-06, "logits/chosen": -1.417304277420044, "logits/rejected": -1.8603636026382446, "logps/chosen": -445.78973388671875, "logps/rejected": -4135.14306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4710826873779297, "rewards/margins": 37.294532775878906, "rewards/rejected": -38.76561737060547, "step": 11780 }, { "epoch": 51.48471615720524, "grad_norm": 6.1282558422635416e-06, "learning_rate": 2.805997140290006e-06, "logits/chosen": -1.4213060140609741, "logits/rejected": -1.9099786281585693, "logps/chosen": -415.069580078125, "logps/rejected": -4740.88671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3939459323883057, "rewards/margins": 43.133636474609375, "rewards/rejected": -44.527584075927734, "step": 11790 }, { "epoch": 51.52838427947598, "grad_norm": 1.2987364007587747e-06, "learning_rate": 2.802214676855825e-06, "logits/chosen": -1.448643684387207, "logits/rejected": -1.9870922565460205, "logps/chosen": -417.45281982421875, "logps/rejected": -4802.10009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.351694107055664, "rewards/margins": 43.89908981323242, "rewards/rejected": -45.25078582763672, "step": 11800 }, { "epoch": 51.572052401746724, "grad_norm": 4.072397013460051e-07, "learning_rate": 2.7984315112242226e-06, "logits/chosen": -1.4824467897415161, "logits/rejected": -1.8940436840057373, "logps/chosen": -430.366943359375, "logps/rejected": -4975.56005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6622629165649414, "rewards/margins": 45.097389221191406, "rewards/rejected": -46.75965881347656, "step": 11810 }, { "epoch": 51.61572052401747, "grad_norm": 1.8001134730471427e-06, "learning_rate": 2.794647652185407e-06, "logits/chosen": -1.4430320262908936, "logits/rejected": -1.8538497686386108, "logps/chosen": -425.5279235839844, "logps/rejected": -4598.4970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4522989988327026, "rewards/margins": 41.64671325683594, "rewards/rejected": -43.09901809692383, "step": 11820 }, { "epoch": 51.65938864628821, "grad_norm": 2.7397784293288395e-07, "learning_rate": 2.7908631085311933e-06, "logits/chosen": -1.465795874595642, "logits/rejected": -2.0231778621673584, "logps/chosen": -402.7904052734375, "logps/rejected": -5281.2470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3869775533676147, "rewards/margins": 48.381900787353516, "rewards/rejected": -49.76887512207031, "step": 11830 }, { "epoch": 51.70305676855895, "grad_norm": 4.8366509021299046e-05, "learning_rate": 2.7870778890549895e-06, "logits/chosen": -1.4370918273925781, "logits/rejected": -1.9011719226837158, "logps/chosen": -403.48626708984375, "logps/rejected": -4832.31103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4023700952529907, "rewards/margins": 44.03832244873047, "rewards/rejected": -45.44069290161133, "step": 11840 }, { "epoch": 51.7467248908297, "grad_norm": 3.541460718555355e-07, "learning_rate": 2.783292002551773e-06, "logits/chosen": -1.4475233554840088, "logits/rejected": -1.8977673053741455, "logps/chosen": -430.8231506347656, "logps/rejected": -4468.9599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4301038980484009, "rewards/margins": 40.58808135986328, "rewards/rejected": -42.0181884765625, "step": 11850 }, { "epoch": 51.790393013100434, "grad_norm": 8.345978968167205e-07, "learning_rate": 2.7795054578180712e-06, "logits/chosen": -1.4075381755828857, "logits/rejected": -1.8880678415298462, "logps/chosen": -411.94287109375, "logps/rejected": -4827.80810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4315769672393799, "rewards/margins": 43.96772003173828, "rewards/rejected": -45.39929962158203, "step": 11860 }, { "epoch": 51.83406113537118, "grad_norm": 4.469886766738849e-07, "learning_rate": 2.775718263651942e-06, "logits/chosen": -1.4555108547210693, "logits/rejected": -1.8927037715911865, "logps/chosen": -417.2571716308594, "logps/rejected": -4661.271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4647462368011475, "rewards/margins": 42.318389892578125, "rewards/rejected": -43.78313446044922, "step": 11870 }, { "epoch": 51.877729257641924, "grad_norm": 3.23450283418902e-07, "learning_rate": 2.7719304288529503e-06, "logits/chosen": -1.4133474826812744, "logits/rejected": -1.9276237487792969, "logps/chosen": -431.44500732421875, "logps/rejected": -4720.2216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4492340087890625, "rewards/margins": 43.074066162109375, "rewards/rejected": -44.52329635620117, "step": 11880 }, { "epoch": 51.92139737991266, "grad_norm": 3.8184935606904795e-06, "learning_rate": 2.7681419622221515e-06, "logits/chosen": -1.457641839981079, "logits/rejected": -1.9287077188491821, "logps/chosen": -430.79412841796875, "logps/rejected": -4731.1845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5028009414672852, "rewards/margins": 42.9648551940918, "rewards/rejected": -44.4676513671875, "step": 11890 }, { "epoch": 51.96506550218341, "grad_norm": 5.349500891700728e-07, "learning_rate": 2.764352872562067e-06, "logits/chosen": -1.4171884059906006, "logits/rejected": -1.9505304098129272, "logps/chosen": -439.2677307128906, "logps/rejected": -4981.21484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4625412225723267, "rewards/margins": 45.4407958984375, "rewards/rejected": -46.90333938598633, "step": 11900 }, { "epoch": 52.00873362445415, "grad_norm": 1.9333667844929727e-07, "learning_rate": 2.7605631686766676e-06, "logits/chosen": -1.4309475421905518, "logits/rejected": -1.8942344188690186, "logps/chosen": -404.4856872558594, "logps/rejected": -4358.5302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.416320562362671, "rewards/margins": 39.539676666259766, "rewards/rejected": -40.955989837646484, "step": 11910 }, { "epoch": 52.05240174672489, "grad_norm": 1.3583816478494282e-06, "learning_rate": 2.756772859371351e-06, "logits/chosen": -1.4248652458190918, "logits/rejected": -1.8945258855819702, "logps/chosen": -451.40301513671875, "logps/rejected": -4511.0224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4760949611663818, "rewards/margins": 40.774600982666016, "rewards/rejected": -42.250694274902344, "step": 11920 }, { "epoch": 52.096069868995635, "grad_norm": 2.437239020093494e-07, "learning_rate": 2.75298195345292e-06, "logits/chosen": -1.389209508895874, "logits/rejected": -1.8577840328216553, "logps/chosen": -415.2703552246094, "logps/rejected": -4489.2041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5730606317520142, "rewards/margins": 40.628135681152344, "rewards/rejected": -42.20119094848633, "step": 11930 }, { "epoch": 52.13973799126637, "grad_norm": 3.0355511459788425e-07, "learning_rate": 2.7491904597295652e-06, "logits/chosen": -1.4496512413024902, "logits/rejected": -1.942181944847107, "logps/chosen": -391.204833984375, "logps/rejected": -5126.6083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4928947687149048, "rewards/margins": 46.87525939941406, "rewards/rejected": -48.36815643310547, "step": 11940 }, { "epoch": 52.18340611353712, "grad_norm": 1.8524747621330468e-07, "learning_rate": 2.7453983870108435e-06, "logits/chosen": -1.4149566888809204, "logits/rejected": -1.8873186111450195, "logps/chosen": -416.4837341308594, "logps/rejected": -4634.19482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.436570405960083, "rewards/margins": 42.14799499511719, "rewards/rejected": -43.58456039428711, "step": 11950 }, { "epoch": 52.22707423580786, "grad_norm": 2.7658502587056153e-07, "learning_rate": 2.7416057441076556e-06, "logits/chosen": -1.427449107170105, "logits/rejected": -1.893792748451233, "logps/chosen": -459.79052734375, "logps/rejected": -4349.9892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4872334003448486, "rewards/margins": 39.38713836669922, "rewards/rejected": -40.87437057495117, "step": 11960 }, { "epoch": 52.2707423580786, "grad_norm": 0.00010791383263734363, "learning_rate": 2.737812539832227e-06, "logits/chosen": -1.4549901485443115, "logits/rejected": -1.9474884271621704, "logps/chosen": -421.126708984375, "logps/rejected": -4884.7958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5139495134353638, "rewards/margins": 44.35620880126953, "rewards/rejected": -45.870155334472656, "step": 11970 }, { "epoch": 52.314410480349345, "grad_norm": 1.1515864262018494e-06, "learning_rate": 2.7340187829980887e-06, "logits/chosen": -1.4361560344696045, "logits/rejected": -1.8968204259872437, "logps/chosen": -420.84161376953125, "logps/rejected": -4787.27392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.448229432106018, "rewards/margins": 43.519981384277344, "rewards/rejected": -44.96820831298828, "step": 11980 }, { "epoch": 52.35807860262009, "grad_norm": 1.2321954929137313e-06, "learning_rate": 2.730224482420055e-06, "logits/chosen": -1.4700876474380493, "logits/rejected": -2.0477824211120605, "logps/chosen": -455.6871032714844, "logps/rejected": -5153.82373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7040891647338867, "rewards/margins": 46.800987243652344, "rewards/rejected": -48.50507736206055, "step": 11990 }, { "epoch": 52.40174672489083, "grad_norm": 5.070331431528379e-07, "learning_rate": 2.7264296469142027e-06, "logits/chosen": -1.4476184844970703, "logits/rejected": -1.9479970932006836, "logps/chosen": -436.80072021484375, "logps/rejected": -4760.5693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4735721349716187, "rewards/margins": 43.34977340698242, "rewards/rejected": -44.823341369628906, "step": 12000 }, { "epoch": 52.44541484716157, "grad_norm": 5.572368044094456e-06, "learning_rate": 2.7226342852978542e-06, "logits/chosen": -1.436248540878296, "logits/rejected": -1.9398643970489502, "logps/chosen": -442.75653076171875, "logps/rejected": -4522.04443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5629932880401611, "rewards/margins": 41.00690841674805, "rewards/rejected": -42.56990432739258, "step": 12010 }, { "epoch": 52.48908296943232, "grad_norm": 7.437919035603688e-05, "learning_rate": 2.718838406389551e-06, "logits/chosen": -1.4195164442062378, "logits/rejected": -1.9111322164535522, "logps/chosen": -424.71856689453125, "logps/rejected": -4596.14208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6110174655914307, "rewards/margins": 41.721107482910156, "rewards/rejected": -43.332122802734375, "step": 12020 }, { "epoch": 52.532751091703055, "grad_norm": 1.5352237065250155e-06, "learning_rate": 2.71504201900904e-06, "logits/chosen": -1.4373172521591187, "logits/rejected": -1.9427051544189453, "logps/chosen": -438.79156494140625, "logps/rejected": -4585.3408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5429799556732178, "rewards/margins": 41.617340087890625, "rewards/rejected": -43.16032028198242, "step": 12030 }, { "epoch": 52.5764192139738, "grad_norm": 2.462188910405795e-06, "learning_rate": 2.7112451319772447e-06, "logits/chosen": -1.5010778903961182, "logits/rejected": -2.043318271636963, "logps/chosen": -410.7059020996094, "logps/rejected": -5019.61083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5254533290863037, "rewards/margins": 45.58700942993164, "rewards/rejected": -47.11246109008789, "step": 12040 }, { "epoch": 52.620087336244545, "grad_norm": 2.8415055581675545e-07, "learning_rate": 2.707447754116255e-06, "logits/chosen": -1.4088375568389893, "logits/rejected": -1.848188042640686, "logps/chosen": -442.9283752441406, "logps/rejected": -4002.326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4525606632232666, "rewards/margins": 36.08225631713867, "rewards/rejected": -37.534820556640625, "step": 12050 }, { "epoch": 52.66375545851528, "grad_norm": 1.6708922773805107e-07, "learning_rate": 2.7036498942492977e-06, "logits/chosen": -1.4514856338500977, "logits/rejected": -1.9694201946258545, "logps/chosen": -415.81878662109375, "logps/rejected": -4762.15576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5803560018539429, "rewards/margins": 43.21453857421875, "rewards/rejected": -44.794898986816406, "step": 12060 }, { "epoch": 52.70742358078603, "grad_norm": 5.481337419943278e-07, "learning_rate": 2.6998515612007203e-06, "logits/chosen": -1.515594720840454, "logits/rejected": -2.021040678024292, "logps/chosen": -386.7232360839844, "logps/rejected": -4918.2509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4734137058258057, "rewards/margins": 44.75049591064453, "rewards/rejected": -46.223915100097656, "step": 12070 }, { "epoch": 52.751091703056765, "grad_norm": 2.3223616791823336e-07, "learning_rate": 2.6960527637959706e-06, "logits/chosen": -1.4548934698104858, "logits/rejected": -1.9755947589874268, "logps/chosen": -431.047607421875, "logps/rejected": -4917.9580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.557179570198059, "rewards/margins": 44.75972366333008, "rewards/rejected": -46.31690216064453, "step": 12080 }, { "epoch": 52.79475982532751, "grad_norm": 1.1487118220898405e-05, "learning_rate": 2.6922535108615733e-06, "logits/chosen": -1.4226105213165283, "logits/rejected": -1.937047004699707, "logps/chosen": -440.201171875, "logps/rejected": -4745.7216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.478046178817749, "rewards/margins": 43.20763397216797, "rewards/rejected": -44.6856803894043, "step": 12090 }, { "epoch": 52.838427947598255, "grad_norm": 1.5432231537316637e-05, "learning_rate": 2.6884538112251147e-06, "logits/chosen": -1.4456865787506104, "logits/rejected": -1.9762802124023438, "logps/chosen": -426.48419189453125, "logps/rejected": -4869.13330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3832743167877197, "rewards/margins": 44.486759185791016, "rewards/rejected": -45.870033264160156, "step": 12100 }, { "epoch": 52.88209606986899, "grad_norm": 1.9964035094400663e-05, "learning_rate": 2.6846536737152167e-06, "logits/chosen": -1.4571497440338135, "logits/rejected": -1.968412160873413, "logps/chosen": -440.266357421875, "logps/rejected": -4923.6982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5278570652008057, "rewards/margins": 44.7716064453125, "rewards/rejected": -46.299461364746094, "step": 12110 }, { "epoch": 52.92576419213974, "grad_norm": 1.6082959021746917e-06, "learning_rate": 2.6808531071615185e-06, "logits/chosen": -1.4551610946655273, "logits/rejected": -1.965179204940796, "logps/chosen": -442.1617736816406, "logps/rejected": -4594.78125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6145641803741455, "rewards/margins": 41.70242691040039, "rewards/rejected": -43.316993713378906, "step": 12120 }, { "epoch": 52.96943231441048, "grad_norm": 1.3127471124047175e-05, "learning_rate": 2.677052120394658e-06, "logits/chosen": -1.4403598308563232, "logits/rejected": -1.9745452404022217, "logps/chosen": -438.6407165527344, "logps/rejected": -4518.03759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5576634407043457, "rewards/margins": 40.934547424316406, "rewards/rejected": -42.49221420288086, "step": 12130 }, { "epoch": 53.01310043668122, "grad_norm": 2.3552107539868543e-07, "learning_rate": 2.6732507222462474e-06, "logits/chosen": -1.4471606016159058, "logits/rejected": -1.9454433917999268, "logps/chosen": -427.4395446777344, "logps/rejected": -4644.54541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4933946132659912, "rewards/margins": 42.22319793701172, "rewards/rejected": -43.71659469604492, "step": 12140 }, { "epoch": 53.056768558951966, "grad_norm": 4.140867114005119e-07, "learning_rate": 2.6694489215488555e-06, "logits/chosen": -1.4181022644042969, "logits/rejected": -1.888458251953125, "logps/chosen": -421.3631286621094, "logps/rejected": -4633.5078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4685760736465454, "rewards/margins": 42.0560417175293, "rewards/rejected": -43.524620056152344, "step": 12150 }, { "epoch": 53.10043668122271, "grad_norm": 6.500654457801027e-07, "learning_rate": 2.665646727135987e-06, "logits/chosen": -1.4829580783843994, "logits/rejected": -1.9429283142089844, "logps/chosen": -422.74774169921875, "logps/rejected": -4884.8310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5026649236679077, "rewards/margins": 44.417625427246094, "rewards/rejected": -45.920291900634766, "step": 12160 }, { "epoch": 53.14410480349345, "grad_norm": 2.0285662045307612e-07, "learning_rate": 2.661844147842062e-06, "logits/chosen": -1.422726035118103, "logits/rejected": -1.8779466152191162, "logps/chosen": -445.02166748046875, "logps/rejected": -4246.6298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.564371109008789, "rewards/margins": 38.28506851196289, "rewards/rejected": -39.84943389892578, "step": 12170 }, { "epoch": 53.18777292576419, "grad_norm": 1.3822617942240782e-05, "learning_rate": 2.658041192502392e-06, "logits/chosen": -1.4390161037445068, "logits/rejected": -1.9088243246078491, "logps/chosen": -441.6414489746094, "logps/rejected": -4226.36328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5371860265731812, "rewards/margins": 38.20948791503906, "rewards/rejected": -39.74667739868164, "step": 12180 }, { "epoch": 53.23144104803494, "grad_norm": 2.936279612119301e-07, "learning_rate": 2.6542378699531645e-06, "logits/chosen": -1.462172269821167, "logits/rejected": -1.965711236000061, "logps/chosen": -423.8837890625, "logps/rejected": -5082.7626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6441524028778076, "rewards/margins": 46.16539764404297, "rewards/rejected": -47.80955123901367, "step": 12190 }, { "epoch": 53.275109170305676, "grad_norm": 5.066121550481098e-07, "learning_rate": 2.650434189031421e-06, "logits/chosen": -1.41451096534729, "logits/rejected": -1.915482521057129, "logps/chosen": -438.945068359375, "logps/rejected": -4594.8427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.496106743812561, "rewards/margins": 41.66798782348633, "rewards/rejected": -43.16409683227539, "step": 12200 }, { "epoch": 53.31877729257642, "grad_norm": 2.707048602349273e-05, "learning_rate": 2.646630158575033e-06, "logits/chosen": -1.4808051586151123, "logits/rejected": -1.9272266626358032, "logps/chosen": -425.701171875, "logps/rejected": -4961.66943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5812855958938599, "rewards/margins": 45.11252975463867, "rewards/rejected": -46.69381332397461, "step": 12210 }, { "epoch": 53.36244541484716, "grad_norm": 3.126487104866903e-07, "learning_rate": 2.642825787422687e-06, "logits/chosen": -1.423189401626587, "logits/rejected": -1.9690239429473877, "logps/chosen": -453.307861328125, "logps/rejected": -4580.0126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4565967321395874, "rewards/margins": 41.608707427978516, "rewards/rejected": -43.06529998779297, "step": 12220 }, { "epoch": 53.4061135371179, "grad_norm": 9.818069587300781e-07, "learning_rate": 2.6390210844138593e-06, "logits/chosen": -1.5215518474578857, "logits/rejected": -2.0380959510803223, "logps/chosen": -408.47601318359375, "logps/rejected": -5525.8291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6176307201385498, "rewards/margins": 50.376792907714844, "rewards/rejected": -51.994422912597656, "step": 12230 }, { "epoch": 53.44978165938865, "grad_norm": 4.977575973277934e-07, "learning_rate": 2.635216058388797e-06, "logits/chosen": -1.4699041843414307, "logits/rejected": -1.9206714630126953, "logps/chosen": -454.009765625, "logps/rejected": -4660.94189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4686353206634521, "rewards/margins": 42.28188705444336, "rewards/rejected": -43.75052261352539, "step": 12240 }, { "epoch": 53.493449781659386, "grad_norm": 2.2502358257113971e-07, "learning_rate": 2.631410718188499e-06, "logits/chosen": -1.4316797256469727, "logits/rejected": -1.872908353805542, "logps/chosen": -437.5716857910156, "logps/rejected": -4518.5205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5310896635055542, "rewards/margins": 40.94329833984375, "rewards/rejected": -42.47438430786133, "step": 12250 }, { "epoch": 53.53711790393013, "grad_norm": 2.816383596331519e-07, "learning_rate": 2.6276050726546936e-06, "logits/chosen": -1.4679399728775024, "logits/rejected": -2.0104386806488037, "logps/chosen": -406.308349609375, "logps/rejected": -5172.6923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4250891208648682, "rewards/margins": 47.33789825439453, "rewards/rejected": -48.76298904418945, "step": 12260 }, { "epoch": 53.580786026200876, "grad_norm": 1.3795481579365482e-07, "learning_rate": 2.6237991306298183e-06, "logits/chosen": -1.4124395847320557, "logits/rejected": -1.8735783100128174, "logps/chosen": -407.40325927734375, "logps/rejected": -4561.142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.486264944076538, "rewards/margins": 41.49248504638672, "rewards/rejected": -42.9787483215332, "step": 12270 }, { "epoch": 53.624454148471614, "grad_norm": 5.246512543840151e-07, "learning_rate": 2.6199929009570003e-06, "logits/chosen": -1.4190800189971924, "logits/rejected": -1.8651520013809204, "logps/chosen": -440.62054443359375, "logps/rejected": -4307.09716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5419590473175049, "rewards/margins": 38.95093536376953, "rewards/rejected": -40.492897033691406, "step": 12280 }, { "epoch": 53.66812227074236, "grad_norm": 4.0879446128456753e-07, "learning_rate": 2.6161863924800346e-06, "logits/chosen": -1.4911357164382935, "logits/rejected": -2.0262646675109863, "logps/chosen": -419.8457946777344, "logps/rejected": -5173.11572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4579017162322998, "rewards/margins": 47.268798828125, "rewards/rejected": -48.72669982910156, "step": 12290 }, { "epoch": 53.7117903930131, "grad_norm": 2.7777002116936387e-07, "learning_rate": 2.612379614043364e-06, "logits/chosen": -1.4609897136688232, "logits/rejected": -1.9938371181488037, "logps/chosen": -439.2596130371094, "logps/rejected": -4917.1923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7976068258285522, "rewards/margins": 44.54057312011719, "rewards/rejected": -46.33818817138672, "step": 12300 }, { "epoch": 53.75545851528384, "grad_norm": 4.7714426462036924e-06, "learning_rate": 2.608572574492057e-06, "logits/chosen": -1.4446828365325928, "logits/rejected": -1.9864063262939453, "logps/chosen": -448.07379150390625, "logps/rejected": -4631.5595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6396583318710327, "rewards/margins": 41.96303176879883, "rewards/rejected": -43.602691650390625, "step": 12310 }, { "epoch": 53.799126637554586, "grad_norm": 1.0697863296278307e-07, "learning_rate": 2.6047652826717934e-06, "logits/chosen": -1.4301635026931763, "logits/rejected": -1.9242374897003174, "logps/chosen": -448.5745544433594, "logps/rejected": -4493.2353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5353386402130127, "rewards/margins": 40.668846130371094, "rewards/rejected": -42.204185485839844, "step": 12320 }, { "epoch": 53.842794759825324, "grad_norm": 1.3928938542385933e-07, "learning_rate": 2.600957747428833e-06, "logits/chosen": -1.4623804092407227, "logits/rejected": -2.049245834350586, "logps/chosen": -407.92852783203125, "logps/rejected": -4927.20166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.459661841392517, "rewards/margins": 44.93657684326172, "rewards/rejected": -46.396240234375, "step": 12330 }, { "epoch": 53.88646288209607, "grad_norm": 1.180717600573776e-06, "learning_rate": 2.597149977610007e-06, "logits/chosen": -1.4326183795928955, "logits/rejected": -1.9515259265899658, "logps/chosen": -430.63958740234375, "logps/rejected": -4714.4296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6416345834732056, "rewards/margins": 42.715518951416016, "rewards/rejected": -44.357154846191406, "step": 12340 }, { "epoch": 53.930131004366814, "grad_norm": 1.6664828014372043e-06, "learning_rate": 2.593341982062687e-06, "logits/chosen": -1.4025869369506836, "logits/rejected": -1.90470290184021, "logps/chosen": -450.494140625, "logps/rejected": -4371.97802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6187509298324585, "rewards/margins": 39.51118850708008, "rewards/rejected": -41.12993621826172, "step": 12350 }, { "epoch": 53.97379912663755, "grad_norm": 8.114363167181138e-07, "learning_rate": 2.5895337696347728e-06, "logits/chosen": -1.4752752780914307, "logits/rejected": -2.043368339538574, "logps/chosen": -434.677978515625, "logps/rejected": -4992.61962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.737086296081543, "rewards/margins": 45.260555267333984, "rewards/rejected": -46.99764633178711, "step": 12360 }, { "epoch": 54.0174672489083, "grad_norm": 2.929857996188683e-07, "learning_rate": 2.5857253491746646e-06, "logits/chosen": -1.4474247694015503, "logits/rejected": -1.9691970348358154, "logps/chosen": -454.27923583984375, "logps/rejected": -4418.7255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.70827317237854, "rewards/margins": 39.84721374511719, "rewards/rejected": -41.55548858642578, "step": 12370 }, { "epoch": 54.06113537117904, "grad_norm": 4.265843476370004e-06, "learning_rate": 2.5819167295312487e-06, "logits/chosen": -1.4831660985946655, "logits/rejected": -1.9887456893920898, "logps/chosen": -408.2938537597656, "logps/rejected": -5193.828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6100813150405884, "rewards/margins": 47.26054382324219, "rewards/rejected": -48.870628356933594, "step": 12380 }, { "epoch": 54.10480349344978, "grad_norm": 4.814130535972591e-06, "learning_rate": 2.578107919553873e-06, "logits/chosen": -1.4272406101226807, "logits/rejected": -1.9360663890838623, "logps/chosen": -436.21685791015625, "logps/rejected": -4573.5927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4807180166244507, "rewards/margins": 41.634822845458984, "rewards/rejected": -43.11553955078125, "step": 12390 }, { "epoch": 54.148471615720524, "grad_norm": 8.372453781789205e-07, "learning_rate": 2.574298928092328e-06, "logits/chosen": -1.4160513877868652, "logits/rejected": -1.906280755996704, "logps/chosen": -435.18450927734375, "logps/rejected": -4286.8662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5794851779937744, "rewards/margins": 38.67161178588867, "rewards/rejected": -40.251094818115234, "step": 12400 }, { "epoch": 54.19213973799127, "grad_norm": 5.366941474989475e-06, "learning_rate": 2.5704897639968257e-06, "logits/chosen": -1.4357855319976807, "logits/rejected": -1.8878953456878662, "logps/chosen": -444.85638427734375, "logps/rejected": -4598.91357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5580112934112549, "rewards/margins": 41.602210998535156, "rewards/rejected": -43.16022491455078, "step": 12410 }, { "epoch": 54.23580786026201, "grad_norm": 5.258880886745745e-06, "learning_rate": 2.566680436117979e-06, "logits/chosen": -1.4526817798614502, "logits/rejected": -1.9365146160125732, "logps/chosen": -427.2149353027344, "logps/rejected": -4523.9931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5398366451263428, "rewards/margins": 41.07023239135742, "rewards/rejected": -42.610069274902344, "step": 12420 }, { "epoch": 54.27947598253275, "grad_norm": 2.089932204601314e-07, "learning_rate": 2.5628709533067827e-06, "logits/chosen": -1.5329545736312866, "logits/rejected": -2.027444839477539, "logps/chosen": -425.6980895996094, "logps/rejected": -5257.78955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6763118505477905, "rewards/margins": 47.72982406616211, "rewards/rejected": -49.40613555908203, "step": 12430 }, { "epoch": 54.3231441048035, "grad_norm": 1.9981731561209398e-05, "learning_rate": 2.5590613244145897e-06, "logits/chosen": -1.4361743927001953, "logits/rejected": -1.9953300952911377, "logps/chosen": -431.74688720703125, "logps/rejected": -4771.0439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4868748188018799, "rewards/margins": 43.44542694091797, "rewards/rejected": -44.93230056762695, "step": 12440 }, { "epoch": 54.366812227074234, "grad_norm": 1.122521939243576e-05, "learning_rate": 2.5552515582930926e-06, "logits/chosen": -1.4816339015960693, "logits/rejected": -1.971239686012268, "logps/chosen": -438.5176696777344, "logps/rejected": -4728.12744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6007353067398071, "rewards/margins": 42.865699768066406, "rewards/rejected": -44.46643829345703, "step": 12450 }, { "epoch": 54.41048034934498, "grad_norm": 1.4414371054441288e-07, "learning_rate": 2.551441663794304e-06, "logits/chosen": -1.477146863937378, "logits/rejected": -2.071866035461426, "logps/chosen": -437.779052734375, "logps/rejected": -5250.73046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5269999504089355, "rewards/margins": 47.96680450439453, "rewards/rejected": -49.493804931640625, "step": 12460 }, { "epoch": 54.45414847161572, "grad_norm": 1.4317440541373523e-05, "learning_rate": 2.547631649770534e-06, "logits/chosen": -1.4760115146636963, "logits/rejected": -1.9806334972381592, "logps/chosen": -422.89239501953125, "logps/rejected": -4978.73046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.547848105430603, "rewards/margins": 45.35308074951172, "rewards/rejected": -46.90092849731445, "step": 12470 }, { "epoch": 54.49781659388646, "grad_norm": 2.1232140565972604e-07, "learning_rate": 2.543821525074371e-06, "logits/chosen": -1.4406248331069946, "logits/rejected": -1.9810550212860107, "logps/chosen": -448.30462646484375, "logps/rejected": -4735.4345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5985586643218994, "rewards/margins": 43.10649108886719, "rewards/rejected": -44.70505142211914, "step": 12480 }, { "epoch": 54.54148471615721, "grad_norm": 1.1869738279098391e-07, "learning_rate": 2.540011298558659e-06, "logits/chosen": -1.4332283735275269, "logits/rejected": -1.9637552499771118, "logps/chosen": -449.26922607421875, "logps/rejected": -4469.58935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4861505031585693, "rewards/margins": 40.61213302612305, "rewards/rejected": -42.09828186035156, "step": 12490 }, { "epoch": 54.585152838427945, "grad_norm": 1.1051575843868483e-07, "learning_rate": 2.5362009790764814e-06, "logits/chosen": -1.4776065349578857, "logits/rejected": -1.99038827419281, "logps/chosen": -440.1705017089844, "logps/rejected": -4819.6220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.595325231552124, "rewards/margins": 43.77268981933594, "rewards/rejected": -45.36801528930664, "step": 12500 }, { "epoch": 54.62882096069869, "grad_norm": 8.103930274603722e-08, "learning_rate": 2.5323905754811358e-06, "logits/chosen": -1.491644024848938, "logits/rejected": -2.006711721420288, "logps/chosen": -411.2992248535156, "logps/rejected": -5218.751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5893338918685913, "rewards/margins": 47.55390167236328, "rewards/rejected": -49.14323043823242, "step": 12510 }, { "epoch": 54.672489082969435, "grad_norm": 2.323356065293963e-07, "learning_rate": 2.5285800966261124e-06, "logits/chosen": -1.4083850383758545, "logits/rejected": -1.9766021966934204, "logps/chosen": -444.191162109375, "logps/rejected": -4688.47900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5664340257644653, "rewards/margins": 42.580928802490234, "rewards/rejected": -44.14736557006836, "step": 12520 }, { "epoch": 54.71615720524017, "grad_norm": 8.099677723601426e-06, "learning_rate": 2.524769551365083e-06, "logits/chosen": -1.4338030815124512, "logits/rejected": -1.974707841873169, "logps/chosen": -441.7421875, "logps/rejected": -4643.3671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5290552377700806, "rewards/margins": 42.2298698425293, "rewards/rejected": -43.75891876220703, "step": 12530 }, { "epoch": 54.75982532751092, "grad_norm": 2.66122377257562e-07, "learning_rate": 2.520958948551868e-06, "logits/chosen": -1.4505325555801392, "logits/rejected": -1.9752362966537476, "logps/chosen": -438.13970947265625, "logps/rejected": -5146.02099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5254319906234741, "rewards/margins": 46.960052490234375, "rewards/rejected": -48.48548126220703, "step": 12540 }, { "epoch": 54.80349344978166, "grad_norm": 3.22219048474034e-06, "learning_rate": 2.5171482970404244e-06, "logits/chosen": -1.4864561557769775, "logits/rejected": -1.9785178899765015, "logps/chosen": -449.0185546875, "logps/rejected": -4792.14599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5868487358093262, "rewards/margins": 43.314998626708984, "rewards/rejected": -44.90184783935547, "step": 12550 }, { "epoch": 54.8471615720524, "grad_norm": 1.0711967385469883e-05, "learning_rate": 2.51333760568482e-06, "logits/chosen": -1.436821460723877, "logits/rejected": -1.8689731359481812, "logps/chosen": -434.9810485839844, "logps/rejected": -4391.8349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5861101150512695, "rewards/margins": 39.600486755371094, "rewards/rejected": -41.18659591674805, "step": 12560 }, { "epoch": 54.890829694323145, "grad_norm": 9.533427360274327e-07, "learning_rate": 2.5095268833392177e-06, "logits/chosen": -1.4990692138671875, "logits/rejected": -1.9629253149032593, "logps/chosen": -425.04327392578125, "logps/rejected": -4772.67822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.568172812461853, "rewards/margins": 43.27117156982422, "rewards/rejected": -44.83934020996094, "step": 12570 }, { "epoch": 54.93449781659389, "grad_norm": 8.040958977312773e-07, "learning_rate": 2.505716138857851e-06, "logits/chosen": -1.4736508131027222, "logits/rejected": -2.0350942611694336, "logps/chosen": -438.8755798339844, "logps/rejected": -4951.9736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5630810260772705, "rewards/margins": 45.08133316040039, "rewards/rejected": -46.644412994384766, "step": 12580 }, { "epoch": 54.97816593886463, "grad_norm": 5.543432631938916e-07, "learning_rate": 2.5019053810950046e-06, "logits/chosen": -1.426945686340332, "logits/rejected": -1.9775631427764893, "logps/chosen": -459.88165283203125, "logps/rejected": -4740.8271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6308971643447876, "rewards/margins": 43.08897399902344, "rewards/rejected": -44.719871520996094, "step": 12590 }, { "epoch": 55.02183406113537, "grad_norm": 1.3590734034949436e-05, "learning_rate": 2.4980946189049954e-06, "logits/chosen": -1.478756308555603, "logits/rejected": -1.9798564910888672, "logps/chosen": -475.5086364746094, "logps/rejected": -4457.26904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7295036315917969, "rewards/margins": 40.19872283935547, "rewards/rejected": -41.928226470947266, "step": 12600 }, { "epoch": 55.06550218340611, "grad_norm": 1.3999053473693267e-07, "learning_rate": 2.4942838611421493e-06, "logits/chosen": -1.4846923351287842, "logits/rejected": -2.060204267501831, "logps/chosen": -419.66339111328125, "logps/rejected": -5276.11181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5649136304855347, "rewards/margins": 48.1241455078125, "rewards/rejected": -49.689064025878906, "step": 12610 }, { "epoch": 55.109170305676855, "grad_norm": 8.524610306653684e-07, "learning_rate": 2.4904731166607827e-06, "logits/chosen": -1.463383436203003, "logits/rejected": -1.9766671657562256, "logps/chosen": -414.315673828125, "logps/rejected": -4749.181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5146572589874268, "rewards/margins": 43.212860107421875, "rewards/rejected": -44.727516174316406, "step": 12620 }, { "epoch": 55.1528384279476, "grad_norm": 1.7090498611327032e-05, "learning_rate": 2.486662394315181e-06, "logits/chosen": -1.48789644241333, "logits/rejected": -2.0297093391418457, "logps/chosen": -425.4051818847656, "logps/rejected": -4831.20556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6145061254501343, "rewards/margins": 43.861080169677734, "rewards/rejected": -45.475589752197266, "step": 12630 }, { "epoch": 55.19650655021834, "grad_norm": 1.5631252808766544e-07, "learning_rate": 2.482851702959577e-06, "logits/chosen": -1.430006742477417, "logits/rejected": -1.891898512840271, "logps/chosen": -443.48876953125, "logps/rejected": -4290.46240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4826409816741943, "rewards/margins": 38.77524948120117, "rewards/rejected": -40.25789260864258, "step": 12640 }, { "epoch": 55.24017467248908, "grad_norm": 4.57597941008627e-07, "learning_rate": 2.4790410514481315e-06, "logits/chosen": -1.4682371616363525, "logits/rejected": -2.0238759517669678, "logps/chosen": -443.42449951171875, "logps/rejected": -4552.4501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.507336974143982, "rewards/margins": 41.34209060668945, "rewards/rejected": -42.849430084228516, "step": 12650 }, { "epoch": 55.28384279475983, "grad_norm": 1.1330821806709743e-06, "learning_rate": 2.475230448634917e-06, "logits/chosen": -1.453517198562622, "logits/rejected": -2.0034537315368652, "logps/chosen": -451.9126892089844, "logps/rejected": -4569.30224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7274048328399658, "rewards/margins": 41.25922393798828, "rewards/rejected": -42.98662185668945, "step": 12660 }, { "epoch": 55.327510917030565, "grad_norm": 4.22770208483907e-06, "learning_rate": 2.471419903373888e-06, "logits/chosen": -1.5051580667495728, "logits/rejected": -1.9728546142578125, "logps/chosen": -445.41815185546875, "logps/rejected": -4799.2060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6793216466903687, "rewards/margins": 43.45668411254883, "rewards/rejected": -45.13600540161133, "step": 12670 }, { "epoch": 55.37117903930131, "grad_norm": 2.0879673833666173e-07, "learning_rate": 2.467609424518866e-06, "logits/chosen": -1.4274318218231201, "logits/rejected": -1.9363114833831787, "logps/chosen": -479.6783142089844, "logps/rejected": -4108.3154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6480439901351929, "rewards/margins": 36.98530960083008, "rewards/rejected": -38.63335418701172, "step": 12680 }, { "epoch": 55.414847161572055, "grad_norm": 1.335581739499666e-06, "learning_rate": 2.4637990209235186e-06, "logits/chosen": -1.4438135623931885, "logits/rejected": -1.8956325054168701, "logps/chosen": -466.09906005859375, "logps/rejected": -4373.5703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6255943775177002, "rewards/margins": 39.419944763183594, "rewards/rejected": -41.04553985595703, "step": 12690 }, { "epoch": 55.45851528384279, "grad_norm": 1.8425202937292068e-06, "learning_rate": 2.4599887014413407e-06, "logits/chosen": -1.44582998752594, "logits/rejected": -1.9920088052749634, "logps/chosen": -467.98590087890625, "logps/rejected": -4454.6494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7313464879989624, "rewards/margins": 40.32701873779297, "rewards/rejected": -42.05836868286133, "step": 12700 }, { "epoch": 55.50218340611354, "grad_norm": 3.4104731997500426e-05, "learning_rate": 2.45617847492563e-06, "logits/chosen": -1.5197149515151978, "logits/rejected": -2.127277135848999, "logps/chosen": -424.7411193847656, "logps/rejected": -5498.3486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5318235158920288, "rewards/margins": 50.29171371459961, "rewards/rejected": -51.82353591918945, "step": 12710 }, { "epoch": 55.54585152838428, "grad_norm": 2.4666775017678392e-06, "learning_rate": 2.452368350229467e-06, "logits/chosen": -1.5067124366760254, "logits/rejected": -2.063223361968994, "logps/chosen": -434.0873107910156, "logps/rejected": -5198.0087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6852163076400757, "rewards/margins": 47.231292724609375, "rewards/rejected": -48.91650390625, "step": 12720 }, { "epoch": 55.58951965065502, "grad_norm": 2.458226997734625e-06, "learning_rate": 2.4485583362056975e-06, "logits/chosen": -1.4567387104034424, "logits/rejected": -1.9978545904159546, "logps/chosen": -428.8196716308594, "logps/rejected": -5110.154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5871111154556274, "rewards/margins": 46.53741455078125, "rewards/rejected": -48.12451934814453, "step": 12730 }, { "epoch": 55.633187772925766, "grad_norm": 1.0250087884105184e-06, "learning_rate": 2.444748441706908e-06, "logits/chosen": -1.4530220031738281, "logits/rejected": -2.0121989250183105, "logps/chosen": -419.84759521484375, "logps/rejected": -4821.3017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.55524742603302, "rewards/margins": 43.8536376953125, "rewards/rejected": -45.40888214111328, "step": 12740 }, { "epoch": 55.6768558951965, "grad_norm": 7.205641090940022e-07, "learning_rate": 2.4409386755854116e-06, "logits/chosen": -1.4074020385742188, "logits/rejected": -1.901084303855896, "logps/chosen": -445.76141357421875, "logps/rejected": -3926.47412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6152050495147705, "rewards/margins": 35.34877014160156, "rewards/rejected": -36.9639778137207, "step": 12750 }, { "epoch": 55.72052401746725, "grad_norm": 6.3992564709472e-05, "learning_rate": 2.4371290466932177e-06, "logits/chosen": -1.4192919731140137, "logits/rejected": -1.8537139892578125, "logps/chosen": -473.3977966308594, "logps/rejected": -3851.02685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.609503984451294, "rewards/margins": 34.503238677978516, "rewards/rejected": -36.11274337768555, "step": 12760 }, { "epoch": 55.76419213973799, "grad_norm": 7.45768031156775e-08, "learning_rate": 2.4333195638820213e-06, "logits/chosen": -1.468085527420044, "logits/rejected": -1.9812473058700562, "logps/chosen": -451.8775329589844, "logps/rejected": -4888.89501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6744998693466187, "rewards/margins": 44.441043853759766, "rewards/rejected": -46.11553955078125, "step": 12770 }, { "epoch": 55.80786026200873, "grad_norm": 4.176494418084523e-07, "learning_rate": 2.429510236003175e-06, "logits/chosen": -1.4709851741790771, "logits/rejected": -1.945555329322815, "logps/chosen": -470.9202575683594, "logps/rejected": -4350.11328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.684391736984253, "rewards/margins": 39.2481575012207, "rewards/rejected": -40.93254852294922, "step": 12780 }, { "epoch": 55.851528384279476, "grad_norm": 3.2424475544131786e-07, "learning_rate": 2.4257010719076726e-06, "logits/chosen": -1.4676355123519897, "logits/rejected": -2.018859386444092, "logps/chosen": -446.7916564941406, "logps/rejected": -4924.8271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.617676019668579, "rewards/margins": 44.865501403808594, "rewards/rejected": -46.48317337036133, "step": 12790 }, { "epoch": 55.89519650655022, "grad_norm": 1.3654913917819852e-07, "learning_rate": 2.4218920804461273e-06, "logits/chosen": -1.4322247505187988, "logits/rejected": -1.9864647388458252, "logps/chosen": -476.9500427246094, "logps/rejected": -4798.32568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8037874698638916, "rewards/margins": 43.397254943847656, "rewards/rejected": -45.20104217529297, "step": 12800 }, { "epoch": 55.93886462882096, "grad_norm": 3.385438563802867e-07, "learning_rate": 2.4180832704687517e-06, "logits/chosen": -1.400534749031067, "logits/rejected": -1.9473316669464111, "logps/chosen": -448.0462341308594, "logps/rejected": -4517.5087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7306673526763916, "rewards/margins": 40.827667236328125, "rewards/rejected": -42.55833435058594, "step": 12810 }, { "epoch": 55.9825327510917, "grad_norm": 1.6623915097236554e-06, "learning_rate": 2.4142746508253367e-06, "logits/chosen": -1.4978666305541992, "logits/rejected": -2.0294909477233887, "logps/chosen": -456.71630859375, "logps/rejected": -4973.63623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6768690347671509, "rewards/margins": 45.0997428894043, "rewards/rejected": -46.77661895751953, "step": 12820 }, { "epoch": 56.02620087336245, "grad_norm": 2.2324762808020977e-07, "learning_rate": 2.410466230365229e-06, "logits/chosen": -1.4900777339935303, "logits/rejected": -2.0312042236328125, "logps/chosen": -431.53289794921875, "logps/rejected": -5104.6923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.589396595954895, "rewards/margins": 46.465335845947266, "rewards/rejected": -48.054725646972656, "step": 12830 }, { "epoch": 56.069868995633186, "grad_norm": 1.081110280887426e-05, "learning_rate": 2.4066580179373134e-06, "logits/chosen": -1.457040786743164, "logits/rejected": -1.9589935541152954, "logps/chosen": -424.3412170410156, "logps/rejected": -4747.30224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5813026428222656, "rewards/margins": 43.110294342041016, "rewards/rejected": -44.69160079956055, "step": 12840 }, { "epoch": 56.11353711790393, "grad_norm": 2.2653832384130354e-05, "learning_rate": 2.402850022389994e-06, "logits/chosen": -1.4384098052978516, "logits/rejected": -1.9546897411346436, "logps/chosen": -461.48931884765625, "logps/rejected": -4313.10791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7047107219696045, "rewards/margins": 38.835960388183594, "rewards/rejected": -40.540672302246094, "step": 12850 }, { "epoch": 56.157205240174676, "grad_norm": 3.2454279368587527e-05, "learning_rate": 2.3990422525711676e-06, "logits/chosen": -1.4968178272247314, "logits/rejected": -2.014692544937134, "logps/chosen": -450.34368896484375, "logps/rejected": -4964.98583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6584885120391846, "rewards/margins": 45.129798889160156, "rewards/rejected": -46.78828430175781, "step": 12860 }, { "epoch": 56.200873362445414, "grad_norm": 1.016514763615358e-05, "learning_rate": 2.395234717328208e-06, "logits/chosen": -1.483875036239624, "logits/rejected": -2.034107208251953, "logps/chosen": -438.19000244140625, "logps/rejected": -4834.89892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.541884422302246, "rewards/margins": 43.975704193115234, "rewards/rejected": -45.51758575439453, "step": 12870 }, { "epoch": 56.24454148471616, "grad_norm": 1.8309724866611774e-07, "learning_rate": 2.391427425507943e-06, "logits/chosen": -1.439704179763794, "logits/rejected": -2.0023276805877686, "logps/chosen": -441.44195556640625, "logps/rejected": -4975.5458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6603533029556274, "rewards/margins": 45.23509979248047, "rewards/rejected": -46.89545440673828, "step": 12880 }, { "epoch": 56.2882096069869, "grad_norm": 6.330894693554735e-07, "learning_rate": 2.3876203859566373e-06, "logits/chosen": -1.4417731761932373, "logits/rejected": -2.008633852005005, "logps/chosen": -443.76702880859375, "logps/rejected": -4775.83251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6977994441986084, "rewards/margins": 43.37024688720703, "rewards/rejected": -45.06804656982422, "step": 12890 }, { "epoch": 56.33187772925764, "grad_norm": 1.1707783479839035e-07, "learning_rate": 2.3838136075199663e-06, "logits/chosen": -1.4665788412094116, "logits/rejected": -1.9649900197982788, "logps/chosen": -444.53924560546875, "logps/rejected": -4715.3544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7102073431015015, "rewards/margins": 42.701847076416016, "rewards/rejected": -44.412052154541016, "step": 12900 }, { "epoch": 56.375545851528386, "grad_norm": 9.654440524431242e-08, "learning_rate": 2.3800070990430006e-06, "logits/chosen": -1.4723206758499146, "logits/rejected": -2.0366315841674805, "logps/chosen": -436.041015625, "logps/rejected": -5047.03125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6110427379608154, "rewards/margins": 45.95507049560547, "rewards/rejected": -47.56612014770508, "step": 12910 }, { "epoch": 56.419213973799124, "grad_norm": 2.485758429559546e-06, "learning_rate": 2.3762008693701825e-06, "logits/chosen": -1.50799560546875, "logits/rejected": -2.0631110668182373, "logps/chosen": -436.5862731933594, "logps/rejected": -5137.052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.620528221130371, "rewards/margins": 46.82728958129883, "rewards/rejected": -48.44782257080078, "step": 12920 }, { "epoch": 56.46288209606987, "grad_norm": 2.3914664964015684e-07, "learning_rate": 2.3723949273453072e-06, "logits/chosen": -1.469202995300293, "logits/rejected": -1.970788598060608, "logps/chosen": -437.97314453125, "logps/rejected": -4820.1708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6666301488876343, "rewards/margins": 43.73198318481445, "rewards/rejected": -45.39861297607422, "step": 12930 }, { "epoch": 56.506550218340614, "grad_norm": 5.81294153720536e-07, "learning_rate": 2.368589281811502e-06, "logits/chosen": -1.5075219869613647, "logits/rejected": -2.0293924808502197, "logps/chosen": -414.05859375, "logps/rejected": -5165.01318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.572412371635437, "rewards/margins": 47.08009338378906, "rewards/rejected": -48.65250778198242, "step": 12940 }, { "epoch": 56.55021834061135, "grad_norm": 1.270463621947115e-07, "learning_rate": 2.364783941611204e-06, "logits/chosen": -1.4541107416152954, "logits/rejected": -1.9759531021118164, "logps/chosen": -457.9759216308594, "logps/rejected": -4566.9482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6375945806503296, "rewards/margins": 41.3604850769043, "rewards/rejected": -42.99808120727539, "step": 12950 }, { "epoch": 56.5938864628821, "grad_norm": 1.459470367937155e-05, "learning_rate": 2.360978915586142e-06, "logits/chosen": -1.4601155519485474, "logits/rejected": -2.00555682182312, "logps/chosen": -446.65692138671875, "logps/rejected": -4846.89794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6632190942764282, "rewards/margins": 43.94518280029297, "rewards/rejected": -45.60840606689453, "step": 12960 }, { "epoch": 56.63755458515284, "grad_norm": 4.466648345348359e-06, "learning_rate": 2.3571742125773136e-06, "logits/chosen": -1.4798564910888672, "logits/rejected": -1.9962644577026367, "logps/chosen": -449.2272033691406, "logps/rejected": -4895.11767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7195087671279907, "rewards/margins": 44.360477447509766, "rewards/rejected": -46.07998275756836, "step": 12970 }, { "epoch": 56.68122270742358, "grad_norm": 1.679494792555876e-07, "learning_rate": 2.353369841424967e-06, "logits/chosen": -1.5094074010849, "logits/rejected": -2.060783863067627, "logps/chosen": -405.0069274902344, "logps/rejected": -5190.955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5913101434707642, "rewards/margins": 47.313533782958984, "rewards/rejected": -48.904850006103516, "step": 12980 }, { "epoch": 56.724890829694324, "grad_norm": 1.3970182846964115e-05, "learning_rate": 2.3495658109685794e-06, "logits/chosen": -1.5109624862670898, "logits/rejected": -2.039804697036743, "logps/chosen": -443.304931640625, "logps/rejected": -5450.48681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5995641946792603, "rewards/margins": 49.8302001953125, "rewards/rejected": -51.42976760864258, "step": 12990 }, { "epoch": 56.76855895196506, "grad_norm": 2.73271716604005e-07, "learning_rate": 2.345762130046836e-06, "logits/chosen": -1.430136799812317, "logits/rejected": -1.9403098821640015, "logps/chosen": -499.3824157714844, "logps/rejected": -4247.7109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7233188152313232, "rewards/margins": 38.17449188232422, "rewards/rejected": -39.89780807495117, "step": 13000 }, { "epoch": 56.81222707423581, "grad_norm": 3.3179110000607285e-05, "learning_rate": 2.3419588074976094e-06, "logits/chosen": -1.5145256519317627, "logits/rejected": -2.092158317565918, "logps/chosen": -413.7486267089844, "logps/rejected": -5337.74169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5762217044830322, "rewards/margins": 48.74500274658203, "rewards/rejected": -50.32122039794922, "step": 13010 }, { "epoch": 56.85589519650655, "grad_norm": 3.354092278899297e-07, "learning_rate": 2.338155852157939e-06, "logits/chosen": -1.4592957496643066, "logits/rejected": -1.9984630346298218, "logps/chosen": -443.332275390625, "logps/rejected": -4744.1572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.662123680114746, "rewards/margins": 43.06119918823242, "rewards/rejected": -44.72332000732422, "step": 13020 }, { "epoch": 56.89956331877729, "grad_norm": 1.6299789448468536e-07, "learning_rate": 2.334353272864013e-06, "logits/chosen": -1.4796249866485596, "logits/rejected": -1.999035120010376, "logps/chosen": -465.04437255859375, "logps/rejected": -4511.21630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.805342435836792, "rewards/margins": 40.649532318115234, "rewards/rejected": -42.45487594604492, "step": 13030 }, { "epoch": 56.943231441048034, "grad_norm": 1.491185707610407e-07, "learning_rate": 2.330551078451145e-06, "logits/chosen": -1.5083433389663696, "logits/rejected": -2.102954387664795, "logps/chosen": -446.93341064453125, "logps/rejected": -5335.77880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6962569952011108, "rewards/margins": 48.69048309326172, "rewards/rejected": -50.386741638183594, "step": 13040 }, { "epoch": 56.98689956331878, "grad_norm": 1.2800641278794906e-06, "learning_rate": 2.3267492777537535e-06, "logits/chosen": -1.51211678981781, "logits/rejected": -2.0653116703033447, "logps/chosen": -428.07208251953125, "logps/rejected": -5026.13330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6187973022460938, "rewards/margins": 45.63203811645508, "rewards/rejected": -47.25083923339844, "step": 13050 }, { "epoch": 57.03056768558952, "grad_norm": 4.52890723161188e-07, "learning_rate": 2.3229478796053426e-06, "logits/chosen": -1.4663448333740234, "logits/rejected": -2.013582706451416, "logps/chosen": -446.67437744140625, "logps/rejected": -4649.17041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.692236304283142, "rewards/margins": 42.20492935180664, "rewards/rejected": -43.89716339111328, "step": 13060 }, { "epoch": 57.07423580786026, "grad_norm": 3.053435916114927e-06, "learning_rate": 2.3191468928384815e-06, "logits/chosen": -1.5327211618423462, "logits/rejected": -2.0485527515411377, "logps/chosen": -425.26934814453125, "logps/rejected": -5196.49267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6675608158111572, "rewards/margins": 47.311519622802734, "rewards/rejected": -48.97908020019531, "step": 13070 }, { "epoch": 57.11790393013101, "grad_norm": 5.347218292045446e-07, "learning_rate": 2.3153463262847837e-06, "logits/chosen": -1.4670398235321045, "logits/rejected": -1.9732961654663086, "logps/chosen": -453.6898498535156, "logps/rejected": -4429.5419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.738303780555725, "rewards/margins": 39.933738708496094, "rewards/rejected": -41.67203903198242, "step": 13080 }, { "epoch": 57.161572052401745, "grad_norm": 2.7825410078503336e-07, "learning_rate": 2.311546188774886e-06, "logits/chosen": -1.4835302829742432, "logits/rejected": -2.0599045753479004, "logps/chosen": -443.65130615234375, "logps/rejected": -4808.5166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6767628192901611, "rewards/margins": 43.5919303894043, "rewards/rejected": -45.26869583129883, "step": 13090 }, { "epoch": 57.20524017467249, "grad_norm": 3.258868197925202e-07, "learning_rate": 2.3077464891384275e-06, "logits/chosen": -1.5082679986953735, "logits/rejected": -2.0612568855285645, "logps/chosen": -439.58441162109375, "logps/rejected": -5117.79638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.673301339149475, "rewards/margins": 46.61034393310547, "rewards/rejected": -48.28364181518555, "step": 13100 }, { "epoch": 57.248908296943235, "grad_norm": 1.9822650627729997e-05, "learning_rate": 2.303947236204031e-06, "logits/chosen": -1.4327638149261475, "logits/rejected": -2.0087168216705322, "logps/chosen": -460.70941162109375, "logps/rejected": -4588.5087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.709838628768921, "rewards/margins": 41.591251373291016, "rewards/rejected": -43.30108642578125, "step": 13110 }, { "epoch": 57.29257641921397, "grad_norm": 3.2942459568075236e-05, "learning_rate": 2.3001484387992806e-06, "logits/chosen": -1.4842774868011475, "logits/rejected": -2.0825510025024414, "logps/chosen": -424.1761169433594, "logps/rejected": -5186.3916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5562937259674072, "rewards/margins": 47.311927795410156, "rewards/rejected": -48.86821746826172, "step": 13120 }, { "epoch": 57.33624454148472, "grad_norm": 1.070164233705641e-07, "learning_rate": 2.296350105750703e-06, "logits/chosen": -1.5161460638046265, "logits/rejected": -2.069890022277832, "logps/chosen": -423.8848571777344, "logps/rejected": -5285.92138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7415393590927124, "rewards/margins": 48.03425979614258, "rewards/rejected": -49.77579879760742, "step": 13130 }, { "epoch": 57.379912663755455, "grad_norm": 2.80636710164181e-07, "learning_rate": 2.2925522458837456e-06, "logits/chosen": -1.4405015707015991, "logits/rejected": -1.955693006515503, "logps/chosen": -469.8135681152344, "logps/rejected": -4661.9697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8411659002304077, "rewards/margins": 42.10148239135742, "rewards/rejected": -43.942649841308594, "step": 13140 }, { "epoch": 57.4235807860262, "grad_norm": 3.03263760873777e-06, "learning_rate": 2.2887548680227557e-06, "logits/chosen": -1.4854352474212646, "logits/rejected": -2.0087926387786865, "logps/chosen": -424.357177734375, "logps/rejected": -5189.2001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.642478346824646, "rewards/margins": 47.22490310668945, "rewards/rejected": -48.86737823486328, "step": 13150 }, { "epoch": 57.467248908296945, "grad_norm": 9.449189064319271e-07, "learning_rate": 2.284957980990962e-06, "logits/chosen": -1.509681224822998, "logits/rejected": -2.127243757247925, "logps/chosen": -422.28753662109375, "logps/rejected": -5318.1767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7327896356582642, "rewards/margins": 48.42406463623047, "rewards/rejected": -50.1568489074707, "step": 13160 }, { "epoch": 57.51091703056768, "grad_norm": 3.500117997850121e-07, "learning_rate": 2.281161593610449e-06, "logits/chosen": -1.4509772062301636, "logits/rejected": -1.961037278175354, "logps/chosen": -483.47174072265625, "logps/rejected": -4620.23681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.841170072555542, "rewards/margins": 41.5534553527832, "rewards/rejected": -43.394622802734375, "step": 13170 }, { "epoch": 57.55458515283843, "grad_norm": 2.755801579649455e-07, "learning_rate": 2.2773657147021466e-06, "logits/chosen": -1.5464094877243042, "logits/rejected": -2.0660080909729004, "logps/chosen": -468.4224548339844, "logps/rejected": -5175.75048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.727184534072876, "rewards/margins": 46.942100524902344, "rewards/rejected": -48.66929244995117, "step": 13180 }, { "epoch": 57.59825327510917, "grad_norm": 6.826532734165142e-07, "learning_rate": 2.2735703530857977e-06, "logits/chosen": -1.4224565029144287, "logits/rejected": -1.9609363079071045, "logps/chosen": -482.0692443847656, "logps/rejected": -4483.08935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8127119541168213, "rewards/margins": 40.47551727294922, "rewards/rejected": -42.288230895996094, "step": 13190 }, { "epoch": 57.64192139737991, "grad_norm": 3.5058045726160924e-06, "learning_rate": 2.2697755175799464e-06, "logits/chosen": -1.4801077842712402, "logits/rejected": -2.025376796722412, "logps/chosen": -470.66552734375, "logps/rejected": -4694.25341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8131434917449951, "rewards/margins": 42.51116180419922, "rewards/rejected": -44.324302673339844, "step": 13200 }, { "epoch": 57.685589519650655, "grad_norm": 4.517797367412468e-06, "learning_rate": 2.265981217001912e-06, "logits/chosen": -1.4411823749542236, "logits/rejected": -2.0168051719665527, "logps/chosen": -470.81298828125, "logps/rejected": -4738.1845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6770923137664795, "rewards/margins": 43.007423400878906, "rewards/rejected": -44.68451690673828, "step": 13210 }, { "epoch": 57.7292576419214, "grad_norm": 2.556302015801963e-07, "learning_rate": 2.262187460167774e-06, "logits/chosen": -1.4909193515777588, "logits/rejected": -2.011530637741089, "logps/chosen": -429.4535217285156, "logps/rejected": -5016.08056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7280181646347046, "rewards/margins": 45.458274841308594, "rewards/rejected": -47.1862907409668, "step": 13220 }, { "epoch": 57.77292576419214, "grad_norm": 2.660391465445049e-05, "learning_rate": 2.2583942558923457e-06, "logits/chosen": -1.4429091215133667, "logits/rejected": -2.004086971282959, "logps/chosen": -471.51513671875, "logps/rejected": -4582.8642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7915083169937134, "rewards/margins": 41.42560958862305, "rewards/rejected": -43.21712112426758, "step": 13230 }, { "epoch": 57.81659388646288, "grad_norm": 2.1636178263833404e-06, "learning_rate": 2.254601612989157e-06, "logits/chosen": -1.4767439365386963, "logits/rejected": -2.074819803237915, "logps/chosen": -477.25323486328125, "logps/rejected": -4815.4443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7377477884292603, "rewards/margins": 43.684730529785156, "rewards/rejected": -45.42247772216797, "step": 13240 }, { "epoch": 57.86026200873363, "grad_norm": 1.406957523265245e-06, "learning_rate": 2.2508095402704356e-06, "logits/chosen": -1.4428410530090332, "logits/rejected": -1.995577096939087, "logps/chosen": -474.8155212402344, "logps/rejected": -4375.76318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.746320366859436, "rewards/margins": 39.501976013183594, "rewards/rejected": -41.248294830322266, "step": 13250 }, { "epoch": 57.903930131004365, "grad_norm": 1.884765845237377e-07, "learning_rate": 2.2470180465470802e-06, "logits/chosen": -1.467714548110962, "logits/rejected": -1.9398037195205688, "logps/chosen": -461.55322265625, "logps/rejected": -4460.57958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7106597423553467, "rewards/margins": 40.27861785888672, "rewards/rejected": -41.989280700683594, "step": 13260 }, { "epoch": 57.94759825327511, "grad_norm": 5.460850103726042e-06, "learning_rate": 2.24322714062865e-06, "logits/chosen": -1.5343282222747803, "logits/rejected": -2.0999538898468018, "logps/chosen": -431.86346435546875, "logps/rejected": -5359.51318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7278995513916016, "rewards/margins": 48.788307189941406, "rewards/rejected": -50.516204833984375, "step": 13270 }, { "epoch": 57.99126637554585, "grad_norm": 1.459101063154685e-07, "learning_rate": 2.2394368313233332e-06, "logits/chosen": -1.4339144229888916, "logits/rejected": -1.9894078969955444, "logps/chosen": -464.4307556152344, "logps/rejected": -4464.15283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6456005573272705, "rewards/margins": 40.418094635009766, "rewards/rejected": -42.063697814941406, "step": 13280 }, { "epoch": 58.03493449781659, "grad_norm": 5.004301263934519e-06, "learning_rate": 2.235647127437934e-06, "logits/chosen": -1.4629406929016113, "logits/rejected": -2.025174140930176, "logps/chosen": -458.61834716796875, "logps/rejected": -4658.30615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7187964916229248, "rewards/margins": 42.07164764404297, "rewards/rejected": -43.790443420410156, "step": 13290 }, { "epoch": 58.07860262008734, "grad_norm": 2.979901338861139e-07, "learning_rate": 2.2318580377778497e-06, "logits/chosen": -1.5205551385879517, "logits/rejected": -1.9822238683700562, "logps/chosen": -464.53216552734375, "logps/rejected": -4598.5732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7299388647079468, "rewards/margins": 41.53899383544922, "rewards/rejected": -43.26892852783203, "step": 13300 }, { "epoch": 58.122270742358076, "grad_norm": 1.3701048235677643e-06, "learning_rate": 2.22806957114705e-06, "logits/chosen": -1.4345728158950806, "logits/rejected": -1.967916488647461, "logps/chosen": -465.37615966796875, "logps/rejected": -4279.34228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6015125513076782, "rewards/margins": 38.630096435546875, "rewards/rejected": -40.23160934448242, "step": 13310 }, { "epoch": 58.16593886462882, "grad_norm": 1.625520060370105e-07, "learning_rate": 2.224281736348059e-06, "logits/chosen": -1.4812055826187134, "logits/rejected": -2.0623791217803955, "logps/chosen": -440.8025817871094, "logps/rejected": -5007.9248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7704544067382812, "rewards/margins": 45.4500617980957, "rewards/rejected": -47.22051239013672, "step": 13320 }, { "epoch": 58.209606986899566, "grad_norm": 2.4521815769101516e-05, "learning_rate": 2.220494542181929e-06, "logits/chosen": -1.4803192615509033, "logits/rejected": -1.9999973773956299, "logps/chosen": -445.9178771972656, "logps/rejected": -4655.12353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6284335851669312, "rewards/margins": 42.12186813354492, "rewards/rejected": -43.75030517578125, "step": 13330 }, { "epoch": 58.2532751091703, "grad_norm": 8.535987595872873e-06, "learning_rate": 2.2167079974482282e-06, "logits/chosen": -1.5090525150299072, "logits/rejected": -2.045536994934082, "logps/chosen": -429.61834716796875, "logps/rejected": -4805.1357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7309486865997314, "rewards/margins": 43.554134368896484, "rewards/rejected": -45.285072326660156, "step": 13340 }, { "epoch": 58.29694323144105, "grad_norm": 7.887010529247115e-08, "learning_rate": 2.212922110945011e-06, "logits/chosen": -1.4838732481002808, "logits/rejected": -2.0389533042907715, "logps/chosen": -450.5361328125, "logps/rejected": -4836.0419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6674392223358154, "rewards/margins": 43.83833312988281, "rewards/rejected": -45.50577926635742, "step": 13350 }, { "epoch": 58.34061135371179, "grad_norm": 5.322871813036344e-06, "learning_rate": 2.2091368914688067e-06, "logits/chosen": -1.450365424156189, "logits/rejected": -1.9623435735702515, "logps/chosen": -434.75421142578125, "logps/rejected": -4548.75732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.639021635055542, "rewards/margins": 41.221431732177734, "rewards/rejected": -42.86045455932617, "step": 13360 }, { "epoch": 58.38427947598253, "grad_norm": 1.4362374227229127e-07, "learning_rate": 2.205352347814594e-06, "logits/chosen": -1.5038893222808838, "logits/rejected": -2.063396692276001, "logps/chosen": -412.002685546875, "logps/rejected": -4807.9208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7137746810913086, "rewards/margins": 43.57341003417969, "rewards/rejected": -45.28718566894531, "step": 13370 }, { "epoch": 58.427947598253276, "grad_norm": 2.2365837151736664e-07, "learning_rate": 2.201568488775778e-06, "logits/chosen": -1.4607938528060913, "logits/rejected": -1.8894590139389038, "logps/chosen": -481.63983154296875, "logps/rejected": -4240.7802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7280492782592773, "rewards/margins": 38.13920974731445, "rewards/rejected": -39.86726379394531, "step": 13380 }, { "epoch": 58.47161572052402, "grad_norm": 2.613539765359959e-07, "learning_rate": 2.197785323144176e-06, "logits/chosen": -1.503171682357788, "logits/rejected": -2.0947117805480957, "logps/chosen": -431.17913818359375, "logps/rejected": -5141.57568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6745296716690063, "rewards/margins": 46.860374450683594, "rewards/rejected": -48.53490447998047, "step": 13390 }, { "epoch": 58.51528384279476, "grad_norm": 4.585705964297393e-07, "learning_rate": 2.194002859709994e-06, "logits/chosen": -1.469052791595459, "logits/rejected": -1.975557565689087, "logps/chosen": -437.834716796875, "logps/rejected": -4638.048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6335937976837158, "rewards/margins": 42.11051940917969, "rewards/rejected": -43.74411392211914, "step": 13400 }, { "epoch": 58.5589519650655, "grad_norm": 1.6833956280927906e-07, "learning_rate": 2.1902211072618067e-06, "logits/chosen": -1.4977054595947266, "logits/rejected": -2.0914859771728516, "logps/chosen": -433.5174865722656, "logps/rejected": -5003.966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7057063579559326, "rewards/margins": 45.4892463684082, "rewards/rejected": -47.194950103759766, "step": 13410 }, { "epoch": 58.60262008733624, "grad_norm": 2.240625823742493e-07, "learning_rate": 2.1864400745865354e-06, "logits/chosen": -1.5327098369598389, "logits/rejected": -2.145085334777832, "logps/chosen": -439.4842834472656, "logps/rejected": -5391.98583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7571121454238892, "rewards/margins": 49.1198616027832, "rewards/rejected": -50.876976013183594, "step": 13420 }, { "epoch": 58.646288209606986, "grad_norm": 5.08766095680023e-06, "learning_rate": 2.1826597704694306e-06, "logits/chosen": -1.4905712604522705, "logits/rejected": -2.064980983734131, "logps/chosen": -447.640380859375, "logps/rejected": -5087.18017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7571525573730469, "rewards/margins": 46.282203674316406, "rewards/rejected": -48.03935241699219, "step": 13430 }, { "epoch": 58.68995633187773, "grad_norm": 4.364977811372376e-06, "learning_rate": 2.1788802036940477e-06, "logits/chosen": -1.5483636856079102, "logits/rejected": -2.120408535003662, "logps/chosen": -458.4072265625, "logps/rejected": -5313.0458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.653040885925293, "rewards/margins": 48.27703857421875, "rewards/rejected": -49.930076599121094, "step": 13440 }, { "epoch": 58.73362445414847, "grad_norm": 1.6784574469888893e-07, "learning_rate": 2.1751013830422303e-06, "logits/chosen": -1.467636227607727, "logits/rejected": -2.0204949378967285, "logps/chosen": -484.6998596191406, "logps/rejected": -4651.8623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7869793176651, "rewards/margins": 41.99201202392578, "rewards/rejected": -43.778987884521484, "step": 13450 }, { "epoch": 58.777292576419214, "grad_norm": 1.9346630135961205e-07, "learning_rate": 2.1713233172940907e-06, "logits/chosen": -1.5083705186843872, "logits/rejected": -2.0668976306915283, "logps/chosen": -459.14385986328125, "logps/rejected": -4961.1494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.852651596069336, "rewards/margins": 44.80522155761719, "rewards/rejected": -46.657875061035156, "step": 13460 }, { "epoch": 58.82096069868996, "grad_norm": 2.8706983340240266e-07, "learning_rate": 2.167546015227983e-06, "logits/chosen": -1.4719281196594238, "logits/rejected": -2.0176758766174316, "logps/chosen": -438.48419189453125, "logps/rejected": -4737.38720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6827776432037354, "rewards/margins": 43.015464782714844, "rewards/rejected": -44.69824981689453, "step": 13470 }, { "epoch": 58.8646288209607, "grad_norm": 7.481976479604379e-07, "learning_rate": 2.163769485620489e-06, "logits/chosen": -1.4725337028503418, "logits/rejected": -2.073120355606079, "logps/chosen": -443.57269287109375, "logps/rejected": -4953.3857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7412331104278564, "rewards/margins": 44.87964630126953, "rewards/rejected": -46.620887756347656, "step": 13480 }, { "epoch": 58.90829694323144, "grad_norm": 3.41352229960577e-07, "learning_rate": 2.1599937372463956e-06, "logits/chosen": -1.5223793983459473, "logits/rejected": -2.09675669670105, "logps/chosen": -408.1232604980469, "logps/rejected": -5435.2646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6659225225448608, "rewards/margins": 49.57664108276367, "rewards/rejected": -51.24256134033203, "step": 13490 }, { "epoch": 58.951965065502186, "grad_norm": 5.528609079551902e-08, "learning_rate": 2.1562187788786742e-06, "logits/chosen": -1.445788025856018, "logits/rejected": -1.9984304904937744, "logps/chosen": -458.0321350097656, "logps/rejected": -4521.79541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7651891708374023, "rewards/margins": 40.88135528564453, "rewards/rejected": -42.64654541015625, "step": 13500 }, { "epoch": 58.995633187772924, "grad_norm": 2.776469754891507e-07, "learning_rate": 2.1524446192884614e-06, "logits/chosen": -1.4835646152496338, "logits/rejected": -2.0563392639160156, "logps/chosen": -436.71331787109375, "logps/rejected": -4693.1591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7182737588882446, "rewards/margins": 42.56946563720703, "rewards/rejected": -44.28773880004883, "step": 13510 }, { "epoch": 59.03930131004367, "grad_norm": 7.834524102525363e-07, "learning_rate": 2.148671267245037e-06, "logits/chosen": -1.5017794370651245, "logits/rejected": -2.0716428756713867, "logps/chosen": -463.3190002441406, "logps/rejected": -4934.2412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7941166162490845, "rewards/margins": 44.76545333862305, "rewards/rejected": -46.5595703125, "step": 13520 }, { "epoch": 59.082969432314414, "grad_norm": 1.6333183687151404e-07, "learning_rate": 2.1448987315158047e-06, "logits/chosen": -1.504333257675171, "logits/rejected": -2.121702194213867, "logps/chosen": -439.87841796875, "logps/rejected": -5126.818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6892683506011963, "rewards/margins": 46.69470977783203, "rewards/rejected": -48.38397979736328, "step": 13530 }, { "epoch": 59.12663755458515, "grad_norm": 2.0699419589154728e-07, "learning_rate": 2.14112702086627e-06, "logits/chosen": -1.5092554092407227, "logits/rejected": -2.0248842239379883, "logps/chosen": -458.89129638671875, "logps/rejected": -4514.93359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.803009271621704, "rewards/margins": 40.75127410888672, "rewards/rejected": -42.55428695678711, "step": 13540 }, { "epoch": 59.1703056768559, "grad_norm": 1.3799543758902477e-05, "learning_rate": 2.1373561440600256e-06, "logits/chosen": -1.4870681762695312, "logits/rejected": -2.0855343341827393, "logps/chosen": -443.79168701171875, "logps/rejected": -4850.416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.636915922164917, "rewards/margins": 44.17832565307617, "rewards/rejected": -45.81523895263672, "step": 13550 }, { "epoch": 59.213973799126634, "grad_norm": 1.5304425707799298e-05, "learning_rate": 2.133586109858722e-06, "logits/chosen": -1.4232184886932373, "logits/rejected": -1.9969714879989624, "logps/chosen": -462.67510986328125, "logps/rejected": -4311.99072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6470654010772705, "rewards/margins": 38.93431854248047, "rewards/rejected": -40.581382751464844, "step": 13560 }, { "epoch": 59.25764192139738, "grad_norm": 1.8788033300086665e-06, "learning_rate": 2.1298169270220542e-06, "logits/chosen": -1.4910637140274048, "logits/rejected": -2.174851894378662, "logps/chosen": -437.86968994140625, "logps/rejected": -5134.13916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7298822402954102, "rewards/margins": 46.817569732666016, "rewards/rejected": -48.547454833984375, "step": 13570 }, { "epoch": 59.301310043668124, "grad_norm": 2.453186418821786e-07, "learning_rate": 2.1260486043077387e-06, "logits/chosen": -1.5064524412155151, "logits/rejected": -2.0604331493377686, "logps/chosen": -448.9092712402344, "logps/rejected": -4894.8203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7555452585220337, "rewards/margins": 44.39373016357422, "rewards/rejected": -46.14927673339844, "step": 13580 }, { "epoch": 59.34497816593886, "grad_norm": 9.697862371331518e-06, "learning_rate": 2.1222811504714937e-06, "logits/chosen": -1.4781126976013184, "logits/rejected": -2.04685640335083, "logps/chosen": -471.66448974609375, "logps/rejected": -4512.49267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6942752599716187, "rewards/margins": 40.79763412475586, "rewards/rejected": -42.49190902709961, "step": 13590 }, { "epoch": 59.38864628820961, "grad_norm": 1.1386832239781775e-07, "learning_rate": 2.118514574267018e-06, "logits/chosen": -1.4714876413345337, "logits/rejected": -2.0523927211761475, "logps/chosen": -475.9703063964844, "logps/rejected": -4748.93896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8417656421661377, "rewards/margins": 42.91838836669922, "rewards/rejected": -44.760154724121094, "step": 13600 }, { "epoch": 59.43231441048035, "grad_norm": 1.708906502428294e-07, "learning_rate": 2.1147488844459723e-06, "logits/chosen": -1.5346524715423584, "logits/rejected": -2.162329912185669, "logps/chosen": -432.35748291015625, "logps/rejected": -5656.6025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8245136737823486, "rewards/margins": 51.58203887939453, "rewards/rejected": -53.40655517578125, "step": 13610 }, { "epoch": 59.47598253275109, "grad_norm": 6.198934285914108e-08, "learning_rate": 2.110984089757957e-06, "logits/chosen": -1.5106418132781982, "logits/rejected": -2.1472015380859375, "logps/chosen": -440.2648010253906, "logps/rejected": -5175.2216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.851083755493164, "rewards/margins": 47.02455139160156, "rewards/rejected": -48.875633239746094, "step": 13620 }, { "epoch": 59.519650655021834, "grad_norm": 1.9459549890990713e-07, "learning_rate": 2.1072201989504914e-06, "logits/chosen": -1.5101546049118042, "logits/rejected": -2.0668423175811768, "logps/chosen": -450.89788818359375, "logps/rejected": -5073.5693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8364280462265015, "rewards/margins": 45.96310043334961, "rewards/rejected": -47.799522399902344, "step": 13630 }, { "epoch": 59.56331877729258, "grad_norm": 1.722115623049673e-06, "learning_rate": 2.1034572207689967e-06, "logits/chosen": -1.4588617086410522, "logits/rejected": -2.0226118564605713, "logps/chosen": -482.44134521484375, "logps/rejected": -4821.31689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7610588073730469, "rewards/margins": 43.66681671142578, "rewards/rejected": -45.42787551879883, "step": 13640 }, { "epoch": 59.60698689956332, "grad_norm": 1.0821316287417774e-07, "learning_rate": 2.099695163956774e-06, "logits/chosen": -1.521596908569336, "logits/rejected": -2.0985751152038574, "logps/chosen": -463.3603515625, "logps/rejected": -4946.50439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6714200973510742, "rewards/margins": 44.999149322509766, "rewards/rejected": -46.670570373535156, "step": 13650 }, { "epoch": 59.65065502183406, "grad_norm": 2.9538953170386177e-07, "learning_rate": 2.095934037254981e-06, "logits/chosen": -1.5175142288208008, "logits/rejected": -2.141875743865967, "logps/chosen": -423.83056640625, "logps/rejected": -5437.2919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7051013708114624, "rewards/margins": 49.52438735961914, "rewards/rejected": -51.2294921875, "step": 13660 }, { "epoch": 59.6943231441048, "grad_norm": 1.0124917547043466e-07, "learning_rate": 2.0921738494026163e-06, "logits/chosen": -1.4959299564361572, "logits/rejected": -2.113556385040283, "logps/chosen": -442.9629821777344, "logps/rejected": -5347.0, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6680326461791992, "rewards/margins": 48.82322692871094, "rewards/rejected": -50.49126052856445, "step": 13670 }, { "epoch": 59.737991266375545, "grad_norm": 1.2660803605340182e-07, "learning_rate": 2.0884146091364957e-06, "logits/chosen": -1.4670398235321045, "logits/rejected": -2.060725688934326, "logps/chosen": -474.9122619628906, "logps/rejected": -4454.77880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6759955883026123, "rewards/margins": 40.302711486816406, "rewards/rejected": -41.97870635986328, "step": 13680 }, { "epoch": 59.78165938864629, "grad_norm": 5.707041120506907e-07, "learning_rate": 2.0846563251912355e-06, "logits/chosen": -1.480854868888855, "logits/rejected": -2.0224926471710205, "logps/chosen": -447.9397888183594, "logps/rejected": -4509.0537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6363544464111328, "rewards/margins": 40.774658203125, "rewards/rejected": -42.4110107421875, "step": 13690 }, { "epoch": 59.82532751091703, "grad_norm": 3.2752496927423675e-07, "learning_rate": 2.0808990062992278e-06, "logits/chosen": -1.4942280054092407, "logits/rejected": -2.06929349899292, "logps/chosen": -487.60772705078125, "logps/rejected": -4560.96484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8585513830184937, "rewards/margins": 41.180198669433594, "rewards/rejected": -43.03874969482422, "step": 13700 }, { "epoch": 59.86899563318777, "grad_norm": 3.378052825261112e-07, "learning_rate": 2.0771426611906244e-06, "logits/chosen": -1.4603619575500488, "logits/rejected": -1.9794495105743408, "logps/chosen": -453.4306640625, "logps/rejected": -4369.6318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8614673614501953, "rewards/margins": 39.32777786254883, "rewards/rejected": -41.18924331665039, "step": 13710 }, { "epoch": 59.91266375545852, "grad_norm": 3.1293520091041764e-07, "learning_rate": 2.073387298593311e-06, "logits/chosen": -1.5318264961242676, "logits/rejected": -2.0811142921447754, "logps/chosen": -417.0146484375, "logps/rejected": -5147.189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6272846460342407, "rewards/margins": 46.902854919433594, "rewards/rejected": -48.53014373779297, "step": 13720 }, { "epoch": 59.956331877729255, "grad_norm": 8.624689406897283e-07, "learning_rate": 2.069632927232893e-06, "logits/chosen": -1.5222253799438477, "logits/rejected": -2.1511495113372803, "logps/chosen": -425.64251708984375, "logps/rejected": -5358.41845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7526031732559204, "rewards/margins": 48.7219352722168, "rewards/rejected": -50.474544525146484, "step": 13730 }, { "epoch": 60.0, "grad_norm": 2.093630034355781e-05, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.5165789127349854, "logits/rejected": -2.1296706199645996, "logps/chosen": -456.12127685546875, "logps/rejected": -5004.994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6876709461212158, "rewards/margins": 45.53487014770508, "rewards/rejected": -47.2225456237793, "step": 13740 }, { "epoch": 60.043668122270745, "grad_norm": 3.988004658214812e-06, "learning_rate": 2.062127193113632e-06, "logits/chosen": -1.4702246189117432, "logits/rejected": -2.034034252166748, "logps/chosen": -479.160400390625, "logps/rejected": -4821.6435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7466487884521484, "rewards/margins": 43.6899528503418, "rewards/rejected": -45.43659973144531, "step": 13750 }, { "epoch": 60.08733624454148, "grad_norm": 2.8637616858543807e-07, "learning_rate": 2.058375847794402e-06, "logits/chosen": -1.5220502614974976, "logits/rejected": -2.202252149581909, "logps/chosen": -415.2445373535156, "logps/rejected": -5452.19287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6950123310089111, "rewards/margins": 49.713905334472656, "rewards/rejected": -51.40891647338867, "step": 13760 }, { "epoch": 60.13100436681223, "grad_norm": 3.654718796442276e-07, "learning_rate": 2.0546255285912536e-06, "logits/chosen": -1.5288468599319458, "logits/rejected": -2.1367456912994385, "logps/chosen": -438.45526123046875, "logps/rejected": -5190.91162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7556610107421875, "rewards/margins": 47.19352340698242, "rewards/rejected": -48.949180603027344, "step": 13770 }, { "epoch": 60.17467248908297, "grad_norm": 1.8555202284886356e-07, "learning_rate": 2.0508762442180745e-06, "logits/chosen": -1.5022351741790771, "logits/rejected": -2.01562237739563, "logps/chosen": -474.58892822265625, "logps/rejected": -4902.79052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0616109371185303, "rewards/margins": 44.10356903076172, "rewards/rejected": -46.165184020996094, "step": 13780 }, { "epoch": 60.21834061135371, "grad_norm": 1.393532025828897e-05, "learning_rate": 2.0471280033863473e-06, "logits/chosen": -1.5128240585327148, "logits/rejected": -2.065723419189453, "logps/chosen": -480.97479248046875, "logps/rejected": -4771.7060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7944198846817017, "rewards/margins": 43.1580810546875, "rewards/rejected": -44.95249557495117, "step": 13790 }, { "epoch": 60.262008733624455, "grad_norm": 1.758510378638742e-06, "learning_rate": 2.0433808148051305e-06, "logits/chosen": -1.5093486309051514, "logits/rejected": -2.1023032665252686, "logps/chosen": -464.7124938964844, "logps/rejected": -4927.14208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8207170963287354, "rewards/margins": 44.5698356628418, "rewards/rejected": -46.39055633544922, "step": 13800 }, { "epoch": 60.30567685589519, "grad_norm": 3.308515285446634e-06, "learning_rate": 2.0396346871810347e-06, "logits/chosen": -1.4923962354660034, "logits/rejected": -2.016493558883667, "logps/chosen": -457.0655212402344, "logps/rejected": -4570.125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7786067724227905, "rewards/margins": 41.298316955566406, "rewards/rejected": -43.076927185058594, "step": 13810 }, { "epoch": 60.34934497816594, "grad_norm": 2.3160776096606593e-07, "learning_rate": 2.0358896292182086e-06, "logits/chosen": -1.527829885482788, "logits/rejected": -2.0653414726257324, "logps/chosen": -478.30999755859375, "logps/rejected": -4949.06298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.828730583190918, "rewards/margins": 44.908729553222656, "rewards/rejected": -46.737457275390625, "step": 13820 }, { "epoch": 60.39301310043668, "grad_norm": 6.465980241591383e-06, "learning_rate": 2.0321456496183136e-06, "logits/chosen": -1.5147114992141724, "logits/rejected": -2.0734968185424805, "logps/chosen": -463.6385192871094, "logps/rejected": -4781.2587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8338091373443604, "rewards/margins": 43.15046310424805, "rewards/rejected": -44.984275817871094, "step": 13830 }, { "epoch": 60.43668122270742, "grad_norm": 9.69431298406639e-08, "learning_rate": 2.0284027570805076e-06, "logits/chosen": -1.5561918020248413, "logits/rejected": -2.0882506370544434, "logps/chosen": -460.68609619140625, "logps/rejected": -4810.97216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8672664165496826, "rewards/margins": 43.37254333496094, "rewards/rejected": -45.23981475830078, "step": 13840 }, { "epoch": 60.480349344978166, "grad_norm": 1.8057286591589328e-07, "learning_rate": 2.024660960301421e-06, "logits/chosen": -1.4999009370803833, "logits/rejected": -2.0132265090942383, "logps/chosen": -465.78070068359375, "logps/rejected": -4398.6318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8255054950714111, "rewards/margins": 39.53237533569336, "rewards/rejected": -41.357879638671875, "step": 13850 }, { "epoch": 60.52401746724891, "grad_norm": 1.971267230648982e-07, "learning_rate": 2.020920267975139e-06, "logits/chosen": -1.5509140491485596, "logits/rejected": -2.177192449569702, "logps/chosen": -454.79974365234375, "logps/rejected": -5503.9287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7841488122940063, "rewards/margins": 50.20713424682617, "rewards/rejected": -51.99128341674805, "step": 13860 }, { "epoch": 60.56768558951965, "grad_norm": 5.3481952340400055e-06, "learning_rate": 2.0171806887931788e-06, "logits/chosen": -1.4900401830673218, "logits/rejected": -2.093536376953125, "logps/chosen": -470.38079833984375, "logps/rejected": -5059.61376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8591687679290771, "rewards/margins": 45.90233612060547, "rewards/rejected": -47.761497497558594, "step": 13870 }, { "epoch": 60.61135371179039, "grad_norm": 3.0473180856352117e-07, "learning_rate": 2.0134422314444742e-06, "logits/chosen": -1.4934104681015015, "logits/rejected": -2.0358686447143555, "logps/chosen": -438.75335693359375, "logps/rejected": -4540.16796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.832869291305542, "rewards/margins": 40.87308120727539, "rewards/rejected": -42.70594787597656, "step": 13880 }, { "epoch": 60.65502183406114, "grad_norm": 9.497984578643728e-08, "learning_rate": 2.0097049046153512e-06, "logits/chosen": -1.5107452869415283, "logits/rejected": -2.1305227279663086, "logps/chosen": -470.8369140625, "logps/rejected": -5317.2666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8714771270751953, "rewards/margins": 48.34286880493164, "rewards/rejected": -50.2143440246582, "step": 13890 }, { "epoch": 60.698689956331876, "grad_norm": 3.1572401386317467e-07, "learning_rate": 2.005968716989507e-06, "logits/chosen": -1.4994394779205322, "logits/rejected": -2.0289340019226074, "logps/chosen": -485.50994873046875, "logps/rejected": -4402.4248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.885911226272583, "rewards/margins": 39.652530670166016, "rewards/rejected": -41.5384407043457, "step": 13900 }, { "epoch": 60.74235807860262, "grad_norm": 1.318839006813205e-07, "learning_rate": 2.002233677247995e-06, "logits/chosen": -1.5495555400848389, "logits/rejected": -2.165398597717285, "logps/chosen": -431.0638732910156, "logps/rejected": -5170.39013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8640800714492798, "rewards/margins": 46.94824981689453, "rewards/rejected": -48.81232833862305, "step": 13910 }, { "epoch": 60.786026200873366, "grad_norm": 1.2247037385680952e-07, "learning_rate": 1.998499794069198e-06, "logits/chosen": -1.5037654638290405, "logits/rejected": -2.1510531902313232, "logps/chosen": -467.4527282714844, "logps/rejected": -5023.7197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8918291330337524, "rewards/margins": 45.60141372680664, "rewards/rejected": -47.49323654174805, "step": 13920 }, { "epoch": 60.8296943231441, "grad_norm": 1.9333804153777976e-07, "learning_rate": 1.9947670761288163e-06, "logits/chosen": -1.5409513711929321, "logits/rejected": -2.1991562843322754, "logps/chosen": -425.21685791015625, "logps/rejected": -5465.384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7830402851104736, "rewards/margins": 49.8467903137207, "rewards/rejected": -51.62983322143555, "step": 13930 }, { "epoch": 60.87336244541485, "grad_norm": 1.5598618058458344e-05, "learning_rate": 1.9910355320998383e-06, "logits/chosen": -1.4927326440811157, "logits/rejected": -2.1183478832244873, "logps/chosen": -463.0116271972656, "logps/rejected": -4864.4404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7678136825561523, "rewards/margins": 44.12775802612305, "rewards/rejected": -45.89557647705078, "step": 13940 }, { "epoch": 60.917030567685586, "grad_norm": 6.888641240491679e-07, "learning_rate": 1.9873051706525274e-06, "logits/chosen": -1.5108582973480225, "logits/rejected": -2.0413646697998047, "logps/chosen": -466.07086181640625, "logps/rejected": -4712.54931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.784274697303772, "rewards/margins": 42.5225715637207, "rewards/rejected": -44.306846618652344, "step": 13950 }, { "epoch": 60.96069868995633, "grad_norm": 7.95578496222153e-07, "learning_rate": 1.983576000454398e-06, "logits/chosen": -1.5030136108398438, "logits/rejected": -2.1562702655792236, "logps/chosen": -447.29205322265625, "logps/rejected": -4934.01416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8427722454071045, "rewards/margins": 44.632633209228516, "rewards/rejected": -46.475406646728516, "step": 13960 }, { "epoch": 61.004366812227076, "grad_norm": 1.5032749304431876e-07, "learning_rate": 1.979848030170196e-06, "logits/chosen": -1.4887615442276, "logits/rejected": -2.007485866546631, "logps/chosen": -477.56201171875, "logps/rejected": -4641.4345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8459352254867554, "rewards/margins": 41.912742614746094, "rewards/rejected": -43.7586784362793, "step": 13970 }, { "epoch": 61.048034934497814, "grad_norm": 4.562436444983589e-07, "learning_rate": 1.9761212684618825e-06, "logits/chosen": -1.5352375507354736, "logits/rejected": -2.1271910667419434, "logps/chosen": -464.0980529785156, "logps/rejected": -5196.98486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8756234645843506, "rewards/margins": 47.13768768310547, "rewards/rejected": -49.0133056640625, "step": 13980 }, { "epoch": 61.09170305676856, "grad_norm": 2.142430848055314e-06, "learning_rate": 1.9723957239886067e-06, "logits/chosen": -1.5228939056396484, "logits/rejected": -2.1100916862487793, "logps/chosen": -441.5269470214844, "logps/rejected": -4934.10888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.822970986366272, "rewards/margins": 44.57982635498047, "rewards/rejected": -46.402801513671875, "step": 13990 }, { "epoch": 61.1353711790393, "grad_norm": 3.362859616343567e-07, "learning_rate": 1.9686714054066917e-06, "logits/chosen": -1.492289662361145, "logits/rejected": -2.108802318572998, "logps/chosen": -465.8697204589844, "logps/rejected": -5044.94482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.807518720626831, "rewards/margins": 45.78709030151367, "rewards/rejected": -47.594608306884766, "step": 14000 }, { "epoch": 61.17903930131004, "grad_norm": 2.1581270105860349e-07, "learning_rate": 1.9649483213696107e-06, "logits/chosen": -1.5078301429748535, "logits/rejected": -2.008725166320801, "logps/chosen": -484.0540466308594, "logps/rejected": -4667.90087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9202909469604492, "rewards/margins": 42.02793884277344, "rewards/rejected": -43.94823455810547, "step": 14010 }, { "epoch": 61.222707423580786, "grad_norm": 3.313863450305951e-05, "learning_rate": 1.9612264805279708e-06, "logits/chosen": -1.5643267631530762, "logits/rejected": -2.2347700595855713, "logps/chosen": -431.9632873535156, "logps/rejected": -5707.6728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.792851209640503, "rewards/margins": 52.17475128173828, "rewards/rejected": -53.96760177612305, "step": 14020 }, { "epoch": 61.26637554585153, "grad_norm": 8.768398423980298e-07, "learning_rate": 1.9575058915294885e-06, "logits/chosen": -1.4230549335479736, "logits/rejected": -2.010751962661743, "logps/chosen": -566.4717407226562, "logps/rejected": -4302.1640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0396485328674316, "rewards/margins": 38.47015380859375, "rewards/rejected": -40.50980758666992, "step": 14030 }, { "epoch": 61.31004366812227, "grad_norm": 1.243516057757923e-07, "learning_rate": 1.9537865630189726e-06, "logits/chosen": -1.4427062273025513, "logits/rejected": -2.0698306560516357, "logps/chosen": -481.3190002441406, "logps/rejected": -4653.3818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7802146673202515, "rewards/margins": 42.1096305847168, "rewards/rejected": -43.88984680175781, "step": 14040 }, { "epoch": 61.353711790393014, "grad_norm": 6.941024465018606e-07, "learning_rate": 1.950068503638303e-06, "logits/chosen": -1.5260870456695557, "logits/rejected": -2.19512677192688, "logps/chosen": -445.063720703125, "logps/rejected": -5030.69873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8209049701690674, "rewards/margins": 45.749122619628906, "rewards/rejected": -47.570030212402344, "step": 14050 }, { "epoch": 61.39737991266376, "grad_norm": 1.510814976527867e-07, "learning_rate": 1.946351722026408e-06, "logits/chosen": -1.4685453176498413, "logits/rejected": -2.0027875900268555, "logps/chosen": -481.96588134765625, "logps/rejected": -4256.57421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7230291366577148, "rewards/margins": 38.3663444519043, "rewards/rejected": -40.08937454223633, "step": 14060 }, { "epoch": 61.4410480349345, "grad_norm": 9.62599486270293e-08, "learning_rate": 1.942636226819253e-06, "logits/chosen": -1.5455635786056519, "logits/rejected": -2.179812431335449, "logps/chosen": -458.1524353027344, "logps/rejected": -5201.51708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.826573371887207, "rewards/margins": 47.31471633911133, "rewards/rejected": -49.14128875732422, "step": 14070 }, { "epoch": 61.48471615720524, "grad_norm": 7.130030590530927e-07, "learning_rate": 1.93892202664981e-06, "logits/chosen": -1.5558580160140991, "logits/rejected": -2.201681137084961, "logps/chosen": -466.2372131347656, "logps/rejected": -5456.00927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.793459177017212, "rewards/margins": 49.7312126159668, "rewards/rejected": -51.5246696472168, "step": 14080 }, { "epoch": 61.52838427947598, "grad_norm": 1.5065747108319585e-07, "learning_rate": 1.9352091301480423e-06, "logits/chosen": -1.539573073387146, "logits/rejected": -2.154128074645996, "logps/chosen": -457.8382263183594, "logps/rejected": -4801.72216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7852236032485962, "rewards/margins": 43.530113220214844, "rewards/rejected": -45.31533432006836, "step": 14090 }, { "epoch": 61.572052401746724, "grad_norm": 9.992000570956645e-07, "learning_rate": 1.9314975459408854e-06, "logits/chosen": -1.5070358514785767, "logits/rejected": -2.1241040229797363, "logps/chosen": -479.36993408203125, "logps/rejected": -5138.0986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.928235650062561, "rewards/margins": 46.54695129394531, "rewards/rejected": -48.475181579589844, "step": 14100 }, { "epoch": 61.61572052401747, "grad_norm": 4.796591236133455e-07, "learning_rate": 1.927787282652224e-06, "logits/chosen": -1.5536508560180664, "logits/rejected": -2.2228927612304688, "logps/chosen": -459.5169372558594, "logps/rejected": -5326.0224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8739039897918701, "rewards/margins": 48.44709014892578, "rewards/rejected": -50.32099151611328, "step": 14110 }, { "epoch": 61.65938864628821, "grad_norm": 1.7228557990688402e-07, "learning_rate": 1.9240783489028763e-06, "logits/chosen": -1.5035784244537354, "logits/rejected": -2.079068660736084, "logps/chosen": -468.884521484375, "logps/rejected": -4934.60400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8180322647094727, "rewards/margins": 44.641090393066406, "rewards/rejected": -46.45912170410156, "step": 14120 }, { "epoch": 61.70305676855895, "grad_norm": 3.3605871929317543e-07, "learning_rate": 1.920370753310569e-06, "logits/chosen": -1.5069152116775513, "logits/rejected": -2.076737403869629, "logps/chosen": -470.0970153808594, "logps/rejected": -4781.60546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8515199422836304, "rewards/margins": 43.15180587768555, "rewards/rejected": -45.003326416015625, "step": 14130 }, { "epoch": 61.7467248908297, "grad_norm": 1.0359384478798944e-05, "learning_rate": 1.9166645044899207e-06, "logits/chosen": -1.5266202688217163, "logits/rejected": -2.189427137374878, "logps/chosen": -458.68292236328125, "logps/rejected": -5105.380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7995961904525757, "rewards/margins": 46.398963928222656, "rewards/rejected": -48.19856262207031, "step": 14140 }, { "epoch": 61.790393013100434, "grad_norm": 5.927520867513235e-07, "learning_rate": 1.9129596110524198e-06, "logits/chosen": -1.5082768201828003, "logits/rejected": -2.1153693199157715, "logps/chosen": -454.2806701660156, "logps/rejected": -5021.55517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.779311180114746, "rewards/margins": 45.64081573486328, "rewards/rejected": -47.420127868652344, "step": 14150 }, { "epoch": 61.83406113537118, "grad_norm": 7.7591802191591e-08, "learning_rate": 1.9092560816064043e-06, "logits/chosen": -1.5496082305908203, "logits/rejected": -2.0882084369659424, "logps/chosen": -443.16424560546875, "logps/rejected": -5082.4501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7532036304473877, "rewards/margins": 46.125728607177734, "rewards/rejected": -47.87893295288086, "step": 14160 }, { "epoch": 61.877729257641924, "grad_norm": 2.842951624846822e-05, "learning_rate": 1.905553924757049e-06, "logits/chosen": -1.5261017084121704, "logits/rejected": -2.126397132873535, "logps/chosen": -472.09478759765625, "logps/rejected": -4681.01416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8446096181869507, "rewards/margins": 42.240203857421875, "rewards/rejected": -44.084815979003906, "step": 14170 }, { "epoch": 61.92139737991266, "grad_norm": 4.31868087516428e-07, "learning_rate": 1.9018531491063318e-06, "logits/chosen": -1.5148565769195557, "logits/rejected": -2.117388963699341, "logps/chosen": -460.4156188964844, "logps/rejected": -4970.75634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7943614721298218, "rewards/margins": 45.1043586730957, "rewards/rejected": -46.89871597290039, "step": 14180 }, { "epoch": 61.96506550218341, "grad_norm": 4.705600215533363e-08, "learning_rate": 1.898153763253026e-06, "logits/chosen": -1.5296707153320312, "logits/rejected": -2.147860527038574, "logps/chosen": -428.6376953125, "logps/rejected": -4972.6611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.756744623184204, "rewards/margins": 45.15349578857422, "rewards/rejected": -46.910240173339844, "step": 14190 }, { "epoch": 62.00873362445415, "grad_norm": 1.7728469378799614e-07, "learning_rate": 1.8944557757926738e-06, "logits/chosen": -1.557539701461792, "logits/rejected": -2.2199184894561768, "logps/chosen": -427.31298828125, "logps/rejected": -5623.021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.754223108291626, "rewards/margins": 51.40049362182617, "rewards/rejected": -53.15471649169922, "step": 14200 }, { "epoch": 62.05240174672489, "grad_norm": 1.8447707898685539e-06, "learning_rate": 1.8907591953175694e-06, "logits/chosen": -1.4793589115142822, "logits/rejected": -1.9480966329574585, "logps/chosen": -494.89910888671875, "logps/rejected": -4132.89453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9131028652191162, "rewards/margins": 36.971336364746094, "rewards/rejected": -38.884437561035156, "step": 14210 }, { "epoch": 62.096069868995635, "grad_norm": 1.4524291859170021e-05, "learning_rate": 1.8870640304167375e-06, "logits/chosen": -1.4806181192398071, "logits/rejected": -1.9839591979980469, "logps/chosen": -492.00189208984375, "logps/rejected": -4626.4609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.843356728553772, "rewards/margins": 41.7861213684082, "rewards/rejected": -43.62947463989258, "step": 14220 }, { "epoch": 62.13973799126637, "grad_norm": 2.9307122609239876e-07, "learning_rate": 1.8833702896759134e-06, "logits/chosen": -1.5444905757904053, "logits/rejected": -2.1457252502441406, "logps/chosen": -476.143310546875, "logps/rejected": -5210.33740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8982086181640625, "rewards/margins": 47.36011505126953, "rewards/rejected": -49.25832748413086, "step": 14230 }, { "epoch": 62.18340611353712, "grad_norm": 4.992080751759178e-08, "learning_rate": 1.8796779816775229e-06, "logits/chosen": -1.5595355033874512, "logits/rejected": -2.1436238288879395, "logps/chosen": -462.42413330078125, "logps/rejected": -5118.5732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8133277893066406, "rewards/margins": 46.44742202758789, "rewards/rejected": -48.2607536315918, "step": 14240 }, { "epoch": 62.22707423580786, "grad_norm": 2.6430207796072093e-05, "learning_rate": 1.8759871150006629e-06, "logits/chosen": -1.487828254699707, "logits/rejected": -2.1298515796661377, "logps/chosen": -483.29412841796875, "logps/rejected": -4802.7177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8339107036590576, "rewards/margins": 43.523746490478516, "rewards/rejected": -45.3576545715332, "step": 14250 }, { "epoch": 62.2707423580786, "grad_norm": 1.4768997909661747e-07, "learning_rate": 1.8722976982210845e-06, "logits/chosen": -1.5656757354736328, "logits/rejected": -2.1623470783233643, "logps/chosen": -443.37054443359375, "logps/rejected": -5162.09033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.778028130531311, "rewards/margins": 46.9345817565918, "rewards/rejected": -48.71261215209961, "step": 14260 }, { "epoch": 62.314410480349345, "grad_norm": 3.7665283280974483e-06, "learning_rate": 1.8686097399111646e-06, "logits/chosen": -1.5166773796081543, "logits/rejected": -2.1560146808624268, "logps/chosen": -475.8854064941406, "logps/rejected": -5271.90478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.869657278060913, "rewards/margins": 47.938629150390625, "rewards/rejected": -49.80828094482422, "step": 14270 }, { "epoch": 62.35807860262009, "grad_norm": 2.2108230759713244e-05, "learning_rate": 1.8649232486398955e-06, "logits/chosen": -1.510085105895996, "logits/rejected": -2.0591254234313965, "logps/chosen": -479.84771728515625, "logps/rejected": -4747.50537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9037574529647827, "rewards/margins": 42.825687408447266, "rewards/rejected": -44.72945022583008, "step": 14280 }, { "epoch": 62.40174672489083, "grad_norm": 2.237799476963816e-06, "learning_rate": 1.861238232972859e-06, "logits/chosen": -1.5142552852630615, "logits/rejected": -2.133992910385132, "logps/chosen": -459.26666259765625, "logps/rejected": -5094.12060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7627410888671875, "rewards/margins": 46.24452590942383, "rewards/rejected": -48.007266998291016, "step": 14290 }, { "epoch": 62.44541484716157, "grad_norm": 5.8268221498590066e-08, "learning_rate": 1.8575547014722079e-06, "logits/chosen": -1.5480035543441772, "logits/rejected": -2.1296591758728027, "logps/chosen": -467.72723388671875, "logps/rejected": -4998.88037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9325978755950928, "rewards/margins": 45.23765182495117, "rewards/rejected": -47.17025375366211, "step": 14300 }, { "epoch": 62.48908296943232, "grad_norm": 6.705932425682224e-06, "learning_rate": 1.8538726626966491e-06, "logits/chosen": -1.5201631784439087, "logits/rejected": -2.051377773284912, "logps/chosen": -449.142333984375, "logps/rejected": -4751.8193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7694118022918701, "rewards/margins": 42.998512268066406, "rewards/rejected": -44.767921447753906, "step": 14310 }, { "epoch": 62.532751091703055, "grad_norm": 1.3486299317246891e-07, "learning_rate": 1.8501921252014183e-06, "logits/chosen": -1.4731218814849854, "logits/rejected": -2.059852123260498, "logps/chosen": -467.45550537109375, "logps/rejected": -4809.0625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.849461555480957, "rewards/margins": 43.49863815307617, "rewards/rejected": -45.34809494018555, "step": 14320 }, { "epoch": 62.5764192139738, "grad_norm": 1.6093970132930368e-06, "learning_rate": 1.846513097538265e-06, "logits/chosen": -1.5090440511703491, "logits/rejected": -2.121077299118042, "logps/chosen": -478.98663330078125, "logps/rejected": -4868.02783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9480787515640259, "rewards/margins": 44.000328063964844, "rewards/rejected": -45.94840621948242, "step": 14330 }, { "epoch": 62.620087336244545, "grad_norm": 1.1384254358290517e-07, "learning_rate": 1.8428355882554286e-06, "logits/chosen": -1.48274827003479, "logits/rejected": -2.0499823093414307, "logps/chosen": -495.35614013671875, "logps/rejected": -4917.3408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.878879189491272, "rewards/margins": 44.5367317199707, "rewards/rejected": -46.415611267089844, "step": 14340 }, { "epoch": 62.66375545851528, "grad_norm": 1.4773013285827913e-07, "learning_rate": 1.8391596058976214e-06, "logits/chosen": -1.5079190731048584, "logits/rejected": -2.1241562366485596, "logps/chosen": -463.649169921875, "logps/rejected": -5087.5068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.930780053138733, "rewards/margins": 46.082252502441406, "rewards/rejected": -48.013031005859375, "step": 14350 }, { "epoch": 62.70742358078603, "grad_norm": 2.4590183070969473e-07, "learning_rate": 1.8354851590060092e-06, "logits/chosen": -1.4760388135910034, "logits/rejected": -2.067986488342285, "logps/chosen": -461.194580078125, "logps/rejected": -4567.939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8565025329589844, "rewards/margins": 41.25921630859375, "rewards/rejected": -43.11572265625, "step": 14360 }, { "epoch": 62.751091703056765, "grad_norm": 1.8950245230411384e-07, "learning_rate": 1.8318122561181884e-06, "logits/chosen": -1.5070143938064575, "logits/rejected": -2.0951778888702393, "logps/chosen": -455.32354736328125, "logps/rejected": -4785.78076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8503797054290771, "rewards/margins": 43.212059020996094, "rewards/rejected": -45.06243896484375, "step": 14370 }, { "epoch": 62.79475982532751, "grad_norm": 5.394769221582832e-07, "learning_rate": 1.8281409057681686e-06, "logits/chosen": -1.5760440826416016, "logits/rejected": -2.2275502681732178, "logps/chosen": -446.56671142578125, "logps/rejected": -5602.59716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8195022344589233, "rewards/margins": 51.152374267578125, "rewards/rejected": -52.97187423706055, "step": 14380 }, { "epoch": 62.838427947598255, "grad_norm": 4.4113533898530654e-07, "learning_rate": 1.8244711164863518e-06, "logits/chosen": -1.4954383373260498, "logits/rejected": -2.0965447425842285, "logps/chosen": -451.66204833984375, "logps/rejected": -4866.25537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.922981858253479, "rewards/margins": 44.03639221191406, "rewards/rejected": -45.959373474121094, "step": 14390 }, { "epoch": 62.88209606986899, "grad_norm": 6.809415297216079e-06, "learning_rate": 1.8208028967995139e-06, "logits/chosen": -1.5252954959869385, "logits/rejected": -2.1278953552246094, "logps/chosen": -487.15570068359375, "logps/rejected": -5114.314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8449198007583618, "rewards/margins": 46.4113655090332, "rewards/rejected": -48.25628662109375, "step": 14400 }, { "epoch": 62.92576419213974, "grad_norm": 1.0220319262873288e-07, "learning_rate": 1.817136255230782e-06, "logits/chosen": -1.5278233289718628, "logits/rejected": -2.087367296218872, "logps/chosen": -466.75592041015625, "logps/rejected": -4774.77490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8141562938690186, "rewards/margins": 43.21363067626953, "rewards/rejected": -45.02778625488281, "step": 14410 }, { "epoch": 62.96943231441048, "grad_norm": 1.29084675578875e-07, "learning_rate": 1.8134712002996184e-06, "logits/chosen": -1.5478169918060303, "logits/rejected": -2.235537052154541, "logps/chosen": -472.565185546875, "logps/rejected": -5268.4423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8933124542236328, "rewards/margins": 47.8576774597168, "rewards/rejected": -49.7509880065918, "step": 14420 }, { "epoch": 63.01310043668122, "grad_norm": 0.00021771900607388408, "learning_rate": 1.8098077405217972e-06, "logits/chosen": -1.4644060134887695, "logits/rejected": -2.0563409328460693, "logps/chosen": -482.64105224609375, "logps/rejected": -4505.7705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9309364557266235, "rewards/margins": 40.60917282104492, "rewards/rejected": -42.54011154174805, "step": 14430 }, { "epoch": 63.056768558951966, "grad_norm": 9.088471118027717e-08, "learning_rate": 1.806145884409386e-06, "logits/chosen": -1.5396974086761475, "logits/rejected": -2.261767864227295, "logps/chosen": -445.10797119140625, "logps/rejected": -5472.8291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.876020073890686, "rewards/margins": 49.8260383605957, "rewards/rejected": -51.702056884765625, "step": 14440 }, { "epoch": 63.10043668122271, "grad_norm": 1.9292420497616952e-05, "learning_rate": 1.8024856404707285e-06, "logits/chosen": -1.4734030961990356, "logits/rejected": -2.0489165782928467, "logps/chosen": -483.7832946777344, "logps/rejected": -4617.54931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8137142658233643, "rewards/margins": 41.68446731567383, "rewards/rejected": -43.49817657470703, "step": 14450 }, { "epoch": 63.14410480349345, "grad_norm": 7.35181720941304e-07, "learning_rate": 1.7988270172104209e-06, "logits/chosen": -1.5011546611785889, "logits/rejected": -2.1840951442718506, "logps/chosen": -482.2273864746094, "logps/rejected": -5118.0556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8394473791122437, "rewards/margins": 46.57407760620117, "rewards/rejected": -48.41352081298828, "step": 14460 }, { "epoch": 63.18777292576419, "grad_norm": 2.689867430664943e-07, "learning_rate": 1.7951700231292932e-06, "logits/chosen": -1.5415067672729492, "logits/rejected": -2.163541316986084, "logps/chosen": -464.2513122558594, "logps/rejected": -5491.7177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8172889947891235, "rewards/margins": 49.898529052734375, "rewards/rejected": -51.7158203125, "step": 14470 }, { "epoch": 63.23144104803494, "grad_norm": 7.645622332734592e-08, "learning_rate": 1.7915146667243912e-06, "logits/chosen": -1.4973065853118896, "logits/rejected": -2.080029249191284, "logps/chosen": -487.33294677734375, "logps/rejected": -4669.13525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9838323593139648, "rewards/margins": 41.957130432128906, "rewards/rejected": -43.94096755981445, "step": 14480 }, { "epoch": 63.275109170305676, "grad_norm": 7.818666975751119e-08, "learning_rate": 1.7878609564889545e-06, "logits/chosen": -1.511974573135376, "logits/rejected": -2.1270852088928223, "logps/chosen": -453.50323486328125, "logps/rejected": -4896.9013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8742351531982422, "rewards/margins": 44.36071014404297, "rewards/rejected": -46.23494338989258, "step": 14490 }, { "epoch": 63.31877729257642, "grad_norm": 1.0916346733259417e-07, "learning_rate": 1.7842089009123996e-06, "logits/chosen": -1.50033700466156, "logits/rejected": -2.0860493183135986, "logps/chosen": -492.0978088378906, "logps/rejected": -4704.45166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9192874431610107, "rewards/margins": 42.475379943847656, "rewards/rejected": -44.39466857910156, "step": 14500 }, { "epoch": 63.36244541484716, "grad_norm": 2.6665930374770123e-07, "learning_rate": 1.7805585084802967e-06, "logits/chosen": -1.4814592599868774, "logits/rejected": -2.062351703643799, "logps/chosen": -521.9132690429688, "logps/rejected": -4483.58056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.966146469116211, "rewards/margins": 40.37755584716797, "rewards/rejected": -42.34370040893555, "step": 14510 }, { "epoch": 63.4061135371179, "grad_norm": 1.1588524223124154e-06, "learning_rate": 1.7769097876743519e-06, "logits/chosen": -1.451285719871521, "logits/rejected": -2.044127941131592, "logps/chosen": -466.480224609375, "logps/rejected": -4429.7568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9651962518692017, "rewards/margins": 39.839542388916016, "rewards/rejected": -41.80474090576172, "step": 14520 }, { "epoch": 63.44978165938865, "grad_norm": 2.2793773078223894e-07, "learning_rate": 1.7732627469723868e-06, "logits/chosen": -1.4947030544281006, "logits/rejected": -2.054466724395752, "logps/chosen": -480.5809631347656, "logps/rejected": -4560.279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8573325872421265, "rewards/margins": 41.176910400390625, "rewards/rejected": -43.034244537353516, "step": 14530 }, { "epoch": 63.493449781659386, "grad_norm": 9.239197905938039e-08, "learning_rate": 1.769617394848321e-06, "logits/chosen": -1.53141188621521, "logits/rejected": -2.144111156463623, "logps/chosen": -473.22479248046875, "logps/rejected": -5096.60546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9439541101455688, "rewards/margins": 46.122657775878906, "rewards/rejected": -48.06660842895508, "step": 14540 }, { "epoch": 63.53711790393013, "grad_norm": 2.9646738392473323e-07, "learning_rate": 1.76597373977215e-06, "logits/chosen": -1.5363187789916992, "logits/rejected": -2.11254620552063, "logps/chosen": -461.98846435546875, "logps/rejected": -5136.6064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9005720615386963, "rewards/margins": 46.52367401123047, "rewards/rejected": -48.42424774169922, "step": 14550 }, { "epoch": 63.580786026200876, "grad_norm": 1.0159034560392702e-07, "learning_rate": 1.7623317902099252e-06, "logits/chosen": -1.5135494470596313, "logits/rejected": -2.0493245124816895, "logps/chosen": -483.73516845703125, "logps/rejected": -4609.90234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9495986700057983, "rewards/margins": 41.40018844604492, "rewards/rejected": -43.34979248046875, "step": 14560 }, { "epoch": 63.624454148471614, "grad_norm": 8.717777012784378e-08, "learning_rate": 1.758691554623736e-06, "logits/chosen": -1.525939702987671, "logits/rejected": -2.164999008178711, "logps/chosen": -454.9383239746094, "logps/rejected": -5182.6455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7749300003051758, "rewards/margins": 47.167236328125, "rewards/rejected": -48.942161560058594, "step": 14570 }, { "epoch": 63.66812227074236, "grad_norm": 1.6866299114265095e-06, "learning_rate": 1.7550530414716887e-06, "logits/chosen": -1.5176894664764404, "logits/rejected": -2.168424129486084, "logps/chosen": -461.79315185546875, "logps/rejected": -5367.0625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9490108489990234, "rewards/margins": 48.635650634765625, "rewards/rejected": -50.58466339111328, "step": 14580 }, { "epoch": 63.7117903930131, "grad_norm": 1.1315981451520096e-06, "learning_rate": 1.7514162592078893e-06, "logits/chosen": -1.528201937675476, "logits/rejected": -2.147212266921997, "logps/chosen": -479.265625, "logps/rejected": -4943.146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8554397821426392, "rewards/margins": 44.78947067260742, "rewards/rejected": -46.64490509033203, "step": 14590 }, { "epoch": 63.75545851528384, "grad_norm": 7.856352910289212e-06, "learning_rate": 1.747781216282421e-06, "logits/chosen": -1.5488736629486084, "logits/rejected": -2.1642885208129883, "logps/chosen": -451.83746337890625, "logps/rejected": -5434.2470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0251410007476807, "rewards/margins": 49.207275390625, "rewards/rejected": -51.23241424560547, "step": 14600 }, { "epoch": 63.799126637554586, "grad_norm": 1.0930007441443427e-07, "learning_rate": 1.7441479211413243e-06, "logits/chosen": -1.5047327280044556, "logits/rejected": -2.090153217315674, "logps/chosen": -484.71435546875, "logps/rejected": -4603.0576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9563980102539062, "rewards/margins": 41.43427276611328, "rewards/rejected": -43.39067077636719, "step": 14610 }, { "epoch": 63.842794759825324, "grad_norm": 3.3385473779562045e-07, "learning_rate": 1.7405163822265803e-06, "logits/chosen": -1.5257196426391602, "logits/rejected": -2.236833095550537, "logps/chosen": -479.8304748535156, "logps/rejected": -5111.23681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9846442937850952, "rewards/margins": 46.2794075012207, "rewards/rejected": -48.26405715942383, "step": 14620 }, { "epoch": 63.88646288209607, "grad_norm": 1.4532068297681755e-05, "learning_rate": 1.7368866079760889e-06, "logits/chosen": -1.5264099836349487, "logits/rejected": -2.1802144050598145, "logps/chosen": -452.53497314453125, "logps/rejected": -5419.99951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8318824768066406, "rewards/margins": 49.36368179321289, "rewards/rejected": -51.1955680847168, "step": 14630 }, { "epoch": 63.930131004366814, "grad_norm": 1.2094681351329994e-07, "learning_rate": 1.7332586068236506e-06, "logits/chosen": -1.4809075593948364, "logits/rejected": -2.1507668495178223, "logps/chosen": -471.71563720703125, "logps/rejected": -5133.1875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9261219501495361, "rewards/margins": 46.57952880859375, "rewards/rejected": -48.505653381347656, "step": 14640 }, { "epoch": 63.97379912663755, "grad_norm": 3.179417661745615e-08, "learning_rate": 1.7296323871989454e-06, "logits/chosen": -1.489021897315979, "logits/rejected": -2.151763916015625, "logps/chosen": -482.7064514160156, "logps/rejected": -5065.3076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.999375581741333, "rewards/margins": 45.84077072143555, "rewards/rejected": -47.84014129638672, "step": 14650 }, { "epoch": 64.0174672489083, "grad_norm": 2.4360216219386053e-07, "learning_rate": 1.7260079575275137e-06, "logits/chosen": -1.551010251045227, "logits/rejected": -2.1613881587982178, "logps/chosen": -473.78680419921875, "logps/rejected": -5369.353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8375669717788696, "rewards/margins": 48.803035736083984, "rewards/rejected": -50.640602111816406, "step": 14660 }, { "epoch": 64.06113537117903, "grad_norm": 5.9062253240678523e-08, "learning_rate": 1.7223853262307383e-06, "logits/chosen": -1.5152769088745117, "logits/rejected": -2.142521381378174, "logps/chosen": -469.24188232421875, "logps/rejected": -5378.2109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8420995473861694, "rewards/margins": 48.95207977294922, "rewards/rejected": -50.7941780090332, "step": 14670 }, { "epoch": 64.10480349344978, "grad_norm": 2.071486279852192e-07, "learning_rate": 1.7187645017258198e-06, "logits/chosen": -1.550127625465393, "logits/rejected": -2.1602699756622314, "logps/chosen": -470.71142578125, "logps/rejected": -5008.4248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8867809772491455, "rewards/margins": 45.24613571166992, "rewards/rejected": -47.13291549682617, "step": 14680 }, { "epoch": 64.14847161572052, "grad_norm": 1.5766390332567792e-06, "learning_rate": 1.7151454924257666e-06, "logits/chosen": -1.5295865535736084, "logits/rejected": -2.1433639526367188, "logps/chosen": -496.81109619140625, "logps/rejected": -5200.33056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9029353857040405, "rewards/margins": 47.21824264526367, "rewards/rejected": -49.121177673339844, "step": 14690 }, { "epoch": 64.19213973799127, "grad_norm": 1.3990752208613361e-07, "learning_rate": 1.711528306739364e-06, "logits/chosen": -1.5237475633621216, "logits/rejected": -2.167267084121704, "logps/chosen": -454.5150451660156, "logps/rejected": -5035.474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9252870082855225, "rewards/margins": 45.63092803955078, "rewards/rejected": -47.55622100830078, "step": 14700 }, { "epoch": 64.23580786026201, "grad_norm": 1.247399965548079e-06, "learning_rate": 1.707912953071163e-06, "logits/chosen": -1.5190236568450928, "logits/rejected": -2.1725406646728516, "logps/chosen": -461.8885803222656, "logps/rejected": -4970.95361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.004711866378784, "rewards/margins": 44.86072540283203, "rewards/rejected": -46.86543655395508, "step": 14710 }, { "epoch": 64.27947598253274, "grad_norm": 7.212887033942355e-07, "learning_rate": 1.704299439821457e-06, "logits/chosen": -1.5153756141662598, "logits/rejected": -2.135781764984131, "logps/chosen": -479.5564880371094, "logps/rejected": -5002.63134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140853881835938, "rewards/margins": 45.3197021484375, "rewards/rejected": -47.23378372192383, "step": 14720 }, { "epoch": 64.32314410480349, "grad_norm": 5.913863660616715e-08, "learning_rate": 1.7006877753862632e-06, "logits/chosen": -1.5214672088623047, "logits/rejected": -2.1132397651672363, "logps/chosen": -491.44683837890625, "logps/rejected": -4656.8408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9927839040756226, "rewards/margins": 41.86249542236328, "rewards/rejected": -43.85527801513672, "step": 14730 }, { "epoch": 64.36681222707423, "grad_norm": 1.1764975210173984e-07, "learning_rate": 1.6970779681573045e-06, "logits/chosen": -1.533541202545166, "logits/rejected": -2.247278928756714, "logps/chosen": -456.97637939453125, "logps/rejected": -5435.13720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.883253812789917, "rewards/margins": 49.586734771728516, "rewards/rejected": -51.4699821472168, "step": 14740 }, { "epoch": 64.41048034934498, "grad_norm": 2.1182257624960674e-07, "learning_rate": 1.693470026521986e-06, "logits/chosen": -1.547965168952942, "logits/rejected": -2.0963006019592285, "logps/chosen": -482.7301330566406, "logps/rejected": -4827.1416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9412320852279663, "rewards/margins": 43.564781188964844, "rewards/rejected": -45.506011962890625, "step": 14750 }, { "epoch": 64.45414847161572, "grad_norm": 1.8703217351309004e-08, "learning_rate": 1.689863958863381e-06, "logits/chosen": -1.514469861984253, "logits/rejected": -2.10373592376709, "logps/chosen": -464.2994689941406, "logps/rejected": -5066.52587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9599918127059937, "rewards/margins": 45.801902770996094, "rewards/rejected": -47.761898040771484, "step": 14760 }, { "epoch": 64.49781659388647, "grad_norm": 2.0481345589273816e-07, "learning_rate": 1.6862597735602034e-06, "logits/chosen": -1.5045851469039917, "logits/rejected": -2.1440958976745605, "logps/chosen": -458.99237060546875, "logps/rejected": -4860.0048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8306446075439453, "rewards/margins": 44.12404251098633, "rewards/rejected": -45.954681396484375, "step": 14770 }, { "epoch": 64.5414847161572, "grad_norm": 9.083195750354356e-06, "learning_rate": 1.6826574789868011e-06, "logits/chosen": -1.5692485570907593, "logits/rejected": -2.1509499549865723, "logps/chosen": -457.79876708984375, "logps/rejected": -5201.72900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8701770305633545, "rewards/margins": 47.1881103515625, "rewards/rejected": -49.058284759521484, "step": 14780 }, { "epoch": 64.58515283842794, "grad_norm": 3.6879628782316367e-06, "learning_rate": 1.6790570835131237e-06, "logits/chosen": -1.4686074256896973, "logits/rejected": -2.0894923210144043, "logps/chosen": -473.323974609375, "logps/rejected": -4511.572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.894529938697815, "rewards/margins": 40.672889709472656, "rewards/rejected": -42.56741714477539, "step": 14790 }, { "epoch": 64.62882096069869, "grad_norm": 7.862012702563567e-07, "learning_rate": 1.6754585955047081e-06, "logits/chosen": -1.4752978086471558, "logits/rejected": -2.1487488746643066, "logps/chosen": -449.4215393066406, "logps/rejected": -5135.8984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8836524486541748, "rewards/margins": 46.64937973022461, "rewards/rejected": -48.53303146362305, "step": 14800 }, { "epoch": 64.67248908296943, "grad_norm": 5.514550783490174e-07, "learning_rate": 1.6718620233226618e-06, "logits/chosen": -1.4804861545562744, "logits/rejected": -2.0963375568389893, "logps/chosen": -509.8076171875, "logps/rejected": -4765.99658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0548503398895264, "rewards/margins": 42.99925994873047, "rewards/rejected": -45.054107666015625, "step": 14810 }, { "epoch": 64.71615720524018, "grad_norm": 1.907974348185484e-06, "learning_rate": 1.668267375323638e-06, "logits/chosen": -1.5385081768035889, "logits/rejected": -2.1589720249176025, "logps/chosen": -477.7645568847656, "logps/rejected": -5132.87158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9318532943725586, "rewards/margins": 46.577857971191406, "rewards/rejected": -48.50971221923828, "step": 14820 }, { "epoch": 64.75982532751091, "grad_norm": 1.2436606690523117e-07, "learning_rate": 1.6646746598598223e-06, "logits/chosen": -1.5418169498443604, "logits/rejected": -2.1629278659820557, "logps/chosen": -482.0885314941406, "logps/rejected": -5364.9560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9517526626586914, "rewards/margins": 48.696903228759766, "rewards/rejected": -50.648651123046875, "step": 14830 }, { "epoch": 64.80349344978166, "grad_norm": 4.4656800358509736e-08, "learning_rate": 1.6610838852789078e-06, "logits/chosen": -1.495597243309021, "logits/rejected": -2.113538980484009, "logps/chosen": -479.7537536621094, "logps/rejected": -4616.603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.876848816871643, "rewards/margins": 41.66091537475586, "rewards/rejected": -43.53776550292969, "step": 14840 }, { "epoch": 64.8471615720524, "grad_norm": 5.303210759004948e-07, "learning_rate": 1.6574950599240786e-06, "logits/chosen": -1.51628577709198, "logits/rejected": -2.164079189300537, "logps/chosen": -495.8106384277344, "logps/rejected": -5257.2333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.207881450653076, "rewards/margins": 47.463287353515625, "rewards/rejected": -49.67116928100586, "step": 14850 }, { "epoch": 64.89082969432314, "grad_norm": 1.0120132881539066e-07, "learning_rate": 1.6539081921339875e-06, "logits/chosen": -1.545444369316101, "logits/rejected": -2.227687358856201, "logps/chosen": -452.2998962402344, "logps/rejected": -5287.79443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.926514983177185, "rewards/margins": 48.18767547607422, "rewards/rejected": -50.11418914794922, "step": 14860 }, { "epoch": 64.93449781659389, "grad_norm": 1.9158510928540097e-06, "learning_rate": 1.650323290242744e-06, "logits/chosen": -1.5214674472808838, "logits/rejected": -2.183239459991455, "logps/chosen": -483.62835693359375, "logps/rejected": -5223.75146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8398510217666626, "rewards/margins": 47.62804412841797, "rewards/rejected": -49.4678955078125, "step": 14870 }, { "epoch": 64.97816593886463, "grad_norm": 0.00017766319936739227, "learning_rate": 1.646740362579886e-06, "logits/chosen": -1.5739082098007202, "logits/rejected": -2.154524803161621, "logps/chosen": -447.73944091796875, "logps/rejected": -5090.56982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.987786889076233, "rewards/margins": 46.0156135559082, "rewards/rejected": -48.003395080566406, "step": 14880 }, { "epoch": 65.02183406113537, "grad_norm": 6.750557804598617e-08, "learning_rate": 1.6431594174703647e-06, "logits/chosen": -1.5609066486358643, "logits/rejected": -2.2340056896209717, "logps/chosen": -471.31768798828125, "logps/rejected": -5352.0146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9307712316513062, "rewards/margins": 48.6916389465332, "rewards/rejected": -50.62240982055664, "step": 14890 }, { "epoch": 65.06550218340611, "grad_norm": 4.139493138969338e-06, "learning_rate": 1.639580463234525e-06, "logits/chosen": -1.5490925312042236, "logits/rejected": -2.223043918609619, "logps/chosen": -461.92559814453125, "logps/rejected": -5494.9716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9549671411514282, "rewards/margins": 50.009056091308594, "rewards/rejected": -51.96403121948242, "step": 14900 }, { "epoch": 65.10917030567686, "grad_norm": 2.0097678976346003e-07, "learning_rate": 1.636003508188086e-06, "logits/chosen": -1.5023701190948486, "logits/rejected": -2.094789743423462, "logps/chosen": -466.8427734375, "logps/rejected": -4867.7255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.015115737915039, "rewards/margins": 43.95341873168945, "rewards/rejected": -45.96853256225586, "step": 14910 }, { "epoch": 65.1528384279476, "grad_norm": 3.672958782799806e-07, "learning_rate": 1.632428560642123e-06, "logits/chosen": -1.5338044166564941, "logits/rejected": -2.156409740447998, "logps/chosen": -466.532958984375, "logps/rejected": -5141.919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8941758871078491, "rewards/margins": 46.70330047607422, "rewards/rejected": -48.59748077392578, "step": 14920 }, { "epoch": 65.19650655021834, "grad_norm": 2.287372787146112e-05, "learning_rate": 1.6288556289030453e-06, "logits/chosen": -1.5044015645980835, "logits/rejected": -2.130368232727051, "logps/chosen": -483.02642822265625, "logps/rejected": -4725.2314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9548050165176392, "rewards/margins": 42.62761688232422, "rewards/rejected": -44.582420349121094, "step": 14930 }, { "epoch": 65.24017467248909, "grad_norm": 2.0825866646641498e-07, "learning_rate": 1.625284721272579e-06, "logits/chosen": -1.48587965965271, "logits/rejected": -2.085144519805908, "logps/chosen": -496.17108154296875, "logps/rejected": -4431.42529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9298121929168701, "rewards/margins": 39.87092590332031, "rewards/rejected": -41.800743103027344, "step": 14940 }, { "epoch": 65.28384279475982, "grad_norm": 9.169965153954406e-08, "learning_rate": 1.6217158460477468e-06, "logits/chosen": -1.5735887289047241, "logits/rejected": -2.232123851776123, "logps/chosen": -523.56103515625, "logps/rejected": -5080.6640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3175294399261475, "rewards/margins": 45.62297821044922, "rewards/rejected": -47.94050598144531, "step": 14950 }, { "epoch": 65.32751091703057, "grad_norm": 1.6264463104484483e-06, "learning_rate": 1.6181490115208484e-06, "logits/chosen": -1.608521819114685, "logits/rejected": -2.350233554840088, "logps/chosen": -446.341552734375, "logps/rejected": -5442.2958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8290555477142334, "rewards/margins": 49.67839050292969, "rewards/rejected": -51.5074462890625, "step": 14960 }, { "epoch": 65.37117903930131, "grad_norm": 1.2438284827762996e-07, "learning_rate": 1.6145842259794456e-06, "logits/chosen": -1.545543909072876, "logits/rejected": -2.20145583152771, "logps/chosen": -487.3348083496094, "logps/rejected": -5047.6748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9741888046264648, "rewards/margins": 45.751060485839844, "rewards/rejected": -47.725250244140625, "step": 14970 }, { "epoch": 65.41484716157206, "grad_norm": 8.780304121439531e-07, "learning_rate": 1.6110214977063345e-06, "logits/chosen": -1.5559403896331787, "logits/rejected": -2.1961381435394287, "logps/chosen": -462.6405334472656, "logps/rejected": -4898.20458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.037153482437134, "rewards/margins": 44.1705207824707, "rewards/rejected": -46.207672119140625, "step": 14980 }, { "epoch": 65.4585152838428, "grad_norm": 5.10235487159632e-08, "learning_rate": 1.607460834979534e-06, "logits/chosen": -1.6072183847427368, "logits/rejected": -2.277447462081909, "logps/chosen": -455.6698303222656, "logps/rejected": -5369.45751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8853168487548828, "rewards/margins": 48.920982360839844, "rewards/rejected": -50.806297302246094, "step": 14990 }, { "epoch": 65.50218340611353, "grad_norm": 2.6608496118190716e-07, "learning_rate": 1.603902246072263e-06, "logits/chosen": -1.5041605234146118, "logits/rejected": -2.1284267902374268, "logps/chosen": -490.9385681152344, "logps/rejected": -4794.0458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9012670516967773, "rewards/margins": 43.39781951904297, "rewards/rejected": -45.29908752441406, "step": 15000 }, { "epoch": 65.54585152838428, "grad_norm": 1.7355553975201424e-07, "learning_rate": 1.6003457392529215e-06, "logits/chosen": -1.5611135959625244, "logits/rejected": -2.283480167388916, "logps/chosen": -463.7470703125, "logps/rejected": -5448.966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9547576904296875, "rewards/margins": 49.43714141845703, "rewards/rejected": -51.39189529418945, "step": 15010 }, { "epoch": 65.58951965065502, "grad_norm": 2.5350863302497997e-07, "learning_rate": 1.5967913227850729e-06, "logits/chosen": -1.5729787349700928, "logits/rejected": -2.2545313835144043, "logps/chosen": -471.14080810546875, "logps/rejected": -5402.615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.134284734725952, "rewards/margins": 48.859397888183594, "rewards/rejected": -50.993690490722656, "step": 15020 }, { "epoch": 65.63318777292577, "grad_norm": 3.73391894059458e-06, "learning_rate": 1.5932390049274226e-06, "logits/chosen": -1.5287055969238281, "logits/rejected": -2.227620840072632, "logps/chosen": -473.24951171875, "logps/rejected": -5013.712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8475319147109985, "rewards/margins": 45.595802307128906, "rewards/rejected": -47.44333267211914, "step": 15030 }, { "epoch": 65.67685589519651, "grad_norm": 3.170144903462069e-06, "learning_rate": 1.5896887939338001e-06, "logits/chosen": -1.5098379850387573, "logits/rejected": -2.111452341079712, "logps/chosen": -465.85211181640625, "logps/rejected": -4926.0048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8506696224212646, "rewards/margins": 44.68646240234375, "rewards/rejected": -46.53712463378906, "step": 15040 }, { "epoch": 65.72052401746726, "grad_norm": 6.097993238608442e-08, "learning_rate": 1.5861406980531386e-06, "logits/chosen": -1.5997108221054077, "logits/rejected": -2.2991528511047363, "logps/chosen": -443.09912109375, "logps/rejected": -5702.390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8594917058944702, "rewards/margins": 52.10064697265625, "rewards/rejected": -53.96014404296875, "step": 15050 }, { "epoch": 65.76419213973799, "grad_norm": 3.654718555719594e-08, "learning_rate": 1.5825947255294605e-06, "logits/chosen": -1.58187997341156, "logits/rejected": -2.2148799896240234, "logps/chosen": -489.9889221191406, "logps/rejected": -5348.5068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0849123001098633, "rewards/margins": 48.390445709228516, "rewards/rejected": -50.47536087036133, "step": 15060 }, { "epoch": 65.80786026200873, "grad_norm": 8.912236062954613e-08, "learning_rate": 1.5790508846018493e-06, "logits/chosen": -1.4854915142059326, "logits/rejected": -2.0564169883728027, "logps/chosen": -475.8153381347656, "logps/rejected": -4480.65576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0728015899658203, "rewards/margins": 40.164772033691406, "rewards/rejected": -42.237571716308594, "step": 15070 }, { "epoch": 65.85152838427948, "grad_norm": 6.319898623410387e-07, "learning_rate": 1.57550918350444e-06, "logits/chosen": -1.5556399822235107, "logits/rejected": -2.2336668968200684, "logps/chosen": -480.7025451660156, "logps/rejected": -5035.3251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9568965435028076, "rewards/margins": 45.6185188293457, "rewards/rejected": -47.57542037963867, "step": 15080 }, { "epoch": 65.89519650655022, "grad_norm": 1.3062588403868584e-07, "learning_rate": 1.5719696304663932e-06, "logits/chosen": -1.5214914083480835, "logits/rejected": -2.1548516750335693, "logps/chosen": -493.59307861328125, "logps/rejected": -5061.505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9357267618179321, "rewards/margins": 45.87135696411133, "rewards/rejected": -47.80707931518555, "step": 15090 }, { "epoch": 65.93886462882097, "grad_norm": 3.6359001780791966e-07, "learning_rate": 1.5684322337118791e-06, "logits/chosen": -1.558870553970337, "logits/rejected": -2.253415822982788, "logps/chosen": -486.75848388671875, "logps/rejected": -5106.0830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0045247077941895, "rewards/margins": 46.320980072021484, "rewards/rejected": -48.32550811767578, "step": 15100 }, { "epoch": 65.9825327510917, "grad_norm": 1.6177820300722843e-05, "learning_rate": 1.564897001460059e-06, "logits/chosen": -1.5242403745651245, "logits/rejected": -2.180098533630371, "logps/chosen": -464.2691955566406, "logps/rejected": -5076.21435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8485819101333618, "rewards/margins": 46.11756134033203, "rewards/rejected": -47.96614074707031, "step": 15110 }, { "epoch": 66.02620087336244, "grad_norm": 1.9869213080327746e-07, "learning_rate": 1.5613639419250637e-06, "logits/chosen": -1.5221039056777954, "logits/rejected": -2.1630640029907227, "logps/chosen": -477.9140625, "logps/rejected": -4897.5068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8819968700408936, "rewards/margins": 44.40209197998047, "rewards/rejected": -46.284088134765625, "step": 15120 }, { "epoch": 66.06986899563319, "grad_norm": 2.0809727525698007e-06, "learning_rate": 1.557833063315976e-06, "logits/chosen": -1.581491231918335, "logits/rejected": -2.2855374813079834, "logps/chosen": -456.75146484375, "logps/rejected": -5727.86376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9024860858917236, "rewards/margins": 52.2380485534668, "rewards/rejected": -54.140533447265625, "step": 15130 }, { "epoch": 66.11353711790393, "grad_norm": 4.339389723948439e-08, "learning_rate": 1.5543043738368107e-06, "logits/chosen": -1.5735142230987549, "logits/rejected": -2.194976568222046, "logps/chosen": -446.5738220214844, "logps/rejected": -5438.87841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.855347990989685, "rewards/margins": 49.410316467285156, "rewards/rejected": -51.265663146972656, "step": 15140 }, { "epoch": 66.15720524017468, "grad_norm": 2.651749077924612e-07, "learning_rate": 1.5507778816864962e-06, "logits/chosen": -1.4760653972625732, "logits/rejected": -2.024789810180664, "logps/chosen": -513.3367919921875, "logps/rejected": -4590.42431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.996604323387146, "rewards/margins": 41.35921096801758, "rewards/rejected": -43.35581588745117, "step": 15150 }, { "epoch": 66.20087336244542, "grad_norm": 4.828841859508104e-08, "learning_rate": 1.5472535950588575e-06, "logits/chosen": -1.5866973400115967, "logits/rejected": -2.2664167881011963, "logps/chosen": -449.68157958984375, "logps/rejected": -5632.5048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8791913986206055, "rewards/margins": 51.309364318847656, "rewards/rejected": -53.18854904174805, "step": 15160 }, { "epoch": 66.24454148471615, "grad_norm": 2.770493007014336e-07, "learning_rate": 1.5437315221425924e-06, "logits/chosen": -1.5123684406280518, "logits/rejected": -2.1333017349243164, "logps/chosen": -483.54217529296875, "logps/rejected": -4945.8427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8853309154510498, "rewards/margins": 44.847145080566406, "rewards/rejected": -46.73247528076172, "step": 15170 }, { "epoch": 66.2882096069869, "grad_norm": 8.166012708871429e-06, "learning_rate": 1.5402116711212568e-06, "logits/chosen": -1.549749493598938, "logits/rejected": -2.110678195953369, "logps/chosen": -473.267333984375, "logps/rejected": -5115.80126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.105539083480835, "rewards/margins": 46.262550354003906, "rewards/rejected": -48.36808776855469, "step": 15180 }, { "epoch": 66.33187772925764, "grad_norm": 1.625174484397497e-07, "learning_rate": 1.5366940501732422e-06, "logits/chosen": -1.493618130683899, "logits/rejected": -2.189694881439209, "logps/chosen": -456.9281311035156, "logps/rejected": -4981.6787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8079826831817627, "rewards/margins": 45.30891036987305, "rewards/rejected": -47.11688995361328, "step": 15190 }, { "epoch": 66.37554585152839, "grad_norm": 1.0660664757151716e-07, "learning_rate": 1.5331786674717611e-06, "logits/chosen": -1.4957197904586792, "logits/rejected": -2.1314775943756104, "logps/chosen": -509.9996032714844, "logps/rejected": -4965.6728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9688918590545654, "rewards/margins": 44.928802490234375, "rewards/rejected": -46.8976936340332, "step": 15200 }, { "epoch": 66.41921397379913, "grad_norm": 1.2235330726387656e-07, "learning_rate": 1.5296655311848236e-06, "logits/chosen": -1.5116825103759766, "logits/rejected": -2.1416711807250977, "logps/chosen": -475.794677734375, "logps/rejected": -4984.8662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0128912925720215, "rewards/margins": 45.04216766357422, "rewards/rejected": -47.05506134033203, "step": 15210 }, { "epoch": 66.46288209606988, "grad_norm": 1.1354458975948702e-07, "learning_rate": 1.5261546494752209e-06, "logits/chosen": -1.5749263763427734, "logits/rejected": -2.179100275039673, "logps/chosen": -457.28045654296875, "logps/rejected": -5472.67578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.921451210975647, "rewards/margins": 49.7355842590332, "rewards/rejected": -51.65703201293945, "step": 15220 }, { "epoch": 66.5065502183406, "grad_norm": 1.406155432890093e-06, "learning_rate": 1.5226460305005045e-06, "logits/chosen": -1.510498285293579, "logits/rejected": -2.0567517280578613, "logps/chosen": -486.3173828125, "logps/rejected": -4573.322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.025068759918213, "rewards/margins": 41.060935974121094, "rewards/rejected": -43.08599853515625, "step": 15230 }, { "epoch": 66.55021834061135, "grad_norm": 1.333262176391125e-06, "learning_rate": 1.5191396824129696e-06, "logits/chosen": -1.5388157367706299, "logits/rejected": -2.10699462890625, "logps/chosen": -471.1400451660156, "logps/rejected": -5054.72802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9212539196014404, "rewards/margins": 45.72340774536133, "rewards/rejected": -47.644657135009766, "step": 15240 }, { "epoch": 66.5938864628821, "grad_norm": 5.003435710689706e-08, "learning_rate": 1.5156356133596356e-06, "logits/chosen": -1.537382960319519, "logits/rejected": -2.1691126823425293, "logps/chosen": -492.6481018066406, "logps/rejected": -4897.5732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0052356719970703, "rewards/margins": 44.27549743652344, "rewards/rejected": -46.280731201171875, "step": 15250 }, { "epoch": 66.63755458515284, "grad_norm": 2.475085272210081e-06, "learning_rate": 1.5121338314822253e-06, "logits/chosen": -1.5488481521606445, "logits/rejected": -2.174309730529785, "logps/chosen": -464.9195861816406, "logps/rejected": -5330.84814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9567972421646118, "rewards/margins": 48.30320358276367, "rewards/rejected": -50.26000213623047, "step": 15260 }, { "epoch": 66.68122270742359, "grad_norm": 8.828365065805798e-07, "learning_rate": 1.5086343449171475e-06, "logits/chosen": -1.5390610694885254, "logits/rejected": -2.1500277519226074, "logps/chosen": -503.16815185546875, "logps/rejected": -5041.58837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1288819313049316, "rewards/margins": 45.43232345581055, "rewards/rejected": -47.56120681762695, "step": 15270 }, { "epoch": 66.72489082969432, "grad_norm": 6.310910816627727e-08, "learning_rate": 1.5051371617954779e-06, "logits/chosen": -1.5028049945831299, "logits/rejected": -2.1290664672851562, "logps/chosen": -487.8313903808594, "logps/rejected": -5005.9921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9672489166259766, "rewards/margins": 45.29907989501953, "rewards/rejected": -47.266334533691406, "step": 15280 }, { "epoch": 66.76855895196506, "grad_norm": 1.2491629479065463e-07, "learning_rate": 1.5016422902429403e-06, "logits/chosen": -1.5639278888702393, "logits/rejected": -2.2552218437194824, "logps/chosen": -452.1459045410156, "logps/rejected": -5463.4052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9866615533828735, "rewards/margins": 49.661224365234375, "rewards/rejected": -51.64788818359375, "step": 15290 }, { "epoch": 66.8122270742358, "grad_norm": 3.0421629149474316e-07, "learning_rate": 1.4981497383798876e-06, "logits/chosen": -1.5388531684875488, "logits/rejected": -2.1593737602233887, "logps/chosen": -470.27288818359375, "logps/rejected": -5172.06005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9997107982635498, "rewards/margins": 46.796043395996094, "rewards/rejected": -48.795753479003906, "step": 15300 }, { "epoch": 66.85589519650655, "grad_norm": 2.3948661566485264e-05, "learning_rate": 1.4946595143212836e-06, "logits/chosen": -1.4998043775558472, "logits/rejected": -2.154015064239502, "logps/chosen": -477.94842529296875, "logps/rejected": -4796.89306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9469038248062134, "rewards/margins": 43.35892868041992, "rewards/rejected": -45.30582809448242, "step": 15310 }, { "epoch": 66.8995633187773, "grad_norm": 4.5940993306984377e-07, "learning_rate": 1.491171626176681e-06, "logits/chosen": -1.4454913139343262, "logits/rejected": -2.0479211807250977, "logps/chosen": -486.78570556640625, "logps/rejected": -4617.10009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9363586902618408, "rewards/margins": 41.55152893066406, "rewards/rejected": -43.487884521484375, "step": 15320 }, { "epoch": 66.94323144104804, "grad_norm": 9.576845803415092e-08, "learning_rate": 1.4876860820502071e-06, "logits/chosen": -1.5237029790878296, "logits/rejected": -2.1003787517547607, "logps/chosen": -473.94000244140625, "logps/rejected": -4770.76318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9498412609100342, "rewards/margins": 43.02189636230469, "rewards/rejected": -44.97173309326172, "step": 15330 }, { "epoch": 66.98689956331877, "grad_norm": 1.8784741419568084e-06, "learning_rate": 1.4842028900405422e-06, "logits/chosen": -1.547074556350708, "logits/rejected": -2.2040672302246094, "logps/chosen": -475.923095703125, "logps/rejected": -5254.79638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9908854961395264, "rewards/margins": 47.60289764404297, "rewards/rejected": -49.59378433227539, "step": 15340 }, { "epoch": 67.03056768558952, "grad_norm": 5.721413348696251e-06, "learning_rate": 1.4807220582409024e-06, "logits/chosen": -1.588322639465332, "logits/rejected": -2.2878592014312744, "logps/chosen": -457.26611328125, "logps/rejected": -5435.39404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9431276321411133, "rewards/margins": 49.46831130981445, "rewards/rejected": -51.41144561767578, "step": 15350 }, { "epoch": 67.07423580786026, "grad_norm": 3.799966132125495e-07, "learning_rate": 1.4772435947390184e-06, "logits/chosen": -1.5520403385162354, "logits/rejected": -2.232736110687256, "logps/chosen": -478.18707275390625, "logps/rejected": -4969.61962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0476949214935303, "rewards/margins": 44.971649169921875, "rewards/rejected": -47.01934051513672, "step": 15360 }, { "epoch": 67.117903930131, "grad_norm": 2.205280525874862e-08, "learning_rate": 1.473767507617119e-06, "logits/chosen": -1.5240862369537354, "logits/rejected": -2.1679790019989014, "logps/chosen": -480.6197204589844, "logps/rejected": -4928.978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8862926959991455, "rewards/margins": 44.70478057861328, "rewards/rejected": -46.591068267822266, "step": 15370 }, { "epoch": 67.16157205240175, "grad_norm": 8.733274486761972e-08, "learning_rate": 1.4702938049519106e-06, "logits/chosen": -1.5058773756027222, "logits/rejected": -2.09409236907959, "logps/chosen": -481.9976501464844, "logps/rejected": -4779.52880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9853750467300415, "rewards/margins": 43.13066482543945, "rewards/rejected": -45.11603927612305, "step": 15380 }, { "epoch": 67.20524017467248, "grad_norm": 8.494960118008773e-08, "learning_rate": 1.466822494814561e-06, "logits/chosen": -1.5510308742523193, "logits/rejected": -2.1816604137420654, "logps/chosen": -487.55224609375, "logps/rejected": -5057.89892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0288803577423096, "rewards/margins": 45.724220275878906, "rewards/rejected": -47.75309753417969, "step": 15390 }, { "epoch": 67.24890829694323, "grad_norm": 1.4067320931633542e-06, "learning_rate": 1.4633535852706775e-06, "logits/chosen": -1.5303648710250854, "logits/rejected": -2.184601306915283, "logps/chosen": -483.3497619628906, "logps/rejected": -5123.4404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0271761417388916, "rewards/margins": 46.34510040283203, "rewards/rejected": -48.37227249145508, "step": 15400 }, { "epoch": 67.29257641921397, "grad_norm": 4.42234239925771e-07, "learning_rate": 1.4598870843802898e-06, "logits/chosen": -1.5324976444244385, "logits/rejected": -2.1418004035949707, "logps/chosen": -500.5455017089844, "logps/rejected": -5003.4716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9422467947006226, "rewards/margins": 45.24094772338867, "rewards/rejected": -47.183197021484375, "step": 15410 }, { "epoch": 67.33624454148472, "grad_norm": 5.982455085943857e-07, "learning_rate": 1.456423000197832e-06, "logits/chosen": -1.5890858173370361, "logits/rejected": -2.2845795154571533, "logps/chosen": -473.3692321777344, "logps/rejected": -5707.68115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1574249267578125, "rewards/margins": 51.7771110534668, "rewards/rejected": -53.934539794921875, "step": 15420 }, { "epoch": 67.37991266375546, "grad_norm": 6.783498275149748e-06, "learning_rate": 1.4529613407721193e-06, "logits/chosen": -1.48823881149292, "logits/rejected": -2.1490554809570312, "logps/chosen": -498.68707275390625, "logps/rejected": -4657.59765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0396759510040283, "rewards/margins": 41.950599670410156, "rewards/rejected": -43.99028396606445, "step": 15430 }, { "epoch": 67.4235807860262, "grad_norm": 9.354175549331054e-06, "learning_rate": 1.4495021141463403e-06, "logits/chosen": -1.574141263961792, "logits/rejected": -2.2738680839538574, "logps/chosen": -481.2293395996094, "logps/rejected": -5442.3447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.933279275894165, "rewards/margins": 49.480995178222656, "rewards/rejected": -51.414276123046875, "step": 15440 }, { "epoch": 67.46724890829694, "grad_norm": 9.538376545067973e-06, "learning_rate": 1.4460453283580244e-06, "logits/chosen": -1.631675362586975, "logits/rejected": -2.3172767162323, "logps/chosen": -451.01837158203125, "logps/rejected": -5739.52197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8952674865722656, "rewards/margins": 52.3637580871582, "rewards/rejected": -54.25902557373047, "step": 15450 }, { "epoch": 67.51091703056768, "grad_norm": 2.117566108319893e-06, "learning_rate": 1.4425909914390311e-06, "logits/chosen": -1.5078445672988892, "logits/rejected": -2.08152437210083, "logps/chosen": -488.79827880859375, "logps/rejected": -4782.0732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0589327812194824, "rewards/margins": 43.09685134887695, "rewards/rejected": -45.155784606933594, "step": 15460 }, { "epoch": 67.55458515283843, "grad_norm": 1.0190536233429299e-05, "learning_rate": 1.4391391114155328e-06, "logits/chosen": -1.4992971420288086, "logits/rejected": -2.1170544624328613, "logps/chosen": -495.10296630859375, "logps/rejected": -4645.3349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0321545600891113, "rewards/margins": 41.89007568359375, "rewards/rejected": -43.92222595214844, "step": 15470 }, { "epoch": 67.59825327510917, "grad_norm": 1.5762111558201405e-07, "learning_rate": 1.4356896963079886e-06, "logits/chosen": -1.5264368057250977, "logits/rejected": -2.1633076667785645, "logps/chosen": -459.93572998046875, "logps/rejected": -5383.037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9130580425262451, "rewards/margins": 48.932220458984375, "rewards/rejected": -50.845279693603516, "step": 15480 }, { "epoch": 67.64192139737992, "grad_norm": 4.3342384663438734e-07, "learning_rate": 1.4322427541311348e-06, "logits/chosen": -1.5188043117523193, "logits/rejected": -2.064915418624878, "logps/chosen": -510.6953125, "logps/rejected": -4682.33203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0316147804260254, "rewards/margins": 42.06149673461914, "rewards/rejected": -44.09311294555664, "step": 15490 }, { "epoch": 67.68558951965065, "grad_norm": 1.1240216497223182e-08, "learning_rate": 1.4287982928939606e-06, "logits/chosen": -1.5110893249511719, "logits/rejected": -2.135260820388794, "logps/chosen": -450.6175842285156, "logps/rejected": -5137.021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.892899751663208, "rewards/margins": 46.610694885253906, "rewards/rejected": -48.503597259521484, "step": 15500 }, { "epoch": 67.7292576419214, "grad_norm": 1.5020305076376522e-07, "learning_rate": 1.4253563205996878e-06, "logits/chosen": -1.4861094951629639, "logits/rejected": -2.1328721046447754, "logps/chosen": -525.5271606445312, "logps/rejected": -4636.1279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0252890586853027, "rewards/margins": 41.82198715209961, "rewards/rejected": -43.8472785949707, "step": 15510 }, { "epoch": 67.77292576419214, "grad_norm": 6.571131358180181e-07, "learning_rate": 1.4219168452457593e-06, "logits/chosen": -1.4787509441375732, "logits/rejected": -2.100289821624756, "logps/chosen": -489.251708984375, "logps/rejected": -4753.12890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.934523582458496, "rewards/margins": 42.78926086425781, "rewards/rejected": -44.72378921508789, "step": 15520 }, { "epoch": 67.81659388646288, "grad_norm": 4.714200266646456e-08, "learning_rate": 1.4184798748238148e-06, "logits/chosen": -1.5492355823516846, "logits/rejected": -2.210399866104126, "logps/chosen": -452.28607177734375, "logps/rejected": -5245.4228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9087520837783813, "rewards/margins": 47.643218994140625, "rewards/rejected": -49.551979064941406, "step": 15530 }, { "epoch": 67.86026200873363, "grad_norm": 5.101699483650278e-07, "learning_rate": 1.4150454173196727e-06, "logits/chosen": -1.5006825923919678, "logits/rejected": -2.1529674530029297, "logps/chosen": -479.95166015625, "logps/rejected": -4767.0986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8676334619522095, "rewards/margins": 43.15973663330078, "rewards/rejected": -45.027374267578125, "step": 15540 }, { "epoch": 67.90393013100437, "grad_norm": 8.94349272632744e-07, "learning_rate": 1.4116134807133124e-06, "logits/chosen": -1.5687611103057861, "logits/rejected": -2.226445198059082, "logps/chosen": -479.16839599609375, "logps/rejected": -4998.15087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.952720284461975, "rewards/margins": 45.20158767700195, "rewards/rejected": -47.15430450439453, "step": 15550 }, { "epoch": 67.9475982532751, "grad_norm": 2.9657541042211026e-08, "learning_rate": 1.4081840729788588e-06, "logits/chosen": -1.569090485572815, "logits/rejected": -2.2177062034606934, "logps/chosen": -486.5943298339844, "logps/rejected": -5596.0302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.132146120071411, "rewards/margins": 50.7408332824707, "rewards/rejected": -52.87298583984375, "step": 15560 }, { "epoch": 67.99126637554585, "grad_norm": 7.060136864283825e-06, "learning_rate": 1.4047572020845577e-06, "logits/chosen": -1.4993005990982056, "logits/rejected": -2.152334213256836, "logps/chosen": -501.2742614746094, "logps/rejected": -4790.33447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0562291145324707, "rewards/margins": 43.16691970825195, "rewards/rejected": -45.22315216064453, "step": 15570 }, { "epoch": 68.0349344978166, "grad_norm": 7.461734586159188e-08, "learning_rate": 1.4013328759927624e-06, "logits/chosen": -1.5225889682769775, "logits/rejected": -2.1547200679779053, "logps/chosen": -490.367431640625, "logps/rejected": -4984.76220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0199759006500244, "rewards/margins": 45.12551498413086, "rewards/rejected": -47.14548873901367, "step": 15580 }, { "epoch": 68.07860262008734, "grad_norm": 1.7194855307385221e-07, "learning_rate": 1.397911102659914e-06, "logits/chosen": -1.559985876083374, "logits/rejected": -2.1783456802368164, "logps/chosen": -483.4305725097656, "logps/rejected": -4916.57666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1917550563812256, "rewards/margins": 44.28253173828125, "rewards/rejected": -46.47428512573242, "step": 15590 }, { "epoch": 68.12227074235808, "grad_norm": 1.823952578986985e-06, "learning_rate": 1.3944918900365192e-06, "logits/chosen": -1.5412489175796509, "logits/rejected": -2.258071184158325, "logps/chosen": -471.818115234375, "logps/rejected": -5553.5849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9216458797454834, "rewards/margins": 50.65459442138672, "rewards/rejected": -52.57623291015625, "step": 15600 }, { "epoch": 68.16593886462883, "grad_norm": 6.208253731706414e-08, "learning_rate": 1.391075246067139e-06, "logits/chosen": -1.5533347129821777, "logits/rejected": -2.2786521911621094, "logps/chosen": -512.73291015625, "logps/rejected": -5670.07177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1095170974731445, "rewards/margins": 51.50244903564453, "rewards/rejected": -53.611968994140625, "step": 15610 }, { "epoch": 68.20960698689956, "grad_norm": 2.3122307930037342e-07, "learning_rate": 1.3876611786903622e-06, "logits/chosen": -1.5567138195037842, "logits/rejected": -2.209900379180908, "logps/chosen": -452.857177734375, "logps/rejected": -5053.36865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9890425205230713, "rewards/margins": 45.74555587768555, "rewards/rejected": -47.73460006713867, "step": 15620 }, { "epoch": 68.2532751091703, "grad_norm": 4.775326260062804e-08, "learning_rate": 1.3842496958387938e-06, "logits/chosen": -1.5277512073516846, "logits/rejected": -2.244945764541626, "logps/chosen": -463.2525939941406, "logps/rejected": -5207.84130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.938711166381836, "rewards/margins": 47.344993591308594, "rewards/rejected": -49.2837028503418, "step": 15630 }, { "epoch": 68.29694323144105, "grad_norm": 7.022467558001995e-08, "learning_rate": 1.3808408054390312e-06, "logits/chosen": -1.5290849208831787, "logits/rejected": -2.1108815670013428, "logps/chosen": -459.84014892578125, "logps/rejected": -5005.73583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.040239095687866, "rewards/margins": 45.16535186767578, "rewards/rejected": -47.20558547973633, "step": 15640 }, { "epoch": 68.3406113537118, "grad_norm": 9.743409610611049e-08, "learning_rate": 1.3774345154116513e-06, "logits/chosen": -1.510505199432373, "logits/rejected": -2.130753755569458, "logps/chosen": -477.93798828125, "logps/rejected": -4786.7587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9470983743667603, "rewards/margins": 43.30025100708008, "rewards/rejected": -45.247352600097656, "step": 15650 }, { "epoch": 68.38427947598254, "grad_norm": 1.3564405165614859e-07, "learning_rate": 1.374030833671185e-06, "logits/chosen": -1.5278292894363403, "logits/rejected": -2.2013049125671387, "logps/chosen": -472.8514709472656, "logps/rejected": -4980.77783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9857717752456665, "rewards/margins": 45.123775482177734, "rewards/rejected": -47.10955047607422, "step": 15660 }, { "epoch": 68.42794759825327, "grad_norm": 2.1449010204505345e-07, "learning_rate": 1.3706297681261065e-06, "logits/chosen": -1.5797131061553955, "logits/rejected": -2.276050090789795, "logps/chosen": -489.8717346191406, "logps/rejected": -5279.93115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.305522918701172, "rewards/margins": 47.61042785644531, "rewards/rejected": -49.91595458984375, "step": 15670 }, { "epoch": 68.47161572052401, "grad_norm": 5.1912870129188494e-08, "learning_rate": 1.3672313266788107e-06, "logits/chosen": -1.5196090936660767, "logits/rejected": -2.144686698913574, "logps/chosen": -478.774658203125, "logps/rejected": -4826.3857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9616029262542725, "rewards/margins": 43.662071228027344, "rewards/rejected": -45.62367630004883, "step": 15680 }, { "epoch": 68.51528384279476, "grad_norm": 6.840256769036056e-08, "learning_rate": 1.3638355172255917e-06, "logits/chosen": -1.5234973430633545, "logits/rejected": -2.1246705055236816, "logps/chosen": -500.66680908203125, "logps/rejected": -5005.91259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0403647422790527, "rewards/margins": 45.23616409301758, "rewards/rejected": -47.276527404785156, "step": 15690 }, { "epoch": 68.5589519650655, "grad_norm": 3.474757484380611e-07, "learning_rate": 1.3604423476566342e-06, "logits/chosen": -1.5087400674819946, "logits/rejected": -2.2277286052703857, "logps/chosen": -518.1661376953125, "logps/rejected": -5265.4970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0382771492004395, "rewards/margins": 47.7477912902832, "rewards/rejected": -49.78607177734375, "step": 15700 }, { "epoch": 68.60262008733625, "grad_norm": 9.634967201603994e-09, "learning_rate": 1.3570518258559829e-06, "logits/chosen": -1.5759931802749634, "logits/rejected": -2.2657055854797363, "logps/chosen": -486.22430419921875, "logps/rejected": -5532.2236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1050217151641846, "rewards/margins": 50.210975646972656, "rewards/rejected": -52.31599807739258, "step": 15710 }, { "epoch": 68.646288209607, "grad_norm": 0.00014338174679152403, "learning_rate": 1.353663959701536e-06, "logits/chosen": -1.518316626548767, "logits/rejected": -2.2362771034240723, "logps/chosen": -484.07879638671875, "logps/rejected": -4975.7666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1023120880126953, "rewards/margins": 44.96372985839844, "rewards/rejected": -47.066036224365234, "step": 15720 }, { "epoch": 68.68995633187772, "grad_norm": 9.84976884456078e-07, "learning_rate": 1.350278757065016e-06, "logits/chosen": -1.5255910158157349, "logits/rejected": -2.192894697189331, "logps/chosen": -482.65423583984375, "logps/rejected": -4834.19091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0076093673706055, "rewards/margins": 43.71172332763672, "rewards/rejected": -45.719329833984375, "step": 15730 }, { "epoch": 68.73362445414847, "grad_norm": 3.6077932699608696e-06, "learning_rate": 1.3468962258119621e-06, "logits/chosen": -1.5469928979873657, "logits/rejected": -2.190136432647705, "logps/chosen": -501.9896545410156, "logps/rejected": -5148.8916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.000239849090576, "rewards/margins": 46.66492462158203, "rewards/rejected": -48.665164947509766, "step": 15740 }, { "epoch": 68.77729257641921, "grad_norm": 7.748908686514317e-08, "learning_rate": 1.3435163738017011e-06, "logits/chosen": -1.5230904817581177, "logits/rejected": -2.1817758083343506, "logps/chosen": -502.883056640625, "logps/rejected": -4977.5556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.10166335105896, "rewards/margins": 44.92555618286133, "rewards/rejected": -47.0272216796875, "step": 15750 }, { "epoch": 68.82096069868996, "grad_norm": 9.404486888605536e-08, "learning_rate": 1.3401392088873388e-06, "logits/chosen": -1.5473096370697021, "logits/rejected": -2.2821757793426514, "logps/chosen": -466.91278076171875, "logps/rejected": -5561.38720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.059546947479248, "rewards/margins": 50.55396270751953, "rewards/rejected": -52.61350631713867, "step": 15760 }, { "epoch": 68.8646288209607, "grad_norm": 1.5302020958926587e-05, "learning_rate": 1.3367647389157367e-06, "logits/chosen": -1.5243319272994995, "logits/rejected": -2.1317334175109863, "logps/chosen": -469.62884521484375, "logps/rejected": -5158.10400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9955003261566162, "rewards/margins": 46.727577209472656, "rewards/rejected": -48.72307586669922, "step": 15770 }, { "epoch": 68.90829694323143, "grad_norm": 6.086724955986142e-06, "learning_rate": 1.3333929717274919e-06, "logits/chosen": -1.568906545639038, "logits/rejected": -2.1797337532043457, "logps/chosen": -467.009033203125, "logps/rejected": -5222.4765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.104729413986206, "rewards/margins": 47.2421760559082, "rewards/rejected": -49.3469123840332, "step": 15780 }, { "epoch": 68.95196506550218, "grad_norm": 2.2265226394535834e-08, "learning_rate": 1.3300239151569251e-06, "logits/chosen": -1.545568823814392, "logits/rejected": -2.2637555599212646, "logps/chosen": -482.5064392089844, "logps/rejected": -5405.12646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.05165433883667, "rewards/margins": 49.09874725341797, "rewards/rejected": -51.1504020690918, "step": 15790 }, { "epoch": 68.99563318777292, "grad_norm": 3.494227317085258e-08, "learning_rate": 1.3266575770320571e-06, "logits/chosen": -1.490154504776001, "logits/rejected": -2.165717840194702, "logps/chosen": -492.59271240234375, "logps/rejected": -4928.1044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0314784049987793, "rewards/margins": 44.47550964355469, "rewards/rejected": -46.50699234008789, "step": 15800 }, { "epoch": 69.03930131004367, "grad_norm": 2.5752728137298263e-07, "learning_rate": 1.3232939651745908e-06, "logits/chosen": -1.5221914052963257, "logits/rejected": -2.2074742317199707, "logps/chosen": -502.8133239746094, "logps/rejected": -5304.1015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1152873039245605, "rewards/margins": 48.05229568481445, "rewards/rejected": -50.16758346557617, "step": 15810 }, { "epoch": 69.08296943231441, "grad_norm": 4.090712193166225e-08, "learning_rate": 1.3199330873998977e-06, "logits/chosen": -1.5215909481048584, "logits/rejected": -2.1981027126312256, "logps/chosen": -463.9056701660156, "logps/rejected": -5116.5166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9573380947113037, "rewards/margins": 46.39630889892578, "rewards/rejected": -48.35364532470703, "step": 15820 }, { "epoch": 69.12663755458516, "grad_norm": 8.72105970566326e-08, "learning_rate": 1.3165749515169968e-06, "logits/chosen": -1.5416090488433838, "logits/rejected": -2.249051570892334, "logps/chosen": -484.3662109375, "logps/rejected": -5293.97607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.010108709335327, "rewards/margins": 48.04289627075195, "rewards/rejected": -50.053001403808594, "step": 15830 }, { "epoch": 69.17030567685589, "grad_norm": 3.722810140142972e-06, "learning_rate": 1.3132195653285318e-06, "logits/chosen": -1.5337131023406982, "logits/rejected": -2.262655735015869, "logps/chosen": -478.590087890625, "logps/rejected": -5416.06103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9290539026260376, "rewards/margins": 49.291908264160156, "rewards/rejected": -51.22095489501953, "step": 15840 }, { "epoch": 69.21397379912663, "grad_norm": 2.597290352351411e-07, "learning_rate": 1.309866936630762e-06, "logits/chosen": -1.5953872203826904, "logits/rejected": -2.2144041061401367, "logps/chosen": -460.7474670410156, "logps/rejected": -5417.73974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9573211669921875, "rewards/margins": 49.06977844238281, "rewards/rejected": -51.027099609375, "step": 15850 }, { "epoch": 69.25764192139738, "grad_norm": 2.7591930411570643e-08, "learning_rate": 1.3065170732135397e-06, "logits/chosen": -1.588028907775879, "logits/rejected": -2.210726261138916, "logps/chosen": -444.21527099609375, "logps/rejected": -5261.9765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9405667781829834, "rewards/margins": 47.74651336669922, "rewards/rejected": -49.68708038330078, "step": 15860 }, { "epoch": 69.30131004366812, "grad_norm": 1.3079750322463414e-07, "learning_rate": 1.3031699828602873e-06, "logits/chosen": -1.5668047666549683, "logits/rejected": -2.2268598079681396, "logps/chosen": -508.1253356933594, "logps/rejected": -5175.21142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1131222248077393, "rewards/margins": 46.904388427734375, "rewards/rejected": -49.017513275146484, "step": 15870 }, { "epoch": 69.34497816593887, "grad_norm": 1.9378554969771512e-06, "learning_rate": 1.2998256733479896e-06, "logits/chosen": -1.5090807676315308, "logits/rejected": -2.100106716156006, "logps/chosen": -487.5975646972656, "logps/rejected": -4791.5859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0099990367889404, "rewards/margins": 43.26713180541992, "rewards/rejected": -45.277130126953125, "step": 15880 }, { "epoch": 69.38864628820961, "grad_norm": 1.601273326572262e-07, "learning_rate": 1.2964841524471672e-06, "logits/chosen": -1.5133397579193115, "logits/rejected": -2.109598159790039, "logps/chosen": -476.46783447265625, "logps/rejected": -4498.41162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0843417644500732, "rewards/margins": 40.4194450378418, "rewards/rejected": -42.5037841796875, "step": 15890 }, { "epoch": 69.43231441048034, "grad_norm": 1.473482302477585e-05, "learning_rate": 1.2931454279218595e-06, "logits/chosen": -1.4939334392547607, "logits/rejected": -2.0512351989746094, "logps/chosen": -497.9315490722656, "logps/rejected": -4440.7021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0166614055633545, "rewards/margins": 39.89679718017578, "rewards/rejected": -41.913455963134766, "step": 15900 }, { "epoch": 69.47598253275109, "grad_norm": 1.6126290433044706e-07, "learning_rate": 1.2898095075296145e-06, "logits/chosen": -1.5459438562393188, "logits/rejected": -2.201902151107788, "logps/chosen": -493.441162109375, "logps/rejected": -5214.29248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9963924884796143, "rewards/margins": 47.217384338378906, "rewards/rejected": -49.21377944946289, "step": 15910 }, { "epoch": 69.51965065502183, "grad_norm": 4.889443022580778e-08, "learning_rate": 1.2864763990214593e-06, "logits/chosen": -1.536858081817627, "logits/rejected": -2.2356173992156982, "logps/chosen": -457.50408935546875, "logps/rejected": -5179.76806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0184714794158936, "rewards/margins": 46.99771499633789, "rewards/rejected": -49.01618576049805, "step": 15920 }, { "epoch": 69.56331877729258, "grad_norm": 9.866519431800796e-08, "learning_rate": 1.2831461101418884e-06, "logits/chosen": -1.5638784170150757, "logits/rejected": -2.2893383502960205, "logps/chosen": -504.8409729003906, "logps/rejected": -5439.5693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1579580307006836, "rewards/margins": 49.33350372314453, "rewards/rejected": -51.49146270751953, "step": 15930 }, { "epoch": 69.60698689956332, "grad_norm": 3.5824871253566894e-07, "learning_rate": 1.2798186486288484e-06, "logits/chosen": -1.4934260845184326, "logits/rejected": -2.1878037452697754, "logps/chosen": -548.7427978515625, "logps/rejected": -4706.3017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1708714962005615, "rewards/margins": 42.337867736816406, "rewards/rejected": -44.50873565673828, "step": 15940 }, { "epoch": 69.65065502183405, "grad_norm": 1.1419054093230225e-07, "learning_rate": 1.2764940222137107e-06, "logits/chosen": -1.5580031871795654, "logits/rejected": -2.211928129196167, "logps/chosen": -462.77056884765625, "logps/rejected": -4917.17578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9763972759246826, "rewards/margins": 44.493003845214844, "rewards/rejected": -46.469398498535156, "step": 15950 }, { "epoch": 69.6943231441048, "grad_norm": 7.849854691894323e-06, "learning_rate": 1.2731722386212648e-06, "logits/chosen": -1.5812078714370728, "logits/rejected": -2.2198657989501953, "logps/chosen": -464.2748107910156, "logps/rejected": -5248.658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.99786376953125, "rewards/margins": 47.56464385986328, "rewards/rejected": -49.5625, "step": 15960 }, { "epoch": 69.73799126637554, "grad_norm": 1.235592384793988e-07, "learning_rate": 1.2698533055696926e-06, "logits/chosen": -1.503072738647461, "logits/rejected": -2.1640615463256836, "logps/chosen": -522.2886962890625, "logps/rejected": -4639.01318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.094058036804199, "rewards/margins": 41.718360900878906, "rewards/rejected": -43.81241989135742, "step": 15970 }, { "epoch": 69.78165938864629, "grad_norm": 9.600642745945602e-08, "learning_rate": 1.2665372307705527e-06, "logits/chosen": -1.5444588661193848, "logits/rejected": -2.146395683288574, "logps/chosen": -466.38934326171875, "logps/rejected": -4960.85107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9912497997283936, "rewards/margins": 44.81147766113281, "rewards/rejected": -46.80272674560547, "step": 15980 }, { "epoch": 69.82532751091703, "grad_norm": 1.1181348819089339e-07, "learning_rate": 1.263224021928761e-06, "logits/chosen": -1.5347137451171875, "logits/rejected": -2.2721405029296875, "logps/chosen": -472.9007263183594, "logps/rejected": -5358.9541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9888139963150024, "rewards/margins": 48.636959075927734, "rewards/rejected": -50.62577819824219, "step": 15990 }, { "epoch": 69.86899563318778, "grad_norm": 3.1363414066847576e-08, "learning_rate": 1.2599136867425776e-06, "logits/chosen": -1.5441443920135498, "logits/rejected": -2.1800918579101562, "logps/chosen": -483.5711975097656, "logps/rejected": -5045.59228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8998903036117554, "rewards/margins": 45.746116638183594, "rewards/rejected": -47.6460075378418, "step": 16000 }, { "epoch": 69.91266375545851, "grad_norm": 1.8974613251651413e-08, "learning_rate": 1.256606232903585e-06, "logits/chosen": -1.5310513973236084, "logits/rejected": -2.139108180999756, "logps/chosen": -475.4330139160156, "logps/rejected": -5052.609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9597305059432983, "rewards/margins": 45.71173095703125, "rewards/rejected": -47.67145919799805, "step": 16010 }, { "epoch": 69.95633187772926, "grad_norm": 1.0508446538184098e-06, "learning_rate": 1.253301668096667e-06, "logits/chosen": -1.5024495124816895, "logits/rejected": -2.219568967819214, "logps/chosen": -490.6515197753906, "logps/rejected": -5158.46240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.065485715866089, "rewards/margins": 46.7554931640625, "rewards/rejected": -48.82098388671875, "step": 16020 }, { "epoch": 70.0, "grad_norm": 1.173731107725095e-06, "learning_rate": 1.2500000000000007e-06, "logits/chosen": -1.6153695583343506, "logits/rejected": -2.2977027893066406, "logps/chosen": -514.8108520507812, "logps/rejected": -5289.8466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2019999027252197, "rewards/margins": 47.770103454589844, "rewards/rejected": -49.97210693359375, "step": 16030 }, { "epoch": 70.04366812227074, "grad_norm": 1.1480547500116498e-07, "learning_rate": 1.246701236285027e-06, "logits/chosen": -1.5574474334716797, "logits/rejected": -2.117877244949341, "logps/chosen": -466.7289123535156, "logps/rejected": -4939.59521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0322632789611816, "rewards/margins": 44.55817794799805, "rewards/rejected": -46.59044647216797, "step": 16040 }, { "epoch": 70.08733624454149, "grad_norm": 1.0920860883373308e-05, "learning_rate": 1.243405384616442e-06, "logits/chosen": -1.536647915840149, "logits/rejected": -2.2608652114868164, "logps/chosen": -480.7679748535156, "logps/rejected": -4930.5244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0052669048309326, "rewards/margins": 44.67133712768555, "rewards/rejected": -46.676605224609375, "step": 16050 }, { "epoch": 70.13100436681222, "grad_norm": 2.769018588288456e-07, "learning_rate": 1.2401124526521763e-06, "logits/chosen": -1.6085678339004517, "logits/rejected": -2.2967002391815186, "logps/chosen": -486.61114501953125, "logps/rejected": -5535.12255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.157395839691162, "rewards/margins": 50.17388916015625, "rewards/rejected": -52.3312873840332, "step": 16060 }, { "epoch": 70.17467248908297, "grad_norm": 1.676211521322002e-07, "learning_rate": 1.2368224480433732e-06, "logits/chosen": -1.5183576345443726, "logits/rejected": -2.1341164112091064, "logps/chosen": -479.7848205566406, "logps/rejected": -4565.6669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.164128065109253, "rewards/margins": 40.958030700683594, "rewards/rejected": -43.122154235839844, "step": 16070 }, { "epoch": 70.21834061135371, "grad_norm": 1.1897065921659765e-06, "learning_rate": 1.233535378434376e-06, "logits/chosen": -1.5020062923431396, "logits/rejected": -2.189990520477295, "logps/chosen": -466.09466552734375, "logps/rejected": -4890.21240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1220414638519287, "rewards/margins": 44.075504302978516, "rewards/rejected": -46.19754409790039, "step": 16080 }, { "epoch": 70.26200873362446, "grad_norm": 1.061525193275324e-08, "learning_rate": 1.2302512514627082e-06, "logits/chosen": -1.580475091934204, "logits/rejected": -2.2972118854522705, "logps/chosen": -439.4266662597656, "logps/rejected": -5587.984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8403675556182861, "rewards/margins": 51.035728454589844, "rewards/rejected": -52.87609100341797, "step": 16090 }, { "epoch": 70.3056768558952, "grad_norm": 1.6931546413062035e-06, "learning_rate": 1.2269700747590586e-06, "logits/chosen": -1.500331163406372, "logits/rejected": -2.184481620788574, "logps/chosen": -489.79437255859375, "logps/rejected": -4627.5791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.012877941131592, "rewards/margins": 41.63590621948242, "rewards/rejected": -43.648780822753906, "step": 16100 }, { "epoch": 70.34934497816595, "grad_norm": 2.3680202712968134e-06, "learning_rate": 1.2236918559472562e-06, "logits/chosen": -1.5974599123001099, "logits/rejected": -2.3117480278015137, "logps/chosen": -458.05609130859375, "logps/rejected": -5816.99755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0118842124938965, "rewards/margins": 52.95477294921875, "rewards/rejected": -54.9666633605957, "step": 16110 }, { "epoch": 70.39301310043668, "grad_norm": 3.0679367186365e-08, "learning_rate": 1.2204166026442624e-06, "logits/chosen": -1.563133716583252, "logits/rejected": -2.2956197261810303, "logps/chosen": -483.7726135253906, "logps/rejected": -5247.9765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9627386331558228, "rewards/margins": 47.69499969482422, "rewards/rejected": -49.657745361328125, "step": 16120 }, { "epoch": 70.43668122270742, "grad_norm": 4.65506173649494e-08, "learning_rate": 1.2171443224601443e-06, "logits/chosen": -1.5769739151000977, "logits/rejected": -2.251103639602661, "logps/chosen": -499.2472229003906, "logps/rejected": -4981.107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1029980182647705, "rewards/margins": 44.931617736816406, "rewards/rejected": -47.03461456298828, "step": 16130 }, { "epoch": 70.48034934497817, "grad_norm": 5.0837845803801076e-08, "learning_rate": 1.2138750229980608e-06, "logits/chosen": -1.5772325992584229, "logits/rejected": -2.3119893074035645, "logps/chosen": -504.11212158203125, "logps/rejected": -5138.32861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.049855947494507, "rewards/margins": 46.4584846496582, "rewards/rejected": -48.508338928222656, "step": 16140 }, { "epoch": 70.52401746724891, "grad_norm": 4.510465020689703e-08, "learning_rate": 1.2106087118542504e-06, "logits/chosen": -1.5586549043655396, "logits/rejected": -2.1295876502990723, "logps/chosen": -489.3207092285156, "logps/rejected": -4818.4697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.146087169647217, "rewards/margins": 43.26765823364258, "rewards/rejected": -45.41374969482422, "step": 16150 }, { "epoch": 70.56768558951966, "grad_norm": 1.6741505774022203e-05, "learning_rate": 1.2073453966180026e-06, "logits/chosen": -1.5472710132598877, "logits/rejected": -2.1955909729003906, "logps/chosen": -523.1842041015625, "logps/rejected": -4986.51904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0608508586883545, "rewards/margins": 45.001766204833984, "rewards/rejected": -47.06261444091797, "step": 16160 }, { "epoch": 70.61135371179039, "grad_norm": 1.2451075169006323e-07, "learning_rate": 1.2040850848716472e-06, "logits/chosen": -1.5496653318405151, "logits/rejected": -2.1962547302246094, "logps/chosen": -475.12957763671875, "logps/rejected": -5034.18798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.095456123352051, "rewards/margins": 45.46575927734375, "rewards/rejected": -47.56121826171875, "step": 16170 }, { "epoch": 70.65502183406113, "grad_norm": 3.7291003874152306e-06, "learning_rate": 1.2008277841905369e-06, "logits/chosen": -1.4933593273162842, "logits/rejected": -2.1131350994110107, "logps/chosen": -509.5865173339844, "logps/rejected": -4513.0556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.069972515106201, "rewards/margins": 40.48382568359375, "rewards/rejected": -42.553794860839844, "step": 16180 }, { "epoch": 70.69868995633188, "grad_norm": 5.281026501099943e-06, "learning_rate": 1.1975735021430279e-06, "logits/chosen": -1.495707631111145, "logits/rejected": -2.0965192317962646, "logps/chosen": -503.56353759765625, "logps/rejected": -5124.8994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.012282609939575, "rewards/margins": 46.40032958984375, "rewards/rejected": -48.412620544433594, "step": 16190 }, { "epoch": 70.74235807860262, "grad_norm": 6.849743304731709e-07, "learning_rate": 1.1943222462904603e-06, "logits/chosen": -1.561124563217163, "logits/rejected": -2.226485252380371, "logps/chosen": -482.1961364746094, "logps/rejected": -5365.35546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.100371837615967, "rewards/margins": 48.466514587402344, "rewards/rejected": -50.5668830871582, "step": 16200 }, { "epoch": 70.78602620087337, "grad_norm": 4.7004036779781833e-08, "learning_rate": 1.191074024187146e-06, "logits/chosen": -1.531817078590393, "logits/rejected": -2.2968461513519287, "logps/chosen": -509.4087829589844, "logps/rejected": -5127.6748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1008594036102295, "rewards/margins": 46.417518615722656, "rewards/rejected": -48.51837921142578, "step": 16210 }, { "epoch": 70.82969432314411, "grad_norm": 1.295876052932375e-07, "learning_rate": 1.187828843380346e-06, "logits/chosen": -1.550122857093811, "logits/rejected": -2.2292544841766357, "logps/chosen": -491.92193603515625, "logps/rejected": -5120.0087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2079391479492188, "rewards/margins": 46.2590446472168, "rewards/rejected": -48.46698760986328, "step": 16220 }, { "epoch": 70.87336244541484, "grad_norm": 3.439889311112018e-08, "learning_rate": 1.1845867114102532e-06, "logits/chosen": -1.556241750717163, "logits/rejected": -2.23750901222229, "logps/chosen": -465.98040771484375, "logps/rejected": -5245.966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.952005386352539, "rewards/margins": 47.61853790283203, "rewards/rejected": -49.5705451965332, "step": 16230 }, { "epoch": 70.91703056768559, "grad_norm": 1.6490942382785476e-07, "learning_rate": 1.1813476358099824e-06, "logits/chosen": -1.6102310419082642, "logits/rejected": -2.3240227699279785, "logps/chosen": -446.0685119628906, "logps/rejected": -5830.05908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9937187433242798, "rewards/margins": 53.18434524536133, "rewards/rejected": -55.17805862426758, "step": 16240 }, { "epoch": 70.96069868995633, "grad_norm": 1.2801374830405127e-07, "learning_rate": 1.1781116241055412e-06, "logits/chosen": -1.5483639240264893, "logits/rejected": -2.214693546295166, "logps/chosen": -471.1805114746094, "logps/rejected": -5261.5771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.00388765335083, "rewards/margins": 47.77763748168945, "rewards/rejected": -49.781524658203125, "step": 16250 }, { "epoch": 71.00436681222708, "grad_norm": 9.913550414634075e-08, "learning_rate": 1.1748786838158192e-06, "logits/chosen": -1.4883290529251099, "logits/rejected": -2.144876003265381, "logps/chosen": -485.03118896484375, "logps/rejected": -4800.35009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0326285362243652, "rewards/margins": 43.411529541015625, "rewards/rejected": -45.444156646728516, "step": 16260 }, { "epoch": 71.04803493449782, "grad_norm": 8.369166082504755e-08, "learning_rate": 1.171648822452572e-06, "logits/chosen": -1.5156681537628174, "logits/rejected": -2.1407735347747803, "logps/chosen": -473.44677734375, "logps/rejected": -4691.986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.054992914199829, "rewards/margins": 42.32884979248047, "rewards/rejected": -44.38383865356445, "step": 16270 }, { "epoch": 71.09170305676857, "grad_norm": 2.393018116161331e-07, "learning_rate": 1.1684220475203986e-06, "logits/chosen": -1.5337169170379639, "logits/rejected": -2.1940183639526367, "logps/chosen": -459.2457580566406, "logps/rejected": -5560.52490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0016391277313232, "rewards/margins": 50.53312301635742, "rewards/rejected": -52.534759521484375, "step": 16280 }, { "epoch": 71.1353711790393, "grad_norm": 1.1321427162368443e-06, "learning_rate": 1.1651983665167285e-06, "logits/chosen": -1.5681085586547852, "logits/rejected": -2.1665968894958496, "logps/chosen": -477.8076171875, "logps/rejected": -4766.5302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0537421703338623, "rewards/margins": 42.9572639465332, "rewards/rejected": -45.01100540161133, "step": 16290 }, { "epoch": 71.17903930131004, "grad_norm": 4.246979483676632e-08, "learning_rate": 1.1619777869318023e-06, "logits/chosen": -1.5229164361953735, "logits/rejected": -2.1675186157226562, "logps/chosen": -489.80035400390625, "logps/rejected": -4980.71630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0262820720672607, "rewards/margins": 45.03007888793945, "rewards/rejected": -47.05636215209961, "step": 16300 }, { "epoch": 71.22270742358079, "grad_norm": 6.402278145878253e-08, "learning_rate": 1.1587603162486525e-06, "logits/chosen": -1.5558388233184814, "logits/rejected": -2.1445984840393066, "logps/chosen": -484.8468322753906, "logps/rejected": -5044.16650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0704610347747803, "rewards/margins": 45.52789306640625, "rewards/rejected": -47.598350524902344, "step": 16310 }, { "epoch": 71.26637554585153, "grad_norm": 1.0223552102590984e-07, "learning_rate": 1.155545961943091e-06, "logits/chosen": -1.5084168910980225, "logits/rejected": -2.2111756801605225, "logps/chosen": -466.00543212890625, "logps/rejected": -5145.560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9966484308242798, "rewards/margins": 46.64103698730469, "rewards/rejected": -48.6376838684082, "step": 16320 }, { "epoch": 71.31004366812228, "grad_norm": 3.584050039681023e-08, "learning_rate": 1.1523347314836857e-06, "logits/chosen": -1.5346872806549072, "logits/rejected": -2.1643881797790527, "logps/chosen": -489.09149169921875, "logps/rejected": -4986.4599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0479607582092285, "rewards/margins": 44.99755096435547, "rewards/rejected": -47.04551315307617, "step": 16330 }, { "epoch": 71.353711790393, "grad_norm": 2.3157233985075986e-06, "learning_rate": 1.1491266323317495e-06, "logits/chosen": -1.533490777015686, "logits/rejected": -2.141753673553467, "logps/chosen": -518.0287475585938, "logps/rejected": -4780.87744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1882474422454834, "rewards/margins": 42.93500518798828, "rewards/rejected": -45.12324905395508, "step": 16340 }, { "epoch": 71.39737991266375, "grad_norm": 5.2860152488265705e-06, "learning_rate": 1.145921671941316e-06, "logits/chosen": -1.5287998914718628, "logits/rejected": -2.2382473945617676, "logps/chosen": -492.8653869628906, "logps/rejected": -5050.30859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1398658752441406, "rewards/margins": 45.51905059814453, "rewards/rejected": -47.658912658691406, "step": 16350 }, { "epoch": 71.4410480349345, "grad_norm": 1.5626969826922028e-08, "learning_rate": 1.14271985775913e-06, "logits/chosen": -1.4868600368499756, "logits/rejected": -2.0969555377960205, "logps/chosen": -518.31591796875, "logps/rejected": -4603.7734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.019700527191162, "rewards/margins": 41.422119140625, "rewards/rejected": -43.44181442260742, "step": 16360 }, { "epoch": 71.48471615720524, "grad_norm": 3.589042901994679e-08, "learning_rate": 1.1395211972246217e-06, "logits/chosen": -1.5351654291152954, "logits/rejected": -2.0643653869628906, "logps/chosen": -497.3377380371094, "logps/rejected": -4738.09375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0119810104370117, "rewards/margins": 42.76134490966797, "rewards/rejected": -44.7733268737793, "step": 16370 }, { "epoch": 71.52838427947599, "grad_norm": 1.6786486863280847e-07, "learning_rate": 1.136325697769897e-06, "logits/chosen": -1.5767955780029297, "logits/rejected": -2.3016598224639893, "logps/chosen": -444.73907470703125, "logps/rejected": -5484.60498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0093188285827637, "rewards/margins": 49.8408203125, "rewards/rejected": -51.85013961791992, "step": 16380 }, { "epoch": 71.57205240174673, "grad_norm": 6.663923381345377e-08, "learning_rate": 1.1331333668197169e-06, "logits/chosen": -1.496690273284912, "logits/rejected": -2.162174701690674, "logps/chosen": -485.3174743652344, "logps/rejected": -4920.57275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0351994037628174, "rewards/margins": 44.42694091796875, "rewards/rejected": -46.46213912963867, "step": 16390 }, { "epoch": 71.61572052401746, "grad_norm": 1.7461019097150232e-07, "learning_rate": 1.1299442117914777e-06, "logits/chosen": -1.5467867851257324, "logits/rejected": -2.235032320022583, "logps/chosen": -506.8978576660156, "logps/rejected": -4947.0009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0757088661193848, "rewards/margins": 44.68586349487305, "rewards/rejected": -46.761573791503906, "step": 16400 }, { "epoch": 71.6593886462882, "grad_norm": 1.2568323811724735e-07, "learning_rate": 1.1267582400951998e-06, "logits/chosen": -1.5171594619750977, "logits/rejected": -2.236454963684082, "logps/chosen": -472.03656005859375, "logps/rejected": -5153.427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.061649799346924, "rewards/margins": 46.64670181274414, "rewards/rejected": -48.70835494995117, "step": 16410 }, { "epoch": 71.70305676855895, "grad_norm": 7.881639950969443e-08, "learning_rate": 1.123575459133504e-06, "logits/chosen": -1.5708162784576416, "logits/rejected": -2.1740305423736572, "logps/chosen": -479.9773864746094, "logps/rejected": -4985.8642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0632271766662598, "rewards/margins": 44.9208984375, "rewards/rejected": -46.98412322998047, "step": 16420 }, { "epoch": 71.7467248908297, "grad_norm": 7.302903580906894e-06, "learning_rate": 1.1203958763016007e-06, "logits/chosen": -1.511374831199646, "logits/rejected": -2.106614589691162, "logps/chosen": -511.8898010253906, "logps/rejected": -4590.5458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1018340587615967, "rewards/margins": 41.297733306884766, "rewards/rejected": -43.399566650390625, "step": 16430 }, { "epoch": 71.79039301310044, "grad_norm": 4.1081410975304984e-05, "learning_rate": 1.1172194989872657e-06, "logits/chosen": -1.604804277420044, "logits/rejected": -2.282560110092163, "logps/chosen": -467.96624755859375, "logps/rejected": -5367.638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.137197256088257, "rewards/margins": 48.55754089355469, "rewards/rejected": -50.694740295410156, "step": 16440 }, { "epoch": 71.83406113537117, "grad_norm": 2.607854882079359e-07, "learning_rate": 1.1140463345708303e-06, "logits/chosen": -1.5298020839691162, "logits/rejected": -2.2233452796936035, "logps/chosen": -490.5372009277344, "logps/rejected": -5074.984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0224807262420654, "rewards/margins": 45.84126663208008, "rewards/rejected": -47.86375045776367, "step": 16450 }, { "epoch": 71.87772925764192, "grad_norm": 2.1848818277685435e-07, "learning_rate": 1.1108763904251573e-06, "logits/chosen": -1.515533685684204, "logits/rejected": -2.072023391723633, "logps/chosen": -489.913330078125, "logps/rejected": -4721.2880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9574816226959229, "rewards/margins": 42.489986419677734, "rewards/rejected": -44.447471618652344, "step": 16460 }, { "epoch": 71.92139737991266, "grad_norm": 2.1880595754863733e-05, "learning_rate": 1.1077096739156304e-06, "logits/chosen": -1.562440037727356, "logits/rejected": -2.2341065406799316, "logps/chosen": -477.2184143066406, "logps/rejected": -5077.9599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9892399311065674, "rewards/margins": 46.01566696166992, "rewards/rejected": -48.00490188598633, "step": 16470 }, { "epoch": 71.9650655021834, "grad_norm": 3.0358397076746647e-06, "learning_rate": 1.1045461924001325e-06, "logits/chosen": -1.5233168601989746, "logits/rejected": -2.1685051918029785, "logps/chosen": -487.5445251464844, "logps/rejected": -4908.4140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.008352756500244, "rewards/margins": 44.4114990234375, "rewards/rejected": -46.41985321044922, "step": 16480 }, { "epoch": 72.00873362445415, "grad_norm": 3.4880806917425844e-07, "learning_rate": 1.1013859532290283e-06, "logits/chosen": -1.5212476253509521, "logits/rejected": -2.172212600708008, "logps/chosen": -486.4403381347656, "logps/rejected": -4911.5263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9964815378189087, "rewards/margins": 44.36435317993164, "rewards/rejected": -46.36083984375, "step": 16490 }, { "epoch": 72.0524017467249, "grad_norm": 4.798698204676441e-07, "learning_rate": 1.0982289637451523e-06, "logits/chosen": -1.5741153955459595, "logits/rejected": -2.2823588848114014, "logps/chosen": -445.96978759765625, "logps/rejected": -5517.240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.021470069885254, "rewards/margins": 50.210350036621094, "rewards/rejected": -52.2318229675293, "step": 16500 }, { "epoch": 72.09606986899563, "grad_norm": 5.3766593637331095e-08, "learning_rate": 1.0950752312837846e-06, "logits/chosen": -1.502290964126587, "logits/rejected": -2.182279109954834, "logps/chosen": -467.2933044433594, "logps/rejected": -4791.654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9198226928710938, "rewards/margins": 43.333580017089844, "rewards/rejected": -45.2534065246582, "step": 16510 }, { "epoch": 72.13973799126637, "grad_norm": 1.35602341052343e-07, "learning_rate": 1.0919247631726412e-06, "logits/chosen": -1.5525505542755127, "logits/rejected": -2.3396525382995605, "logps/chosen": -478.382080078125, "logps/rejected": -5465.91650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9595963954925537, "rewards/margins": 49.80097198486328, "rewards/rejected": -51.76055908203125, "step": 16520 }, { "epoch": 72.18340611353712, "grad_norm": 5.140697936944069e-07, "learning_rate": 1.08877756673185e-06, "logits/chosen": -1.490734338760376, "logits/rejected": -2.1376917362213135, "logps/chosen": -511.835205078125, "logps/rejected": -4574.94091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0428943634033203, "rewards/margins": 41.22228240966797, "rewards/rejected": -43.265174865722656, "step": 16530 }, { "epoch": 72.22707423580786, "grad_norm": 3.02139825189109e-06, "learning_rate": 1.0856336492739408e-06, "logits/chosen": -1.5555689334869385, "logits/rejected": -2.2163238525390625, "logps/chosen": -454.83990478515625, "logps/rejected": -5208.892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9664701223373413, "rewards/margins": 47.25320816040039, "rewards/rejected": -49.21968078613281, "step": 16540 }, { "epoch": 72.2707423580786, "grad_norm": 3.8166578917136516e-06, "learning_rate": 1.08249301810382e-06, "logits/chosen": -1.5684826374053955, "logits/rejected": -2.311370611190796, "logps/chosen": -480.8885192871094, "logps/rejected": -5035.27978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0528411865234375, "rewards/margins": 45.61363220214844, "rewards/rejected": -47.666473388671875, "step": 16550 }, { "epoch": 72.31441048034935, "grad_norm": 1.5180855730965766e-07, "learning_rate": 1.079355680518763e-06, "logits/chosen": -1.5479731559753418, "logits/rejected": -2.195298194885254, "logps/chosen": -474.92718505859375, "logps/rejected": -5215.7236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.150768756866455, "rewards/margins": 47.171775817871094, "rewards/rejected": -49.322547912597656, "step": 16560 }, { "epoch": 72.35807860262008, "grad_norm": 4.440451170347264e-08, "learning_rate": 1.0762216438083905e-06, "logits/chosen": -1.5604922771453857, "logits/rejected": -2.2988672256469727, "logps/chosen": -482.82183837890625, "logps/rejected": -5420.82275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0849387645721436, "rewards/margins": 49.19118118286133, "rewards/rejected": -51.27611541748047, "step": 16570 }, { "epoch": 72.40174672489083, "grad_norm": 3.719937889590742e-08, "learning_rate": 1.0730909152546521e-06, "logits/chosen": -1.5721595287322998, "logits/rejected": -2.3317267894744873, "logps/chosen": -475.1343688964844, "logps/rejected": -5380.58740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0080864429473877, "rewards/margins": 48.8605842590332, "rewards/rejected": -50.86866760253906, "step": 16580 }, { "epoch": 72.44541484716157, "grad_norm": 0.00014066935425243165, "learning_rate": 1.0699635021318139e-06, "logits/chosen": -1.5389584302902222, "logits/rejected": -2.1598854064941406, "logps/chosen": -498.37078857421875, "logps/rejected": -4979.130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.038012981414795, "rewards/margins": 45.028385162353516, "rewards/rejected": -47.0663948059082, "step": 16590 }, { "epoch": 72.48908296943232, "grad_norm": 4.8806263293872974e-08, "learning_rate": 1.0668394117064365e-06, "logits/chosen": -1.5225152969360352, "logits/rejected": -2.201777935028076, "logps/chosen": -504.90087890625, "logps/rejected": -5140.04150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.061842918395996, "rewards/margins": 46.5479621887207, "rewards/rejected": -48.60980224609375, "step": 16600 }, { "epoch": 72.53275109170306, "grad_norm": 4.949225673411107e-08, "learning_rate": 1.063718651237359e-06, "logits/chosen": -1.523221731185913, "logits/rejected": -2.195481061935425, "logps/chosen": -509.58660888671875, "logps/rejected": -4947.921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1522367000579834, "rewards/margins": 44.566070556640625, "rewards/rejected": -46.71830749511719, "step": 16610 }, { "epoch": 72.5764192139738, "grad_norm": 3.6469601881815945e-06, "learning_rate": 1.060601227975688e-06, "logits/chosen": -1.5376088619232178, "logits/rejected": -2.1855294704437256, "logps/chosen": -512.0128173828125, "logps/rejected": -5077.1650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0279464721679688, "rewards/margins": 45.87759780883789, "rewards/rejected": -47.905540466308594, "step": 16620 }, { "epoch": 72.62008733624454, "grad_norm": 1.8443340594354213e-05, "learning_rate": 1.0574871491647724e-06, "logits/chosen": -1.5710272789001465, "logits/rejected": -2.266608715057373, "logps/chosen": -458.9325256347656, "logps/rejected": -5447.21240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0944747924804688, "rewards/margins": 49.414730072021484, "rewards/rejected": -51.50920486450195, "step": 16630 }, { "epoch": 72.66375545851528, "grad_norm": 2.1745249734246595e-06, "learning_rate": 1.054376422040189e-06, "logits/chosen": -1.5870659351348877, "logits/rejected": -2.2579185962677, "logps/chosen": -468.8096618652344, "logps/rejected": -5500.19140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0674800872802734, "rewards/margins": 49.95899200439453, "rewards/rejected": -52.02647018432617, "step": 16640 }, { "epoch": 72.70742358078603, "grad_norm": 2.383697308946751e-07, "learning_rate": 1.0512690538297313e-06, "logits/chosen": -1.5219838619232178, "logits/rejected": -2.099872589111328, "logps/chosen": -492.32891845703125, "logps/rejected": -4741.650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0840208530426025, "rewards/margins": 42.69744873046875, "rewards/rejected": -44.78146743774414, "step": 16650 }, { "epoch": 72.75109170305677, "grad_norm": 9.390123158966195e-07, "learning_rate": 1.0481650517533843e-06, "logits/chosen": -1.5530263185501099, "logits/rejected": -2.1811463832855225, "logps/chosen": -483.46337890625, "logps/rejected": -5003.2236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0100326538085938, "rewards/margins": 45.224185943603516, "rewards/rejected": -47.234214782714844, "step": 16660 }, { "epoch": 72.79475982532752, "grad_norm": 1.565873874333261e-07, "learning_rate": 1.0450644230233137e-06, "logits/chosen": -1.5059921741485596, "logits/rejected": -2.0440640449523926, "logps/chosen": -529.6397705078125, "logps/rejected": -4294.41259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1008195877075195, "rewards/margins": 38.37276077270508, "rewards/rejected": -40.47357940673828, "step": 16670 }, { "epoch": 72.83842794759825, "grad_norm": 2.7437646255516326e-08, "learning_rate": 1.0419671748438486e-06, "logits/chosen": -1.5773427486419678, "logits/rejected": -2.224491596221924, "logps/chosen": -480.4296875, "logps/rejected": -5302.6748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.046046257019043, "rewards/margins": 48.04249572753906, "rewards/rejected": -50.08854293823242, "step": 16680 }, { "epoch": 72.882096069869, "grad_norm": 9.716599347866023e-07, "learning_rate": 1.0388733144114605e-06, "logits/chosen": -1.5577988624572754, "logits/rejected": -2.206200122833252, "logps/chosen": -478.3238220214844, "logps/rejected": -4997.1826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.042773485183716, "rewards/margins": 45.09539031982422, "rewards/rejected": -47.13816452026367, "step": 16690 }, { "epoch": 72.92576419213974, "grad_norm": 5.2350850429368335e-06, "learning_rate": 1.035782848914749e-06, "logits/chosen": -1.5212233066558838, "logits/rejected": -2.1575417518615723, "logps/chosen": -517.7586669921875, "logps/rejected": -4829.38623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.094076633453369, "rewards/margins": 43.48552322387695, "rewards/rejected": -45.5796012878418, "step": 16700 }, { "epoch": 72.96943231441048, "grad_norm": 7.5241202633633e-07, "learning_rate": 1.0326957855344305e-06, "logits/chosen": -1.5025551319122314, "logits/rejected": -2.1193535327911377, "logps/chosen": -493.70904541015625, "logps/rejected": -4593.966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.085026979446411, "rewards/margins": 41.303218841552734, "rewards/rejected": -43.388248443603516, "step": 16710 }, { "epoch": 73.01310043668123, "grad_norm": 2.9778266560320572e-08, "learning_rate": 1.029612131443312e-06, "logits/chosen": -1.5335355997085571, "logits/rejected": -2.1485047340393066, "logps/chosen": -480.8843688964844, "logps/rejected": -4880.46826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0383479595184326, "rewards/margins": 44.06157302856445, "rewards/rejected": -46.099918365478516, "step": 16720 }, { "epoch": 73.05676855895196, "grad_norm": 1.2782211272519163e-07, "learning_rate": 1.026531893806279e-06, "logits/chosen": -1.4968953132629395, "logits/rejected": -2.0585947036743164, "logps/chosen": -506.4622497558594, "logps/rejected": -4704.24853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.066641330718994, "rewards/margins": 42.30229568481445, "rewards/rejected": -44.368934631347656, "step": 16730 }, { "epoch": 73.1004366812227, "grad_norm": 2.1026980563677815e-08, "learning_rate": 1.0234550797802823e-06, "logits/chosen": -1.57510507106781, "logits/rejected": -2.2555902004241943, "logps/chosen": -468.58416748046875, "logps/rejected": -5429.8291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0653038024902344, "rewards/margins": 49.34462356567383, "rewards/rejected": -51.40993118286133, "step": 16740 }, { "epoch": 73.14410480349345, "grad_norm": 6.719062726382607e-07, "learning_rate": 1.0203816965143134e-06, "logits/chosen": -1.5015151500701904, "logits/rejected": -2.1418333053588867, "logps/chosen": -484.6119079589844, "logps/rejected": -4597.7041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9991273880004883, "rewards/margins": 41.40471267700195, "rewards/rejected": -43.403839111328125, "step": 16750 }, { "epoch": 73.1877729257642, "grad_norm": 9.770220329228416e-08, "learning_rate": 1.0173117511493962e-06, "logits/chosen": -1.5330771207809448, "logits/rejected": -2.1983652114868164, "logps/chosen": -505.0390625, "logps/rejected": -4820.998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.007676362991333, "rewards/margins": 43.499168395996094, "rewards/rejected": -45.50684356689453, "step": 16760 }, { "epoch": 73.23144104803494, "grad_norm": 9.437097809361315e-08, "learning_rate": 1.0142452508185656e-06, "logits/chosen": -1.5491389036178589, "logits/rejected": -2.260786533355713, "logps/chosen": -476.7389221191406, "logps/rejected": -4974.8037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0469837188720703, "rewards/margins": 44.967716217041016, "rewards/rejected": -47.01470184326172, "step": 16770 }, { "epoch": 73.27510917030568, "grad_norm": 1.4994466062295413e-07, "learning_rate": 1.0111822026468515e-06, "logits/chosen": -1.5637379884719849, "logits/rejected": -2.301193952560425, "logps/chosen": -495.3892517089844, "logps/rejected": -5360.4853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.167275905609131, "rewards/margins": 48.55290985107422, "rewards/rejected": -50.720184326171875, "step": 16780 }, { "epoch": 73.31877729257641, "grad_norm": 5.6338987783957585e-08, "learning_rate": 1.008122613751261e-06, "logits/chosen": -1.5316059589385986, "logits/rejected": -2.173288583755493, "logps/chosen": -506.9043884277344, "logps/rejected": -4614.53173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.132206678390503, "rewards/margins": 41.54144287109375, "rewards/rejected": -43.67365264892578, "step": 16790 }, { "epoch": 73.36244541484716, "grad_norm": 8.778110186374214e-07, "learning_rate": 1.0050664912407666e-06, "logits/chosen": -1.5100481510162354, "logits/rejected": -2.1551711559295654, "logps/chosen": -519.6949462890625, "logps/rejected": -4788.56640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.188960552215576, "rewards/margins": 43.063804626464844, "rewards/rejected": -45.252769470214844, "step": 16800 }, { "epoch": 73.4061135371179, "grad_norm": 2.329291606015025e-08, "learning_rate": 1.002013842216286e-06, "logits/chosen": -1.5482120513916016, "logits/rejected": -2.2133257389068604, "logps/chosen": -481.949951171875, "logps/rejected": -4968.2548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9693500995635986, "rewards/margins": 45.039207458496094, "rewards/rejected": -47.0085563659668, "step": 16810 }, { "epoch": 73.44978165938865, "grad_norm": 1.8400768510808756e-07, "learning_rate": 9.989646737706638e-07, "logits/chosen": -1.6246973276138306, "logits/rejected": -2.3667826652526855, "logps/chosen": -442.7994079589844, "logps/rejected": -5814.5810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9984734058380127, "rewards/margins": 53.04393768310547, "rewards/rejected": -55.04240036010742, "step": 16820 }, { "epoch": 73.4934497816594, "grad_norm": 8.368469316242453e-06, "learning_rate": 9.959189929886608e-07, "logits/chosen": -1.5449755191802979, "logits/rejected": -2.244891405105591, "logps/chosen": -492.9447326660156, "logps/rejected": -5278.64306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0252983570098877, "rewards/margins": 47.944244384765625, "rewards/rejected": -49.96954345703125, "step": 16830 }, { "epoch": 73.53711790393012, "grad_norm": 6.047075486693409e-08, "learning_rate": 9.928768069469314e-07, "logits/chosen": -1.5762611627578735, "logits/rejected": -2.276527166366577, "logps/chosen": -468.2183532714844, "logps/rejected": -5320.1318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.092625617980957, "rewards/margins": 48.20329666137695, "rewards/rejected": -50.295928955078125, "step": 16840 }, { "epoch": 73.58078602620087, "grad_norm": 1.5166650741333169e-07, "learning_rate": 9.898381227140115e-07, "logits/chosen": -1.5770690441131592, "logits/rejected": -2.3350601196289062, "logps/chosen": -452.5640563964844, "logps/rejected": -5394.9755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0086669921875, "rewards/margins": 49.019264221191406, "rewards/rejected": -51.027931213378906, "step": 16850 }, { "epoch": 73.62445414847161, "grad_norm": 1.364043026238298e-05, "learning_rate": 9.868029473503015e-07, "logits/chosen": -1.5185050964355469, "logits/rejected": -2.21333646774292, "logps/chosen": -513.3566284179688, "logps/rejected": -5211.62841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0524258613586426, "rewards/margins": 47.22362518310547, "rewards/rejected": -49.27605056762695, "step": 16860 }, { "epoch": 73.66812227074236, "grad_norm": 3.5040143431064885e-08, "learning_rate": 9.837712879080464e-07, "logits/chosen": -1.5284394025802612, "logits/rejected": -2.187720775604248, "logps/chosen": -485.44732666015625, "logps/rejected": -5096.998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9860429763793945, "rewards/margins": 46.26398468017578, "rewards/rejected": -48.25002670288086, "step": 16870 }, { "epoch": 73.7117903930131, "grad_norm": 6.791178996762907e-08, "learning_rate": 9.807431514313227e-07, "logits/chosen": -1.527571678161621, "logits/rejected": -2.1810333728790283, "logps/chosen": -486.04022216796875, "logps/rejected": -5178.9072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.022939443588257, "rewards/margins": 46.898704528808594, "rewards/rejected": -48.92164993286133, "step": 16880 }, { "epoch": 73.75545851528385, "grad_norm": 2.6112935130717807e-07, "learning_rate": 9.777185449560216e-07, "logits/chosen": -1.5576575994491577, "logits/rejected": -2.261733055114746, "logps/chosen": -487.524658203125, "logps/rejected": -5170.74609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0627036094665527, "rewards/margins": 46.81733703613281, "rewards/rejected": -48.880035400390625, "step": 16890 }, { "epoch": 73.79912663755458, "grad_norm": 8.329211463997191e-08, "learning_rate": 9.746974755098346e-07, "logits/chosen": -1.5039806365966797, "logits/rejected": -2.269275188446045, "logps/chosen": -502.991455078125, "logps/rejected": -5056.08349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1605467796325684, "rewards/margins": 45.64634323120117, "rewards/rejected": -47.806888580322266, "step": 16900 }, { "epoch": 73.84279475982532, "grad_norm": 4.145915919882176e-08, "learning_rate": 9.7167995011223e-07, "logits/chosen": -1.555320143699646, "logits/rejected": -2.2618751525878906, "logps/chosen": -503.83465576171875, "logps/rejected": -5183.63427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2122137546539307, "rewards/margins": 46.82286071777344, "rewards/rejected": -49.03507614135742, "step": 16910 }, { "epoch": 73.88646288209607, "grad_norm": 2.634688967159713e-06, "learning_rate": 9.686659757744467e-07, "logits/chosen": -1.5734977722167969, "logits/rejected": -2.203158140182495, "logps/chosen": -494.7818298339844, "logps/rejected": -5068.87744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0963282585144043, "rewards/margins": 45.854679107666016, "rewards/rejected": -47.951011657714844, "step": 16920 }, { "epoch": 73.93013100436681, "grad_norm": 3.564831788766981e-08, "learning_rate": 9.656555594994688e-07, "logits/chosen": -1.574498176574707, "logits/rejected": -2.3353896141052246, "logps/chosen": -494.7876892089844, "logps/rejected": -5716.05712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9992549419403076, "rewards/margins": 52.173095703125, "rewards/rejected": -54.17235565185547, "step": 16930 }, { "epoch": 73.97379912663756, "grad_norm": 1.7252055112835262e-05, "learning_rate": 9.626487082820132e-07, "logits/chosen": -1.4823716878890991, "logits/rejected": -2.137070655822754, "logps/chosen": -489.3260803222656, "logps/rejected": -4658.84033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.098966598510742, "rewards/margins": 41.946372985839844, "rewards/rejected": -44.04533767700195, "step": 16940 }, { "epoch": 74.0174672489083, "grad_norm": 1.6306007985908494e-05, "learning_rate": 9.59645429108518e-07, "logits/chosen": -1.5403363704681396, "logits/rejected": -2.2288708686828613, "logps/chosen": -496.7203063964844, "logps/rejected": -5038.87109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0444672107696533, "rewards/margins": 45.69407272338867, "rewards/rejected": -47.73854446411133, "step": 16950 }, { "epoch": 74.06113537117903, "grad_norm": 2.325563657283502e-07, "learning_rate": 9.56645728957117e-07, "logits/chosen": -1.5628986358642578, "logits/rejected": -2.2645468711853027, "logps/chosen": -483.3057556152344, "logps/rejected": -5093.02392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1190035343170166, "rewards/margins": 45.972755432128906, "rewards/rejected": -48.091758728027344, "step": 16960 }, { "epoch": 74.10480349344978, "grad_norm": 1.3583463177245994e-07, "learning_rate": 9.536496147976284e-07, "logits/chosen": -1.609283685684204, "logits/rejected": -2.3218226432800293, "logps/chosen": -466.75006103515625, "logps/rejected": -5636.6005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0413777828216553, "rewards/margins": 51.179908752441406, "rewards/rejected": -53.221290588378906, "step": 16970 }, { "epoch": 74.14847161572052, "grad_norm": 1.0514007204985844e-06, "learning_rate": 9.506570935915418e-07, "logits/chosen": -1.5199064016342163, "logits/rejected": -2.20080828666687, "logps/chosen": -502.71307373046875, "logps/rejected": -5002.3330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0342767238616943, "rewards/margins": 45.29444122314453, "rewards/rejected": -47.32871627807617, "step": 16980 }, { "epoch": 74.19213973799127, "grad_norm": 5.115311381631443e-07, "learning_rate": 9.47668172291994e-07, "logits/chosen": -1.5531738996505737, "logits/rejected": -2.305696725845337, "logps/chosen": -469.22698974609375, "logps/rejected": -5489.51416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0315957069396973, "rewards/margins": 49.89432907104492, "rewards/rejected": -51.92592239379883, "step": 16990 }, { "epoch": 74.23580786026201, "grad_norm": 1.8429122398812577e-07, "learning_rate": 9.446828578437603e-07, "logits/chosen": -1.5437653064727783, "logits/rejected": -2.2266221046447754, "logps/chosen": -511.6173400878906, "logps/rejected": -4939.1259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1181771755218506, "rewards/margins": 44.596168518066406, "rewards/rejected": -46.7143440246582, "step": 17000 }, { "epoch": 74.27947598253274, "grad_norm": 1.2350567481927236e-08, "learning_rate": 9.417011571832363e-07, "logits/chosen": -1.5862239599227905, "logits/rejected": -2.2976508140563965, "logps/chosen": -471.7152404785156, "logps/rejected": -5333.79150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0671281814575195, "rewards/margins": 48.249114990234375, "rewards/rejected": -50.316246032714844, "step": 17010 }, { "epoch": 74.32314410480349, "grad_norm": 2.53588420881991e-07, "learning_rate": 9.387230772384173e-07, "logits/chosen": -1.5595158338546753, "logits/rejected": -2.215867280960083, "logps/chosen": -480.0438537597656, "logps/rejected": -5190.2705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1089975833892822, "rewards/margins": 46.973350524902344, "rewards/rejected": -49.08234786987305, "step": 17020 }, { "epoch": 74.36681222707423, "grad_norm": 4.774939119755259e-08, "learning_rate": 9.357486249288891e-07, "logits/chosen": -1.5200501680374146, "logits/rejected": -2.1823315620422363, "logps/chosen": -483.0244140625, "logps/rejected": -4518.01806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.059532880783081, "rewards/margins": 40.60491180419922, "rewards/rejected": -42.66444778442383, "step": 17030 }, { "epoch": 74.41048034934498, "grad_norm": 5.055835254290544e-08, "learning_rate": 9.32777807165808e-07, "logits/chosen": -1.5069867372512817, "logits/rejected": -2.1948838233947754, "logps/chosen": -493.7545471191406, "logps/rejected": -4877.58984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0879616737365723, "rewards/margins": 43.977718353271484, "rewards/rejected": -46.065677642822266, "step": 17040 }, { "epoch": 74.45414847161572, "grad_norm": 1.0649788286038122e-07, "learning_rate": 9.298106308518847e-07, "logits/chosen": -1.579777717590332, "logits/rejected": -2.2626891136169434, "logps/chosen": -511.61468505859375, "logps/rejected": -5293.45751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1754941940307617, "rewards/margins": 47.936866760253906, "rewards/rejected": -50.11235809326172, "step": 17050 }, { "epoch": 74.49781659388647, "grad_norm": 8.157634300233997e-08, "learning_rate": 9.268471028813683e-07, "logits/chosen": -1.474181056022644, "logits/rejected": -2.121483564376831, "logps/chosen": -505.98724365234375, "logps/rejected": -4629.32421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2503886222839355, "rewards/margins": 41.56143569946289, "rewards/rejected": -43.81182098388672, "step": 17060 }, { "epoch": 74.5414847161572, "grad_norm": 2.586845343606097e-07, "learning_rate": 9.238872301400331e-07, "logits/chosen": -1.5212739706039429, "logits/rejected": -2.1798856258392334, "logps/chosen": -489.8119201660156, "logps/rejected": -4959.0185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0627570152282715, "rewards/margins": 44.82457733154297, "rewards/rejected": -46.887325286865234, "step": 17070 }, { "epoch": 74.58515283842794, "grad_norm": 5.049721286935199e-08, "learning_rate": 9.209310195051582e-07, "logits/chosen": -1.5673414468765259, "logits/rejected": -2.220824718475342, "logps/chosen": -478.6371154785156, "logps/rejected": -5217.8681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9731245040893555, "rewards/margins": 47.331443786621094, "rewards/rejected": -49.304561614990234, "step": 17080 }, { "epoch": 74.62882096069869, "grad_norm": 1.9615690931283244e-07, "learning_rate": 9.179784778455153e-07, "logits/chosen": -1.5996406078338623, "logits/rejected": -2.32774019241333, "logps/chosen": -474.42974853515625, "logps/rejected": -5493.2119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0883243083953857, "rewards/margins": 49.92566680908203, "rewards/rejected": -52.01399612426758, "step": 17090 }, { "epoch": 74.67248908296943, "grad_norm": 1.545656832452233e-07, "learning_rate": 9.150296120213517e-07, "logits/chosen": -1.5516612529754639, "logits/rejected": -2.243556022644043, "logps/chosen": -480.7969665527344, "logps/rejected": -4963.67041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0981602668762207, "rewards/margins": 44.87146759033203, "rewards/rejected": -46.96962356567383, "step": 17100 }, { "epoch": 74.71615720524018, "grad_norm": 1.9747627482655195e-06, "learning_rate": 9.120844288843714e-07, "logits/chosen": -1.5664869546890259, "logits/rejected": -2.309048891067505, "logps/chosen": -477.70263671875, "logps/rejected": -5553.9072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0345587730407715, "rewards/margins": 50.474510192871094, "rewards/rejected": -52.509071350097656, "step": 17110 }, { "epoch": 74.75982532751091, "grad_norm": 6.507801333314064e-07, "learning_rate": 9.091429352777245e-07, "logits/chosen": -1.4980403184890747, "logits/rejected": -2.1431727409362793, "logps/chosen": -490.21337890625, "logps/rejected": -4866.79638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9604694843292236, "rewards/margins": 44.06129455566406, "rewards/rejected": -46.021759033203125, "step": 17120 }, { "epoch": 74.80349344978166, "grad_norm": 4.65935109340129e-08, "learning_rate": 9.062051380359857e-07, "logits/chosen": -1.6102354526519775, "logits/rejected": -2.23724627494812, "logps/chosen": -456.345703125, "logps/rejected": -5299.7568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1029036045074463, "rewards/margins": 47.93202209472656, "rewards/rejected": -50.03492736816406, "step": 17130 }, { "epoch": 74.8471615720524, "grad_norm": 3.381575908999329e-08, "learning_rate": 9.032710439851444e-07, "logits/chosen": -1.5778038501739502, "logits/rejected": -2.3487296104431152, "logps/chosen": -488.81927490234375, "logps/rejected": -5824.9921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.014099597930908, "rewards/margins": 53.16375732421875, "rewards/rejected": -55.17784881591797, "step": 17140 }, { "epoch": 74.89082969432314, "grad_norm": 2.2965866538760295e-08, "learning_rate": 9.003406599425821e-07, "logits/chosen": -1.5368404388427734, "logits/rejected": -2.1772828102111816, "logps/chosen": -508.5589294433594, "logps/rejected": -4836.45458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.167860269546509, "rewards/margins": 43.49061584472656, "rewards/rejected": -45.658470153808594, "step": 17150 }, { "epoch": 74.93449781659389, "grad_norm": 2.9786111130113233e-06, "learning_rate": 8.974139927170633e-07, "logits/chosen": -1.5937087535858154, "logits/rejected": -2.2758803367614746, "logps/chosen": -470.43096923828125, "logps/rejected": -5392.91162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.02254056930542, "rewards/margins": 48.98891067504883, "rewards/rejected": -51.01144790649414, "step": 17160 }, { "epoch": 74.97816593886463, "grad_norm": 1.43772571086465e-07, "learning_rate": 8.944910491087136e-07, "logits/chosen": -1.5591139793395996, "logits/rejected": -2.2547333240509033, "logps/chosen": -469.7919006347656, "logps/rejected": -5141.32861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9389750957489014, "rewards/margins": 46.703224182128906, "rewards/rejected": -48.6422004699707, "step": 17170 }, { "epoch": 75.02183406113537, "grad_norm": 1.3010927625989092e-07, "learning_rate": 8.915718359090086e-07, "logits/chosen": -1.567814826965332, "logits/rejected": -2.245777130126953, "logps/chosen": -492.8226013183594, "logps/rejected": -5044.29150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.222503185272217, "rewards/margins": 45.44511032104492, "rewards/rejected": -47.6676139831543, "step": 17180 }, { "epoch": 75.06550218340611, "grad_norm": 1.851744935824256e-06, "learning_rate": 8.886563599007572e-07, "logits/chosen": -1.5588715076446533, "logits/rejected": -2.2094674110412598, "logps/chosen": -478.68621826171875, "logps/rejected": -5046.62548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.044840097427368, "rewards/margins": 45.61030578613281, "rewards/rejected": -47.655147552490234, "step": 17190 }, { "epoch": 75.10917030567686, "grad_norm": 3.9250490703535586e-07, "learning_rate": 8.857446278580812e-07, "logits/chosen": -1.529370665550232, "logits/rejected": -2.1345367431640625, "logps/chosen": -495.1468811035156, "logps/rejected": -4580.86767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0637903213500977, "rewards/margins": 41.091407775878906, "rewards/rejected": -43.15519714355469, "step": 17200 }, { "epoch": 75.1528384279476, "grad_norm": 3.266241801517723e-07, "learning_rate": 8.828366465464078e-07, "logits/chosen": -1.559741735458374, "logits/rejected": -2.2468273639678955, "logps/chosen": -485.8902282714844, "logps/rejected": -5137.67822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1243672370910645, "rewards/margins": 46.400726318359375, "rewards/rejected": -48.52509689331055, "step": 17210 }, { "epoch": 75.19650655021834, "grad_norm": 1.928227825200989e-08, "learning_rate": 8.799324227224448e-07, "logits/chosen": -1.5383577346801758, "logits/rejected": -2.117037534713745, "logps/chosen": -495.01007080078125, "logps/rejected": -4827.0859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.162529230117798, "rewards/margins": 43.35430145263672, "rewards/rejected": -45.5168342590332, "step": 17220 }, { "epoch": 75.24017467248909, "grad_norm": 1.0716339645147143e-07, "learning_rate": 8.770319631341745e-07, "logits/chosen": -1.5488598346710205, "logits/rejected": -2.207184076309204, "logps/chosen": -512.9387817382812, "logps/rejected": -4905.412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1804120540618896, "rewards/margins": 44.2103385925293, "rewards/rejected": -46.39075469970703, "step": 17230 }, { "epoch": 75.28384279475982, "grad_norm": 4.065392870232311e-06, "learning_rate": 8.741352745208276e-07, "logits/chosen": -1.6046186685562134, "logits/rejected": -2.3743062019348145, "logps/chosen": -454.48077392578125, "logps/rejected": -5773.47900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0248522758483887, "rewards/margins": 52.67908477783203, "rewards/rejected": -54.703941345214844, "step": 17240 }, { "epoch": 75.32751091703057, "grad_norm": 1.0365434310598556e-07, "learning_rate": 8.712423636128777e-07, "logits/chosen": -1.5834473371505737, "logits/rejected": -2.2455880641937256, "logps/chosen": -478.2728576660156, "logps/rejected": -5249.9521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1466727256774902, "rewards/margins": 47.48479461669922, "rewards/rejected": -49.631465911865234, "step": 17250 }, { "epoch": 75.37117903930131, "grad_norm": 7.2896442923886126e-06, "learning_rate": 8.683532371320175e-07, "logits/chosen": -1.5694561004638672, "logits/rejected": -2.2985520362854004, "logps/chosen": -498.90655517578125, "logps/rejected": -5258.9482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0485904216766357, "rewards/margins": 47.635765075683594, "rewards/rejected": -49.684349060058594, "step": 17260 }, { "epoch": 75.41484716157206, "grad_norm": 5.591361439282045e-06, "learning_rate": 8.65467901791148e-07, "logits/chosen": -1.6235735416412354, "logits/rejected": -2.297290563583374, "logps/chosen": -462.10626220703125, "logps/rejected": -5592.9716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.11576509475708, "rewards/margins": 50.642452239990234, "rewards/rejected": -52.75822067260742, "step": 17270 }, { "epoch": 75.4585152838428, "grad_norm": 4.6094711636069254e-08, "learning_rate": 8.625863642943625e-07, "logits/chosen": -1.5521939992904663, "logits/rejected": -2.3013501167297363, "logps/chosen": -481.5906677246094, "logps/rejected": -5334.02783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.036780595779419, "rewards/margins": 48.4516716003418, "rewards/rejected": -50.48845672607422, "step": 17280 }, { "epoch": 75.50218340611353, "grad_norm": 9.616138742760785e-07, "learning_rate": 8.597086313369271e-07, "logits/chosen": -1.6292591094970703, "logits/rejected": -2.312532901763916, "logps/chosen": -484.59423828125, "logps/rejected": -5607.611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1159110069274902, "rewards/margins": 50.87813949584961, "rewards/rejected": -52.994049072265625, "step": 17290 }, { "epoch": 75.54585152838428, "grad_norm": 4.6084119315272123e-07, "learning_rate": 8.568347096052714e-07, "logits/chosen": -1.5573208332061768, "logits/rejected": -2.32319974899292, "logps/chosen": -479.76123046875, "logps/rejected": -5180.19287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.084080934524536, "rewards/margins": 46.976776123046875, "rewards/rejected": -49.06085968017578, "step": 17300 }, { "epoch": 75.58951965065502, "grad_norm": 6.727091377770933e-08, "learning_rate": 8.539646057769672e-07, "logits/chosen": -1.5125153064727783, "logits/rejected": -2.1609504222869873, "logps/chosen": -504.9024353027344, "logps/rejected": -4785.50537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1914708614349365, "rewards/margins": 43.034915924072266, "rewards/rejected": -45.22639083862305, "step": 17310 }, { "epoch": 75.63318777292577, "grad_norm": 1.2248859533268513e-06, "learning_rate": 8.510983265207152e-07, "logits/chosen": -1.580060601234436, "logits/rejected": -2.3674371242523193, "logps/chosen": -451.47100830078125, "logps/rejected": -5564.60400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9724903106689453, "rewards/margins": 50.78972625732422, "rewards/rejected": -52.76221466064453, "step": 17320 }, { "epoch": 75.67685589519651, "grad_norm": 4.178322739149481e-06, "learning_rate": 8.482358784963312e-07, "logits/chosen": -1.5176531076431274, "logits/rejected": -2.165562391281128, "logps/chosen": -537.4932861328125, "logps/rejected": -4791.04833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1513190269470215, "rewards/margins": 43.057212829589844, "rewards/rejected": -45.208534240722656, "step": 17330 }, { "epoch": 75.72052401746726, "grad_norm": 6.037433977916588e-08, "learning_rate": 8.453772683547296e-07, "logits/chosen": -1.6071456670761108, "logits/rejected": -2.4728751182556152, "logps/chosen": -452.46197509765625, "logps/rejected": -5804.68212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0385828018188477, "rewards/margins": 53.005638122558594, "rewards/rejected": -55.044227600097656, "step": 17340 }, { "epoch": 75.76419213973799, "grad_norm": 1.943265350196803e-07, "learning_rate": 8.425225027379047e-07, "logits/chosen": -1.5613572597503662, "logits/rejected": -2.2824230194091797, "logps/chosen": -469.0834045410156, "logps/rejected": -5372.72705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.048161745071411, "rewards/margins": 48.6826057434082, "rewards/rejected": -50.73077392578125, "step": 17350 }, { "epoch": 75.80786026200873, "grad_norm": 4.2837879391613283e-07, "learning_rate": 8.396715882789206e-07, "logits/chosen": -1.5194190740585327, "logits/rejected": -2.1667118072509766, "logps/chosen": -499.8330078125, "logps/rejected": -4694.9931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.177760362625122, "rewards/margins": 42.134464263916016, "rewards/rejected": -44.312232971191406, "step": 17360 }, { "epoch": 75.85152838427948, "grad_norm": 1.2058988866127538e-07, "learning_rate": 8.368245316018927e-07, "logits/chosen": -1.5600725412368774, "logits/rejected": -2.2648186683654785, "logps/chosen": -474.26953125, "logps/rejected": -5090.3505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.084118366241455, "rewards/margins": 46.037681579589844, "rewards/rejected": -48.121803283691406, "step": 17370 }, { "epoch": 75.89519650655022, "grad_norm": 3.055074642177744e-07, "learning_rate": 8.339813393219715e-07, "logits/chosen": -1.5317928791046143, "logits/rejected": -2.2142281532287598, "logps/chosen": -511.3606872558594, "logps/rejected": -5052.6298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.218567371368408, "rewards/margins": 45.48804473876953, "rewards/rejected": -47.70661163330078, "step": 17380 }, { "epoch": 75.93886462882097, "grad_norm": 2.293032410011042e-07, "learning_rate": 8.311420180453306e-07, "logits/chosen": -1.5518079996109009, "logits/rejected": -2.2872345447540283, "logps/chosen": -490.91107177734375, "logps/rejected": -5064.4892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1183109283447266, "rewards/margins": 45.76416778564453, "rewards/rejected": -47.882469177246094, "step": 17390 }, { "epoch": 75.9825327510917, "grad_norm": 7.212980535669394e-07, "learning_rate": 8.283065743691476e-07, "logits/chosen": -1.5217396020889282, "logits/rejected": -2.244783878326416, "logps/chosen": -529.3804931640625, "logps/rejected": -4901.4521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1541032791137695, "rewards/margins": 44.25351333618164, "rewards/rejected": -46.407615661621094, "step": 17400 }, { "epoch": 76.02620087336244, "grad_norm": 4.475804565656134e-08, "learning_rate": 8.254750148815893e-07, "logits/chosen": -1.538402795791626, "logits/rejected": -2.204352855682373, "logps/chosen": -509.80877685546875, "logps/rejected": -4894.1669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.129505157470703, "rewards/margins": 44.214447021484375, "rewards/rejected": -46.34395217895508, "step": 17410 }, { "epoch": 76.06986899563319, "grad_norm": 8.422136769871241e-08, "learning_rate": 8.226473461618025e-07, "logits/chosen": -1.4864139556884766, "logits/rejected": -2.1088550090789795, "logps/chosen": -529.6809692382812, "logps/rejected": -4478.53271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2411041259765625, "rewards/margins": 40.1042594909668, "rewards/rejected": -42.345359802246094, "step": 17420 }, { "epoch": 76.11353711790393, "grad_norm": 1.1192817946931125e-07, "learning_rate": 8.198235747798894e-07, "logits/chosen": -1.551371455192566, "logits/rejected": -2.263636350631714, "logps/chosen": -500.57421875, "logps/rejected": -5288.9951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1506552696228027, "rewards/margins": 47.89799880981445, "rewards/rejected": -50.04865646362305, "step": 17430 }, { "epoch": 76.15720524017468, "grad_norm": 2.138228378187624e-08, "learning_rate": 8.170037072968967e-07, "logits/chosen": -1.5661661624908447, "logits/rejected": -2.2808425426483154, "logps/chosen": -477.93939208984375, "logps/rejected": -4999.54150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1600821018218994, "rewards/margins": 45.168006896972656, "rewards/rejected": -47.328086853027344, "step": 17440 }, { "epoch": 76.20087336244542, "grad_norm": 2.360057161573852e-07, "learning_rate": 8.141877502648035e-07, "logits/chosen": -1.5774086713790894, "logits/rejected": -2.217646360397339, "logps/chosen": -478.25543212890625, "logps/rejected": -5441.77685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1136817932128906, "rewards/margins": 49.16707229614258, "rewards/rejected": -51.28075408935547, "step": 17450 }, { "epoch": 76.24454148471615, "grad_norm": 1.830167238291666e-07, "learning_rate": 8.113757102264991e-07, "logits/chosen": -1.5601706504821777, "logits/rejected": -2.269602060317993, "logps/chosen": -472.03668212890625, "logps/rejected": -5221.76025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1263844966888428, "rewards/margins": 47.33037567138672, "rewards/rejected": -49.45676040649414, "step": 17460 }, { "epoch": 76.2882096069869, "grad_norm": 1.5941822358536178e-07, "learning_rate": 8.085675937157747e-07, "logits/chosen": -1.510697603225708, "logits/rejected": -2.200955867767334, "logps/chosen": -521.8001098632812, "logps/rejected": -4969.546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2264721393585205, "rewards/margins": 44.793270111083984, "rewards/rejected": -47.01974105834961, "step": 17470 }, { "epoch": 76.33187772925764, "grad_norm": 1.9642359362698622e-07, "learning_rate": 8.057634072573048e-07, "logits/chosen": -1.5605323314666748, "logits/rejected": -2.180887222290039, "logps/chosen": -513.1412963867188, "logps/rejected": -4804.3369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.109276056289673, "rewards/margins": 43.216064453125, "rewards/rejected": -45.325340270996094, "step": 17480 }, { "epoch": 76.37554585152839, "grad_norm": 2.0552645711143258e-07, "learning_rate": 8.029631573666305e-07, "logits/chosen": -1.5543962717056274, "logits/rejected": -2.1391425132751465, "logps/chosen": -480.77410888671875, "logps/rejected": -5030.02197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1020188331604004, "rewards/margins": 45.471275329589844, "rewards/rejected": -47.57329559326172, "step": 17490 }, { "epoch": 76.41921397379913, "grad_norm": 3.373686003781738e-07, "learning_rate": 8.001668505501464e-07, "logits/chosen": -1.5281187295913696, "logits/rejected": -2.232811689376831, "logps/chosen": -493.952392578125, "logps/rejected": -5218.74609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0871121883392334, "rewards/margins": 47.3055419921875, "rewards/rejected": -49.39265823364258, "step": 17500 }, { "epoch": 76.46288209606988, "grad_norm": 1.0052723748835214e-07, "learning_rate": 7.973744933050892e-07, "logits/chosen": -1.5894837379455566, "logits/rejected": -2.415571689605713, "logps/chosen": -451.4183044433594, "logps/rejected": -6189.74365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.009143829345703, "rewards/margins": 56.53083038330078, "rewards/rejected": -58.53997039794922, "step": 17510 }, { "epoch": 76.5065502183406, "grad_norm": 3.52413921640989e-08, "learning_rate": 7.945860921195142e-07, "logits/chosen": -1.540244221687317, "logits/rejected": -2.2986488342285156, "logps/chosen": -492.10137939453125, "logps/rejected": -4994.1025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.138068675994873, "rewards/margins": 45.0892333984375, "rewards/rejected": -47.22730255126953, "step": 17520 }, { "epoch": 76.55021834061135, "grad_norm": 6.168861737992991e-06, "learning_rate": 7.918016534722861e-07, "logits/chosen": -1.5326412916183472, "logits/rejected": -2.2495603561401367, "logps/chosen": -498.753173828125, "logps/rejected": -5139.8056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0692391395568848, "rewards/margins": 46.529048919677734, "rewards/rejected": -48.59828567504883, "step": 17530 }, { "epoch": 76.5938864628821, "grad_norm": 5.550986697798094e-08, "learning_rate": 7.890211838330642e-07, "logits/chosen": -1.572508454322815, "logits/rejected": -2.3039255142211914, "logps/chosen": -494.89288330078125, "logps/rejected": -5722.8203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0555741786956787, "rewards/margins": 52.08788299560547, "rewards/rejected": -54.143463134765625, "step": 17540 }, { "epoch": 76.63755458515284, "grad_norm": 3.4101778348356e-07, "learning_rate": 7.862446896622833e-07, "logits/chosen": -1.5279654264450073, "logits/rejected": -2.1050636768341064, "logps/chosen": -500.6246032714844, "logps/rejected": -4886.33154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0612168312072754, "rewards/margins": 44.0349235534668, "rewards/rejected": -46.09614181518555, "step": 17550 }, { "epoch": 76.68122270742359, "grad_norm": 2.0213072136828786e-08, "learning_rate": 7.834721774111431e-07, "logits/chosen": -1.5472685098648071, "logits/rejected": -2.1903598308563232, "logps/chosen": -461.8130798339844, "logps/rejected": -5388.51904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0020768642425537, "rewards/margins": 48.84917449951172, "rewards/rejected": -50.85124969482422, "step": 17560 }, { "epoch": 76.72489082969432, "grad_norm": 3.672778150096235e-08, "learning_rate": 7.807036535215915e-07, "logits/chosen": -1.4962573051452637, "logits/rejected": -2.1480255126953125, "logps/chosen": -506.3169860839844, "logps/rejected": -4755.021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0933876037597656, "rewards/margins": 42.8555908203125, "rewards/rejected": -44.948978424072266, "step": 17570 }, { "epoch": 76.76855895196506, "grad_norm": 1.784861424828616e-06, "learning_rate": 7.779391244263079e-07, "logits/chosen": -1.5783271789550781, "logits/rejected": -2.3437137603759766, "logps/chosen": -462.0106506347656, "logps/rejected": -5316.1806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.033573627471924, "rewards/margins": 48.2994384765625, "rewards/rejected": -50.333011627197266, "step": 17580 }, { "epoch": 76.8122270742358, "grad_norm": 1.0020573448483388e-07, "learning_rate": 7.751785965486894e-07, "logits/chosen": -1.553841471672058, "logits/rejected": -2.1897330284118652, "logps/chosen": -502.7197265625, "logps/rejected": -4748.92236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.193828821182251, "rewards/margins": 42.69153594970703, "rewards/rejected": -44.8853645324707, "step": 17590 }, { "epoch": 76.85589519650655, "grad_norm": 8.99540261937486e-09, "learning_rate": 7.724220763028381e-07, "logits/chosen": -1.5349124670028687, "logits/rejected": -2.2840070724487305, "logps/chosen": -513.4065551757812, "logps/rejected": -5313.28271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.088693857192993, "rewards/margins": 48.246212005615234, "rewards/rejected": -50.33490753173828, "step": 17600 }, { "epoch": 76.8995633187773, "grad_norm": 2.8047012541032114e-08, "learning_rate": 7.696695700935447e-07, "logits/chosen": -1.5404460430145264, "logits/rejected": -2.194408893585205, "logps/chosen": -527.6150512695312, "logps/rejected": -4842.0751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1251890659332275, "rewards/margins": 43.57947540283203, "rewards/rejected": -45.70466613769531, "step": 17610 }, { "epoch": 76.94323144104804, "grad_norm": 4.4475208842298894e-07, "learning_rate": 7.669210843162705e-07, "logits/chosen": -1.5566335916519165, "logits/rejected": -2.260542154312134, "logps/chosen": -471.57879638671875, "logps/rejected": -5188.43603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.104754686355591, "rewards/margins": 46.7780876159668, "rewards/rejected": -48.88283920288086, "step": 17620 }, { "epoch": 76.98689956331877, "grad_norm": 1.8653896617581004e-07, "learning_rate": 7.641766253571381e-07, "logits/chosen": -1.52683424949646, "logits/rejected": -2.2798666954040527, "logps/chosen": -482.896728515625, "logps/rejected": -5232.03271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0499699115753174, "rewards/margins": 47.47846603393555, "rewards/rejected": -49.52843475341797, "step": 17630 }, { "epoch": 77.03056768558952, "grad_norm": 1.430918256398424e-07, "learning_rate": 7.61436199592912e-07, "logits/chosen": -1.5668723583221436, "logits/rejected": -2.283461093902588, "logps/chosen": -487.3564453125, "logps/rejected": -5229.169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2086472511291504, "rewards/margins": 47.17548370361328, "rewards/rejected": -49.384132385253906, "step": 17640 }, { "epoch": 77.07423580786026, "grad_norm": 1.3978333796347883e-07, "learning_rate": 7.586998133909848e-07, "logits/chosen": -1.609471082687378, "logits/rejected": -2.391422986984253, "logps/chosen": -462.52349853515625, "logps/rejected": -5895.6240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.071927785873413, "rewards/margins": 53.71466827392578, "rewards/rejected": -55.786598205566406, "step": 17650 }, { "epoch": 77.117903930131, "grad_norm": 2.6898879743055543e-07, "learning_rate": 7.559674731093672e-07, "logits/chosen": -1.5583319664001465, "logits/rejected": -2.3622887134552, "logps/chosen": -475.31292724609375, "logps/rejected": -5698.75634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0452611446380615, "rewards/margins": 51.96388626098633, "rewards/rejected": -54.00914764404297, "step": 17660 }, { "epoch": 77.16157205240175, "grad_norm": 6.983926295274531e-07, "learning_rate": 7.532391850966653e-07, "logits/chosen": -1.523988962173462, "logits/rejected": -2.2342495918273926, "logps/chosen": -491.8072814941406, "logps/rejected": -5082.42626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1289334297180176, "rewards/margins": 46.013694763183594, "rewards/rejected": -48.14263153076172, "step": 17670 }, { "epoch": 77.20524017467248, "grad_norm": 3.9004729369744037e-07, "learning_rate": 7.505149556920698e-07, "logits/chosen": -1.5754526853561401, "logits/rejected": -2.265498399734497, "logps/chosen": -478.4258728027344, "logps/rejected": -5371.92578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1120736598968506, "rewards/margins": 48.77333068847656, "rewards/rejected": -50.885406494140625, "step": 17680 }, { "epoch": 77.24890829694323, "grad_norm": 3.2935828588530757e-08, "learning_rate": 7.477947912253436e-07, "logits/chosen": -1.5849485397338867, "logits/rejected": -2.3794474601745605, "logps/chosen": -464.3802185058594, "logps/rejected": -5555.6123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0544233322143555, "rewards/margins": 50.564002990722656, "rewards/rejected": -52.61842727661133, "step": 17690 }, { "epoch": 77.29257641921397, "grad_norm": 3.207409568117743e-08, "learning_rate": 7.450786980168037e-07, "logits/chosen": -1.5042797327041626, "logits/rejected": -2.210176706314087, "logps/chosen": -503.423095703125, "logps/rejected": -4978.2763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.169160842895508, "rewards/margins": 44.890357971191406, "rewards/rejected": -47.05952453613281, "step": 17700 }, { "epoch": 77.33624454148472, "grad_norm": 1.6356258392518437e-08, "learning_rate": 7.423666823773057e-07, "logits/chosen": -1.5211480855941772, "logits/rejected": -2.265665054321289, "logps/chosen": -514.6231689453125, "logps/rejected": -5230.4306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3541646003723145, "rewards/margins": 47.166893005371094, "rewards/rejected": -49.521053314208984, "step": 17710 }, { "epoch": 77.37991266375546, "grad_norm": 4.072804679646526e-08, "learning_rate": 7.39658750608234e-07, "logits/chosen": -1.566131353378296, "logits/rejected": -2.219505548477173, "logps/chosen": -503.41058349609375, "logps/rejected": -4953.8154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.077516794204712, "rewards/margins": 44.77325439453125, "rewards/rejected": -46.850772857666016, "step": 17720 }, { "epoch": 77.4235807860262, "grad_norm": 1.1335765882181756e-07, "learning_rate": 7.369549090014821e-07, "logits/chosen": -1.5486040115356445, "logits/rejected": -2.279125213623047, "logps/chosen": -493.400390625, "logps/rejected": -5272.9443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1218631267547607, "rewards/margins": 47.70347213745117, "rewards/rejected": -49.82533264160156, "step": 17730 }, { "epoch": 77.46724890829694, "grad_norm": 7.046867518072294e-07, "learning_rate": 7.342551638394385e-07, "logits/chosen": -1.5653743743896484, "logits/rejected": -2.256406307220459, "logps/chosen": -507.1962890625, "logps/rejected": -5031.0478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.028327226638794, "rewards/margins": 45.526161193847656, "rewards/rejected": -47.55448913574219, "step": 17740 }, { "epoch": 77.51091703056768, "grad_norm": 2.4140152024991375e-08, "learning_rate": 7.315595213949791e-07, "logits/chosen": -1.560900092124939, "logits/rejected": -2.263974666595459, "logps/chosen": -491.4043884277344, "logps/rejected": -5171.7509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1188902854919434, "rewards/margins": 46.76490783691406, "rewards/rejected": -48.88379669189453, "step": 17750 }, { "epoch": 77.55458515283843, "grad_norm": 3.250505868317404e-06, "learning_rate": 7.288679879314417e-07, "logits/chosen": -1.5376485586166382, "logits/rejected": -2.27022123336792, "logps/chosen": -483.1536560058594, "logps/rejected": -5561.4892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1252236366271973, "rewards/margins": 50.53376388549805, "rewards/rejected": -52.65899658203125, "step": 17760 }, { "epoch": 77.59825327510917, "grad_norm": 1.7291043973796017e-08, "learning_rate": 7.261805697026178e-07, "logits/chosen": -1.4838443994522095, "logits/rejected": -2.1561269760131836, "logps/chosen": -524.5227661132812, "logps/rejected": -4681.70458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.14980411529541, "rewards/margins": 42.163150787353516, "rewards/rejected": -44.312950134277344, "step": 17770 }, { "epoch": 77.64192139737992, "grad_norm": 2.568318010817876e-07, "learning_rate": 7.234972729527401e-07, "logits/chosen": -1.5581250190734863, "logits/rejected": -2.237409830093384, "logps/chosen": -515.911865234375, "logps/rejected": -4709.05126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.187102794647217, "rewards/margins": 42.22227096557617, "rewards/rejected": -44.40937423706055, "step": 17780 }, { "epoch": 77.68558951965065, "grad_norm": 5.380860441156863e-06, "learning_rate": 7.208181039164608e-07, "logits/chosen": -1.6147871017456055, "logits/rejected": -2.3627612590789795, "logps/chosen": -476.1498107910156, "logps/rejected": -5648.0634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0269455909729004, "rewards/margins": 51.43198776245117, "rewards/rejected": -53.45893096923828, "step": 17790 }, { "epoch": 77.7292576419214, "grad_norm": 1.407492036390326e-06, "learning_rate": 7.181430688188449e-07, "logits/chosen": -1.5079153776168823, "logits/rejected": -2.222238540649414, "logps/chosen": -504.89141845703125, "logps/rejected": -4714.7333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1783416271209717, "rewards/margins": 42.46442413330078, "rewards/rejected": -44.64276885986328, "step": 17800 }, { "epoch": 77.77292576419214, "grad_norm": 2.7379526882682187e-08, "learning_rate": 7.154721738753509e-07, "logits/chosen": -1.5139477252960205, "logits/rejected": -2.2705676555633545, "logps/chosen": -505.8843688964844, "logps/rejected": -4928.60546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.075596809387207, "rewards/margins": 44.53554916381836, "rewards/rejected": -46.61114501953125, "step": 17810 }, { "epoch": 77.81659388646288, "grad_norm": 3.3261282465802535e-08, "learning_rate": 7.128054252918165e-07, "logits/chosen": -1.5452836751937866, "logits/rejected": -2.2445244789123535, "logps/chosen": -490.113525390625, "logps/rejected": -5217.39501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.028186321258545, "rewards/margins": 47.26244354248047, "rewards/rejected": -49.29063415527344, "step": 17820 }, { "epoch": 77.86026200873363, "grad_norm": 2.817474415982413e-06, "learning_rate": 7.101428292644477e-07, "logits/chosen": -1.5686066150665283, "logits/rejected": -2.2924091815948486, "logps/chosen": -481.73370361328125, "logps/rejected": -5689.8291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1094250679016113, "rewards/margins": 51.70561599731445, "rewards/rejected": -53.815032958984375, "step": 17830 }, { "epoch": 77.90393013100437, "grad_norm": 1.2239483537652834e-07, "learning_rate": 7.074843919797988e-07, "logits/chosen": -1.5486990213394165, "logits/rejected": -2.1952993869781494, "logps/chosen": -490.902587890625, "logps/rejected": -4978.498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1307058334350586, "rewards/margins": 44.8730354309082, "rewards/rejected": -47.00374221801758, "step": 17840 }, { "epoch": 77.9475982532751, "grad_norm": 1.334802841222771e-07, "learning_rate": 7.04830119614765e-07, "logits/chosen": -1.5764782428741455, "logits/rejected": -2.2534751892089844, "logps/chosen": -470.400390625, "logps/rejected": -5044.6357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1443731784820557, "rewards/margins": 45.49967575073242, "rewards/rejected": -47.64405059814453, "step": 17850 }, { "epoch": 77.99126637554585, "grad_norm": 1.1200797478268895e-07, "learning_rate": 7.021800183365607e-07, "logits/chosen": -1.5800282955169678, "logits/rejected": -2.2154574394226074, "logps/chosen": -479.6239318847656, "logps/rejected": -4852.8662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.079049587249756, "rewards/margins": 43.729305267333984, "rewards/rejected": -45.80835723876953, "step": 17860 }, { "epoch": 78.0349344978166, "grad_norm": 3.265675967717189e-08, "learning_rate": 6.995340943027118e-07, "logits/chosen": -1.5565848350524902, "logits/rejected": -2.2906875610351562, "logps/chosen": -471.625, "logps/rejected": -5419.857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0502285957336426, "rewards/margins": 49.257041931152344, "rewards/rejected": -51.307273864746094, "step": 17870 }, { "epoch": 78.07860262008734, "grad_norm": 2.2239958001448685e-07, "learning_rate": 6.968923536610356e-07, "logits/chosen": -1.5643713474273682, "logits/rejected": -2.270648956298828, "logps/chosen": -501.7513732910156, "logps/rejected": -5296.1298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.085536479949951, "rewards/margins": 47.990875244140625, "rewards/rejected": -50.0764045715332, "step": 17880 }, { "epoch": 78.12227074235808, "grad_norm": 2.785847640312604e-07, "learning_rate": 6.942548025496312e-07, "logits/chosen": -1.6085460186004639, "logits/rejected": -2.354980945587158, "logps/chosen": -515.6088256835938, "logps/rejected": -5940.4267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.22633695602417, "rewards/margins": 54.04276657104492, "rewards/rejected": -56.26910400390625, "step": 17890 }, { "epoch": 78.16593886462883, "grad_norm": 3.713329690680131e-05, "learning_rate": 6.916214470968638e-07, "logits/chosen": -1.5242677927017212, "logits/rejected": -2.1166391372680664, "logps/chosen": -505.1756286621094, "logps/rejected": -4815.7607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2181763648986816, "rewards/margins": 43.16231918334961, "rewards/rejected": -45.380496978759766, "step": 17900 }, { "epoch": 78.20960698689956, "grad_norm": 5.700712623667199e-08, "learning_rate": 6.889922934213469e-07, "logits/chosen": -1.5218464136123657, "logits/rejected": -2.237886905670166, "logps/chosen": -497.31060791015625, "logps/rejected": -5035.11572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.089973211288452, "rewards/margins": 45.44379425048828, "rewards/rejected": -47.53376388549805, "step": 17910 }, { "epoch": 78.2532751091703, "grad_norm": 1.0580331070497213e-07, "learning_rate": 6.863673476319352e-07, "logits/chosen": -1.5777819156646729, "logits/rejected": -2.2239372730255127, "logps/chosen": -509.9125061035156, "logps/rejected": -4947.45458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0917484760284424, "rewards/margins": 44.61288070678711, "rewards/rejected": -46.704627990722656, "step": 17920 }, { "epoch": 78.29694323144105, "grad_norm": 2.9831465702759392e-06, "learning_rate": 6.837466158277026e-07, "logits/chosen": -1.5487680435180664, "logits/rejected": -2.254380702972412, "logps/chosen": -503.88482666015625, "logps/rejected": -4904.9501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.044926881790161, "rewards/margins": 44.343849182128906, "rewards/rejected": -46.38877487182617, "step": 17930 }, { "epoch": 78.3406113537118, "grad_norm": 3.5399170333506954e-08, "learning_rate": 6.811301040979349e-07, "logits/chosen": -1.5313894748687744, "logits/rejected": -2.209765672683716, "logps/chosen": -509.1566467285156, "logps/rejected": -5049.103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0869863033294678, "rewards/margins": 45.58631134033203, "rewards/rejected": -47.673301696777344, "step": 17940 }, { "epoch": 78.38427947598254, "grad_norm": 4.0961022082937356e-08, "learning_rate": 6.785178185221095e-07, "logits/chosen": -1.5848023891448975, "logits/rejected": -2.2906885147094727, "logps/chosen": -493.07135009765625, "logps/rejected": -5050.5126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2910830974578857, "rewards/margins": 45.52043151855469, "rewards/rejected": -47.8115234375, "step": 17950 }, { "epoch": 78.42794759825327, "grad_norm": 1.5713222530173437e-08, "learning_rate": 6.759097651698876e-07, "logits/chosen": -1.469245195388794, "logits/rejected": -2.1777749061584473, "logps/chosen": -510.1184997558594, "logps/rejected": -4458.36083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1233906745910645, "rewards/margins": 40.07390213012695, "rewards/rejected": -42.19729232788086, "step": 17960 }, { "epoch": 78.47161572052401, "grad_norm": 5.780129967363536e-08, "learning_rate": 6.733059501010936e-07, "logits/chosen": -1.49532151222229, "logits/rejected": -2.1215739250183105, "logps/chosen": -515.3208618164062, "logps/rejected": -4805.0556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0597848892211914, "rewards/margins": 43.24360656738281, "rewards/rejected": -45.30338668823242, "step": 17970 }, { "epoch": 78.51528384279476, "grad_norm": 1.5110062156644423e-05, "learning_rate": 6.707063793657065e-07, "logits/chosen": -1.57858407497406, "logits/rejected": -2.3052420616149902, "logps/chosen": -495.00836181640625, "logps/rejected": -5388.26904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.12679123878479, "rewards/margins": 48.78981018066406, "rewards/rejected": -50.916603088378906, "step": 17980 }, { "epoch": 78.5589519650655, "grad_norm": 4.03855221159259e-06, "learning_rate": 6.681110590038436e-07, "logits/chosen": -1.5490458011627197, "logits/rejected": -2.308788299560547, "logps/chosen": -493.3232421875, "logps/rejected": -5360.39990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.147622585296631, "rewards/margins": 48.56153106689453, "rewards/rejected": -50.70914840698242, "step": 17990 }, { "epoch": 78.60262008733625, "grad_norm": 7.35318510279205e-08, "learning_rate": 6.655199950457441e-07, "logits/chosen": -1.623205542564392, "logits/rejected": -2.352109432220459, "logps/chosen": -483.6859436035156, "logps/rejected": -5689.63818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2111449241638184, "rewards/margins": 51.706321716308594, "rewards/rejected": -53.91747283935547, "step": 18000 }, { "epoch": 78.646288209607, "grad_norm": 2.4598920613395993e-07, "learning_rate": 6.629331935117605e-07, "logits/chosen": -1.5705082416534424, "logits/rejected": -2.311166286468506, "logps/chosen": -513.0453491210938, "logps/rejected": -5443.771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1237893104553223, "rewards/margins": 49.32170104980469, "rewards/rejected": -51.44548797607422, "step": 18010 }, { "epoch": 78.68995633187772, "grad_norm": 1.1672708904384714e-07, "learning_rate": 6.603506604123386e-07, "logits/chosen": -1.5354379415512085, "logits/rejected": -2.2955119609832764, "logps/chosen": -505.68658447265625, "logps/rejected": -5323.5419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2381887435913086, "rewards/margins": 48.139442443847656, "rewards/rejected": -50.37763214111328, "step": 18020 }, { "epoch": 78.73362445414847, "grad_norm": 9.668612331864205e-08, "learning_rate": 6.577724017480094e-07, "logits/chosen": -1.5389200448989868, "logits/rejected": -2.256551742553711, "logps/chosen": -484.35015869140625, "logps/rejected": -5017.2890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1478209495544434, "rewards/margins": 45.28059768676758, "rewards/rejected": -47.42841339111328, "step": 18030 }, { "epoch": 78.77729257641921, "grad_norm": 1.959923917266928e-08, "learning_rate": 6.551984235093692e-07, "logits/chosen": -1.5562992095947266, "logits/rejected": -2.29587984085083, "logps/chosen": -525.4049072265625, "logps/rejected": -5405.087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1020398139953613, "rewards/margins": 49.151649475097656, "rewards/rejected": -51.253692626953125, "step": 18040 }, { "epoch": 78.82096069868996, "grad_norm": 6.755797350580879e-06, "learning_rate": 6.526287316770713e-07, "logits/chosen": -1.568526268005371, "logits/rejected": -2.281202793121338, "logps/chosen": -467.80291748046875, "logps/rejected": -4963.6904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.102130889892578, "rewards/margins": 44.77842712402344, "rewards/rejected": -46.88056182861328, "step": 18050 }, { "epoch": 78.8646288209607, "grad_norm": 2.862295436334803e-06, "learning_rate": 6.500633322218075e-07, "logits/chosen": -1.5914331674575806, "logits/rejected": -2.2897517681121826, "logps/chosen": -494.41705322265625, "logps/rejected": -5268.6474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.206160068511963, "rewards/margins": 47.65984344482422, "rewards/rejected": -49.86600112915039, "step": 18060 }, { "epoch": 78.90829694323143, "grad_norm": 6.943421978511885e-08, "learning_rate": 6.475022311042978e-07, "logits/chosen": -1.5407088994979858, "logits/rejected": -2.2264788150787354, "logps/chosen": -492.6553649902344, "logps/rejected": -5018.0087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.083946943283081, "rewards/margins": 45.39408874511719, "rewards/rejected": -47.47803497314453, "step": 18070 }, { "epoch": 78.95196506550218, "grad_norm": 5.571499297544538e-08, "learning_rate": 6.449454342752748e-07, "logits/chosen": -1.5394277572631836, "logits/rejected": -2.1696860790252686, "logps/chosen": -490.7560119628906, "logps/rejected": -4864.47216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0892739295959473, "rewards/margins": 43.88694381713867, "rewards/rejected": -45.97622299194336, "step": 18080 }, { "epoch": 78.99563318777292, "grad_norm": 5.065395367034103e-08, "learning_rate": 6.423929476754687e-07, "logits/chosen": -1.5597392320632935, "logits/rejected": -2.152639389038086, "logps/chosen": -469.4261779785156, "logps/rejected": -4954.89599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.155160427093506, "rewards/margins": 44.560001373291016, "rewards/rejected": -46.71516036987305, "step": 18090 }, { "epoch": 79.03930131004367, "grad_norm": 1.3551148667129002e-05, "learning_rate": 6.398447772355965e-07, "logits/chosen": -1.5449631214141846, "logits/rejected": -2.270000696182251, "logps/chosen": -491.01727294921875, "logps/rejected": -5031.99072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1635451316833496, "rewards/margins": 45.448116302490234, "rewards/rejected": -47.61166000366211, "step": 18100 }, { "epoch": 79.08296943231441, "grad_norm": 1.5487995599310123e-07, "learning_rate": 6.373009288763457e-07, "logits/chosen": -1.5215939283370972, "logits/rejected": -2.1622776985168457, "logps/chosen": -501.2561950683594, "logps/rejected": -5129.65283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1454310417175293, "rewards/margins": 46.33928298950195, "rewards/rejected": -48.48471450805664, "step": 18110 }, { "epoch": 79.12663755458516, "grad_norm": 5.392121949782174e-08, "learning_rate": 6.347614085083601e-07, "logits/chosen": -1.5759155750274658, "logits/rejected": -2.25722336769104, "logps/chosen": -492.514892578125, "logps/rejected": -5008.208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1253411769866943, "rewards/margins": 45.30928421020508, "rewards/rejected": -47.43462371826172, "step": 18120 }, { "epoch": 79.17030567685589, "grad_norm": 3.7793977177400634e-07, "learning_rate": 6.322262220322314e-07, "logits/chosen": -1.5314953327178955, "logits/rejected": -2.171168565750122, "logps/chosen": -522.1646118164062, "logps/rejected": -4837.49853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1571364402770996, "rewards/margins": 43.68157196044922, "rewards/rejected": -45.838706970214844, "step": 18130 }, { "epoch": 79.21397379912663, "grad_norm": 3.091497872016518e-08, "learning_rate": 6.29695375338478e-07, "logits/chosen": -1.5277959108352661, "logits/rejected": -2.1960389614105225, "logps/chosen": -509.5425720214844, "logps/rejected": -4773.15087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.153456211090088, "rewards/margins": 42.99101638793945, "rewards/rejected": -45.14447021484375, "step": 18140 }, { "epoch": 79.25764192139738, "grad_norm": 4.01995209208885e-08, "learning_rate": 6.271688743075346e-07, "logits/chosen": -1.55730402469635, "logits/rejected": -2.286653995513916, "logps/chosen": -484.0492248535156, "logps/rejected": -5308.4755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0962471961975098, "rewards/margins": 48.088348388671875, "rewards/rejected": -50.184593200683594, "step": 18150 }, { "epoch": 79.30131004366812, "grad_norm": 6.981478140198296e-08, "learning_rate": 6.246467248097416e-07, "logits/chosen": -1.5440571308135986, "logits/rejected": -2.2945873737335205, "logps/chosen": -491.1336364746094, "logps/rejected": -5299.1826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1450002193450928, "rewards/margins": 47.94771957397461, "rewards/rejected": -50.09272003173828, "step": 18160 }, { "epoch": 79.34497816593887, "grad_norm": 4.463884533525797e-08, "learning_rate": 6.221289327053254e-07, "logits/chosen": -1.574920892715454, "logits/rejected": -2.2919068336486816, "logps/chosen": -479.3565979003906, "logps/rejected": -5157.48828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0191750526428223, "rewards/margins": 46.83890151977539, "rewards/rejected": -48.85807418823242, "step": 18170 }, { "epoch": 79.38864628820961, "grad_norm": 2.321735941721249e-06, "learning_rate": 6.196155038443899e-07, "logits/chosen": -1.5406776666641235, "logits/rejected": -2.217175006866455, "logps/chosen": -491.28076171875, "logps/rejected": -5005.2568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.214352607727051, "rewards/margins": 45.11161804199219, "rewards/rejected": -47.32596969604492, "step": 18180 }, { "epoch": 79.43231441048034, "grad_norm": 3.475196545321006e-08, "learning_rate": 6.171064440669014e-07, "logits/chosen": -1.5180517435073853, "logits/rejected": -2.205939769744873, "logps/chosen": -503.730712890625, "logps/rejected": -4954.5419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.147749185562134, "rewards/margins": 44.72630310058594, "rewards/rejected": -46.87405014038086, "step": 18190 }, { "epoch": 79.47598253275109, "grad_norm": 8.273526498214481e-08, "learning_rate": 6.146017592026732e-07, "logits/chosen": -1.585371494293213, "logits/rejected": -2.3791544437408447, "logps/chosen": -459.5474548339844, "logps/rejected": -5458.9384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.058866500854492, "rewards/margins": 49.6002082824707, "rewards/rejected": -51.65907669067383, "step": 18200 }, { "epoch": 79.51965065502183, "grad_norm": 2.0665703238766487e-08, "learning_rate": 6.121014550713522e-07, "logits/chosen": -1.6088998317718506, "logits/rejected": -2.3790767192840576, "logps/chosen": -485.6259765625, "logps/rejected": -5361.48828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1518123149871826, "rewards/margins": 48.521278381347656, "rewards/rejected": -50.67308807373047, "step": 18210 }, { "epoch": 79.56331877729258, "grad_norm": 4.8009031725289765e-08, "learning_rate": 6.096055374824117e-07, "logits/chosen": -1.5716356039047241, "logits/rejected": -2.228257417678833, "logps/chosen": -491.72509765625, "logps/rejected": -5046.66259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.073725700378418, "rewards/margins": 45.47257995605469, "rewards/rejected": -47.54631042480469, "step": 18220 }, { "epoch": 79.60698689956332, "grad_norm": 6.970663419638714e-08, "learning_rate": 6.071140122351276e-07, "logits/chosen": -1.5728868246078491, "logits/rejected": -2.298963785171509, "logps/chosen": -496.0947265625, "logps/rejected": -5481.6318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0588412284851074, "rewards/margins": 49.739131927490234, "rewards/rejected": -51.7979736328125, "step": 18230 }, { "epoch": 79.65065502183405, "grad_norm": 3.096325498577811e-08, "learning_rate": 6.046268851185721e-07, "logits/chosen": -1.6309232711791992, "logits/rejected": -2.4288361072540283, "logps/chosen": -471.99432373046875, "logps/rejected": -5695.0771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1240367889404297, "rewards/margins": 51.737770080566406, "rewards/rejected": -53.8618049621582, "step": 18240 }, { "epoch": 79.6943231441048, "grad_norm": 1.3597825960311577e-07, "learning_rate": 6.021441619115992e-07, "logits/chosen": -1.5256259441375732, "logits/rejected": -2.197906017303467, "logps/chosen": -494.50555419921875, "logps/rejected": -5306.81787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1818463802337646, "rewards/margins": 47.941917419433594, "rewards/rejected": -50.12376022338867, "step": 18250 }, { "epoch": 79.73799126637554, "grad_norm": 3.424108091533958e-08, "learning_rate": 5.996658483828286e-07, "logits/chosen": -1.5316531658172607, "logits/rejected": -2.1369919776916504, "logps/chosen": -481.76708984375, "logps/rejected": -4844.93603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.167462110519409, "rewards/margins": 43.59283447265625, "rewards/rejected": -45.760292053222656, "step": 18260 }, { "epoch": 79.78165938864629, "grad_norm": 5.55634638737644e-06, "learning_rate": 5.971919502906356e-07, "logits/chosen": -1.5541832447052002, "logits/rejected": -2.1631579399108887, "logps/chosen": -501.189697265625, "logps/rejected": -4931.0146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1766092777252197, "rewards/margins": 44.497745513916016, "rewards/rejected": -46.67435073852539, "step": 18270 }, { "epoch": 79.82532751091703, "grad_norm": 8.972573365275718e-07, "learning_rate": 5.947224733831364e-07, "logits/chosen": -1.482508659362793, "logits/rejected": -2.1649272441864014, "logps/chosen": -517.25146484375, "logps/rejected": -4759.99169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9703166484832764, "rewards/margins": 43.06678009033203, "rewards/rejected": -45.0370979309082, "step": 18280 }, { "epoch": 79.86899563318778, "grad_norm": 9.492767630077142e-09, "learning_rate": 5.922574233981729e-07, "logits/chosen": -1.5549391508102417, "logits/rejected": -2.1604163646698, "logps/chosen": -475.57537841796875, "logps/rejected": -4832.38525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.045043706893921, "rewards/margins": 43.635215759277344, "rewards/rejected": -45.68026351928711, "step": 18290 }, { "epoch": 79.91266375545851, "grad_norm": 7.159360612352657e-08, "learning_rate": 5.897968060633016e-07, "logits/chosen": -1.5485526323318481, "logits/rejected": -2.25748872756958, "logps/chosen": -521.1834716796875, "logps/rejected": -5057.25537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.109869956970215, "rewards/margins": 45.73933792114258, "rewards/rejected": -47.849212646484375, "step": 18300 }, { "epoch": 79.95633187772926, "grad_norm": 5.691212293542224e-08, "learning_rate": 5.873406270957804e-07, "logits/chosen": -1.4984101057052612, "logits/rejected": -2.2894601821899414, "logps/chosen": -487.2186584472656, "logps/rejected": -5025.35205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.032866954803467, "rewards/margins": 45.492042541503906, "rewards/rejected": -47.52490997314453, "step": 18310 }, { "epoch": 80.0, "grad_norm": 1.234753549954572e-06, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.5408216714859009, "logits/rejected": -2.303743839263916, "logps/chosen": -487.00689697265625, "logps/rejected": -5229.34619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0612215995788574, "rewards/margins": 47.412330627441406, "rewards/rejected": -49.473548889160156, "step": 18320 }, { "epoch": 80.04366812227074, "grad_norm": 5.498605951269445e-08, "learning_rate": 5.824416070802439e-07, "logits/chosen": -1.55045485496521, "logits/rejected": -2.2574684619903564, "logps/chosen": -500.6322326660156, "logps/rejected": -5022.7490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.074428081512451, "rewards/margins": 45.4283561706543, "rewards/rejected": -47.50278091430664, "step": 18330 }, { "epoch": 80.08733624454149, "grad_norm": 1.0714205185812085e-05, "learning_rate": 5.799987774151275e-07, "logits/chosen": -1.6041628122329712, "logits/rejected": -2.3175196647644043, "logps/chosen": -506.21160888671875, "logps/rejected": -5477.4716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2077064514160156, "rewards/margins": 49.638065338134766, "rewards/rejected": -51.84577560424805, "step": 18340 }, { "epoch": 80.13100436681222, "grad_norm": 4.0691081779092705e-08, "learning_rate": 5.775604088831327e-07, "logits/chosen": -1.5977519750595093, "logits/rejected": -2.3840277194976807, "logps/chosen": -480.15093994140625, "logps/rejected": -5732.7685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0104739665985107, "rewards/margins": 52.30266189575195, "rewards/rejected": -54.313133239746094, "step": 18350 }, { "epoch": 80.17467248908297, "grad_norm": 1.437702119356768e-07, "learning_rate": 5.751265071498227e-07, "logits/chosen": -1.5794017314910889, "logits/rejected": -2.2515695095062256, "logps/chosen": -489.32049560546875, "logps/rejected": -5038.8134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2772088050842285, "rewards/margins": 45.483375549316406, "rewards/rejected": -47.760581970214844, "step": 18360 }, { "epoch": 80.21834061135371, "grad_norm": 2.4907222264462717e-06, "learning_rate": 5.72697077870382e-07, "logits/chosen": -1.4977715015411377, "logits/rejected": -2.185391664505005, "logps/chosen": -516.8343505859375, "logps/rejected": -5015.6142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2313594818115234, "rewards/margins": 45.176979064941406, "rewards/rejected": -47.40833282470703, "step": 18370 }, { "epoch": 80.26200873362446, "grad_norm": 2.2064391994104897e-07, "learning_rate": 5.70272126689601e-07, "logits/chosen": -1.573055624961853, "logits/rejected": -2.298557996749878, "logps/chosen": -504.06121826171875, "logps/rejected": -5188.73681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0683329105377197, "rewards/margins": 47.05071258544922, "rewards/rejected": -49.119056701660156, "step": 18380 }, { "epoch": 80.3056768558952, "grad_norm": 2.6104113839533423e-08, "learning_rate": 5.678516592418671e-07, "logits/chosen": -1.5338317155838013, "logits/rejected": -2.3185935020446777, "logps/chosen": -478.8980407714844, "logps/rejected": -5120.36474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.081113338470459, "rewards/margins": 46.433387756347656, "rewards/rejected": -48.51449966430664, "step": 18390 }, { "epoch": 80.34934497816595, "grad_norm": 2.7003460996215227e-08, "learning_rate": 5.654356811511494e-07, "logits/chosen": -1.5721081495285034, "logits/rejected": -2.260341167449951, "logps/chosen": -465.873046875, "logps/rejected": -5385.8330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.018019199371338, "rewards/margins": 49.00542449951172, "rewards/rejected": -51.0234489440918, "step": 18400 }, { "epoch": 80.39301310043668, "grad_norm": 4.862616189903018e-08, "learning_rate": 5.63024198030987e-07, "logits/chosen": -1.5382086038589478, "logits/rejected": -2.2810542583465576, "logps/chosen": -504.3267517089844, "logps/rejected": -5135.93896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1081345081329346, "rewards/margins": 46.513954162597656, "rewards/rejected": -48.62208938598633, "step": 18410 }, { "epoch": 80.43668122270742, "grad_norm": 6.025878520094676e-08, "learning_rate": 5.606172154844722e-07, "logits/chosen": -1.5634090900421143, "logits/rejected": -2.2946953773498535, "logps/chosen": -489.49334716796875, "logps/rejected": -5395.3427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.102436065673828, "rewards/margins": 48.88236618041992, "rewards/rejected": -50.98480987548828, "step": 18420 }, { "epoch": 80.48034934497817, "grad_norm": 2.8031937446101362e-08, "learning_rate": 5.582147391042433e-07, "logits/chosen": -1.5429985523223877, "logits/rejected": -2.194932460784912, "logps/chosen": -493.970947265625, "logps/rejected": -4740.49267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1617431640625, "rewards/margins": 42.57118225097656, "rewards/rejected": -44.73292541503906, "step": 18430 }, { "epoch": 80.52401746724891, "grad_norm": 3.865539259594939e-06, "learning_rate": 5.558167744724666e-07, "logits/chosen": -1.6036243438720703, "logits/rejected": -2.407362461090088, "logps/chosen": -490.7032165527344, "logps/rejected": -5560.71875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1991260051727295, "rewards/margins": 50.42707443237305, "rewards/rejected": -52.626197814941406, "step": 18440 }, { "epoch": 80.56768558951966, "grad_norm": 2.3405225331133226e-06, "learning_rate": 5.534233271608239e-07, "logits/chosen": -1.5298309326171875, "logits/rejected": -2.3146159648895264, "logps/chosen": -486.67822265625, "logps/rejected": -5386.77880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0711116790771484, "rewards/margins": 48.96936798095703, "rewards/rejected": -51.04048156738281, "step": 18450 }, { "epoch": 80.61135371179039, "grad_norm": 4.306225987841997e-08, "learning_rate": 5.51034402730506e-07, "logits/chosen": -1.530177354812622, "logits/rejected": -2.2456047534942627, "logps/chosen": -482.64996337890625, "logps/rejected": -5446.39111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0249533653259277, "rewards/margins": 49.53765106201172, "rewards/rejected": -51.56260299682617, "step": 18460 }, { "epoch": 80.65502183406113, "grad_norm": 1.0762124498066715e-05, "learning_rate": 5.486500067321898e-07, "logits/chosen": -1.5740141868591309, "logits/rejected": -2.299592971801758, "logps/chosen": -480.8076171875, "logps/rejected": -5266.42578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1739048957824707, "rewards/margins": 47.643951416015625, "rewards/rejected": -49.8178596496582, "step": 18470 }, { "epoch": 80.69868995633188, "grad_norm": 2.0317001143476886e-06, "learning_rate": 5.46270144706032e-07, "logits/chosen": -1.608454704284668, "logits/rejected": -2.4187002182006836, "logps/chosen": -484.75115966796875, "logps/rejected": -5715.400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.156719207763672, "rewards/margins": 51.9863395690918, "rewards/rejected": -54.14305877685547, "step": 18480 }, { "epoch": 80.74235807860262, "grad_norm": 9.081459435381968e-08, "learning_rate": 5.438948221816559e-07, "logits/chosen": -1.5383667945861816, "logits/rejected": -2.294163227081299, "logps/chosen": -497.6513671875, "logps/rejected": -5209.0791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1218512058258057, "rewards/margins": 47.134056091308594, "rewards/rejected": -49.25590515136719, "step": 18490 }, { "epoch": 80.78602620087337, "grad_norm": 3.446456504253603e-07, "learning_rate": 5.415240446781348e-07, "logits/chosen": -1.6134674549102783, "logits/rejected": -2.364523410797119, "logps/chosen": -514.8486328125, "logps/rejected": -5927.2392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1344411373138428, "rewards/margins": 53.85693359375, "rewards/rejected": -55.991363525390625, "step": 18500 }, { "epoch": 80.82969432314411, "grad_norm": 2.317792096412497e-08, "learning_rate": 5.391578177039833e-07, "logits/chosen": -1.4925636053085327, "logits/rejected": -2.1967225074768066, "logps/chosen": -523.7938232421875, "logps/rejected": -4780.2744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1347763538360596, "rewards/margins": 43.082454681396484, "rewards/rejected": -45.21723175048828, "step": 18510 }, { "epoch": 80.87336244541484, "grad_norm": 7.68112268706864e-06, "learning_rate": 5.367961467571437e-07, "logits/chosen": -1.5803630352020264, "logits/rejected": -2.310115098953247, "logps/chosen": -476.42120361328125, "logps/rejected": -5220.63037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.156223773956299, "rewards/margins": 47.226341247558594, "rewards/rejected": -49.382564544677734, "step": 18520 }, { "epoch": 80.91703056768559, "grad_norm": 3.7840433981909384e-06, "learning_rate": 5.344390373249698e-07, "logits/chosen": -1.5674893856048584, "logits/rejected": -2.3790740966796875, "logps/chosen": -491.81512451171875, "logps/rejected": -5607.7763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.102224349975586, "rewards/margins": 51.02739715576172, "rewards/rejected": -53.1296272277832, "step": 18530 }, { "epoch": 80.96069868995633, "grad_norm": 1.4624837801411701e-08, "learning_rate": 5.320864948842169e-07, "logits/chosen": -1.5352723598480225, "logits/rejected": -2.203213930130005, "logps/chosen": -506.213134765625, "logps/rejected": -4844.7841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.124800205230713, "rewards/margins": 43.633697509765625, "rewards/rejected": -45.75850296020508, "step": 18540 }, { "epoch": 81.00436681222708, "grad_norm": 1.7306106709970328e-08, "learning_rate": 5.297385249010329e-07, "logits/chosen": -1.563829779624939, "logits/rejected": -2.243943929672241, "logps/chosen": -516.3101806640625, "logps/rejected": -5127.5634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1561408042907715, "rewards/margins": 46.18073654174805, "rewards/rejected": -48.336883544921875, "step": 18550 }, { "epoch": 81.04803493449782, "grad_norm": 1.802757842477938e-08, "learning_rate": 5.27395132830937e-07, "logits/chosen": -1.5025827884674072, "logits/rejected": -2.1732747554779053, "logps/chosen": -494.1034240722656, "logps/rejected": -4619.169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.115730047225952, "rewards/margins": 41.58292007446289, "rewards/rejected": -43.69865036010742, "step": 18560 }, { "epoch": 81.09170305676857, "grad_norm": 2.0059147941911842e-08, "learning_rate": 5.250563241188125e-07, "logits/chosen": -1.595794677734375, "logits/rejected": -2.3683552742004395, "logps/chosen": -464.2557067871094, "logps/rejected": -5360.20166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0820000171661377, "rewards/margins": 48.687416076660156, "rewards/rejected": -50.76941680908203, "step": 18570 }, { "epoch": 81.1353711790393, "grad_norm": 3.423848476352617e-07, "learning_rate": 5.227221041988955e-07, "logits/chosen": -1.5182018280029297, "logits/rejected": -2.1802358627319336, "logps/chosen": -512.763671875, "logps/rejected": -4706.083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.124315023422241, "rewards/margins": 42.278114318847656, "rewards/rejected": -44.402427673339844, "step": 18580 }, { "epoch": 81.17903930131004, "grad_norm": 1.0110868115205351e-07, "learning_rate": 5.203924784947573e-07, "logits/chosen": -1.538079023361206, "logits/rejected": -2.2404446601867676, "logps/chosen": -506.8516540527344, "logps/rejected": -4966.8720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1368446350097656, "rewards/margins": 44.76094055175781, "rewards/rejected": -46.89778137207031, "step": 18590 }, { "epoch": 81.22270742358079, "grad_norm": 2.441565235708139e-08, "learning_rate": 5.180674524192958e-07, "logits/chosen": -1.5486811399459839, "logits/rejected": -2.199934720993042, "logps/chosen": -494.77581787109375, "logps/rejected": -4889.09814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.130126476287842, "rewards/margins": 44.093475341796875, "rewards/rejected": -46.22360610961914, "step": 18600 }, { "epoch": 81.26637554585153, "grad_norm": 1.2307884869493473e-07, "learning_rate": 5.157470313747226e-07, "logits/chosen": -1.5349228382110596, "logits/rejected": -2.2907567024230957, "logps/chosen": -494.1238708496094, "logps/rejected": -5282.880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0821800231933594, "rewards/margins": 47.9254035949707, "rewards/rejected": -50.00758361816406, "step": 18610 }, { "epoch": 81.31004366812228, "grad_norm": 5.670781175960183e-07, "learning_rate": 5.134312207525472e-07, "logits/chosen": -1.5746269226074219, "logits/rejected": -2.361664295196533, "logps/chosen": -472.59210205078125, "logps/rejected": -5671.5966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1172268390655518, "rewards/margins": 51.564369201660156, "rewards/rejected": -53.68159103393555, "step": 18620 }, { "epoch": 81.353711790393, "grad_norm": 4.507894103129941e-08, "learning_rate": 5.111200259335689e-07, "logits/chosen": -1.6018314361572266, "logits/rejected": -2.25754976272583, "logps/chosen": -487.5419921875, "logps/rejected": -5087.93212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1003329753875732, "rewards/margins": 45.9892578125, "rewards/rejected": -48.08959197998047, "step": 18630 }, { "epoch": 81.39737991266375, "grad_norm": 4.3848085454978964e-08, "learning_rate": 5.088134522878601e-07, "logits/chosen": -1.5505603551864624, "logits/rejected": -2.221550226211548, "logps/chosen": -475.7909240722656, "logps/rejected": -4867.3095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.130077362060547, "rewards/margins": 43.963462829589844, "rewards/rejected": -46.093544006347656, "step": 18640 }, { "epoch": 81.4410480349345, "grad_norm": 3.5385017301482635e-08, "learning_rate": 5.065115051747587e-07, "logits/chosen": -1.5999926328659058, "logits/rejected": -2.3202826976776123, "logps/chosen": -498.62860107421875, "logps/rejected": -5398.1591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1117749214172363, "rewards/margins": 48.91621017456055, "rewards/rejected": -51.02798080444336, "step": 18650 }, { "epoch": 81.48471615720524, "grad_norm": 3.4786127183388617e-06, "learning_rate": 5.042141899428493e-07, "logits/chosen": -1.57149338722229, "logits/rejected": -2.216576337814331, "logps/chosen": -469.44488525390625, "logps/rejected": -5178.12890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2068259716033936, "rewards/margins": 46.779014587402344, "rewards/rejected": -48.98583221435547, "step": 18660 }, { "epoch": 81.52838427947599, "grad_norm": 1.8966587507943459e-06, "learning_rate": 5.019215119299578e-07, "logits/chosen": -1.5485076904296875, "logits/rejected": -2.2408173084259033, "logps/chosen": -498.66180419921875, "logps/rejected": -4905.2177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.296992778778076, "rewards/margins": 44.049293518066406, "rewards/rejected": -46.346282958984375, "step": 18670 }, { "epoch": 81.57205240174673, "grad_norm": 6.640889907558e-08, "learning_rate": 4.996334764631322e-07, "logits/chosen": -1.64052414894104, "logits/rejected": -2.3758559226989746, "logps/chosen": -476.7099609375, "logps/rejected": -5962.7177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0339882373809814, "rewards/margins": 54.3066520690918, "rewards/rejected": -56.34063720703125, "step": 18680 }, { "epoch": 81.61572052401746, "grad_norm": 1.3554588787328636e-06, "learning_rate": 4.973500888586363e-07, "logits/chosen": -1.5862382650375366, "logits/rejected": -2.332458019256592, "logps/chosen": -458.78155517578125, "logps/rejected": -5632.927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0560081005096436, "rewards/margins": 51.2911491394043, "rewards/rejected": -53.3471565246582, "step": 18690 }, { "epoch": 81.6593886462882, "grad_norm": 1.0424932365677467e-06, "learning_rate": 4.950713544219338e-07, "logits/chosen": -1.5961616039276123, "logits/rejected": -2.3324177265167236, "logps/chosen": -456.3353576660156, "logps/rejected": -5602.77880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.084015369415283, "rewards/margins": 50.975914001464844, "rewards/rejected": -53.0599365234375, "step": 18700 }, { "epoch": 81.70305676855895, "grad_norm": 4.412630158225045e-06, "learning_rate": 4.927972784476747e-07, "logits/chosen": -1.5194932222366333, "logits/rejected": -2.2061057090759277, "logps/chosen": -507.22589111328125, "logps/rejected": -5022.8759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.05723237991333, "rewards/margins": 45.40882873535156, "rewards/rejected": -47.46605682373047, "step": 18710 }, { "epoch": 81.7467248908297, "grad_norm": 7.943903326283236e-07, "learning_rate": 4.905278662196886e-07, "logits/chosen": -1.5360219478607178, "logits/rejected": -2.180177688598633, "logps/chosen": -476.1107482910156, "logps/rejected": -5080.3779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.044579029083252, "rewards/margins": 45.97101593017578, "rewards/rejected": -48.01559066772461, "step": 18720 }, { "epoch": 81.79039301310044, "grad_norm": 3.245727425969085e-05, "learning_rate": 4.882631230109655e-07, "logits/chosen": -1.5953346490859985, "logits/rejected": -2.2688755989074707, "logps/chosen": -475.198974609375, "logps/rejected": -5689.49560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0291309356689453, "rewards/margins": 51.76732635498047, "rewards/rejected": -53.79645919799805, "step": 18730 }, { "epoch": 81.83406113537117, "grad_norm": 2.0340609176044093e-07, "learning_rate": 4.860030540836494e-07, "logits/chosen": -1.5576536655426025, "logits/rejected": -2.297942638397217, "logps/chosen": -492.4039611816406, "logps/rejected": -5085.6650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.126817226409912, "rewards/margins": 45.97492599487305, "rewards/rejected": -48.10174560546875, "step": 18740 }, { "epoch": 81.87772925764192, "grad_norm": 3.888996792072667e-08, "learning_rate": 4.837476646890215e-07, "logits/chosen": -1.5745655298233032, "logits/rejected": -2.2390363216400146, "logps/chosen": -484.2349548339844, "logps/rejected": -5161.275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0967235565185547, "rewards/margins": 46.69506072998047, "rewards/rejected": -48.791786193847656, "step": 18750 }, { "epoch": 81.92139737991266, "grad_norm": 5.14988380767662e-08, "learning_rate": 4.814969600674926e-07, "logits/chosen": -1.5293748378753662, "logits/rejected": -2.2273781299591064, "logps/chosen": -488.80804443359375, "logps/rejected": -5051.52978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.119969606399536, "rewards/margins": 45.614891052246094, "rewards/rejected": -47.73486328125, "step": 18760 }, { "epoch": 81.9650655021834, "grad_norm": 4.6172284239620926e-07, "learning_rate": 4.792509454485852e-07, "logits/chosen": -1.4830107688903809, "logits/rejected": -2.2317326068878174, "logps/chosen": -510.9253845214844, "logps/rejected": -4991.95849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.146266222000122, "rewards/margins": 45.10388946533203, "rewards/rejected": -47.25014877319336, "step": 18770 }, { "epoch": 82.00873362445415, "grad_norm": 1.5785180981460087e-07, "learning_rate": 4.770096260509275e-07, "logits/chosen": -1.5242445468902588, "logits/rejected": -2.2351789474487305, "logps/chosen": -491.97784423828125, "logps/rejected": -5021.61083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1122593879699707, "rewards/margins": 45.39934158325195, "rewards/rejected": -47.5115966796875, "step": 18780 }, { "epoch": 82.0524017467249, "grad_norm": 2.997724996239204e-07, "learning_rate": 4.7477300708223706e-07, "logits/chosen": -1.6010462045669556, "logits/rejected": -2.321256637573242, "logps/chosen": -495.34869384765625, "logps/rejected": -5325.3271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.063931941986084, "rewards/margins": 48.33562088012695, "rewards/rejected": -50.39955139160156, "step": 18790 }, { "epoch": 82.09606986899563, "grad_norm": 3.0801557940619945e-08, "learning_rate": 4.725410937393093e-07, "logits/chosen": -1.580620527267456, "logits/rejected": -2.257338047027588, "logps/chosen": -466.720703125, "logps/rejected": -5150.96337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2244839668273926, "rewards/margins": 46.43663787841797, "rewards/rejected": -48.6611213684082, "step": 18800 }, { "epoch": 82.13973799126637, "grad_norm": 3.6150715165858045e-06, "learning_rate": 4.7031389120800796e-07, "logits/chosen": -1.6123911142349243, "logits/rejected": -2.377401828765869, "logps/chosen": -475.62628173828125, "logps/rejected": -5587.2841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0706233978271484, "rewards/margins": 50.85451126098633, "rewards/rejected": -52.925132751464844, "step": 18810 }, { "epoch": 82.18340611353712, "grad_norm": 1.789844318554292e-08, "learning_rate": 4.6809140466324926e-07, "logits/chosen": -1.6014522314071655, "logits/rejected": -2.407608985900879, "logps/chosen": -480.36712646484375, "logps/rejected": -5585.0380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1003715991973877, "rewards/margins": 50.803749084472656, "rewards/rejected": -52.90412521362305, "step": 18820 }, { "epoch": 82.22707423580786, "grad_norm": 4.736937946249408e-06, "learning_rate": 4.658736392689922e-07, "logits/chosen": -1.5385956764221191, "logits/rejected": -2.1919491291046143, "logps/chosen": -507.65557861328125, "logps/rejected": -4818.8818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.391148090362549, "rewards/margins": 43.166324615478516, "rewards/rejected": -45.557472229003906, "step": 18830 }, { "epoch": 82.2707423580786, "grad_norm": 4.269602631502462e-06, "learning_rate": 4.636606001782271e-07, "logits/chosen": -1.5839356184005737, "logits/rejected": -2.345244884490967, "logps/chosen": -478.3262634277344, "logps/rejected": -5516.20947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.291332960128784, "rewards/margins": 49.886268615722656, "rewards/rejected": -52.1776008605957, "step": 18840 }, { "epoch": 82.31441048034935, "grad_norm": 1.6110318601680353e-06, "learning_rate": 4.614522925329626e-07, "logits/chosen": -1.5934925079345703, "logits/rejected": -2.30959153175354, "logps/chosen": -463.9148864746094, "logps/rejected": -5757.5263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.096113443374634, "rewards/margins": 52.36482620239258, "rewards/rejected": -54.46092987060547, "step": 18850 }, { "epoch": 82.35807860262008, "grad_norm": 2.866911944612185e-07, "learning_rate": 4.5924872146421244e-07, "logits/chosen": -1.523824691772461, "logits/rejected": -2.1545796394348145, "logps/chosen": -509.383544921875, "logps/rejected": -4986.8603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.237231492996216, "rewards/margins": 44.948673248291016, "rewards/rejected": -47.18589782714844, "step": 18860 }, { "epoch": 82.40174672489083, "grad_norm": 2.0120106856601682e-06, "learning_rate": 4.570498920919858e-07, "logits/chosen": -1.5515865087509155, "logits/rejected": -2.2407658100128174, "logps/chosen": -486.46875, "logps/rejected": -5339.42236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.173452377319336, "rewards/margins": 48.35625076293945, "rewards/rejected": -50.529701232910156, "step": 18870 }, { "epoch": 82.44541484716157, "grad_norm": 2.249182987138952e-06, "learning_rate": 4.5485580952527586e-07, "logits/chosen": -1.5259455442428589, "logits/rejected": -2.2413296699523926, "logps/chosen": -496.6222229003906, "logps/rejected": -4796.42138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.070608139038086, "rewards/margins": 43.30272674560547, "rewards/rejected": -45.37334060668945, "step": 18880 }, { "epoch": 82.48908296943232, "grad_norm": 2.1661618687849532e-08, "learning_rate": 4.5266647886204356e-07, "logits/chosen": -1.5521008968353271, "logits/rejected": -2.241018295288086, "logps/chosen": -500.5743713378906, "logps/rejected": -5058.4169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.129383087158203, "rewards/margins": 45.7262077331543, "rewards/rejected": -47.8555908203125, "step": 18890 }, { "epoch": 82.53275109170306, "grad_norm": 8.642055198005102e-07, "learning_rate": 4.504819051892118e-07, "logits/chosen": -1.5446417331695557, "logits/rejected": -2.2650516033172607, "logps/chosen": -489.1493225097656, "logps/rejected": -5241.66796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0877928733825684, "rewards/margins": 47.46232223510742, "rewards/rejected": -49.550113677978516, "step": 18900 }, { "epoch": 82.5764192139738, "grad_norm": 4.121714914610879e-07, "learning_rate": 4.483020935826485e-07, "logits/chosen": -1.497913122177124, "logits/rejected": -2.2031948566436768, "logps/chosen": -515.8338623046875, "logps/rejected": -4818.86962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.103304386138916, "rewards/margins": 43.47686004638672, "rewards/rejected": -45.580169677734375, "step": 18910 }, { "epoch": 82.62008733624454, "grad_norm": 4.0720654486280755e-08, "learning_rate": 4.461270491071562e-07, "logits/chosen": -1.584851622581482, "logits/rejected": -2.352008819580078, "logps/chosen": -481.04534912109375, "logps/rejected": -5472.0732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1930880546569824, "rewards/margins": 49.62505340576172, "rewards/rejected": -51.818138122558594, "step": 18920 }, { "epoch": 82.66375545851528, "grad_norm": 4.692478678476169e-06, "learning_rate": 4.4395677681646513e-07, "logits/chosen": -1.5494201183319092, "logits/rejected": -2.333789348602295, "logps/chosen": -529.8005981445312, "logps/rejected": -5524.9208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2985947132110596, "rewards/margins": 50.060150146484375, "rewards/rejected": -52.358741760253906, "step": 18930 }, { "epoch": 82.70742358078603, "grad_norm": 3.3090007752623707e-08, "learning_rate": 4.417912817532133e-07, "logits/chosen": -1.5990984439849854, "logits/rejected": -2.3388173580169678, "logps/chosen": -475.70599365234375, "logps/rejected": -5460.1689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0431976318359375, "rewards/margins": 49.695167541503906, "rewards/rejected": -51.738365173339844, "step": 18940 }, { "epoch": 82.75109170305677, "grad_norm": 6.512380469088006e-06, "learning_rate": 4.396305689489394e-07, "logits/chosen": -1.612099289894104, "logits/rejected": -2.303638458251953, "logps/chosen": -490.9554138183594, "logps/rejected": -5393.841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.066244602203369, "rewards/margins": 48.839683532714844, "rewards/rejected": -50.905921936035156, "step": 18950 }, { "epoch": 82.79475982532752, "grad_norm": 1.0094987713685342e-07, "learning_rate": 4.374746434240723e-07, "logits/chosen": -1.555336833000183, "logits/rejected": -2.326612949371338, "logps/chosen": -484.94189453125, "logps/rejected": -5308.9462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2023260593414307, "rewards/margins": 48.04278564453125, "rewards/rejected": -50.245113372802734, "step": 18960 }, { "epoch": 82.83842794759825, "grad_norm": 1.5046500907442588e-08, "learning_rate": 4.353235101879158e-07, "logits/chosen": -1.5174442529678345, "logits/rejected": -2.1504080295562744, "logps/chosen": -507.5682678222656, "logps/rejected": -4710.59619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0413217544555664, "rewards/margins": 42.52840042114258, "rewards/rejected": -44.569725036621094, "step": 18970 }, { "epoch": 82.882096069869, "grad_norm": 1.3372619851347677e-07, "learning_rate": 4.3317717423863955e-07, "logits/chosen": -1.4926555156707764, "logits/rejected": -2.1306891441345215, "logps/chosen": -524.8743286132812, "logps/rejected": -4601.4345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2455313205718994, "rewards/margins": 41.208152770996094, "rewards/rejected": -43.45368194580078, "step": 18980 }, { "epoch": 82.92576419213974, "grad_norm": 7.174020205777466e-08, "learning_rate": 4.3103564056326737e-07, "logits/chosen": -1.5459754467010498, "logits/rejected": -2.266282320022583, "logps/chosen": -488.1231384277344, "logps/rejected": -5213.71533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.186633586883545, "rewards/margins": 47.21937561035156, "rewards/rejected": -49.40601348876953, "step": 18990 }, { "epoch": 82.96943231441048, "grad_norm": 5.171865677576711e-08, "learning_rate": 4.288989141376637e-07, "logits/chosen": -1.5823898315429688, "logits/rejected": -2.3754148483276367, "logps/chosen": -466.28778076171875, "logps/rejected": -5690.953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1079068183898926, "rewards/margins": 51.822967529296875, "rewards/rejected": -53.930877685546875, "step": 19000 }, { "epoch": 83.01310043668123, "grad_norm": 1.0952549511666392e-06, "learning_rate": 4.2676699992652256e-07, "logits/chosen": -1.5911957025527954, "logits/rejected": -2.292746067047119, "logps/chosen": -480.84197998046875, "logps/rejected": -5483.5009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0517449378967285, "rewards/margins": 49.74610900878906, "rewards/rejected": -51.7978515625, "step": 19010 }, { "epoch": 83.05676855895196, "grad_norm": 5.138648361999346e-08, "learning_rate": 4.246399028833603e-07, "logits/chosen": -1.5462353229522705, "logits/rejected": -2.2732434272766113, "logps/chosen": -502.5686950683594, "logps/rejected": -5114.2529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1379623413085938, "rewards/margins": 46.283355712890625, "rewards/rejected": -48.421321868896484, "step": 19020 }, { "epoch": 83.1004366812227, "grad_norm": 7.186771512070478e-08, "learning_rate": 4.225176279504975e-07, "logits/chosen": -1.577935814857483, "logits/rejected": -2.2726187705993652, "logps/chosen": -480.1861267089844, "logps/rejected": -5150.29443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.266909122467041, "rewards/margins": 46.482322692871094, "rewards/rejected": -48.74922561645508, "step": 19030 }, { "epoch": 83.14410480349345, "grad_norm": 6.287238520141943e-09, "learning_rate": 4.204001800590504e-07, "logits/chosen": -1.6012601852416992, "logits/rejected": -2.367095947265625, "logps/chosen": -467.31982421875, "logps/rejected": -5946.65869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0367372035980225, "rewards/margins": 54.24263381958008, "rewards/rejected": -56.27937698364258, "step": 19040 }, { "epoch": 83.1877729257642, "grad_norm": 1.2858655230084218e-05, "learning_rate": 4.182875641289219e-07, "logits/chosen": -1.576353907585144, "logits/rejected": -2.272916555404663, "logps/chosen": -494.37493896484375, "logps/rejected": -5068.06396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1817424297332764, "rewards/margins": 45.722633361816406, "rewards/rejected": -47.90437698364258, "step": 19050 }, { "epoch": 83.23144104803494, "grad_norm": 1.309378628793573e-07, "learning_rate": 4.1617978506878514e-07, "logits/chosen": -1.5636765956878662, "logits/rejected": -2.231480360031128, "logps/chosen": -499.0029296875, "logps/rejected": -4924.7041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0922133922576904, "rewards/margins": 44.498443603515625, "rewards/rejected": -46.59065628051758, "step": 19060 }, { "epoch": 83.27510917030568, "grad_norm": 1.121504857256944e-07, "learning_rate": 4.1407684777607674e-07, "logits/chosen": -1.5843716859817505, "logits/rejected": -2.2513649463653564, "logps/chosen": -502.2549743652344, "logps/rejected": -5375.20458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2450945377349854, "rewards/margins": 48.53095626831055, "rewards/rejected": -50.77605056762695, "step": 19070 }, { "epoch": 83.31877729257641, "grad_norm": 1.7931282430463787e-05, "learning_rate": 4.119787571369829e-07, "logits/chosen": -1.5559346675872803, "logits/rejected": -2.2744193077087402, "logps/chosen": -505.3816833496094, "logps/rejected": -5288.4560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.268620014190674, "rewards/margins": 47.6877326965332, "rewards/rejected": -49.95635223388672, "step": 19080 }, { "epoch": 83.36244541484716, "grad_norm": 9.324599719348839e-08, "learning_rate": 4.0988551802642856e-07, "logits/chosen": -1.5436437129974365, "logits/rejected": -2.28792667388916, "logps/chosen": -488.3130798339844, "logps/rejected": -5525.72802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1154181957244873, "rewards/margins": 50.17821502685547, "rewards/rejected": -52.29363250732422, "step": 19090 }, { "epoch": 83.4061135371179, "grad_norm": 3.226097005000989e-08, "learning_rate": 4.0779713530806506e-07, "logits/chosen": -1.5304944515228271, "logits/rejected": -2.29361629486084, "logps/chosen": -514.78173828125, "logps/rejected": -5018.3623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2988009452819824, "rewards/margins": 45.144248962402344, "rewards/rejected": -47.44304656982422, "step": 19100 }, { "epoch": 83.44978165938865, "grad_norm": 3.0906585388161896e-06, "learning_rate": 4.0571361383426125e-07, "logits/chosen": -1.5614368915557861, "logits/rejected": -2.2677483558654785, "logps/chosen": -472.5225524902344, "logps/rejected": -5106.1572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.112255811691284, "rewards/margins": 46.19322204589844, "rewards/rejected": -48.30547332763672, "step": 19110 }, { "epoch": 83.4934497816594, "grad_norm": 2.2593594094714815e-07, "learning_rate": 4.0363495844609134e-07, "logits/chosen": -1.5689438581466675, "logits/rejected": -2.3159613609313965, "logps/chosen": -491.56134033203125, "logps/rejected": -5128.533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.243269681930542, "rewards/margins": 46.29480743408203, "rewards/rejected": -48.5380744934082, "step": 19120 }, { "epoch": 83.53711790393012, "grad_norm": 7.035417637214021e-06, "learning_rate": 4.0156117397332077e-07, "logits/chosen": -1.5820597410202026, "logits/rejected": -2.265023708343506, "logps/chosen": -487.0723571777344, "logps/rejected": -5469.9033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.161221742630005, "rewards/margins": 49.55350875854492, "rewards/rejected": -51.7147331237793, "step": 19130 }, { "epoch": 83.58078602620087, "grad_norm": 7.03305662907292e-08, "learning_rate": 3.994922652344005e-07, "logits/chosen": -1.6056209802627563, "logits/rejected": -2.313842535018921, "logps/chosen": -482.5357360839844, "logps/rejected": -5725.74462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1142430305480957, "rewards/margins": 51.89069366455078, "rewards/rejected": -54.00493240356445, "step": 19140 }, { "epoch": 83.62445414847161, "grad_norm": 3.682131715123898e-07, "learning_rate": 3.974282370364499e-07, "logits/chosen": -1.4865977764129639, "logits/rejected": -2.183180570602417, "logps/chosen": -500.1593322753906, "logps/rejected": -4554.8515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.144853115081787, "rewards/margins": 40.884925842285156, "rewards/rejected": -43.02977752685547, "step": 19150 }, { "epoch": 83.66812227074236, "grad_norm": 4.827734954158073e-06, "learning_rate": 3.9536909417524886e-07, "logits/chosen": -1.4792354106903076, "logits/rejected": -2.1788721084594727, "logps/chosen": -510.38690185546875, "logps/rejected": -4533.705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.114609956741333, "rewards/margins": 40.69552993774414, "rewards/rejected": -42.81013488769531, "step": 19160 }, { "epoch": 83.7117903930131, "grad_norm": 6.296135140575833e-08, "learning_rate": 3.9331484143522893e-07, "logits/chosen": -1.5908949375152588, "logits/rejected": -2.3490185737609863, "logps/chosen": -458.65045166015625, "logps/rejected": -5409.4072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0668258666992188, "rewards/margins": 49.05574035644531, "rewards/rejected": -51.1225700378418, "step": 19170 }, { "epoch": 83.75545851528385, "grad_norm": 6.16184700456954e-08, "learning_rate": 3.912654835894564e-07, "logits/chosen": -1.5169603824615479, "logits/rejected": -2.2194011211395264, "logps/chosen": -527.4072265625, "logps/rejected": -4831.03515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.136065721511841, "rewards/margins": 43.447715759277344, "rewards/rejected": -45.58379364013672, "step": 19180 }, { "epoch": 83.79912663755458, "grad_norm": 1.1605384176088611e-06, "learning_rate": 3.8922102539962427e-07, "logits/chosen": -1.5358597040176392, "logits/rejected": -2.1630120277404785, "logps/chosen": -520.7862548828125, "logps/rejected": -4645.13623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1855309009552, "rewards/margins": 41.6270637512207, "rewards/rejected": -43.812599182128906, "step": 19190 }, { "epoch": 83.84279475982532, "grad_norm": 1.823484538747479e-07, "learning_rate": 3.87181471616043e-07, "logits/chosen": -1.572108507156372, "logits/rejected": -2.3298707008361816, "logps/chosen": -477.72442626953125, "logps/rejected": -5171.65625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0466465950012207, "rewards/margins": 46.93382263183594, "rewards/rejected": -48.98046875, "step": 19200 }, { "epoch": 83.88646288209607, "grad_norm": 3.96904307833122e-06, "learning_rate": 3.8514682697762706e-07, "logits/chosen": -1.5859041213989258, "logits/rejected": -2.2905640602111816, "logps/chosen": -459.92755126953125, "logps/rejected": -5245.72900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1303000450134277, "rewards/margins": 47.493064880371094, "rewards/rejected": -49.62337112426758, "step": 19210 }, { "epoch": 83.93013100436681, "grad_norm": 5.7482953666952996e-08, "learning_rate": 3.8311709621188297e-07, "logits/chosen": -1.5585719347000122, "logits/rejected": -2.2985308170318604, "logps/chosen": -494.5392150878906, "logps/rejected": -5088.5390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1388769149780273, "rewards/margins": 46.08189010620117, "rewards/rejected": -48.22077178955078, "step": 19220 }, { "epoch": 83.97379912663756, "grad_norm": 3.800633072881829e-05, "learning_rate": 3.810922840349027e-07, "logits/chosen": -1.4849426746368408, "logits/rejected": -2.2067105770111084, "logps/chosen": -527.8108520507812, "logps/rejected": -4748.3740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.045558214187622, "rewards/margins": 42.92285919189453, "rewards/rejected": -44.96841049194336, "step": 19230 }, { "epoch": 84.0174672489083, "grad_norm": 1.543254150900505e-08, "learning_rate": 3.7907239515134697e-07, "logits/chosen": -1.5836178064346313, "logits/rejected": -2.263866424560547, "logps/chosen": -482.0669860839844, "logps/rejected": -5094.6943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.094357967376709, "rewards/margins": 46.06950759887695, "rewards/rejected": -48.16386413574219, "step": 19240 }, { "epoch": 84.06113537117903, "grad_norm": 9.604841566719458e-09, "learning_rate": 3.7705743425443755e-07, "logits/chosen": -1.5439178943634033, "logits/rejected": -2.311354160308838, "logps/chosen": -500.1302795410156, "logps/rejected": -5301.9501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1007630825042725, "rewards/margins": 48.12688064575195, "rewards/rejected": -50.2276496887207, "step": 19250 }, { "epoch": 84.10480349344978, "grad_norm": 3.9277089646134527e-08, "learning_rate": 3.750474060259493e-07, "logits/chosen": -1.5651706457138062, "logits/rejected": -2.2286839485168457, "logps/chosen": -490.82452392578125, "logps/rejected": -5176.6123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.081531047821045, "rewards/margins": 46.88728713989258, "rewards/rejected": -48.96881866455078, "step": 19260 }, { "epoch": 84.14847161572052, "grad_norm": 1.7922679111512365e-05, "learning_rate": 3.7304231513619225e-07, "logits/chosen": -1.5778967142105103, "logits/rejected": -2.238898515701294, "logps/chosen": -506.08087158203125, "logps/rejected": -4892.7109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2238376140594482, "rewards/margins": 44.03321075439453, "rewards/rejected": -46.257041931152344, "step": 19270 }, { "epoch": 84.19213973799127, "grad_norm": 2.894862131498324e-08, "learning_rate": 3.7104216624400503e-07, "logits/chosen": -1.5744071006774902, "logits/rejected": -2.287381887435913, "logps/chosen": -480.57244873046875, "logps/rejected": -5428.69189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1153783798217773, "rewards/margins": 49.3353157043457, "rewards/rejected": -51.4506950378418, "step": 19280 }, { "epoch": 84.23580786026201, "grad_norm": 2.9422089979381274e-06, "learning_rate": 3.6904696399674616e-07, "logits/chosen": -1.5403966903686523, "logits/rejected": -2.244507312774658, "logps/chosen": -499.4248962402344, "logps/rejected": -5032.12353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.198294162750244, "rewards/margins": 45.34444046020508, "rewards/rejected": -47.54273223876953, "step": 19290 }, { "epoch": 84.27947598253274, "grad_norm": 7.964743019455235e-08, "learning_rate": 3.6705671303027687e-07, "logits/chosen": -1.5295541286468506, "logits/rejected": -2.246896743774414, "logps/chosen": -480.85601806640625, "logps/rejected": -5002.2998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.18554949760437, "rewards/margins": 45.171180725097656, "rewards/rejected": -47.35673141479492, "step": 19300 }, { "epoch": 84.32314410480349, "grad_norm": 3.4639688148320026e-08, "learning_rate": 3.6507141796895686e-07, "logits/chosen": -1.5859673023223877, "logits/rejected": -2.2876157760620117, "logps/chosen": -499.9637756347656, "logps/rejected": -5374.52685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.094696521759033, "rewards/margins": 48.71870422363281, "rewards/rejected": -50.81340026855469, "step": 19310 }, { "epoch": 84.36681222707423, "grad_norm": 4.857483290383141e-08, "learning_rate": 3.6309108342563015e-07, "logits/chosen": -1.5970566272735596, "logits/rejected": -2.3383071422576904, "logps/chosen": -493.0108947753906, "logps/rejected": -5306.70458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1486692428588867, "rewards/margins": 48.0697021484375, "rewards/rejected": -50.21836853027344, "step": 19320 }, { "epoch": 84.41048034934498, "grad_norm": 4.8011218572246144e-08, "learning_rate": 3.6111571400161387e-07, "logits/chosen": -1.527093768119812, "logits/rejected": -2.2176501750946045, "logps/chosen": -510.78350830078125, "logps/rejected": -4703.2431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1862783432006836, "rewards/margins": 42.31322479248047, "rewards/rejected": -44.49950408935547, "step": 19330 }, { "epoch": 84.45414847161572, "grad_norm": 7.070721304055972e-06, "learning_rate": 3.591453142866899e-07, "logits/chosen": -1.557036280632019, "logits/rejected": -2.2213666439056396, "logps/chosen": -486.72686767578125, "logps/rejected": -4786.49560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0936338901519775, "rewards/margins": 43.199005126953125, "rewards/rejected": -45.292640686035156, "step": 19340 }, { "epoch": 84.49781659388647, "grad_norm": 7.262846666768947e-07, "learning_rate": 3.57179888859093e-07, "logits/chosen": -1.6121017932891846, "logits/rejected": -2.3079869747161865, "logps/chosen": -488.1998596191406, "logps/rejected": -5518.0693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1246695518493652, "rewards/margins": 50.0385627746582, "rewards/rejected": -52.16322708129883, "step": 19350 }, { "epoch": 84.5414847161572, "grad_norm": 6.818473660385695e-06, "learning_rate": 3.5521944228549903e-07, "logits/chosen": -1.552321434020996, "logits/rejected": -2.296072483062744, "logps/chosen": -477.5176696777344, "logps/rejected": -5239.09619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.080317735671997, "rewards/margins": 47.54497146606445, "rewards/rejected": -49.62528610229492, "step": 19360 }, { "epoch": 84.58515283842794, "grad_norm": 5.2820350015613284e-08, "learning_rate": 3.532639791210157e-07, "logits/chosen": -1.5530576705932617, "logits/rejected": -2.197892904281616, "logps/chosen": -492.68939208984375, "logps/rejected": -4720.2177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.16201114654541, "rewards/margins": 42.400325775146484, "rewards/rejected": -44.562339782714844, "step": 19370 }, { "epoch": 84.62882096069869, "grad_norm": 3.0031197477579575e-08, "learning_rate": 3.5131350390917334e-07, "logits/chosen": -1.6395683288574219, "logits/rejected": -2.355489730834961, "logps/chosen": -502.8497009277344, "logps/rejected": -5624.71875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1755192279815674, "rewards/margins": 50.89862823486328, "rewards/rejected": -53.07414627075195, "step": 19380 }, { "epoch": 84.67248908296943, "grad_norm": 2.1019443674777578e-07, "learning_rate": 3.493680211819103e-07, "logits/chosen": -1.5564067363739014, "logits/rejected": -2.218263626098633, "logps/chosen": -476.64141845703125, "logps/rejected": -5357.171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1198925971984863, "rewards/margins": 48.517513275146484, "rewards/rejected": -50.63740921020508, "step": 19390 }, { "epoch": 84.71615720524018, "grad_norm": 4.386085322535247e-08, "learning_rate": 3.474275354595666e-07, "logits/chosen": -1.5686836242675781, "logits/rejected": -2.253365993499756, "logps/chosen": -495.8636779785156, "logps/rejected": -5077.05224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.164057970046997, "rewards/margins": 45.8026237487793, "rewards/rejected": -47.96668243408203, "step": 19400 }, { "epoch": 84.75982532751091, "grad_norm": 4.1801742264988264e-07, "learning_rate": 3.454920512508719e-07, "logits/chosen": -1.6259832382202148, "logits/rejected": -2.483484983444214, "logps/chosen": -464.68585205078125, "logps/rejected": -5987.8564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.08894681930542, "rewards/margins": 54.59685134887695, "rewards/rejected": -56.68579864501953, "step": 19410 }, { "epoch": 84.80349344978166, "grad_norm": 7.943960222483625e-08, "learning_rate": 3.435615730529329e-07, "logits/chosen": -1.6228630542755127, "logits/rejected": -2.3208425045013428, "logps/chosen": -489.46923828125, "logps/rejected": -5787.439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1238956451416016, "rewards/margins": 52.60987091064453, "rewards/rejected": -54.7337646484375, "step": 19420 }, { "epoch": 84.8471615720524, "grad_norm": 9.805941972627444e-08, "learning_rate": 3.416361053512274e-07, "logits/chosen": -1.5525022745132446, "logits/rejected": -2.245651960372925, "logps/chosen": -485.76043701171875, "logps/rejected": -5064.39990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2241406440734863, "rewards/margins": 45.64731216430664, "rewards/rejected": -47.87145233154297, "step": 19430 }, { "epoch": 84.89082969432314, "grad_norm": 2.9383565062559734e-06, "learning_rate": 3.3971565261958854e-07, "logits/chosen": -1.5707292556762695, "logits/rejected": -2.327641248703003, "logps/chosen": -487.531982421875, "logps/rejected": -5191.3818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1749911308288574, "rewards/margins": 47.00001907348633, "rewards/rejected": -49.175010681152344, "step": 19440 }, { "epoch": 84.93449781659389, "grad_norm": 2.0379967841815545e-07, "learning_rate": 3.3780021932019986e-07, "logits/chosen": -1.589458703994751, "logits/rejected": -2.284543991088867, "logps/chosen": -495.2982482910156, "logps/rejected": -5411.1943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.230839967727661, "rewards/margins": 49.02727508544922, "rewards/rejected": -51.25811004638672, "step": 19450 }, { "epoch": 84.97816593886463, "grad_norm": 1.2606270457731703e-07, "learning_rate": 3.358898099035793e-07, "logits/chosen": -1.5350637435913086, "logits/rejected": -2.2437808513641357, "logps/chosen": -478.5104064941406, "logps/rejected": -5072.55078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.201319694519043, "rewards/margins": 45.84177017211914, "rewards/rejected": -48.0430908203125, "step": 19460 }, { "epoch": 85.02183406113537, "grad_norm": 3.4109470416023854e-06, "learning_rate": 3.33984428808575e-07, "logits/chosen": -1.5634770393371582, "logits/rejected": -2.2876460552215576, "logps/chosen": -492.05731201171875, "logps/rejected": -5474.9365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1280124187469482, "rewards/margins": 49.64388656616211, "rewards/rejected": -51.77190017700195, "step": 19470 }, { "epoch": 85.06550218340611, "grad_norm": 4.806858619723292e-08, "learning_rate": 3.3208408046234904e-07, "logits/chosen": -1.602598786354065, "logits/rejected": -2.3447155952453613, "logps/chosen": -487.4937438964844, "logps/rejected": -5834.85302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1049485206604004, "rewards/margins": 53.036460876464844, "rewards/rejected": -55.14141082763672, "step": 19480 }, { "epoch": 85.10917030567686, "grad_norm": 7.909480471545037e-09, "learning_rate": 3.30188769280371e-07, "logits/chosen": -1.5239986181259155, "logits/rejected": -2.243525505065918, "logps/chosen": -527.2245483398438, "logps/rejected": -5116.1962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2215161323547363, "rewards/margins": 46.20650863647461, "rewards/rejected": -48.42802429199219, "step": 19490 }, { "epoch": 85.1528384279476, "grad_norm": 1.580131089683636e-08, "learning_rate": 3.282984996664076e-07, "logits/chosen": -1.5999497175216675, "logits/rejected": -2.304098129272461, "logps/chosen": -497.6376953125, "logps/rejected": -5479.18994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.278522491455078, "rewards/margins": 49.43734359741211, "rewards/rejected": -51.71586990356445, "step": 19500 }, { "epoch": 85.19650655021834, "grad_norm": 1.4246164343327838e-08, "learning_rate": 3.264132760125091e-07, "logits/chosen": -1.5549557209014893, "logits/rejected": -2.2888083457946777, "logps/chosen": -523.0711059570312, "logps/rejected": -5164.16259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.204104423522949, "rewards/margins": 46.638427734375, "rewards/rejected": -48.842533111572266, "step": 19510 }, { "epoch": 85.24017467248909, "grad_norm": 1.002477363606799e-07, "learning_rate": 3.2453310269900397e-07, "logits/chosen": -1.5480397939682007, "logits/rejected": -2.318152904510498, "logps/chosen": -502.42681884765625, "logps/rejected": -5362.4609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2059643268585205, "rewards/margins": 48.53284454345703, "rewards/rejected": -50.738807678222656, "step": 19520 }, { "epoch": 85.28384279475982, "grad_norm": 5.207865314319757e-08, "learning_rate": 3.2265798409448377e-07, "logits/chosen": -1.5568745136260986, "logits/rejected": -2.3136935234069824, "logps/chosen": -491.8072204589844, "logps/rejected": -5453.36376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.023921489715576, "rewards/margins": 49.656883239746094, "rewards/rejected": -51.680809020996094, "step": 19530 }, { "epoch": 85.32751091703057, "grad_norm": 2.7969945209890393e-07, "learning_rate": 3.2078792455579765e-07, "logits/chosen": -1.5428476333618164, "logits/rejected": -2.274570941925049, "logps/chosen": -507.0179748535156, "logps/rejected": -4996.984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.096226215362549, "rewards/margins": 45.1682243347168, "rewards/rejected": -47.26444625854492, "step": 19540 }, { "epoch": 85.37117903930131, "grad_norm": 6.634730765305575e-08, "learning_rate": 3.189229284280382e-07, "logits/chosen": -1.5390331745147705, "logits/rejected": -2.247204065322876, "logps/chosen": -488.64215087890625, "logps/rejected": -5491.91455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1467461585998535, "rewards/margins": 49.73606491088867, "rewards/rejected": -51.8828125, "step": 19550 }, { "epoch": 85.41484716157206, "grad_norm": 3.0711718272801857e-08, "learning_rate": 3.170630000445346e-07, "logits/chosen": -1.5443092584609985, "logits/rejected": -2.2901129722595215, "logps/chosen": -489.04638671875, "logps/rejected": -5114.51904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.175940990447998, "rewards/margins": 46.16167068481445, "rewards/rejected": -48.337608337402344, "step": 19560 }, { "epoch": 85.4585152838428, "grad_norm": 2.0409970278481955e-07, "learning_rate": 3.152081437268398e-07, "logits/chosen": -1.572504997253418, "logits/rejected": -2.3631889820098877, "logps/chosen": -474.25213623046875, "logps/rejected": -5624.158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0639755725860596, "rewards/margins": 51.08721923828125, "rewards/rejected": -53.15119552612305, "step": 19570 }, { "epoch": 85.50218340611353, "grad_norm": 6.297767248192974e-07, "learning_rate": 3.1335836378472233e-07, "logits/chosen": -1.518080711364746, "logits/rejected": -2.2653961181640625, "logps/chosen": -476.650634765625, "logps/rejected": -4965.93408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.106555700302124, "rewards/margins": 44.845703125, "rewards/rejected": -46.95225524902344, "step": 19580 }, { "epoch": 85.54585152838428, "grad_norm": 3.838017198564293e-06, "learning_rate": 3.115136645161568e-07, "logits/chosen": -1.6104261875152588, "logits/rejected": -2.312394142150879, "logps/chosen": -476.70440673828125, "logps/rejected": -5393.1474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1440443992614746, "rewards/margins": 48.890464782714844, "rewards/rejected": -51.034507751464844, "step": 19590 }, { "epoch": 85.58951965065502, "grad_norm": 5.0182231951438986e-08, "learning_rate": 3.0967405020731033e-07, "logits/chosen": -1.5365079641342163, "logits/rejected": -2.2834620475769043, "logps/chosen": -503.73516845703125, "logps/rejected": -5018.7880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.172755241394043, "rewards/margins": 45.368751525878906, "rewards/rejected": -47.54151153564453, "step": 19600 }, { "epoch": 85.63318777292577, "grad_norm": 4.775409657637416e-08, "learning_rate": 3.0783952513253806e-07, "logits/chosen": -1.6219806671142578, "logits/rejected": -2.507572650909424, "logps/chosen": -443.21435546875, "logps/rejected": -5982.49853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.036068916320801, "rewards/margins": 54.62187957763672, "rewards/rejected": -56.657958984375, "step": 19610 }, { "epoch": 85.67685589519651, "grad_norm": 2.0503192610853134e-07, "learning_rate": 3.0601009355436834e-07, "logits/chosen": -1.5229629278182983, "logits/rejected": -2.185985803604126, "logps/chosen": -537.8176879882812, "logps/rejected": -4788.94921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2197694778442383, "rewards/margins": 43.03478240966797, "rewards/rejected": -45.254547119140625, "step": 19620 }, { "epoch": 85.72052401746726, "grad_norm": 1.0229876410984204e-07, "learning_rate": 3.0418575972349403e-07, "logits/chosen": -1.5462985038757324, "logits/rejected": -2.2641139030456543, "logps/chosen": -502.3267517089844, "logps/rejected": -5213.48583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1839280128479004, "rewards/margins": 47.16411590576172, "rewards/rejected": -49.348052978515625, "step": 19630 }, { "epoch": 85.76419213973799, "grad_norm": 7.867449121984006e-08, "learning_rate": 3.023665278787666e-07, "logits/chosen": -1.6077568531036377, "logits/rejected": -2.3561110496520996, "logps/chosen": -485.5042419433594, "logps/rejected": -5551.55712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1230340003967285, "rewards/margins": 50.379032135009766, "rewards/rejected": -52.50206756591797, "step": 19640 }, { "epoch": 85.80786026200873, "grad_norm": 3.113369662240758e-08, "learning_rate": 3.005524022471798e-07, "logits/chosen": -1.6147282123565674, "logits/rejected": -2.387632131576538, "logps/chosen": -467.6737365722656, "logps/rejected": -5345.998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.129769802093506, "rewards/margins": 48.49060821533203, "rewards/rejected": -50.62037658691406, "step": 19650 }, { "epoch": 85.85152838427948, "grad_norm": 1.5853419939460548e-08, "learning_rate": 2.987433870438641e-07, "logits/chosen": -1.5320026874542236, "logits/rejected": -2.199389696121216, "logps/chosen": -535.3780517578125, "logps/rejected": -4817.57421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2085280418395996, "rewards/margins": 43.24765396118164, "rewards/rejected": -45.456180572509766, "step": 19660 }, { "epoch": 85.89519650655022, "grad_norm": 7.363877546521116e-08, "learning_rate": 2.9693948647207624e-07, "logits/chosen": -1.5143260955810547, "logits/rejected": -2.148230791091919, "logps/chosen": -523.8875732421875, "logps/rejected": -4505.84130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.25679349899292, "rewards/margins": 40.349647521972656, "rewards/rejected": -42.60643768310547, "step": 19670 }, { "epoch": 85.93886462882097, "grad_norm": 1.4376706435145246e-07, "learning_rate": 2.951407047231897e-07, "logits/chosen": -1.5504359006881714, "logits/rejected": -2.2325692176818848, "logps/chosen": -514.2340087890625, "logps/rejected": -4819.7900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1682770252227783, "rewards/margins": 43.41352081298828, "rewards/rejected": -45.58179473876953, "step": 19680 }, { "epoch": 85.9825327510917, "grad_norm": 1.4332517155822943e-06, "learning_rate": 2.9334704597668213e-07, "logits/chosen": -1.535875916481018, "logits/rejected": -2.252805233001709, "logps/chosen": -494.696533203125, "logps/rejected": -4782.42529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.16654372215271, "rewards/margins": 43.115623474121094, "rewards/rejected": -45.28217315673828, "step": 19690 }, { "epoch": 86.02620087336244, "grad_norm": 3.645292705010985e-08, "learning_rate": 2.915585144001304e-07, "logits/chosen": -1.6356195211410522, "logits/rejected": -2.422306537628174, "logps/chosen": -461.64910888671875, "logps/rejected": -5842.57861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1238579750061035, "rewards/margins": 53.235435485839844, "rewards/rejected": -55.35930633544922, "step": 19700 }, { "epoch": 86.06986899563319, "grad_norm": 1.066089982747035e-08, "learning_rate": 2.897751141491967e-07, "logits/chosen": -1.5768171548843384, "logits/rejected": -2.4604029655456543, "logps/chosen": -466.32696533203125, "logps/rejected": -5308.2978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1246399879455566, "rewards/margins": 48.24998092651367, "rewards/rejected": -50.3746223449707, "step": 19710 }, { "epoch": 86.11353711790393, "grad_norm": 1.6928045278597374e-05, "learning_rate": 2.8799684936761996e-07, "logits/chosen": -1.5341461896896362, "logits/rejected": -2.2333078384399414, "logps/chosen": -498.8076171875, "logps/rejected": -5003.75927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1392593383789062, "rewards/margins": 45.17450714111328, "rewards/rejected": -47.31376266479492, "step": 19720 }, { "epoch": 86.15720524017468, "grad_norm": 3.61638443509234e-06, "learning_rate": 2.862237241872101e-07, "logits/chosen": -1.5186423063278198, "logits/rejected": -2.2616405487060547, "logps/chosen": -508.37408447265625, "logps/rejected": -5118.8798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2249081134796143, "rewards/margins": 46.180843353271484, "rewards/rejected": -48.4057502746582, "step": 19730 }, { "epoch": 86.20087336244542, "grad_norm": 1.4212539534887091e-07, "learning_rate": 2.8445574272783186e-07, "logits/chosen": -1.5722206830978394, "logits/rejected": -2.365119457244873, "logps/chosen": -509.32769775390625, "logps/rejected": -5087.46875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4149298667907715, "rewards/margins": 45.82640838623047, "rewards/rejected": -48.24134063720703, "step": 19740 }, { "epoch": 86.24454148471615, "grad_norm": 1.5189883956704742e-05, "learning_rate": 2.82692909097399e-07, "logits/chosen": -1.541709065437317, "logits/rejected": -2.1277475357055664, "logps/chosen": -553.6173706054688, "logps/rejected": -4437.58056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2687079906463623, "rewards/margins": 39.58857727050781, "rewards/rejected": -41.85728454589844, "step": 19750 }, { "epoch": 86.2882096069869, "grad_norm": 3.975434138611257e-06, "learning_rate": 2.809352273918661e-07, "logits/chosen": -1.621159553527832, "logits/rejected": -2.285393714904785, "logps/chosen": -487.9529724121094, "logps/rejected": -5192.7568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1987197399139404, "rewards/margins": 46.85503005981445, "rewards/rejected": -49.053749084472656, "step": 19760 }, { "epoch": 86.33187772925764, "grad_norm": 3.0716116865056964e-07, "learning_rate": 2.791827016952153e-07, "logits/chosen": -1.588104009628296, "logits/rejected": -2.3206734657287598, "logps/chosen": -481.8457946777344, "logps/rejected": -5413.12646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0956833362579346, "rewards/margins": 49.1613883972168, "rewards/rejected": -51.2570686340332, "step": 19770 }, { "epoch": 86.37554585152839, "grad_norm": 6.764133161029847e-08, "learning_rate": 2.7743533607944936e-07, "logits/chosen": -1.575331687927246, "logits/rejected": -2.3440957069396973, "logps/chosen": -509.0668029785156, "logps/rejected": -5387.63916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2669217586517334, "rewards/margins": 48.69877624511719, "rewards/rejected": -50.96570587158203, "step": 19780 }, { "epoch": 86.41921397379913, "grad_norm": 1.059713320796309e-06, "learning_rate": 2.756931346045824e-07, "logits/chosen": -1.5027400255203247, "logits/rejected": -2.1315133571624756, "logps/chosen": -514.4561157226562, "logps/rejected": -4364.15087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.234323263168335, "rewards/margins": 38.99996566772461, "rewards/rejected": -41.234291076660156, "step": 19790 }, { "epoch": 86.46288209606988, "grad_norm": 2.4066633179230936e-08, "learning_rate": 2.7395610131862816e-07, "logits/chosen": -1.5776771306991577, "logits/rejected": -2.2286438941955566, "logps/chosen": -479.99267578125, "logps/rejected": -5219.5283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1452252864837646, "rewards/margins": 47.248931884765625, "rewards/rejected": -49.394161224365234, "step": 19800 }, { "epoch": 86.5065502183406, "grad_norm": 3.022253446959296e-06, "learning_rate": 2.722242402575928e-07, "logits/chosen": -1.5768821239471436, "logits/rejected": -2.323822259902954, "logps/chosen": -486.86138916015625, "logps/rejected": -5420.23046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0424342155456543, "rewards/margins": 49.27257537841797, "rewards/rejected": -51.31501007080078, "step": 19810 }, { "epoch": 86.55021834061135, "grad_norm": 8.721079648967087e-08, "learning_rate": 2.7049755544546476e-07, "logits/chosen": -1.5514212846755981, "logits/rejected": -2.317405939102173, "logps/chosen": -472.773193359375, "logps/rejected": -5069.2216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.080364227294922, "rewards/margins": 45.92066955566406, "rewards/rejected": -48.00102996826172, "step": 19820 }, { "epoch": 86.5938864628821, "grad_norm": 7.040741993776097e-07, "learning_rate": 2.687760508942064e-07, "logits/chosen": -1.6043792963027954, "logits/rejected": -2.42596435546875, "logps/chosen": -493.00140380859375, "logps/rejected": -5435.99853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.210507869720459, "rewards/margins": 49.31052780151367, "rewards/rejected": -51.521034240722656, "step": 19830 }, { "epoch": 86.63755458515284, "grad_norm": 2.7204439359988597e-08, "learning_rate": 2.670597306037412e-07, "logits/chosen": -1.5302464962005615, "logits/rejected": -2.348991870880127, "logps/chosen": -479.04876708984375, "logps/rejected": -5168.2431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1182665824890137, "rewards/margins": 46.80312728881836, "rewards/rejected": -48.92139434814453, "step": 19840 }, { "epoch": 86.68122270742359, "grad_norm": 1.5008094546753248e-06, "learning_rate": 2.6534859856195e-07, "logits/chosen": -1.5468051433563232, "logits/rejected": -2.29113507270813, "logps/chosen": -511.2704162597656, "logps/rejected": -5299.2587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3012924194335938, "rewards/margins": 47.8231086730957, "rewards/rejected": -50.12440490722656, "step": 19850 }, { "epoch": 86.72489082969432, "grad_norm": 1.3426930691160597e-07, "learning_rate": 2.636426587446561e-07, "logits/chosen": -1.5740551948547363, "logits/rejected": -2.3124094009399414, "logps/chosen": -485.1036682128906, "logps/rejected": -5390.0302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.10416841506958, "rewards/margins": 48.94598388671875, "rewards/rejected": -51.050148010253906, "step": 19860 }, { "epoch": 86.76855895196506, "grad_norm": 1.1175875385363135e-06, "learning_rate": 2.619419151156205e-07, "logits/chosen": -1.5315743684768677, "logits/rejected": -2.2081046104431152, "logps/chosen": -482.61114501953125, "logps/rejected": -5190.67138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0910680294036865, "rewards/margins": 47.03160095214844, "rewards/rejected": -49.12266540527344, "step": 19870 }, { "epoch": 86.8122270742358, "grad_norm": 2.6305664674685524e-07, "learning_rate": 2.602463716265302e-07, "logits/chosen": -1.561855435371399, "logits/rejected": -2.2502601146698, "logps/chosen": -494.0650329589844, "logps/rejected": -4938.7783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1366710662841797, "rewards/margins": 44.60629653930664, "rewards/rejected": -46.74296951293945, "step": 19880 }, { "epoch": 86.85589519650655, "grad_norm": 4.9458683643289404e-08, "learning_rate": 2.585560322169894e-07, "logits/chosen": -1.5491034984588623, "logits/rejected": -2.167715549468994, "logps/chosen": -523.4365844726562, "logps/rejected": -4905.25439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4806132316589355, "rewards/margins": 43.753868103027344, "rewards/rejected": -46.2344856262207, "step": 19890 }, { "epoch": 86.8995633187773, "grad_norm": 4.967429523020085e-06, "learning_rate": 2.568709008145104e-07, "logits/chosen": -1.53353750705719, "logits/rejected": -2.2035458087921143, "logps/chosen": -538.1681518554688, "logps/rejected": -4892.88330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.160893678665161, "rewards/margins": 44.128822326660156, "rewards/rejected": -46.28971481323242, "step": 19900 }, { "epoch": 86.94323144104804, "grad_norm": 2.670738114158982e-07, "learning_rate": 2.551909813345049e-07, "logits/chosen": -1.6036913394927979, "logits/rejected": -2.362546920776367, "logps/chosen": -497.70819091796875, "logps/rejected": -5399.890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.155869722366333, "rewards/margins": 48.88512420654297, "rewards/rejected": -51.040992736816406, "step": 19910 }, { "epoch": 86.98689956331877, "grad_norm": 5.81602724584502e-08, "learning_rate": 2.5351627768027604e-07, "logits/chosen": -1.5831420421600342, "logits/rejected": -2.3367247581481934, "logps/chosen": -454.83428955078125, "logps/rejected": -5469.02685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.061924457550049, "rewards/margins": 49.731727600097656, "rewards/rejected": -51.79364776611328, "step": 19920 }, { "epoch": 87.03056768558952, "grad_norm": 9.267164719650196e-08, "learning_rate": 2.5184679374300553e-07, "logits/chosen": -1.576861023902893, "logits/rejected": -2.2972700595855713, "logps/chosen": -480.48760986328125, "logps/rejected": -5127.41259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1381256580352783, "rewards/margins": 46.39738082885742, "rewards/rejected": -48.53550720214844, "step": 19930 }, { "epoch": 87.07423580786026, "grad_norm": 1.0067567858585228e-06, "learning_rate": 2.501825334017488e-07, "logits/chosen": -1.5618183612823486, "logits/rejected": -2.216675281524658, "logps/chosen": -511.3865661621094, "logps/rejected": -5168.5107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.139359712600708, "rewards/margins": 46.648719787597656, "rewards/rejected": -48.7880744934082, "step": 19940 }, { "epoch": 87.117903930131, "grad_norm": 7.606673282247968e-07, "learning_rate": 2.485235005234238e-07, "logits/chosen": -1.5286606550216675, "logits/rejected": -2.356675624847412, "logps/chosen": -504.47283935546875, "logps/rejected": -5465.4189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.154846668243408, "rewards/margins": 49.66178512573242, "rewards/rejected": -51.816627502441406, "step": 19950 }, { "epoch": 87.16157205240175, "grad_norm": 1.9363539414607554e-07, "learning_rate": 2.468696989628008e-07, "logits/chosen": -1.5495903491973877, "logits/rejected": -2.295459032058716, "logps/chosen": -520.575439453125, "logps/rejected": -4886.435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2318899631500244, "rewards/margins": 43.93592071533203, "rewards/rejected": -46.16781234741211, "step": 19960 }, { "epoch": 87.20524017467248, "grad_norm": 3.7619352614401635e-08, "learning_rate": 2.4522113256249896e-07, "logits/chosen": -1.5696837902069092, "logits/rejected": -2.3542144298553467, "logps/chosen": -504.97808837890625, "logps/rejected": -5255.88232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2483019828796387, "rewards/margins": 47.513607025146484, "rewards/rejected": -49.76190948486328, "step": 19970 }, { "epoch": 87.24890829694323, "grad_norm": 1.2201161047172009e-08, "learning_rate": 2.4357780515296996e-07, "logits/chosen": -1.549822211265564, "logits/rejected": -2.308803081512451, "logps/chosen": -490.06451416015625, "logps/rejected": -4930.7158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1089391708374023, "rewards/margins": 44.51953887939453, "rewards/rejected": -46.62847137451172, "step": 19980 }, { "epoch": 87.29257641921397, "grad_norm": 7.38423461072049e-08, "learning_rate": 2.4193972055249344e-07, "logits/chosen": -1.5960581302642822, "logits/rejected": -2.262871503829956, "logps/chosen": -506.9082946777344, "logps/rejected": -5111.11865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.054957628250122, "rewards/margins": 46.25556945800781, "rewards/rejected": -48.310523986816406, "step": 19990 }, { "epoch": 87.33624454148472, "grad_norm": 6.29420914192307e-08, "learning_rate": 2.4030688256716784e-07, "logits/chosen": -1.5412662029266357, "logits/rejected": -2.2195651531219482, "logps/chosen": -528.048583984375, "logps/rejected": -5096.08642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.298283100128174, "rewards/margins": 45.932212829589844, "rewards/rejected": -48.230491638183594, "step": 20000 }, { "epoch": 87.37991266375546, "grad_norm": 1.0291220201295825e-07, "learning_rate": 2.3867929499090197e-07, "logits/chosen": -1.5338428020477295, "logits/rejected": -2.2154061794281006, "logps/chosen": -513.4488525390625, "logps/rejected": -5028.9951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1157889366149902, "rewards/margins": 45.373538970947266, "rewards/rejected": -47.48933029174805, "step": 20010 }, { "epoch": 87.4235807860262, "grad_norm": 1.2028081556887093e-07, "learning_rate": 2.3705696160540303e-07, "logits/chosen": -1.546244502067566, "logits/rejected": -2.2572181224823, "logps/chosen": -477.53741455078125, "logps/rejected": -5051.09326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1997194290161133, "rewards/margins": 45.64438247680664, "rewards/rejected": -47.8441047668457, "step": 20020 }, { "epoch": 87.46724890829694, "grad_norm": 8.795257279612667e-08, "learning_rate": 2.3543988618017238e-07, "logits/chosen": -1.5292659997940063, "logits/rejected": -2.1576991081237793, "logps/chosen": -522.4639892578125, "logps/rejected": -4695.78955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.124091148376465, "rewards/margins": 42.28856658935547, "rewards/rejected": -44.412654876708984, "step": 20030 }, { "epoch": 87.51091703056768, "grad_norm": 4.0438586158091496e-08, "learning_rate": 2.3382807247249284e-07, "logits/chosen": -1.618896484375, "logits/rejected": -2.4187943935394287, "logps/chosen": -452.494384765625, "logps/rejected": -5833.3876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.124875068664551, "rewards/margins": 53.20500564575195, "rewards/rejected": -55.32988739013672, "step": 20040 }, { "epoch": 87.55458515283843, "grad_norm": 4.191300479605362e-08, "learning_rate": 2.3222152422742146e-07, "logits/chosen": -1.6193863153457642, "logits/rejected": -2.3983407020568848, "logps/chosen": -472.2981872558594, "logps/rejected": -5591.86669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.08034610748291, "rewards/margins": 50.884986877441406, "rewards/rejected": -52.96533966064453, "step": 20050 }, { "epoch": 87.59825327510917, "grad_norm": 7.551697957090816e-08, "learning_rate": 2.3062024517778337e-07, "logits/chosen": -1.56856107711792, "logits/rejected": -2.308684825897217, "logps/chosen": -480.7168884277344, "logps/rejected": -5065.5400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.176499843597412, "rewards/margins": 45.70417022705078, "rewards/rejected": -47.880672454833984, "step": 20060 }, { "epoch": 87.64192139737992, "grad_norm": 2.2316039667657265e-08, "learning_rate": 2.2902423904415828e-07, "logits/chosen": -1.570724606513977, "logits/rejected": -2.2068681716918945, "logps/chosen": -551.1436767578125, "logps/rejected": -5074.4775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.255876064300537, "rewards/margins": 45.80157470703125, "rewards/rejected": -48.05745315551758, "step": 20070 }, { "epoch": 87.68558951965065, "grad_norm": 1.259760015806965e-07, "learning_rate": 2.2743350953487424e-07, "logits/chosen": -1.51918625831604, "logits/rejected": -2.2072465419769287, "logps/chosen": -504.935546875, "logps/rejected": -4861.46337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1274266242980957, "rewards/margins": 43.827022552490234, "rewards/rejected": -45.95444869995117, "step": 20080 }, { "epoch": 87.7292576419214, "grad_norm": 3.8142992804125623e-08, "learning_rate": 2.2584806034600116e-07, "logits/chosen": -1.5586763620376587, "logits/rejected": -2.223032236099243, "logps/chosen": -491.58270263671875, "logps/rejected": -5158.8955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2024600505828857, "rewards/margins": 46.525291442871094, "rewards/rejected": -48.727752685546875, "step": 20090 }, { "epoch": 87.77292576419214, "grad_norm": 3.599404484294862e-07, "learning_rate": 2.2426789516133725e-07, "logits/chosen": -1.575391411781311, "logits/rejected": -2.328822374343872, "logps/chosen": -491.063232421875, "logps/rejected": -5149.32568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1414332389831543, "rewards/margins": 46.64269256591797, "rewards/rejected": -48.78412628173828, "step": 20100 }, { "epoch": 87.81659388646288, "grad_norm": 5.5204294706700066e-08, "learning_rate": 2.2269301765240558e-07, "logits/chosen": -1.5953717231750488, "logits/rejected": -2.3214962482452393, "logps/chosen": -490.5855407714844, "logps/rejected": -5481.27880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.156073570251465, "rewards/margins": 49.75634002685547, "rewards/rejected": -51.91242218017578, "step": 20110 }, { "epoch": 87.86026200873363, "grad_norm": 1.4935121577466874e-07, "learning_rate": 2.2112343147844274e-07, "logits/chosen": -1.554450273513794, "logits/rejected": -2.3522963523864746, "logps/chosen": -478.61041259765625, "logps/rejected": -5212.12255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.085361957550049, "rewards/margins": 47.240352630615234, "rewards/rejected": -49.32571029663086, "step": 20120 }, { "epoch": 87.90393013100437, "grad_norm": 1.4510825596494768e-08, "learning_rate": 2.1955914028639004e-07, "logits/chosen": -1.5207313299179077, "logits/rejected": -2.192288398742676, "logps/chosen": -534.2432250976562, "logps/rejected": -4850.71728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.209442615509033, "rewards/margins": 43.615882873535156, "rewards/rejected": -45.82532501220703, "step": 20130 }, { "epoch": 87.9475982532751, "grad_norm": 5.315699351538027e-07, "learning_rate": 2.1800014771088674e-07, "logits/chosen": -1.5624139308929443, "logits/rejected": -2.2671871185302734, "logps/chosen": -508.35113525390625, "logps/rejected": -5006.93896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1320595741271973, "rewards/margins": 45.28299331665039, "rewards/rejected": -47.4150505065918, "step": 20140 }, { "epoch": 87.99126637554585, "grad_norm": 2.9153627542640615e-06, "learning_rate": 2.1644645737426028e-07, "logits/chosen": -1.6003834009170532, "logits/rejected": -2.3253746032714844, "logps/chosen": -505.3858947753906, "logps/rejected": -5506.880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1876626014709473, "rewards/margins": 49.959983825683594, "rewards/rejected": -52.14764404296875, "step": 20150 }, { "epoch": 88.0349344978166, "grad_norm": 4.872261122736497e-08, "learning_rate": 2.148980728865188e-07, "logits/chosen": -1.5689184665679932, "logits/rejected": -2.314410924911499, "logps/chosen": -494.57110595703125, "logps/rejected": -5221.5068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.164499044418335, "rewards/margins": 47.321449279785156, "rewards/rejected": -49.48594665527344, "step": 20160 }, { "epoch": 88.07860262008734, "grad_norm": 9.102796232094237e-08, "learning_rate": 2.133549978453414e-07, "logits/chosen": -1.590908408164978, "logits/rejected": -2.360880136489868, "logps/chosen": -507.94482421875, "logps/rejected": -5484.19580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1597368717193604, "rewards/margins": 49.75914764404297, "rewards/rejected": -51.91887664794922, "step": 20170 }, { "epoch": 88.12227074235808, "grad_norm": 2.0338361267058526e-08, "learning_rate": 2.118172358360718e-07, "logits/chosen": -1.589264154434204, "logits/rejected": -2.333348274230957, "logps/chosen": -502.1153259277344, "logps/rejected": -5261.26708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2034926414489746, "rewards/margins": 47.64642333984375, "rewards/rejected": -49.849910736083984, "step": 20180 }, { "epoch": 88.16593886462883, "grad_norm": 3.698498577744931e-08, "learning_rate": 2.1028479043170764e-07, "logits/chosen": -1.59165358543396, "logits/rejected": -2.408752918243408, "logps/chosen": -485.9725646972656, "logps/rejected": -5638.86083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1232595443725586, "rewards/margins": 51.22536087036133, "rewards/rejected": -53.3486213684082, "step": 20190 }, { "epoch": 88.20960698689956, "grad_norm": 3.110132688774572e-08, "learning_rate": 2.0875766519289436e-07, "logits/chosen": -1.5485429763793945, "logits/rejected": -2.256307601928711, "logps/chosen": -505.70257568359375, "logps/rejected": -5012.3408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.205604076385498, "rewards/margins": 45.24106216430664, "rewards/rejected": -47.44666290283203, "step": 20200 }, { "epoch": 88.2532751091703, "grad_norm": 8.747932831076561e-08, "learning_rate": 2.0723586366791615e-07, "logits/chosen": -1.5720551013946533, "logits/rejected": -2.3056375980377197, "logps/chosen": -498.73846435546875, "logps/rejected": -5273.5537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1820647716522217, "rewards/margins": 47.69045639038086, "rewards/rejected": -49.872520446777344, "step": 20210 }, { "epoch": 88.29694323144105, "grad_norm": 2.5052896276584666e-06, "learning_rate": 2.0571938939268593e-07, "logits/chosen": -1.591700792312622, "logits/rejected": -2.284336566925049, "logps/chosen": -455.2349548339844, "logps/rejected": -5668.79931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.130709409713745, "rewards/margins": 51.50035858154297, "rewards/rejected": -53.631065368652344, "step": 20220 }, { "epoch": 88.3406113537118, "grad_norm": 2.4777364140391217e-06, "learning_rate": 2.0420824589074073e-07, "logits/chosen": -1.5880963802337646, "logits/rejected": -2.3965892791748047, "logps/chosen": -485.65234375, "logps/rejected": -5392.328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.204761028289795, "rewards/margins": 48.81044006347656, "rewards/rejected": -51.01519775390625, "step": 20230 }, { "epoch": 88.38427947598254, "grad_norm": 9.242996964517438e-07, "learning_rate": 2.027024366732297e-07, "logits/chosen": -1.5620625019073486, "logits/rejected": -2.3303780555725098, "logps/chosen": -526.5432739257812, "logps/rejected": -5382.2841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4511678218841553, "rewards/margins": 48.485008239746094, "rewards/rejected": -50.936180114746094, "step": 20240 }, { "epoch": 88.42794759825327, "grad_norm": 3.7470741909841835e-07, "learning_rate": 2.0120196523890968e-07, "logits/chosen": -1.5805120468139648, "logits/rejected": -2.3446598052978516, "logps/chosen": -504.28790283203125, "logps/rejected": -5446.830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.233950138092041, "rewards/margins": 49.395450592041016, "rewards/rejected": -51.62939453125, "step": 20250 }, { "epoch": 88.47161572052401, "grad_norm": 8.486804554941676e-07, "learning_rate": 1.9970683507413297e-07, "logits/chosen": -1.5204975605010986, "logits/rejected": -2.2747650146484375, "logps/chosen": -522.8360595703125, "logps/rejected": -5037.2578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2908101081848145, "rewards/margins": 45.40169143676758, "rewards/rejected": -47.69249725341797, "step": 20260 }, { "epoch": 88.51528384279476, "grad_norm": 2.9956372184781313e-07, "learning_rate": 1.9821704965284376e-07, "logits/chosen": -1.540094017982483, "logits/rejected": -2.2212791442871094, "logps/chosen": -519.7288208007812, "logps/rejected": -5013.80224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1517131328582764, "rewards/margins": 45.26677703857422, "rewards/rejected": -47.418487548828125, "step": 20270 }, { "epoch": 88.5589519650655, "grad_norm": 7.793564970414406e-08, "learning_rate": 1.967326124365651e-07, "logits/chosen": -1.5142768621444702, "logits/rejected": -2.298435926437378, "logps/chosen": -510.47802734375, "logps/rejected": -5000.4609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.177060604095459, "rewards/margins": 45.084468841552734, "rewards/rejected": -47.261531829833984, "step": 20280 }, { "epoch": 88.60262008733625, "grad_norm": 8.593347709049132e-09, "learning_rate": 1.9525352687439548e-07, "logits/chosen": -1.5093830823898315, "logits/rejected": -2.213759660720825, "logps/chosen": -528.6026611328125, "logps/rejected": -4802.61767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.258549213409424, "rewards/margins": 43.14468765258789, "rewards/rejected": -45.403236389160156, "step": 20290 }, { "epoch": 88.646288209607, "grad_norm": 5.4408289679356266e-08, "learning_rate": 1.9377979640299832e-07, "logits/chosen": -1.5373201370239258, "logits/rejected": -2.335198402404785, "logps/chosen": -481.39306640625, "logps/rejected": -5208.01953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.080726146697998, "rewards/margins": 47.23645782470703, "rewards/rejected": -49.31718826293945, "step": 20300 }, { "epoch": 88.68995633187772, "grad_norm": 7.101458080333435e-08, "learning_rate": 1.92311424446594e-07, "logits/chosen": -1.5717371702194214, "logits/rejected": -2.1932406425476074, "logps/chosen": -534.1177368164062, "logps/rejected": -4617.95361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1907622814178467, "rewards/margins": 41.39619064331055, "rewards/rejected": -43.586952209472656, "step": 20310 }, { "epoch": 88.73362445414847, "grad_norm": 7.40683766825847e-08, "learning_rate": 1.9084841441695285e-07, "logits/chosen": -1.50114107131958, "logits/rejected": -2.2229666709899902, "logps/chosen": -540.7572021484375, "logps/rejected": -4775.5986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.219709634780884, "rewards/margins": 42.82616424560547, "rewards/rejected": -45.045875549316406, "step": 20320 }, { "epoch": 88.77729257641921, "grad_norm": 1.2179530031164974e-06, "learning_rate": 1.8939076971338593e-07, "logits/chosen": -1.6027815341949463, "logits/rejected": -2.3463995456695557, "logps/chosen": -496.9857482910156, "logps/rejected": -5140.02734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2080416679382324, "rewards/margins": 46.34941864013672, "rewards/rejected": -48.557456970214844, "step": 20330 }, { "epoch": 88.82096069868996, "grad_norm": 3.544445518287332e-08, "learning_rate": 1.8793849372273937e-07, "logits/chosen": -1.5759422779083252, "logits/rejected": -2.3006863594055176, "logps/chosen": -500.14849853515625, "logps/rejected": -5391.3876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.113483190536499, "rewards/margins": 48.869346618652344, "rewards/rejected": -50.982826232910156, "step": 20340 }, { "epoch": 88.8646288209607, "grad_norm": 1.3298895070820094e-06, "learning_rate": 1.8649158981938337e-07, "logits/chosen": -1.5867822170257568, "logits/rejected": -2.3180091381073, "logps/chosen": -483.6250915527344, "logps/rejected": -5402.92431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0952160358428955, "rewards/margins": 49.0889778137207, "rewards/rejected": -51.184200286865234, "step": 20350 }, { "epoch": 88.90829694323143, "grad_norm": 9.511280063681675e-06, "learning_rate": 1.8505006136520788e-07, "logits/chosen": -1.602698564529419, "logits/rejected": -2.4133942127227783, "logps/chosen": -475.3961486816406, "logps/rejected": -5929.00146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0878233909606934, "rewards/margins": 54.0263671875, "rewards/rejected": -56.11419677734375, "step": 20360 }, { "epoch": 88.95196506550218, "grad_norm": 7.478747273613132e-07, "learning_rate": 1.8361391170961057e-07, "logits/chosen": -1.560412883758545, "logits/rejected": -2.2727391719818115, "logps/chosen": -483.004150390625, "logps/rejected": -5288.9326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1652417182922363, "rewards/margins": 47.895408630371094, "rewards/rejected": -50.06064987182617, "step": 20370 }, { "epoch": 88.99563318777292, "grad_norm": 5.614113772894386e-08, "learning_rate": 1.821831441894939e-07, "logits/chosen": -1.5592987537384033, "logits/rejected": -2.3218846321105957, "logps/chosen": -485.5718688964844, "logps/rejected": -5440.1005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1182847023010254, "rewards/margins": 49.328399658203125, "rewards/rejected": -51.446678161621094, "step": 20380 }, { "epoch": 89.03930131004367, "grad_norm": 3.2814885023264395e-07, "learning_rate": 1.8075776212925418e-07, "logits/chosen": -1.5975546836853027, "logits/rejected": -2.4573721885681152, "logps/chosen": -463.64019775390625, "logps/rejected": -5763.8271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.14636492729187, "rewards/margins": 52.57942581176758, "rewards/rejected": -54.725791931152344, "step": 20390 }, { "epoch": 89.08296943231441, "grad_norm": 7.015029527474514e-08, "learning_rate": 1.7933776884077297e-07, "logits/chosen": -1.5674240589141846, "logits/rejected": -2.3296737670898438, "logps/chosen": -488.156982421875, "logps/rejected": -5300.2666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1534581184387207, "rewards/margins": 48.00684356689453, "rewards/rejected": -50.160301208496094, "step": 20400 }, { "epoch": 89.12663755458516, "grad_norm": 1.9327560934207346e-06, "learning_rate": 1.779231676234136e-07, "logits/chosen": -1.6083955764770508, "logits/rejected": -2.352151870727539, "logps/chosen": -459.3759765625, "logps/rejected": -5158.224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.099944591522217, "rewards/margins": 46.661659240722656, "rewards/rejected": -48.7616081237793, "step": 20410 }, { "epoch": 89.17030567685589, "grad_norm": 2.143700167218838e-08, "learning_rate": 1.7651396176400909e-07, "logits/chosen": -1.518507957458496, "logits/rejected": -2.272850513458252, "logps/chosen": -524.5264892578125, "logps/rejected": -5309.642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.202812671661377, "rewards/margins": 48.08041000366211, "rewards/rejected": -50.283226013183594, "step": 20420 }, { "epoch": 89.21397379912663, "grad_norm": 7.77726553173789e-08, "learning_rate": 1.7511015453685582e-07, "logits/chosen": -1.572983980178833, "logits/rejected": -2.2350850105285645, "logps/chosen": -506.692626953125, "logps/rejected": -5199.6552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2751917839050293, "rewards/margins": 46.798057556152344, "rewards/rejected": -49.07324981689453, "step": 20430 }, { "epoch": 89.25764192139738, "grad_norm": 3.75680471305106e-08, "learning_rate": 1.737117492037091e-07, "logits/chosen": -1.5570557117462158, "logits/rejected": -2.237325668334961, "logps/chosen": -479.0467224121094, "logps/rejected": -4908.1953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.159846305847168, "rewards/margins": 44.286903381347656, "rewards/rejected": -46.44675064086914, "step": 20440 }, { "epoch": 89.30131004366812, "grad_norm": 7.414558668387474e-08, "learning_rate": 1.7231874901377039e-07, "logits/chosen": -1.5714446306228638, "logits/rejected": -2.2955589294433594, "logps/chosen": -519.8417358398438, "logps/rejected": -5257.083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.156888484954834, "rewards/margins": 47.45368194580078, "rewards/rejected": -49.61057662963867, "step": 20450 }, { "epoch": 89.34497816593887, "grad_norm": 4.209478544968631e-06, "learning_rate": 1.7093115720368286e-07, "logits/chosen": -1.5942747592926025, "logits/rejected": -2.322469472885132, "logps/chosen": -505.9986877441406, "logps/rejected": -5328.7294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1504974365234375, "rewards/margins": 48.194061279296875, "rewards/rejected": -50.34456253051758, "step": 20460 }, { "epoch": 89.38864628820961, "grad_norm": 9.638825742551425e-09, "learning_rate": 1.6954897699752394e-07, "logits/chosen": -1.4961802959442139, "logits/rejected": -2.180471181869507, "logps/chosen": -516.7916870117188, "logps/rejected": -4735.65478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1263768672943115, "rewards/margins": 42.684940338134766, "rewards/rejected": -44.811309814453125, "step": 20470 }, { "epoch": 89.43231441048034, "grad_norm": 2.470862674793469e-06, "learning_rate": 1.6817221160679604e-07, "logits/chosen": -1.583526611328125, "logits/rejected": -2.301706314086914, "logps/chosen": -484.1678161621094, "logps/rejected": -5100.56201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.059201955795288, "rewards/margins": 46.241676330566406, "rewards/rejected": -48.300880432128906, "step": 20480 }, { "epoch": 89.47598253275109, "grad_norm": 3.600864786240378e-08, "learning_rate": 1.6680086423042168e-07, "logits/chosen": -1.5877211093902588, "logits/rejected": -2.3750569820404053, "logps/chosen": -475.25091552734375, "logps/rejected": -5248.21533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.088592052459717, "rewards/margins": 47.52494430541992, "rewards/rejected": -49.6135368347168, "step": 20490 }, { "epoch": 89.51965065502183, "grad_norm": 1.4272330936679963e-07, "learning_rate": 1.6543493805473404e-07, "logits/chosen": -1.5441358089447021, "logits/rejected": -2.2811174392700195, "logps/chosen": -498.2664489746094, "logps/rejected": -5034.70703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1631391048431396, "rewards/margins": 45.50168228149414, "rewards/rejected": -47.664817810058594, "step": 20500 }, { "epoch": 89.56331877729258, "grad_norm": 2.044710930797441e-06, "learning_rate": 1.640744362534691e-07, "logits/chosen": -1.550804853439331, "logits/rejected": -2.2608861923217773, "logps/chosen": -474.9814453125, "logps/rejected": -5142.12451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0971834659576416, "rewards/margins": 46.50384521484375, "rewards/rejected": -48.60103225708008, "step": 20510 }, { "epoch": 89.60698689956332, "grad_norm": 4.838589286749261e-08, "learning_rate": 1.6271936198775985e-07, "logits/chosen": -1.5930439233779907, "logits/rejected": -2.403390407562256, "logps/chosen": -487.228515625, "logps/rejected": -5590.4775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1829674243927, "rewards/margins": 50.707374572753906, "rewards/rejected": -52.890342712402344, "step": 20520 }, { "epoch": 89.65065502183405, "grad_norm": 3.4937443170369924e-07, "learning_rate": 1.6136971840612996e-07, "logits/chosen": -1.6398718357086182, "logits/rejected": -2.5343668460845947, "logps/chosen": -465.5310974121094, "logps/rejected": -5773.5947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.133321762084961, "rewards/margins": 52.558555603027344, "rewards/rejected": -54.69187545776367, "step": 20530 }, { "epoch": 89.6943231441048, "grad_norm": 1.830613576159714e-06, "learning_rate": 1.600255086444827e-07, "logits/chosen": -1.583202600479126, "logits/rejected": -2.4266579151153564, "logps/chosen": -489.29833984375, "logps/rejected": -5421.978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1516315937042236, "rewards/margins": 49.15673065185547, "rewards/rejected": -51.3083610534668, "step": 20540 }, { "epoch": 89.73799126637554, "grad_norm": 1.9976600121279063e-08, "learning_rate": 1.586867358260963e-07, "logits/chosen": -1.5252184867858887, "logits/rejected": -2.248433828353882, "logps/chosen": -502.0533142089844, "logps/rejected": -4887.02783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.178032398223877, "rewards/margins": 44.18206024169922, "rewards/rejected": -46.36008834838867, "step": 20550 }, { "epoch": 89.78165938864629, "grad_norm": 1.3289414728437653e-08, "learning_rate": 1.5735340306161752e-07, "logits/chosen": -1.5913482904434204, "logits/rejected": -2.3716933727264404, "logps/chosen": -511.39581298828125, "logps/rejected": -5462.22998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2359180450439453, "rewards/margins": 49.52855682373047, "rewards/rejected": -51.76447677612305, "step": 20560 }, { "epoch": 89.82532751091703, "grad_norm": 6.495893516641707e-08, "learning_rate": 1.5602551344905097e-07, "logits/chosen": -1.601406455039978, "logits/rejected": -2.360764503479004, "logps/chosen": -491.6211853027344, "logps/rejected": -5554.0107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.332726240158081, "rewards/margins": 50.312461853027344, "rewards/rejected": -52.64518356323242, "step": 20570 }, { "epoch": 89.86899563318778, "grad_norm": 3.837061647845412e-07, "learning_rate": 1.5470307007375618e-07, "logits/chosen": -1.6136815547943115, "logits/rejected": -2.386681079864502, "logps/chosen": -479.53204345703125, "logps/rejected": -5799.10693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2035441398620605, "rewards/margins": 52.7314567565918, "rewards/rejected": -54.93500900268555, "step": 20580 }, { "epoch": 89.91266375545851, "grad_norm": 5.88301800435577e-08, "learning_rate": 1.533860760084374e-07, "logits/chosen": -1.646097183227539, "logits/rejected": -2.3646116256713867, "logps/chosen": -484.29248046875, "logps/rejected": -5764.0654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1200807094573975, "rewards/margins": 52.416839599609375, "rewards/rejected": -54.53691864013672, "step": 20590 }, { "epoch": 89.95633187772926, "grad_norm": 1.5331383113791253e-08, "learning_rate": 1.5207453431313717e-07, "logits/chosen": -1.5774564743041992, "logits/rejected": -2.3221170902252197, "logps/chosen": -540.7818603515625, "logps/rejected": -5141.9658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4461216926574707, "rewards/margins": 46.204872131347656, "rewards/rejected": -48.65099334716797, "step": 20600 }, { "epoch": 90.0, "grad_norm": 6.47472676269572e-08, "learning_rate": 1.507684480352292e-07, "logits/chosen": -1.5028767585754395, "logits/rejected": -2.2491278648376465, "logps/chosen": -510.49139404296875, "logps/rejected": -5017.1123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.194105625152588, "rewards/margins": 45.363162994384766, "rewards/rejected": -47.5572624206543, "step": 20610 }, { "epoch": 90.04366812227074, "grad_norm": 4.89339990628579e-07, "learning_rate": 1.494678202094124e-07, "logits/chosen": -1.4920275211334229, "logits/rejected": -2.2001423835754395, "logps/chosen": -529.4451293945312, "logps/rejected": -4789.59130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2518324851989746, "rewards/margins": 43.07288360595703, "rewards/rejected": -45.32471466064453, "step": 20620 }, { "epoch": 90.08733624454149, "grad_norm": 2.6103715623209737e-07, "learning_rate": 1.4817265385770275e-07, "logits/chosen": -1.5603458881378174, "logits/rejected": -2.286374568939209, "logps/chosen": -488.0533142089844, "logps/rejected": -5002.0009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1903183460235596, "rewards/margins": 45.182861328125, "rewards/rejected": -47.37318801879883, "step": 20630 }, { "epoch": 90.13100436681222, "grad_norm": 1.6010483064244705e-07, "learning_rate": 1.4688295198942576e-07, "logits/chosen": -1.5823813676834106, "logits/rejected": -2.3112094402313232, "logps/chosen": -491.254150390625, "logps/rejected": -5251.05517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.029723644256592, "rewards/margins": 47.703636169433594, "rewards/rejected": -49.733360290527344, "step": 20640 }, { "epoch": 90.17467248908297, "grad_norm": 7.873244877389318e-08, "learning_rate": 1.4559871760121108e-07, "logits/chosen": -1.5806214809417725, "logits/rejected": -2.2832930088043213, "logps/chosen": -489.77874755859375, "logps/rejected": -5324.73876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.163963794708252, "rewards/margins": 48.288909912109375, "rewards/rejected": -50.45287322998047, "step": 20650 }, { "epoch": 90.21834061135371, "grad_norm": 5.3432453032397535e-08, "learning_rate": 1.4431995367698349e-07, "logits/chosen": -1.6036021709442139, "logits/rejected": -2.3443734645843506, "logps/chosen": -489.05255126953125, "logps/rejected": -5430.56005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.194514751434326, "rewards/margins": 49.18391799926758, "rewards/rejected": -51.3784294128418, "step": 20660 }, { "epoch": 90.26200873362446, "grad_norm": 5.344152705070529e-08, "learning_rate": 1.430466631879582e-07, "logits/chosen": -1.550215482711792, "logits/rejected": -2.303628444671631, "logps/chosen": -468.3631286621094, "logps/rejected": -5133.79443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.057590961456299, "rewards/margins": 46.472984313964844, "rewards/rejected": -48.53057098388672, "step": 20670 }, { "epoch": 90.3056768558952, "grad_norm": 1.3048794513429772e-07, "learning_rate": 1.417788490926328e-07, "logits/chosen": -1.578238844871521, "logits/rejected": -2.298619508743286, "logps/chosen": -501.4214782714844, "logps/rejected": -5186.89404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1798295974731445, "rewards/margins": 46.899845123291016, "rewards/rejected": -49.07966995239258, "step": 20680 }, { "epoch": 90.34934497816595, "grad_norm": 7.230047119691181e-08, "learning_rate": 1.4051651433677982e-07, "logits/chosen": -1.5815839767456055, "logits/rejected": -2.3413023948669434, "logps/chosen": -494.50164794921875, "logps/rejected": -5359.6015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.109595775604248, "rewards/margins": 48.588401794433594, "rewards/rejected": -50.697998046875, "step": 20690 }, { "epoch": 90.39301310043668, "grad_norm": 7.427711361451795e-08, "learning_rate": 1.392596618534406e-07, "logits/chosen": -1.5857053995132446, "logits/rejected": -2.2571794986724854, "logps/chosen": -482.4603576660156, "logps/rejected": -5036.0439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1969456672668457, "rewards/margins": 45.433082580566406, "rewards/rejected": -47.630027770996094, "step": 20700 }, { "epoch": 90.43668122270742, "grad_norm": 7.861657608722631e-08, "learning_rate": 1.380082945629188e-07, "logits/chosen": -1.5457603931427002, "logits/rejected": -2.2832136154174805, "logps/chosen": -512.0953369140625, "logps/rejected": -5082.7568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3793437480926514, "rewards/margins": 45.71562576293945, "rewards/rejected": -48.094970703125, "step": 20710 }, { "epoch": 90.48034934497817, "grad_norm": 4.177080986408289e-08, "learning_rate": 1.3676241537277342e-07, "logits/chosen": -1.5751314163208008, "logits/rejected": -2.22908353805542, "logps/chosen": -503.72198486328125, "logps/rejected": -5106.537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3475182056427, "rewards/margins": 45.93096160888672, "rewards/rejected": -48.278480529785156, "step": 20720 }, { "epoch": 90.52401746724891, "grad_norm": 3.347050324753408e-05, "learning_rate": 1.3552202717781072e-07, "logits/chosen": -1.5071251392364502, "logits/rejected": -2.265125274658203, "logps/chosen": -490.09228515625, "logps/rejected": -5057.05712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1955208778381348, "rewards/margins": 45.69789505004883, "rewards/rejected": -47.89341354370117, "step": 20730 }, { "epoch": 90.56768558951966, "grad_norm": 1.6895970077359938e-08, "learning_rate": 1.3428713286008005e-07, "logits/chosen": -1.5871410369873047, "logits/rejected": -2.3224198818206787, "logps/chosen": -494.9087829589844, "logps/rejected": -5367.279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1893255710601807, "rewards/margins": 48.67522430419922, "rewards/rejected": -50.86455154418945, "step": 20740 }, { "epoch": 90.61135371179039, "grad_norm": 2.2199219025881782e-07, "learning_rate": 1.3305773528886456e-07, "logits/chosen": -1.5686370134353638, "logits/rejected": -2.319000244140625, "logps/chosen": -503.8790588378906, "logps/rejected": -5121.42529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2800891399383545, "rewards/margins": 46.1439094543457, "rewards/rejected": -48.42399978637695, "step": 20750 }, { "epoch": 90.65502183406113, "grad_norm": 1.2550401900547684e-08, "learning_rate": 1.318338373206754e-07, "logits/chosen": -1.5859028100967407, "logits/rejected": -2.281161308288574, "logps/chosen": -483.4513244628906, "logps/rejected": -4962.91552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1412386894226074, "rewards/margins": 44.83409881591797, "rewards/rejected": -46.975337982177734, "step": 20760 }, { "epoch": 90.69868995633188, "grad_norm": 1.2639378804293046e-08, "learning_rate": 1.3061544179924752e-07, "logits/chosen": -1.5414268970489502, "logits/rejected": -2.232145309448242, "logps/chosen": -498.0986328125, "logps/rejected": -5042.361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.12843656539917, "rewards/margins": 45.64985275268555, "rewards/rejected": -47.77829360961914, "step": 20770 }, { "epoch": 90.74235807860262, "grad_norm": 7.89166639895028e-08, "learning_rate": 1.2940255155552888e-07, "logits/chosen": -1.5559160709381104, "logits/rejected": -2.266854763031006, "logps/chosen": -510.17333984375, "logps/rejected": -5163.5908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2492663860321045, "rewards/margins": 46.58482360839844, "rewards/rejected": -48.83409118652344, "step": 20780 }, { "epoch": 90.78602620087337, "grad_norm": 1.993872632008891e-08, "learning_rate": 1.281951694076758e-07, "logits/chosen": -1.5924556255340576, "logits/rejected": -2.3074471950531006, "logps/chosen": -496.6692810058594, "logps/rejected": -5315.1875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1654467582702637, "rewards/margins": 47.96519088745117, "rewards/rejected": -50.130638122558594, "step": 20790 }, { "epoch": 90.82969432314411, "grad_norm": 1.3187852361584132e-07, "learning_rate": 1.2699329816104778e-07, "logits/chosen": -1.5974174737930298, "logits/rejected": -2.3510005474090576, "logps/chosen": -480.8475646972656, "logps/rejected": -5740.48095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.081080675125122, "rewards/margins": 52.21710205078125, "rewards/rejected": -54.298179626464844, "step": 20800 }, { "epoch": 90.87336244541484, "grad_norm": 2.4455223588263222e-08, "learning_rate": 1.257969406081988e-07, "logits/chosen": -1.5474618673324585, "logits/rejected": -2.2395224571228027, "logps/chosen": -489.22100830078125, "logps/rejected": -5153.7587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0823967456817627, "rewards/margins": 46.725093841552734, "rewards/rejected": -48.807491302490234, "step": 20810 }, { "epoch": 90.91703056768559, "grad_norm": 1.0406033382993206e-07, "learning_rate": 1.2460609952887198e-07, "logits/chosen": -1.569563627243042, "logits/rejected": -2.2350809574127197, "logps/chosen": -504.487060546875, "logps/rejected": -5211.22998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2064945697784424, "rewards/margins": 47.10076141357422, "rewards/rejected": -49.30725860595703, "step": 20820 }, { "epoch": 90.96069868995633, "grad_norm": 6.165119993511827e-08, "learning_rate": 1.2342077768999372e-07, "logits/chosen": -1.6066137552261353, "logits/rejected": -2.437335252761841, "logps/chosen": -477.2857971191406, "logps/rejected": -5893.734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2427024841308594, "rewards/margins": 53.58252716064453, "rewards/rejected": -55.825233459472656, "step": 20830 }, { "epoch": 91.00436681222708, "grad_norm": 2.3354405304270372e-07, "learning_rate": 1.2224097784566484e-07, "logits/chosen": -1.5496009588241577, "logits/rejected": -2.2188477516174316, "logps/chosen": -508.091796875, "logps/rejected": -4979.73974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1179864406585693, "rewards/margins": 45.017127990722656, "rewards/rejected": -47.13511657714844, "step": 20840 }, { "epoch": 91.04803493449782, "grad_norm": 3.24421014138974e-06, "learning_rate": 1.2106670273715677e-07, "logits/chosen": -1.6068508625030518, "logits/rejected": -2.3243751525878906, "logps/chosen": -484.13800048828125, "logps/rejected": -5533.91357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.261599063873291, "rewards/margins": 50.085838317871094, "rewards/rejected": -52.347434997558594, "step": 20850 }, { "epoch": 91.09170305676857, "grad_norm": 3.89721124767605e-06, "learning_rate": 1.1989795509290414e-07, "logits/chosen": -1.5675002336502075, "logits/rejected": -2.3068675994873047, "logps/chosen": -490.00775146484375, "logps/rejected": -5274.6298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1328225135803223, "rewards/margins": 47.7006721496582, "rewards/rejected": -49.83349609375, "step": 20860 }, { "epoch": 91.1353711790393, "grad_norm": 4.1741578495110235e-08, "learning_rate": 1.1873473762849863e-07, "logits/chosen": -1.5609042644500732, "logits/rejected": -2.256824016571045, "logps/chosen": -516.1639404296875, "logps/rejected": -5190.2041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2525477409362793, "rewards/margins": 46.709571838378906, "rewards/rejected": -48.962120056152344, "step": 20870 }, { "epoch": 91.17903930131004, "grad_norm": 1.2186204122313182e-07, "learning_rate": 1.175770530466816e-07, "logits/chosen": -1.6066923141479492, "logits/rejected": -2.3001255989074707, "logps/chosen": -490.7269592285156, "logps/rejected": -5431.01806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2477996349334717, "rewards/margins": 49.185672760009766, "rewards/rejected": -51.4334716796875, "step": 20880 }, { "epoch": 91.22270742358079, "grad_norm": 1.9440133558364836e-07, "learning_rate": 1.1642490403733997e-07, "logits/chosen": -1.5886482000350952, "logits/rejected": -2.3057610988616943, "logps/chosen": -483.81304931640625, "logps/rejected": -5223.3134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1166019439697266, "rewards/margins": 47.37724685668945, "rewards/rejected": -49.49384307861328, "step": 20890 }, { "epoch": 91.26637554585153, "grad_norm": 1.322341388601739e-07, "learning_rate": 1.1527829327749763e-07, "logits/chosen": -1.5449857711791992, "logits/rejected": -2.216214179992676, "logps/chosen": -515.6695556640625, "logps/rejected": -4945.64794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1660752296447754, "rewards/margins": 44.54115676879883, "rewards/rejected": -46.70723342895508, "step": 20900 }, { "epoch": 91.31004366812228, "grad_norm": 1.2025399073954465e-07, "learning_rate": 1.1413722343131095e-07, "logits/chosen": -1.5545628070831299, "logits/rejected": -2.31830096244812, "logps/chosen": -499.43157958984375, "logps/rejected": -5315.60400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.121551990509033, "rewards/margins": 48.21003723144531, "rewards/rejected": -50.33158874511719, "step": 20910 }, { "epoch": 91.353711790393, "grad_norm": 5.334541667378737e-06, "learning_rate": 1.130016971500622e-07, "logits/chosen": -1.6108663082122803, "logits/rejected": -2.4090986251831055, "logps/chosen": -487.30340576171875, "logps/rejected": -5690.13330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.160243034362793, "rewards/margins": 51.65126419067383, "rewards/rejected": -53.81150436401367, "step": 20920 }, { "epoch": 91.39737991266375, "grad_norm": 3.5689299919367836e-08, "learning_rate": 1.1187171707215228e-07, "logits/chosen": -1.542555570602417, "logits/rejected": -2.174635410308838, "logps/chosen": -488.93121337890625, "logps/rejected": -4949.2587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1159534454345703, "rewards/margins": 44.687522888183594, "rewards/rejected": -46.80347442626953, "step": 20930 }, { "epoch": 91.4410480349345, "grad_norm": 1.66820004169947e-05, "learning_rate": 1.1074728582309652e-07, "logits/chosen": -1.548744797706604, "logits/rejected": -2.325456142425537, "logps/chosen": -471.8363342285156, "logps/rejected": -5271.96875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1642024517059326, "rewards/margins": 47.76015090942383, "rewards/rejected": -49.92435073852539, "step": 20940 }, { "epoch": 91.48471615720524, "grad_norm": 2.6012085176137102e-08, "learning_rate": 1.0962840601551644e-07, "logits/chosen": -1.5636603832244873, "logits/rejected": -2.2581124305725098, "logps/chosen": -530.1372680664062, "logps/rejected": -5056.125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2333292961120605, "rewards/margins": 45.70207214355469, "rewards/rejected": -47.935401916503906, "step": 20950 }, { "epoch": 91.52838427947599, "grad_norm": 1.1226454590581273e-06, "learning_rate": 1.0851508024913554e-07, "logits/chosen": -1.5776195526123047, "logits/rejected": -2.362244129180908, "logps/chosen": -501.9842224121094, "logps/rejected": -5566.2548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.203885078430176, "rewards/margins": 50.45074462890625, "rewards/rejected": -52.654632568359375, "step": 20960 }, { "epoch": 91.57205240174673, "grad_norm": 3.70956862737413e-08, "learning_rate": 1.074073111107718e-07, "logits/chosen": -1.5621118545532227, "logits/rejected": -2.345371961593628, "logps/chosen": -481.9496154785156, "logps/rejected": -5286.33251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1579089164733887, "rewards/margins": 47.907779693603516, "rewards/rejected": -50.0656852722168, "step": 20970 }, { "epoch": 91.61572052401746, "grad_norm": 1.4642084150942419e-07, "learning_rate": 1.0630510117433351e-07, "logits/chosen": -1.5407111644744873, "logits/rejected": -2.3080005645751953, "logps/chosen": -502.4901428222656, "logps/rejected": -5213.087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.090610980987549, "rewards/margins": 47.25653839111328, "rewards/rejected": -49.34714889526367, "step": 20980 }, { "epoch": 91.6593886462882, "grad_norm": 9.650587504170897e-08, "learning_rate": 1.0520845300081045e-07, "logits/chosen": -1.5124695301055908, "logits/rejected": -2.192631483078003, "logps/chosen": -501.15057373046875, "logps/rejected": -5011.38330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.159416913986206, "rewards/margins": 45.2108039855957, "rewards/rejected": -47.37022018432617, "step": 20990 }, { "epoch": 91.70305676855895, "grad_norm": 3.5360612667529926e-06, "learning_rate": 1.0411736913827103e-07, "logits/chosen": -1.56930673122406, "logits/rejected": -2.3119072914123535, "logps/chosen": -484.4143981933594, "logps/rejected": -5042.05859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.132357358932495, "rewards/margins": 45.56493377685547, "rewards/rejected": -47.697296142578125, "step": 21000 }, { "epoch": 91.7467248908297, "grad_norm": 4.9103244154588845e-08, "learning_rate": 1.0303185212185485e-07, "logits/chosen": -1.5682209730148315, "logits/rejected": -2.2341079711914062, "logps/chosen": -490.2748107910156, "logps/rejected": -4821.45458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1478984355926514, "rewards/margins": 43.45924758911133, "rewards/rejected": -45.607147216796875, "step": 21010 }, { "epoch": 91.79039301310044, "grad_norm": 2.5740756271605727e-08, "learning_rate": 1.0195190447376547e-07, "logits/chosen": -1.5327608585357666, "logits/rejected": -2.1402087211608887, "logps/chosen": -516.1490478515625, "logps/rejected": -4778.87255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2591638565063477, "rewards/margins": 42.912906646728516, "rewards/rejected": -45.17206954956055, "step": 21020 }, { "epoch": 91.83406113537117, "grad_norm": 2.9087401098754046e-06, "learning_rate": 1.0087752870326817e-07, "logits/chosen": -1.5496528148651123, "logits/rejected": -2.2965526580810547, "logps/chosen": -508.87811279296875, "logps/rejected": -5482.1875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.178062915802002, "rewards/margins": 49.715797424316406, "rewards/rejected": -51.89385223388672, "step": 21030 }, { "epoch": 91.87772925764192, "grad_norm": 3.147140550931097e-07, "learning_rate": 9.980872730667973e-08, "logits/chosen": -1.5345841646194458, "logits/rejected": -2.2341582775115967, "logps/chosen": -482.833984375, "logps/rejected": -5168.55224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0949244499206543, "rewards/margins": 46.79941177368164, "rewards/rejected": -48.89434051513672, "step": 21040 }, { "epoch": 91.92139737991266, "grad_norm": 3.66126821851017e-07, "learning_rate": 9.8745502767367e-08, "logits/chosen": -1.5954501628875732, "logits/rejected": -2.334500551223755, "logps/chosen": -502.3904724121094, "logps/rejected": -5550.92333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2362728118896484, "rewards/margins": 50.30675506591797, "rewards/rejected": -52.54302978515625, "step": 21050 }, { "epoch": 91.9650655021834, "grad_norm": 2.7412449964860076e-08, "learning_rate": 9.768785755573689e-08, "logits/chosen": -1.5213890075683594, "logits/rejected": -2.250189781188965, "logps/chosen": -533.7860107421875, "logps/rejected": -4892.3447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.212358236312866, "rewards/margins": 44.07600784301758, "rewards/rejected": -46.288368225097656, "step": 21060 }, { "epoch": 92.00873362445415, "grad_norm": 1.5147520557907243e-07, "learning_rate": 9.663579412923424e-08, "logits/chosen": -1.5268088579177856, "logits/rejected": -2.159318208694458, "logps/chosen": -479.06182861328125, "logps/rejected": -4719.19287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.145684242248535, "rewards/margins": 42.497230529785156, "rewards/rejected": -44.642913818359375, "step": 21070 }, { "epoch": 92.0524017467249, "grad_norm": 5.104077922890023e-06, "learning_rate": 9.558931493233393e-08, "logits/chosen": -1.5486923456192017, "logits/rejected": -2.180666208267212, "logps/chosen": -512.9549560546875, "logps/rejected": -4818.15185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.177396059036255, "rewards/margins": 43.408538818359375, "rewards/rejected": -45.58592987060547, "step": 21080 }, { "epoch": 92.09606986899563, "grad_norm": 2.01179559820253e-07, "learning_rate": 9.454842239653595e-08, "logits/chosen": -1.6026932001113892, "logits/rejected": -2.357661247253418, "logps/chosen": -474.7216796875, "logps/rejected": -5576.95947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1884992122650146, "rewards/margins": 50.68706512451172, "rewards/rejected": -52.87556076049805, "step": 21090 }, { "epoch": 92.13973799126637, "grad_norm": 1.0674802289504779e-07, "learning_rate": 9.351311894036014e-08, "logits/chosen": -1.5995962619781494, "logits/rejected": -2.380763530731201, "logps/chosen": -468.05718994140625, "logps/rejected": -5580.31640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.085693359375, "rewards/margins": 50.648250579833984, "rewards/rejected": -52.73394775390625, "step": 21100 }, { "epoch": 92.18340611353712, "grad_norm": 2.692121366582849e-06, "learning_rate": 9.248340696933867e-08, "logits/chosen": -1.5354344844818115, "logits/rejected": -2.232229709625244, "logps/chosen": -518.2478637695312, "logps/rejected": -4992.82568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.24051570892334, "rewards/margins": 44.997703552246094, "rewards/rejected": -47.238216400146484, "step": 21110 }, { "epoch": 92.22707423580786, "grad_norm": 4.671986779133616e-08, "learning_rate": 9.14592888760138e-08, "logits/chosen": -1.5704840421676636, "logits/rejected": -2.2751026153564453, "logps/chosen": -489.39422607421875, "logps/rejected": -5265.44140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.149589776992798, "rewards/margins": 47.685646057128906, "rewards/rejected": -49.835227966308594, "step": 21120 }, { "epoch": 92.2707423580786, "grad_norm": 4.170530381866932e-08, "learning_rate": 9.044076703992905e-08, "logits/chosen": -1.5012702941894531, "logits/rejected": -2.1808905601501465, "logps/chosen": -507.99652099609375, "logps/rejected": -5029.02880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.183527708053589, "rewards/margins": 45.312461853027344, "rewards/rejected": -47.495994567871094, "step": 21130 }, { "epoch": 92.31441048034935, "grad_norm": 4.990324642956389e-08, "learning_rate": 8.942784382762499e-08, "logits/chosen": -1.565462350845337, "logits/rejected": -2.370842456817627, "logps/chosen": -498.53826904296875, "logps/rejected": -5784.3740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0818488597869873, "rewards/margins": 52.67264938354492, "rewards/rejected": -54.7545051574707, "step": 21140 }, { "epoch": 92.35807860262008, "grad_norm": 1.977971080471851e-08, "learning_rate": 8.842052159263426e-08, "logits/chosen": -1.6360013484954834, "logits/rejected": -2.439387083053589, "logps/chosen": -493.08184814453125, "logps/rejected": -5943.39453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1589395999908447, "rewards/margins": 54.09241485595703, "rewards/rejected": -56.2513542175293, "step": 21150 }, { "epoch": 92.40174672489083, "grad_norm": 6.547478029710426e-08, "learning_rate": 8.741880267547632e-08, "logits/chosen": -1.5197030305862427, "logits/rejected": -2.237135410308838, "logps/chosen": -502.28106689453125, "logps/rejected": -4839.05517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.137362003326416, "rewards/margins": 43.61191940307617, "rewards/rejected": -45.7492790222168, "step": 21160 }, { "epoch": 92.44541484716157, "grad_norm": 1.1086466096838248e-05, "learning_rate": 8.642268940364907e-08, "logits/chosen": -1.5483752489089966, "logits/rejected": -2.245239019393921, "logps/chosen": -514.0629272460938, "logps/rejected": -4703.2373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.249408721923828, "rewards/margins": 42.118080139160156, "rewards/rejected": -44.36749267578125, "step": 21170 }, { "epoch": 92.48908296943232, "grad_norm": 6.293706764622737e-08, "learning_rate": 8.543218409162779e-08, "logits/chosen": -1.5973520278930664, "logits/rejected": -2.3363945484161377, "logps/chosen": -448.3724060058594, "logps/rejected": -5435.6015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0387959480285645, "rewards/margins": 49.424503326416016, "rewards/rejected": -51.46329879760742, "step": 21180 }, { "epoch": 92.53275109170306, "grad_norm": 1.5998471875057876e-08, "learning_rate": 8.444728904085737e-08, "logits/chosen": -1.5910866260528564, "logits/rejected": -2.3055100440979004, "logps/chosen": -508.332275390625, "logps/rejected": -5320.9580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.269416332244873, "rewards/margins": 47.990966796875, "rewards/rejected": -50.26038360595703, "step": 21190 }, { "epoch": 92.5764192139738, "grad_norm": 1.0849497525994007e-08, "learning_rate": 8.346800653974557e-08, "logits/chosen": -1.5318958759307861, "logits/rejected": -2.2214157581329346, "logps/chosen": -510.3533630371094, "logps/rejected": -4888.3505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1380531787872314, "rewards/margins": 44.00347137451172, "rewards/rejected": -46.14152908325195, "step": 21200 }, { "epoch": 92.62008733624454, "grad_norm": 6.314654058670317e-08, "learning_rate": 8.249433886366148e-08, "logits/chosen": -1.574426531791687, "logits/rejected": -2.3100998401641846, "logps/chosen": -481.7127380371094, "logps/rejected": -5416.63134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1634654998779297, "rewards/margins": 49.12548828125, "rewards/rejected": -51.2889518737793, "step": 21210 }, { "epoch": 92.66375545851528, "grad_norm": 2.3052490369233104e-06, "learning_rate": 8.152628827492681e-08, "logits/chosen": -1.6127212047576904, "logits/rejected": -2.333390474319458, "logps/chosen": -483.96978759765625, "logps/rejected": -5442.77783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.091092586517334, "rewards/margins": 49.45130920410156, "rewards/rejected": -51.54240036010742, "step": 21220 }, { "epoch": 92.70742358078603, "grad_norm": 9.16884193628057e-07, "learning_rate": 8.056385702281178e-08, "logits/chosen": -1.5597542524337769, "logits/rejected": -2.1972763538360596, "logps/chosen": -504.21893310546875, "logps/rejected": -4903.93603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.200068235397339, "rewards/margins": 44.150230407714844, "rewards/rejected": -46.35029983520508, "step": 21230 }, { "epoch": 92.75109170305677, "grad_norm": 1.9047184508534017e-06, "learning_rate": 7.960704734353175e-08, "logits/chosen": -1.543243408203125, "logits/rejected": -2.2363038063049316, "logps/chosen": -509.7508850097656, "logps/rejected": -4937.85107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.102735757827759, "rewards/margins": 44.54497146606445, "rewards/rejected": -46.647705078125, "step": 21240 }, { "epoch": 92.79475982532752, "grad_norm": 1.225660427935207e-05, "learning_rate": 7.865586146023813e-08, "logits/chosen": -1.5647671222686768, "logits/rejected": -2.253258228302002, "logps/chosen": -504.36859130859375, "logps/rejected": -4985.09521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.274061441421509, "rewards/margins": 44.934967041015625, "rewards/rejected": -47.20902633666992, "step": 21250 }, { "epoch": 92.83842794759825, "grad_norm": 1.9696581887850398e-06, "learning_rate": 7.771030158301662e-08, "logits/chosen": -1.5780158042907715, "logits/rejected": -2.246424913406372, "logps/chosen": -502.1859436035156, "logps/rejected": -5095.7978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1731679439544678, "rewards/margins": 46.04511260986328, "rewards/rejected": -48.21828079223633, "step": 21260 }, { "epoch": 92.882096069869, "grad_norm": 1.3014279485256296e-05, "learning_rate": 7.677036990888121e-08, "logits/chosen": -1.5620059967041016, "logits/rejected": -2.2946271896362305, "logps/chosen": -486.97259521484375, "logps/rejected": -5310.73486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0570173263549805, "rewards/margins": 48.250675201416016, "rewards/rejected": -50.30769348144531, "step": 21270 }, { "epoch": 92.92576419213974, "grad_norm": 1.4868515079566334e-06, "learning_rate": 7.583606862176713e-08, "logits/chosen": -1.5970029830932617, "logits/rejected": -2.347914218902588, "logps/chosen": -500.41748046875, "logps/rejected": -5462.10791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2285561561584473, "rewards/margins": 49.41918182373047, "rewards/rejected": -51.647743225097656, "step": 21280 }, { "epoch": 92.96943231441048, "grad_norm": 1.3131304215907929e-08, "learning_rate": 7.490739989252926e-08, "logits/chosen": -1.5960941314697266, "logits/rejected": -2.3314194679260254, "logps/chosen": -475.6039123535156, "logps/rejected": -5708.5029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1502723693847656, "rewards/margins": 51.92163848876953, "rewards/rejected": -54.0719108581543, "step": 21290 }, { "epoch": 93.01310043668123, "grad_norm": 3.619875442948516e-06, "learning_rate": 7.39843658789341e-08, "logits/chosen": -1.528524398803711, "logits/rejected": -2.2512078285217285, "logps/chosen": -513.2106323242188, "logps/rejected": -4766.21533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1508281230926514, "rewards/margins": 42.804969787597656, "rewards/rejected": -44.95579528808594, "step": 21300 }, { "epoch": 93.05676855895196, "grad_norm": 1.27236352768358e-05, "learning_rate": 7.306696872565661e-08, "logits/chosen": -1.5721570253372192, "logits/rejected": -2.302830457687378, "logps/chosen": -504.0419921875, "logps/rejected": -5398.2265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1302342414855957, "rewards/margins": 49.00538635253906, "rewards/rejected": -51.1356201171875, "step": 21310 }, { "epoch": 93.1004366812227, "grad_norm": 2.134365616676504e-08, "learning_rate": 7.215521056427283e-08, "logits/chosen": -1.5788930654525757, "logits/rejected": -2.3440399169921875, "logps/chosen": -471.5693359375, "logps/rejected": -5234.02099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0709152221679688, "rewards/margins": 47.452919006347656, "rewards/rejected": -49.52383804321289, "step": 21320 }, { "epoch": 93.14410480349345, "grad_norm": 2.2763298516880408e-08, "learning_rate": 7.12490935132587e-08, "logits/chosen": -1.5364412069320679, "logits/rejected": -2.2026219367980957, "logps/chosen": -506.30108642578125, "logps/rejected": -4921.0947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2556633949279785, "rewards/margins": 44.142372131347656, "rewards/rejected": -46.39803695678711, "step": 21330 }, { "epoch": 93.1877729257642, "grad_norm": 9.0593935855553e-09, "learning_rate": 7.034861967798207e-08, "logits/chosen": -1.6273338794708252, "logits/rejected": -2.4861903190612793, "logps/chosen": -502.4532775878906, "logps/rejected": -5986.9677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.279041290283203, "rewards/margins": 54.395103454589844, "rewards/rejected": -56.67415237426758, "step": 21340 }, { "epoch": 93.23144104803494, "grad_norm": 8.159743788979872e-08, "learning_rate": 6.945379115069789e-08, "logits/chosen": -1.5565048456192017, "logits/rejected": -2.2742958068847656, "logps/chosen": -490.07177734375, "logps/rejected": -5249.52490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2036473751068115, "rewards/margins": 47.5150032043457, "rewards/rejected": -49.718650817871094, "step": 21350 }, { "epoch": 93.27510917030568, "grad_norm": 1.193467980490598e-07, "learning_rate": 6.85646100105461e-08, "logits/chosen": -1.5699818134307861, "logits/rejected": -2.2882676124572754, "logps/chosen": -487.75048828125, "logps/rejected": -5227.68505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.162698745727539, "rewards/margins": 47.350223541259766, "rewards/rejected": -49.51292037963867, "step": 21360 }, { "epoch": 93.31877729257641, "grad_norm": 2.8281209219261778e-08, "learning_rate": 6.768107832354292e-08, "logits/chosen": -1.6014655828475952, "logits/rejected": -2.3089535236358643, "logps/chosen": -502.5697326660156, "logps/rejected": -5048.099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.169940233230591, "rewards/margins": 45.52894592285156, "rewards/rejected": -47.69888687133789, "step": 21370 }, { "epoch": 93.36244541484716, "grad_norm": 1.348352594757351e-06, "learning_rate": 6.680319814257929e-08, "logits/chosen": -1.6389129161834717, "logits/rejected": -2.415682792663574, "logps/chosen": -468.89825439453125, "logps/rejected": -5924.5693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.127011775970459, "rewards/margins": 53.93092727661133, "rewards/rejected": -56.05793380737305, "step": 21380 }, { "epoch": 93.4061135371179, "grad_norm": 2.4291878126166124e-08, "learning_rate": 6.593097150741495e-08, "logits/chosen": -1.5956447124481201, "logits/rejected": -2.2829837799072266, "logps/chosen": -491.88555908203125, "logps/rejected": -5112.4453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.120248556137085, "rewards/margins": 46.209693908691406, "rewards/rejected": -48.3299446105957, "step": 21390 }, { "epoch": 93.44978165938865, "grad_norm": 2.2493994442206637e-08, "learning_rate": 6.506440044467266e-08, "logits/chosen": -1.5502456426620483, "logits/rejected": -2.299412727355957, "logps/chosen": -479.6705627441406, "logps/rejected": -5622.00244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.101736068725586, "rewards/margins": 51.07154083251953, "rewards/rejected": -53.17328643798828, "step": 21400 }, { "epoch": 93.4934497816594, "grad_norm": 4.5107875700328064e-08, "learning_rate": 6.420348696783485e-08, "logits/chosen": -1.5527751445770264, "logits/rejected": -2.2916407585144043, "logps/chosen": -504.95196533203125, "logps/rejected": -4954.29931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.151071071624756, "rewards/margins": 44.69712448120117, "rewards/rejected": -46.84819412231445, "step": 21410 }, { "epoch": 93.53711790393012, "grad_norm": 1.1103883678656e-07, "learning_rate": 6.334823307723891e-08, "logits/chosen": -1.544683575630188, "logits/rejected": -2.2189157009124756, "logps/chosen": -512.2808227539062, "logps/rejected": -4786.31005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1419677734375, "rewards/margins": 43.04999923706055, "rewards/rejected": -45.19197082519531, "step": 21420 }, { "epoch": 93.58078602620087, "grad_norm": 8.315459073439625e-09, "learning_rate": 6.249864076007167e-08, "logits/chosen": -1.5471606254577637, "logits/rejected": -2.2500500679016113, "logps/chosen": -511.30914306640625, "logps/rejected": -4768.7783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2825350761413574, "rewards/margins": 42.88930130004883, "rewards/rejected": -45.17182922363281, "step": 21430 }, { "epoch": 93.62445414847161, "grad_norm": 1.8417689221380888e-07, "learning_rate": 6.165471199036543e-08, "logits/chosen": -1.5782109498977661, "logits/rejected": -2.283475399017334, "logps/chosen": -483.7455139160156, "logps/rejected": -5370.42333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1353094577789307, "rewards/margins": 48.58485794067383, "rewards/rejected": -50.72016525268555, "step": 21440 }, { "epoch": 93.66812227074236, "grad_norm": 7.941783136563847e-06, "learning_rate": 6.081644872899334e-08, "logits/chosen": -1.5779550075531006, "logits/rejected": -2.3850414752960205, "logps/chosen": -469.15924072265625, "logps/rejected": -5778.1904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0961782932281494, "rewards/margins": 52.66881561279297, "rewards/rejected": -54.7650032043457, "step": 21450 }, { "epoch": 93.7117903930131, "grad_norm": 2.3827770623712452e-07, "learning_rate": 5.99838529236646e-08, "logits/chosen": -1.5748286247253418, "logits/rejected": -2.3285298347473145, "logps/chosen": -489.7041015625, "logps/rejected": -5380.62255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0567374229431152, "rewards/margins": 48.835914611816406, "rewards/rejected": -50.89264678955078, "step": 21460 }, { "epoch": 93.75545851528385, "grad_norm": 2.3075550178185255e-08, "learning_rate": 5.9156926508919247e-08, "logits/chosen": -1.563035011291504, "logits/rejected": -2.361307382583618, "logps/chosen": -507.92156982421875, "logps/rejected": -5264.896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.177999973297119, "rewards/margins": 47.69069290161133, "rewards/rejected": -49.868690490722656, "step": 21470 }, { "epoch": 93.79912663755458, "grad_norm": 5.113492883252661e-06, "learning_rate": 5.8335671406126726e-08, "logits/chosen": -1.5350656509399414, "logits/rejected": -2.297152042388916, "logps/chosen": -517.76416015625, "logps/rejected": -5317.0263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2579994201660156, "rewards/margins": 48.1352424621582, "rewards/rejected": -50.39324188232422, "step": 21480 }, { "epoch": 93.84279475982532, "grad_norm": 1.813472998476443e-07, "learning_rate": 5.7520089523476486e-08, "logits/chosen": -1.5736373662948608, "logits/rejected": -2.2471070289611816, "logps/chosen": -485.80902099609375, "logps/rejected": -5320.3427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1856725215911865, "rewards/margins": 48.12138366699219, "rewards/rejected": -50.30706024169922, "step": 21490 }, { "epoch": 93.88646288209607, "grad_norm": 4.929594443788646e-07, "learning_rate": 5.6710182755977674e-08, "logits/chosen": -1.638450264930725, "logits/rejected": -2.3495559692382812, "logps/chosen": -482.6884765625, "logps/rejected": -5680.16357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1838793754577637, "rewards/margins": 51.456092834472656, "rewards/rejected": -53.63997268676758, "step": 21500 }, { "epoch": 93.93013100436681, "grad_norm": 3.1793851775031726e-07, "learning_rate": 5.5905952985453036e-08, "logits/chosen": -1.5564143657684326, "logits/rejected": -2.323711633682251, "logps/chosen": -492.8301696777344, "logps/rejected": -5335.37646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1489615440368652, "rewards/margins": 48.440879821777344, "rewards/rejected": -50.589839935302734, "step": 21510 }, { "epoch": 93.97379912663756, "grad_norm": 8.064679280478093e-08, "learning_rate": 5.5107402080534775e-08, "logits/chosen": -1.602556586265564, "logits/rejected": -2.4078128337860107, "logps/chosen": -453.181640625, "logps/rejected": -5787.2265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1153838634490967, "rewards/margins": 52.714256286621094, "rewards/rejected": -54.82964324951172, "step": 21520 }, { "epoch": 94.0174672489083, "grad_norm": 7.286521310817438e-08, "learning_rate": 5.431453189666036e-08, "logits/chosen": -1.553371548652649, "logits/rejected": -2.277085781097412, "logps/chosen": -492.1890563964844, "logps/rejected": -4993.041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.184237480163574, "rewards/margins": 45.114906311035156, "rewards/rejected": -47.29914855957031, "step": 21530 }, { "epoch": 94.06113537117903, "grad_norm": 6.137709454029297e-08, "learning_rate": 5.3527344276067814e-08, "logits/chosen": -1.5784703493118286, "logits/rejected": -2.278059482574463, "logps/chosen": -481.80120849609375, "logps/rejected": -5473.591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1141200065612793, "rewards/margins": 49.73929977416992, "rewards/rejected": -51.853416442871094, "step": 21540 }, { "epoch": 94.10480349344978, "grad_norm": 7.332146761142327e-08, "learning_rate": 5.274584104779157e-08, "logits/chosen": -1.5508967638015747, "logits/rejected": -2.267148494720459, "logps/chosen": -523.6217041015625, "logps/rejected": -5191.57958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2042386531829834, "rewards/margins": 46.987125396728516, "rewards/rejected": -49.19136047363281, "step": 21550 }, { "epoch": 94.14847161572052, "grad_norm": 7.839367228982076e-08, "learning_rate": 5.197002402765855e-08, "logits/chosen": -1.4983634948730469, "logits/rejected": -2.1605491638183594, "logps/chosen": -518.6705932617188, "logps/rejected": -4548.986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.130662441253662, "rewards/margins": 40.92028045654297, "rewards/rejected": -43.05094528198242, "step": 21560 }, { "epoch": 94.19213973799127, "grad_norm": 1.1093514837849023e-07, "learning_rate": 5.119989501828404e-08, "logits/chosen": -1.521436095237732, "logits/rejected": -2.2899229526519775, "logps/chosen": -504.375732421875, "logps/rejected": -4854.4990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2436656951904297, "rewards/margins": 43.646915435791016, "rewards/rejected": -45.89058303833008, "step": 21570 }, { "epoch": 94.23580786026201, "grad_norm": 1.6166814329198176e-08, "learning_rate": 5.043545580906695e-08, "logits/chosen": -1.5429779291152954, "logits/rejected": -2.2755935192108154, "logps/chosen": -514.6619873046875, "logps/rejected": -5054.98046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.151000499725342, "rewards/margins": 45.62970733642578, "rewards/rejected": -47.78070831298828, "step": 21580 }, { "epoch": 94.27947598253274, "grad_norm": 1.006103542527543e-07, "learning_rate": 4.967670817618564e-08, "logits/chosen": -1.5419247150421143, "logits/rejected": -2.3221209049224854, "logps/chosen": -486.74542236328125, "logps/rejected": -5042.18896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.156130313873291, "rewards/margins": 45.600746154785156, "rewards/rejected": -47.75687026977539, "step": 21590 }, { "epoch": 94.32314410480349, "grad_norm": 3.3350668029240197e-07, "learning_rate": 4.892365388259463e-08, "logits/chosen": -1.6326963901519775, "logits/rejected": -2.5063748359680176, "logps/chosen": -514.669677734375, "logps/rejected": -5954.6376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3529746532440186, "rewards/margins": 53.93852615356445, "rewards/rejected": -56.2914924621582, "step": 21600 }, { "epoch": 94.36681222707423, "grad_norm": 6.25663323583355e-08, "learning_rate": 4.8176294678018986e-08, "logits/chosen": -1.5886201858520508, "logits/rejected": -2.3943088054656982, "logps/chosen": -470.26446533203125, "logps/rejected": -5694.9931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1463875770568848, "rewards/margins": 51.76975631713867, "rewards/rejected": -53.9161376953125, "step": 21610 }, { "epoch": 94.41048034934498, "grad_norm": 1.1496990933742795e-05, "learning_rate": 4.7434632298952175e-08, "logits/chosen": -1.5622222423553467, "logits/rejected": -2.2007625102996826, "logps/chosen": -517.651611328125, "logps/rejected": -4765.484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.117607355117798, "rewards/margins": 43.0259895324707, "rewards/rejected": -45.143592834472656, "step": 21620 }, { "epoch": 94.45414847161572, "grad_norm": 3.3331021650757016e-08, "learning_rate": 4.669866846865101e-08, "logits/chosen": -1.5765937566757202, "logits/rejected": -2.328029155731201, "logps/chosen": -487.8656311035156, "logps/rejected": -5408.3369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.147719383239746, "rewards/margins": 49.077423095703125, "rewards/rejected": -51.22513961791992, "step": 21630 }, { "epoch": 94.49781659388647, "grad_norm": 1.1746986092511742e-06, "learning_rate": 4.5968404897130944e-08, "logits/chosen": -1.585365653038025, "logits/rejected": -2.238795757293701, "logps/chosen": -495.97125244140625, "logps/rejected": -5069.73974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.19926118850708, "rewards/margins": 45.78336715698242, "rewards/rejected": -47.982627868652344, "step": 21640 }, { "epoch": 94.5414847161572, "grad_norm": 1.828223160070783e-08, "learning_rate": 4.5243843281163605e-08, "logits/chosen": -1.5786041021347046, "logits/rejected": -2.343799352645874, "logps/chosen": -492.4720764160156, "logps/rejected": -5317.34716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0971152782440186, "rewards/margins": 48.209632873535156, "rewards/rejected": -50.3067512512207, "step": 21650 }, { "epoch": 94.58515283842794, "grad_norm": 1.1322957338323459e-07, "learning_rate": 4.452498530427146e-08, "logits/chosen": -1.5658739805221558, "logits/rejected": -2.37677264213562, "logps/chosen": -462.2744140625, "logps/rejected": -5598.2314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.118326425552368, "rewards/margins": 50.85395812988281, "rewards/rejected": -52.9722785949707, "step": 21660 }, { "epoch": 94.62882096069869, "grad_norm": 4.3422314290003525e-06, "learning_rate": 4.381183263672512e-08, "logits/chosen": -1.4685533046722412, "logits/rejected": -2.175133466720581, "logps/chosen": -552.8456420898438, "logps/rejected": -4516.6083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.266451358795166, "rewards/margins": 40.481483459472656, "rewards/rejected": -42.74793243408203, "step": 21670 }, { "epoch": 94.67248908296943, "grad_norm": 1.1195016076403357e-07, "learning_rate": 4.310438693553853e-08, "logits/chosen": -1.572379469871521, "logits/rejected": -2.2625300884246826, "logps/chosen": -471.1055603027344, "logps/rejected": -5419.3291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.157600164413452, "rewards/margins": 49.08916091918945, "rewards/rejected": -51.24676513671875, "step": 21680 }, { "epoch": 94.71615720524018, "grad_norm": 8.190105852926636e-07, "learning_rate": 4.240264984446574e-08, "logits/chosen": -1.5712676048278809, "logits/rejected": -2.309319257736206, "logps/chosen": -478.093505859375, "logps/rejected": -5229.24951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.045367956161499, "rewards/margins": 47.42658233642578, "rewards/rejected": -49.471946716308594, "step": 21690 }, { "epoch": 94.75982532751091, "grad_norm": 4.4114198378898815e-06, "learning_rate": 4.1706622993996085e-08, "logits/chosen": -1.5438964366912842, "logits/rejected": -2.2301764488220215, "logps/chosen": -520.3084106445312, "logps/rejected": -4754.6259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2195487022399902, "rewards/margins": 42.803916931152344, "rewards/rejected": -45.023468017578125, "step": 21700 }, { "epoch": 94.80349344978166, "grad_norm": 5.409162480726638e-08, "learning_rate": 4.1016308001352315e-08, "logits/chosen": -1.605583906173706, "logits/rejected": -2.3619773387908936, "logps/chosen": -497.9981384277344, "logps/rejected": -5548.3955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.171841621398926, "rewards/margins": 50.32939910888672, "rewards/rejected": -52.50123977661133, "step": 21710 }, { "epoch": 94.8471615720524, "grad_norm": 6.24311271906394e-08, "learning_rate": 4.033170647048501e-08, "logits/chosen": -1.5549142360687256, "logits/rejected": -2.3096518516540527, "logps/chosen": -493.60260009765625, "logps/rejected": -5422.63330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.127559185028076, "rewards/margins": 49.10123062133789, "rewards/rejected": -51.228790283203125, "step": 21720 }, { "epoch": 94.89082969432314, "grad_norm": 1.9344971494382684e-06, "learning_rate": 3.965281999206899e-08, "logits/chosen": -1.5613130331039429, "logits/rejected": -2.29085636138916, "logps/chosen": -521.3421020507812, "logps/rejected": -5245.31005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.259320020675659, "rewards/margins": 47.395755767822266, "rewards/rejected": -49.65507125854492, "step": 21730 }, { "epoch": 94.93449781659389, "grad_norm": 4.166074583936646e-08, "learning_rate": 3.897965014350108e-08, "logits/chosen": -1.565076470375061, "logits/rejected": -2.2725424766540527, "logps/chosen": -480.03173828125, "logps/rejected": -5485.4921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.112964153289795, "rewards/margins": 49.85528564453125, "rewards/rejected": -51.9682502746582, "step": 21740 }, { "epoch": 94.97816593886463, "grad_norm": 4.8634787720771874e-08, "learning_rate": 3.8312198488894834e-08, "logits/chosen": -1.5848207473754883, "logits/rejected": -2.3464207649230957, "logps/chosen": -486.22918701171875, "logps/rejected": -5643.54638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1825103759765625, "rewards/margins": 51.232208251953125, "rewards/rejected": -53.41472244262695, "step": 21750 }, { "epoch": 95.02183406113537, "grad_norm": 2.0050236094973047e-06, "learning_rate": 3.765046657907778e-08, "logits/chosen": -1.5473544597625732, "logits/rejected": -2.2435059547424316, "logps/chosen": -502.49810791015625, "logps/rejected": -5094.70361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1631834506988525, "rewards/margins": 45.940589904785156, "rewards/rejected": -48.10377502441406, "step": 21760 }, { "epoch": 95.06550218340611, "grad_norm": 4.4257336387767447e-08, "learning_rate": 3.6994455951587236e-08, "logits/chosen": -1.5377929210662842, "logits/rejected": -2.282567024230957, "logps/chosen": -514.750244140625, "logps/rejected": -5306.20556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.183572292327881, "rewards/margins": 48.04158020019531, "rewards/rejected": -50.22515106201172, "step": 21770 }, { "epoch": 95.10917030567686, "grad_norm": 3.666154289084958e-08, "learning_rate": 3.63441681306681e-08, "logits/chosen": -1.5300323963165283, "logits/rejected": -2.3456614017486572, "logps/chosen": -503.034912109375, "logps/rejected": -5370.54541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1459145545959473, "rewards/margins": 48.63688278198242, "rewards/rejected": -50.78279495239258, "step": 21780 }, { "epoch": 95.1528384279476, "grad_norm": 6.321977736631906e-08, "learning_rate": 3.569960462726674e-08, "logits/chosen": -1.548288106918335, "logits/rejected": -2.273969888687134, "logps/chosen": -516.2819213867188, "logps/rejected": -5092.64599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2239248752593994, "rewards/margins": 45.903594970703125, "rewards/rejected": -48.12751770019531, "step": 21790 }, { "epoch": 95.19650655021834, "grad_norm": 6.853910182147079e-08, "learning_rate": 3.506076693903071e-08, "logits/chosen": -1.5343507528305054, "logits/rejected": -2.245554208755493, "logps/chosen": -513.7796630859375, "logps/rejected": -5085.72021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1808981895446777, "rewards/margins": 45.99697494506836, "rewards/rejected": -48.177879333496094, "step": 21800 }, { "epoch": 95.24017467248909, "grad_norm": 8.743834612030319e-07, "learning_rate": 3.442765655030239e-08, "logits/chosen": -1.5467197895050049, "logits/rejected": -2.1989266872406006, "logps/chosen": -513.6165161132812, "logps/rejected": -4824.9287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.198669672012329, "rewards/margins": 43.44375991821289, "rewards/rejected": -45.642425537109375, "step": 21810 }, { "epoch": 95.28384279475982, "grad_norm": 1.6275862943575784e-05, "learning_rate": 3.3800274932117294e-08, "logits/chosen": -1.5977429151535034, "logits/rejected": -2.3228161334991455, "logps/chosen": -509.684326171875, "logps/rejected": -5443.146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1896800994873047, "rewards/margins": 49.19330596923828, "rewards/rejected": -51.38298416137695, "step": 21820 }, { "epoch": 95.32751091703057, "grad_norm": 1.137474781157679e-07, "learning_rate": 3.3178623542199916e-08, "logits/chosen": -1.4778639078140259, "logits/rejected": -2.1508350372314453, "logps/chosen": -504.01177978515625, "logps/rejected": -4783.619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1982321739196777, "rewards/margins": 43.023887634277344, "rewards/rejected": -45.22211456298828, "step": 21830 }, { "epoch": 95.37117903930131, "grad_norm": 2.6946985545699242e-05, "learning_rate": 3.2562703824960685e-08, "logits/chosen": -1.5482423305511475, "logits/rejected": -2.1914782524108887, "logps/chosen": -475.28759765625, "logps/rejected": -4823.6767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.115234136581421, "rewards/margins": 43.524173736572266, "rewards/rejected": -45.63941192626953, "step": 21840 }, { "epoch": 95.41484716157206, "grad_norm": 1.7056256075069032e-08, "learning_rate": 3.195251721149206e-08, "logits/chosen": -1.5603687763214111, "logits/rejected": -2.2762277126312256, "logps/chosen": -526.9652099609375, "logps/rejected": -5038.46728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1963584423065186, "rewards/margins": 45.467262268066406, "rewards/rejected": -47.66362380981445, "step": 21850 }, { "epoch": 95.4585152838428, "grad_norm": 4.504826246084846e-08, "learning_rate": 3.134806511956606e-08, "logits/chosen": -1.5655925273895264, "logits/rejected": -2.229057550430298, "logps/chosen": -508.3543395996094, "logps/rejected": -5000.0029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2818472385406494, "rewards/margins": 45.02355194091797, "rewards/rejected": -47.305397033691406, "step": 21860 }, { "epoch": 95.50218340611353, "grad_norm": 1.0238841380703566e-07, "learning_rate": 3.074934895363063e-08, "logits/chosen": -1.5483663082122803, "logits/rejected": -2.2076258659362793, "logps/chosen": -501.52667236328125, "logps/rejected": -4886.9677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.223254442214966, "rewards/margins": 43.889991760253906, "rewards/rejected": -46.113243103027344, "step": 21870 }, { "epoch": 95.54585152838428, "grad_norm": 1.0159737097714447e-07, "learning_rate": 3.015637010480576e-08, "logits/chosen": -1.6234012842178345, "logits/rejected": -2.395294427871704, "logps/chosen": -523.8230590820312, "logps/rejected": -5550.19384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3609871864318848, "rewards/margins": 50.176326751708984, "rewards/rejected": -52.53731155395508, "step": 21880 }, { "epoch": 95.58951965065502, "grad_norm": 3.485750702180757e-08, "learning_rate": 2.9569129950880738e-08, "logits/chosen": -1.5409586429595947, "logits/rejected": -2.2679355144500732, "logps/chosen": -501.5267639160156, "logps/rejected": -5048.7529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1385443210601807, "rewards/margins": 45.639183044433594, "rewards/rejected": -47.77772903442383, "step": 21890 }, { "epoch": 95.63318777292577, "grad_norm": 1.0787519953376644e-08, "learning_rate": 2.8987629856311593e-08, "logits/chosen": -1.5997580289840698, "logits/rejected": -2.3817906379699707, "logps/chosen": -478.2579040527344, "logps/rejected": -5429.1513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.198641300201416, "rewards/margins": 49.249603271484375, "rewards/rejected": -51.4482536315918, "step": 21900 }, { "epoch": 95.67685589519651, "grad_norm": 2.5177034464274907e-08, "learning_rate": 2.841187117221672e-08, "logits/chosen": -1.5686402320861816, "logits/rejected": -2.3156979084014893, "logps/chosen": -489.77520751953125, "logps/rejected": -5105.970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1709227561950684, "rewards/margins": 46.21480941772461, "rewards/rejected": -48.3857307434082, "step": 21910 }, { "epoch": 95.72052401746726, "grad_norm": 8.870308092430865e-08, "learning_rate": 2.7841855236374892e-08, "logits/chosen": -1.5774781703948975, "logits/rejected": -2.2659263610839844, "logps/chosen": -496.140625, "logps/rejected": -4827.1845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2015182971954346, "rewards/margins": 43.480979919433594, "rewards/rejected": -45.6824951171875, "step": 21920 }, { "epoch": 95.76419213973799, "grad_norm": 1.1026884085708988e-07, "learning_rate": 2.727758337322056e-08, "logits/chosen": -1.5775907039642334, "logits/rejected": -2.2508811950683594, "logps/chosen": -525.9554443359375, "logps/rejected": -5257.9443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2338883876800537, "rewards/margins": 47.53468322753906, "rewards/rejected": -49.76857376098633, "step": 21930 }, { "epoch": 95.80786026200873, "grad_norm": 2.5276818154628287e-08, "learning_rate": 2.6719056893842467e-08, "logits/chosen": -1.591060996055603, "logits/rejected": -2.3154988288879395, "logps/chosen": -484.95599365234375, "logps/rejected": -5262.09423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1480872631073, "rewards/margins": 47.62765884399414, "rewards/rejected": -49.77574920654297, "step": 21940 }, { "epoch": 95.85152838427948, "grad_norm": 8.957101983026909e-07, "learning_rate": 2.6166277095980575e-08, "logits/chosen": -1.596012830734253, "logits/rejected": -2.3881258964538574, "logps/chosen": -499.38958740234375, "logps/rejected": -5643.404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1044790744781494, "rewards/margins": 51.31095504760742, "rewards/rejected": -53.415435791015625, "step": 21950 }, { "epoch": 95.89519650655022, "grad_norm": 5.137529811626008e-07, "learning_rate": 2.5619245264021097e-08, "logits/chosen": -1.54124915599823, "logits/rejected": -2.2417843341827393, "logps/chosen": -536.9600830078125, "logps/rejected": -4893.80322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2193305492401123, "rewards/margins": 44.04945755004883, "rewards/rejected": -46.2687873840332, "step": 21960 }, { "epoch": 95.93886462882097, "grad_norm": 3.906838746819231e-08, "learning_rate": 2.507796266899537e-08, "logits/chosen": -1.5873432159423828, "logits/rejected": -2.384469509124756, "logps/chosen": -504.06396484375, "logps/rejected": -5762.56787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.207623243331909, "rewards/margins": 52.35416793823242, "rewards/rejected": -54.561790466308594, "step": 21970 }, { "epoch": 95.9825327510917, "grad_norm": 9.770711275634743e-08, "learning_rate": 2.454243056857708e-08, "logits/chosen": -1.577067255973816, "logits/rejected": -2.390934467315674, "logps/chosen": -502.5943298339844, "logps/rejected": -5119.12841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2257204055786133, "rewards/margins": 46.279869079589844, "rewards/rejected": -48.50558090209961, "step": 21980 }, { "epoch": 96.02620087336244, "grad_norm": 1.8482418081128192e-07, "learning_rate": 2.4012650207076992e-08, "logits/chosen": -1.5535672903060913, "logits/rejected": -2.1739113330841064, "logps/chosen": -511.85467529296875, "logps/rejected": -4897.37890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.293898344039917, "rewards/margins": 44.00139617919922, "rewards/rejected": -46.295291900634766, "step": 21990 }, { "epoch": 96.06986899563319, "grad_norm": 3.220820800669795e-05, "learning_rate": 2.348862281544323e-08, "logits/chosen": -1.606191635131836, "logits/rejected": -2.3267147541046143, "logps/chosen": -504.95428466796875, "logps/rejected": -5631.6533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0762228965759277, "rewards/margins": 51.18680191040039, "rewards/rejected": -53.263023376464844, "step": 22000 }, { "epoch": 96.11353711790393, "grad_norm": 2.7747499937803493e-08, "learning_rate": 2.297034961125627e-08, "logits/chosen": -1.558685541152954, "logits/rejected": -2.3289191722869873, "logps/chosen": -495.75946044921875, "logps/rejected": -5149.7392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1264255046844482, "rewards/margins": 46.60261154174805, "rewards/rejected": -48.72903060913086, "step": 22010 }, { "epoch": 96.15720524017468, "grad_norm": 1.6990535500774474e-06, "learning_rate": 2.2457831798726183e-08, "logits/chosen": -1.5400038957595825, "logits/rejected": -2.228858232498169, "logps/chosen": -486.87939453125, "logps/rejected": -5027.66796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1295716762542725, "rewards/margins": 45.50623321533203, "rewards/rejected": -47.63580322265625, "step": 22020 }, { "epoch": 96.20087336244542, "grad_norm": 1.0943756120964795e-06, "learning_rate": 2.1951070568691226e-08, "logits/chosen": -1.5769660472869873, "logits/rejected": -2.3465447425842285, "logps/chosen": -483.47869873046875, "logps/rejected": -5475.68896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.114734649658203, "rewards/margins": 49.769309997558594, "rewards/rejected": -51.8840446472168, "step": 22030 }, { "epoch": 96.24454148471615, "grad_norm": 2.8732024462202245e-08, "learning_rate": 2.145006709861397e-08, "logits/chosen": -1.5408927202224731, "logits/rejected": -2.25759220123291, "logps/chosen": -508.3583984375, "logps/rejected": -5133.6865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3224291801452637, "rewards/margins": 46.19702911376953, "rewards/rejected": -48.51945114135742, "step": 22040 }, { "epoch": 96.2882096069869, "grad_norm": 6.349476038906066e-08, "learning_rate": 2.0954822552578524e-08, "logits/chosen": -1.5808883905410767, "logits/rejected": -2.371778964996338, "logps/chosen": -483.89434814453125, "logps/rejected": -5651.927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1067967414855957, "rewards/margins": 51.361656188964844, "rewards/rejected": -53.46844482421875, "step": 22050 }, { "epoch": 96.33187772925764, "grad_norm": 3.04196825445566e-07, "learning_rate": 2.046533808128803e-08, "logits/chosen": -1.4873555898666382, "logits/rejected": -2.254756450653076, "logps/chosen": -519.1737060546875, "logps/rejected": -4873.46533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.197577476501465, "rewards/margins": 43.87956619262695, "rewards/rejected": -46.07714080810547, "step": 22060 }, { "epoch": 96.37554585152839, "grad_norm": 1.2947234497876459e-08, "learning_rate": 1.9981614822062455e-08, "logits/chosen": -1.610792875289917, "logits/rejected": -2.3841466903686523, "logps/chosen": -487.64105224609375, "logps/rejected": -5686.1455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.168769359588623, "rewards/margins": 51.758087158203125, "rewards/rejected": -53.926856994628906, "step": 22070 }, { "epoch": 96.41921397379913, "grad_norm": 5.773851380307072e-07, "learning_rate": 1.950365389883524e-08, "logits/chosen": -1.5652010440826416, "logits/rejected": -2.3169169425964355, "logps/chosen": -494.14227294921875, "logps/rejected": -5536.3427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1414661407470703, "rewards/margins": 50.303504943847656, "rewards/rejected": -52.444969177246094, "step": 22080 }, { "epoch": 96.46288209606988, "grad_norm": 1.3379876297305761e-06, "learning_rate": 1.9031456422151374e-08, "logits/chosen": -1.5439174175262451, "logits/rejected": -2.1981894969940186, "logps/chosen": -507.359130859375, "logps/rejected": -4915.10693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1305031776428223, "rewards/margins": 44.25476837158203, "rewards/rejected": -46.38526916503906, "step": 22090 }, { "epoch": 96.5065502183406, "grad_norm": 3.536048692701906e-08, "learning_rate": 1.8565023489164335e-08, "logits/chosen": -1.5601392984390259, "logits/rejected": -2.243194580078125, "logps/chosen": -524.4635009765625, "logps/rejected": -4965.5517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.378744125366211, "rewards/margins": 44.52002716064453, "rewards/rejected": -46.89876937866211, "step": 22100 }, { "epoch": 96.55021834061135, "grad_norm": 4.3474572735968134e-08, "learning_rate": 1.8104356183633043e-08, "logits/chosen": -1.5766903162002563, "logits/rejected": -2.3281543254852295, "logps/chosen": -506.583251953125, "logps/rejected": -5199.509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.365673780441284, "rewards/margins": 46.7408561706543, "rewards/rejected": -49.10652542114258, "step": 22110 }, { "epoch": 96.5938864628821, "grad_norm": 2.827115095673892e-06, "learning_rate": 1.7649455575920737e-08, "logits/chosen": -1.5840212106704712, "logits/rejected": -2.3740952014923096, "logps/chosen": -516.0411376953125, "logps/rejected": -5398.1484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2836480140686035, "rewards/margins": 48.82084655761719, "rewards/rejected": -51.104496002197266, "step": 22120 }, { "epoch": 96.63755458515284, "grad_norm": 2.4443162120099124e-06, "learning_rate": 1.720032272299138e-08, "logits/chosen": -1.6422836780548096, "logits/rejected": -2.3948585987091064, "logps/chosen": -468.54669189453125, "logps/rejected": -5718.71728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1082088947296143, "rewards/margins": 51.9952392578125, "rewards/rejected": -54.10345458984375, "step": 22130 }, { "epoch": 96.68122270742359, "grad_norm": 2.1052998893158754e-06, "learning_rate": 1.6756958668407985e-08, "logits/chosen": -1.5758535861968994, "logits/rejected": -2.321321725845337, "logps/chosen": -509.23663330078125, "logps/rejected": -5052.23291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1262106895446777, "rewards/margins": 45.64453125, "rewards/rejected": -47.77074432373047, "step": 22140 }, { "epoch": 96.72489082969432, "grad_norm": 3.0549125259238584e-08, "learning_rate": 1.6319364442328456e-08, "logits/chosen": -1.5335466861724854, "logits/rejected": -2.2044968605041504, "logps/chosen": -513.9124145507812, "logps/rejected": -5032.31396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.209749460220337, "rewards/margins": 45.387393951416016, "rewards/rejected": -47.597145080566406, "step": 22150 }, { "epoch": 96.76855895196506, "grad_norm": 6.623383972832082e-08, "learning_rate": 1.5887541061505864e-08, "logits/chosen": -1.5348103046417236, "logits/rejected": -2.2630467414855957, "logps/chosen": -528.924072265625, "logps/rejected": -5030.80810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.243969202041626, "rewards/margins": 45.35555648803711, "rewards/rejected": -47.599525451660156, "step": 22160 }, { "epoch": 96.8122270742358, "grad_norm": 5.083196632910923e-06, "learning_rate": 1.546148952928428e-08, "logits/chosen": -1.585292100906372, "logits/rejected": -2.323965549468994, "logps/chosen": -487.38330078125, "logps/rejected": -5519.82177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1528286933898926, "rewards/margins": 50.19267272949219, "rewards/rejected": -52.345497131347656, "step": 22170 }, { "epoch": 96.85589519650655, "grad_norm": 1.7208887146292618e-08, "learning_rate": 1.5041210835596288e-08, "logits/chosen": -1.5815995931625366, "logits/rejected": -2.3686394691467285, "logps/chosen": -484.900634765625, "logps/rejected": -5594.9638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.102370500564575, "rewards/margins": 50.89983367919922, "rewards/rejected": -53.0022087097168, "step": 22180 }, { "epoch": 96.8995633187773, "grad_norm": 1.621896962721455e-08, "learning_rate": 1.4626705956961862e-08, "logits/chosen": -1.6441618204116821, "logits/rejected": -2.3971123695373535, "logps/chosen": -451.503662109375, "logps/rejected": -6054.2099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0843849182128906, "rewards/margins": 55.228118896484375, "rewards/rejected": -57.31250762939453, "step": 22190 }, { "epoch": 96.94323144104804, "grad_norm": 3.3608984742882056e-08, "learning_rate": 1.4217975856485599e-08, "logits/chosen": -1.5783318281173706, "logits/rejected": -2.3226983547210693, "logps/chosen": -507.7396545410156, "logps/rejected": -5469.0419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3319363594055176, "rewards/margins": 49.45454406738281, "rewards/rejected": -51.78647994995117, "step": 22200 }, { "epoch": 96.98689956331877, "grad_norm": 4.5015196996368376e-08, "learning_rate": 1.3815021483853663e-08, "logits/chosen": -1.571743369102478, "logits/rejected": -2.3245930671691895, "logps/chosen": -517.5416259765625, "logps/rejected": -5256.41064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2981317043304443, "rewards/margins": 47.41664123535156, "rewards/rejected": -49.71477127075195, "step": 22210 }, { "epoch": 97.03056768558952, "grad_norm": 2.4395156112252516e-08, "learning_rate": 1.3417843775332673e-08, "logits/chosen": -1.5273582935333252, "logits/rejected": -2.1247458457946777, "logps/chosen": -517.7322998046875, "logps/rejected": -4519.79345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3215792179107666, "rewards/margins": 40.35070037841797, "rewards/rejected": -42.67227554321289, "step": 22220 }, { "epoch": 97.07423580786026, "grad_norm": 1.4083348042354146e-07, "learning_rate": 1.3026443653767762e-08, "logits/chosen": -1.54063081741333, "logits/rejected": -2.2073440551757812, "logps/chosen": -495.29278564453125, "logps/rejected": -5137.7490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1657052040100098, "rewards/margins": 46.45738983154297, "rewards/rejected": -48.62310028076172, "step": 22230 }, { "epoch": 97.117903930131, "grad_norm": 4.6945139970940436e-08, "learning_rate": 1.2640822028578415e-08, "logits/chosen": -1.5340585708618164, "logits/rejected": -2.2112011909484863, "logps/chosen": -501.0341796875, "logps/rejected": -4646.40478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1529762744903564, "rewards/margins": 41.74941635131836, "rewards/rejected": -43.90239334106445, "step": 22240 }, { "epoch": 97.16157205240175, "grad_norm": 8.740884902525875e-07, "learning_rate": 1.226097979575902e-08, "logits/chosen": -1.4911226034164429, "logits/rejected": -2.147104501724243, "logps/chosen": -510.85894775390625, "logps/rejected": -4607.39794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1084723472595215, "rewards/margins": 41.4498176574707, "rewards/rejected": -43.558292388916016, "step": 22250 }, { "epoch": 97.20524017467248, "grad_norm": 7.454415056969873e-08, "learning_rate": 1.188691783787499e-08, "logits/chosen": -1.5657029151916504, "logits/rejected": -2.258323907852173, "logps/chosen": -487.0245666503906, "logps/rejected": -4921.8193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.24072003364563, "rewards/margins": 44.274208068847656, "rewards/rejected": -46.514930725097656, "step": 22260 }, { "epoch": 97.24890829694323, "grad_norm": 2.8410237208422962e-08, "learning_rate": 1.1518637024061086e-08, "logits/chosen": -1.6179898977279663, "logits/rejected": -2.293379306793213, "logps/chosen": -467.3805236816406, "logps/rejected": -5463.85205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.103555679321289, "rewards/margins": 49.55562973022461, "rewards/rejected": -51.6591911315918, "step": 22270 }, { "epoch": 97.29257641921397, "grad_norm": 1.1728832585846796e-06, "learning_rate": 1.115613821002004e-08, "logits/chosen": -1.5684841871261597, "logits/rejected": -2.2144885063171387, "logps/chosen": -497.44012451171875, "logps/rejected": -4953.2685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.176267147064209, "rewards/margins": 44.573768615722656, "rewards/rejected": -46.750038146972656, "step": 22280 }, { "epoch": 97.33624454148472, "grad_norm": 3.679436256884054e-09, "learning_rate": 1.0799422238019774e-08, "logits/chosen": -1.587745189666748, "logits/rejected": -2.3732120990753174, "logps/chosen": -475.486572265625, "logps/rejected": -5771.7919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.159440279006958, "rewards/margins": 52.45970916748047, "rewards/rejected": -54.61915969848633, "step": 22290 }, { "epoch": 97.37991266375546, "grad_norm": 1.7034828111047283e-06, "learning_rate": 1.0448489936891459e-08, "logits/chosen": -1.5546151399612427, "logits/rejected": -2.2618632316589355, "logps/chosen": -518.1719360351562, "logps/rejected": -5080.359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.246748447418213, "rewards/margins": 45.77846145629883, "rewards/rejected": -48.02521514892578, "step": 22300 }, { "epoch": 97.4235807860262, "grad_norm": 9.593616016416912e-08, "learning_rate": 1.0103342122028125e-08, "logits/chosen": -1.5513814687728882, "logits/rejected": -2.2800357341766357, "logps/chosen": -518.7020263671875, "logps/rejected": -4788.064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3069307804107666, "rewards/margins": 42.95105743408203, "rewards/rejected": -45.25798416137695, "step": 22310 }, { "epoch": 97.46724890829694, "grad_norm": 6.004752376225106e-07, "learning_rate": 9.763979595383e-09, "logits/chosen": -1.5686168670654297, "logits/rejected": -2.2722134590148926, "logps/chosen": -500.3663024902344, "logps/rejected": -5348.16796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1429026126861572, "rewards/margins": 48.365936279296875, "rewards/rejected": -50.50884246826172, "step": 22320 }, { "epoch": 97.51091703056768, "grad_norm": 2.5501886596626523e-06, "learning_rate": 9.430403145466449e-09, "logits/chosen": -1.6393486261367798, "logits/rejected": -2.392918348312378, "logps/chosen": -481.1686096191406, "logps/rejected": -5994.81201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.194166660308838, "rewards/margins": 54.6356201171875, "rewards/rejected": -56.82978439331055, "step": 22330 }, { "epoch": 97.55458515283843, "grad_norm": 1.6292386416755698e-07, "learning_rate": 9.102613547345429e-09, "logits/chosen": -1.596688985824585, "logits/rejected": -2.3177106380462646, "logps/chosen": -499.0672302246094, "logps/rejected": -5244.86572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1896238327026367, "rewards/margins": 47.41016387939453, "rewards/rejected": -49.59979248046875, "step": 22340 }, { "epoch": 97.59825327510917, "grad_norm": 2.8742902579063112e-08, "learning_rate": 8.780611562640707e-09, "logits/chosen": -1.5905288457870483, "logits/rejected": -2.3486034870147705, "logps/chosen": -494.80078125, "logps/rejected": -5304.64208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0976600646972656, "rewards/margins": 48.20231246948242, "rewards/rejected": -50.29997634887695, "step": 22350 }, { "epoch": 97.64192139737992, "grad_norm": 7.045975685465508e-08, "learning_rate": 8.464397939524915e-09, "logits/chosen": -1.4800219535827637, "logits/rejected": -2.2192771434783936, "logps/chosen": -516.0133056640625, "logps/rejected": -5081.16357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.233473300933838, "rewards/margins": 45.896827697753906, "rewards/rejected": -48.13030242919922, "step": 22360 }, { "epoch": 97.68558951965065, "grad_norm": 7.810376804226529e-07, "learning_rate": 8.153973412723114e-09, "logits/chosen": -1.4995759725570679, "logits/rejected": -2.1502737998962402, "logps/chosen": -546.3670043945312, "logps/rejected": -4780.72509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2079074382781982, "rewards/margins": 42.93921661376953, "rewards/rejected": -45.147117614746094, "step": 22370 }, { "epoch": 97.7292576419214, "grad_norm": 1.0943738974755606e-08, "learning_rate": 7.84933870350807e-09, "logits/chosen": -1.5866998434066772, "logits/rejected": -2.310051202774048, "logps/chosen": -479.3548278808594, "logps/rejected": -5319.51513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.143651247024536, "rewards/margins": 48.145904541015625, "rewards/rejected": -50.289546966552734, "step": 22380 }, { "epoch": 97.77292576419214, "grad_norm": 1.0868587340285442e-07, "learning_rate": 7.550494519699968e-09, "logits/chosen": -1.5713742971420288, "logits/rejected": -2.3106887340545654, "logps/chosen": -488.30877685546875, "logps/rejected": -5700.43896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.124364137649536, "rewards/margins": 51.830421447753906, "rewards/rejected": -53.95479202270508, "step": 22390 }, { "epoch": 97.81659388646288, "grad_norm": 2.7814698350976735e-08, "learning_rate": 7.257441555664768e-09, "logits/chosen": -1.611724615097046, "logits/rejected": -2.4368233680725098, "logps/chosen": -480.9991760253906, "logps/rejected": -5756.91259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0902953147888184, "rewards/margins": 52.44062423706055, "rewards/rejected": -54.530921936035156, "step": 22400 }, { "epoch": 97.86026200873363, "grad_norm": 1.1048344015703484e-07, "learning_rate": 6.970180492312795e-09, "logits/chosen": -1.5710586309432983, "logits/rejected": -2.3189644813537598, "logps/chosen": -505.2456970214844, "logps/rejected": -5278.7646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.235618829727173, "rewards/margins": 47.73380661010742, "rewards/rejected": -49.96942901611328, "step": 22410 }, { "epoch": 97.90393013100437, "grad_norm": 2.924902842487633e-07, "learning_rate": 6.688711997096531e-09, "logits/chosen": -1.5557688474655151, "logits/rejected": -2.373011827468872, "logps/chosen": -499.2859802246094, "logps/rejected": -5170.7998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1905312538146973, "rewards/margins": 46.795284271240234, "rewards/rejected": -48.985816955566406, "step": 22420 }, { "epoch": 97.9475982532751, "grad_norm": 1.1737245176400362e-07, "learning_rate": 6.413036724010057e-09, "logits/chosen": -1.6012451648712158, "logits/rejected": -2.3112847805023193, "logps/chosen": -484.39630126953125, "logps/rejected": -5464.30908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1590301990509033, "rewards/margins": 49.480587005615234, "rewards/rejected": -51.639625549316406, "step": 22430 }, { "epoch": 97.99126637554585, "grad_norm": 1.335325923727202e-07, "learning_rate": 6.143155313585447e-09, "logits/chosen": -1.5581109523773193, "logits/rejected": -2.2537341117858887, "logps/chosen": -526.9837646484375, "logps/rejected": -4831.501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.225618362426758, "rewards/margins": 43.50852966308594, "rewards/rejected": -45.73414611816406, "step": 22440 }, { "epoch": 98.0349344978166, "grad_norm": 3.3293306032810394e-08, "learning_rate": 5.879068392894427e-09, "logits/chosen": -1.5991623401641846, "logits/rejected": -2.3720407485961914, "logps/chosen": -481.92840576171875, "logps/rejected": -5658.09716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.262376308441162, "rewards/margins": 51.246925354003906, "rewards/rejected": -53.509300231933594, "step": 22450 }, { "epoch": 98.07860262008734, "grad_norm": 1.454062359946571e-07, "learning_rate": 5.6207765755439406e-09, "logits/chosen": -1.5582330226898193, "logits/rejected": -2.361130475997925, "logps/chosen": -497.95611572265625, "logps/rejected": -5436.0048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.111832857131958, "rewards/margins": 49.411781311035156, "rewards/rejected": -51.52360916137695, "step": 22460 }, { "epoch": 98.12227074235808, "grad_norm": 2.4154227630427004e-08, "learning_rate": 5.3682804616767005e-09, "logits/chosen": -1.6164470911026, "logits/rejected": -2.3296782970428467, "logps/chosen": -476.84326171875, "logps/rejected": -5776.2451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1318936347961426, "rewards/margins": 52.53319549560547, "rewards/rejected": -54.66508865356445, "step": 22470 }, { "epoch": 98.16593886462883, "grad_norm": 7.557544247180371e-05, "learning_rate": 5.1215806379681375e-09, "logits/chosen": -1.515891194343567, "logits/rejected": -2.259845733642578, "logps/chosen": -516.1510009765625, "logps/rejected": -5080.76953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.234349489212036, "rewards/margins": 45.922637939453125, "rewards/rejected": -48.15699005126953, "step": 22480 }, { "epoch": 98.20960698689956, "grad_norm": 2.5637387358482985e-06, "learning_rate": 4.88067767762751e-09, "logits/chosen": -1.5731834173202515, "logits/rejected": -2.2697319984436035, "logps/chosen": -503.49395751953125, "logps/rejected": -4867.18505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2130026817321777, "rewards/margins": 43.85417938232422, "rewards/rejected": -46.067176818847656, "step": 22490 }, { "epoch": 98.2532751091703, "grad_norm": 1.1311380770375123e-07, "learning_rate": 4.645572140393462e-09, "logits/chosen": -1.55946946144104, "logits/rejected": -2.279724597930908, "logps/chosen": -500.11785888671875, "logps/rejected": -5164.390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.154696464538574, "rewards/margins": 46.73111343383789, "rewards/rejected": -48.88581085205078, "step": 22500 }, { "epoch": 98.29694323144105, "grad_norm": 4.7710189831687806e-08, "learning_rate": 4.416264572535134e-09, "logits/chosen": -1.6455707550048828, "logits/rejected": -2.3937103748321533, "logps/chosen": -471.5122985839844, "logps/rejected": -5466.9560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.08850359916687, "rewards/margins": 49.69451141357422, "rewards/rejected": -51.78301239013672, "step": 22510 }, { "epoch": 98.3406113537118, "grad_norm": 1.50462482834371e-08, "learning_rate": 4.1927555068499436e-09, "logits/chosen": -1.5957558155059814, "logits/rejected": -2.301534652709961, "logps/chosen": -475.291259765625, "logps/rejected": -5308.1259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1720550060272217, "rewards/margins": 48.08402633666992, "rewards/rejected": -50.25608444213867, "step": 22520 }, { "epoch": 98.38427947598254, "grad_norm": 3.113827826807522e-08, "learning_rate": 3.975045462662475e-09, "logits/chosen": -1.567442774772644, "logits/rejected": -2.3438634872436523, "logps/chosen": -487.6040954589844, "logps/rejected": -5414.8466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0880770683288574, "rewards/margins": 49.271602630615234, "rewards/rejected": -51.35968017578125, "step": 22530 }, { "epoch": 98.42794759825327, "grad_norm": 2.0266164358188655e-06, "learning_rate": 3.763134945823088e-09, "logits/chosen": -1.596217393875122, "logits/rejected": -2.3445239067077637, "logps/chosen": -485.24755859375, "logps/rejected": -5257.46435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1570136547088623, "rewards/margins": 47.54899978637695, "rewards/rejected": -49.70600891113281, "step": 22540 }, { "epoch": 98.47161572052401, "grad_norm": 4.655129315957902e-08, "learning_rate": 3.5570244487070914e-09, "logits/chosen": -1.6455434560775757, "logits/rejected": -2.3635506629943848, "logps/chosen": -465.0580139160156, "logps/rejected": -5337.7548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2345664501190186, "rewards/margins": 48.25553512573242, "rewards/rejected": -50.49010467529297, "step": 22550 }, { "epoch": 98.51528384279476, "grad_norm": 2.855847497288144e-05, "learning_rate": 3.3567144502133496e-09, "logits/chosen": -1.5289356708526611, "logits/rejected": -2.1916232109069824, "logps/chosen": -532.4598999023438, "logps/rejected": -4591.8896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.218754291534424, "rewards/margins": 41.166263580322266, "rewards/rejected": -43.38501739501953, "step": 22560 }, { "epoch": 98.5589519650655, "grad_norm": 1.763228434796859e-06, "learning_rate": 3.162205415762898e-09, "logits/chosen": -1.576440691947937, "logits/rejected": -2.3243987560272217, "logps/chosen": -532.6723022460938, "logps/rejected": -5319.13037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.311850070953369, "rewards/margins": 48.0040397644043, "rewards/rejected": -50.315887451171875, "step": 22570 }, { "epoch": 98.60262008733625, "grad_norm": 2.4057553749393e-08, "learning_rate": 2.973497797298941e-09, "logits/chosen": -1.5425456762313843, "logits/rejected": -2.2082526683807373, "logps/chosen": -520.7813720703125, "logps/rejected": -5287.90673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3151018619537354, "rewards/margins": 47.73129653930664, "rewards/rejected": -50.04639434814453, "step": 22580 }, { "epoch": 98.646288209607, "grad_norm": 1.586190409231896e-07, "learning_rate": 2.7905920332843563e-09, "logits/chosen": -1.581878900527954, "logits/rejected": -2.2530651092529297, "logps/chosen": -488.3167419433594, "logps/rejected": -5085.93798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.071187734603882, "rewards/margins": 45.9550895690918, "rewards/rejected": -48.026275634765625, "step": 22590 }, { "epoch": 98.68995633187772, "grad_norm": 2.6294383658256935e-08, "learning_rate": 2.6134885487016927e-09, "logits/chosen": -1.518121361732483, "logits/rejected": -2.157557249069214, "logps/chosen": -502.11065673828125, "logps/rejected": -4740.10400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1128532886505127, "rewards/margins": 42.611324310302734, "rewards/rejected": -44.72417449951172, "step": 22600 }, { "epoch": 98.73362445414847, "grad_norm": 5.308678336055425e-08, "learning_rate": 2.4421877550520614e-09, "logits/chosen": -1.545386791229248, "logits/rejected": -2.3277313709259033, "logps/chosen": -498.0686950683594, "logps/rejected": -5221.06396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2309186458587646, "rewards/margins": 47.17081069946289, "rewards/rejected": -49.40172576904297, "step": 22610 }, { "epoch": 98.77729257641921, "grad_norm": 4.791846638036317e-08, "learning_rate": 2.2766900503534695e-09, "logits/chosen": -1.5802152156829834, "logits/rejected": -2.328673839569092, "logps/chosen": -481.89678955078125, "logps/rejected": -5354.3349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.273165464401245, "rewards/margins": 48.488929748535156, "rewards/rejected": -50.7620964050293, "step": 22620 }, { "epoch": 98.82096069868996, "grad_norm": 4.07161549489816e-08, "learning_rate": 2.116995819140821e-09, "logits/chosen": -1.5782228708267212, "logits/rejected": -2.3393900394439697, "logps/chosen": -487.47314453125, "logps/rejected": -5573.1044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2446539402008057, "rewards/margins": 50.442955017089844, "rewards/rejected": -52.6876106262207, "step": 22630 }, { "epoch": 98.8646288209607, "grad_norm": 1.2391071935068664e-05, "learning_rate": 1.9631054324648067e-09, "logits/chosen": -1.5875341892242432, "logits/rejected": -2.3147377967834473, "logps/chosen": -490.0957946777344, "logps/rejected": -5405.099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.137072801589966, "rewards/margins": 48.914215087890625, "rewards/rejected": -51.05128860473633, "step": 22640 }, { "epoch": 98.90829694323143, "grad_norm": 2.2100368806654875e-07, "learning_rate": 1.8150192478902374e-09, "logits/chosen": -1.5743370056152344, "logits/rejected": -2.257927417755127, "logps/chosen": -475.0313415527344, "logps/rejected": -4796.1181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0111074447631836, "rewards/margins": 43.287879943847656, "rewards/rejected": -45.298988342285156, "step": 22650 }, { "epoch": 98.95196506550218, "grad_norm": 5.3403967614002576e-08, "learning_rate": 1.6727376094963221e-09, "logits/chosen": -1.5629334449768066, "logits/rejected": -2.318726062774658, "logps/chosen": -494.1142578125, "logps/rejected": -5323.40380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.083012104034424, "rewards/margins": 48.3044319152832, "rewards/rejected": -50.3874397277832, "step": 22660 }, { "epoch": 98.99563318777292, "grad_norm": 4.158670241438978e-08, "learning_rate": 1.5362608478750041e-09, "logits/chosen": -1.5633680820465088, "logits/rejected": -2.324082851409912, "logps/chosen": -496.28143310546875, "logps/rejected": -5465.53515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1832194328308105, "rewards/margins": 49.463356018066406, "rewards/rejected": -51.646568298339844, "step": 22670 }, { "epoch": 99.03930131004367, "grad_norm": 1.2390837386896314e-06, "learning_rate": 1.405589280130959e-09, "logits/chosen": -1.6295849084854126, "logits/rejected": -2.435253620147705, "logps/chosen": -479.39837646484375, "logps/rejected": -5766.70166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1714680194854736, "rewards/margins": 52.4443359375, "rewards/rejected": -54.61580276489258, "step": 22680 }, { "epoch": 99.08296943231441, "grad_norm": 9.681620651864388e-08, "learning_rate": 1.280723209880208e-09, "logits/chosen": -1.5249125957489014, "logits/rejected": -2.261436939239502, "logps/chosen": -534.6063232421875, "logps/rejected": -5087.6259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1371896266937256, "rewards/margins": 46.051170349121094, "rewards/rejected": -48.18836212158203, "step": 22690 }, { "epoch": 99.12663755458516, "grad_norm": 6.140515985979533e-07, "learning_rate": 1.1616629272495628e-09, "logits/chosen": -1.6153122186660767, "logits/rejected": -2.386347532272339, "logps/chosen": -470.0567321777344, "logps/rejected": -5952.54638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0329341888427734, "rewards/margins": 54.25049591064453, "rewards/rejected": -56.2834358215332, "step": 22700 }, { "epoch": 99.17030567685589, "grad_norm": 5.630483272145984e-08, "learning_rate": 1.0484087088766248e-09, "logits/chosen": -1.6280105113983154, "logits/rejected": -2.4439454078674316, "logps/chosen": -471.09393310546875, "logps/rejected": -5813.4580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0907485485076904, "rewards/margins": 52.937522888183594, "rewards/rejected": -55.02827072143555, "step": 22710 }, { "epoch": 99.21397379912663, "grad_norm": 1.1107300825700479e-07, "learning_rate": 9.409608179078433e-10, "logits/chosen": -1.55889892578125, "logits/rejected": -2.290982723236084, "logps/chosen": -479.2071228027344, "logps/rejected": -5243.5322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1637537479400635, "rewards/margins": 47.509193420410156, "rewards/rejected": -49.672950744628906, "step": 22720 }, { "epoch": 99.25764192139738, "grad_norm": 8.500315463242509e-09, "learning_rate": 8.393195039993473e-10, "logits/chosen": -1.5412005186080933, "logits/rejected": -2.2968082427978516, "logps/chosen": -486.2464294433594, "logps/rejected": -5167.2021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1012465953826904, "rewards/margins": 46.882057189941406, "rewards/rejected": -48.983306884765625, "step": 22730 }, { "epoch": 99.30131004366812, "grad_norm": 4.1479615732690695e-06, "learning_rate": 7.434850033147257e-10, "logits/chosen": -1.6059620380401611, "logits/rejected": -2.391437292098999, "logps/chosen": -451.3169860839844, "logps/rejected": -5651.17041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1313717365264893, "rewards/margins": 51.367767333984375, "rewards/rejected": -53.49913787841797, "step": 22740 }, { "epoch": 99.34497816593887, "grad_norm": 5.106218344085727e-07, "learning_rate": 6.534575385264142e-10, "logits/chosen": -1.5285513401031494, "logits/rejected": -2.2402267456054688, "logps/chosen": -527.4456176757812, "logps/rejected": -5071.6630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1951308250427246, "rewards/margins": 45.79731369018555, "rewards/rejected": -47.99243927001953, "step": 22750 }, { "epoch": 99.38864628820961, "grad_norm": 4.822781989900991e-07, "learning_rate": 5.692373188134758e-10, "logits/chosen": -1.5373455286026, "logits/rejected": -2.2763946056365967, "logps/chosen": -490.80206298828125, "logps/rejected": -5090.31640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0811073780059814, "rewards/margins": 46.01261520385742, "rewards/rejected": -48.093727111816406, "step": 22760 }, { "epoch": 99.43231441048034, "grad_norm": 3.7347923161907506e-08, "learning_rate": 4.908245398618782e-10, "logits/chosen": -1.5258173942565918, "logits/rejected": -2.1729278564453125, "logps/chosen": -522.0613403320312, "logps/rejected": -4620.38427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3498997688293457, "rewards/margins": 41.3654899597168, "rewards/rejected": -43.71539306640625, "step": 22770 }, { "epoch": 99.47598253275109, "grad_norm": 5.322610652395374e-06, "learning_rate": 4.182193838647708e-10, "logits/chosen": -1.579285740852356, "logits/rejected": -2.241133689880371, "logps/chosen": -491.9681091308594, "logps/rejected": -4884.6884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1667182445526123, "rewards/margins": 44.128997802734375, "rewards/rejected": -46.29571533203125, "step": 22780 }, { "epoch": 99.51965065502183, "grad_norm": 6.140185046317292e-08, "learning_rate": 3.514220195199869e-10, "logits/chosen": -1.586801290512085, "logits/rejected": -2.3343584537506104, "logps/chosen": -477.4095764160156, "logps/rejected": -5200.7470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1348705291748047, "rewards/margins": 47.07790756225586, "rewards/rejected": -49.2127799987793, "step": 22790 }, { "epoch": 99.56331877729258, "grad_norm": 2.5673908035833126e-06, "learning_rate": 2.9043260203170944e-10, "logits/chosen": -1.6095893383026123, "logits/rejected": -2.4118828773498535, "logps/chosen": -485.484375, "logps/rejected": -5609.4970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.172553062438965, "rewards/margins": 50.91709518432617, "rewards/rejected": -53.08964920043945, "step": 22800 }, { "epoch": 99.60698689956332, "grad_norm": 4.616350046451505e-08, "learning_rate": 2.3525127310936035e-10, "logits/chosen": -1.5534355640411377, "logits/rejected": -2.29658842086792, "logps/chosen": -504.73748779296875, "logps/rejected": -5232.25048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1188876628875732, "rewards/margins": 47.39024353027344, "rewards/rejected": -49.509132385253906, "step": 22810 }, { "epoch": 99.65065502183405, "grad_norm": 5.23290352079487e-08, "learning_rate": 1.8587816096676815e-10, "logits/chosen": -1.5898157358169556, "logits/rejected": -2.406322479248047, "logps/chosen": -492.0469665527344, "logps/rejected": -5679.98876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2210469245910645, "rewards/margins": 51.5106315612793, "rewards/rejected": -53.7316780090332, "step": 22820 }, { "epoch": 99.6943231441048, "grad_norm": 3.432174382276182e-08, "learning_rate": 1.4231338032300034e-10, "logits/chosen": -1.5883640050888062, "logits/rejected": -2.322314739227295, "logps/chosen": -478.75811767578125, "logps/rejected": -5211.638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.090974807739258, "rewards/margins": 47.191158294677734, "rewards/rejected": -49.282135009765625, "step": 22830 }, { "epoch": 99.73799126637554, "grad_norm": 3.005291963641479e-08, "learning_rate": 1.0455703240069836e-10, "logits/chosen": -1.5497264862060547, "logits/rejected": -2.251467227935791, "logps/chosen": -507.6072692871094, "logps/rejected": -5084.78173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1044788360595703, "rewards/margins": 45.945892333984375, "rewards/rejected": -48.050376892089844, "step": 22840 }, { "epoch": 99.78165938864629, "grad_norm": 9.211458295403274e-09, "learning_rate": 7.260920492718759e-11, "logits/chosen": -1.5724613666534424, "logits/rejected": -2.304027557373047, "logps/chosen": -501.4747619628906, "logps/rejected": -5479.3427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.20947265625, "rewards/margins": 49.61025619506836, "rewards/rejected": -51.819740295410156, "step": 22850 }, { "epoch": 99.82532751091703, "grad_norm": 1.595213455214673e-08, "learning_rate": 4.6469972133367234e-11, "logits/chosen": -1.5069221258163452, "logits/rejected": -2.178095579147339, "logps/chosen": -500.69708251953125, "logps/rejected": -4586.1259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.139969825744629, "rewards/margins": 41.18637466430664, "rewards/rejected": -43.32634735107422, "step": 22860 }, { "epoch": 99.86899563318778, "grad_norm": 1.1945046617370372e-07, "learning_rate": 2.61393947537103e-11, "logits/chosen": -1.543365716934204, "logits/rejected": -2.207709789276123, "logps/chosen": -513.8220825195312, "logps/rejected": -4849.166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.297729730606079, "rewards/margins": 43.62523651123047, "rewards/rejected": -45.92296600341797, "step": 22870 }, { "epoch": 99.91266375545851, "grad_norm": 4.602354296218232e-08, "learning_rate": 1.1617520026818707e-11, "logits/chosen": -1.5648740530014038, "logits/rejected": -2.3396196365356445, "logps/chosen": -505.2884826660156, "logps/rejected": -5371.5537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2433743476867676, "rewards/margins": 48.5499153137207, "rewards/rejected": -50.79328918457031, "step": 22880 }, { "epoch": 99.95633187772926, "grad_norm": 4.146462758548137e-08, "learning_rate": 2.9043816937579517e-12, "logits/chosen": -1.5217851400375366, "logits/rejected": -2.1906237602233887, "logps/chosen": -520.7916870117188, "logps/rejected": -4753.595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2267818450927734, "rewards/margins": 42.683746337890625, "rewards/rejected": -44.910526275634766, "step": 22890 }, { "epoch": 100.0, "grad_norm": 6.454570075908991e-07, "learning_rate": 0.0, "logits/chosen": -1.5770223140716553, "logits/rejected": -2.3020753860473633, "logps/chosen": -504.50909423828125, "logps/rejected": -5462.984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1251556873321533, "rewards/margins": 49.54574203491211, "rewards/rejected": -51.6708984375, "step": 22900 }, { "epoch": 100.0, "step": 22900, "total_flos": 0.0, "train_loss": 0.009056582814454938, "train_runtime": 219591.7805, "train_samples_per_second": 6.668, "train_steps_per_second": 0.104 } ], "logging_steps": 10, "max_steps": 22900, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }