{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.523809523809524e-09, "logits/chosen": -1.7113145589828491, "logits/rejected": -1.7829445600509644, "logps/chosen": -121.08319091796875, "logps/rejected": -131.56979370117188, "loss": 0.2593, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 9.523809523809525e-08, "logits/chosen": -2.028289556503296, "logits/rejected": -1.4606963396072388, "logps/chosen": -279.7530212402344, "logps/rejected": -261.1387634277344, "loss": 0.3059, "rewards/accuracies": 0.5, "rewards/chosen": 3.5178614780306816e-05, "rewards/margins": 9.017097909236327e-06, "rewards/rejected": 2.6161513233091682e-05, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.904761904761905e-07, "logits/chosen": -1.939586877822876, "logits/rejected": -1.254178762435913, "logps/chosen": -286.6382141113281, "logps/rejected": -257.4527282714844, "loss": 0.2792, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 1.1774925042118412e-05, "rewards/margins": -5.420007073553279e-05, "rewards/rejected": 6.59749930491671e-05, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.8571428571428575e-07, "logits/chosen": -1.819902777671814, "logits/rejected": -1.3389551639556885, "logps/chosen": -217.68228149414062, "logps/rejected": -197.06698608398438, "loss": 0.2739, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 2.47562365984777e-05, "rewards/margins": 0.00018780089158099145, "rewards/rejected": -0.0001630446349736303, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.80952380952381e-07, "logits/chosen": -1.9389543533325195, "logits/rejected": -1.4040112495422363, "logps/chosen": -302.9512634277344, "logps/rejected": -271.6506042480469, "loss": 0.3421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 4.011381679447368e-05, "rewards/margins": 0.0009296025382354856, "rewards/rejected": -0.0008894886705093086, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.7619047619047623e-07, "logits/chosen": -2.1501097679138184, "logits/rejected": -1.4992334842681885, "logps/chosen": -265.8793640136719, "logps/rejected": -175.6634521484375, "loss": 0.3662, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0003351033083163202, "rewards/margins": 0.0017578952247276902, "rewards/rejected": -0.0020929984748363495, "step": 50 }, { "epoch": 0.01, "learning_rate": 5.714285714285715e-07, "logits/chosen": -2.114790439605713, "logits/rejected": -1.4490571022033691, "logps/chosen": -356.48333740234375, "logps/rejected": -267.05938720703125, "loss": 0.3275, "rewards/accuracies": 0.75, "rewards/chosen": 0.00033402262488380075, "rewards/margins": 0.007354515604674816, "rewards/rejected": -0.007020492106676102, "step": 60 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "logits/chosen": -1.8004682064056396, "logits/rejected": -1.5449601411819458, "logps/chosen": -203.9654541015625, "logps/rejected": -216.85122680664062, "loss": 0.2988, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0069874427281320095, "rewards/margins": 0.011947698891162872, "rewards/rejected": -0.01893514022231102, "step": 70 }, { "epoch": 0.02, "learning_rate": 7.61904761904762e-07, "logits/chosen": -1.92642343044281, "logits/rejected": -1.4140938520431519, "logps/chosen": -285.9252624511719, "logps/rejected": -288.25262451171875, "loss": 0.3116, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.02321251854300499, "rewards/margins": 0.025494087487459183, "rewards/rejected": -0.048706598579883575, "step": 80 }, { "epoch": 0.02, "learning_rate": 8.571428571428572e-07, "logits/chosen": -1.9828475713729858, "logits/rejected": -1.3835010528564453, "logps/chosen": -322.59832763671875, "logps/rejected": -374.05609130859375, "loss": 0.2413, "rewards/accuracies": 0.875, "rewards/chosen": -0.029907772317528725, "rewards/margins": 0.07653582096099854, "rewards/rejected": -0.10644359886646271, "step": 90 }, { "epoch": 0.02, "learning_rate": 9.523809523809525e-07, "logits/chosen": -1.718085527420044, "logits/rejected": -1.3718289136886597, "logps/chosen": -255.760986328125, "logps/rejected": -395.42840576171875, "loss": 0.2578, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04886468127369881, "rewards/margins": 0.1050916537642479, "rewards/rejected": -0.1539563536643982, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.0476190476190478e-06, "logits/chosen": -1.785557508468628, "logits/rejected": -1.2490421533584595, "logps/chosen": -371.412841796875, "logps/rejected": -538.618408203125, "loss": 0.1895, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.0791202038526535, "rewards/margins": 0.23141446709632874, "rewards/rejected": -0.31053465604782104, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.142857142857143e-06, "logits/chosen": -1.7252378463745117, "logits/rejected": -1.0109264850616455, "logps/chosen": -464.755859375, "logps/rejected": -727.7872314453125, "loss": 0.1577, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16773268580436707, "rewards/margins": 0.27710023522377014, "rewards/rejected": -0.4448328912258148, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.2380952380952382e-06, "logits/chosen": -1.691554307937622, "logits/rejected": -1.0183074474334717, "logps/chosen": -472.3408203125, "logps/rejected": -782.1011962890625, "loss": 0.1538, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21022073924541473, "rewards/margins": 0.3267260193824768, "rewards/rejected": -0.5369467735290527, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.3333333333333334e-06, "logits/chosen": -1.6522079706192017, "logits/rejected": -0.9902011752128601, "logps/chosen": -403.8924255371094, "logps/rejected": -740.5509033203125, "loss": 0.1223, "rewards/accuracies": 0.875, "rewards/chosen": -0.1563374102115631, "rewards/margins": 0.36456286907196045, "rewards/rejected": -0.5209003686904907, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.4285714285714286e-06, "logits/chosen": -1.687640905380249, "logits/rejected": -1.182498574256897, "logps/chosen": -469.69464111328125, "logps/rejected": -816.5303955078125, "loss": 0.1505, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21341899037361145, "rewards/margins": 0.33306747674942017, "rewards/rejected": -0.546486496925354, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.523809523809524e-06, "logits/chosen": -1.7321513891220093, "logits/rejected": -0.8273302912712097, "logps/chosen": -500.68914794921875, "logps/rejected": -866.5822143554688, "loss": 0.1224, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2013246715068817, "rewards/margins": 0.39632850885391235, "rewards/rejected": -0.5976532101631165, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.6190476190476193e-06, "logits/chosen": -1.6940206289291382, "logits/rejected": -1.1097806692123413, "logps/chosen": -477.9801330566406, "logps/rejected": -813.2344360351562, "loss": 0.1101, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2076355218887329, "rewards/margins": 0.35763829946517944, "rewards/rejected": -0.5652738809585571, "step": 170 }, { "epoch": 0.03, "learning_rate": 1.7142857142857145e-06, "logits/chosen": -1.4584693908691406, "logits/rejected": -1.034733533859253, "logps/chosen": -459.72259521484375, "logps/rejected": -791.3391723632812, "loss": 0.1411, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24488899111747742, "rewards/margins": 0.3414311110973358, "rewards/rejected": -0.5863201022148132, "step": 180 }, { "epoch": 0.04, "learning_rate": 1.8095238095238097e-06, "logits/chosen": -1.511499047279358, "logits/rejected": -1.1169841289520264, "logps/chosen": -454.05712890625, "logps/rejected": -742.1346435546875, "loss": 0.1842, "rewards/accuracies": 0.75, "rewards/chosen": -0.19414553046226501, "rewards/margins": 0.2850378155708313, "rewards/rejected": -0.4791833460330963, "step": 190 }, { "epoch": 0.04, "learning_rate": 1.904761904761905e-06, "logits/chosen": -1.466512680053711, "logits/rejected": -0.847357451915741, "logps/chosen": -436.92529296875, "logps/rejected": -765.6014404296875, "loss": 0.1101, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18620623648166656, "rewards/margins": 0.3699783682823181, "rewards/rejected": -0.5561846494674683, "step": 200 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.2183196544647217, "logits/rejected": -1.0017187595367432, "logps/chosen": -625.4891967773438, "logps/rejected": -926.8502197265625, "loss": 0.1371, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4119042456150055, "rewards/margins": 0.26195627450942993, "rewards/rejected": -0.673860490322113, "step": 210 }, { "epoch": 0.04, "learning_rate": 2.0952380952380955e-06, "logits/chosen": -1.3527754545211792, "logits/rejected": -0.8047201037406921, "logps/chosen": -600.1972045898438, "logps/rejected": -971.2520751953125, "loss": 0.0953, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.36095115542411804, "rewards/margins": 0.39113304018974304, "rewards/rejected": -0.7520841956138611, "step": 220 }, { "epoch": 0.04, "learning_rate": 2.1904761904761908e-06, "logits/chosen": -1.5290424823760986, "logits/rejected": -0.819190502166748, "logps/chosen": -660.8310546875, "logps/rejected": -928.98046875, "loss": 0.1472, "rewards/accuracies": 0.875, "rewards/chosen": -0.3549632430076599, "rewards/margins": 0.34809479117393494, "rewards/rejected": -0.7030580639839172, "step": 230 }, { "epoch": 0.05, "learning_rate": 2.285714285714286e-06, "logits/chosen": -1.2489688396453857, "logits/rejected": -0.7796673774719238, "logps/chosen": -513.8331298828125, "logps/rejected": -878.4765625, "loss": 0.0869, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26155805587768555, "rewards/margins": 0.37282976508140564, "rewards/rejected": -0.6343878507614136, "step": 240 }, { "epoch": 0.05, "learning_rate": 2.380952380952381e-06, "logits/chosen": -1.529165506362915, "logits/rejected": -0.7751346826553345, "logps/chosen": -569.2841186523438, "logps/rejected": -762.9376220703125, "loss": 0.1727, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29488152265548706, "rewards/margins": 0.29403209686279297, "rewards/rejected": -0.5889135599136353, "step": 250 }, { "epoch": 0.05, "learning_rate": 2.4761904761904764e-06, "logits/chosen": -1.544243335723877, "logits/rejected": -0.7881235480308533, "logps/chosen": -517.7400512695312, "logps/rejected": -858.8873901367188, "loss": 0.1112, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23570850491523743, "rewards/margins": 0.3838581442832947, "rewards/rejected": -0.6195666193962097, "step": 260 }, { "epoch": 0.05, "learning_rate": 2.571428571428571e-06, "logits/chosen": -1.280453085899353, "logits/rejected": -1.0881712436676025, "logps/chosen": -423.7964782714844, "logps/rejected": -871.5652465820312, "loss": 0.1083, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23875761032104492, "rewards/margins": 0.398102343082428, "rewards/rejected": -0.6368598937988281, "step": 270 }, { "epoch": 0.05, "learning_rate": 2.666666666666667e-06, "logits/chosen": -1.4761348962783813, "logits/rejected": -0.9910094141960144, "logps/chosen": -492.68243408203125, "logps/rejected": -867.07666015625, "loss": 0.2136, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26442137360572815, "rewards/margins": 0.35982996225357056, "rewards/rejected": -0.6242512464523315, "step": 280 }, { "epoch": 0.06, "learning_rate": 2.7619047619047625e-06, "logits/chosen": -1.693885087966919, "logits/rejected": -1.0468710660934448, "logps/chosen": -469.68780517578125, "logps/rejected": -781.9990234375, "loss": 0.0859, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14694778621196747, "rewards/margins": 0.395540326833725, "rewards/rejected": -0.5424880981445312, "step": 290 }, { "epoch": 0.06, "learning_rate": 2.8571428571428573e-06, "logits/chosen": -1.7993671894073486, "logits/rejected": -1.2026454210281372, "logps/chosen": -551.7943115234375, "logps/rejected": -833.18408203125, "loss": 0.154, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24280384182929993, "rewards/margins": 0.31480950117111206, "rewards/rejected": -0.5576133131980896, "step": 300 }, { "epoch": 0.06, "learning_rate": 2.9523809523809525e-06, "logits/chosen": -1.9355138540267944, "logits/rejected": -1.2813931703567505, "logps/chosen": -472.1820373535156, "logps/rejected": -693.4636840820312, "loss": 0.1554, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.202456995844841, "rewards/margins": 0.2504492700099945, "rewards/rejected": -0.45290622115135193, "step": 310 }, { "epoch": 0.06, "learning_rate": 3.047619047619048e-06, "logits/chosen": -1.603163480758667, "logits/rejected": -0.8913636207580566, "logps/chosen": -417.87060546875, "logps/rejected": -738.0120849609375, "loss": 0.1249, "rewards/accuracies": 0.875, "rewards/chosen": -0.12579551339149475, "rewards/margins": 0.34292665123939514, "rewards/rejected": -0.4687221646308899, "step": 320 }, { "epoch": 0.06, "learning_rate": 3.142857142857143e-06, "logits/chosen": -1.7196691036224365, "logits/rejected": -0.969390869140625, "logps/chosen": -552.02490234375, "logps/rejected": -777.7071533203125, "loss": 0.1821, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2423911988735199, "rewards/margins": 0.3138866424560547, "rewards/rejected": -0.556277871131897, "step": 330 }, { "epoch": 0.06, "learning_rate": 3.2380952380952385e-06, "logits/chosen": -1.744593620300293, "logits/rejected": -0.9106618165969849, "logps/chosen": -615.979248046875, "logps/rejected": -780.6832885742188, "loss": 0.2282, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29525917768478394, "rewards/margins": 0.2746427655220032, "rewards/rejected": -0.5699019432067871, "step": 340 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -1.6773769855499268, "logits/rejected": -1.1781260967254639, "logps/chosen": -491.15960693359375, "logps/rejected": -787.4660034179688, "loss": 0.1746, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20059335231781006, "rewards/margins": 0.33873653411865234, "rewards/rejected": -0.5393298864364624, "step": 350 }, { "epoch": 0.07, "learning_rate": 3.428571428571429e-06, "logits/chosen": -1.6867252588272095, "logits/rejected": -1.0679597854614258, "logps/chosen": -525.0442504882812, "logps/rejected": -823.7645263671875, "loss": 0.167, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.25112444162368774, "rewards/margins": 0.32643139362335205, "rewards/rejected": -0.5775558352470398, "step": 360 }, { "epoch": 0.07, "learning_rate": 3.523809523809524e-06, "logits/chosen": -1.7583757638931274, "logits/rejected": -0.9177656173706055, "logps/chosen": -541.0323486328125, "logps/rejected": -968.3751220703125, "loss": 0.0773, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21084997057914734, "rewards/margins": 0.4845653474330902, "rewards/rejected": -0.6954153776168823, "step": 370 }, { "epoch": 0.07, "learning_rate": 3.6190476190476194e-06, "logits/chosen": -1.7165918350219727, "logits/rejected": -1.2400273084640503, "logps/chosen": -429.61920166015625, "logps/rejected": -730.7445068359375, "loss": 0.1409, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1769169270992279, "rewards/margins": 0.3175029158592224, "rewards/rejected": -0.49441981315612793, "step": 380 }, { "epoch": 0.07, "learning_rate": 3.7142857142857146e-06, "logits/chosen": -1.7115312814712524, "logits/rejected": -1.0104091167449951, "logps/chosen": -473.62109375, "logps/rejected": -733.7723388671875, "loss": 0.183, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1993667632341385, "rewards/margins": 0.31865304708480835, "rewards/rejected": -0.5180197954177856, "step": 390 }, { "epoch": 0.08, "learning_rate": 3.80952380952381e-06, "logits/chosen": -1.5717957019805908, "logits/rejected": -1.273874282836914, "logps/chosen": -420.313232421875, "logps/rejected": -682.5123291015625, "loss": 0.1499, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22465018928050995, "rewards/margins": 0.2600460350513458, "rewards/rejected": -0.48469629883766174, "step": 400 }, { "epoch": 0.08, "learning_rate": 3.9047619047619055e-06, "logits/chosen": -1.4259899854660034, "logits/rejected": -1.1554677486419678, "logps/chosen": -514.4977416992188, "logps/rejected": -754.0169067382812, "loss": 0.2103, "rewards/accuracies": 0.625, "rewards/chosen": -0.3204768896102905, "rewards/margins": 0.2178756296634674, "rewards/rejected": -0.5383524894714355, "step": 410 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.4882583618164062, "logits/rejected": -1.0473524332046509, "logps/chosen": -575.58349609375, "logps/rejected": -880.0328369140625, "loss": 0.1268, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.31017592549324036, "rewards/margins": 0.3222498297691345, "rewards/rejected": -0.6324257850646973, "step": 420 }, { "epoch": 0.08, "learning_rate": 4.095238095238096e-06, "logits/chosen": -1.4517797231674194, "logits/rejected": -0.8593630790710449, "logps/chosen": -457.4549255371094, "logps/rejected": -789.6470947265625, "loss": 0.1291, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17690476775169373, "rewards/margins": 0.39645010232925415, "rewards/rejected": -0.5733548402786255, "step": 430 }, { "epoch": 0.08, "learning_rate": 4.190476190476191e-06, "logits/chosen": -1.6190814971923828, "logits/rejected": -1.0680263042449951, "logps/chosen": -450.77655029296875, "logps/rejected": -780.7911376953125, "loss": 0.167, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20334550738334656, "rewards/margins": 0.35270360112190247, "rewards/rejected": -0.5560490489006042, "step": 440 }, { "epoch": 0.09, "learning_rate": 4.2857142857142855e-06, "logits/chosen": -1.443664312362671, "logits/rejected": -0.8826562166213989, "logps/chosen": -398.62225341796875, "logps/rejected": -740.305419921875, "loss": 0.1262, "rewards/accuracies": 0.875, "rewards/chosen": -0.15009760856628418, "rewards/margins": 0.3754437267780304, "rewards/rejected": -0.5255413055419922, "step": 450 }, { "epoch": 0.09, "learning_rate": 4.3809523809523815e-06, "logits/chosen": -1.5959932804107666, "logits/rejected": -0.8810084462165833, "logps/chosen": -488.99346923828125, "logps/rejected": -833.8523559570312, "loss": 0.0984, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18980169296264648, "rewards/margins": 0.41263097524642944, "rewards/rejected": -0.6024327278137207, "step": 460 }, { "epoch": 0.09, "learning_rate": 4.476190476190477e-06, "logits/chosen": -1.6928768157958984, "logits/rejected": -1.2329580783843994, "logps/chosen": -390.4802551269531, "logps/rejected": -750.1564331054688, "loss": 0.094, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15772318840026855, "rewards/margins": 0.3710814416408539, "rewards/rejected": -0.5288046002388, "step": 470 }, { "epoch": 0.09, "learning_rate": 4.571428571428572e-06, "logits/chosen": -1.3085224628448486, "logits/rejected": -0.9855268597602844, "logps/chosen": -428.2975158691406, "logps/rejected": -769.2637939453125, "loss": 0.1723, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2199603021144867, "rewards/margins": 0.3273308277130127, "rewards/rejected": -0.5472911596298218, "step": 480 }, { "epoch": 0.09, "learning_rate": 4.666666666666667e-06, "logits/chosen": -1.4104373455047607, "logits/rejected": -0.98779296875, "logps/chosen": -449.3177185058594, "logps/rejected": -727.2005615234375, "loss": 0.1534, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22446668148040771, "rewards/margins": 0.30466535687446594, "rewards/rejected": -0.529132068157196, "step": 490 }, { "epoch": 0.1, "learning_rate": 4.761904761904762e-06, "logits/chosen": -1.4807837009429932, "logits/rejected": -1.004685640335083, "logps/chosen": -436.981201171875, "logps/rejected": -773.7215576171875, "loss": 0.1651, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2306755781173706, "rewards/margins": 0.34396785497665405, "rewards/rejected": -0.5746434330940247, "step": 500 }, { "epoch": 0.1, "learning_rate": 4.857142857142858e-06, "logits/chosen": -1.3188155889511108, "logits/rejected": -0.7900226712226868, "logps/chosen": -587.2340698242188, "logps/rejected": -955.2352294921875, "loss": 0.1779, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3364206850528717, "rewards/margins": 0.36678546667099, "rewards/rejected": -0.7032061815261841, "step": 510 }, { "epoch": 0.1, "learning_rate": 4.952380952380953e-06, "logits/chosen": -1.2925165891647339, "logits/rejected": -0.800969123840332, "logps/chosen": -516.4209594726562, "logps/rejected": -842.2839965820312, "loss": 0.1101, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25947386026382446, "rewards/margins": 0.39055201411247253, "rewards/rejected": -0.6500259637832642, "step": 520 }, { "epoch": 0.1, "learning_rate": 4.999986185163754e-06, "logits/chosen": -1.674224615097046, "logits/rejected": -1.1314308643341064, "logps/chosen": -413.9242248535156, "logps/rejected": -712.67626953125, "loss": 0.1149, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1751125603914261, "rewards/margins": 0.3478858768939972, "rewards/rejected": -0.5229984521865845, "step": 530 }, { "epoch": 0.1, "learning_rate": 4.999875667389858e-06, "logits/chosen": -1.3373454809188843, "logits/rejected": -0.8919671177864075, "logps/chosen": -582.5571899414062, "logps/rejected": -986.0074462890625, "loss": 0.1212, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2830522656440735, "rewards/margins": 0.45472225546836853, "rewards/rejected": -0.7377746105194092, "step": 540 }, { "epoch": 0.1, "learning_rate": 4.999654636727765e-06, "logits/chosen": -1.3477157354354858, "logits/rejected": -0.6459788680076599, "logps/chosen": -647.1373291015625, "logps/rejected": -1002.1474609375, "loss": 0.1762, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.34693655371665955, "rewards/margins": 0.35530444979667664, "rewards/rejected": -0.702241063117981, "step": 550 }, { "epoch": 0.11, "learning_rate": 4.999323102948655e-06, "logits/chosen": -1.499768614768982, "logits/rejected": -1.134403109550476, "logps/chosen": -578.779052734375, "logps/rejected": -820.9362182617188, "loss": 0.1754, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25975677371025085, "rewards/margins": 0.29770857095718384, "rewards/rejected": -0.5574653148651123, "step": 560 }, { "epoch": 0.11, "learning_rate": 4.998881080708759e-06, "logits/chosen": -1.4155423641204834, "logits/rejected": -0.8051935434341431, "logps/chosen": -488.83868408203125, "logps/rejected": -709.1486206054688, "loss": 0.1371, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19319114089012146, "rewards/margins": 0.3253104090690613, "rewards/rejected": -0.5185015797615051, "step": 570 }, { "epoch": 0.11, "learning_rate": 4.998328589548711e-06, "logits/chosen": -1.6082518100738525, "logits/rejected": -0.843612015247345, "logps/chosen": -525.6066284179688, "logps/rejected": -797.774658203125, "loss": 0.143, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17293401062488556, "rewards/margins": 0.32926812767982483, "rewards/rejected": -0.5022021532058716, "step": 580 }, { "epoch": 0.11, "learning_rate": 4.997665653892682e-06, "logits/chosen": -1.4975395202636719, "logits/rejected": -0.9308058619499207, "logps/chosen": -433.9872131347656, "logps/rejected": -735.2569580078125, "loss": 0.1708, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17491111159324646, "rewards/margins": 0.32181793451309204, "rewards/rejected": -0.4967289865016937, "step": 590 }, { "epoch": 0.11, "learning_rate": 4.996892303047306e-06, "logits/chosen": -1.2255983352661133, "logits/rejected": -0.8828571438789368, "logps/chosen": -408.93450927734375, "logps/rejected": -793.7689208984375, "loss": 0.0971, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2011801302433014, "rewards/margins": 0.40020784735679626, "rewards/rejected": -0.6013879776000977, "step": 600 }, { "epoch": 0.12, "learning_rate": 4.996008571200375e-06, "logits/chosen": -1.259902834892273, "logits/rejected": -0.9317649602890015, "logps/chosen": -557.2103271484375, "logps/rejected": -975.3890380859375, "loss": 0.1618, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2946203052997589, "rewards/margins": 0.37234818935394287, "rewards/rejected": -0.6669684648513794, "step": 610 }, { "epoch": 0.12, "learning_rate": 4.995014497419336e-06, "logits/chosen": -1.1454195976257324, "logits/rejected": -0.8106245994567871, "logps/chosen": -775.7911376953125, "logps/rejected": -1003.7286376953125, "loss": 0.2111, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5175495147705078, "rewards/margins": 0.2467096596956253, "rewards/rejected": -0.7642592191696167, "step": 620 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -0.8495442271232605, "logits/rejected": -0.5572858452796936, "logps/chosen": -812.4605712890625, "logps/rejected": -1038.9599609375, "loss": 0.2176, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5830896496772766, "rewards/margins": 0.22826501727104187, "rewards/rejected": -0.8113546371459961, "step": 630 }, { "epoch": 0.12, "learning_rate": 4.992695504712402e-06, "logits/chosen": -0.9184828996658325, "logits/rejected": -0.5267876982688904, "logps/chosen": -670.436279296875, "logps/rejected": -992.8828125, "loss": 0.1104, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.427330881357193, "rewards/margins": 0.3289671242237091, "rewards/rejected": -0.7562979459762573, "step": 640 }, { "epoch": 0.12, "learning_rate": 4.9913706883030385e-06, "logits/chosen": -1.2897741794586182, "logits/rejected": -0.5673663020133972, "logps/chosen": -650.5546264648438, "logps/rejected": -846.64111328125, "loss": 0.1204, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.25516167283058167, "rewards/margins": 0.3617474138736725, "rewards/rejected": -0.6169090867042542, "step": 650 }, { "epoch": 0.13, "learning_rate": 4.989935734988098e-06, "logits/chosen": -1.174782156944275, "logits/rejected": -0.737097442150116, "logps/chosen": -526.2172241210938, "logps/rejected": -764.712890625, "loss": 0.1754, "rewards/accuracies": 0.875, "rewards/chosen": -0.2307799756526947, "rewards/margins": 0.3241864740848541, "rewards/rejected": -0.554966390132904, "step": 660 }, { "epoch": 0.13, "learning_rate": 4.988390708203068e-06, "logits/chosen": -1.4817001819610596, "logits/rejected": -0.9144975543022156, "logps/chosen": -442.36944580078125, "logps/rejected": -775.7092895507812, "loss": 0.1669, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1493905782699585, "rewards/margins": 0.37323418259620667, "rewards/rejected": -0.5226247906684875, "step": 670 }, { "epoch": 0.13, "learning_rate": 4.9867356762494955e-06, "logits/chosen": -1.3501349687576294, "logits/rejected": -0.9058934450149536, "logps/chosen": -484.36834716796875, "logps/rejected": -831.0919189453125, "loss": 0.1572, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.206304669380188, "rewards/margins": 0.3766896426677704, "rewards/rejected": -0.5829943418502808, "step": 680 }, { "epoch": 0.13, "learning_rate": 4.984970712291963e-06, "logits/chosen": -1.4077775478363037, "logits/rejected": -0.7955089807510376, "logps/chosen": -469.12152099609375, "logps/rejected": -780.0720825195312, "loss": 0.1487, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1690974235534668, "rewards/margins": 0.37666061520576477, "rewards/rejected": -0.5457580089569092, "step": 690 }, { "epoch": 0.13, "learning_rate": 4.983095894354858e-06, "logits/chosen": -1.3341376781463623, "logits/rejected": -0.666893720626831, "logps/chosen": -518.5508422851562, "logps/rejected": -831.0877685546875, "loss": 0.1304, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21632568538188934, "rewards/margins": 0.3642910122871399, "rewards/rejected": -0.5806167125701904, "step": 700 }, { "epoch": 0.14, "learning_rate": 4.981111305318918e-06, "logits/chosen": -1.3198024034500122, "logits/rejected": -0.8952147364616394, "logps/chosen": -526.7291259765625, "logps/rejected": -1011.0609130859375, "loss": 0.088, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.25172096490859985, "rewards/margins": 0.40057438611984253, "rewards/rejected": -0.6522954106330872, "step": 710 }, { "epoch": 0.14, "learning_rate": 4.979017032917576e-06, "logits/chosen": -1.3708412647247314, "logits/rejected": -0.9907933473587036, "logps/chosen": -517.3992919921875, "logps/rejected": -899.32666015625, "loss": 0.1537, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2503722310066223, "rewards/margins": 0.3891766667366028, "rewards/rejected": -0.6395488977432251, "step": 720 }, { "epoch": 0.14, "learning_rate": 4.97681316973307e-06, "logits/chosen": -1.3622843027114868, "logits/rejected": -0.9490324258804321, "logps/chosen": -449.09423828125, "logps/rejected": -770.49365234375, "loss": 0.1697, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2143503874540329, "rewards/margins": 0.3202458620071411, "rewards/rejected": -0.5345962643623352, "step": 730 }, { "epoch": 0.14, "learning_rate": 4.9744998131923625e-06, "logits/chosen": -1.4741694927215576, "logits/rejected": -0.9516903162002563, "logps/chosen": -438.42486572265625, "logps/rejected": -765.147216796875, "loss": 0.1417, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1841164231300354, "rewards/margins": 0.33546537160873413, "rewards/rejected": -0.5195817351341248, "step": 740 }, { "epoch": 0.14, "learning_rate": 4.9720770655628216e-06, "logits/chosen": -1.3768764734268188, "logits/rejected": -0.7704869508743286, "logps/chosen": -416.5518493652344, "logps/rejected": -805.7056884765625, "loss": 0.1395, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1929038166999817, "rewards/margins": 0.4216538071632385, "rewards/rejected": -0.6145576238632202, "step": 750 }, { "epoch": 0.14, "learning_rate": 4.969545033947711e-06, "logits/chosen": -1.4679723978042603, "logits/rejected": -0.9132329821586609, "logps/chosen": -568.6488647460938, "logps/rejected": -732.5473022460938, "loss": 0.1762, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2675512135028839, "rewards/margins": 0.28329747915267944, "rewards/rejected": -0.5508487224578857, "step": 760 }, { "epoch": 0.15, "learning_rate": 4.966903830281449e-06, "logits/chosen": -1.3681315183639526, "logits/rejected": -0.9185358881950378, "logps/chosen": -393.38238525390625, "logps/rejected": -793.5931396484375, "loss": 0.0983, "rewards/accuracies": 0.875, "rewards/chosen": -0.16901245713233948, "rewards/margins": 0.3828689157962799, "rewards/rejected": -0.5518813729286194, "step": 770 }, { "epoch": 0.15, "learning_rate": 4.964153571324658e-06, "logits/chosen": -1.419542908668518, "logits/rejected": -1.1343485116958618, "logps/chosen": -411.88037109375, "logps/rejected": -766.6087646484375, "loss": 0.1541, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18760547041893005, "rewards/margins": 0.32066410779953003, "rewards/rejected": -0.5082695484161377, "step": 780 }, { "epoch": 0.15, "learning_rate": 4.96129437865901e-06, "logits/chosen": -1.7101234197616577, "logits/rejected": -1.1676263809204102, "logps/chosen": -414.43841552734375, "logps/rejected": -807.0685424804688, "loss": 0.115, "rewards/accuracies": 0.875, "rewards/chosen": -0.17872503399848938, "rewards/margins": 0.40818625688552856, "rewards/rejected": -0.5869112014770508, "step": 790 }, { "epoch": 0.15, "learning_rate": 4.958326378681849e-06, "logits/chosen": -1.3850364685058594, "logits/rejected": -0.9428398013114929, "logps/chosen": -509.8709411621094, "logps/rejected": -952.4793090820312, "loss": 0.1242, "rewards/accuracies": 0.875, "rewards/chosen": -0.2186633050441742, "rewards/margins": 0.40059369802474976, "rewards/rejected": -0.6192570924758911, "step": 800 }, { "epoch": 0.15, "learning_rate": 4.955249702600598e-06, "logits/chosen": -1.7035572528839111, "logits/rejected": -1.0030953884124756, "logps/chosen": -482.3668518066406, "logps/rejected": -760.6826171875, "loss": 0.1448, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16139402985572815, "rewards/margins": 0.3800256848335266, "rewards/rejected": -0.5414197444915771, "step": 810 }, { "epoch": 0.16, "learning_rate": 4.952064486426965e-06, "logits/chosen": -1.4991743564605713, "logits/rejected": -1.0307657718658447, "logps/chosen": -469.8226013183594, "logps/rejected": -826.0535888671875, "loss": 0.145, "rewards/accuracies": 0.875, "rewards/chosen": -0.20374660193920135, "rewards/margins": 0.3607513904571533, "rewards/rejected": -0.5644980072975159, "step": 820 }, { "epoch": 0.16, "learning_rate": 4.948770870970929e-06, "logits/chosen": -1.3923485279083252, "logits/rejected": -0.8306543231010437, "logps/chosen": -523.4690551757812, "logps/rejected": -916.36865234375, "loss": 0.0926, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26951366662979126, "rewards/margins": 0.40860167145729065, "rewards/rejected": -0.6781154274940491, "step": 830 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.6405500173568726, "logits/rejected": -0.9856799244880676, "logps/chosen": -528.2949829101562, "logps/rejected": -839.9332885742188, "loss": 0.1581, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24082037806510925, "rewards/margins": 0.3766017258167267, "rewards/rejected": -0.6174221038818359, "step": 840 }, { "epoch": 0.16, "learning_rate": 4.941859029405354e-06, "logits/chosen": -1.3943941593170166, "logits/rejected": -0.8641621470451355, "logps/chosen": -430.3617248535156, "logps/rejected": -675.5490112304688, "loss": 0.1329, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14924706518650055, "rewards/margins": 0.3124435544013977, "rewards/rejected": -0.4616905748844147, "step": 850 }, { "epoch": 0.16, "learning_rate": 4.938241108850039e-06, "logits/chosen": -1.6560602188110352, "logits/rejected": -1.0957581996917725, "logps/chosen": -432.77471923828125, "logps/rejected": -737.4432373046875, "loss": 0.0997, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15601494908332825, "rewards/margins": 0.35231441259384155, "rewards/rejected": -0.5083293318748474, "step": 860 }, { "epoch": 0.17, "learning_rate": 4.934515400107266e-06, "logits/chosen": -1.5005276203155518, "logits/rejected": -1.0103951692581177, "logps/chosen": -493.9891662597656, "logps/rejected": -829.7702026367188, "loss": 0.1281, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2181667536497116, "rewards/margins": 0.3699623644351959, "rewards/rejected": -0.5881291627883911, "step": 870 }, { "epoch": 0.17, "learning_rate": 4.930682067880759e-06, "logits/chosen": -1.4195506572723389, "logits/rejected": -1.1666053533554077, "logps/chosen": -388.73052978515625, "logps/rejected": -766.9281616210938, "loss": 0.1619, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15257635712623596, "rewards/margins": 0.3825012147426605, "rewards/rejected": -0.5350775718688965, "step": 880 }, { "epoch": 0.17, "learning_rate": 4.926741281631991e-06, "logits/chosen": -1.346895456314087, "logits/rejected": -0.9589405059814453, "logps/chosen": -513.4127197265625, "logps/rejected": -815.2869873046875, "loss": 0.1642, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2702198624610901, "rewards/margins": 0.3384886085987091, "rewards/rejected": -0.6087085008621216, "step": 890 }, { "epoch": 0.17, "learning_rate": 4.922693215572695e-06, "logits/chosen": -1.5491969585418701, "logits/rejected": -1.0861209630966187, "logps/chosen": -548.6007080078125, "logps/rejected": -929.33154296875, "loss": 0.1494, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24969181418418884, "rewards/margins": 0.3573285937309265, "rewards/rejected": -0.6070204377174377, "step": 900 }, { "epoch": 0.17, "learning_rate": 4.91853804865716e-06, "logits/chosen": -1.4057750701904297, "logits/rejected": -0.8807083964347839, "logps/chosen": -478.3196716308594, "logps/rejected": -706.2791137695312, "loss": 0.1319, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20771124958992004, "rewards/margins": 0.280539333820343, "rewards/rejected": -0.48825058341026306, "step": 910 }, { "epoch": 0.18, "learning_rate": 4.91427596457432e-06, "logits/chosen": -1.3523952960968018, "logits/rejected": -0.7593857645988464, "logps/chosen": -461.0428161621094, "logps/rejected": -753.4593505859375, "loss": 0.1406, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17721110582351685, "rewards/margins": 0.354949414730072, "rewards/rejected": -0.5321604609489441, "step": 920 }, { "epoch": 0.18, "learning_rate": 4.909907151739634e-06, "logits/chosen": -1.4453586339950562, "logits/rejected": -0.7822146415710449, "logps/chosen": -508.92962646484375, "logps/rejected": -890.5587768554688, "loss": 0.1022, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22609654068946838, "rewards/margins": 0.3926360607147217, "rewards/rejected": -0.6187326312065125, "step": 930 }, { "epoch": 0.18, "learning_rate": 4.905431803286756e-06, "logits/chosen": -1.3609312772750854, "logits/rejected": -0.9119062423706055, "logps/chosen": -519.1769409179688, "logps/rejected": -908.7677001953125, "loss": 0.1252, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.23803889751434326, "rewards/margins": 0.4088461995124817, "rewards/rejected": -0.6468850374221802, "step": 940 }, { "epoch": 0.18, "learning_rate": 4.900850117059e-06, "logits/chosen": -1.1450023651123047, "logits/rejected": -0.8150935173034668, "logps/chosen": -350.58306884765625, "logps/rejected": -832.9949340820312, "loss": 0.1233, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17586003243923187, "rewards/margins": 0.44040846824645996, "rewards/rejected": -0.616268515586853, "step": 950 }, { "epoch": 0.18, "learning_rate": 4.8961622956005895e-06, "logits/chosen": -1.590212345123291, "logits/rejected": -0.8001310229301453, "logps/chosen": -450.46435546875, "logps/rejected": -700.1336669921875, "loss": 0.1167, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14180727303028107, "rewards/margins": 0.357746422290802, "rewards/rejected": -0.49955374002456665, "step": 960 }, { "epoch": 0.18, "learning_rate": 4.891368546147707e-06, "logits/chosen": -1.3055294752120972, "logits/rejected": -0.6857399940490723, "logps/chosen": -536.6131591796875, "logps/rejected": -900.9932861328125, "loss": 0.1398, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2533701956272125, "rewards/margins": 0.42889460921287537, "rewards/rejected": -0.6822648644447327, "step": 970 }, { "epoch": 0.19, "learning_rate": 4.88646908061933e-06, "logits/chosen": -1.4798675775527954, "logits/rejected": -0.8037222027778625, "logps/chosen": -497.0403747558594, "logps/rejected": -850.4873046875, "loss": 0.1054, "rewards/accuracies": 0.875, "rewards/chosen": -0.18305550515651703, "rewards/margins": 0.39606842398643494, "rewards/rejected": -0.579123854637146, "step": 980 }, { "epoch": 0.19, "learning_rate": 4.881464115607866e-06, "logits/chosen": -1.579298734664917, "logits/rejected": -0.8653982281684875, "logps/chosen": -421.3480529785156, "logps/rejected": -758.8841552734375, "loss": 0.1458, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1381353884935379, "rewards/margins": 0.3948342204093933, "rewards/rejected": -0.53296959400177, "step": 990 }, { "epoch": 0.19, "learning_rate": 4.876353872369573e-06, "logits/chosen": -1.5196287631988525, "logits/rejected": -0.8134954571723938, "logps/chosen": -416.38775634765625, "logps/rejected": -739.5912475585938, "loss": 0.157, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15544332563877106, "rewards/margins": 0.3720504641532898, "rewards/rejected": -0.5274937152862549, "step": 1000 }, { "epoch": 0.19, "learning_rate": 4.871138576814782e-06, "logits/chosen": -1.1543328762054443, "logits/rejected": -0.9774447679519653, "logps/chosen": -523.8226318359375, "logps/rejected": -829.33447265625, "loss": 0.2085, "rewards/accuracies": 0.75, "rewards/chosen": -0.32543349266052246, "rewards/margins": 0.27706000208854675, "rewards/rejected": -0.6024935841560364, "step": 1010 }, { "epoch": 0.19, "learning_rate": 4.865818459497911e-06, "logits/chosen": -1.6374155282974243, "logits/rejected": -0.898185133934021, "logps/chosen": -562.8090209960938, "logps/rejected": -871.3721923828125, "loss": 0.0982, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2930522561073303, "rewards/margins": 0.35717350244522095, "rewards/rejected": -0.6502257585525513, "step": 1020 }, { "epoch": 0.2, "learning_rate": 4.860393755607266e-06, "logits/chosen": -1.3139742612838745, "logits/rejected": -0.7514779567718506, "logps/chosen": -549.5879516601562, "logps/rejected": -863.8126220703125, "loss": 0.0997, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.24689829349517822, "rewards/margins": 0.4052574038505554, "rewards/rejected": -0.6521557569503784, "step": 1030 }, { "epoch": 0.2, "learning_rate": 4.854864704954654e-06, "logits/chosen": -1.4909948110580444, "logits/rejected": -1.0483014583587646, "logps/chosen": -508.62957763671875, "logps/rejected": -1044.468017578125, "loss": 0.0734, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2170809507369995, "rewards/margins": 0.4909609258174896, "rewards/rejected": -0.708041787147522, "step": 1040 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.7164112329483032, "logits/rejected": -0.9476574659347534, "logps/chosen": -502.0357360839844, "logps/rejected": -799.1134033203125, "loss": 0.1546, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21094997227191925, "rewards/margins": 0.3885822296142578, "rewards/rejected": -0.5995321869850159, "step": 1050 }, { "epoch": 0.2, "learning_rate": 4.843494545664407e-06, "logits/chosen": -1.5129632949829102, "logits/rejected": -1.0171369314193726, "logps/chosen": -539.0646362304688, "logps/rejected": -809.5582275390625, "loss": 0.1233, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.25936758518218994, "rewards/margins": 0.320014625787735, "rewards/rejected": -0.5793822407722473, "step": 1060 }, { "epoch": 0.2, "learning_rate": 4.837653939671427e-06, "logits/chosen": -1.3291027545928955, "logits/rejected": -0.7198957204818726, "logps/chosen": -556.0474243164062, "logps/rejected": -840.3240356445312, "loss": 0.19, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2588450312614441, "rewards/margins": 0.36577850580215454, "rewards/rejected": -0.6246234774589539, "step": 1070 }, { "epoch": 0.21, "learning_rate": 4.8317099921835695e-06, "logits/chosen": -1.4946625232696533, "logits/rejected": -0.5066057443618774, "logps/chosen": -513.8456420898438, "logps/rejected": -849.84033203125, "loss": 0.1387, "rewards/accuracies": 0.875, "rewards/chosen": -0.20519308745861053, "rewards/margins": 0.44010305404663086, "rewards/rejected": -0.6452962160110474, "step": 1080 }, { "epoch": 0.21, "learning_rate": 4.825662965967023e-06, "logits/chosen": -1.3602453470230103, "logits/rejected": -1.0017006397247314, "logps/chosen": -384.7378845214844, "logps/rejected": -774.0015258789062, "loss": 0.1483, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18176120519638062, "rewards/margins": 0.3585227429866791, "rewards/rejected": -0.5402839779853821, "step": 1090 }, { "epoch": 0.21, "learning_rate": 4.819513128344814e-06, "logits/chosen": -1.2881540060043335, "logits/rejected": -0.6988876461982727, "logps/chosen": -475.30072021484375, "logps/rejected": -805.0797119140625, "loss": 0.1382, "rewards/accuracies": 0.75, "rewards/chosen": -0.1878626048564911, "rewards/margins": 0.36243921518325806, "rewards/rejected": -0.5503018498420715, "step": 1100 }, { "epoch": 0.21, "learning_rate": 4.813260751184992e-06, "logits/chosen": -1.6283468008041382, "logits/rejected": -0.87245112657547, "logps/chosen": -523.123046875, "logps/rejected": -765.8057861328125, "loss": 0.2018, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20071080327033997, "rewards/margins": 0.32597780227661133, "rewards/rejected": -0.5266886353492737, "step": 1110 }, { "epoch": 0.21, "learning_rate": 4.806906110888606e-06, "logits/chosen": -1.4184439182281494, "logits/rejected": -1.1425096988677979, "logps/chosen": -445.09710693359375, "logps/rejected": -863.4700927734375, "loss": 0.09, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19077089428901672, "rewards/margins": 0.4160831570625305, "rewards/rejected": -0.6068540215492249, "step": 1120 }, { "epoch": 0.22, "learning_rate": 4.8004494883774885e-06, "logits/chosen": -1.4304182529449463, "logits/rejected": -0.8100128173828125, "logps/chosen": -404.84661865234375, "logps/rejected": -636.9589233398438, "loss": 0.1338, "rewards/accuracies": 0.75, "rewards/chosen": -0.14372007548809052, "rewards/margins": 0.33636176586151123, "rewards/rejected": -0.48008185625076294, "step": 1130 }, { "epoch": 0.22, "learning_rate": 4.793891169081835e-06, "logits/chosen": -1.3676493167877197, "logits/rejected": -0.8645700216293335, "logps/chosen": -380.33709716796875, "logps/rejected": -678.5465087890625, "loss": 0.184, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16710279881954193, "rewards/margins": 0.3574886918067932, "rewards/rejected": -0.5245914459228516, "step": 1140 }, { "epoch": 0.22, "learning_rate": 4.787231442927587e-06, "logits/chosen": -1.4248979091644287, "logits/rejected": -0.8837774991989136, "logps/chosen": -431.97088623046875, "logps/rejected": -706.1895751953125, "loss": 0.1454, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15340907871723175, "rewards/margins": 0.31958648562431335, "rewards/rejected": -0.4729955792427063, "step": 1150 }, { "epoch": 0.22, "learning_rate": 4.780470604323616e-06, "logits/chosen": -1.3061199188232422, "logits/rejected": -0.6609403491020203, "logps/chosen": -449.4695739746094, "logps/rejected": -765.0759887695312, "loss": 0.1285, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19215217232704163, "rewards/margins": 0.3205837607383728, "rewards/rejected": -0.5127360224723816, "step": 1160 }, { "epoch": 0.22, "learning_rate": 4.773608952148706e-06, "logits/chosen": -1.1837317943572998, "logits/rejected": -0.8264517784118652, "logps/chosen": -535.846435546875, "logps/rejected": -957.7952270507812, "loss": 0.0995, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2933195233345032, "rewards/margins": 0.37555357813835144, "rewards/rejected": -0.6688731908798218, "step": 1170 }, { "epoch": 0.22, "learning_rate": 4.766646789738342e-06, "logits/chosen": -1.0671017169952393, "logits/rejected": -0.4882294237613678, "logps/chosen": -598.9127197265625, "logps/rejected": -935.1517333984375, "loss": 0.1316, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32276248931884766, "rewards/margins": 0.34344300627708435, "rewards/rejected": -0.6662055253982544, "step": 1180 }, { "epoch": 0.23, "learning_rate": 4.759584424871302e-06, "logits/chosen": -1.3479318618774414, "logits/rejected": -0.8438467979431152, "logps/chosen": -468.11932373046875, "logps/rejected": -848.8878173828125, "loss": 0.1182, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22558064758777618, "rewards/margins": 0.3687564730644226, "rewards/rejected": -0.5943371057510376, "step": 1190 }, { "epoch": 0.23, "learning_rate": 4.752422169756048e-06, "logits/chosen": -1.1192941665649414, "logits/rejected": -0.5167874097824097, "logps/chosen": -496.1790466308594, "logps/rejected": -690.4857177734375, "loss": 0.202, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28107157349586487, "rewards/margins": 0.25104498863220215, "rewards/rejected": -0.5321165323257446, "step": 1200 }, { "epoch": 0.23, "learning_rate": 4.745160341016927e-06, "logits/chosen": -1.2510173320770264, "logits/rejected": -0.7643812894821167, "logps/chosen": -406.9855041503906, "logps/rejected": -798.9572143554688, "loss": 0.0858, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14904753863811493, "rewards/margins": 0.3928123414516449, "rewards/rejected": -0.5418598055839539, "step": 1210 }, { "epoch": 0.23, "learning_rate": 4.737799259680172e-06, "logits/chosen": -1.218090534210205, "logits/rejected": -0.8292972445487976, "logps/chosen": -474.9144592285156, "logps/rejected": -748.02734375, "loss": 0.174, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19175249338150024, "rewards/margins": 0.3255236744880676, "rewards/rejected": -0.5172761678695679, "step": 1220 }, { "epoch": 0.23, "learning_rate": 4.730339251159709e-06, "logits/chosen": -1.2376108169555664, "logits/rejected": -0.8005326390266418, "logps/chosen": -493.998046875, "logps/rejected": -853.19921875, "loss": 0.1213, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2122408151626587, "rewards/margins": 0.37617719173431396, "rewards/rejected": -0.5884180665016174, "step": 1230 }, { "epoch": 0.24, "learning_rate": 4.722780645242775e-06, "logits/chosen": -1.2497247457504272, "logits/rejected": -0.6390712261199951, "logps/chosen": -538.4122924804688, "logps/rejected": -969.2047729492188, "loss": 0.092, "rewards/accuracies": 0.875, "rewards/chosen": -0.2643589973449707, "rewards/margins": 0.40035945177078247, "rewards/rejected": -0.664718508720398, "step": 1240 }, { "epoch": 0.24, "learning_rate": 4.715123776075337e-06, "logits/chosen": -0.8258478045463562, "logits/rejected": -0.4656530022621155, "logps/chosen": -531.392578125, "logps/rejected": -863.3152465820312, "loss": 0.1419, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29133012890815735, "rewards/margins": 0.3196151852607727, "rewards/rejected": -0.6109453439712524, "step": 1250 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.047585129737854, "logits/rejected": -0.5903953313827515, "logps/chosen": -617.4971313476562, "logps/rejected": -1009.921875, "loss": 0.1654, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3208053708076477, "rewards/margins": 0.3935851454734802, "rewards/rejected": -0.7143905758857727, "step": 1260 }, { "epoch": 0.24, "learning_rate": 4.699516606277638e-06, "logits/chosen": -1.1315219402313232, "logits/rejected": -0.5336170196533203, "logps/chosen": -593.93798828125, "logps/rejected": -869.85009765625, "loss": 0.1386, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24149493873119354, "rewards/margins": 0.33821454644203186, "rewards/rejected": -0.5797094702720642, "step": 1270 }, { "epoch": 0.24, "learning_rate": 4.691566995599056e-06, "logits/chosen": -1.1146339178085327, "logits/rejected": -0.7034021019935608, "logps/chosen": -502.23370361328125, "logps/rejected": -924.3167724609375, "loss": 0.14, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24341730773448944, "rewards/margins": 0.34996703267097473, "rewards/rejected": -0.5933843851089478, "step": 1280 }, { "epoch": 0.25, "learning_rate": 4.683520501542825e-06, "logits/chosen": -1.1319735050201416, "logits/rejected": -0.7827624082565308, "logps/chosen": -475.17987060546875, "logps/rejected": -786.6981201171875, "loss": 0.1916, "rewards/accuracies": 0.75, "rewards/chosen": -0.19619041681289673, "rewards/margins": 0.35823652148246765, "rewards/rejected": -0.554426908493042, "step": 1290 }, { "epoch": 0.25, "learning_rate": 4.675377479823153e-06, "logits/chosen": -1.2866129875183105, "logits/rejected": -0.6678817272186279, "logps/chosen": -420.416015625, "logps/rejected": -792.9000244140625, "loss": 0.0887, "rewards/accuracies": 0.875, "rewards/chosen": -0.18195226788520813, "rewards/margins": 0.40985745191574097, "rewards/rejected": -0.5918096303939819, "step": 1300 }, { "epoch": 0.25, "learning_rate": 4.667138290421483e-06, "logits/chosen": -1.2954565286636353, "logits/rejected": -0.8432362675666809, "logps/chosen": -419.9427185058594, "logps/rejected": -866.1632690429688, "loss": 0.1241, "rewards/accuracies": 0.875, "rewards/chosen": -0.18678149580955505, "rewards/margins": 0.426344633102417, "rewards/rejected": -0.6131261587142944, "step": 1310 }, { "epoch": 0.25, "learning_rate": 4.658803297570578e-06, "logits/chosen": -1.374985694885254, "logits/rejected": -0.8182689547538757, "logps/chosen": -421.45343017578125, "logps/rejected": -829.6100463867188, "loss": 0.1466, "rewards/accuracies": 0.875, "rewards/chosen": -0.14659619331359863, "rewards/margins": 0.3917675316333771, "rewards/rejected": -0.5383636355400085, "step": 1320 }, { "epoch": 0.25, "learning_rate": 4.650372869738415e-06, "logits/chosen": -1.273526906967163, "logits/rejected": -0.7884925603866577, "logps/chosen": -419.8927307128906, "logps/rejected": -820.8694458007812, "loss": 0.1098, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13696891069412231, "rewards/margins": 0.4055593013763428, "rewards/rejected": -0.5425282120704651, "step": 1330 }, { "epoch": 0.26, "learning_rate": 4.641847379611898e-06, "logits/chosen": -0.9489263296127319, "logits/rejected": -0.6098904609680176, "logps/chosen": -357.75946044921875, "logps/rejected": -664.9490356445312, "loss": 0.1431, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1590057611465454, "rewards/margins": 0.3229936361312866, "rewards/rejected": -0.4819994568824768, "step": 1340 }, { "epoch": 0.26, "learning_rate": 4.633227204080389e-06, "logits/chosen": -1.4600012302398682, "logits/rejected": -0.8520862460136414, "logps/chosen": -481.3115234375, "logps/rejected": -826.6591796875, "loss": 0.1543, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20665964484214783, "rewards/margins": 0.3753765821456909, "rewards/rejected": -0.5820361971855164, "step": 1350 }, { "epoch": 0.26, "learning_rate": 4.624512724219038e-06, "logits/chosen": -1.1773959398269653, "logits/rejected": -0.9468321800231934, "logps/chosen": -362.19403076171875, "logps/rejected": -629.0232543945312, "loss": 0.1772, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1530476212501526, "rewards/margins": 0.29461896419525146, "rewards/rejected": -0.44766658544540405, "step": 1360 }, { "epoch": 0.26, "learning_rate": 4.6157043252719374e-06, "logits/chosen": -1.0850251913070679, "logits/rejected": -0.6847610473632812, "logps/chosen": -319.2314147949219, "logps/rejected": -628.9642944335938, "loss": 0.1077, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14382942020893097, "rewards/margins": 0.34486326575279236, "rewards/rejected": -0.48869267106056213, "step": 1370 }, { "epoch": 0.26, "learning_rate": 4.606802396635098e-06, "logits/chosen": -0.8110812306404114, "logits/rejected": -0.6392208337783813, "logps/chosen": -526.5760498046875, "logps/rejected": -942.75390625, "loss": 0.1426, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2978757321834564, "rewards/margins": 0.3286246657371521, "rewards/rejected": -0.6265004277229309, "step": 1380 }, { "epoch": 0.26, "learning_rate": 4.597807331839229e-06, "logits/chosen": -1.068227767944336, "logits/rejected": -0.5740073919296265, "logps/chosen": -447.18157958984375, "logps/rejected": -830.3377075195312, "loss": 0.1028, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1874638795852661, "rewards/margins": 0.37846532464027405, "rewards/rejected": -0.565929114818573, "step": 1390 }, { "epoch": 0.27, "learning_rate": 4.588719528532342e-06, "logits/chosen": -1.1014790534973145, "logits/rejected": -0.6974457502365112, "logps/chosen": -431.90380859375, "logps/rejected": -686.2915649414062, "loss": 0.1313, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19059441983699799, "rewards/margins": 0.2825468182563782, "rewards/rejected": -0.47314128279685974, "step": 1400 }, { "epoch": 0.27, "learning_rate": 4.5795393884621735e-06, "logits/chosen": -1.2448312044143677, "logits/rejected": -0.7283969521522522, "logps/chosen": -431.7861328125, "logps/rejected": -876.5538940429688, "loss": 0.1334, "rewards/accuracies": 0.875, "rewards/chosen": -0.17322006821632385, "rewards/margins": 0.4247727394104004, "rewards/rejected": -0.5979927778244019, "step": 1410 }, { "epoch": 0.27, "learning_rate": 4.5702673174584236e-06, "logits/chosen": -1.1754173040390015, "logits/rejected": -0.5933743715286255, "logps/chosen": -447.909912109375, "logps/rejected": -711.0015869140625, "loss": 0.1486, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18655391037464142, "rewards/margins": 0.3598090708255768, "rewards/rejected": -0.5463629364967346, "step": 1420 }, { "epoch": 0.27, "learning_rate": 4.560903725414816e-06, "logits/chosen": -1.0851469039916992, "logits/rejected": -0.46483176946640015, "logps/chosen": -607.8848266601562, "logps/rejected": -975.83544921875, "loss": 0.1592, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.31289446353912354, "rewards/margins": 0.3732256293296814, "rewards/rejected": -0.6861200928688049, "step": 1430 }, { "epoch": 0.27, "learning_rate": 4.551449026270979e-06, "logits/chosen": -1.13060462474823, "logits/rejected": -0.39518287777900696, "logps/chosen": -524.8216552734375, "logps/rejected": -839.0323486328125, "loss": 0.1687, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2758244276046753, "rewards/margins": 0.3789781928062439, "rewards/rejected": -0.6548025608062744, "step": 1440 }, { "epoch": 0.28, "learning_rate": 4.541903637994142e-06, "logits/chosen": -1.133201241493225, "logits/rejected": -0.5753670334815979, "logps/chosen": -415.52069091796875, "logps/rejected": -837.8951416015625, "loss": 0.1523, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18679054081439972, "rewards/margins": 0.4556844234466553, "rewards/rejected": -0.6424749493598938, "step": 1450 }, { "epoch": 0.28, "learning_rate": 4.532267982560662e-06, "logits/chosen": -1.2704496383666992, "logits/rejected": -0.8664214015007019, "logps/chosen": -368.68011474609375, "logps/rejected": -737.5970458984375, "loss": 0.1687, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.139316126704216, "rewards/margins": 0.36525264382362366, "rewards/rejected": -0.5045687556266785, "step": 1460 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.1816556453704834, "logits/rejected": -0.9350358247756958, "logps/chosen": -393.341552734375, "logps/rejected": -777.5223388671875, "loss": 0.1075, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1571137011051178, "rewards/margins": 0.4040326476097107, "rewards/rejected": -0.5611463785171509, "step": 1470 }, { "epoch": 0.28, "learning_rate": 4.512727578062733e-06, "logits/chosen": -1.2275192737579346, "logits/rejected": -0.859209418296814, "logps/chosen": -393.1356506347656, "logps/rejected": -720.8304443359375, "loss": 0.1788, "rewards/accuracies": 0.75, "rewards/chosen": -0.15080639719963074, "rewards/margins": 0.34938356280326843, "rewards/rejected": -0.5001899003982544, "step": 1480 }, { "epoch": 0.28, "learning_rate": 4.502823692827859e-06, "logits/chosen": -1.4731618165969849, "logits/rejected": -1.0550423860549927, "logps/chosen": -387.7159118652344, "logps/rejected": -686.4938354492188, "loss": 0.1407, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15858078002929688, "rewards/margins": 0.3455260396003723, "rewards/rejected": -0.5041068196296692, "step": 1490 }, { "epoch": 0.29, "learning_rate": 4.492831268057307e-06, "logits/chosen": -1.2440235614776611, "logits/rejected": -0.8325130343437195, "logps/chosen": -415.19219970703125, "logps/rejected": -803.2659301757812, "loss": 0.1205, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1737671047449112, "rewards/margins": 0.39250996708869934, "rewards/rejected": -0.566277027130127, "step": 1500 }, { "epoch": 0.29, "learning_rate": 4.482750745489733e-06, "logits/chosen": -1.571380853652954, "logits/rejected": -0.7591809034347534, "logps/chosen": -441.29962158203125, "logps/rejected": -801.0969848632812, "loss": 0.1312, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14847534894943237, "rewards/margins": 0.42510849237442017, "rewards/rejected": -0.5735839009284973, "step": 1510 }, { "epoch": 0.29, "learning_rate": 4.472582570758367e-06, "logits/chosen": -1.3355686664581299, "logits/rejected": -0.8329195976257324, "logps/chosen": -486.95367431640625, "logps/rejected": -919.5452270507812, "loss": 0.0697, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18455496430397034, "rewards/margins": 0.4339834153652191, "rewards/rejected": -0.6185383796691895, "step": 1520 }, { "epoch": 0.29, "learning_rate": 4.4623271933713065e-06, "logits/chosen": -1.2127671241760254, "logits/rejected": -1.007833480834961, "logps/chosen": -394.58245849609375, "logps/rejected": -899.5948486328125, "loss": 0.1205, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18245461583137512, "rewards/margins": 0.4174475073814392, "rewards/rejected": -0.5999020934104919, "step": 1530 }, { "epoch": 0.29, "learning_rate": 4.451985066691649e-06, "logits/chosen": -1.5780236721038818, "logits/rejected": -0.9145193099975586, "logps/chosen": -398.3710021972656, "logps/rejected": -661.3423461914062, "loss": 0.1102, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12732096016407013, "rewards/margins": 0.34847724437713623, "rewards/rejected": -0.47579821944236755, "step": 1540 }, { "epoch": 0.3, "learning_rate": 4.441556647917447e-06, "logits/chosen": -1.6722930669784546, "logits/rejected": -1.0362961292266846, "logps/chosen": -427.39422607421875, "logps/rejected": -796.1749877929688, "loss": 0.1145, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14822420477867126, "rewards/margins": 0.3913078010082245, "rewards/rejected": -0.5395320653915405, "step": 1550 }, { "epoch": 0.3, "learning_rate": 4.431042398061499e-06, "logits/chosen": -1.3874180316925049, "logits/rejected": -0.9036419987678528, "logps/chosen": -454.18310546875, "logps/rejected": -874.29931640625, "loss": 0.1171, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19960516691207886, "rewards/margins": 0.41976088285446167, "rewards/rejected": -0.6193661093711853, "step": 1560 }, { "epoch": 0.3, "learning_rate": 4.420442781930971e-06, "logits/chosen": -1.712471604347229, "logits/rejected": -0.9809194803237915, "logps/chosen": -539.1209106445312, "logps/rejected": -793.5512084960938, "loss": 0.1741, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2442115843296051, "rewards/margins": 0.3468310236930847, "rewards/rejected": -0.5910425186157227, "step": 1570 }, { "epoch": 0.3, "learning_rate": 4.409758268106842e-06, "logits/chosen": -1.493787169456482, "logits/rejected": -0.8585420846939087, "logps/chosen": -470.8426818847656, "logps/rejected": -866.9586181640625, "loss": 0.0836, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.171084925532341, "rewards/margins": 0.45530468225479126, "rewards/rejected": -0.6263895630836487, "step": 1580 }, { "epoch": 0.3, "learning_rate": 4.398989328923196e-06, "logits/chosen": -1.444780945777893, "logits/rejected": -1.017703890800476, "logps/chosen": -429.22357177734375, "logps/rejected": -931.8521728515625, "loss": 0.1076, "rewards/accuracies": 0.875, "rewards/chosen": -0.18251177668571472, "rewards/margins": 0.47312384843826294, "rewards/rejected": -0.6556357145309448, "step": 1590 }, { "epoch": 0.3, "learning_rate": 4.388136440446338e-06, "logits/chosen": -1.4514683485031128, "logits/rejected": -0.6711660623550415, "logps/chosen": -517.3246459960938, "logps/rejected": -855.2369384765625, "loss": 0.1814, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.28433957695961, "rewards/margins": 0.38134485483169556, "rewards/rejected": -0.6656844615936279, "step": 1600 }, { "epoch": 0.31, "learning_rate": 4.377200082453748e-06, "logits/chosen": -1.393810749053955, "logits/rejected": -0.7458035349845886, "logps/chosen": -606.813232421875, "logps/rejected": -913.16064453125, "loss": 0.139, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32184717059135437, "rewards/margins": 0.35339492559432983, "rewards/rejected": -0.6752421259880066, "step": 1610 }, { "epoch": 0.31, "learning_rate": 4.366180738412876e-06, "logits/chosen": -1.3189208507537842, "logits/rejected": -0.7809714078903198, "logps/chosen": -459.47650146484375, "logps/rejected": -829.1685791015625, "loss": 0.1395, "rewards/accuracies": 0.875, "rewards/chosen": -0.256475567817688, "rewards/margins": 0.39739376306533813, "rewards/rejected": -0.6538693308830261, "step": 1620 }, { "epoch": 0.31, "learning_rate": 4.355078895459761e-06, "logits/chosen": -1.4062083959579468, "logits/rejected": -0.8051255345344543, "logps/chosen": -427.11968994140625, "logps/rejected": -803.963623046875, "loss": 0.114, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18415488302707672, "rewards/margins": 0.4355524182319641, "rewards/rejected": -0.6197072863578796, "step": 1630 }, { "epoch": 0.31, "learning_rate": 4.343895044377504e-06, "logits/chosen": -1.3500722646713257, "logits/rejected": -0.6051512956619263, "logps/chosen": -394.1636047363281, "logps/rejected": -738.2877807617188, "loss": 0.1245, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11112014949321747, "rewards/margins": 0.41807299852371216, "rewards/rejected": -0.5291931629180908, "step": 1640 }, { "epoch": 0.31, "learning_rate": 4.332629679574566e-06, "logits/chosen": -1.2942713499069214, "logits/rejected": -1.0370981693267822, "logps/chosen": -376.4662170410156, "logps/rejected": -710.51611328125, "loss": 0.1777, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1924479454755783, "rewards/margins": 0.31794387102127075, "rewards/rejected": -0.5103917717933655, "step": 1650 }, { "epoch": 0.32, "learning_rate": 4.321283299062916e-06, "logits/chosen": -1.3147811889648438, "logits/rejected": -0.8949203491210938, "logps/chosen": -420.4652404785156, "logps/rejected": -804.53173828125, "loss": 0.1583, "rewards/accuracies": 0.75, "rewards/chosen": -0.1741524189710617, "rewards/margins": 0.39506280422210693, "rewards/rejected": -0.5692151784896851, "step": 1660 }, { "epoch": 0.32, "learning_rate": 4.309856404436013e-06, "logits/chosen": -1.158186674118042, "logits/rejected": -0.8108803629875183, "logps/chosen": -457.42071533203125, "logps/rejected": -785.7427978515625, "loss": 0.1174, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20928213000297546, "rewards/margins": 0.36331549286842346, "rewards/rejected": -0.5725975632667542, "step": 1670 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.3354493379592896, "logits/rejected": -0.7906332015991211, "logps/chosen": -527.8662109375, "logps/rejected": -855.6251831054688, "loss": 0.1363, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2757346034049988, "rewards/margins": 0.3397030830383301, "rewards/rejected": -0.6154377460479736, "step": 1680 }, { "epoch": 0.32, "learning_rate": 4.2867630969845235e-06, "logits/chosen": -1.2590032815933228, "logits/rejected": -1.0363199710845947, "logps/chosen": -427.4825134277344, "logps/rejected": -799.4268798828125, "loss": 0.1587, "rewards/accuracies": 0.75, "rewards/chosen": -0.16773132979869843, "rewards/margins": 0.3677247166633606, "rewards/rejected": -0.5354560613632202, "step": 1690 }, { "epoch": 0.32, "learning_rate": 4.275097705053951e-06, "logits/chosen": -1.303935170173645, "logits/rejected": -1.0342575311660767, "logps/chosen": -378.9317626953125, "logps/rejected": -769.5296630859375, "loss": 0.128, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17553675174713135, "rewards/margins": 0.3647393584251404, "rewards/rejected": -0.5402761697769165, "step": 1700 }, { "epoch": 0.33, "learning_rate": 4.263353840751023e-06, "logits/chosen": -1.5875033140182495, "logits/rejected": -0.7812114953994751, "logps/chosen": -425.4710998535156, "logps/rejected": -781.4553833007812, "loss": 0.1243, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13237932324409485, "rewards/margins": 0.4179268479347229, "rewards/rejected": -0.5503062009811401, "step": 1710 }, { "epoch": 0.33, "learning_rate": 4.251532023240901e-06, "logits/chosen": -1.277547836303711, "logits/rejected": -0.762844443321228, "logps/chosen": -393.67938232421875, "logps/rejected": -680.7984619140625, "loss": 0.1041, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17114858329296112, "rewards/margins": 0.33264437317848206, "rewards/rejected": -0.503792941570282, "step": 1720 }, { "epoch": 0.33, "learning_rate": 4.239632775134857e-06, "logits/chosen": -1.450622320175171, "logits/rejected": -0.9876900911331177, "logps/chosen": -349.21258544921875, "logps/rejected": -782.7197875976562, "loss": 0.1559, "rewards/accuracies": 0.875, "rewards/chosen": -0.11185978353023529, "rewards/margins": 0.425804078578949, "rewards/rejected": -0.5376638770103455, "step": 1730 }, { "epoch": 0.33, "learning_rate": 4.227656622467162e-06, "logits/chosen": -1.5200934410095215, "logits/rejected": -0.9623802304267883, "logps/chosen": -504.7323303222656, "logps/rejected": -807.3109741210938, "loss": 0.1588, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14490297436714172, "rewards/margins": 0.347685843706131, "rewards/rejected": -0.4925888180732727, "step": 1740 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -1.5886991024017334, "logits/rejected": -0.9334489107131958, "logps/chosen": -473.9207458496094, "logps/rejected": -793.2481689453125, "loss": 0.095, "rewards/accuracies": 0.875, "rewards/chosen": -0.15691755712032318, "rewards/margins": 0.38926559686660767, "rewards/rejected": -0.546183168888092, "step": 1750 }, { "epoch": 0.34, "learning_rate": 4.203475724559235e-06, "logits/chosen": -1.2285364866256714, "logits/rejected": -0.643894374370575, "logps/chosen": -494.7124938964844, "logps/rejected": -829.3150634765625, "loss": 0.0922, "rewards/accuracies": 0.875, "rewards/chosen": -0.22315111756324768, "rewards/margins": 0.3633202612400055, "rewards/rejected": -0.5864713191986084, "step": 1760 }, { "epoch": 0.34, "learning_rate": 4.191272048292514e-06, "logits/chosen": -1.3814643621444702, "logits/rejected": -0.6023943424224854, "logps/chosen": -457.51690673828125, "logps/rejected": -909.46923828125, "loss": 0.0909, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19872768223285675, "rewards/margins": 0.46659260988235474, "rewards/rejected": -0.6653203368186951, "step": 1770 }, { "epoch": 0.34, "learning_rate": 4.178993605363904e-06, "logits/chosen": -1.3051964044570923, "logits/rejected": -0.6130464673042297, "logps/chosen": -461.83795166015625, "logps/rejected": -825.04296875, "loss": 0.1708, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1880657970905304, "rewards/margins": 0.3798215985298157, "rewards/rejected": -0.5678874254226685, "step": 1780 }, { "epoch": 0.34, "learning_rate": 4.166640938570879e-06, "logits/chosen": -1.2396810054779053, "logits/rejected": -0.9185633659362793, "logps/chosen": -390.1123352050781, "logps/rejected": -770.3753662109375, "loss": 0.1099, "rewards/accuracies": 0.875, "rewards/chosen": -0.1455954909324646, "rewards/margins": 0.3625917434692383, "rewards/rejected": -0.5081872344017029, "step": 1790 }, { "epoch": 0.34, "learning_rate": 4.154214593992149e-06, "logits/chosen": -1.5201972723007202, "logits/rejected": -0.7863736152648926, "logps/chosen": -354.16754150390625, "logps/rejected": -735.108154296875, "loss": 0.0712, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12628301978111267, "rewards/margins": 0.4373646378517151, "rewards/rejected": -0.5636476278305054, "step": 1800 }, { "epoch": 0.34, "learning_rate": 4.1417151209635265e-06, "logits/chosen": -1.4209377765655518, "logits/rejected": -0.749359667301178, "logps/chosen": -459.08087158203125, "logps/rejected": -778.1527099609375, "loss": 0.105, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1211809366941452, "rewards/margins": 0.42598724365234375, "rewards/rejected": -0.5471681952476501, "step": 1810 }, { "epoch": 0.35, "learning_rate": 4.129143072053639e-06, "logits/chosen": -1.2357739210128784, "logits/rejected": -0.9745093584060669, "logps/chosen": -421.0030212402344, "logps/rejected": -780.2088623046875, "loss": 0.1684, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15728859603405, "rewards/margins": 0.3233485817909241, "rewards/rejected": -0.48063722252845764, "step": 1820 }, { "epoch": 0.35, "learning_rate": 4.116499003039499e-06, "logits/chosen": -1.643001914024353, "logits/rejected": -1.0855792760849, "logps/chosen": -335.1356506347656, "logps/rejected": -630.9529418945312, "loss": 0.1635, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08409885317087173, "rewards/margins": 0.3365354537963867, "rewards/rejected": -0.42063432931900024, "step": 1830 }, { "epoch": 0.35, "learning_rate": 4.103783472881942e-06, "logits/chosen": -1.4597373008728027, "logits/rejected": -1.0348209142684937, "logps/chosen": -395.40252685546875, "logps/rejected": -761.2767944335938, "loss": 0.171, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18111345171928406, "rewards/margins": 0.35594743490219116, "rewards/rejected": -0.5370609164237976, "step": 1840 }, { "epoch": 0.35, "learning_rate": 4.0909970437009094e-06, "logits/chosen": -1.7463467121124268, "logits/rejected": -0.9515039324760437, "logps/chosen": -426.24627685546875, "logps/rejected": -742.1693725585938, "loss": 0.1089, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1377401202917099, "rewards/margins": 0.38225287199020386, "rewards/rejected": -0.5199930667877197, "step": 1850 }, { "epoch": 0.35, "learning_rate": 4.078140280750598e-06, "logits/chosen": -1.3167990446090698, "logits/rejected": -0.7928259968757629, "logps/chosen": -464.2420349121094, "logps/rejected": -920.0849609375, "loss": 0.0518, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.21042916178703308, "rewards/margins": 0.4646771550178528, "rewards/rejected": -0.6751063466072083, "step": 1860 }, { "epoch": 0.36, "learning_rate": 4.065213752394478e-06, "logits/chosen": -1.4257256984710693, "logits/rejected": -1.0735701322555542, "logps/chosen": -401.20086669921875, "logps/rejected": -794.1649169921875, "loss": 0.1479, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18912364542484283, "rewards/margins": 0.37552380561828613, "rewards/rejected": -0.5646474361419678, "step": 1870 }, { "epoch": 0.36, "learning_rate": 4.052218030080162e-06, "logits/chosen": -1.4914369583129883, "logits/rejected": -0.9137973785400391, "logps/chosen": -488.27484130859375, "logps/rejected": -823.7702026367188, "loss": 0.1836, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2203754186630249, "rewards/margins": 0.3536859452724457, "rewards/rejected": -0.5740613341331482, "step": 1880 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.1514006853103638, "logits/rejected": -0.6609013676643372, "logps/chosen": -412.44091796875, "logps/rejected": -750.6569213867188, "loss": 0.1157, "rewards/accuracies": 0.875, "rewards/chosen": -0.17269805073738098, "rewards/margins": 0.36748582124710083, "rewards/rejected": -0.5401839017868042, "step": 1890 }, { "epoch": 0.36, "learning_rate": 4.026021304636408e-06, "logits/chosen": -1.3765889406204224, "logits/rejected": -0.857066810131073, "logps/chosen": -382.23944091796875, "logps/rejected": -860.0387573242188, "loss": 0.1393, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13182446360588074, "rewards/margins": 0.4192369878292084, "rewards/rejected": -0.5510615110397339, "step": 1900 }, { "epoch": 0.36, "learning_rate": 4.012821459594881e-06, "logits/chosen": -1.6032158136367798, "logits/rejected": -0.890593409538269, "logps/chosen": -431.50830078125, "logps/rejected": -871.9166259765625, "loss": 0.151, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17882555723190308, "rewards/margins": 0.43978428840637207, "rewards/rejected": -0.6186097860336304, "step": 1910 }, { "epoch": 0.37, "learning_rate": 3.999554736719785e-06, "logits/chosen": -1.435840368270874, "logits/rejected": -0.6874290704727173, "logps/chosen": -489.1138610839844, "logps/rejected": -839.9866333007812, "loss": 0.0999, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20601758360862732, "rewards/margins": 0.4006109833717346, "rewards/rejected": -0.6066285371780396, "step": 1920 }, { "epoch": 0.37, "learning_rate": 3.986221722497832e-06, "logits/chosen": -1.4978009462356567, "logits/rejected": -0.91753751039505, "logps/chosen": -379.32415771484375, "logps/rejected": -669.0810546875, "loss": 0.1397, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08929023891687393, "rewards/margins": 0.3167429268360138, "rewards/rejected": -0.4060331881046295, "step": 1930 }, { "epoch": 0.37, "learning_rate": 3.9728230063463e-06, "logits/chosen": -1.571081280708313, "logits/rejected": -0.8805214166641235, "logps/chosen": -522.83056640625, "logps/rejected": -795.2962646484375, "loss": 0.1523, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17227482795715332, "rewards/margins": 0.3463408350944519, "rewards/rejected": -0.5186156630516052, "step": 1940 }, { "epoch": 0.37, "learning_rate": 3.9593591805869755e-06, "logits/chosen": -1.4741287231445312, "logits/rejected": -0.9440715909004211, "logps/chosen": -412.04083251953125, "logps/rejected": -726.40576171875, "loss": 0.1481, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1741792857646942, "rewards/margins": 0.36757954955101013, "rewards/rejected": -0.5417588949203491, "step": 1950 }, { "epoch": 0.37, "learning_rate": 3.945830840419966e-06, "logits/chosen": -1.4948309659957886, "logits/rejected": -0.78379887342453, "logps/chosen": -537.1199340820312, "logps/rejected": -901.3912353515625, "loss": 0.1092, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18375089764595032, "rewards/margins": 0.4507162570953369, "rewards/rejected": -0.6344671249389648, "step": 1960 }, { "epoch": 0.38, "learning_rate": 3.932238583897395e-06, "logits/chosen": -1.5086297988891602, "logits/rejected": -0.949896514415741, "logps/chosen": -372.39447021484375, "logps/rejected": -758.90478515625, "loss": 0.1295, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11385353654623032, "rewards/margins": 0.40675097703933716, "rewards/rejected": -0.5206045508384705, "step": 1970 }, { "epoch": 0.38, "learning_rate": 3.918583011896955e-06, "logits/chosen": -1.3602194786071777, "logits/rejected": -0.715534508228302, "logps/chosen": -456.2061462402344, "logps/rejected": -817.2261962890625, "loss": 0.1277, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19313010573387146, "rewards/margins": 0.40127572417259216, "rewards/rejected": -0.5944058299064636, "step": 1980 }, { "epoch": 0.38, "learning_rate": 3.904864728095349e-06, "logits/chosen": -1.5365194082260132, "logits/rejected": -1.1429431438446045, "logps/chosen": -446.4228515625, "logps/rejected": -743.8895263671875, "loss": 0.1664, "rewards/accuracies": 0.75, "rewards/chosen": -0.19800862669944763, "rewards/margins": 0.31907951831817627, "rewards/rejected": -0.5170881152153015, "step": 1990 }, { "epoch": 0.38, "learning_rate": 3.891084338941603e-06, "logits/chosen": -1.3897597789764404, "logits/rejected": -0.7664422988891602, "logps/chosen": -603.8766479492188, "logps/rejected": -897.9757690429688, "loss": 0.1212, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2676590085029602, "rewards/margins": 0.37152546644210815, "rewards/rejected": -0.6391844749450684, "step": 2000 }, { "epoch": 0.38, "learning_rate": 3.8772424536302565e-06, "logits/chosen": -1.5130223035812378, "logits/rejected": -0.7249564528465271, "logps/chosen": -579.6700439453125, "logps/rejected": -857.4832153320312, "loss": 0.1189, "rewards/accuracies": 0.875, "rewards/chosen": -0.27131420373916626, "rewards/margins": 0.36394548416137695, "rewards/rejected": -0.6352596879005432, "step": 2010 }, { "epoch": 0.38, "learning_rate": 3.863339684074432e-06, "logits/chosen": -1.472622275352478, "logits/rejected": -0.8101608157157898, "logps/chosen": -504.273193359375, "logps/rejected": -777.370361328125, "loss": 0.1604, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2295054942369461, "rewards/margins": 0.32737064361572266, "rewards/rejected": -0.5568761825561523, "step": 2020 }, { "epoch": 0.39, "learning_rate": 3.849376644878783e-06, "logits/chosen": -1.4834973812103271, "logits/rejected": -0.8286339044570923, "logps/chosen": -503.57244873046875, "logps/rejected": -823.2725830078125, "loss": 0.1232, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15912900865077972, "rewards/margins": 0.3788989186286926, "rewards/rejected": -0.5380278825759888, "step": 2030 }, { "epoch": 0.39, "learning_rate": 3.835353953312322e-06, "logits/chosen": -1.3164693117141724, "logits/rejected": -0.8920178413391113, "logps/chosen": -406.12384033203125, "logps/rejected": -757.328857421875, "loss": 0.151, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1965499371290207, "rewards/margins": 0.3058350384235382, "rewards/rejected": -0.5023850202560425, "step": 2040 }, { "epoch": 0.39, "learning_rate": 3.821272229281139e-06, "logits/chosen": -1.697614073753357, "logits/rejected": -1.1096159219741821, "logps/chosen": -452.93994140625, "logps/rejected": -718.0389404296875, "loss": 0.159, "rewards/accuracies": 0.75, "rewards/chosen": -0.165342777967453, "rewards/margins": 0.32132482528686523, "rewards/rejected": -0.48666754364967346, "step": 2050 }, { "epoch": 0.39, "learning_rate": 3.8071320953009906e-06, "logits/chosen": -1.6494042873382568, "logits/rejected": -1.0395376682281494, "logps/chosen": -371.68634033203125, "logps/rejected": -667.0049438476562, "loss": 0.185, "rewards/accuracies": 0.75, "rewards/chosen": -0.12940135598182678, "rewards/margins": 0.3338983654975891, "rewards/rejected": -0.4632996916770935, "step": 2060 }, { "epoch": 0.39, "learning_rate": 3.792934176469782e-06, "logits/chosen": -1.5945041179656982, "logits/rejected": -1.0604790449142456, "logps/chosen": -431.1544494628906, "logps/rejected": -749.507568359375, "loss": 0.1422, "rewards/accuracies": 0.875, "rewards/chosen": -0.14418260753154755, "rewards/margins": 0.3612174689769745, "rewards/rejected": -0.5054000616073608, "step": 2070 }, { "epoch": 0.4, "learning_rate": 3.7786791004399353e-06, "logits/chosen": -1.569284200668335, "logits/rejected": -1.0308313369750977, "logps/chosen": -459.44879150390625, "logps/rejected": -814.486328125, "loss": 0.112, "rewards/accuracies": 0.875, "rewards/chosen": -0.19183216989040375, "rewards/margins": 0.38468846678733826, "rewards/rejected": -0.5765206217765808, "step": 2080 }, { "epoch": 0.4, "learning_rate": 3.764367497390642e-06, "logits/chosen": -1.2742561101913452, "logits/rejected": -0.921722412109375, "logps/chosen": -513.5171508789062, "logps/rejected": -870.3267822265625, "loss": 0.158, "rewards/accuracies": 0.875, "rewards/chosen": -0.23014836013317108, "rewards/margins": 0.345438688993454, "rewards/rejected": -0.5755869746208191, "step": 2090 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.3299344778060913, "logits/rejected": -1.0471184253692627, "logps/chosen": -482.66851806640625, "logps/rejected": -798.5118408203125, "loss": 0.1466, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23691225051879883, "rewards/margins": 0.32488545775413513, "rewards/rejected": -0.5617977380752563, "step": 2100 }, { "epoch": 0.4, "learning_rate": 3.7355772434170523e-06, "logits/chosen": -1.4452104568481445, "logits/rejected": -0.8449694514274597, "logps/chosen": -471.0455627441406, "logps/rejected": -711.90087890625, "loss": 0.1545, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20376411080360413, "rewards/margins": 0.2989460527896881, "rewards/rejected": -0.5027101635932922, "step": 2110 }, { "epoch": 0.4, "learning_rate": 3.7210998652337016e-06, "logits/chosen": -1.723384141921997, "logits/rejected": -1.089850664138794, "logps/chosen": -453.136962890625, "logps/rejected": -806.6472778320312, "loss": 0.0844, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20005285739898682, "rewards/margins": 0.3868790864944458, "rewards/rejected": -0.5869318842887878, "step": 2120 }, { "epoch": 0.41, "learning_rate": 3.7065685054565277e-06, "logits/chosen": -1.6450010538101196, "logits/rejected": -0.8151381611824036, "logps/chosen": -527.9044799804688, "logps/rejected": -858.47265625, "loss": 0.1405, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18734359741210938, "rewards/margins": 0.39840954542160034, "rewards/rejected": -0.5857530832290649, "step": 2130 }, { "epoch": 0.41, "learning_rate": 3.691983806478494e-06, "logits/chosen": -1.5075774192810059, "logits/rejected": -1.223270058631897, "logps/chosen": -428.3919982910156, "logps/rejected": -777.3948974609375, "loss": 0.1382, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21032114326953888, "rewards/margins": 0.3191841244697571, "rewards/rejected": -0.5295053124427795, "step": 2140 }, { "epoch": 0.41, "learning_rate": 3.677346413050551e-06, "logits/chosen": -1.3537256717681885, "logits/rejected": -0.9050661325454712, "logps/chosen": -446.18646240234375, "logps/rejected": -862.1671752929688, "loss": 0.1144, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16960960626602173, "rewards/margins": 0.41824159026145935, "rewards/rejected": -0.5878511071205139, "step": 2150 }, { "epoch": 0.41, "learning_rate": 3.6626569722531268e-06, "logits/chosen": -1.3302949666976929, "logits/rejected": -0.9915385246276855, "logps/chosen": -365.4180603027344, "logps/rejected": -839.2130737304688, "loss": 0.13, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.12275992333889008, "rewards/margins": 0.43054452538490295, "rewards/rejected": -0.5533044934272766, "step": 2160 }, { "epoch": 0.41, "learning_rate": 3.6479161334675294e-06, "logits/chosen": -1.3166197538375854, "logits/rejected": -0.9454595446586609, "logps/chosen": -344.5181579589844, "logps/rejected": -900.7305908203125, "loss": 0.0661, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.14482378959655762, "rewards/margins": 0.4969552159309387, "rewards/rejected": -0.6417790651321411, "step": 2170 }, { "epoch": 0.42, "learning_rate": 3.6331245483472353e-06, "logits/chosen": -1.5919032096862793, "logits/rejected": -1.0869430303573608, "logps/chosen": -442.857421875, "logps/rejected": -775.5303344726562, "loss": 0.1115, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16661079227924347, "rewards/margins": 0.38238829374313354, "rewards/rejected": -0.5489991307258606, "step": 2180 }, { "epoch": 0.42, "learning_rate": 3.6182828707890816e-06, "logits/chosen": -1.4749866724014282, "logits/rejected": -1.0844981670379639, "logps/chosen": -376.8042907714844, "logps/rejected": -815.3419189453125, "loss": 0.1582, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1809140145778656, "rewards/margins": 0.4053187370300293, "rewards/rejected": -0.5862327814102173, "step": 2190 }, { "epoch": 0.42, "learning_rate": 3.6033917569043604e-06, "logits/chosen": -1.4776077270507812, "logits/rejected": -1.0328203439712524, "logps/chosen": -326.08050537109375, "logps/rejected": -754.1819458007812, "loss": 0.0787, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11942918598651886, "rewards/margins": 0.43238258361816406, "rewards/rejected": -0.5518117547035217, "step": 2200 }, { "epoch": 0.42, "learning_rate": 3.588451864989811e-06, "logits/chosen": -1.3959901332855225, "logits/rejected": -0.9821311831474304, "logps/chosen": -401.9287414550781, "logps/rejected": -789.0911865234375, "loss": 0.1135, "rewards/accuracies": 0.875, "rewards/chosen": -0.15951037406921387, "rewards/margins": 0.36037665605545044, "rewards/rejected": -0.5198870301246643, "step": 2210 }, { "epoch": 0.42, "learning_rate": 3.5734638554985234e-06, "logits/chosen": -1.5786325931549072, "logits/rejected": -0.9880353808403015, "logps/chosen": -483.30474853515625, "logps/rejected": -711.1782836914062, "loss": 0.1822, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17052625119686127, "rewards/margins": 0.332608163356781, "rewards/rejected": -0.5031344890594482, "step": 2220 }, { "epoch": 0.42, "learning_rate": 3.5584283910107343e-06, "logits/chosen": -1.2413654327392578, "logits/rejected": -0.7022978663444519, "logps/chosen": -418.43896484375, "logps/rejected": -816.5313720703125, "loss": 0.1195, "rewards/accuracies": 0.875, "rewards/chosen": -0.16614460945129395, "rewards/margins": 0.4376896321773529, "rewards/rejected": -0.6038342714309692, "step": 2230 }, { "epoch": 0.43, "learning_rate": 3.543346136204545e-06, "logits/chosen": -1.4568989276885986, "logits/rejected": -0.8339802622795105, "logps/chosen": -459.564453125, "logps/rejected": -706.9010620117188, "loss": 0.1976, "rewards/accuracies": 0.75, "rewards/chosen": -0.2152753323316574, "rewards/margins": 0.2935459017753601, "rewards/rejected": -0.5088212490081787, "step": 2240 }, { "epoch": 0.43, "learning_rate": 3.5282177578265295e-06, "logits/chosen": -1.4024317264556885, "logits/rejected": -1.0421273708343506, "logps/chosen": -419.47906494140625, "logps/rejected": -856.0556640625, "loss": 0.0961, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18188200891017914, "rewards/margins": 0.4091735780239105, "rewards/rejected": -0.5910555124282837, "step": 2250 }, { "epoch": 0.43, "learning_rate": 3.5130439246622635e-06, "logits/chosen": -1.320522665977478, "logits/rejected": -0.9211158752441406, "logps/chosen": -483.330322265625, "logps/rejected": -861.43115234375, "loss": 0.0711, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22316622734069824, "rewards/margins": 0.4078037142753601, "rewards/rejected": -0.6309698820114136, "step": 2260 }, { "epoch": 0.43, "learning_rate": 3.497825307506758e-06, "logits/chosen": -1.4052175283432007, "logits/rejected": -0.963665783405304, "logps/chosen": -456.56231689453125, "logps/rejected": -938.4904174804688, "loss": 0.1624, "rewards/accuracies": 0.875, "rewards/chosen": -0.23595952987670898, "rewards/margins": 0.40208953619003296, "rewards/rejected": -0.6380491256713867, "step": 2270 }, { "epoch": 0.43, "learning_rate": 3.4825625791348093e-06, "logits/chosen": -1.4312849044799805, "logits/rejected": -0.7254976034164429, "logps/chosen": -507.756591796875, "logps/rejected": -794.6476440429688, "loss": 0.1988, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22778639197349548, "rewards/margins": 0.3561779856681824, "rewards/rejected": -0.583964467048645, "step": 2280 }, { "epoch": 0.44, "learning_rate": 3.467256414271249e-06, "logits/chosen": -1.4247713088989258, "logits/rejected": -0.8696607351303101, "logps/chosen": -435.68536376953125, "logps/rejected": -820.4193115234375, "loss": 0.1053, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18320433795452118, "rewards/margins": 0.38653475046157837, "rewards/rejected": -0.5697391033172607, "step": 2290 }, { "epoch": 0.44, "learning_rate": 3.4519074895611245e-06, "logits/chosen": -1.3859965801239014, "logits/rejected": -0.9295135736465454, "logps/chosen": -445.33648681640625, "logps/rejected": -792.642822265625, "loss": 0.0741, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16965916752815247, "rewards/margins": 0.37729209661483765, "rewards/rejected": -0.5469512343406677, "step": 2300 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.523033857345581, "logits/rejected": -0.8833671808242798, "logps/chosen": -464.05181884765625, "logps/rejected": -727.8582763671875, "loss": 0.1405, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1917046308517456, "rewards/margins": 0.337018758058548, "rewards/rejected": -0.5287233591079712, "step": 2310 }, { "epoch": 0.44, "learning_rate": 3.421084076602867e-06, "logits/chosen": -1.4240258932113647, "logits/rejected": -0.863364040851593, "logps/chosen": -426.357666015625, "logps/rejected": -693.9955444335938, "loss": 0.1258, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.165890172123909, "rewards/margins": 0.3513902723789215, "rewards/rejected": -0.5172804594039917, "step": 2320 }, { "epoch": 0.44, "learning_rate": 3.405610950976257e-06, "logits/chosen": -1.2388966083526611, "logits/rejected": -0.9140247106552124, "logps/chosen": -453.939697265625, "logps/rejected": -819.9384765625, "loss": 0.1048, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20850443840026855, "rewards/margins": 0.359141081571579, "rewards/rejected": -0.5676454901695251, "step": 2330 }, { "epoch": 0.45, "learning_rate": 3.3900977906858923e-06, "logits/chosen": -1.4696143865585327, "logits/rejected": -0.9584190249443054, "logps/chosen": -466.117431640625, "logps/rejected": -714.6159057617188, "loss": 0.1581, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17706574499607086, "rewards/margins": 0.33817774057388306, "rewards/rejected": -0.5152435302734375, "step": 2340 }, { "epoch": 0.45, "learning_rate": 3.3745452815275375e-06, "logits/chosen": -1.289514183998108, "logits/rejected": -0.9463627934455872, "logps/chosen": -371.3036804199219, "logps/rejected": -765.1273193359375, "loss": 0.1176, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13886871933937073, "rewards/margins": 0.3995223939418793, "rewards/rejected": -0.53839111328125, "step": 2350 }, { "epoch": 0.45, "learning_rate": 3.3589541110364678e-06, "logits/chosen": -1.2370373010635376, "logits/rejected": -0.6820043325424194, "logps/chosen": -522.7747192382812, "logps/rejected": -875.4880981445312, "loss": 0.1115, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2540190815925598, "rewards/margins": 0.3914617598056793, "rewards/rejected": -0.6454808712005615, "step": 2360 }, { "epoch": 0.45, "learning_rate": 3.3433249684570757e-06, "logits/chosen": -1.5264694690704346, "logits/rejected": -0.9731807708740234, "logps/chosen": -439.20184326171875, "logps/rejected": -872.1959838867188, "loss": 0.136, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19613762199878693, "rewards/margins": 0.44037193059921265, "rewards/rejected": -0.6365095376968384, "step": 2370 }, { "epoch": 0.45, "learning_rate": 3.3276585447123957e-06, "logits/chosen": -1.5532701015472412, "logits/rejected": -0.9050699472427368, "logps/chosen": -556.3533935546875, "logps/rejected": -820.2437744140625, "loss": 0.1493, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2102535218000412, "rewards/margins": 0.3648611903190613, "rewards/rejected": -0.5751147270202637, "step": 2380 }, { "epoch": 0.46, "learning_rate": 3.3119555323735664e-06, "logits/chosen": -1.1724474430084229, "logits/rejected": -1.1024209260940552, "logps/chosen": -386.05987548828125, "logps/rejected": -792.771240234375, "loss": 0.1179, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20424416661262512, "rewards/margins": 0.36083582043647766, "rewards/rejected": -0.5650800466537476, "step": 2390 }, { "epoch": 0.46, "learning_rate": 3.2962166256292116e-06, "logits/chosen": -1.1040351390838623, "logits/rejected": -0.9207507967948914, "logps/chosen": -488.7312927246094, "logps/rejected": -954.8610229492188, "loss": 0.0996, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2594430446624756, "rewards/margins": 0.4115273952484131, "rewards/rejected": -0.6709704995155334, "step": 2400 }, { "epoch": 0.46, "learning_rate": 3.2804425202547494e-06, "logits/chosen": -1.128282904624939, "logits/rejected": -0.9015064239501953, "logps/chosen": -530.8614501953125, "logps/rejected": -890.9010620117188, "loss": 0.1558, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31275075674057007, "rewards/margins": 0.30815526843070984, "rewards/rejected": -0.6209059953689575, "step": 2410 }, { "epoch": 0.46, "learning_rate": 3.2646339135816386e-06, "logits/chosen": -1.144333004951477, "logits/rejected": -0.7270548939704895, "logps/chosen": -543.6788330078125, "logps/rejected": -883.6707763671875, "loss": 0.1397, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28174489736557007, "rewards/margins": 0.32797712087631226, "rewards/rejected": -0.6097220182418823, "step": 2420 }, { "epoch": 0.46, "learning_rate": 3.2487915044665485e-06, "logits/chosen": -1.3582704067230225, "logits/rejected": -0.6314566135406494, "logps/chosen": -522.9893798828125, "logps/rejected": -895.6492309570312, "loss": 0.0814, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.20856209099292755, "rewards/margins": 0.432664155960083, "rewards/rejected": -0.6412262320518494, "step": 2430 }, { "epoch": 0.46, "learning_rate": 3.2329159932604638e-06, "logits/chosen": -1.405543565750122, "logits/rejected": -0.7963299751281738, "logps/chosen": -490.7499084472656, "logps/rejected": -815.951904296875, "loss": 0.1283, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18544915318489075, "rewards/margins": 0.3879033625125885, "rewards/rejected": -0.5733525156974792, "step": 2440 }, { "epoch": 0.47, "learning_rate": 3.217008081777726e-06, "logits/chosen": -1.344856858253479, "logits/rejected": -0.7599430680274963, "logps/chosen": -535.1017456054688, "logps/rejected": -859.4102783203125, "loss": 0.1267, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20572932064533234, "rewards/margins": 0.4008665084838867, "rewards/rejected": -0.6065958738327026, "step": 2450 }, { "epoch": 0.47, "learning_rate": 3.201068473265007e-06, "logits/chosen": -1.2307863235473633, "logits/rejected": -1.0346344709396362, "logps/chosen": -398.70684814453125, "logps/rejected": -804.5445556640625, "loss": 0.1037, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1733686625957489, "rewards/margins": 0.38196641206741333, "rewards/rejected": -0.5553351044654846, "step": 2460 }, { "epoch": 0.47, "learning_rate": 3.1850978723702213e-06, "logits/chosen": -1.254408597946167, "logits/rejected": -0.6472093462944031, "logps/chosen": -360.87603759765625, "logps/rejected": -761.4263916015625, "loss": 0.1173, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14552150666713715, "rewards/margins": 0.4115825295448303, "rewards/rejected": -0.5571039915084839, "step": 2470 }, { "epoch": 0.47, "learning_rate": 3.1690969851113724e-06, "logits/chosen": -1.2257192134857178, "logits/rejected": -0.8046920895576477, "logps/chosen": -441.1246643066406, "logps/rejected": -777.4317626953125, "loss": 0.1368, "rewards/accuracies": 0.875, "rewards/chosen": -0.20118165016174316, "rewards/margins": 0.33475762605667114, "rewards/rejected": -0.5359392166137695, "step": 2480 }, { "epoch": 0.47, "learning_rate": 3.1530665188453463e-06, "logits/chosen": -1.3253055810928345, "logits/rejected": -0.8583431243896484, "logps/chosen": -451.05047607421875, "logps/rejected": -876.6774291992188, "loss": 0.0748, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17275722324848175, "rewards/margins": 0.44297927618026733, "rewards/rejected": -0.6157364845275879, "step": 2490 }, { "epoch": 0.48, "learning_rate": 3.137007182236637e-06, "logits/chosen": -1.2480649948120117, "logits/rejected": -0.7860890626907349, "logps/chosen": -461.1922912597656, "logps/rejected": -869.2349853515625, "loss": 0.1784, "rewards/accuracies": 0.875, "rewards/chosen": -0.18814244866371155, "rewards/margins": 0.37167888879776, "rewards/rejected": -0.5598213076591492, "step": 2500 }, { "epoch": 0.48, "learning_rate": 3.1209196852260204e-06, "logits/chosen": -1.3208725452423096, "logits/rejected": -0.744006335735321, "logps/chosen": -523.9013671875, "logps/rejected": -777.8002319335938, "loss": 0.1543, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2342323362827301, "rewards/margins": 0.323478639125824, "rewards/rejected": -0.5577110052108765, "step": 2510 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.2717176675796509, "logits/rejected": -0.9031423330307007, "logps/chosen": -452.52947998046875, "logps/rejected": -807.1881713867188, "loss": 0.169, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2156309187412262, "rewards/margins": 0.36098700761795044, "rewards/rejected": -0.576617956161499, "step": 2520 }, { "epoch": 0.48, "learning_rate": 3.0886630559552144e-06, "logits/chosen": -1.6132745742797852, "logits/rejected": -0.9631025195121765, "logps/chosen": -513.3320922851562, "logps/rejected": -903.8419189453125, "loss": 0.1101, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2295852154493332, "rewards/margins": 0.4146207273006439, "rewards/rejected": -0.6442059278488159, "step": 2530 }, { "epoch": 0.48, "learning_rate": 3.072495349675249e-06, "logits/chosen": -1.3435112237930298, "logits/rejected": -0.7896081209182739, "logps/chosen": -482.578857421875, "logps/rejected": -837.3626708984375, "loss": 0.1587, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20463089644908905, "rewards/margins": 0.3846661448478699, "rewards/rejected": -0.5892971158027649, "step": 2540 }, { "epoch": 0.49, "learning_rate": 3.056302334890786e-06, "logits/chosen": -1.2083903551101685, "logits/rejected": -0.7614855766296387, "logps/chosen": -493.5707092285156, "logps/rejected": -799.3251953125, "loss": 0.1548, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21667031943798065, "rewards/margins": 0.3692856431007385, "rewards/rejected": -0.5859559774398804, "step": 2550 }, { "epoch": 0.49, "learning_rate": 3.04008472745216e-06, "logits/chosen": -1.1920064687728882, "logits/rejected": -0.9130862951278687, "logps/chosen": -469.92877197265625, "logps/rejected": -890.9716796875, "loss": 0.1503, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22620613873004913, "rewards/margins": 0.38288000226020813, "rewards/rejected": -0.6090861558914185, "step": 2560 }, { "epoch": 0.49, "learning_rate": 3.0238432442968803e-06, "logits/chosen": -1.672978162765503, "logits/rejected": -1.0872669219970703, "logps/chosen": -467.4820251464844, "logps/rejected": -860.1666870117188, "loss": 0.2054, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.205267995595932, "rewards/margins": 0.3794306516647339, "rewards/rejected": -0.5846985578536987, "step": 2570 }, { "epoch": 0.49, "learning_rate": 3.0075786034179407e-06, "logits/chosen": -1.3752741813659668, "logits/rejected": -0.9237263798713684, "logps/chosen": -435.983642578125, "logps/rejected": -821.51171875, "loss": 0.1441, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17119482159614563, "rewards/margins": 0.40766802430152893, "rewards/rejected": -0.5788628458976746, "step": 2580 }, { "epoch": 0.49, "learning_rate": 2.9912915238320755e-06, "logits/chosen": -1.3044788837432861, "logits/rejected": -0.7048360109329224, "logps/chosen": -479.43719482421875, "logps/rejected": -723.38525390625, "loss": 0.1693, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18799373507499695, "rewards/margins": 0.3109305500984192, "rewards/rejected": -0.4989243149757385, "step": 2590 }, { "epoch": 0.5, "learning_rate": 2.974982725547976e-06, "logits/chosen": -1.43780517578125, "logits/rejected": -0.7967668175697327, "logps/chosen": -426.555419921875, "logps/rejected": -832.5520629882812, "loss": 0.1024, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17962546646595, "rewards/margins": 0.40819692611694336, "rewards/rejected": -0.5878223180770874, "step": 2600 }, { "epoch": 0.5, "learning_rate": 2.958652929534456e-06, "logits/chosen": -1.35874342918396, "logits/rejected": -0.9064292907714844, "logps/chosen": -517.111083984375, "logps/rejected": -766.7357177734375, "loss": 0.2069, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21054640412330627, "rewards/margins": 0.2949979901313782, "rewards/rejected": -0.5055444240570068, "step": 2610 }, { "epoch": 0.5, "learning_rate": 2.9423028576885894e-06, "logits/chosen": -1.2635751962661743, "logits/rejected": -0.901788055896759, "logps/chosen": -391.90875244140625, "logps/rejected": -699.8291625976562, "loss": 0.1595, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19072213768959045, "rewards/margins": 0.31554335355758667, "rewards/rejected": -0.5062655210494995, "step": 2620 }, { "epoch": 0.5, "learning_rate": 2.9259332328037852e-06, "logits/chosen": -1.2277098894119263, "logits/rejected": -0.6430261135101318, "logps/chosen": -400.28375244140625, "logps/rejected": -710.203369140625, "loss": 0.0919, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16125573217868805, "rewards/margins": 0.3740059435367584, "rewards/rejected": -0.5352616906166077, "step": 2630 }, { "epoch": 0.5, "learning_rate": 2.9095447785378446e-06, "logits/chosen": -1.2901400327682495, "logits/rejected": -0.7990323305130005, "logps/chosen": -350.6937561035156, "logps/rejected": -716.0798950195312, "loss": 0.1187, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1526123285293579, "rewards/margins": 0.3751480281352997, "rewards/rejected": -0.52776038646698, "step": 2640 }, { "epoch": 0.5, "learning_rate": 2.893138219380964e-06, "logits/chosen": -1.4287517070770264, "logits/rejected": -1.139864444732666, "logps/chosen": -383.1647033691406, "logps/rejected": -838.623046875, "loss": 0.1509, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1482759416103363, "rewards/margins": 0.38814201951026917, "rewards/rejected": -0.5364179015159607, "step": 2650 }, { "epoch": 0.51, "learning_rate": 2.876714280623708e-06, "logits/chosen": -1.4933322668075562, "logits/rejected": -0.9903583526611328, "logps/chosen": -309.16656494140625, "logps/rejected": -721.7738647460938, "loss": 0.1211, "rewards/accuracies": 0.875, "rewards/chosen": -0.09150397032499313, "rewards/margins": 0.4101769030094147, "rewards/rejected": -0.5016809105873108, "step": 2660 }, { "epoch": 0.51, "learning_rate": 2.8602736883249504e-06, "logits/chosen": -1.3666273355484009, "logits/rejected": -1.0327959060668945, "logps/chosen": -398.209716796875, "logps/rejected": -816.3187866210938, "loss": 0.1232, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.13451527059078217, "rewards/margins": 0.4265384078025818, "rewards/rejected": -0.5610536932945251, "step": 2670 }, { "epoch": 0.51, "learning_rate": 2.843817169279772e-06, "logits/chosen": -1.4964332580566406, "logits/rejected": -0.7607043981552124, "logps/chosen": -439.7552185058594, "logps/rejected": -774.8445434570312, "loss": 0.1113, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15827129781246185, "rewards/margins": 0.42455607652664185, "rewards/rejected": -0.5828274488449097, "step": 2680 }, { "epoch": 0.51, "learning_rate": 2.8273454509873333e-06, "logits/chosen": -1.4142652750015259, "logits/rejected": -0.8390611410140991, "logps/chosen": -426.1036071777344, "logps/rejected": -777.2252807617188, "loss": 0.1042, "rewards/accuracies": 0.875, "rewards/chosen": -0.16529937088489532, "rewards/margins": 0.4135681688785553, "rewards/rejected": -0.5788675546646118, "step": 2690 }, { "epoch": 0.51, "learning_rate": 2.8108592616187135e-06, "logits/chosen": -1.5498806238174438, "logits/rejected": -0.8601928949356079, "logps/chosen": -501.33416748046875, "logps/rejected": -877.6973876953125, "loss": 0.0862, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16485340893268585, "rewards/margins": 0.443823903799057, "rewards/rejected": -0.608677327632904, "step": 2700 }, { "epoch": 0.52, "learning_rate": 2.7943593299847186e-06, "logits/chosen": -1.3238379955291748, "logits/rejected": -0.7528778314590454, "logps/chosen": -387.79180908203125, "logps/rejected": -844.4595947265625, "loss": 0.1042, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.13337145745754242, "rewards/margins": 0.4644166827201843, "rewards/rejected": -0.5977880954742432, "step": 2710 }, { "epoch": 0.52, "learning_rate": 2.7778463855036656e-06, "logits/chosen": -1.409462571144104, "logits/rejected": -1.0474766492843628, "logps/chosen": -417.47100830078125, "logps/rejected": -823.1915893554688, "loss": 0.1305, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18547403812408447, "rewards/margins": 0.3866049647331238, "rewards/rejected": -0.5720790028572083, "step": 2720 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.6865055561065674, "logits/rejected": -0.799423098564148, "logps/chosen": -366.95648193359375, "logps/rejected": -715.5989990234375, "loss": 0.0931, "rewards/accuracies": 0.875, "rewards/chosen": -0.12677359580993652, "rewards/margins": 0.40284356474876404, "rewards/rejected": -0.5296172499656677, "step": 2730 }, { "epoch": 0.52, "learning_rate": 2.7447843785176958e-06, "logits/chosen": -1.4143977165222168, "logits/rejected": -0.8999509811401367, "logps/chosen": -478.70880126953125, "logps/rejected": -663.4302978515625, "loss": 0.2503, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18810780346393585, "rewards/margins": 0.27217328548431396, "rewards/rejected": -0.4602810740470886, "step": 2740 }, { "epoch": 0.52, "learning_rate": 2.728236777596621e-06, "logits/chosen": -1.5873699188232422, "logits/rejected": -0.9471092224121094, "logps/chosen": -425.58740234375, "logps/rejected": -758.2723999023438, "loss": 0.1842, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14836880564689636, "rewards/margins": 0.34633857011795044, "rewards/rejected": -0.4947074055671692, "step": 2750 }, { "epoch": 0.53, "learning_rate": 2.7116790869315583e-06, "logits/chosen": -1.442857265472412, "logits/rejected": -1.0407084226608276, "logps/chosen": -436.34124755859375, "logps/rejected": -738.1000366210938, "loss": 0.1919, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18492010235786438, "rewards/margins": 0.2991054356098175, "rewards/rejected": -0.4840255379676819, "step": 2760 }, { "epoch": 0.53, "learning_rate": 2.695112038494198e-06, "logits/chosen": -1.3978681564331055, "logits/rejected": -0.9016796946525574, "logps/chosen": -418.29351806640625, "logps/rejected": -726.4403686523438, "loss": 0.1329, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12680137157440186, "rewards/margins": 0.369097501039505, "rewards/rejected": -0.49589887261390686, "step": 2770 }, { "epoch": 0.53, "learning_rate": 2.6785363646699125e-06, "logits/chosen": -1.7985395193099976, "logits/rejected": -1.133002519607544, "logps/chosen": -400.7041320800781, "logps/rejected": -686.7782592773438, "loss": 0.1518, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11872676759958267, "rewards/margins": 0.3131948411464691, "rewards/rejected": -0.4319216310977936, "step": 2780 }, { "epoch": 0.53, "learning_rate": 2.6619527982253796e-06, "logits/chosen": -1.6900510787963867, "logits/rejected": -1.206436038017273, "logps/chosen": -384.26641845703125, "logps/rejected": -766.3925170898438, "loss": 0.1109, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1556965410709381, "rewards/margins": 0.3801839351654053, "rewards/rejected": -0.535880446434021, "step": 2790 }, { "epoch": 0.53, "learning_rate": 2.6453620722761897e-06, "logits/chosen": -1.5424431562423706, "logits/rejected": -1.2472230195999146, "logps/chosen": -439.0528869628906, "logps/rejected": -720.8370361328125, "loss": 0.1705, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22447606921195984, "rewards/margins": 0.2818499505519867, "rewards/rejected": -0.5063260197639465, "step": 2800 }, { "epoch": 0.54, "learning_rate": 2.628764920254435e-06, "logits/chosen": -1.3175987005233765, "logits/rejected": -0.9514663815498352, "logps/chosen": -438.6607360839844, "logps/rejected": -754.0586547851562, "loss": 0.132, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2103569060564041, "rewards/margins": 0.3280227780342102, "rewards/rejected": -0.5383796691894531, "step": 2810 }, { "epoch": 0.54, "learning_rate": 2.6121620758762877e-06, "logits/chosen": -1.3671855926513672, "logits/rejected": -0.9159896969795227, "logps/chosen": -489.04180908203125, "logps/rejected": -868.951171875, "loss": 0.1518, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20348939299583435, "rewards/margins": 0.3958008885383606, "rewards/rejected": -0.5992902517318726, "step": 2820 }, { "epoch": 0.54, "learning_rate": 2.595554273109564e-06, "logits/chosen": -1.5791618824005127, "logits/rejected": -0.9889088869094849, "logps/chosen": -479.42156982421875, "logps/rejected": -851.93017578125, "loss": 0.1121, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20812928676605225, "rewards/margins": 0.41650962829589844, "rewards/rejected": -0.6246390342712402, "step": 2830 }, { "epoch": 0.54, "learning_rate": 2.5789422461412776e-06, "logits/chosen": -1.4336564540863037, "logits/rejected": -0.9394222497940063, "logps/chosen": -521.36376953125, "logps/rejected": -932.8818359375, "loss": 0.0961, "rewards/accuracies": 0.875, "rewards/chosen": -0.22008386254310608, "rewards/margins": 0.41366496682167053, "rewards/rejected": -0.6337487697601318, "step": 2840 }, { "epoch": 0.54, "learning_rate": 2.5623267293451827e-06, "logits/chosen": -1.578136682510376, "logits/rejected": -0.938866138458252, "logps/chosen": -473.4164123535156, "logps/rejected": -816.0501098632812, "loss": 0.1097, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1769019067287445, "rewards/margins": 0.4292300343513489, "rewards/rejected": -0.6061318516731262, "step": 2850 }, { "epoch": 0.54, "learning_rate": 2.5457084572493094e-06, "logits/chosen": -1.7006841897964478, "logits/rejected": -1.0005595684051514, "logps/chosen": -452.9979553222656, "logps/rejected": -733.2164916992188, "loss": 0.1356, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11702127754688263, "rewards/margins": 0.37832674384117126, "rewards/rejected": -0.4953480362892151, "step": 2860 }, { "epoch": 0.55, "learning_rate": 2.5290881645034932e-06, "logits/chosen": -1.5898138284683228, "logits/rejected": -0.9643374681472778, "logps/chosen": -439.3221130371094, "logps/rejected": -822.1959228515625, "loss": 0.1114, "rewards/accuracies": 0.875, "rewards/chosen": -0.17275038361549377, "rewards/margins": 0.43534818291664124, "rewards/rejected": -0.608098566532135, "step": 2870 }, { "epoch": 0.55, "learning_rate": 2.5124665858468956e-06, "logits/chosen": -1.5048192739486694, "logits/rejected": -1.026029348373413, "logps/chosen": -449.2183532714844, "logps/rejected": -727.6793212890625, "loss": 0.158, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1632029116153717, "rewards/margins": 0.3238483965396881, "rewards/rejected": -0.4870513081550598, "step": 2880 }, { "epoch": 0.55, "learning_rate": 2.4958444560755268e-06, "logits/chosen": -1.4397449493408203, "logits/rejected": -0.8777030110359192, "logps/chosen": -450.47406005859375, "logps/rejected": -793.674072265625, "loss": 0.2757, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17526718974113464, "rewards/margins": 0.4246237277984619, "rewards/rejected": -0.5998908877372742, "step": 2890 }, { "epoch": 0.55, "learning_rate": 2.479222510009758e-06, "logits/chosen": -1.3214969635009766, "logits/rejected": -0.7859054803848267, "logps/chosen": -610.2854614257812, "logps/rejected": -943.8984375, "loss": 0.0738, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22706536948680878, "rewards/margins": 0.4159700274467468, "rewards/rejected": -0.6430354118347168, "step": 2900 }, { "epoch": 0.55, "learning_rate": 2.4626014824618418e-06, "logits/chosen": -1.3536314964294434, "logits/rejected": -1.0574783086776733, "logps/chosen": -475.11676025390625, "logps/rejected": -789.6507568359375, "loss": 0.1278, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24545446038246155, "rewards/margins": 0.3121533989906311, "rewards/rejected": -0.5576078295707703, "step": 2910 }, { "epoch": 0.56, "learning_rate": 2.445982108203422e-06, "logits/chosen": -1.6086533069610596, "logits/rejected": -0.9352880716323853, "logps/chosen": -512.791259765625, "logps/rejected": -846.0035400390625, "loss": 0.0639, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.20734083652496338, "rewards/margins": 0.4228171706199646, "rewards/rejected": -0.6301580667495728, "step": 2920 }, { "epoch": 0.56, "learning_rate": 2.4293651219330614e-06, "logits/chosen": -1.1689852476119995, "logits/rejected": -0.6801749467849731, "logps/chosen": -340.7318115234375, "logps/rejected": -687.781494140625, "loss": 0.1497, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12665708363056183, "rewards/margins": 0.37592196464538574, "rewards/rejected": -0.5025790929794312, "step": 2930 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.4409878253936768, "logits/rejected": -0.9979079961776733, "logps/chosen": -396.3074645996094, "logps/rejected": -764.2388916015625, "loss": 0.1157, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17770974338054657, "rewards/margins": 0.3630302846431732, "rewards/rejected": -0.5407400727272034, "step": 2940 }, { "epoch": 0.56, "learning_rate": 2.3961412515904337e-06, "logits/chosen": -1.4215434789657593, "logits/rejected": -0.7983264327049255, "logps/chosen": -519.9447021484375, "logps/rejected": -903.4754638671875, "loss": 0.0901, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2369469404220581, "rewards/margins": 0.4165986180305481, "rewards/rejected": -0.6535454988479614, "step": 2950 }, { "epoch": 0.56, "learning_rate": 2.3795358362575618e-06, "logits/chosen": -1.5344135761260986, "logits/rejected": -0.9927012324333191, "logps/chosen": -483.18701171875, "logps/rejected": -713.0413818359375, "loss": 0.1953, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23308631777763367, "rewards/margins": 0.26673564314842224, "rewards/rejected": -0.4998219907283783, "step": 2960 }, { "epoch": 0.57, "learning_rate": 2.3629357463266e-06, "logits/chosen": -1.278691291809082, "logits/rejected": -0.6941768527030945, "logps/chosen": -529.7501220703125, "logps/rejected": -935.9373168945312, "loss": 0.0946, "rewards/accuracies": 0.875, "rewards/chosen": -0.2584139406681061, "rewards/margins": 0.39817047119140625, "rewards/rejected": -0.6565844416618347, "step": 2970 }, { "epoch": 0.57, "learning_rate": 2.346341715643601e-06, "logits/chosen": -1.3031790256500244, "logits/rejected": -0.7735547423362732, "logps/chosen": -473.7268981933594, "logps/rejected": -891.9313354492188, "loss": 0.1105, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2315966635942459, "rewards/margins": 0.41763585805892944, "rewards/rejected": -0.6492325067520142, "step": 2980 }, { "epoch": 0.57, "learning_rate": 2.32975447778675e-06, "logits/chosen": -1.3092204332351685, "logits/rejected": -1.0105704069137573, "logps/chosen": -376.337158203125, "logps/rejected": -686.8082275390625, "loss": 0.1705, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1494036465883255, "rewards/margins": 0.3057688772678375, "rewards/rejected": -0.4551725387573242, "step": 2990 }, { "epoch": 0.57, "learning_rate": 2.3131747660339396e-06, "logits/chosen": -1.5149450302124023, "logits/rejected": -0.7866371870040894, "logps/chosen": -506.11688232421875, "logps/rejected": -708.8211669921875, "loss": 0.1836, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19512823224067688, "rewards/margins": 0.29869595170021057, "rewards/rejected": -0.49382415413856506, "step": 3000 }, { "epoch": 0.57, "learning_rate": 2.296603313330355e-06, "logits/chosen": -1.3197240829467773, "logits/rejected": -1.1178325414657593, "logps/chosen": -400.4996337890625, "logps/rejected": -856.5397338867188, "loss": 0.1288, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16635629534721375, "rewards/margins": 0.4014052450656891, "rewards/rejected": -0.5677615404129028, "step": 3010 }, { "epoch": 0.58, "learning_rate": 2.280040852256068e-06, "logits/chosen": -1.7349259853363037, "logits/rejected": -0.9797713160514832, "logps/chosen": -457.20330810546875, "logps/rejected": -820.8341674804688, "loss": 0.0913, "rewards/accuracies": 0.875, "rewards/chosen": -0.14074428379535675, "rewards/margins": 0.4176979959011078, "rewards/rejected": -0.5584422945976257, "step": 3020 }, { "epoch": 0.58, "learning_rate": 2.2634881149936576e-06, "logits/chosen": -1.2639141082763672, "logits/rejected": -0.9197381138801575, "logps/chosen": -368.921142578125, "logps/rejected": -796.1903076171875, "loss": 0.078, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14747993648052216, "rewards/margins": 0.41828060150146484, "rewards/rejected": -0.5657604932785034, "step": 3030 }, { "epoch": 0.58, "learning_rate": 2.246945833295836e-06, "logits/chosen": -1.4472001791000366, "logits/rejected": -1.089580774307251, "logps/chosen": -497.215576171875, "logps/rejected": -864.5166015625, "loss": 0.1665, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2171151340007782, "rewards/margins": 0.3655165135860443, "rewards/rejected": -0.5826317071914673, "step": 3040 }, { "epoch": 0.58, "learning_rate": 2.230414738453104e-06, "logits/chosen": -1.1785532236099243, "logits/rejected": -0.8585962057113647, "logps/chosen": -466.18017578125, "logps/rejected": -794.7415161132812, "loss": 0.1084, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22700226306915283, "rewards/margins": 0.3384191393852234, "rewards/rejected": -0.5654214024543762, "step": 3050 }, { "epoch": 0.58, "learning_rate": 2.2138955612614206e-06, "logits/chosen": -1.2824327945709229, "logits/rejected": -1.0094388723373413, "logps/chosen": -426.47479248046875, "logps/rejected": -781.0972900390625, "loss": 0.1623, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19283132255077362, "rewards/margins": 0.3139778971672058, "rewards/rejected": -0.5068092346191406, "step": 3060 }, { "epoch": 0.58, "learning_rate": 2.1973890319898965e-06, "logits/chosen": -1.3589626550674438, "logits/rejected": -0.9314709901809692, "logps/chosen": -481.0269470214844, "logps/rejected": -845.1025390625, "loss": 0.1671, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1950252503156662, "rewards/margins": 0.3560760021209717, "rewards/rejected": -0.5511012077331543, "step": 3070 }, { "epoch": 0.59, "learning_rate": 2.1808958803485134e-06, "logits/chosen": -1.3495731353759766, "logits/rejected": -0.6527240872383118, "logps/chosen": -513.8931884765625, "logps/rejected": -848.3651123046875, "loss": 0.1026, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22484417259693146, "rewards/margins": 0.3976616859436035, "rewards/rejected": -0.622505784034729, "step": 3080 }, { "epoch": 0.59, "learning_rate": 2.1644168354558623e-06, "logits/chosen": -1.3754124641418457, "logits/rejected": -0.866012454032898, "logps/chosen": -495.58782958984375, "logps/rejected": -820.9801635742188, "loss": 0.1242, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1740478128194809, "rewards/margins": 0.3611810803413391, "rewards/rejected": -0.5352289080619812, "step": 3090 }, { "epoch": 0.59, "learning_rate": 2.1479526258069086e-06, "logits/chosen": -1.3570363521575928, "logits/rejected": -0.8174545168876648, "logps/chosen": -472.06134033203125, "logps/rejected": -858.0859375, "loss": 0.1, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20966234803199768, "rewards/margins": 0.41604742407798767, "rewards/rejected": -0.6257097721099854, "step": 3100 }, { "epoch": 0.59, "learning_rate": 2.1315039792407975e-06, "logits/chosen": -1.6513493061065674, "logits/rejected": -0.9292311668395996, "logps/chosen": -441.37884521484375, "logps/rejected": -777.484375, "loss": 0.1398, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1936938464641571, "rewards/margins": 0.37766391038894653, "rewards/rejected": -0.5713577270507812, "step": 3110 }, { "epoch": 0.59, "learning_rate": 2.115071622908666e-06, "logits/chosen": -1.4296609163284302, "logits/rejected": -1.032963752746582, "logps/chosen": -449.71856689453125, "logps/rejected": -795.1239624023438, "loss": 0.1065, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17733417451381683, "rewards/margins": 0.38898175954818726, "rewards/rejected": -0.5663160085678101, "step": 3120 }, { "epoch": 0.6, "learning_rate": 2.0986562832415063e-06, "logits/chosen": -1.5180729627609253, "logits/rejected": -1.0662920475006104, "logps/chosen": -442.786865234375, "logps/rejected": -774.1068115234375, "loss": 0.1287, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17527981102466583, "rewards/margins": 0.3450900912284851, "rewards/rejected": -0.5203698873519897, "step": 3130 }, { "epoch": 0.6, "learning_rate": 2.082258685918047e-06, "logits/chosen": -1.3603161573410034, "logits/rejected": -0.9249211549758911, "logps/chosen": -434.474853515625, "logps/rejected": -705.5407104492188, "loss": 0.1595, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1986636221408844, "rewards/margins": 0.3334785997867584, "rewards/rejected": -0.5321422815322876, "step": 3140 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.4363136291503906, "logits/rejected": -1.056292176246643, "logps/chosen": -468.195068359375, "logps/rejected": -817.7234497070312, "loss": 0.1253, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2013559639453888, "rewards/margins": 0.38898658752441406, "rewards/rejected": -0.5903424620628357, "step": 3150 }, { "epoch": 0.6, "learning_rate": 2.049519617063389e-06, "logits/chosen": -1.3909878730773926, "logits/rejected": -0.953696608543396, "logps/chosen": -423.89337158203125, "logps/rejected": -744.966552734375, "loss": 0.158, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18486112356185913, "rewards/margins": 0.317654550075531, "rewards/rejected": -0.5025156736373901, "step": 3160 }, { "epoch": 0.6, "learning_rate": 2.033179592839792e-06, "logits/chosen": -1.3011595010757446, "logits/rejected": -0.5752730369567871, "logps/chosen": -562.1802978515625, "logps/rejected": -921.9736328125, "loss": 0.1342, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22026212513446808, "rewards/margins": 0.4167749285697937, "rewards/rejected": -0.6370370984077454, "step": 3170 }, { "epoch": 0.61, "learning_rate": 2.0168602055111175e-06, "logits/chosen": -1.3257206678390503, "logits/rejected": -0.7853381037712097, "logps/chosen": -534.0789184570312, "logps/rejected": -853.47216796875, "loss": 0.1813, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22569552063941956, "rewards/margins": 0.3468303084373474, "rewards/rejected": -0.5725258588790894, "step": 3180 }, { "epoch": 0.61, "learning_rate": 2.0005621765142942e-06, "logits/chosen": -1.7177248001098633, "logits/rejected": -0.8074045181274414, "logps/chosen": -497.9803771972656, "logps/rejected": -830.1541137695312, "loss": 0.1373, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17401547729969025, "rewards/margins": 0.3853972852230072, "rewards/rejected": -0.5594127774238586, "step": 3190 }, { "epoch": 0.61, "learning_rate": 1.9842862263420565e-06, "logits/chosen": -1.281773328781128, "logits/rejected": -1.088385820388794, "logps/chosen": -508.61102294921875, "logps/rejected": -966.55712890625, "loss": 0.1346, "rewards/accuracies": 0.875, "rewards/chosen": -0.27652305364608765, "rewards/margins": 0.38310542702674866, "rewards/rejected": -0.6596284508705139, "step": 3200 }, { "epoch": 0.61, "learning_rate": 1.9680330745110954e-06, "logits/chosen": -1.438289999961853, "logits/rejected": -0.9979110956192017, "logps/chosen": -532.2144165039062, "logps/rejected": -793.0737915039062, "loss": 0.1565, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2952241897583008, "rewards/margins": 0.30770084261894226, "rewards/rejected": -0.6029250621795654, "step": 3210 }, { "epoch": 0.61, "learning_rate": 1.9518034395302413e-06, "logits/chosen": -1.528386116027832, "logits/rejected": -0.9441434741020203, "logps/chosen": -555.3648681640625, "logps/rejected": -846.9880981445312, "loss": 0.1653, "rewards/accuracies": 0.875, "rewards/chosen": -0.2668333649635315, "rewards/margins": 0.334044873714447, "rewards/rejected": -0.6008782386779785, "step": 3220 }, { "epoch": 0.62, "learning_rate": 1.9355980388687145e-06, "logits/chosen": -1.3972076177597046, "logits/rejected": -0.9894139170646667, "logps/chosen": -429.25860595703125, "logps/rejected": -806.5794067382812, "loss": 0.1412, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1995122730731964, "rewards/margins": 0.35712283849716187, "rewards/rejected": -0.5566351413726807, "step": 3230 }, { "epoch": 0.62, "learning_rate": 1.9194175889243942e-06, "logits/chosen": -1.347569227218628, "logits/rejected": -0.6678511500358582, "logps/chosen": -455.98223876953125, "logps/rejected": -695.4212036132812, "loss": 0.1293, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.156851664185524, "rewards/margins": 0.3653084635734558, "rewards/rejected": -0.5221601724624634, "step": 3240 }, { "epoch": 0.62, "learning_rate": 1.903262804992156e-06, "logits/chosen": -1.5471915006637573, "logits/rejected": -1.1331428289413452, "logps/chosen": -420.92010498046875, "logps/rejected": -766.4700317382812, "loss": 0.1414, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18272459506988525, "rewards/margins": 0.3516112267971039, "rewards/rejected": -0.5343358516693115, "step": 3250 }, { "epoch": 0.62, "learning_rate": 1.8871344012322504e-06, "logits/chosen": -1.3458797931671143, "logits/rejected": -0.7993073463439941, "logps/chosen": -450.0318298339844, "logps/rejected": -791.7275390625, "loss": 0.1445, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15974757075309753, "rewards/margins": 0.38430100679397583, "rewards/rejected": -0.544048547744751, "step": 3260 }, { "epoch": 0.62, "learning_rate": 1.8710330906387288e-06, "logits/chosen": -1.5286681652069092, "logits/rejected": -0.9678757786750793, "logps/chosen": -443.01434326171875, "logps/rejected": -846.7928466796875, "loss": 0.1332, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.169307142496109, "rewards/margins": 0.3826208710670471, "rewards/rejected": -0.5519279837608337, "step": 3270 }, { "epoch": 0.62, "learning_rate": 1.8549595850079272e-06, "logits/chosen": -1.424188256263733, "logits/rejected": -0.8401851654052734, "logps/chosen": -478.42138671875, "logps/rejected": -863.6852416992188, "loss": 0.1309, "rewards/accuracies": 0.875, "rewards/chosen": -0.20009620487689972, "rewards/margins": 0.4254828095436096, "rewards/rejected": -0.6255789399147034, "step": 3280 }, { "epoch": 0.63, "learning_rate": 1.8389145949069953e-06, "logits/chosen": -1.4969661235809326, "logits/rejected": -0.9382787942886353, "logps/chosen": -549.80224609375, "logps/rejected": -853.8473510742188, "loss": 0.1453, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26673033833503723, "rewards/margins": 0.3659650981426239, "rewards/rejected": -0.6326954364776611, "step": 3290 }, { "epoch": 0.63, "learning_rate": 1.8228988296424877e-06, "logits/chosen": -1.4803087711334229, "logits/rejected": -0.8367649912834167, "logps/chosen": -537.7322998046875, "logps/rejected": -841.1953125, "loss": 0.1481, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2574412226676941, "rewards/margins": 0.3756985068321228, "rewards/rejected": -0.6331397294998169, "step": 3300 }, { "epoch": 0.63, "learning_rate": 1.806912997229008e-06, "logits/chosen": -1.519325613975525, "logits/rejected": -0.9406343698501587, "logps/chosen": -466.12841796875, "logps/rejected": -867.13671875, "loss": 0.1006, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22245760262012482, "rewards/margins": 0.40345969796180725, "rewards/rejected": -0.6259172558784485, "step": 3310 }, { "epoch": 0.63, "learning_rate": 1.7909578043579037e-06, "logits/chosen": -1.702344536781311, "logits/rejected": -1.1771308183670044, "logps/chosen": -429.8223571777344, "logps/rejected": -793.71728515625, "loss": 0.1645, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21643200516700745, "rewards/margins": 0.3711642622947693, "rewards/rejected": -0.5875962376594543, "step": 3320 }, { "epoch": 0.63, "learning_rate": 1.7750339563660346e-06, "logits/chosen": -1.456526756286621, "logits/rejected": -0.8993295431137085, "logps/chosen": -432.34246826171875, "logps/rejected": -719.046875, "loss": 0.1577, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18716856837272644, "rewards/margins": 0.31780749559402466, "rewards/rejected": -0.5049760937690735, "step": 3330 }, { "epoch": 0.64, "learning_rate": 1.759142157204583e-06, "logits/chosen": -1.7000188827514648, "logits/rejected": -0.777019202709198, "logps/chosen": -515.3919677734375, "logps/rejected": -785.5335083007812, "loss": 0.1507, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19055669009685516, "rewards/margins": 0.38672685623168945, "rewards/rejected": -0.5772835612297058, "step": 3340 }, { "epoch": 0.64, "learning_rate": 1.7432831094079357e-06, "logits/chosen": -1.2466986179351807, "logits/rejected": -1.032116413116455, "logps/chosen": -402.2715759277344, "logps/rejected": -852.4505004882812, "loss": 0.1106, "rewards/accuracies": 0.875, "rewards/chosen": -0.177595853805542, "rewards/margins": 0.41448336839675903, "rewards/rejected": -0.592079222202301, "step": 3350 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.3990333080291748, "logits/rejected": -0.7502264976501465, "logps/chosen": -439.033203125, "logps/rejected": -812.1687622070312, "loss": 0.1315, "rewards/accuracies": 0.875, "rewards/chosen": -0.16386166214942932, "rewards/margins": 0.40608271956443787, "rewards/rejected": -0.5699443221092224, "step": 3360 }, { "epoch": 0.64, "learning_rate": 1.7116660707763637e-06, "logits/chosen": -1.4179266691207886, "logits/rejected": -1.0428603887557983, "logps/chosen": -367.4630432128906, "logps/rejected": -698.6688232421875, "loss": 0.1427, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12984821200370789, "rewards/margins": 0.3746800124645233, "rewards/rejected": -0.5045282244682312, "step": 3370 }, { "epoch": 0.64, "learning_rate": 1.695909477647054e-06, "logits/chosen": -1.5399088859558105, "logits/rejected": -1.347699761390686, "logps/chosen": -322.19732666015625, "logps/rejected": -746.3214721679688, "loss": 0.0915, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11821082979440689, "rewards/margins": 0.38466712832450867, "rewards/rejected": -0.502877950668335, "step": 3380 }, { "epoch": 0.65, "learning_rate": 1.6801884312319893e-06, "logits/chosen": -1.6122725009918213, "logits/rejected": -1.1032224893569946, "logps/chosen": -382.66937255859375, "logps/rejected": -783.5607299804688, "loss": 0.1432, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10153687000274658, "rewards/margins": 0.3991749882698059, "rewards/rejected": -0.5007117986679077, "step": 3390 }, { "epoch": 0.65, "learning_rate": 1.6645036265170314e-06, "logits/chosen": -1.3864188194274902, "logits/rejected": -0.8151859045028687, "logps/chosen": -449.18487548828125, "logps/rejected": -802.3887939453125, "loss": 0.1268, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16347984969615936, "rewards/margins": 0.3677942156791687, "rewards/rejected": -0.5312740206718445, "step": 3400 }, { "epoch": 0.65, "learning_rate": 1.648855756885893e-06, "logits/chosen": -1.8782033920288086, "logits/rejected": -1.179089069366455, "logps/chosen": -452.5113220214844, "logps/rejected": -829.25537109375, "loss": 0.0867, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.11498744785785675, "rewards/margins": 0.4453309178352356, "rewards/rejected": -0.5603184103965759, "step": 3410 }, { "epoch": 0.65, "learning_rate": 1.633245514089482e-06, "logits/chosen": -1.430738091468811, "logits/rejected": -1.1076922416687012, "logps/chosen": -357.9158020019531, "logps/rejected": -674.6552124023438, "loss": 0.1592, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15646475553512573, "rewards/margins": 0.33161425590515137, "rewards/rejected": -0.4880790114402771, "step": 3420 }, { "epoch": 0.65, "learning_rate": 1.6176735882153284e-06, "logits/chosen": -1.502147912979126, "logits/rejected": -1.1247570514678955, "logps/chosen": -403.4971923828125, "logps/rejected": -723.5167846679688, "loss": 0.1839, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14581628143787384, "rewards/margins": 0.3280007839202881, "rewards/rejected": -0.47381705045700073, "step": 3430 }, { "epoch": 0.66, "learning_rate": 1.6021406676570667e-06, "logits/chosen": -1.838065505027771, "logits/rejected": -0.7035130858421326, "logps/chosen": -548.0798950195312, "logps/rejected": -907.0427856445312, "loss": 0.1221, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18870152533054352, "rewards/margins": 0.45663541555404663, "rewards/rejected": -0.645336925983429, "step": 3440 }, { "epoch": 0.66, "learning_rate": 1.5866474390840126e-06, "logits/chosen": -1.2180145978927612, "logits/rejected": -0.6911731958389282, "logps/chosen": -360.14068603515625, "logps/rejected": -701.0031127929688, "loss": 0.1478, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12714442610740662, "rewards/margins": 0.37260302901268005, "rewards/rejected": -0.49974745512008667, "step": 3450 }, { "epoch": 0.66, "learning_rate": 1.5711945874108053e-06, "logits/chosen": -1.2224258184432983, "logits/rejected": -0.8641057014465332, "logps/chosen": -421.90386962890625, "logps/rejected": -749.3494873046875, "loss": 0.128, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22099857032299042, "rewards/margins": 0.32465022802352905, "rewards/rejected": -0.5456488728523254, "step": 3460 }, { "epoch": 0.66, "learning_rate": 1.5557827957671249e-06, "logits/chosen": -1.4962098598480225, "logits/rejected": -0.7952414751052856, "logps/chosen": -501.6507263183594, "logps/rejected": -846.8707275390625, "loss": 0.0788, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20171129703521729, "rewards/margins": 0.3923703134059906, "rewards/rejected": -0.5940815806388855, "step": 3470 }, { "epoch": 0.66, "learning_rate": 1.5404127454674994e-06, "logits/chosen": -1.5189071893692017, "logits/rejected": -0.7931365966796875, "logps/chosen": -436.37261962890625, "logps/rejected": -771.4508056640625, "loss": 0.117, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17762836813926697, "rewards/margins": 0.3814625144004822, "rewards/rejected": -0.559090793132782, "step": 3480 }, { "epoch": 0.66, "learning_rate": 1.5250851159811809e-06, "logits/chosen": -1.3067513704299927, "logits/rejected": -1.024893879890442, "logps/chosen": -472.52783203125, "logps/rejected": -934.4078979492188, "loss": 0.1351, "rewards/accuracies": 0.875, "rewards/chosen": -0.21360650658607483, "rewards/margins": 0.39003968238830566, "rewards/rejected": -0.6036461591720581, "step": 3490 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -1.5230262279510498, "logits/rejected": -0.7585245966911316, "logps/chosen": -620.0811157226562, "logps/rejected": -883.2972412109375, "loss": 0.1144, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.25273051857948303, "rewards/margins": 0.3818441331386566, "rewards/rejected": -0.6345746517181396, "step": 3500 }, { "epoch": 0.67, "learning_rate": 1.4945598279189565e-06, "logits/chosen": -1.5459498167037964, "logits/rejected": -1.0424516201019287, "logps/chosen": -531.4592895507812, "logps/rejected": -861.1756591796875, "loss": 0.1068, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20926180481910706, "rewards/margins": 0.3746519386768341, "rewards/rejected": -0.5839136838912964, "step": 3510 }, { "epoch": 0.67, "learning_rate": 1.4793635187852622e-06, "logits/chosen": -1.3689630031585693, "logits/rejected": -1.0043765306472778, "logps/chosen": -466.04742431640625, "logps/rejected": -848.6345825195312, "loss": 0.1315, "rewards/accuracies": 0.875, "rewards/chosen": -0.18529382348060608, "rewards/margins": 0.40114277601242065, "rewards/rejected": -0.5864366292953491, "step": 3520 }, { "epoch": 0.67, "learning_rate": 1.4642123292896406e-06, "logits/chosen": -1.2944557666778564, "logits/rejected": -1.0542017221450806, "logps/chosen": -412.70294189453125, "logps/rejected": -751.0271606445312, "loss": 0.1322, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19071893393993378, "rewards/margins": 0.35320931673049927, "rewards/rejected": -0.5439282655715942, "step": 3530 }, { "epoch": 0.67, "learning_rate": 1.4491069292260867e-06, "logits/chosen": -1.3565905094146729, "logits/rejected": -0.857538104057312, "logps/chosen": -451.68157958984375, "logps/rejected": -717.7445068359375, "loss": 0.1594, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20329537987709045, "rewards/margins": 0.3223893940448761, "rewards/rejected": -0.5256848335266113, "step": 3540 }, { "epoch": 0.68, "learning_rate": 1.4340479863643658e-06, "logits/chosen": -1.3103424310684204, "logits/rejected": -0.8766673803329468, "logps/chosen": -401.0528564453125, "logps/rejected": -818.9484252929688, "loss": 0.103, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17303694784641266, "rewards/margins": 0.4090678095817566, "rewards/rejected": -0.5821046829223633, "step": 3550 }, { "epoch": 0.68, "learning_rate": 1.4190361664204936e-06, "logits/chosen": -1.248764991760254, "logits/rejected": -0.9546260833740234, "logps/chosen": -418.53302001953125, "logps/rejected": -729.8751220703125, "loss": 0.1891, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2053443193435669, "rewards/margins": 0.2881602644920349, "rewards/rejected": -0.4935045838356018, "step": 3560 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.2963250875473022, "logits/rejected": -0.8378680944442749, "logps/chosen": -461.37744140625, "logps/rejected": -820.8511962890625, "loss": 0.092, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16947893798351288, "rewards/margins": 0.4276408553123474, "rewards/rejected": -0.5971198081970215, "step": 3570 }, { "epoch": 0.68, "learning_rate": 1.3891565477051242e-06, "logits/chosen": -1.5343642234802246, "logits/rejected": -0.9604697227478027, "logps/chosen": -463.36749267578125, "logps/rejected": -785.0738525390625, "loss": 0.1551, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18954695761203766, "rewards/margins": 0.3703259825706482, "rewards/rejected": -0.5598729848861694, "step": 3580 }, { "epoch": 0.68, "learning_rate": 1.3742900698325034e-06, "logits/chosen": -1.4683080911636353, "logits/rejected": -0.9448213577270508, "logps/chosen": -421.1533203125, "logps/rejected": -709.0045166015625, "loss": 0.1152, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12771710753440857, "rewards/margins": 0.367117702960968, "rewards/rejected": -0.4948348104953766, "step": 3590 }, { "epoch": 0.69, "learning_rate": 1.3594733566170925e-06, "logits/chosen": -1.5439958572387695, "logits/rejected": -0.8940641283988953, "logps/chosen": -415.951416015625, "logps/rejected": -817.1143798828125, "loss": 0.098, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15611250698566437, "rewards/margins": 0.39666497707366943, "rewards/rejected": -0.5527775287628174, "step": 3600 }, { "epoch": 0.69, "learning_rate": 1.3447070630665771e-06, "logits/chosen": -1.6253535747528076, "logits/rejected": -1.0041207075119019, "logps/chosen": -445.95611572265625, "logps/rejected": -789.1965942382812, "loss": 0.109, "rewards/accuracies": 0.875, "rewards/chosen": -0.15041926503181458, "rewards/margins": 0.39495956897735596, "rewards/rejected": -0.5453788042068481, "step": 3610 }, { "epoch": 0.69, "learning_rate": 1.329991841959717e-06, "logits/chosen": -1.4438072443008423, "logits/rejected": -0.9927061200141907, "logps/chosen": -389.11492919921875, "logps/rejected": -704.0327758789062, "loss": 0.1459, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13252386450767517, "rewards/margins": 0.33694010972976685, "rewards/rejected": -0.4694640040397644, "step": 3620 }, { "epoch": 0.69, "learning_rate": 1.3153283438175036e-06, "logits/chosen": -1.3442825078964233, "logits/rejected": -0.8384577035903931, "logps/chosen": -428.9632873535156, "logps/rejected": -847.52490234375, "loss": 0.1157, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15260396897792816, "rewards/margins": 0.38510820269584656, "rewards/rejected": -0.5377122163772583, "step": 3630 }, { "epoch": 0.69, "learning_rate": 1.3007172168743854e-06, "logits/chosen": -1.3540570735931396, "logits/rejected": -0.8695130348205566, "logps/chosen": -380.8751220703125, "logps/rejected": -795.1818237304688, "loss": 0.1161, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15014532208442688, "rewards/margins": 0.4058755040168762, "rewards/rejected": -0.5560208559036255, "step": 3640 }, { "epoch": 0.7, "learning_rate": 1.2861591070496193e-06, "logits/chosen": -1.3193162679672241, "logits/rejected": -0.6444579362869263, "logps/chosen": -463.63739013671875, "logps/rejected": -793.9559936523438, "loss": 0.1421, "rewards/accuracies": 0.875, "rewards/chosen": -0.16820880770683289, "rewards/margins": 0.37421607971191406, "rewards/rejected": -0.5424249172210693, "step": 3650 }, { "epoch": 0.7, "learning_rate": 1.271654657918722e-06, "logits/chosen": -1.444557785987854, "logits/rejected": -0.8822986483573914, "logps/chosen": -412.38421630859375, "logps/rejected": -794.8355712890625, "loss": 0.1247, "rewards/accuracies": 0.875, "rewards/chosen": -0.14085407555103302, "rewards/margins": 0.40679341554641724, "rewards/rejected": -0.5476474165916443, "step": 3660 }, { "epoch": 0.7, "learning_rate": 1.2572045106850051e-06, "logits/chosen": -1.2360610961914062, "logits/rejected": -0.9763832092285156, "logps/chosen": -446.79791259765625, "logps/rejected": -782.455078125, "loss": 0.1783, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16961245238780975, "rewards/margins": 0.34499895572662354, "rewards/rejected": -0.5146113634109497, "step": 3670 }, { "epoch": 0.7, "learning_rate": 1.2428093041512418e-06, "logits/chosen": -1.3006641864776611, "logits/rejected": -0.8986924886703491, "logps/chosen": -377.9948425292969, "logps/rejected": -789.4093017578125, "loss": 0.1373, "rewards/accuracies": 0.875, "rewards/chosen": -0.14963093400001526, "rewards/margins": 0.4097939133644104, "rewards/rejected": -0.5594248175621033, "step": 3680 }, { "epoch": 0.7, "learning_rate": 1.2284696746914216e-06, "logits/chosen": -1.5449702739715576, "logits/rejected": -1.0956764221191406, "logps/chosen": -472.701171875, "logps/rejected": -823.234375, "loss": 0.1116, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22493131458759308, "rewards/margins": 0.37620171904563904, "rewards/rejected": -0.6011329889297485, "step": 3690 }, { "epoch": 0.7, "learning_rate": 1.2141862562226164e-06, "logits/chosen": -1.4426448345184326, "logits/rejected": -0.7812067270278931, "logps/chosen": -388.9836120605469, "logps/rejected": -786.234619140625, "loss": 0.0986, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1418767273426056, "rewards/margins": 0.41504424810409546, "rewards/rejected": -0.5569210648536682, "step": 3700 }, { "epoch": 0.71, "learning_rate": 1.1999596801769617e-06, "logits/chosen": -1.250902533531189, "logits/rejected": -0.7321473360061646, "logps/chosen": -448.20123291015625, "logps/rejected": -798.972900390625, "loss": 0.1181, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19077713787555695, "rewards/margins": 0.4014213979244232, "rewards/rejected": -0.5921986103057861, "step": 3710 }, { "epoch": 0.71, "learning_rate": 1.185790575473738e-06, "logits/chosen": -1.3282699584960938, "logits/rejected": -1.113930344581604, "logps/chosen": -373.06341552734375, "logps/rejected": -710.2935180664062, "loss": 0.1935, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16925348341464996, "rewards/margins": 0.3049162030220032, "rewards/rejected": -0.47416967153549194, "step": 3720 }, { "epoch": 0.71, "learning_rate": 1.1716795684915728e-06, "logits/chosen": -1.5629644393920898, "logits/rejected": -0.824572741985321, "logps/chosen": -519.9608154296875, "logps/rejected": -859.3759765625, "loss": 0.107, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17091256380081177, "rewards/margins": 0.4353674352169037, "rewards/rejected": -0.6062799692153931, "step": 3730 }, { "epoch": 0.71, "learning_rate": 1.1576272830407418e-06, "logits/chosen": -1.335302472114563, "logits/rejected": -0.5338510274887085, "logps/chosen": -415.7828674316406, "logps/rejected": -716.2789306640625, "loss": 0.1378, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14363467693328857, "rewards/margins": 0.40200501680374146, "rewards/rejected": -0.5456396341323853, "step": 3740 }, { "epoch": 0.71, "learning_rate": 1.1436343403356019e-06, "logits/chosen": -1.2487661838531494, "logits/rejected": -0.8410084843635559, "logps/chosen": -435.2356872558594, "logps/rejected": -938.2427978515625, "loss": 0.0817, "rewards/accuracies": 1.0, "rewards/chosen": -0.17009887099266052, "rewards/margins": 0.4728882312774658, "rewards/rejected": -0.6429871320724487, "step": 3750 }, { "epoch": 0.72, "learning_rate": 1.129701358967123e-06, "logits/chosen": -1.5160211324691772, "logits/rejected": -0.8383380174636841, "logps/chosen": -415.8152770996094, "logps/rejected": -751.87841796875, "loss": 0.0965, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15089921653270721, "rewards/margins": 0.3740783631801605, "rewards/rejected": -0.5249775648117065, "step": 3760 }, { "epoch": 0.72, "learning_rate": 1.11582895487554e-06, "logits/chosen": -1.5249440670013428, "logits/rejected": -0.9139940142631531, "logps/chosen": -519.3067626953125, "logps/rejected": -860.0250854492188, "loss": 0.1018, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16489791870117188, "rewards/margins": 0.4452267587184906, "rewards/rejected": -0.6101247072219849, "step": 3770 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.6652414798736572, "logits/rejected": -0.9461329579353333, "logps/chosen": -387.9971923828125, "logps/rejected": -721.0979614257812, "loss": 0.1091, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1290167272090912, "rewards/margins": 0.39111775159835815, "rewards/rejected": -0.520134449005127, "step": 3780 }, { "epoch": 0.72, "learning_rate": 1.0882683288671041e-06, "logits/chosen": -1.3538219928741455, "logits/rejected": -0.8247833251953125, "logps/chosen": -562.599365234375, "logps/rejected": -884.5865478515625, "loss": 0.161, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2595978379249573, "rewards/margins": 0.3135002553462982, "rewards/rejected": -0.5730981230735779, "step": 3790 }, { "epoch": 0.72, "learning_rate": 1.0745813253325957e-06, "logits/chosen": -1.2573899030685425, "logits/rejected": -0.805769145488739, "logps/chosen": -464.7861328125, "logps/rejected": -805.7566528320312, "loss": 0.1376, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18592938780784607, "rewards/margins": 0.374401330947876, "rewards/rejected": -0.5603306889533997, "step": 3800 }, { "epoch": 0.73, "learning_rate": 1.0609573357858166e-06, "logits/chosen": -1.5432183742523193, "logits/rejected": -1.1239479780197144, "logps/chosen": -417.8250427246094, "logps/rejected": -786.6776123046875, "loss": 0.1468, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16033196449279785, "rewards/margins": 0.36673012375831604, "rewards/rejected": -0.5270620584487915, "step": 3810 }, { "epoch": 0.73, "learning_rate": 1.0473969625072922e-06, "logits/chosen": -1.169342279434204, "logits/rejected": -0.9967323541641235, "logps/chosen": -430.4443359375, "logps/rejected": -731.06591796875, "loss": 0.1346, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1649528443813324, "rewards/margins": 0.34193381667137146, "rewards/rejected": -0.5068866610527039, "step": 3820 }, { "epoch": 0.73, "learning_rate": 1.0339008049652427e-06, "logits/chosen": -1.4612452983856201, "logits/rejected": -0.6827516555786133, "logps/chosen": -466.76007080078125, "logps/rejected": -736.04541015625, "loss": 0.1028, "rewards/accuracies": 0.875, "rewards/chosen": -0.1540287435054779, "rewards/margins": 0.3944511413574219, "rewards/rejected": -0.5484797954559326, "step": 3830 }, { "epoch": 0.73, "learning_rate": 1.0204694597890814e-06, "logits/chosen": -1.5688393115997314, "logits/rejected": -0.937122642993927, "logps/chosen": -463.2225646972656, "logps/rejected": -714.1920776367188, "loss": 0.1475, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17180675268173218, "rewards/margins": 0.3516649603843689, "rewards/rejected": -0.5234717130661011, "step": 3840 }, { "epoch": 0.73, "learning_rate": 1.0071035207430352e-06, "logits/chosen": -1.2152472734451294, "logits/rejected": -0.7218281030654907, "logps/chosen": -379.1766662597656, "logps/rejected": -760.9722900390625, "loss": 0.1052, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15967470407485962, "rewards/margins": 0.4116497039794922, "rewards/rejected": -0.5713244080543518, "step": 3850 }, { "epoch": 0.74, "learning_rate": 9.938035786999018e-07, "logits/chosen": -1.54374098777771, "logits/rejected": -0.8507475852966309, "logps/chosen": -453.748291015625, "logps/rejected": -870.9924926757812, "loss": 0.1048, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12629708647727966, "rewards/margins": 0.46032625436782837, "rewards/rejected": -0.5866233110427856, "step": 3860 }, { "epoch": 0.74, "learning_rate": 9.805702216149252e-07, "logits/chosen": -1.5807303190231323, "logits/rejected": -0.846795916557312, "logps/chosen": -481.73455810546875, "logps/rejected": -741.826904296875, "loss": 0.1044, "rewards/accuracies": 0.875, "rewards/chosen": -0.18064181506633759, "rewards/margins": 0.3742702007293701, "rewards/rejected": -0.5549120306968689, "step": 3870 }, { "epoch": 0.74, "learning_rate": 9.674040344998056e-07, "logits/chosen": -1.44862961769104, "logits/rejected": -0.9098674654960632, "logps/chosen": -427.7437438964844, "logps/rejected": -712.8436889648438, "loss": 0.1025, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17423871159553528, "rewards/margins": 0.3249169886112213, "rewards/rejected": -0.4991557002067566, "step": 3880 }, { "epoch": 0.74, "learning_rate": 9.543055993968339e-07, "logits/chosen": -1.2700507640838623, "logits/rejected": -0.736456036567688, "logps/chosen": -482.3666076660156, "logps/rejected": -779.4149169921875, "loss": 0.1826, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.191014364361763, "rewards/margins": 0.35905060172080994, "rewards/rejected": -0.5500649213790894, "step": 3890 }, { "epoch": 0.74, "learning_rate": 9.412754953531664e-07, "logits/chosen": -1.4608185291290283, "logits/rejected": -0.7372334003448486, "logps/chosen": -476.26025390625, "logps/rejected": -861.3048706054688, "loss": 0.0983, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18024982511997223, "rewards/margins": 0.4462791085243225, "rewards/rejected": -0.6265289187431335, "step": 3900 }, { "epoch": 0.74, "learning_rate": 9.283142983952231e-07, "logits/chosen": -1.4416873455047607, "logits/rejected": -0.9548746347427368, "logps/chosen": -451.0269470214844, "logps/rejected": -842.4944458007812, "loss": 0.141, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18560755252838135, "rewards/margins": 0.3866332769393921, "rewards/rejected": -0.5722408294677734, "step": 3910 }, { "epoch": 0.75, "learning_rate": 9.154225815032242e-07, "logits/chosen": -1.1411993503570557, "logits/rejected": -0.7601999044418335, "logps/chosen": -476.3477478027344, "logps/rejected": -879.9486083984375, "loss": 0.1363, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22499904036521912, "rewards/margins": 0.36296606063842773, "rewards/rejected": -0.587965190410614, "step": 3920 }, { "epoch": 0.75, "learning_rate": 9.026009145858608e-07, "logits/chosen": -1.2628328800201416, "logits/rejected": -0.6380741000175476, "logps/chosen": -559.0230102539062, "logps/rejected": -924.6239013671875, "loss": 0.1345, "rewards/accuracies": 0.875, "rewards/chosen": -0.20459707081317902, "rewards/margins": 0.4145767092704773, "rewards/rejected": -0.6191738247871399, "step": 3930 }, { "epoch": 0.75, "learning_rate": 8.898498644550973e-07, "logits/chosen": -1.5411838293075562, "logits/rejected": -0.969692587852478, "logps/chosen": -488.0791015625, "logps/rejected": -855.1666870117188, "loss": 0.0915, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17836572229862213, "rewards/margins": 0.41328635811805725, "rewards/rejected": -0.5916520953178406, "step": 3940 }, { "epoch": 0.75, "learning_rate": 8.771699948011203e-07, "logits/chosen": -1.236838936805725, "logits/rejected": -0.7630554437637329, "logps/chosen": -478.7364196777344, "logps/rejected": -849.2463989257812, "loss": 0.1036, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18724744021892548, "rewards/margins": 0.40733084082603455, "rewards/rejected": -0.5945782661437988, "step": 3950 }, { "epoch": 0.75, "learning_rate": 8.645618661674144e-07, "logits/chosen": -1.4421743154525757, "logits/rejected": -0.870509147644043, "logps/chosen": -475.2801208496094, "logps/rejected": -817.0428466796875, "loss": 0.1415, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21000473201274872, "rewards/margins": 0.4020620286464691, "rewards/rejected": -0.6120668053627014, "step": 3960 }, { "epoch": 0.76, "learning_rate": 8.520260359259822e-07, "logits/chosen": -1.368212103843689, "logits/rejected": -0.8514496088027954, "logps/chosen": -402.638671875, "logps/rejected": -713.1438598632812, "loss": 0.1582, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16343197226524353, "rewards/margins": 0.35957595705986023, "rewards/rejected": -0.5230079293251038, "step": 3970 }, { "epoch": 0.76, "learning_rate": 8.395630582527075e-07, "logits/chosen": -1.6237843036651611, "logits/rejected": -0.8098438382148743, "logps/chosen": -454.4287109375, "logps/rejected": -873.5592041015625, "loss": 0.0917, "rewards/accuracies": 0.875, "rewards/chosen": -0.14572009444236755, "rewards/margins": 0.43538030982017517, "rewards/rejected": -0.5811004042625427, "step": 3980 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.2719871997833252, "logits/rejected": -0.756710946559906, "logps/chosen": -433.4811096191406, "logps/rejected": -842.3396606445312, "loss": 0.147, "rewards/accuracies": 0.875, "rewards/chosen": -0.20472273230552673, "rewards/margins": 0.38545867800712585, "rewards/rejected": -0.5901814103126526, "step": 3990 }, { "epoch": 0.76, "learning_rate": 8.148578611867114e-07, "logits/chosen": -1.391553521156311, "logits/rejected": -1.0407650470733643, "logps/chosen": -421.53411865234375, "logps/rejected": -877.9796752929688, "loss": 0.1027, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19400069117546082, "rewards/margins": 0.3898494839668274, "rewards/rejected": -0.5838501453399658, "step": 4000 }, { "epoch": 0.76, "learning_rate": 8.026167339453792e-07, "logits/chosen": -1.4463930130004883, "logits/rejected": -0.7711666226387024, "logps/chosen": -580.5037841796875, "logps/rejected": -872.8985595703125, "loss": 0.1638, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2652667164802551, "rewards/margins": 0.35486751794815063, "rewards/rejected": -0.6201342344284058, "step": 4010 }, { "epoch": 0.77, "learning_rate": 7.904506435266998e-07, "logits/chosen": -1.4647196531295776, "logits/rejected": -0.9126921892166138, "logps/chosen": -465.64764404296875, "logps/rejected": -733.6016845703125, "loss": 0.0908, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17749914526939392, "rewards/margins": 0.3689937889575958, "rewards/rejected": -0.5464929342269897, "step": 4020 }, { "epoch": 0.77, "learning_rate": 7.783601277613378e-07, "logits/chosen": -1.391645073890686, "logits/rejected": -0.7750099897384644, "logps/chosen": -471.76123046875, "logps/rejected": -904.63134765625, "loss": 0.0879, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20311009883880615, "rewards/margins": 0.45500802993774414, "rewards/rejected": -0.6581181287765503, "step": 4030 }, { "epoch": 0.77, "learning_rate": 7.66345721139003e-07, "logits/chosen": -1.6218795776367188, "logits/rejected": -0.8566554188728333, "logps/chosen": -409.74554443359375, "logps/rejected": -808.111572265625, "loss": 0.1327, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15976402163505554, "rewards/margins": 0.40532198548316956, "rewards/rejected": -0.5650860071182251, "step": 4040 }, { "epoch": 0.77, "learning_rate": 7.544079547848183e-07, "logits/chosen": -1.3969422578811646, "logits/rejected": -0.883095383644104, "logps/chosen": -499.48297119140625, "logps/rejected": -921.9962158203125, "loss": 0.0878, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21744024753570557, "rewards/margins": 0.4132118225097656, "rewards/rejected": -0.6306520700454712, "step": 4050 }, { "epoch": 0.77, "learning_rate": 7.425473564358457e-07, "logits/chosen": -1.238007664680481, "logits/rejected": -0.8165663480758667, "logps/chosen": -426.135498046875, "logps/rejected": -730.955078125, "loss": 0.1786, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16805721819400787, "rewards/margins": 0.35494813323020935, "rewards/rejected": -0.5230053663253784, "step": 4060 }, { "epoch": 0.78, "learning_rate": 7.307644504177539e-07, "logits/chosen": -1.4750516414642334, "logits/rejected": -0.8497706651687622, "logps/chosen": -467.21551513671875, "logps/rejected": -730.787841796875, "loss": 0.1782, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19409474730491638, "rewards/margins": 0.3357428014278412, "rewards/rejected": -0.5298374891281128, "step": 4070 }, { "epoch": 0.78, "learning_rate": 7.190597576216385e-07, "logits/chosen": -1.490150809288025, "logits/rejected": -1.1103893518447876, "logps/chosen": -412.3334045410156, "logps/rejected": -742.7979125976562, "loss": 0.1168, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17318478226661682, "rewards/margins": 0.3787384331226349, "rewards/rejected": -0.5519231557846069, "step": 4080 }, { "epoch": 0.78, "learning_rate": 7.074337954809945e-07, "logits/chosen": -1.3836214542388916, "logits/rejected": -0.9024607539176941, "logps/chosen": -436.68670654296875, "logps/rejected": -792.5675048828125, "loss": 0.1417, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19213493168354034, "rewards/margins": 0.3616899847984314, "rewards/rejected": -0.5538249015808105, "step": 4090 }, { "epoch": 0.78, "learning_rate": 6.958870779488447e-07, "logits/chosen": -1.5166480541229248, "logits/rejected": -0.8364191055297852, "logps/chosen": -523.4935302734375, "logps/rejected": -885.0479736328125, "loss": 0.1174, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22820933163166046, "rewards/margins": 0.3788641691207886, "rewards/rejected": -0.6070734262466431, "step": 4100 }, { "epoch": 0.78, "learning_rate": 6.844201154750176e-07, "logits/chosen": -1.3214032649993896, "logits/rejected": -0.9453784823417664, "logps/chosen": -437.3863220214844, "logps/rejected": -746.4293212890625, "loss": 0.198, "rewards/accuracies": 0.75, "rewards/chosen": -0.21481084823608398, "rewards/margins": 0.34201231598854065, "rewards/rejected": -0.556823194026947, "step": 4110 }, { "epoch": 0.78, "learning_rate": 6.730334149835788e-07, "logits/chosen": -1.3478151559829712, "logits/rejected": -0.535720944404602, "logps/chosen": -450.205078125, "logps/rejected": -761.8121337890625, "loss": 0.098, "rewards/accuracies": 0.875, "rewards/chosen": -0.19289442896842957, "rewards/margins": 0.40821319818496704, "rewards/rejected": -0.6011075973510742, "step": 4120 }, { "epoch": 0.79, "learning_rate": 6.617274798504286e-07, "logits/chosen": -1.619482398033142, "logits/rejected": -0.9454183578491211, "logps/chosen": -449.7792053222656, "logps/rejected": -817.1019897460938, "loss": 0.1318, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16370134055614471, "rewards/margins": 0.37233811616897583, "rewards/rejected": -0.5360394716262817, "step": 4130 }, { "epoch": 0.79, "learning_rate": 6.505028098810407e-07, "logits/chosen": -1.521736741065979, "logits/rejected": -0.7800843119621277, "logps/chosen": -497.5032653808594, "logps/rejected": -861.6238403320312, "loss": 0.1208, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.202129527926445, "rewards/margins": 0.40128570795059204, "rewards/rejected": -0.6034151911735535, "step": 4140 }, { "epoch": 0.79, "learning_rate": 6.393599012883709e-07, "logits/chosen": -1.374193787574768, "logits/rejected": -0.7791138887405396, "logps/chosen": -432.4004821777344, "logps/rejected": -829.2775268554688, "loss": 0.1482, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1640288531780243, "rewards/margins": 0.42727237939834595, "rewards/rejected": -0.5913012027740479, "step": 4150 }, { "epoch": 0.79, "learning_rate": 6.282992466709247e-07, "logits/chosen": -1.5286866426467896, "logits/rejected": -0.7935647964477539, "logps/chosen": -419.816162109375, "logps/rejected": -692.2298583984375, "loss": 0.1456, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19588129222393036, "rewards/margins": 0.32750824093818665, "rewards/rejected": -0.5233895182609558, "step": 4160 }, { "epoch": 0.79, "learning_rate": 6.17321334990973e-07, "logits/chosen": -1.6117595434188843, "logits/rejected": -0.9909737706184387, "logps/chosen": -394.4982604980469, "logps/rejected": -729.9796752929688, "loss": 0.1427, "rewards/accuracies": 0.875, "rewards/chosen": -0.16090139746665955, "rewards/margins": 0.351857990026474, "rewards/rejected": -0.5127593874931335, "step": 4170 }, { "epoch": 0.8, "learning_rate": 6.064266515529419e-07, "logits/chosen": -1.3305212259292603, "logits/rejected": -0.8925421833992004, "logps/chosen": -395.07757568359375, "logps/rejected": -736.6002197265625, "loss": 0.1354, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17954333126544952, "rewards/margins": 0.35855793952941895, "rewards/rejected": -0.538101315498352, "step": 4180 }, { "epoch": 0.8, "learning_rate": 5.956156779819586e-07, "logits/chosen": -1.6572856903076172, "logits/rejected": -0.9387000799179077, "logps/chosen": -408.9671630859375, "logps/rejected": -833.0213012695312, "loss": 0.0902, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15286390483379364, "rewards/margins": 0.4327394962310791, "rewards/rejected": -0.5856034755706787, "step": 4190 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.697501540184021, "logits/rejected": -0.9727057218551636, "logps/chosen": -474.0279235839844, "logps/rejected": -866.3361206054688, "loss": 0.0948, "rewards/accuracies": 0.875, "rewards/chosen": -0.1627417355775833, "rewards/margins": 0.43979042768478394, "rewards/rejected": -0.6025322079658508, "step": 4200 }, { "epoch": 0.8, "learning_rate": 5.742467684175473e-07, "logits/chosen": -1.5084253549575806, "logits/rejected": -0.8783987760543823, "logps/chosen": -433.1919860839844, "logps/rejected": -800.1793212890625, "loss": 0.0927, "rewards/accuracies": 0.875, "rewards/chosen": -0.14110364019870758, "rewards/margins": 0.40417689085006714, "rewards/rejected": -0.5452805757522583, "step": 4210 }, { "epoch": 0.8, "learning_rate": 5.636897770870667e-07, "logits/chosen": -1.709836721420288, "logits/rejected": -0.889247715473175, "logps/chosen": -466.48138427734375, "logps/rejected": -861.8689575195312, "loss": 0.0912, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14159590005874634, "rewards/margins": 0.46425190567970276, "rewards/rejected": -0.6058478355407715, "step": 4220 }, { "epoch": 0.81, "learning_rate": 5.532183849077651e-07, "logits/chosen": -1.4882949590682983, "logits/rejected": -0.7598873376846313, "logps/chosen": -465.96044921875, "logps/rejected": -884.3361206054688, "loss": 0.1123, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16574141383171082, "rewards/margins": 0.4332696497440338, "rewards/rejected": -0.5990110635757446, "step": 4230 }, { "epoch": 0.81, "learning_rate": 5.428330547921809e-07, "logits/chosen": -1.658866286277771, "logits/rejected": -0.9485718011856079, "logps/chosen": -372.85821533203125, "logps/rejected": -769.7677001953125, "loss": 0.1153, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11111323535442352, "rewards/margins": 0.4141567349433899, "rewards/rejected": -0.5252699255943298, "step": 4240 }, { "epoch": 0.81, "learning_rate": 5.32534245848278e-07, "logits/chosen": -1.2115930318832397, "logits/rejected": -1.0179760456085205, "logps/chosen": -450.6280212402344, "logps/rejected": -879.4969482421875, "loss": 0.1049, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19777178764343262, "rewards/margins": 0.41542357206344604, "rewards/rejected": -0.6131953597068787, "step": 4250 }, { "epoch": 0.81, "learning_rate": 5.223224133591475e-07, "logits/chosen": -1.3883769512176514, "logits/rejected": -0.7162304520606995, "logps/chosen": -513.7536010742188, "logps/rejected": -814.691650390625, "loss": 0.1312, "rewards/accuracies": 0.875, "rewards/chosen": -0.2120884209871292, "rewards/margins": 0.3639156222343445, "rewards/rejected": -0.5760040283203125, "step": 4260 }, { "epoch": 0.81, "learning_rate": 5.121980087628802e-07, "logits/chosen": -1.4801558256149292, "logits/rejected": -0.8288172483444214, "logps/chosen": -525.2527465820312, "logps/rejected": -936.9075927734375, "loss": 0.0729, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19098223745822906, "rewards/margins": 0.4570366442203522, "rewards/rejected": -0.6480189561843872, "step": 4270 }, { "epoch": 0.82, "learning_rate": 5.021614796326155e-07, "logits/chosen": -1.4908511638641357, "logits/rejected": -0.7371522188186646, "logps/chosen": -500.620849609375, "logps/rejected": -914.1804809570312, "loss": 0.0795, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18082840740680695, "rewards/margins": 0.47826212644577026, "rewards/rejected": -0.6590905785560608, "step": 4280 }, { "epoch": 0.82, "learning_rate": 4.922132696567463e-07, "logits/chosen": -1.6899621486663818, "logits/rejected": -0.8043138384819031, "logps/chosen": -482.46844482421875, "logps/rejected": -821.4468994140625, "loss": 0.1242, "rewards/accuracies": 0.875, "rewards/chosen": -0.15933868288993835, "rewards/margins": 0.42340874671936035, "rewards/rejected": -0.5827474594116211, "step": 4290 }, { "epoch": 0.82, "learning_rate": 4.823538186193097e-07, "logits/chosen": -1.549431324005127, "logits/rejected": -0.7732067108154297, "logps/chosen": -473.0877380371094, "logps/rejected": -836.0955200195312, "loss": 0.1255, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20345333218574524, "rewards/margins": 0.42582082748413086, "rewards/rejected": -0.6292742490768433, "step": 4300 }, { "epoch": 0.82, "learning_rate": 4.725835623805494e-07, "logits/chosen": -1.4686667919158936, "logits/rejected": -0.9636458158493042, "logps/chosen": -393.032958984375, "logps/rejected": -674.8143310546875, "loss": 0.1521, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14761511981487274, "rewards/margins": 0.3062286078929901, "rewards/rejected": -0.45384377241134644, "step": 4310 }, { "epoch": 0.82, "learning_rate": 4.6290293285763816e-07, "logits/chosen": -1.386617660522461, "logits/rejected": -1.0373371839523315, "logps/chosen": -459.452392578125, "logps/rejected": -740.64306640625, "loss": 0.2032, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2361104041337967, "rewards/margins": 0.2924925982952118, "rewards/rejected": -0.5286030173301697, "step": 4320 }, { "epoch": 0.82, "learning_rate": 4.533123580055909e-07, "logits/chosen": -1.3563830852508545, "logits/rejected": -0.9716880917549133, "logps/chosen": -383.36346435546875, "logps/rejected": -767.8079833984375, "loss": 0.1528, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16464294493198395, "rewards/margins": 0.37139827013015747, "rewards/rejected": -0.5360411405563354, "step": 4330 }, { "epoch": 0.83, "learning_rate": 4.438122617983442e-07, "logits/chosen": -1.3582484722137451, "logits/rejected": -0.7382173538208008, "logps/chosen": -465.4404296875, "logps/rejected": -732.1175537109375, "loss": 0.1458, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20467007160186768, "rewards/margins": 0.3647896945476532, "rewards/rejected": -0.5694597363471985, "step": 4340 }, { "epoch": 0.83, "learning_rate": 4.344030642100133e-07, "logits/chosen": -1.3873969316482544, "logits/rejected": -0.8275535702705383, "logps/chosen": -389.4620361328125, "logps/rejected": -763.936767578125, "loss": 0.1232, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12089165300130844, "rewards/margins": 0.38350221514701843, "rewards/rejected": -0.5043938755989075, "step": 4350 }, { "epoch": 0.83, "learning_rate": 4.250851811963236e-07, "logits/chosen": -1.3748615980148315, "logits/rejected": -0.7311490178108215, "logps/chosen": -455.83978271484375, "logps/rejected": -791.9259643554688, "loss": 0.1194, "rewards/accuracies": 0.875, "rewards/chosen": -0.13805212080478668, "rewards/margins": 0.41027718782424927, "rewards/rejected": -0.54832923412323, "step": 4360 }, { "epoch": 0.83, "learning_rate": 4.158590246762278e-07, "logits/chosen": -1.3977850675582886, "logits/rejected": -0.8129779696464539, "logps/chosen": -450.1375427246094, "logps/rejected": -821.4510498046875, "loss": 0.1306, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18041059374809265, "rewards/margins": 0.40551385283470154, "rewards/rejected": -0.5859244465827942, "step": 4370 }, { "epoch": 0.83, "learning_rate": 4.0672500251369204e-07, "logits/chosen": -1.2478625774383545, "logits/rejected": -0.8130487203598022, "logps/chosen": -502.88055419921875, "logps/rejected": -878.3136596679688, "loss": 0.0997, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23530392348766327, "rewards/margins": 0.4077521860599518, "rewards/rejected": -0.6430560946464539, "step": 4380 }, { "epoch": 0.84, "learning_rate": 3.976835184996644e-07, "logits/chosen": -1.3088743686676025, "logits/rejected": -0.8174660801887512, "logps/chosen": -423.864501953125, "logps/rejected": -757.5335693359375, "loss": 0.0826, "rewards/accuracies": 0.875, "rewards/chosen": -0.20466938614845276, "rewards/margins": 0.3389818072319031, "rewards/rejected": -0.5436512231826782, "step": 4390 }, { "epoch": 0.84, "learning_rate": 3.887349723342304e-07, "logits/chosen": -1.4382240772247314, "logits/rejected": -1.023268461227417, "logps/chosen": -381.9620361328125, "logps/rejected": -665.144775390625, "loss": 0.1571, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1409345120191574, "rewards/margins": 0.31821009516716003, "rewards/rejected": -0.45914459228515625, "step": 4400 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.3674557209014893, "logits/rejected": -0.8583385348320007, "logps/chosen": -382.4273986816406, "logps/rejected": -753.339599609375, "loss": 0.1386, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16308340430259705, "rewards/margins": 0.3720029294490814, "rewards/rejected": -0.5350863337516785, "step": 4410 }, { "epoch": 0.84, "learning_rate": 3.711182717893011e-07, "logits/chosen": -1.2204480171203613, "logits/rejected": -0.9321855306625366, "logps/chosen": -465.8548889160156, "logps/rejected": -927.3399658203125, "loss": 0.1223, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21986384689807892, "rewards/margins": 0.39553216099739075, "rewards/rejected": -0.6153961420059204, "step": 4420 }, { "epoch": 0.84, "learning_rate": 3.624508961975215e-07, "logits/chosen": -1.4013726711273193, "logits/rejected": -0.9713338613510132, "logps/chosen": -393.87689208984375, "logps/rejected": -672.8309326171875, "loss": 0.1463, "rewards/accuracies": 0.75, "rewards/chosen": -0.16915014386177063, "rewards/margins": 0.3168584406375885, "rewards/rejected": -0.48600858449935913, "step": 4430 }, { "epoch": 0.85, "learning_rate": 3.538780159953348e-07, "logits/chosen": -1.3150888681411743, "logits/rejected": -0.9644551277160645, "logps/chosen": -436.5584411621094, "logps/rejected": -813.3245239257812, "loss": 0.1283, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19782361388206482, "rewards/margins": 0.3530764877796173, "rewards/rejected": -0.5509001016616821, "step": 4440 }, { "epoch": 0.85, "learning_rate": 3.454000101670901e-07, "logits/chosen": -1.491090178489685, "logits/rejected": -0.9154464602470398, "logps/chosen": -471.2315979003906, "logps/rejected": -882.4977416992188, "loss": 0.0743, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17308983206748962, "rewards/margins": 0.4131808876991272, "rewards/rejected": -0.5862706899642944, "step": 4450 }, { "epoch": 0.85, "learning_rate": 3.3701725350299143e-07, "logits/chosen": -1.5308401584625244, "logits/rejected": -1.2103668451309204, "logps/chosen": -511.11285400390625, "logps/rejected": -838.2838745117188, "loss": 0.1267, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2404261827468872, "rewards/margins": 0.3354942798614502, "rewards/rejected": -0.5759204626083374, "step": 4460 }, { "epoch": 0.85, "learning_rate": 3.2873011658252796e-07, "logits/chosen": -1.4060428142547607, "logits/rejected": -0.8161015510559082, "logps/chosen": -416.7906799316406, "logps/rejected": -783.955078125, "loss": 0.1187, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1477649211883545, "rewards/margins": 0.391218364238739, "rewards/rejected": -0.5389832854270935, "step": 4470 }, { "epoch": 0.85, "learning_rate": 3.2053896575809426e-07, "logits/chosen": -1.3495150804519653, "logits/rejected": -0.8987475633621216, "logps/chosen": -448.47674560546875, "logps/rejected": -756.5149536132812, "loss": 0.0944, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16199520230293274, "rewards/margins": 0.3771488070487976, "rewards/rejected": -0.5391440391540527, "step": 4480 }, { "epoch": 0.86, "learning_rate": 3.124441631387931e-07, "logits/chosen": -1.2932629585266113, "logits/rejected": -0.6764540672302246, "logps/chosen": -404.88018798828125, "logps/rejected": -753.2144775390625, "loss": 0.1476, "rewards/accuracies": 0.875, "rewards/chosen": -0.1409321427345276, "rewards/margins": 0.37771934270858765, "rewards/rejected": -0.51865154504776, "step": 4490 }, { "epoch": 0.86, "learning_rate": 3.044460665744284e-07, "logits/chosen": -1.3475325107574463, "logits/rejected": -0.8611810803413391, "logps/chosen": -400.4355163574219, "logps/rejected": -813.8939819335938, "loss": 0.1124, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14284108579158783, "rewards/margins": 0.4387703835964203, "rewards/rejected": -0.5816114544868469, "step": 4500 }, { "epoch": 0.86, "learning_rate": 2.9654502963968575e-07, "logits/chosen": -1.3193926811218262, "logits/rejected": -0.7504812479019165, "logps/chosen": -419.36614990234375, "logps/rejected": -723.6893310546875, "loss": 0.1229, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1548554003238678, "rewards/margins": 0.36528313159942627, "rewards/rejected": -0.5201385617256165, "step": 4510 }, { "epoch": 0.86, "learning_rate": 2.8874140161849915e-07, "logits/chosen": -1.4137651920318604, "logits/rejected": -0.8418663740158081, "logps/chosen": -461.66387939453125, "logps/rejected": -877.03173828125, "loss": 0.1047, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17651605606079102, "rewards/margins": 0.4517923891544342, "rewards/rejected": -0.6283084750175476, "step": 4520 }, { "epoch": 0.86, "learning_rate": 2.810355274886148e-07, "logits/chosen": -1.5542514324188232, "logits/rejected": -0.8681430816650391, "logps/chosen": -431.9638671875, "logps/rejected": -669.0515747070312, "loss": 0.2253, "rewards/accuracies": 0.75, "rewards/chosen": -0.16327472031116486, "rewards/margins": 0.3313491940498352, "rewards/rejected": -0.49462389945983887, "step": 4530 }, { "epoch": 0.86, "learning_rate": 2.7342774790633686e-07, "logits/chosen": -1.4139035940170288, "logits/rejected": -0.7520237565040588, "logps/chosen": -409.57757568359375, "logps/rejected": -730.0147094726562, "loss": 0.1311, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1794741153717041, "rewards/margins": 0.36604803800582886, "rewards/rejected": -0.5455222129821777, "step": 4540 }, { "epoch": 0.87, "learning_rate": 2.6591839919146963e-07, "logits/chosen": -1.6184698343276978, "logits/rejected": -0.9271749258041382, "logps/chosen": -429.667724609375, "logps/rejected": -801.033203125, "loss": 0.1088, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.157302588224411, "rewards/margins": 0.4193267226219177, "rewards/rejected": -0.5766293406486511, "step": 4550 }, { "epoch": 0.87, "learning_rate": 2.58507813312448e-07, "logits/chosen": -1.7002979516983032, "logits/rejected": -0.8093093633651733, "logps/chosen": -564.6036376953125, "logps/rejected": -899.65478515625, "loss": 0.1012, "rewards/accuracies": 0.875, "rewards/chosen": -0.19250407814979553, "rewards/margins": 0.43440237641334534, "rewards/rejected": -0.6269065141677856, "step": 4560 }, { "epoch": 0.87, "learning_rate": 2.511963178716648e-07, "logits/chosen": -1.4212658405303955, "logits/rejected": -0.8180297613143921, "logps/chosen": -479.9620056152344, "logps/rejected": -847.6258544921875, "loss": 0.1247, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19775906205177307, "rewards/margins": 0.3775900900363922, "rewards/rejected": -0.5753491520881653, "step": 4570 }, { "epoch": 0.87, "learning_rate": 2.439842360909864e-07, "logits/chosen": -1.4214054346084595, "logits/rejected": -0.9303551912307739, "logps/chosen": -380.6754150390625, "logps/rejected": -777.2127075195312, "loss": 0.0936, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13218122720718384, "rewards/margins": 0.40698352456092834, "rewards/rejected": -0.5391647219657898, "step": 4580 }, { "epoch": 0.87, "learning_rate": 2.3687188679746314e-07, "logits/chosen": -1.7164958715438843, "logits/rejected": -0.9716140031814575, "logps/chosen": -446.3602600097656, "logps/rejected": -821.6305541992188, "loss": 0.1009, "rewards/accuracies": 0.875, "rewards/chosen": -0.16869202256202698, "rewards/margins": 0.42478853464126587, "rewards/rejected": -0.5934805870056152, "step": 4590 }, { "epoch": 0.88, "learning_rate": 2.2985958440923772e-07, "logits/chosen": -1.152823805809021, "logits/rejected": -0.765326976776123, "logps/chosen": -458.0760192871094, "logps/rejected": -875.1578979492188, "loss": 0.0814, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20262520015239716, "rewards/margins": 0.3884681165218353, "rewards/rejected": -0.5910933017730713, "step": 4600 }, { "epoch": 0.88, "learning_rate": 2.2294763892164284e-07, "logits/chosen": -1.1877996921539307, "logits/rejected": -0.7322112321853638, "logps/chosen": -459.49981689453125, "logps/rejected": -807.7608642578125, "loss": 0.1603, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21152301132678986, "rewards/margins": 0.3710188865661621, "rewards/rejected": -0.5825419425964355, "step": 4610 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.153120756149292, "logits/rejected": -0.6857002973556519, "logps/chosen": -417.62103271484375, "logps/rejected": -697.7780151367188, "loss": 0.1322, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18486586213111877, "rewards/margins": 0.3198961913585663, "rewards/rejected": -0.5047620534896851, "step": 4620 }, { "epoch": 0.88, "learning_rate": 2.094260364336026e-07, "logits/chosen": -1.3663113117218018, "logits/rejected": -0.9139379262924194, "logps/chosen": -505.7391662597656, "logps/rejected": -849.6533203125, "loss": 0.1105, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19234949350357056, "rewards/margins": 0.43087753653526306, "rewards/rejected": -0.6232270002365112, "step": 4630 }, { "epoch": 0.88, "learning_rate": 2.0281697718742333e-07, "logits/chosen": -1.5367798805236816, "logits/rejected": -0.8568869829177856, "logps/chosen": -487.6544494628906, "logps/rejected": -815.6954956054688, "loss": 0.0735, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17859289050102234, "rewards/margins": 0.4275876581668854, "rewards/rejected": -0.6061805486679077, "step": 4640 }, { "epoch": 0.89, "learning_rate": 1.9630947032398068e-07, "logits/chosen": -1.4071314334869385, "logits/rejected": -0.9722201228141785, "logps/chosen": -504.32421875, "logps/rejected": -812.99365234375, "loss": 0.1356, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1926029622554779, "rewards/margins": 0.3552549183368683, "rewards/rejected": -0.547857940196991, "step": 4650 }, { "epoch": 0.89, "learning_rate": 1.899038035229342e-07, "logits/chosen": -1.448122262954712, "logits/rejected": -0.6743729710578918, "logps/chosen": -499.5284118652344, "logps/rejected": -911.1188354492188, "loss": 0.0833, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.18546457588672638, "rewards/margins": 0.43846940994262695, "rewards/rejected": -0.6239340305328369, "step": 4660 }, { "epoch": 0.89, "learning_rate": 1.8360025996186138e-07, "logits/chosen": -1.5241035223007202, "logits/rejected": -0.9024316668510437, "logps/chosen": -469.5726013183594, "logps/rejected": -828.2664794921875, "loss": 0.1317, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17633484303951263, "rewards/margins": 0.4017368257045746, "rewards/rejected": -0.5780717134475708, "step": 4670 }, { "epoch": 0.89, "learning_rate": 1.7739911830374352e-07, "logits/chosen": -1.3173071146011353, "logits/rejected": -1.0588247776031494, "logps/chosen": -465.42083740234375, "logps/rejected": -809.115234375, "loss": 0.1688, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20326296985149384, "rewards/margins": 0.36358997225761414, "rewards/rejected": -0.5668529272079468, "step": 4680 }, { "epoch": 0.89, "learning_rate": 1.713006526846439e-07, "logits/chosen": -1.3476272821426392, "logits/rejected": -0.8063791394233704, "logps/chosen": -416.9029846191406, "logps/rejected": -692.3648071289062, "loss": 0.1977, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15405414998531342, "rewards/margins": 0.3257124423980713, "rewards/rejected": -0.4797666072845459, "step": 4690 }, { "epoch": 0.9, "learning_rate": 1.6530513270159116e-07, "logits/chosen": -1.3068596124649048, "logits/rejected": -0.8614629507064819, "logps/chosen": -427.1305236816406, "logps/rejected": -765.303955078125, "loss": 0.1312, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20539817214012146, "rewards/margins": 0.36558598279953003, "rewards/rejected": -0.5709841847419739, "step": 4700 }, { "epoch": 0.9, "learning_rate": 1.59412823400657e-07, "logits/chosen": -1.3714582920074463, "logits/rejected": -0.9266483187675476, "logps/chosen": -459.03741455078125, "logps/rejected": -855.5986328125, "loss": 0.1438, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2372446060180664, "rewards/margins": 0.3688667416572571, "rewards/rejected": -0.6061113476753235, "step": 4710 }, { "epoch": 0.9, "learning_rate": 1.5362398526524463e-07, "logits/chosen": -1.4393783807754517, "logits/rejected": -0.9933551549911499, "logps/chosen": -433.999755859375, "logps/rejected": -725.7553100585938, "loss": 0.1554, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1959311068058014, "rewards/margins": 0.35494834184646606, "rewards/rejected": -0.5508793592453003, "step": 4720 }, { "epoch": 0.9, "learning_rate": 1.4793887420457008e-07, "logits/chosen": -1.3897453546524048, "logits/rejected": -0.6994045972824097, "logps/chosen": -498.5926818847656, "logps/rejected": -937.7908325195312, "loss": 0.064, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19319121539592743, "rewards/margins": 0.4900970458984375, "rewards/rejected": -0.6832882165908813, "step": 4730 }, { "epoch": 0.9, "learning_rate": 1.4235774154234855e-07, "logits/chosen": -1.421291470527649, "logits/rejected": -0.9582284092903137, "logps/chosen": -426.8997497558594, "logps/rejected": -782.2342529296875, "loss": 0.1265, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2021675556898117, "rewards/margins": 0.3871116638183594, "rewards/rejected": -0.589279294013977, "step": 4740 }, { "epoch": 0.9, "learning_rate": 1.368808340056879e-07, "logits/chosen": -1.4983841180801392, "logits/rejected": -1.023857831954956, "logps/chosen": -433.0885314941406, "logps/rejected": -726.4335327148438, "loss": 0.1339, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14796069264411926, "rewards/margins": 0.3529846966266632, "rewards/rejected": -0.5009453892707825, "step": 4750 }, { "epoch": 0.91, "learning_rate": 1.31508393714177e-07, "logits/chosen": -1.455814003944397, "logits/rejected": -0.792477011680603, "logps/chosen": -533.9439697265625, "logps/rejected": -792.9277954101562, "loss": 0.1262, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19613765180110931, "rewards/margins": 0.3652915060520172, "rewards/rejected": -0.5614292025566101, "step": 4760 }, { "epoch": 0.91, "learning_rate": 1.2624065816918414e-07, "logits/chosen": -1.1501885652542114, "logits/rejected": -0.9396678805351257, "logps/chosen": -369.74554443359375, "logps/rejected": -856.7259521484375, "loss": 0.0888, "rewards/accuracies": 0.875, "rewards/chosen": -0.15025801956653595, "rewards/margins": 0.4369124472141266, "rewards/rejected": -0.587170422077179, "step": 4770 }, { "epoch": 0.91, "learning_rate": 1.210778602433596e-07, "logits/chosen": -1.4099371433258057, "logits/rejected": -0.7466511726379395, "logps/chosen": -525.0029296875, "logps/rejected": -824.8199462890625, "loss": 0.1365, "rewards/accuracies": 0.875, "rewards/chosen": -0.19673535227775574, "rewards/margins": 0.4014459550380707, "rewards/rejected": -0.5981813669204712, "step": 4780 }, { "epoch": 0.91, "learning_rate": 1.1602022817033709e-07, "logits/chosen": -1.4918625354766846, "logits/rejected": -0.8371130228042603, "logps/chosen": -502.255859375, "logps/rejected": -801.0471801757812, "loss": 0.1564, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2125789374113083, "rewards/margins": 0.3338918089866638, "rewards/rejected": -0.5464707612991333, "step": 4790 }, { "epoch": 0.91, "learning_rate": 1.1106798553464804e-07, "logits/chosen": -1.6684515476226807, "logits/rejected": -0.8784621953964233, "logps/chosen": -443.15673828125, "logps/rejected": -848.1588745117188, "loss": 0.0931, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17932195961475372, "rewards/margins": 0.4225996434688568, "rewards/rejected": -0.6019216179847717, "step": 4800 }, { "epoch": 0.92, "learning_rate": 1.0622135126183514e-07, "logits/chosen": -1.196455478668213, "logits/rejected": -0.9832308888435364, "logps/chosen": -426.50213623046875, "logps/rejected": -813.4904174804688, "loss": 0.1476, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19113151729106903, "rewards/margins": 0.3521652817726135, "rewards/rejected": -0.5432968139648438, "step": 4810 }, { "epoch": 0.92, "learning_rate": 1.0148053960877396e-07, "logits/chosen": -1.4141533374786377, "logits/rejected": -0.9519413113594055, "logps/chosen": -483.4219665527344, "logps/rejected": -789.8177490234375, "loss": 0.0892, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1719743311405182, "rewards/margins": 0.3786942660808563, "rewards/rejected": -0.5506685972213745, "step": 4820 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.3775125741958618, "logits/rejected": -0.8111193776130676, "logps/chosen": -493.9146423339844, "logps/rejected": -829.2967529296875, "loss": 0.1279, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2263471633195877, "rewards/margins": 0.38264352083206177, "rewards/rejected": -0.6089906096458435, "step": 4830 }, { "epoch": 0.92, "learning_rate": 9.23172177894574e-08, "logits/chosen": -1.4640599489212036, "logits/rejected": -0.8311370611190796, "logps/chosen": -520.7279052734375, "logps/rejected": -880.76904296875, "loss": 0.1237, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2344060242176056, "rewards/margins": 0.37462249398231506, "rewards/rejected": -0.6090285181999207, "step": 4840 }, { "epoch": 0.92, "learning_rate": 8.78951127094127e-08, "logits/chosen": -1.6641457080841064, "logits/rejected": -0.9889407157897949, "logps/chosen": -523.6626586914062, "logps/rejected": -852.4313354492188, "loss": 0.1384, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20114929974079132, "rewards/margins": 0.3651053011417389, "rewards/rejected": -0.5662545561790466, "step": 4850 }, { "epoch": 0.93, "learning_rate": 8.357964040363209e-08, "logits/chosen": -1.3955992460250854, "logits/rejected": -0.8952843546867371, "logps/chosen": -484.54644775390625, "logps/rejected": -893.9944458007812, "loss": 0.1004, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18811412155628204, "rewards/margins": 0.4302886128425598, "rewards/rejected": -0.6184026598930359, "step": 4860 }, { "epoch": 0.93, "learning_rate": 7.937099164772699e-08, "logits/chosen": -1.3297057151794434, "logits/rejected": -0.7479932308197021, "logps/chosen": -449.45147705078125, "logps/rejected": -788.517578125, "loss": 0.113, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19342221319675446, "rewards/margins": 0.3470451831817627, "rewards/rejected": -0.540467381477356, "step": 4870 }, { "epoch": 0.93, "learning_rate": 7.526935249492245e-08, "logits/chosen": -1.5318547487258911, "logits/rejected": -0.9399830102920532, "logps/chosen": -473.74224853515625, "logps/rejected": -750.028564453125, "loss": 0.1155, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20122995972633362, "rewards/margins": 0.33083269000053406, "rewards/rejected": -0.5320626497268677, "step": 4880 }, { "epoch": 0.93, "learning_rate": 7.127490426783124e-08, "logits/chosen": -1.4715876579284668, "logits/rejected": -0.7455822825431824, "logps/chosen": -502.35235595703125, "logps/rejected": -776.6228637695312, "loss": 0.1745, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20015093684196472, "rewards/margins": 0.34923356771469116, "rewards/rejected": -0.5493844747543335, "step": 4890 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "logits/chosen": -1.200661063194275, "logits/rejected": -0.9478243589401245, "logps/chosen": -425.23931884765625, "logps/rejected": -743.1181030273438, "loss": 0.1932, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23534779250621796, "rewards/margins": 0.30353227257728577, "rewards/rejected": -0.5388800501823425, "step": 4900 }, { "epoch": 0.94, "learning_rate": 6.360828218030191e-08, "logits/chosen": -1.2701832056045532, "logits/rejected": -0.7628316879272461, "logps/chosen": -370.7064514160156, "logps/rejected": -716.0216674804688, "loss": 0.1292, "rewards/accuracies": 0.75, "rewards/chosen": -0.15757031738758087, "rewards/margins": 0.39811402559280396, "rewards/rejected": -0.5556842684745789, "step": 4910 }, { "epoch": 0.94, "learning_rate": 5.993644724093889e-08, "logits/chosen": -1.255086898803711, "logits/rejected": -1.0340642929077148, "logps/chosen": -465.6233825683594, "logps/rejected": -842.9586791992188, "loss": 0.1625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24477574229240417, "rewards/margins": 0.33079880475997925, "rewards/rejected": -0.5755745768547058, "step": 4920 }, { "epoch": 0.94, "learning_rate": 5.637248105445775e-08, "logits/chosen": -1.675337553024292, "logits/rejected": -0.9043560028076172, "logps/chosen": -400.72821044921875, "logps/rejected": -750.5416259765625, "loss": 0.1298, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15603816509246826, "rewards/margins": 0.3755912780761719, "rewards/rejected": -0.5316294431686401, "step": 4930 }, { "epoch": 0.94, "learning_rate": 5.291654117437262e-08, "logits/chosen": -1.4090471267700195, "logits/rejected": -0.9016807675361633, "logps/chosen": -502.94073486328125, "logps/rejected": -859.86328125, "loss": 0.1331, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18255341053009033, "rewards/margins": 0.3954446017742157, "rewards/rejected": -0.5779979825019836, "step": 4940 }, { "epoch": 0.94, "learning_rate": 4.956878037864044e-08, "logits/chosen": -1.1109745502471924, "logits/rejected": -0.7918599843978882, "logps/chosen": -482.2723693847656, "logps/rejected": -851.2086181640625, "loss": 0.1092, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21681293845176697, "rewards/margins": 0.39912086725234985, "rewards/rejected": -0.6159338355064392, "step": 4950 }, { "epoch": 0.94, "learning_rate": 4.632934666290778e-08, "logits/chosen": -1.373295545578003, "logits/rejected": -0.9285712242126465, "logps/chosen": -463.3414001464844, "logps/rejected": -845.3052978515625, "loss": 0.1073, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18632186949253082, "rewards/margins": 0.41335123777389526, "rewards/rejected": -0.5996731519699097, "step": 4960 }, { "epoch": 0.95, "learning_rate": 4.319838323396691e-08, "logits/chosen": -1.432554841041565, "logits/rejected": -0.7453271150588989, "logps/chosen": -502.40679931640625, "logps/rejected": -861.9630737304688, "loss": 0.0935, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16671831905841827, "rewards/margins": 0.4450034201145172, "rewards/rejected": -0.6117216944694519, "step": 4970 }, { "epoch": 0.95, "learning_rate": 4.017602850342584e-08, "logits/chosen": -1.4223308563232422, "logits/rejected": -0.6028486490249634, "logps/chosen": -599.6661987304688, "logps/rejected": -825.7252197265625, "loss": 0.1397, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23755793273448944, "rewards/margins": 0.3622376322746277, "rewards/rejected": -0.5997955203056335, "step": 4980 }, { "epoch": 0.95, "learning_rate": 3.7262416081589866e-08, "logits/chosen": -1.5748474597930908, "logits/rejected": -0.9502396583557129, "logps/chosen": -500.4580993652344, "logps/rejected": -839.21337890625, "loss": 0.1541, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22722406685352325, "rewards/margins": 0.3656982481479645, "rewards/rejected": -0.5929223299026489, "step": 4990 }, { "epoch": 0.95, "learning_rate": 3.445767477155443e-08, "logits/chosen": -1.2585406303405762, "logits/rejected": -1.0101072788238525, "logps/chosen": -390.9880676269531, "logps/rejected": -817.3690185546875, "loss": 0.1239, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15722188353538513, "rewards/margins": 0.38227564096450806, "rewards/rejected": -0.5394975543022156, "step": 5000 }, { "epoch": 0.95, "learning_rate": 3.1761928563510956e-08, "logits/chosen": -1.2074508666992188, "logits/rejected": -0.9428434371948242, "logps/chosen": -454.01983642578125, "logps/rejected": -776.2493286132812, "loss": 0.1456, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19975072145462036, "rewards/margins": 0.35644984245300293, "rewards/rejected": -0.5562005639076233, "step": 5010 }, { "epoch": 0.96, "learning_rate": 2.917529662926549e-08, "logits/chosen": -1.3608087301254272, "logits/rejected": -0.8991681337356567, "logps/chosen": -417.16668701171875, "logps/rejected": -666.2408447265625, "loss": 0.2158, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2125294953584671, "rewards/margins": 0.29053133726119995, "rewards/rejected": -0.5030608177185059, "step": 5020 }, { "epoch": 0.96, "learning_rate": 2.669789331697148e-08, "logits/chosen": -1.3365575075149536, "logits/rejected": -0.9594219326972961, "logps/chosen": -447.53741455078125, "logps/rejected": -783.70556640625, "loss": 0.1886, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18960712850093842, "rewards/margins": 0.33845216035842896, "rewards/rejected": -0.5280593037605286, "step": 5030 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.3161144256591797, "logits/rejected": -0.6097744703292847, "logps/chosen": -506.3562927246094, "logps/rejected": -829.1726684570312, "loss": 0.1065, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1814567595720291, "rewards/margins": 0.40894073247909546, "rewards/rejected": -0.5903975367546082, "step": 5040 }, { "epoch": 0.96, "learning_rate": 2.20712058024683e-08, "logits/chosen": -1.2939664125442505, "logits/rejected": -0.7289565205574036, "logps/chosen": -443.138671875, "logps/rejected": -803.8553466796875, "loss": 0.1043, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16932852566242218, "rewards/margins": 0.38964149355888367, "rewards/rejected": -0.558970034122467, "step": 5050 }, { "epoch": 0.96, "learning_rate": 1.9922126133870568e-08, "logits/chosen": -1.326685905456543, "logits/rejected": -0.7256841063499451, "logps/chosen": -496.09930419921875, "logps/rejected": -816.048095703125, "loss": 0.126, "rewards/accuracies": 0.875, "rewards/chosen": -0.19692137837409973, "rewards/margins": 0.4010583460330963, "rewards/rejected": -0.5979796648025513, "step": 5060 }, { "epoch": 0.97, "learning_rate": 1.7882684145406616e-08, "logits/chosen": -1.3308762311935425, "logits/rejected": -0.8497464060783386, "logps/chosen": -410.64923095703125, "logps/rejected": -770.8076171875, "loss": 0.1206, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19414843618869781, "rewards/margins": 0.37417155504226685, "rewards/rejected": -0.5683199763298035, "step": 5070 }, { "epoch": 0.97, "learning_rate": 1.595296999541057e-08, "logits/chosen": -1.542405366897583, "logits/rejected": -1.019696593284607, "logps/chosen": -443.45343017578125, "logps/rejected": -756.2444458007812, "loss": 0.1761, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20788124203681946, "rewards/margins": 0.33360883593559265, "rewards/rejected": -0.5414900779724121, "step": 5080 }, { "epoch": 0.97, "learning_rate": 1.4133068991437903e-08, "logits/chosen": -1.218247652053833, "logits/rejected": -0.7920243740081787, "logps/chosen": -437.85516357421875, "logps/rejected": -781.8002319335938, "loss": 0.173, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18266211450099945, "rewards/margins": 0.35743165016174316, "rewards/rejected": -0.5400937795639038, "step": 5090 }, { "epoch": 0.97, "learning_rate": 1.2423061586496476e-08, "logits/chosen": -1.7509443759918213, "logits/rejected": -1.0197384357452393, "logps/chosen": -436.962890625, "logps/rejected": -770.4503784179688, "loss": 0.0864, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13664960861206055, "rewards/margins": 0.4247468411922455, "rewards/rejected": -0.5613964200019836, "step": 5100 }, { "epoch": 0.97, "learning_rate": 1.0823023375489128e-08, "logits/chosen": -1.427309513092041, "logits/rejected": -1.1378382444381714, "logps/chosen": -444.826171875, "logps/rejected": -750.93408203125, "loss": 0.1514, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15703561902046204, "rewards/margins": 0.33195358514785767, "rewards/rejected": -0.4889891743659973, "step": 5110 }, { "epoch": 0.98, "learning_rate": 9.333025091870507e-09, "logits/chosen": -1.4458156824111938, "logits/rejected": -1.017441749572754, "logps/chosen": -494.0660095214844, "logps/rejected": -810.0037841796875, "loss": 0.1606, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22721855342388153, "rewards/margins": 0.32751986384391785, "rewards/rejected": -0.554738461971283, "step": 5120 }, { "epoch": 0.98, "learning_rate": 7.95313260452263e-09, "logits/chosen": -1.3676284551620483, "logits/rejected": -0.8140841722488403, "logps/chosen": -473.8611755371094, "logps/rejected": -841.09375, "loss": 0.1369, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20075416564941406, "rewards/margins": 0.3758813738822937, "rewards/rejected": -0.576635479927063, "step": 5130 }, { "epoch": 0.98, "learning_rate": 6.683406914840818e-09, "logits/chosen": -1.156714916229248, "logits/rejected": -0.8874901533126831, "logps/chosen": -370.44805908203125, "logps/rejected": -785.7064819335938, "loss": 0.1292, "rewards/accuracies": 0.875, "rewards/chosen": -0.168834388256073, "rewards/margins": 0.400814950466156, "rewards/rejected": -0.569649338722229, "step": 5140 }, { "epoch": 0.98, "learning_rate": 5.523904154037529e-09, "logits/chosen": -1.6311709880828857, "logits/rejected": -1.0963709354400635, "logps/chosen": -480.2413635253906, "logps/rejected": -966.0230712890625, "loss": 0.0733, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18045267462730408, "rewards/margins": 0.48554015159606934, "rewards/rejected": -0.6659928560256958, "step": 5150 }, { "epoch": 0.98, "learning_rate": 4.474675580662113e-09, "logits/chosen": -1.5690613985061646, "logits/rejected": -0.7699793577194214, "logps/chosen": -451.502197265625, "logps/rejected": -831.6986083984375, "loss": 0.1035, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17425307631492615, "rewards/margins": 0.4356904923915863, "rewards/rejected": -0.6099435091018677, "step": 5160 }, { "epoch": 0.98, "learning_rate": 3.5357675783331823e-09, "logits/chosen": -1.4267027378082275, "logits/rejected": -0.8987632989883423, "logps/chosen": -465.96588134765625, "logps/rejected": -779.2655029296875, "loss": 0.1278, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20712223649024963, "rewards/margins": 0.3628089129924774, "rewards/rejected": -0.569931149482727, "step": 5170 }, { "epoch": 0.99, "learning_rate": 2.7072216536885855e-09, "logits/chosen": -1.3238601684570312, "logits/rejected": -1.0434250831604004, "logps/chosen": -418.7799377441406, "logps/rejected": -822.6767578125, "loss": 0.1146, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21473300457000732, "rewards/margins": 0.364638090133667, "rewards/rejected": -0.5793711543083191, "step": 5180 }, { "epoch": 0.99, "learning_rate": 1.989074434551874e-09, "logits/chosen": -1.3712234497070312, "logits/rejected": -0.7260132431983948, "logps/chosen": -471.7567443847656, "logps/rejected": -767.3343505859375, "loss": 0.1612, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19968119263648987, "rewards/margins": 0.3473806381225586, "rewards/rejected": -0.5470618009567261, "step": 5190 }, { "epoch": 0.99, "learning_rate": 1.3813576683111007e-09, "logits/chosen": -1.2893695831298828, "logits/rejected": -0.8476071357727051, "logps/chosen": -434.0755920410156, "logps/rejected": -796.0404663085938, "loss": 0.1489, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18195714056491852, "rewards/margins": 0.38474491238594055, "rewards/rejected": -0.5667020082473755, "step": 5200 }, { "epoch": 0.99, "learning_rate": 8.840982205160498e-10, "logits/chosen": -1.480252981185913, "logits/rejected": -0.7479499578475952, "logps/chosen": -519.5296630859375, "logps/rejected": -808.7293090820312, "loss": 0.127, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19645431637763977, "rewards/margins": 0.3963874578475952, "rewards/rejected": -0.5928417444229126, "step": 5210 }, { "epoch": 0.99, "learning_rate": 4.973180736911332e-10, "logits/chosen": -1.4346215724945068, "logits/rejected": -0.7164343595504761, "logps/chosen": -503.864990234375, "logps/rejected": -758.35302734375, "loss": 0.0973, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18495044112205505, "rewards/margins": 0.3679296374320984, "rewards/rejected": -0.5528801083564758, "step": 5220 }, { "epoch": 1.0, "learning_rate": 2.2103432636366718e-10, "logits/chosen": -1.4827239513397217, "logits/rejected": -0.9676238894462585, "logps/chosen": -490.35986328125, "logps/rejected": -746.24462890625, "loss": 0.17, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1694318950176239, "rewards/margins": 0.3603275716304779, "rewards/rejected": -0.529759407043457, "step": 5230 }, { "epoch": 1.0, "learning_rate": 5.525919230670029e-11, "logits/chosen": -1.542119026184082, "logits/rejected": -0.8365074396133423, "logps/chosen": -474.637939453125, "logps/rejected": -848.3474731445312, "loss": 0.1145, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21273811161518097, "rewards/margins": 0.4206463396549225, "rewards/rejected": -0.6333843469619751, "step": 5240 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.372284173965454, "logits/rejected": -0.9636751413345337, "logps/chosen": -452.578369140625, "logps/rejected": -803.23486328125, "loss": 0.0984, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17300735414028168, "rewards/margins": 0.3813256323337555, "rewards/rejected": -0.554332971572876, "step": 5250 }, { "epoch": 1.0, "step": 5250, "total_flos": 0.0, "train_loss": 0.13633786882105328, "train_runtime": 22084.2637, "train_samples_per_second": 0.951, "train_steps_per_second": 0.238 } ], "logging_steps": 10, "max_steps": 5250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }