{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 2907, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.7182130584192438e-09, "logits/chosen": -1.8825098276138306, "logits/rejected": -1.6692813634872437, "logps/chosen": -107.98798370361328, "logps/rejected": -99.48463439941406, "loss": 1.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -1.912903904914856, "logits/rejected": -1.679023265838623, "logps/chosen": -232.9512481689453, "logps/rejected": -205.12588500976562, "loss": 1.0008, "rewards/accuracies": 0.4861111044883728, "rewards/chosen": 0.004386746324598789, "rewards/margins": 0.0018502763705328107, "rewards/rejected": 0.0025364691391587257, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.007096529006958, "logits/rejected": -1.9176105260849, "logps/chosen": -270.76263427734375, "logps/rejected": -241.11474609375, "loss": 0.9998, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.013296608813107014, "rewards/margins": 0.011036807671189308, "rewards/rejected": 0.0022598025389015675, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -1.996591329574585, "logits/rejected": -1.9179508686065674, "logps/chosen": -264.7063903808594, "logps/rejected": -223.7097930908203, "loss": 1.0021, "rewards/accuracies": 0.5, "rewards/chosen": -0.0007220959523692727, "rewards/margins": 0.004284311085939407, "rewards/rejected": -0.0050064073875546455, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -2.0471560955047607, "logits/rejected": -1.9881904125213623, "logps/chosen": -309.6553955078125, "logps/rejected": -272.2703552246094, "loss": 0.9983, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.0036458049435168505, "rewards/margins": 0.0004331391828600317, "rewards/rejected": 0.003212666604667902, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -1.822840929031372, "logits/rejected": -1.8331642150878906, "logps/chosen": -333.86126708984375, "logps/rejected": -206.30355834960938, "loss": 1.0007, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.000272188161034137, "rewards/margins": 0.007083783857524395, "rewards/rejected": -0.006811595521867275, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -1.7849409580230713, "logits/rejected": -1.8800642490386963, "logps/chosen": -249.81332397460938, "logps/rejected": -225.6211395263672, "loss": 1.0016, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.005839993245899677, "rewards/margins": -0.003313907189294696, "rewards/rejected": -0.0025260853581130505, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -1.9829210042953491, "logits/rejected": -1.818284034729004, "logps/chosen": -328.28521728515625, "logps/rejected": -236.7754364013672, "loss": 0.9958, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.002883409382775426, "rewards/margins": 0.003594732377678156, "rewards/rejected": -0.0007113233441486955, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -1.973283052444458, "logits/rejected": -1.9369332790374756, "logps/chosen": -258.3551940917969, "logps/rejected": -212.8301544189453, "loss": 0.9948, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.0014923412818461657, "rewards/margins": 0.004948171321302652, "rewards/rejected": -0.0034558300394564867, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -1.9635775089263916, "logits/rejected": -1.8522634506225586, "logps/chosen": -256.3603820800781, "logps/rejected": -208.6153106689453, "loss": 0.9963, "rewards/accuracies": 0.5625, "rewards/chosen": -0.003207797883078456, "rewards/margins": 0.00904668215662241, "rewards/rejected": -0.01225447840988636, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -1.7430416345596313, "logits/rejected": -1.8678181171417236, "logps/chosen": -271.9082946777344, "logps/rejected": -195.18521118164062, "loss": 0.9982, "rewards/accuracies": 0.5, "rewards/chosen": 0.00269494135864079, "rewards/margins": 0.0009464768809266388, "rewards/rejected": 0.0017484650015830994, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -1.938391089439392, "logits/rejected": -1.8031425476074219, "logps/chosen": -250.1339874267578, "logps/rejected": -235.4950714111328, "loss": 0.9934, "rewards/accuracies": 0.5625, "rewards/chosen": 0.004927259869873524, "rewards/margins": 0.009144905023276806, "rewards/rejected": -0.004217647016048431, "step": 110 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -1.836554765701294, "logits/rejected": -1.8783756494522095, "logps/chosen": -319.69873046875, "logps/rejected": -234.7427215576172, "loss": 0.9934, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.002145239384844899, "rewards/margins": 0.012560705654323101, "rewards/rejected": -0.010415466502308846, "step": 120 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -1.9726636409759521, "logits/rejected": -1.9809471368789673, "logps/chosen": -283.53985595703125, "logps/rejected": -239.5770721435547, "loss": 0.9926, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.012158048339188099, "rewards/margins": 0.019386615604162216, "rewards/rejected": -0.007228570524603128, "step": 130 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.024657726287842, "logits/rejected": -1.880409836769104, "logps/chosen": -269.13629150390625, "logps/rejected": -213.2104949951172, "loss": 0.9939, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.0044730305671691895, "rewards/margins": 0.00196995516307652, "rewards/rejected": 0.0025030761025846004, "step": 140 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -1.8670217990875244, "logits/rejected": -2.0206334590911865, "logps/chosen": -268.1510925292969, "logps/rejected": -225.56527709960938, "loss": 0.9891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.011090939864516258, "rewards/margins": 0.014944592490792274, "rewards/rejected": -0.0038536519277840853, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -2.1929473876953125, "logits/rejected": -2.0768256187438965, "logps/chosen": -311.89215087890625, "logps/rejected": -241.4412078857422, "loss": 0.9796, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.014715956524014473, "rewards/margins": 0.023210834711790085, "rewards/rejected": -0.008494878187775612, "step": 160 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.0199875831604004, "logits/rejected": -2.028744697570801, "logps/chosen": -270.58197021484375, "logps/rejected": -221.40823364257812, "loss": 0.9859, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.008927601389586926, "rewards/margins": 0.004688750021159649, "rewards/rejected": 0.004238851368427277, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -1.9151681661605835, "logits/rejected": -1.9035532474517822, "logps/chosen": -215.2420196533203, "logps/rejected": -195.22682189941406, "loss": 0.9779, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.010352599434554577, "rewards/margins": 0.02298773266375065, "rewards/rejected": -0.012635131366550922, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -1.9542953968048096, "logits/rejected": -1.8753105401992798, "logps/chosen": -266.09234619140625, "logps/rejected": -196.09292602539062, "loss": 0.9728, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.01909886673092842, "rewards/margins": 0.028360243886709213, "rewards/rejected": -0.009261379018425941, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -1.8358113765716553, "logits/rejected": -1.9175758361816406, "logps/chosen": -221.7442169189453, "logps/rejected": -154.5623016357422, "loss": 0.9761, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.018382752314209938, "rewards/margins": 0.026139695197343826, "rewards/rejected": -0.007756945677101612, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -1.9191606044769287, "logits/rejected": -1.90776789188385, "logps/chosen": -299.2118225097656, "logps/rejected": -205.9092559814453, "loss": 0.9656, "rewards/accuracies": 0.6875, "rewards/chosen": 0.016829822212457657, "rewards/margins": 0.03627743944525719, "rewards/rejected": -0.01944761723279953, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -1.827575445175171, "logits/rejected": -1.7433605194091797, "logps/chosen": -220.52767944335938, "logps/rejected": -210.1706085205078, "loss": 0.9578, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.00790109671652317, "rewards/margins": 0.03705426678061485, "rewards/rejected": -0.029153168201446533, "step": 220 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -1.8479468822479248, "logits/rejected": -1.7774213552474976, "logps/chosen": -235.8931427001953, "logps/rejected": -207.35610961914062, "loss": 0.9595, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01835930533707142, "rewards/margins": 0.04197651147842407, "rewards/rejected": -0.023617204278707504, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -1.9261528253555298, "logits/rejected": -1.9167568683624268, "logps/chosen": -313.67254638671875, "logps/rejected": -203.87185668945312, "loss": 0.9514, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.016246426850557327, "rewards/margins": 0.03881000727415085, "rewards/rejected": -0.022563578560948372, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -1.9955661296844482, "logits/rejected": -1.906306266784668, "logps/chosen": -255.4471435546875, "logps/rejected": -228.58023071289062, "loss": 0.945, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02427186816930771, "rewards/margins": 0.06289727985858917, "rewards/rejected": -0.03862541541457176, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -1.961930274963379, "logits/rejected": -1.925752878189087, "logps/chosen": -267.53271484375, "logps/rejected": -198.67776489257812, "loss": 0.935, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.03963503614068031, "rewards/margins": 0.08463665097951889, "rewards/rejected": -0.045001622289419174, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -2.0039916038513184, "logits/rejected": -2.0741240978240967, "logps/chosen": -292.31549072265625, "logps/rejected": -226.5825958251953, "loss": 0.9226, "rewards/accuracies": 0.625, "rewards/chosen": 0.00682613393291831, "rewards/margins": 0.052050817757844925, "rewards/rejected": -0.04522468149662018, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -1.9262123107910156, "logits/rejected": -1.8254365921020508, "logps/chosen": -271.57513427734375, "logps/rejected": -218.11032104492188, "loss": 0.9163, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.05085228011012077, "rewards/margins": 0.0943886935710907, "rewards/rejected": -0.04353641718626022, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -2.0598292350769043, "logits/rejected": -1.8968032598495483, "logps/chosen": -271.613037109375, "logps/rejected": -210.1744384765625, "loss": 0.9097, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0635281652212143, "rewards/margins": 0.12082493305206299, "rewards/rejected": -0.0572967603802681, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.982798165137615e-07, "logits/chosen": -1.8396161794662476, "logits/rejected": -1.779496431350708, "logps/chosen": -227.2111358642578, "logps/rejected": -224.27841186523438, "loss": 0.8899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.007174974773079157, "rewards/margins": 0.07386655360460281, "rewards/rejected": -0.06669158488512039, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.963685015290519e-07, "logits/chosen": -1.9974682331085205, "logits/rejected": -2.1091110706329346, "logps/chosen": -316.4252014160156, "logps/rejected": -260.0753173828125, "loss": 0.8829, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04557584971189499, "rewards/margins": 0.1289386749267578, "rewards/rejected": -0.08336281031370163, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.944571865443424e-07, "logits/chosen": -1.846778154373169, "logits/rejected": -1.7403427362442017, "logps/chosen": -248.9180450439453, "logps/rejected": -179.14321899414062, "loss": 0.8495, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.02153083309531212, "rewards/margins": 0.15574803948402405, "rewards/rejected": -0.13421721756458282, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.92545871559633e-07, "logits/chosen": -1.9045463800430298, "logits/rejected": -1.9483264684677124, "logps/chosen": -325.5777282714844, "logps/rejected": -232.71499633789062, "loss": 0.8641, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.03281577304005623, "rewards/margins": 0.12244097143411636, "rewards/rejected": -0.08962519466876984, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.906345565749235e-07, "logits/chosen": -1.721255898475647, "logits/rejected": -1.6868213415145874, "logps/chosen": -236.89242553710938, "logps/rejected": -216.6375732421875, "loss": 0.8565, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.004082153085619211, "rewards/margins": 0.11858376115560532, "rewards/rejected": -0.11450158059597015, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.88723241590214e-07, "logits/chosen": -1.8765642642974854, "logits/rejected": -1.8209621906280518, "logps/chosen": -297.5986328125, "logps/rejected": -230.46334838867188, "loss": 0.8171, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.040931276977062225, "rewards/margins": 0.19416043162345886, "rewards/rejected": -0.15322914719581604, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.868119266055046e-07, "logits/chosen": -1.9383245706558228, "logits/rejected": -1.9298241138458252, "logps/chosen": -283.68145751953125, "logps/rejected": -255.94949340820312, "loss": 0.7955, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.003488479647785425, "rewards/margins": 0.1996344029903412, "rewards/rejected": -0.20312288403511047, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.849006116207951e-07, "logits/chosen": -2.060605764389038, "logits/rejected": -1.9366719722747803, "logps/chosen": -284.1970520019531, "logps/rejected": -244.7628936767578, "loss": 0.7797, "rewards/accuracies": 0.6875, "rewards/chosen": 0.06834305822849274, "rewards/margins": 0.258323609828949, "rewards/rejected": -0.18998056650161743, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.829892966360856e-07, "logits/chosen": -1.9930108785629272, "logits/rejected": -1.8641077280044556, "logps/chosen": -305.69573974609375, "logps/rejected": -261.52850341796875, "loss": 0.8123, "rewards/accuracies": 0.75, "rewards/chosen": 0.0743982270359993, "rewards/margins": 0.23559841513633728, "rewards/rejected": -0.1612001657485962, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.810779816513762e-07, "logits/chosen": -1.9459863901138306, "logits/rejected": -1.8753139972686768, "logps/chosen": -236.7461700439453, "logps/rejected": -215.2508087158203, "loss": 0.781, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.02323376014828682, "rewards/margins": 0.22391769289970398, "rewards/rejected": -0.2471514195203781, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.791666666666667e-07, "logits/chosen": -1.850494623184204, "logits/rejected": -1.8692734241485596, "logps/chosen": -270.0162658691406, "logps/rejected": -190.91690063476562, "loss": 0.7595, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03287973254919052, "rewards/margins": 0.2878955900669098, "rewards/rejected": -0.2550157904624939, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.772553516819572e-07, "logits/chosen": -1.9037336111068726, "logits/rejected": -1.889154076576233, "logps/chosen": -283.4707336425781, "logps/rejected": -263.4888000488281, "loss": 0.7516, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.023483851924538612, "rewards/margins": 0.3119986057281494, "rewards/rejected": -0.2885147035121918, "step": 410 }, { "epoch": 0.43, "learning_rate": 4.753440366972477e-07, "logits/chosen": -2.035512924194336, "logits/rejected": -1.9027979373931885, "logps/chosen": -223.506103515625, "logps/rejected": -222.5137176513672, "loss": 0.7745, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.08041305840015411, "rewards/margins": 0.13345971703529358, "rewards/rejected": -0.2138727903366089, "step": 420 }, { "epoch": 0.44, "learning_rate": 4.7343272171253825e-07, "logits/chosen": -1.9109729528427124, "logits/rejected": -1.9474281072616577, "logps/chosen": -241.5768280029297, "logps/rejected": -223.0868682861328, "loss": 0.7334, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05294427275657654, "rewards/margins": 0.23036575317382812, "rewards/rejected": -0.28331005573272705, "step": 430 }, { "epoch": 0.45, "learning_rate": 4.715214067278288e-07, "logits/chosen": -1.9270904064178467, "logits/rejected": -1.9717504978179932, "logps/chosen": -278.0482177734375, "logps/rejected": -216.4290008544922, "loss": 0.6853, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.031850665807724, "rewards/margins": 0.4009469151496887, "rewards/rejected": -0.43279749155044556, "step": 440 }, { "epoch": 0.46, "learning_rate": 4.696100917431192e-07, "logits/chosen": -1.8353502750396729, "logits/rejected": -1.9710232019424438, "logps/chosen": -314.343505859375, "logps/rejected": -263.90374755859375, "loss": 0.7716, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0073336451314389706, "rewards/margins": 0.2296525537967682, "rewards/rejected": -0.22231896221637726, "step": 450 }, { "epoch": 0.47, "learning_rate": 4.6769877675840974e-07, "logits/chosen": -1.8592478036880493, "logits/rejected": -1.7888505458831787, "logps/chosen": -253.55599975585938, "logps/rejected": -232.0161895751953, "loss": 0.6498, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07120613008737564, "rewards/margins": 0.2692238688468933, "rewards/rejected": -0.34042999148368835, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.6578746177370027e-07, "logits/chosen": -1.830445647239685, "logits/rejected": -1.8560142517089844, "logps/chosen": -196.4136962890625, "logps/rejected": -182.5649871826172, "loss": 0.6393, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06338037550449371, "rewards/margins": 0.29189833998680115, "rewards/rejected": -0.35527873039245605, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.638761467889908e-07, "logits/chosen": -1.887717604637146, "logits/rejected": -1.8548129796981812, "logps/chosen": -274.1175537109375, "logps/rejected": -214.11209106445312, "loss": 0.687, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.057955551892519, "rewards/margins": 0.43419212102890015, "rewards/rejected": -0.4921477437019348, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.6196483180428133e-07, "logits/chosen": -1.9349689483642578, "logits/rejected": -1.9116013050079346, "logps/chosen": -269.4372863769531, "logps/rejected": -212.83779907226562, "loss": 0.6166, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.041172344237565994, "rewards/margins": 0.3499607443809509, "rewards/rejected": -0.39113301038742065, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.600535168195718e-07, "logits/chosen": -1.8402849435806274, "logits/rejected": -1.827770471572876, "logps/chosen": -213.6937255859375, "logps/rejected": -208.28866577148438, "loss": 0.6061, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.18305625021457672, "rewards/margins": 0.20005738735198975, "rewards/rejected": -0.3831135928630829, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.5814220183486234e-07, "logits/chosen": -1.8466438055038452, "logits/rejected": -1.8012769222259521, "logps/chosen": -292.2630310058594, "logps/rejected": -263.00091552734375, "loss": 0.5948, "rewards/accuracies": 0.6875, "rewards/chosen": -0.13879308104515076, "rewards/margins": 0.31510522961616516, "rewards/rejected": -0.45389825105667114, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.562308868501529e-07, "logits/chosen": -1.909223198890686, "logits/rejected": -1.7789217233657837, "logps/chosen": -255.5054931640625, "logps/rejected": -254.28439331054688, "loss": 0.609, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.03209429234266281, "rewards/margins": 0.5229288935661316, "rewards/rejected": -0.555023193359375, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.543195718654434e-07, "logits/chosen": -1.7467231750488281, "logits/rejected": -1.8646119832992554, "logps/chosen": -221.7431640625, "logps/rejected": -200.1488494873047, "loss": 0.6115, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2338542938232422, "rewards/margins": 0.2427963763475418, "rewards/rejected": -0.4766507148742676, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.5240825688073394e-07, "logits/chosen": -1.8729002475738525, "logits/rejected": -1.6796636581420898, "logps/chosen": -280.7690734863281, "logps/rejected": -226.85562133789062, "loss": 0.5931, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08351187407970428, "rewards/margins": 0.5537512898445129, "rewards/rejected": -0.6372631788253784, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.504969418960244e-07, "logits/chosen": -2.0048041343688965, "logits/rejected": -1.637963056564331, "logps/chosen": -254.12939453125, "logps/rejected": -259.4322814941406, "loss": 0.5974, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.12823496758937836, "rewards/margins": 0.29864269495010376, "rewards/rejected": -0.4268776476383209, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.4858562691131495e-07, "logits/chosen": -2.012173652648926, "logits/rejected": -1.9192218780517578, "logps/chosen": -291.0408935546875, "logps/rejected": -280.9718933105469, "loss": 0.524, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1138150691986084, "rewards/margins": 0.5268052816390991, "rewards/rejected": -0.6406203508377075, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.466743119266055e-07, "logits/chosen": -1.9362258911132812, "logits/rejected": -1.841904878616333, "logps/chosen": -275.77545166015625, "logps/rejected": -239.5581817626953, "loss": 0.5702, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2778164744377136, "rewards/margins": 0.3516896367073059, "rewards/rejected": -0.6295061111450195, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.44762996941896e-07, "logits/chosen": -1.8746612071990967, "logits/rejected": -1.979645013809204, "logps/chosen": -292.9163513183594, "logps/rejected": -251.6305694580078, "loss": 0.493, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10857485234737396, "rewards/margins": 0.647341251373291, "rewards/rejected": -0.7559161186218262, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.4285168195718655e-07, "logits/chosen": -1.7814861536026, "logits/rejected": -1.7613455057144165, "logps/chosen": -233.90591430664062, "logps/rejected": -235.93832397460938, "loss": 0.4851, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2671836018562317, "rewards/margins": 0.46907442808151245, "rewards/rejected": -0.7362579107284546, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.40940366972477e-07, "logits/chosen": -1.9857203960418701, "logits/rejected": -1.810185194015503, "logps/chosen": -234.3582763671875, "logps/rejected": -216.35922241210938, "loss": 0.514, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.29425790905952454, "rewards/margins": 0.4220006465911865, "rewards/rejected": -0.7162585258483887, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.3902905198776756e-07, "logits/chosen": -1.8552411794662476, "logits/rejected": -1.839869499206543, "logps/chosen": -275.1326904296875, "logps/rejected": -226.4814910888672, "loss": 0.4751, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1326485425233841, "rewards/margins": 0.5507789254188538, "rewards/rejected": -0.6834274530410767, "step": 610 }, { "epoch": 0.64, "learning_rate": 4.371177370030581e-07, "logits/chosen": -1.9499883651733398, "logits/rejected": -1.8449184894561768, "logps/chosen": -274.458984375, "logps/rejected": -231.59921264648438, "loss": 0.3819, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2783154845237732, "rewards/margins": 0.6603595614433289, "rewards/rejected": -0.938675045967102, "step": 620 }, { "epoch": 0.65, "learning_rate": 4.352064220183486e-07, "logits/chosen": -1.7157669067382812, "logits/rejected": -1.7650973796844482, "logps/chosen": -215.53311157226562, "logps/rejected": -200.57073974609375, "loss": 0.434, "rewards/accuracies": 0.6875, "rewards/chosen": -0.27303647994995117, "rewards/margins": 0.4611503481864929, "rewards/rejected": -0.7341868281364441, "step": 630 }, { "epoch": 0.66, "learning_rate": 4.3329510703363915e-07, "logits/chosen": -1.8946468830108643, "logits/rejected": -1.8263728618621826, "logps/chosen": -255.1796417236328, "logps/rejected": -215.44821166992188, "loss": 0.3838, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2462444007396698, "rewards/margins": 0.8292306661605835, "rewards/rejected": -1.0754752159118652, "step": 640 }, { "epoch": 0.67, "learning_rate": 4.313837920489297e-07, "logits/chosen": -1.8516194820404053, "logits/rejected": -1.8784716129302979, "logps/chosen": -231.55343627929688, "logps/rejected": -226.981201171875, "loss": 0.3954, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.19073012471199036, "rewards/margins": 0.6038515567779541, "rewards/rejected": -0.7945817112922668, "step": 650 }, { "epoch": 0.68, "learning_rate": 4.2947247706422016e-07, "logits/chosen": -1.8391790390014648, "logits/rejected": -1.9652955532073975, "logps/chosen": -252.66983032226562, "logps/rejected": -235.1903533935547, "loss": 0.3506, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.33843857049942017, "rewards/margins": 0.6845678091049194, "rewards/rejected": -1.0230063199996948, "step": 660 }, { "epoch": 0.69, "learning_rate": 4.275611620795107e-07, "logits/chosen": -1.9520423412322998, "logits/rejected": -1.8214895725250244, "logps/chosen": -308.54522705078125, "logps/rejected": -243.960693359375, "loss": 0.3558, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36050236225128174, "rewards/margins": 0.6172996759414673, "rewards/rejected": -0.9778021574020386, "step": 670 }, { "epoch": 0.7, "learning_rate": 4.2564984709480123e-07, "logits/chosen": -1.9856923818588257, "logits/rejected": -1.8237674236297607, "logps/chosen": -291.28717041015625, "logps/rejected": -226.1394500732422, "loss": 0.4798, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.4318040907382965, "rewards/margins": 0.5425896644592285, "rewards/rejected": -0.9743936657905579, "step": 680 }, { "epoch": 0.71, "learning_rate": 4.2373853211009176e-07, "logits/chosen": -2.0162062644958496, "logits/rejected": -1.994715690612793, "logps/chosen": -265.1205139160156, "logps/rejected": -243.013916015625, "loss": 0.4168, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4797751307487488, "rewards/margins": 0.790332555770874, "rewards/rejected": -1.2701075077056885, "step": 690 }, { "epoch": 0.72, "learning_rate": 4.2182721712538224e-07, "logits/chosen": -1.9402358531951904, "logits/rejected": -1.7293392419815063, "logps/chosen": -283.6700744628906, "logps/rejected": -226.171142578125, "loss": 0.3693, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.17032106220722198, "rewards/margins": 1.0015548467636108, "rewards/rejected": -1.171876072883606, "step": 700 }, { "epoch": 0.73, "learning_rate": 4.199159021406727e-07, "logits/chosen": -1.9769999980926514, "logits/rejected": -1.6814262866973877, "logps/chosen": -243.1233367919922, "logps/rejected": -221.1171875, "loss": 0.3979, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5523873567581177, "rewards/margins": 0.596055269241333, "rewards/rejected": -1.1484426259994507, "step": 710 }, { "epoch": 0.74, "learning_rate": 4.1800458715596325e-07, "logits/chosen": -1.927851915359497, "logits/rejected": -1.8016504049301147, "logps/chosen": -311.8240966796875, "logps/rejected": -259.55096435546875, "loss": 0.3839, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5330373644828796, "rewards/margins": 0.41053658723831177, "rewards/rejected": -0.9435739517211914, "step": 720 }, { "epoch": 0.75, "learning_rate": 4.160932721712538e-07, "logits/chosen": -1.934565544128418, "logits/rejected": -1.8886038064956665, "logps/chosen": -286.6680908203125, "logps/rejected": -258.78961181640625, "loss": 0.3174, "rewards/accuracies": 0.6875, "rewards/chosen": -0.48792019486427307, "rewards/margins": 0.4587516784667969, "rewards/rejected": -0.9466718435287476, "step": 730 }, { "epoch": 0.76, "learning_rate": 4.141819571865443e-07, "logits/chosen": -1.7957875728607178, "logits/rejected": -1.8168989419937134, "logps/chosen": -255.51919555664062, "logps/rejected": -200.88294982910156, "loss": 0.2208, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5373031497001648, "rewards/margins": 0.7255297899246216, "rewards/rejected": -1.2628331184387207, "step": 740 }, { "epoch": 0.77, "learning_rate": 4.1227064220183485e-07, "logits/chosen": -1.784045934677124, "logits/rejected": -1.66024649143219, "logps/chosen": -261.5906677246094, "logps/rejected": -236.45486450195312, "loss": 0.2914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6871198415756226, "rewards/margins": 0.8158855438232422, "rewards/rejected": -1.5030055046081543, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.103593272171253e-07, "logits/chosen": -1.8295629024505615, "logits/rejected": -1.8517471551895142, "logps/chosen": -273.9855651855469, "logps/rejected": -268.47784423828125, "loss": 0.2635, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6461771130561829, "rewards/margins": 0.6871333122253418, "rewards/rejected": -1.3333103656768799, "step": 760 }, { "epoch": 0.79, "learning_rate": 4.0844801223241586e-07, "logits/chosen": -1.7927148342132568, "logits/rejected": -1.8215078115463257, "logps/chosen": -280.751220703125, "logps/rejected": -242.97573852539062, "loss": 0.1871, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.48614630103111267, "rewards/margins": 0.6972783207893372, "rewards/rejected": -1.183424472808838, "step": 770 }, { "epoch": 0.8, "learning_rate": 4.065366972477064e-07, "logits/chosen": -1.9858176708221436, "logits/rejected": -1.8561077117919922, "logps/chosen": -287.98516845703125, "logps/rejected": -252.1136932373047, "loss": 0.2167, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.689589262008667, "rewards/margins": 0.799708366394043, "rewards/rejected": -1.4892975091934204, "step": 780 }, { "epoch": 0.82, "learning_rate": 4.046253822629969e-07, "logits/chosen": -1.8583405017852783, "logits/rejected": -1.8265085220336914, "logps/chosen": -265.4439697265625, "logps/rejected": -234.42135620117188, "loss": 0.0371, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.582925021648407, "rewards/margins": 0.8660266995429993, "rewards/rejected": -1.4489517211914062, "step": 790 }, { "epoch": 0.83, "learning_rate": 4.0271406727828745e-07, "logits/chosen": -1.8910176753997803, "logits/rejected": -1.8912360668182373, "logps/chosen": -259.268310546875, "logps/rejected": -229.13623046875, "loss": 0.2407, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8472847938537598, "rewards/margins": 0.5785714387893677, "rewards/rejected": -1.425856351852417, "step": 800 }, { "epoch": 0.84, "learning_rate": 4.00802752293578e-07, "logits/chosen": -1.8274396657943726, "logits/rejected": -1.7036545276641846, "logps/chosen": -273.0886535644531, "logps/rejected": -241.332763671875, "loss": 0.0946, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7010241746902466, "rewards/margins": 0.7486527562141418, "rewards/rejected": -1.449676752090454, "step": 810 }, { "epoch": 0.85, "learning_rate": 3.9889143730886847e-07, "logits/chosen": -2.0030179023742676, "logits/rejected": -1.938431978225708, "logps/chosen": -308.9624328613281, "logps/rejected": -252.2946319580078, "loss": 0.1224, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5951758623123169, "rewards/margins": 1.0006935596466064, "rewards/rejected": -1.5958693027496338, "step": 820 }, { "epoch": 0.86, "learning_rate": 3.96980122324159e-07, "logits/chosen": -1.889276146888733, "logits/rejected": -1.7101726531982422, "logps/chosen": -264.00048828125, "logps/rejected": -214.24649047851562, "loss": -0.045, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6674525737762451, "rewards/margins": 1.1390091180801392, "rewards/rejected": -1.8064616918563843, "step": 830 }, { "epoch": 0.87, "learning_rate": 3.9506880733944953e-07, "logits/chosen": -1.877962350845337, "logits/rejected": -1.7040239572525024, "logps/chosen": -257.03582763671875, "logps/rejected": -227.9223175048828, "loss": 0.3336, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.072399377822876, "rewards/margins": 0.5846762657165527, "rewards/rejected": -1.6570755243301392, "step": 840 }, { "epoch": 0.88, "learning_rate": 3.9315749235474006e-07, "logits/chosen": -1.840431809425354, "logits/rejected": -1.892249345779419, "logps/chosen": -278.1147766113281, "logps/rejected": -272.54327392578125, "loss": 0.1259, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8901047706604004, "rewards/margins": 0.7304579019546509, "rewards/rejected": -1.6205627918243408, "step": 850 }, { "epoch": 0.89, "learning_rate": 3.912461773700306e-07, "logits/chosen": -2.031358003616333, "logits/rejected": -1.8924942016601562, "logps/chosen": -300.32110595703125, "logps/rejected": -295.2864074707031, "loss": 0.0646, "rewards/accuracies": 0.75, "rewards/chosen": -0.8579466938972473, "rewards/margins": 1.062766432762146, "rewards/rejected": -1.920713186264038, "step": 860 }, { "epoch": 0.9, "learning_rate": 3.8933486238532107e-07, "logits/chosen": -1.6753685474395752, "logits/rejected": -1.6504751443862915, "logps/chosen": -313.80853271484375, "logps/rejected": -248.9676513671875, "loss": 0.0988, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7647205591201782, "rewards/margins": 1.3286702632904053, "rewards/rejected": -2.093390941619873, "step": 870 }, { "epoch": 0.91, "learning_rate": 3.874235474006116e-07, "logits/chosen": -1.8231757879257202, "logits/rejected": -1.7661590576171875, "logps/chosen": -287.50677490234375, "logps/rejected": -241.2566375732422, "loss": -0.0319, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6531956195831299, "rewards/margins": 1.3242131471633911, "rewards/rejected": -1.9774086475372314, "step": 880 }, { "epoch": 0.92, "learning_rate": 3.8551223241590214e-07, "logits/chosen": -1.8550631999969482, "logits/rejected": -1.7476036548614502, "logps/chosen": -261.09033203125, "logps/rejected": -238.5321502685547, "loss": 0.1266, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9579564332962036, "rewards/margins": 1.210017442703247, "rewards/rejected": -2.1679739952087402, "step": 890 }, { "epoch": 0.93, "learning_rate": 3.8360091743119267e-07, "logits/chosen": -1.9090309143066406, "logits/rejected": -1.6929905414581299, "logps/chosen": -267.00323486328125, "logps/rejected": -239.107177734375, "loss": -0.0438, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9265840649604797, "rewards/margins": 1.2149267196655273, "rewards/rejected": -2.1415107250213623, "step": 900 }, { "epoch": 0.94, "learning_rate": 3.816896024464832e-07, "logits/chosen": -1.7493388652801514, "logits/rejected": -1.9231748580932617, "logps/chosen": -240.63134765625, "logps/rejected": -220.7272186279297, "loss": -0.0152, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0793166160583496, "rewards/margins": 0.8653289675712585, "rewards/rejected": -1.9446455240249634, "step": 910 }, { "epoch": 0.95, "learning_rate": 3.797782874617737e-07, "logits/chosen": -1.9112157821655273, "logits/rejected": -2.013073682785034, "logps/chosen": -284.13824462890625, "logps/rejected": -243.91806030273438, "loss": -0.0423, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1301993131637573, "rewards/margins": 0.8508475422859192, "rewards/rejected": -1.9810469150543213, "step": 920 }, { "epoch": 0.96, "learning_rate": 3.778669724770642e-07, "logits/chosen": -1.779415488243103, "logits/rejected": -1.688969612121582, "logps/chosen": -264.59710693359375, "logps/rejected": -212.7026824951172, "loss": -0.0335, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0757131576538086, "rewards/margins": 1.205359697341919, "rewards/rejected": -2.2810730934143066, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.7595565749235474e-07, "logits/chosen": -1.9989725351333618, "logits/rejected": -1.9091949462890625, "logps/chosen": -287.1456604003906, "logps/rejected": -226.24771118164062, "loss": 0.0097, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8323071599006653, "rewards/margins": 1.2861956357955933, "rewards/rejected": -2.1185028553009033, "step": 940 }, { "epoch": 0.98, "learning_rate": 3.740443425076452e-07, "logits/chosen": -1.9547460079193115, "logits/rejected": -1.8703094720840454, "logps/chosen": -287.35198974609375, "logps/rejected": -261.2727966308594, "loss": -0.0247, "rewards/accuracies": 0.625, "rewards/chosen": -1.3492201566696167, "rewards/margins": 0.8363786935806274, "rewards/rejected": -2.185598850250244, "step": 950 }, { "epoch": 0.99, "learning_rate": 3.7213302752293575e-07, "logits/chosen": -1.86129891872406, "logits/rejected": -1.7522859573364258, "logps/chosen": -273.3508605957031, "logps/rejected": -244.61569213867188, "loss": -0.2007, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0919667482376099, "rewards/margins": 0.9706255793571472, "rewards/rejected": -2.0625922679901123, "step": 960 }, { "epoch": 1.0, "eval_logits/chosen": -2.097571611404419, "eval_logits/rejected": -1.9875798225402832, "eval_logps/chosen": -301.2187805175781, "eval_logps/rejected": -259.9261779785156, "eval_loss": -0.09876806288957596, "eval_rewards/accuracies": 0.6746031641960144, "eval_rewards/chosen": -1.1416146755218506, "eval_rewards/margins": 1.3577306270599365, "eval_rewards/rejected": -2.499345302581787, "eval_runtime": 238.6969, "eval_samples_per_second": 8.379, "eval_steps_per_second": 0.264, "step": 969 }, { "epoch": 1.0, "learning_rate": 3.702217125382263e-07, "logits/chosen": -1.9615962505340576, "logits/rejected": -1.7775417566299438, "logps/chosen": -293.53924560546875, "logps/rejected": -264.0589599609375, "loss": -0.1153, "rewards/accuracies": 0.6875, "rewards/chosen": -1.265012264251709, "rewards/margins": 1.2176616191864014, "rewards/rejected": -2.4826738834381104, "step": 970 }, { "epoch": 1.01, "learning_rate": 3.6831039755351677e-07, "logits/chosen": -1.7482898235321045, "logits/rejected": -1.872532844543457, "logps/chosen": -272.051025390625, "logps/rejected": -255.67703247070312, "loss": -0.1987, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1273905038833618, "rewards/margins": 1.4816131591796875, "rewards/rejected": -2.609003782272339, "step": 980 }, { "epoch": 1.02, "learning_rate": 3.663990825688073e-07, "logits/chosen": -1.8804614543914795, "logits/rejected": -1.6789354085922241, "logps/chosen": -262.64892578125, "logps/rejected": -241.96353149414062, "loss": 0.1311, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6520445346832275, "rewards/margins": 0.846520721912384, "rewards/rejected": -2.4985649585723877, "step": 990 }, { "epoch": 1.03, "learning_rate": 3.6448776758409783e-07, "logits/chosen": -1.989675760269165, "logits/rejected": -1.7391383647918701, "logps/chosen": -288.917724609375, "logps/rejected": -266.159912109375, "loss": -0.0103, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2550522089004517, "rewards/margins": 1.0477917194366455, "rewards/rejected": -2.3028438091278076, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.6257645259938836e-07, "logits/chosen": -1.848597526550293, "logits/rejected": -1.7234575748443604, "logps/chosen": -254.86672973632812, "logps/rejected": -258.8224792480469, "loss": -0.1996, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1828529834747314, "rewards/margins": 1.4569838047027588, "rewards/rejected": -2.6398367881774902, "step": 1010 }, { "epoch": 1.05, "learning_rate": 3.606651376146789e-07, "logits/chosen": -1.6995182037353516, "logits/rejected": -1.8116445541381836, "logps/chosen": -258.5903015136719, "logps/rejected": -209.5008544921875, "loss": -0.3461, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5025261640548706, "rewards/margins": 1.1272718906402588, "rewards/rejected": -2.629798412322998, "step": 1020 }, { "epoch": 1.06, "learning_rate": 3.5875382262996937e-07, "logits/chosen": -1.7140220403671265, "logits/rejected": -1.7449703216552734, "logps/chosen": -283.09796142578125, "logps/rejected": -295.8421325683594, "loss": -0.232, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.183707594871521, "rewards/margins": 1.0434316396713257, "rewards/rejected": -2.2271392345428467, "step": 1030 }, { "epoch": 1.07, "learning_rate": 3.568425076452599e-07, "logits/chosen": -1.8618385791778564, "logits/rejected": -1.7049167156219482, "logps/chosen": -295.1578063964844, "logps/rejected": -260.01678466796875, "loss": -0.2474, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6388660669326782, "rewards/margins": 0.968669056892395, "rewards/rejected": -2.6075356006622314, "step": 1040 }, { "epoch": 1.08, "learning_rate": 3.5493119266055044e-07, "logits/chosen": -1.7115428447723389, "logits/rejected": -1.6858937740325928, "logps/chosen": -273.44049072265625, "logps/rejected": -214.3227996826172, "loss": -0.3821, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.6126127243041992, "rewards/margins": 1.3140199184417725, "rewards/rejected": -2.9266326427459717, "step": 1050 }, { "epoch": 1.09, "learning_rate": 3.5301987767584097e-07, "logits/chosen": -1.7064205408096313, "logits/rejected": -1.6017926931381226, "logps/chosen": -268.70416259765625, "logps/rejected": -272.87066650390625, "loss": -0.3771, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3269054889678955, "rewards/margins": 1.9102023839950562, "rewards/rejected": -3.237107515335083, "step": 1060 }, { "epoch": 1.1, "learning_rate": 3.511085626911315e-07, "logits/chosen": -1.8583990335464478, "logits/rejected": -1.5322113037109375, "logps/chosen": -333.03924560546875, "logps/rejected": -262.2927551269531, "loss": -0.4133, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4196674823760986, "rewards/margins": 1.6081445217132568, "rewards/rejected": -3.0278122425079346, "step": 1070 }, { "epoch": 1.11, "learning_rate": 3.49197247706422e-07, "logits/chosen": -1.6976341009140015, "logits/rejected": -1.6782453060150146, "logps/chosen": -233.9385223388672, "logps/rejected": -220.5770263671875, "loss": -0.4013, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9680936336517334, "rewards/margins": 0.8472940325737, "rewards/rejected": -2.8153879642486572, "step": 1080 }, { "epoch": 1.12, "learning_rate": 3.472859327217125e-07, "logits/chosen": -1.7105554342269897, "logits/rejected": -1.4681594371795654, "logps/chosen": -300.76226806640625, "logps/rejected": -273.32696533203125, "loss": -0.3314, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4938586950302124, "rewards/margins": 1.572718858718872, "rewards/rejected": -3.066577196121216, "step": 1090 }, { "epoch": 1.14, "learning_rate": 3.4537461773700304e-07, "logits/chosen": -1.5856693983078003, "logits/rejected": -1.7974258661270142, "logps/chosen": -225.534423828125, "logps/rejected": -204.51913452148438, "loss": -0.311, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.020268201828003, "rewards/margins": 1.0005836486816406, "rewards/rejected": -3.0208516120910645, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.434633027522936e-07, "logits/chosen": -1.7942464351654053, "logits/rejected": -1.6902000904083252, "logps/chosen": -293.67706298828125, "logps/rejected": -255.2344512939453, "loss": -0.3927, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.663823127746582, "rewards/margins": 1.3553807735443115, "rewards/rejected": -3.0192039012908936, "step": 1110 }, { "epoch": 1.16, "learning_rate": 3.415519877675841e-07, "logits/chosen": -1.7715240716934204, "logits/rejected": -1.643781065940857, "logps/chosen": -252.25729370117188, "logps/rejected": -269.1502685546875, "loss": -0.4037, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5638118982315063, "rewards/margins": 1.3495436906814575, "rewards/rejected": -2.9133553504943848, "step": 1120 }, { "epoch": 1.17, "learning_rate": 3.3964067278287464e-07, "logits/chosen": -1.7634483575820923, "logits/rejected": -1.7055590152740479, "logps/chosen": -297.56658935546875, "logps/rejected": -240.73849487304688, "loss": -0.4873, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.6788049936294556, "rewards/margins": 1.7267656326293945, "rewards/rejected": -3.4055705070495605, "step": 1130 }, { "epoch": 1.18, "learning_rate": 3.377293577981651e-07, "logits/chosen": -1.6601498126983643, "logits/rejected": -1.5715376138687134, "logps/chosen": -248.473388671875, "logps/rejected": -251.2434844970703, "loss": -0.6724, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1505351066589355, "rewards/margins": 1.4957424402236938, "rewards/rejected": -3.646277666091919, "step": 1140 }, { "epoch": 1.19, "learning_rate": 3.3581804281345565e-07, "logits/chosen": -1.7773525714874268, "logits/rejected": -1.5578742027282715, "logps/chosen": -305.3437805175781, "logps/rejected": -243.5063018798828, "loss": -0.2898, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0654540061950684, "rewards/margins": 1.2946364879608154, "rewards/rejected": -3.3600902557373047, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.339067278287462e-07, "logits/chosen": -1.5956556797027588, "logits/rejected": -1.5084911584854126, "logps/chosen": -269.35736083984375, "logps/rejected": -251.64675903320312, "loss": -0.5003, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1167237758636475, "rewards/margins": 1.4275890588760376, "rewards/rejected": -3.5443129539489746, "step": 1160 }, { "epoch": 1.21, "learning_rate": 3.319954128440367e-07, "logits/chosen": -1.8844757080078125, "logits/rejected": -1.7271381616592407, "logps/chosen": -299.93609619140625, "logps/rejected": -243.735595703125, "loss": -0.5037, "rewards/accuracies": 0.6875, "rewards/chosen": -2.080448627471924, "rewards/margins": 1.4392142295837402, "rewards/rejected": -3.519662857055664, "step": 1170 }, { "epoch": 1.22, "learning_rate": 3.3008409785932725e-07, "logits/chosen": -1.6695266962051392, "logits/rejected": -1.7183201313018799, "logps/chosen": -266.7185974121094, "logps/rejected": -248.3719482421875, "loss": -0.5103, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2367124557495117, "rewards/margins": 1.7176882028579712, "rewards/rejected": -3.9544003009796143, "step": 1180 }, { "epoch": 1.23, "learning_rate": 3.2817278287461773e-07, "logits/chosen": -1.7001529932022095, "logits/rejected": -1.6251550912857056, "logps/chosen": -256.841064453125, "logps/rejected": -273.620849609375, "loss": -0.6167, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.5783565044403076, "rewards/margins": 1.5206773281097412, "rewards/rejected": -4.099034309387207, "step": 1190 }, { "epoch": 1.24, "learning_rate": 3.262614678899082e-07, "logits/chosen": -1.7264814376831055, "logits/rejected": -1.5964632034301758, "logps/chosen": -270.0255126953125, "logps/rejected": -285.0582580566406, "loss": -0.8394, "rewards/accuracies": 0.6875, "rewards/chosen": -2.6388707160949707, "rewards/margins": 1.2730156183242798, "rewards/rejected": -3.911886692047119, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.2435015290519874e-07, "logits/chosen": -1.704097032546997, "logits/rejected": -1.581463098526001, "logps/chosen": -246.20022583007812, "logps/rejected": -232.1162872314453, "loss": -0.4922, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0876688957214355, "rewards/margins": 1.698892593383789, "rewards/rejected": -3.7865612506866455, "step": 1210 }, { "epoch": 1.26, "learning_rate": 3.2243883792048927e-07, "logits/chosen": -1.7485994100570679, "logits/rejected": -1.7491531372070312, "logps/chosen": -307.69842529296875, "logps/rejected": -313.2864685058594, "loss": -0.4846, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.471188545227051, "rewards/margins": 1.2810360193252563, "rewards/rejected": -3.7522246837615967, "step": 1220 }, { "epoch": 1.27, "learning_rate": 3.205275229357798e-07, "logits/chosen": -1.7512283325195312, "logits/rejected": -1.559390902519226, "logps/chosen": -316.75592041015625, "logps/rejected": -306.21160888671875, "loss": -0.5032, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.4955337047576904, "rewards/margins": 1.243558645248413, "rewards/rejected": -3.7390923500061035, "step": 1230 }, { "epoch": 1.28, "learning_rate": 3.186162079510703e-07, "logits/chosen": -1.6653553247451782, "logits/rejected": -1.6318597793579102, "logps/chosen": -300.54473876953125, "logps/rejected": -305.09423828125, "loss": -0.7512, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3271665573120117, "rewards/margins": 1.914544701576233, "rewards/rejected": -4.241711616516113, "step": 1240 }, { "epoch": 1.29, "learning_rate": 3.167048929663608e-07, "logits/chosen": -1.6567821502685547, "logits/rejected": -1.5714284181594849, "logps/chosen": -250.9105682373047, "logps/rejected": -234.5415802001953, "loss": -0.7222, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.356020927429199, "rewards/margins": 1.9912364482879639, "rewards/rejected": -4.347257614135742, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.1479357798165134e-07, "logits/chosen": -1.7020305395126343, "logits/rejected": -1.78921377658844, "logps/chosen": -308.86163330078125, "logps/rejected": -269.3481140136719, "loss": -0.6017, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.404141902923584, "rewards/margins": 1.7603607177734375, "rewards/rejected": -4.1645026206970215, "step": 1260 }, { "epoch": 1.31, "learning_rate": 3.128822629969419e-07, "logits/chosen": -1.7761846780776978, "logits/rejected": -1.8085511922836304, "logps/chosen": -312.18280029296875, "logps/rejected": -287.8406677246094, "loss": -0.6923, "rewards/accuracies": 0.75, "rewards/chosen": -2.880483388900757, "rewards/margins": 2.123534917831421, "rewards/rejected": -5.004018306732178, "step": 1270 }, { "epoch": 1.32, "learning_rate": 3.109709480122324e-07, "logits/chosen": -1.5170055627822876, "logits/rejected": -1.7044398784637451, "logps/chosen": -265.9775085449219, "logps/rejected": -256.2364501953125, "loss": -0.5623, "rewards/accuracies": 0.6875, "rewards/chosen": -2.711970806121826, "rewards/margins": 1.4848562479019165, "rewards/rejected": -4.196827411651611, "step": 1280 }, { "epoch": 1.33, "learning_rate": 3.0905963302752294e-07, "logits/chosen": -1.6849790811538696, "logits/rejected": -1.4959250688552856, "logps/chosen": -266.03338623046875, "logps/rejected": -265.19293212890625, "loss": -0.5952, "rewards/accuracies": 0.6875, "rewards/chosen": -2.8066792488098145, "rewards/margins": 1.4092586040496826, "rewards/rejected": -4.215937614440918, "step": 1290 }, { "epoch": 1.34, "learning_rate": 3.071483180428134e-07, "logits/chosen": -1.6794811487197876, "logits/rejected": -1.7896066904067993, "logps/chosen": -349.8259582519531, "logps/rejected": -259.8904113769531, "loss": -0.8813, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.7685770988464355, "rewards/margins": 2.0453274250030518, "rewards/rejected": -4.813904285430908, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.0523700305810395e-07, "logits/chosen": -1.826123833656311, "logits/rejected": -1.6994121074676514, "logps/chosen": -292.24188232421875, "logps/rejected": -294.88665771484375, "loss": -0.82, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.468705892562866, "rewards/margins": 2.3952927589416504, "rewards/rejected": -4.863998889923096, "step": 1310 }, { "epoch": 1.36, "learning_rate": 3.033256880733945e-07, "logits/chosen": -1.6138427257537842, "logits/rejected": -1.5653212070465088, "logps/chosen": -272.35723876953125, "logps/rejected": -244.7309112548828, "loss": -1.0125, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.5213189125061035, "rewards/margins": 1.1809017658233643, "rewards/rejected": -4.702220439910889, "step": 1320 }, { "epoch": 1.37, "learning_rate": 3.01414373088685e-07, "logits/chosen": -1.678091287612915, "logits/rejected": -1.6383358240127563, "logps/chosen": -305.22039794921875, "logps/rejected": -270.16644287109375, "loss": -0.7008, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.750168800354004, "rewards/margins": 1.9968726634979248, "rewards/rejected": -4.747041702270508, "step": 1330 }, { "epoch": 1.38, "learning_rate": 2.9950305810397555e-07, "logits/chosen": -1.8723211288452148, "logits/rejected": -1.6131559610366821, "logps/chosen": -302.5225524902344, "logps/rejected": -265.1141662597656, "loss": -0.8252, "rewards/accuracies": 0.625, "rewards/chosen": -3.2168407440185547, "rewards/margins": 1.872542381286621, "rewards/rejected": -5.089383125305176, "step": 1340 }, { "epoch": 1.39, "learning_rate": 2.9759174311926603e-07, "logits/chosen": -1.6552165746688843, "logits/rejected": -1.5474542379379272, "logps/chosen": -300.1455383300781, "logps/rejected": -269.47235107421875, "loss": -0.8361, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -3.7245125770568848, "rewards/margins": 1.281724214553833, "rewards/rejected": -5.006236553192139, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.9568042813455656e-07, "logits/chosen": -1.6618553400039673, "logits/rejected": -1.667223334312439, "logps/chosen": -310.4003601074219, "logps/rejected": -243.12240600585938, "loss": -0.731, "rewards/accuracies": 0.6875, "rewards/chosen": -3.615830659866333, "rewards/margins": 1.214110016822815, "rewards/rejected": -4.829940319061279, "step": 1360 }, { "epoch": 1.41, "learning_rate": 2.937691131498471e-07, "logits/chosen": -1.7880207300186157, "logits/rejected": -1.6480613946914673, "logps/chosen": -316.4659729003906, "logps/rejected": -285.7911682128906, "loss": -0.7485, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -3.4602973461151123, "rewards/margins": 1.8094520568847656, "rewards/rejected": -5.269749641418457, "step": 1370 }, { "epoch": 1.42, "learning_rate": 2.918577981651376e-07, "logits/chosen": -1.6994645595550537, "logits/rejected": -1.5988848209381104, "logps/chosen": -261.1907958984375, "logps/rejected": -270.0497741699219, "loss": -0.6992, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.9491472244262695, "rewards/margins": 1.7523548603057861, "rewards/rejected": -4.701501846313477, "step": 1380 }, { "epoch": 1.43, "learning_rate": 2.8994648318042816e-07, "logits/chosen": -1.7593275308609009, "logits/rejected": -1.6046631336212158, "logps/chosen": -299.09637451171875, "logps/rejected": -268.380859375, "loss": -0.9355, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.154327630996704, "rewards/margins": 2.1164073944091797, "rewards/rejected": -5.2707343101501465, "step": 1390 }, { "epoch": 1.44, "learning_rate": 2.8803516819571863e-07, "logits/chosen": -1.7734966278076172, "logits/rejected": -1.5951334238052368, "logps/chosen": -347.52239990234375, "logps/rejected": -293.4967346191406, "loss": -0.7492, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.558985948562622, "rewards/margins": 1.6943881511688232, "rewards/rejected": -5.253373622894287, "step": 1400 }, { "epoch": 1.46, "learning_rate": 2.8612385321100917e-07, "logits/chosen": -1.573909044265747, "logits/rejected": -1.3949878215789795, "logps/chosen": -270.31292724609375, "logps/rejected": -276.7618408203125, "loss": -1.0184, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.927518129348755, "rewards/margins": 1.0909746885299683, "rewards/rejected": -5.018492221832275, "step": 1410 }, { "epoch": 1.47, "learning_rate": 2.842125382262997e-07, "logits/chosen": -1.5700122117996216, "logits/rejected": -1.4677519798278809, "logps/chosen": -285.81103515625, "logps/rejected": -269.4806823730469, "loss": -1.0111, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.3703293800354004, "rewards/margins": 1.9672693014144897, "rewards/rejected": -5.3375983238220215, "step": 1420 }, { "epoch": 1.48, "learning_rate": 2.8230122324159023e-07, "logits/chosen": -1.5128788948059082, "logits/rejected": -1.475921869277954, "logps/chosen": -321.65020751953125, "logps/rejected": -334.950439453125, "loss": -1.0548, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -3.5967164039611816, "rewards/margins": 3.013796806335449, "rewards/rejected": -6.610513210296631, "step": 1430 }, { "epoch": 1.49, "learning_rate": 2.8038990825688076e-07, "logits/chosen": -1.5578891038894653, "logits/rejected": -1.5609424114227295, "logps/chosen": -261.49920654296875, "logps/rejected": -235.4161376953125, "loss": -0.9126, "rewards/accuracies": 0.5625, "rewards/chosen": -4.372951984405518, "rewards/margins": 0.7127580642700195, "rewards/rejected": -5.085709571838379, "step": 1440 }, { "epoch": 1.5, "learning_rate": 2.784785932721712e-07, "logits/chosen": -1.636566400527954, "logits/rejected": -1.4580261707305908, "logps/chosen": -304.53240966796875, "logps/rejected": -282.1325378417969, "loss": -0.9912, "rewards/accuracies": 0.6875, "rewards/chosen": -3.88989520072937, "rewards/margins": 2.1614978313446045, "rewards/rejected": -6.051393032073975, "step": 1450 }, { "epoch": 1.51, "learning_rate": 2.765672782874617e-07, "logits/chosen": -1.5655293464660645, "logits/rejected": -1.485058069229126, "logps/chosen": -318.0874938964844, "logps/rejected": -259.27130126953125, "loss": -0.908, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -4.269219398498535, "rewards/margins": 1.588846206665039, "rewards/rejected": -5.858065605163574, "step": 1460 }, { "epoch": 1.52, "learning_rate": 2.7465596330275225e-07, "logits/chosen": -1.5867637395858765, "logits/rejected": -1.6024789810180664, "logps/chosen": -312.49591064453125, "logps/rejected": -236.5067596435547, "loss": -0.9474, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.954392910003662, "rewards/margins": 2.293760061264038, "rewards/rejected": -6.248152732849121, "step": 1470 }, { "epoch": 1.53, "learning_rate": 2.727446483180428e-07, "logits/chosen": -1.7006601095199585, "logits/rejected": -1.5267009735107422, "logps/chosen": -298.12469482421875, "logps/rejected": -292.24395751953125, "loss": -1.2885, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -4.79081392288208, "rewards/margins": 1.9046157598495483, "rewards/rejected": -6.695429801940918, "step": 1480 }, { "epoch": 1.54, "learning_rate": 2.708333333333333e-07, "logits/chosen": -1.4423596858978271, "logits/rejected": -1.6754871606826782, "logps/chosen": -288.9786376953125, "logps/rejected": -286.6379699707031, "loss": -1.1448, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -4.346983909606934, "rewards/margins": 1.7372324466705322, "rewards/rejected": -6.084217071533203, "step": 1490 }, { "epoch": 1.55, "learning_rate": 2.6892201834862385e-07, "logits/chosen": -1.5492124557495117, "logits/rejected": -1.4746440649032593, "logps/chosen": -301.6089782714844, "logps/rejected": -268.57745361328125, "loss": -1.1715, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -4.268374443054199, "rewards/margins": 2.3135557174682617, "rewards/rejected": -6.581930637359619, "step": 1500 }, { "epoch": 1.56, "learning_rate": 2.6701070336391433e-07, "logits/chosen": -1.6524032354354858, "logits/rejected": -1.7148220539093018, "logps/chosen": -358.600830078125, "logps/rejected": -300.6563720703125, "loss": -1.2609, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -4.343426704406738, "rewards/margins": 2.409090280532837, "rewards/rejected": -6.752516746520996, "step": 1510 }, { "epoch": 1.57, "learning_rate": 2.6509938837920486e-07, "logits/chosen": -1.6305720806121826, "logits/rejected": -1.4456074237823486, "logps/chosen": -276.3116455078125, "logps/rejected": -281.0287780761719, "loss": -1.2401, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -4.514157295227051, "rewards/margins": 2.1482510566711426, "rewards/rejected": -6.662408351898193, "step": 1520 }, { "epoch": 1.58, "learning_rate": 2.631880733944954e-07, "logits/chosen": -1.529931664466858, "logits/rejected": -1.4439135789871216, "logps/chosen": -299.05157470703125, "logps/rejected": -275.3011474609375, "loss": -1.5077, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -4.548555850982666, "rewards/margins": 1.7041714191436768, "rewards/rejected": -6.252727508544922, "step": 1530 }, { "epoch": 1.59, "learning_rate": 2.612767584097859e-07, "logits/chosen": -1.629601240158081, "logits/rejected": -1.5165350437164307, "logps/chosen": -306.07647705078125, "logps/rejected": -262.46044921875, "loss": -1.1474, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -4.669868469238281, "rewards/margins": 1.6571537256240845, "rewards/rejected": -6.327021598815918, "step": 1540 }, { "epoch": 1.6, "learning_rate": 2.5936544342507646e-07, "logits/chosen": -1.682499647140503, "logits/rejected": -1.3376775979995728, "logps/chosen": -307.9109802246094, "logps/rejected": -296.7003479003906, "loss": -1.174, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -4.40157413482666, "rewards/margins": 2.6601130962371826, "rewards/rejected": -7.061687469482422, "step": 1550 }, { "epoch": 1.61, "learning_rate": 2.5745412844036693e-07, "logits/chosen": -1.6591355800628662, "logits/rejected": -1.5533663034439087, "logps/chosen": -335.03057861328125, "logps/rejected": -287.7520751953125, "loss": -1.3877, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.175102710723877, "rewards/margins": 2.255643844604492, "rewards/rejected": -7.430747032165527, "step": 1560 }, { "epoch": 1.62, "learning_rate": 2.5554281345565747e-07, "logits/chosen": -1.6894855499267578, "logits/rejected": -1.586287260055542, "logps/chosen": -304.94989013671875, "logps/rejected": -305.7507019042969, "loss": -1.3946, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -4.844755172729492, "rewards/margins": 2.5443403720855713, "rewards/rejected": -7.389095306396484, "step": 1570 }, { "epoch": 1.63, "learning_rate": 2.53631498470948e-07, "logits/chosen": -1.593643069267273, "logits/rejected": -1.5452083349227905, "logps/chosen": -339.3976135253906, "logps/rejected": -294.7111511230469, "loss": -1.556, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -5.678981781005859, "rewards/margins": 1.9970118999481201, "rewards/rejected": -7.6759934425354, "step": 1580 }, { "epoch": 1.64, "learning_rate": 2.5172018348623853e-07, "logits/chosen": -1.5461888313293457, "logits/rejected": -1.408332109451294, "logps/chosen": -336.61553955078125, "logps/rejected": -301.59759521484375, "loss": -1.0556, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -5.15664005279541, "rewards/margins": 2.157421827316284, "rewards/rejected": -7.314061164855957, "step": 1590 }, { "epoch": 1.65, "learning_rate": 2.4980886850152906e-07, "logits/chosen": -1.5243330001831055, "logits/rejected": -1.5529712438583374, "logps/chosen": -333.6750793457031, "logps/rejected": -288.2501525878906, "loss": -1.1851, "rewards/accuracies": 0.625, "rewards/chosen": -5.502475738525391, "rewards/margins": 1.9487812519073486, "rewards/rejected": -7.451257228851318, "step": 1600 }, { "epoch": 1.66, "learning_rate": 2.478975535168196e-07, "logits/chosen": -1.5254369974136353, "logits/rejected": -1.4977315664291382, "logps/chosen": -316.21868896484375, "logps/rejected": -277.3542175292969, "loss": -1.6845, "rewards/accuracies": 0.625, "rewards/chosen": -5.898019790649414, "rewards/margins": 1.867455244064331, "rewards/rejected": -7.765474796295166, "step": 1610 }, { "epoch": 1.67, "learning_rate": 2.459862385321101e-07, "logits/chosen": -1.747807502746582, "logits/rejected": -1.5742764472961426, "logps/chosen": -369.40728759765625, "logps/rejected": -318.35821533203125, "loss": -1.0463, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -6.141237735748291, "rewards/margins": 1.5390453338623047, "rewards/rejected": -7.680283546447754, "step": 1620 }, { "epoch": 1.68, "learning_rate": 2.440749235474006e-07, "logits/chosen": -1.5898773670196533, "logits/rejected": -1.5242546796798706, "logps/chosen": -336.76824951171875, "logps/rejected": -315.19384765625, "loss": -1.7444, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -4.8129730224609375, "rewards/margins": 3.9923481941223145, "rewards/rejected": -8.80532169342041, "step": 1630 }, { "epoch": 1.69, "learning_rate": 2.421636085626911e-07, "logits/chosen": -1.4560226202011108, "logits/rejected": -1.421790361404419, "logps/chosen": -328.801025390625, "logps/rejected": -280.44549560546875, "loss": -1.3433, "rewards/accuracies": 0.5625, "rewards/chosen": -6.336007118225098, "rewards/margins": 1.1562750339508057, "rewards/rejected": -7.492282867431641, "step": 1640 }, { "epoch": 1.7, "learning_rate": 2.402522935779816e-07, "logits/chosen": -1.7103254795074463, "logits/rejected": -1.5623626708984375, "logps/chosen": -353.27081298828125, "logps/rejected": -334.1893005371094, "loss": -1.3197, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -6.708989143371582, "rewards/margins": 1.9991058111190796, "rewards/rejected": -8.708094596862793, "step": 1650 }, { "epoch": 1.71, "learning_rate": 2.3834097859327215e-07, "logits/chosen": -1.508737325668335, "logits/rejected": -1.3252770900726318, "logps/chosen": -288.32635498046875, "logps/rejected": -296.4991455078125, "loss": -1.3704, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -5.095849990844727, "rewards/margins": 3.1136269569396973, "rewards/rejected": -8.209476470947266, "step": 1660 }, { "epoch": 1.72, "learning_rate": 2.3642966360856268e-07, "logits/chosen": -1.6645679473876953, "logits/rejected": -1.5162229537963867, "logps/chosen": -322.9044189453125, "logps/rejected": -300.4761047363281, "loss": -1.9619, "rewards/accuracies": 0.6875, "rewards/chosen": -5.100916862487793, "rewards/margins": 3.4188740253448486, "rewards/rejected": -8.519791603088379, "step": 1670 }, { "epoch": 1.73, "learning_rate": 2.345183486238532e-07, "logits/chosen": -1.5683923959732056, "logits/rejected": -1.4021813869476318, "logps/chosen": -327.93804931640625, "logps/rejected": -343.5317077636719, "loss": -1.6055, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -7.430581569671631, "rewards/margins": 1.1966545581817627, "rewards/rejected": -8.62723445892334, "step": 1680 }, { "epoch": 1.74, "learning_rate": 2.3260703363914372e-07, "logits/chosen": -1.631715178489685, "logits/rejected": -1.5181890726089478, "logps/chosen": -370.483642578125, "logps/rejected": -336.6531677246094, "loss": -1.8198, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -6.040884494781494, "rewards/margins": 3.5697269439697266, "rewards/rejected": -9.610611915588379, "step": 1690 }, { "epoch": 1.75, "learning_rate": 2.3069571865443425e-07, "logits/chosen": -1.4669805765151978, "logits/rejected": -1.3994741439819336, "logps/chosen": -295.02496337890625, "logps/rejected": -321.65814208984375, "loss": -2.0272, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -6.588179588317871, "rewards/margins": 2.978868007659912, "rewards/rejected": -9.567047119140625, "step": 1700 }, { "epoch": 1.76, "learning_rate": 2.2878440366972476e-07, "logits/chosen": -1.6681219339370728, "logits/rejected": -1.491996169090271, "logps/chosen": -338.78729248046875, "logps/rejected": -374.81634521484375, "loss": -1.8513, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -6.465402126312256, "rewards/margins": 2.903998851776123, "rewards/rejected": -9.369401931762695, "step": 1710 }, { "epoch": 1.78, "learning_rate": 2.268730886850153e-07, "logits/chosen": -1.4729211330413818, "logits/rejected": -1.1880760192871094, "logps/chosen": -334.2304992675781, "logps/rejected": -305.21368408203125, "loss": -2.0159, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -7.080233573913574, "rewards/margins": 2.117523431777954, "rewards/rejected": -9.197757720947266, "step": 1720 }, { "epoch": 1.79, "learning_rate": 2.249617737003058e-07, "logits/chosen": -1.563912034034729, "logits/rejected": -1.5858089923858643, "logps/chosen": -378.62396240234375, "logps/rejected": -377.3148193359375, "loss": -1.9999, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -8.893011093139648, "rewards/margins": 2.892535924911499, "rewards/rejected": -11.785547256469727, "step": 1730 }, { "epoch": 1.8, "learning_rate": 2.2305045871559633e-07, "logits/chosen": -1.392828345298767, "logits/rejected": -1.3615916967391968, "logps/chosen": -361.71051025390625, "logps/rejected": -335.33673095703125, "loss": -2.324, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -8.41543197631836, "rewards/margins": 2.103468418121338, "rewards/rejected": -10.518899917602539, "step": 1740 }, { "epoch": 1.81, "learning_rate": 2.2113914373088686e-07, "logits/chosen": -1.4968210458755493, "logits/rejected": -1.4618072509765625, "logps/chosen": -382.20367431640625, "logps/rejected": -378.17901611328125, "loss": -2.2244, "rewards/accuracies": 0.6875, "rewards/chosen": -7.971160888671875, "rewards/margins": 4.1811017990112305, "rewards/rejected": -12.152261734008789, "step": 1750 }, { "epoch": 1.82, "learning_rate": 2.1922782874617736e-07, "logits/chosen": -1.37467360496521, "logits/rejected": -1.3709341287612915, "logps/chosen": -313.5454406738281, "logps/rejected": -367.4576721191406, "loss": -1.9016, "rewards/accuracies": 0.6875, "rewards/chosen": -8.650922775268555, "rewards/margins": 2.388150691986084, "rewards/rejected": -11.039073944091797, "step": 1760 }, { "epoch": 1.83, "learning_rate": 2.1731651376146787e-07, "logits/chosen": -1.4017971754074097, "logits/rejected": -1.2825329303741455, "logps/chosen": -311.17572021484375, "logps/rejected": -345.6313171386719, "loss": -1.8086, "rewards/accuracies": 0.6875, "rewards/chosen": -8.644105911254883, "rewards/margins": 3.1263985633850098, "rewards/rejected": -11.77050495147705, "step": 1770 }, { "epoch": 1.84, "learning_rate": 2.154051987767584e-07, "logits/chosen": -1.5087705850601196, "logits/rejected": -1.3651801347732544, "logps/chosen": -351.78582763671875, "logps/rejected": -337.21295166015625, "loss": -1.5194, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -8.838014602661133, "rewards/margins": 3.8112175464630127, "rewards/rejected": -12.64923095703125, "step": 1780 }, { "epoch": 1.85, "learning_rate": 2.134938837920489e-07, "logits/chosen": -1.2244422435760498, "logits/rejected": -1.352123498916626, "logps/chosen": -281.91949462890625, "logps/rejected": -307.07867431640625, "loss": -1.8547, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -8.937861442565918, "rewards/margins": 3.2620761394500732, "rewards/rejected": -12.19993782043457, "step": 1790 }, { "epoch": 1.86, "learning_rate": 2.1158256880733944e-07, "logits/chosen": -1.3861466646194458, "logits/rejected": -1.3692476749420166, "logps/chosen": -379.1508483886719, "logps/rejected": -348.1292724609375, "loss": -2.9603, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -8.26054859161377, "rewards/margins": 5.145617485046387, "rewards/rejected": -13.406166076660156, "step": 1800 }, { "epoch": 1.87, "learning_rate": 2.0967125382262994e-07, "logits/chosen": -1.2910804748535156, "logits/rejected": -1.3471043109893799, "logps/chosen": -359.8387451171875, "logps/rejected": -327.9253845214844, "loss": -1.6607, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -9.164748191833496, "rewards/margins": 1.6195499897003174, "rewards/rejected": -10.784297943115234, "step": 1810 }, { "epoch": 1.88, "learning_rate": 2.0775993883792048e-07, "logits/chosen": -1.4032940864562988, "logits/rejected": -1.253796100616455, "logps/chosen": -357.06549072265625, "logps/rejected": -318.9412536621094, "loss": -2.9627, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -7.581735134124756, "rewards/margins": 3.610663652420044, "rewards/rejected": -11.192397117614746, "step": 1820 }, { "epoch": 1.89, "learning_rate": 2.05848623853211e-07, "logits/chosen": -1.4505128860473633, "logits/rejected": -1.4566118717193604, "logps/chosen": -400.2586975097656, "logps/rejected": -357.13458251953125, "loss": -2.4791, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -10.725576400756836, "rewards/margins": 2.9340341091156006, "rewards/rejected": -13.6596097946167, "step": 1830 }, { "epoch": 1.9, "learning_rate": 2.0393730886850151e-07, "logits/chosen": -1.4870072603225708, "logits/rejected": -1.2949211597442627, "logps/chosen": -384.02947998046875, "logps/rejected": -346.6249084472656, "loss": -1.9201, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -9.375011444091797, "rewards/margins": 3.3913631439208984, "rewards/rejected": -12.766374588012695, "step": 1840 }, { "epoch": 1.91, "learning_rate": 2.0202599388379205e-07, "logits/chosen": -1.5045329332351685, "logits/rejected": -1.3934712409973145, "logps/chosen": -375.15155029296875, "logps/rejected": -420.40283203125, "loss": -2.6807, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -10.547384262084961, "rewards/margins": 4.163573265075684, "rewards/rejected": -14.710957527160645, "step": 1850 }, { "epoch": 1.92, "learning_rate": 2.0011467889908258e-07, "logits/chosen": -1.5281354188919067, "logits/rejected": -1.2871553897857666, "logps/chosen": -395.02471923828125, "logps/rejected": -297.695068359375, "loss": -2.9446, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -10.007207870483398, "rewards/margins": 2.725371837615967, "rewards/rejected": -12.732580184936523, "step": 1860 }, { "epoch": 1.93, "learning_rate": 1.9820336391437308e-07, "logits/chosen": -1.310064673423767, "logits/rejected": -1.3744899034500122, "logps/chosen": -344.2419738769531, "logps/rejected": -346.10430908203125, "loss": -2.1029, "rewards/accuracies": 0.6875, "rewards/chosen": -9.30712604522705, "rewards/margins": 4.580069541931152, "rewards/rejected": -13.88719654083252, "step": 1870 }, { "epoch": 1.94, "learning_rate": 1.9629204892966362e-07, "logits/chosen": -1.4468410015106201, "logits/rejected": -1.3818161487579346, "logps/chosen": -382.7313232421875, "logps/rejected": -351.123291015625, "loss": -3.6235, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -10.61854362487793, "rewards/margins": 3.927306652069092, "rewards/rejected": -14.545849800109863, "step": 1880 }, { "epoch": 1.95, "learning_rate": 1.943807339449541e-07, "logits/chosen": -1.280505895614624, "logits/rejected": -1.126263976097107, "logps/chosen": -361.83258056640625, "logps/rejected": -395.9088439941406, "loss": -2.9208, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -10.34544849395752, "rewards/margins": 5.857726097106934, "rewards/rejected": -16.20317268371582, "step": 1890 }, { "epoch": 1.96, "learning_rate": 1.9246941896024463e-07, "logits/chosen": -1.300330400466919, "logits/rejected": -1.2540452480316162, "logps/chosen": -374.8466796875, "logps/rejected": -399.75933837890625, "loss": -2.3092, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -12.081557273864746, "rewards/margins": 3.7616398334503174, "rewards/rejected": -15.8431978225708, "step": 1900 }, { "epoch": 1.97, "learning_rate": 1.9055810397553516e-07, "logits/chosen": -1.3129085302352905, "logits/rejected": -1.266152262687683, "logps/chosen": -376.607666015625, "logps/rejected": -378.1551513671875, "loss": -2.5453, "rewards/accuracies": 0.625, "rewards/chosen": -12.030994415283203, "rewards/margins": 3.7824196815490723, "rewards/rejected": -15.813413619995117, "step": 1910 }, { "epoch": 1.98, "learning_rate": 1.8864678899082566e-07, "logits/chosen": -1.3077343702316284, "logits/rejected": -1.3403010368347168, "logps/chosen": -395.0058288574219, "logps/rejected": -348.19683837890625, "loss": -2.6208, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -12.53403091430664, "rewards/margins": 2.729905605316162, "rewards/rejected": -15.263936042785645, "step": 1920 }, { "epoch": 1.99, "learning_rate": 1.867354740061162e-07, "logits/chosen": -1.2000598907470703, "logits/rejected": -1.1860705614089966, "logps/chosen": -377.95538330078125, "logps/rejected": -394.8722229003906, "loss": -2.3739, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -12.864456176757812, "rewards/margins": 4.33025598526001, "rewards/rejected": -17.194711685180664, "step": 1930 }, { "epoch": 2.0, "eval_logits/chosen": -1.5908974409103394, "eval_logits/rejected": -1.4396800994873047, "eval_logps/chosen": -418.98797607421875, "eval_logps/rejected": -413.8171691894531, "eval_loss": -3.0139997005462646, "eval_rewards/accuracies": 0.658730149269104, "eval_rewards/chosen": -12.91853141784668, "eval_rewards/margins": 4.969918251037598, "eval_rewards/rejected": -17.888450622558594, "eval_runtime": 237.8807, "eval_samples_per_second": 8.408, "eval_steps_per_second": 0.265, "step": 1938 }, { "epoch": 2.0, "learning_rate": 1.8482415902140673e-07, "logits/chosen": -1.3271663188934326, "logits/rejected": -1.3011524677276611, "logps/chosen": -392.82757568359375, "logps/rejected": -392.5997314453125, "loss": -2.8487, "rewards/accuracies": 0.625, "rewards/chosen": -13.525858879089355, "rewards/margins": 4.043676376342773, "rewards/rejected": -17.569536209106445, "step": 1940 }, { "epoch": 2.01, "learning_rate": 1.8291284403669723e-07, "logits/chosen": -1.2705609798431396, "logits/rejected": -1.3038126230239868, "logps/chosen": -373.14031982421875, "logps/rejected": -386.13677978515625, "loss": -3.4669, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -12.79985237121582, "rewards/margins": 3.9696388244628906, "rewards/rejected": -16.76949119567871, "step": 1950 }, { "epoch": 2.02, "learning_rate": 1.8100152905198777e-07, "logits/chosen": -1.061727523803711, "logits/rejected": -1.110812783241272, "logps/chosen": -381.92529296875, "logps/rejected": -418.325927734375, "loss": -2.5351, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -12.154891967773438, "rewards/margins": 2.9344310760498047, "rewards/rejected": -15.089323043823242, "step": 1960 }, { "epoch": 2.03, "learning_rate": 1.7909021406727827e-07, "logits/chosen": -1.107367992401123, "logits/rejected": -1.453131914138794, "logps/chosen": -374.98504638671875, "logps/rejected": -365.43121337890625, "loss": -3.2083, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -11.676986694335938, "rewards/margins": 4.082896709442139, "rewards/rejected": -15.75988483428955, "step": 1970 }, { "epoch": 2.04, "learning_rate": 1.771788990825688e-07, "logits/chosen": -1.3608293533325195, "logits/rejected": -1.06710684299469, "logps/chosen": -423.4046325683594, "logps/rejected": -425.17681884765625, "loss": -2.8719, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -14.271319389343262, "rewards/margins": 4.651618957519531, "rewards/rejected": -18.92293930053711, "step": 1980 }, { "epoch": 2.05, "learning_rate": 1.7526758409785934e-07, "logits/chosen": -1.4061094522476196, "logits/rejected": -1.2359219789505005, "logps/chosen": -473.62841796875, "logps/rejected": -462.7413024902344, "loss": -3.7745, "rewards/accuracies": 0.5625, "rewards/chosen": -17.31951332092285, "rewards/margins": 4.680100440979004, "rewards/rejected": -21.99961280822754, "step": 1990 }, { "epoch": 2.06, "learning_rate": 1.7335626911314984e-07, "logits/chosen": -1.2134959697723389, "logits/rejected": -0.9500266909599304, "logps/chosen": -372.77880859375, "logps/rejected": -378.95703125, "loss": -3.7006, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -15.280550003051758, "rewards/margins": 3.6492176055908203, "rewards/rejected": -18.929767608642578, "step": 2000 }, { "epoch": 2.07, "learning_rate": 1.7144495412844037e-07, "logits/chosen": -1.3106526136398315, "logits/rejected": -1.1043154001235962, "logps/chosen": -463.447021484375, "logps/rejected": -458.7850036621094, "loss": -2.2585, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -16.96882438659668, "rewards/margins": 5.767825603485107, "rewards/rejected": -22.736652374267578, "step": 2010 }, { "epoch": 2.08, "learning_rate": 1.6953363914373088e-07, "logits/chosen": -1.373578667640686, "logits/rejected": -1.0678809881210327, "logps/chosen": -457.8382873535156, "logps/rejected": -414.07177734375, "loss": -2.567, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -16.41115951538086, "rewards/margins": 3.8120930194854736, "rewards/rejected": -20.223255157470703, "step": 2020 }, { "epoch": 2.09, "learning_rate": 1.6762232415902138e-07, "logits/chosen": -1.1989551782608032, "logits/rejected": -1.2645841836929321, "logps/chosen": -400.55767822265625, "logps/rejected": -395.439208984375, "loss": -2.5975, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -14.613690376281738, "rewards/margins": 3.9228732585906982, "rewards/rejected": -18.536563873291016, "step": 2030 }, { "epoch": 2.11, "learning_rate": 1.6571100917431192e-07, "logits/chosen": -1.3490411043167114, "logits/rejected": -1.1970597505569458, "logps/chosen": -399.5980224609375, "logps/rejected": -373.1912841796875, "loss": -2.902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -13.886739730834961, "rewards/margins": 3.5066256523132324, "rewards/rejected": -17.39336585998535, "step": 2040 }, { "epoch": 2.12, "learning_rate": 1.6379969418960242e-07, "logits/chosen": -1.2504427433013916, "logits/rejected": -1.1676826477050781, "logps/chosen": -489.55914306640625, "logps/rejected": -456.7439880371094, "loss": -3.6402, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -17.439693450927734, "rewards/margins": 2.561140537261963, "rewards/rejected": -20.000835418701172, "step": 2050 }, { "epoch": 2.13, "learning_rate": 1.6188837920489295e-07, "logits/chosen": -1.4094626903533936, "logits/rejected": -0.9949380159378052, "logps/chosen": -461.00286865234375, "logps/rejected": -403.776123046875, "loss": -3.295, "rewards/accuracies": 0.5625, "rewards/chosen": -16.868534088134766, "rewards/margins": 2.4951887130737305, "rewards/rejected": -19.36372184753418, "step": 2060 }, { "epoch": 2.14, "learning_rate": 1.5997706422018349e-07, "logits/chosen": -1.5532751083374023, "logits/rejected": -1.1935482025146484, "logps/chosen": -410.2151794433594, "logps/rejected": -497.067626953125, "loss": -3.6557, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -14.347163200378418, "rewards/margins": 6.878281593322754, "rewards/rejected": -21.225446701049805, "step": 2070 }, { "epoch": 2.15, "learning_rate": 1.58065749235474e-07, "logits/chosen": -1.2084702253341675, "logits/rejected": -1.1066341400146484, "logps/chosen": -507.550537109375, "logps/rejected": -450.25341796875, "loss": -3.175, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -17.20359992980957, "rewards/margins": 2.5353291034698486, "rewards/rejected": -19.738927841186523, "step": 2080 }, { "epoch": 2.16, "learning_rate": 1.5615443425076452e-07, "logits/chosen": -1.0724289417266846, "logits/rejected": -1.0922993421554565, "logps/chosen": -402.9586486816406, "logps/rejected": -448.19830322265625, "loss": -3.6034, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -14.504638671875, "rewards/margins": 8.439682006835938, "rewards/rejected": -22.944318771362305, "step": 2090 }, { "epoch": 2.17, "learning_rate": 1.5424311926605506e-07, "logits/chosen": -0.9919137954711914, "logits/rejected": -0.9210994839668274, "logps/chosen": -369.6788024902344, "logps/rejected": -405.4874572753906, "loss": -4.0197, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -14.22148323059082, "rewards/margins": 8.295201301574707, "rewards/rejected": -22.51668357849121, "step": 2100 }, { "epoch": 2.18, "learning_rate": 1.5233180428134556e-07, "logits/chosen": -1.2452681064605713, "logits/rejected": -1.1532270908355713, "logps/chosen": -423.7035217285156, "logps/rejected": -444.843017578125, "loss": -4.0177, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -15.260812759399414, "rewards/margins": 7.597034454345703, "rewards/rejected": -22.857845306396484, "step": 2110 }, { "epoch": 2.19, "learning_rate": 1.504204892966361e-07, "logits/chosen": -1.2690461874008179, "logits/rejected": -1.036007046699524, "logps/chosen": -481.8981018066406, "logps/rejected": -458.9730529785156, "loss": -4.7473, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -18.875347137451172, "rewards/margins": 3.536944627761841, "rewards/rejected": -22.412288665771484, "step": 2120 }, { "epoch": 2.2, "learning_rate": 1.485091743119266e-07, "logits/chosen": -1.2708927392959595, "logits/rejected": -0.9568039178848267, "logps/chosen": -407.5409851074219, "logps/rejected": -469.5133361816406, "loss": -4.6317, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -15.849174499511719, "rewards/margins": 7.314669609069824, "rewards/rejected": -23.16384506225586, "step": 2130 }, { "epoch": 2.21, "learning_rate": 1.465978593272171e-07, "logits/chosen": -0.9912996292114258, "logits/rejected": -0.8085284233093262, "logps/chosen": -353.92413330078125, "logps/rejected": -355.982666015625, "loss": -4.0689, "rewards/accuracies": 0.625, "rewards/chosen": -15.852828979492188, "rewards/margins": 5.281838417053223, "rewards/rejected": -21.134668350219727, "step": 2140 }, { "epoch": 2.22, "learning_rate": 1.4468654434250764e-07, "logits/chosen": -1.2438325881958008, "logits/rejected": -1.1221582889556885, "logps/chosen": -519.1368408203125, "logps/rejected": -489.5020446777344, "loss": -2.5507, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -19.783428192138672, "rewards/margins": 1.9117088317871094, "rewards/rejected": -21.69513702392578, "step": 2150 }, { "epoch": 2.23, "learning_rate": 1.4277522935779814e-07, "logits/chosen": -1.0609955787658691, "logits/rejected": -1.2507171630859375, "logps/chosen": -440.3553161621094, "logps/rejected": -453.50323486328125, "loss": -3.4898, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -16.776865005493164, "rewards/margins": 4.883880615234375, "rewards/rejected": -21.660743713378906, "step": 2160 }, { "epoch": 2.24, "learning_rate": 1.4086391437308867e-07, "logits/chosen": -1.274261474609375, "logits/rejected": -1.1783298254013062, "logps/chosen": -472.8345642089844, "logps/rejected": -521.444580078125, "loss": -4.8646, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -17.88382339477539, "rewards/margins": 8.282121658325195, "rewards/rejected": -26.165943145751953, "step": 2170 }, { "epoch": 2.25, "learning_rate": 1.389525993883792e-07, "logits/chosen": -1.1477124691009521, "logits/rejected": -0.8995282053947449, "logps/chosen": -398.76043701171875, "logps/rejected": -418.26507568359375, "loss": -3.7138, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -17.033363342285156, "rewards/margins": 5.318907737731934, "rewards/rejected": -22.352272033691406, "step": 2180 }, { "epoch": 2.26, "learning_rate": 1.370412844036697e-07, "logits/chosen": -1.067368507385254, "logits/rejected": -1.2181618213653564, "logps/chosen": -437.30010986328125, "logps/rejected": -469.89056396484375, "loss": -3.5077, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -19.17410659790039, "rewards/margins": 4.917706489562988, "rewards/rejected": -24.091812133789062, "step": 2190 }, { "epoch": 2.27, "learning_rate": 1.3512996941896024e-07, "logits/chosen": -1.0004960298538208, "logits/rejected": -0.8722925186157227, "logps/chosen": -415.56719970703125, "logps/rejected": -430.51934814453125, "loss": -4.4207, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -19.313739776611328, "rewards/margins": 4.984239101409912, "rewards/rejected": -24.297977447509766, "step": 2200 }, { "epoch": 2.28, "learning_rate": 1.3321865443425075e-07, "logits/chosen": -1.0728847980499268, "logits/rejected": -0.7316335439682007, "logps/chosen": -489.6312561035156, "logps/rejected": -451.337158203125, "loss": -4.1332, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -22.230623245239258, "rewards/margins": 0.4879101812839508, "rewards/rejected": -22.71853256225586, "step": 2210 }, { "epoch": 2.29, "learning_rate": 1.3130733944954128e-07, "logits/chosen": -1.31141197681427, "logits/rejected": -1.0552115440368652, "logps/chosen": -473.9842834472656, "logps/rejected": -515.6341552734375, "loss": -3.8823, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -18.287981033325195, "rewards/margins": 8.8409423828125, "rewards/rejected": -27.128925323486328, "step": 2220 }, { "epoch": 2.3, "learning_rate": 1.293960244648318e-07, "logits/chosen": -1.1148552894592285, "logits/rejected": -1.010851502418518, "logps/chosen": -501.09588623046875, "logps/rejected": -478.259033203125, "loss": -4.2715, "rewards/accuracies": 0.625, "rewards/chosen": -20.409494400024414, "rewards/margins": 5.208123683929443, "rewards/rejected": -25.617618560791016, "step": 2230 }, { "epoch": 2.31, "learning_rate": 1.2748470948012232e-07, "logits/chosen": -1.1082009077072144, "logits/rejected": -0.9660416841506958, "logps/chosen": -469.67169189453125, "logps/rejected": -531.001708984375, "loss": -5.0927, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -20.520383834838867, "rewards/margins": 7.39895486831665, "rewards/rejected": -27.91933822631836, "step": 2240 }, { "epoch": 2.32, "learning_rate": 1.2557339449541285e-07, "logits/chosen": -1.1804434061050415, "logits/rejected": -1.013977289199829, "logps/chosen": -464.8408203125, "logps/rejected": -479.40301513671875, "loss": -5.3276, "rewards/accuracies": 0.5625, "rewards/chosen": -18.352712631225586, "rewards/margins": 6.4782609939575195, "rewards/rejected": -24.830974578857422, "step": 2250 }, { "epoch": 2.33, "learning_rate": 1.2366207951070336e-07, "logits/chosen": -1.1036585569381714, "logits/rejected": -1.1067498922348022, "logps/chosen": -482.696533203125, "logps/rejected": -480.6744689941406, "loss": -3.2585, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -21.47313117980957, "rewards/margins": 3.0878939628601074, "rewards/rejected": -24.561023712158203, "step": 2260 }, { "epoch": 2.34, "learning_rate": 1.217507645259939e-07, "logits/chosen": -1.1785879135131836, "logits/rejected": -0.8433502912521362, "logps/chosen": -448.43017578125, "logps/rejected": -455.5037536621094, "loss": -3.4348, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -17.98015594482422, "rewards/margins": 5.601017475128174, "rewards/rejected": -23.581167221069336, "step": 2270 }, { "epoch": 2.35, "learning_rate": 1.198394495412844e-07, "logits/chosen": -0.9618834257125854, "logits/rejected": -0.9488929510116577, "logps/chosen": -486.79248046875, "logps/rejected": -497.04498291015625, "loss": -5.1327, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -20.99383544921875, "rewards/margins": 5.7790398597717285, "rewards/rejected": -26.772876739501953, "step": 2280 }, { "epoch": 2.36, "learning_rate": 1.1792813455657493e-07, "logits/chosen": -1.175091028213501, "logits/rejected": -0.9602692723274231, "logps/chosen": -455.3914489746094, "logps/rejected": -423.8038024902344, "loss": -5.0517, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -19.302087783813477, "rewards/margins": 2.1102347373962402, "rewards/rejected": -21.412322998046875, "step": 2290 }, { "epoch": 2.37, "learning_rate": 1.1601681957186543e-07, "logits/chosen": -1.0566465854644775, "logits/rejected": -0.9404910802841187, "logps/chosen": -462.96478271484375, "logps/rejected": -497.4542541503906, "loss": -5.7946, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -18.82391357421875, "rewards/margins": 9.018692016601562, "rewards/rejected": -27.842605590820312, "step": 2300 }, { "epoch": 2.38, "learning_rate": 1.1410550458715595e-07, "logits/chosen": -0.9795541763305664, "logits/rejected": -1.0114076137542725, "logps/chosen": -471.488037109375, "logps/rejected": -525.1002807617188, "loss": -5.3356, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -21.668289184570312, "rewards/margins": 7.146371364593506, "rewards/rejected": -28.814661026000977, "step": 2310 }, { "epoch": 2.39, "learning_rate": 1.1219418960244648e-07, "logits/chosen": -1.0043725967407227, "logits/rejected": -0.814845085144043, "logps/chosen": -471.0302734375, "logps/rejected": -553.7667846679688, "loss": -7.8293, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -25.013063430786133, "rewards/margins": 5.8318376541137695, "rewards/rejected": -30.84490394592285, "step": 2320 }, { "epoch": 2.4, "learning_rate": 1.10282874617737e-07, "logits/chosen": -1.04874587059021, "logits/rejected": -0.8525232076644897, "logps/chosen": -503.905517578125, "logps/rejected": -522.6575927734375, "loss": -5.5289, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -27.28557777404785, "rewards/margins": 4.0987653732299805, "rewards/rejected": -31.38434410095215, "step": 2330 }, { "epoch": 2.41, "learning_rate": 1.0837155963302752e-07, "logits/chosen": -0.9908869862556458, "logits/rejected": -0.810786247253418, "logps/chosen": -424.4300842285156, "logps/rejected": -541.6786499023438, "loss": -4.8461, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -21.23228645324707, "rewards/margins": 9.21428394317627, "rewards/rejected": -30.446569442749023, "step": 2340 }, { "epoch": 2.43, "learning_rate": 1.0646024464831804e-07, "logits/chosen": -0.9769414067268372, "logits/rejected": -0.7807801961898804, "logps/chosen": -508.21539306640625, "logps/rejected": -505.65118408203125, "loss": -4.3914, "rewards/accuracies": 0.5625, "rewards/chosen": -23.276477813720703, "rewards/margins": 5.2235870361328125, "rewards/rejected": -28.500064849853516, "step": 2350 }, { "epoch": 2.44, "learning_rate": 1.0454892966360856e-07, "logits/chosen": -0.9564868211746216, "logits/rejected": -0.8420296907424927, "logps/chosen": -462.8448791503906, "logps/rejected": -527.154541015625, "loss": -4.464, "rewards/accuracies": 0.625, "rewards/chosen": -20.870206832885742, "rewards/margins": 10.262295722961426, "rewards/rejected": -31.13250160217285, "step": 2360 }, { "epoch": 2.45, "learning_rate": 1.0263761467889908e-07, "logits/chosen": -0.8812466859817505, "logits/rejected": -0.8560865521430969, "logps/chosen": -485.3046875, "logps/rejected": -493.06463623046875, "loss": -4.3681, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -22.31712532043457, "rewards/margins": 3.8712782859802246, "rewards/rejected": -26.188400268554688, "step": 2370 }, { "epoch": 2.46, "learning_rate": 1.007262996941896e-07, "logits/chosen": -0.9851228594779968, "logits/rejected": -0.6262848973274231, "logps/chosen": -522.0337524414062, "logps/rejected": -517.8556518554688, "loss": -3.7686, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -26.0426025390625, "rewards/margins": 6.041103839874268, "rewards/rejected": -32.083702087402344, "step": 2380 }, { "epoch": 2.47, "learning_rate": 9.881498470948011e-08, "logits/chosen": -0.9182702898979187, "logits/rejected": -0.8262530565261841, "logps/chosen": -526.509521484375, "logps/rejected": -501.44390869140625, "loss": -4.2044, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -25.09255599975586, "rewards/margins": 4.793452739715576, "rewards/rejected": -29.88600730895996, "step": 2390 }, { "epoch": 2.48, "learning_rate": 9.690366972477065e-08, "logits/chosen": -1.020527958869934, "logits/rejected": -0.9827292561531067, "logps/chosen": -535.2392578125, "logps/rejected": -474.393310546875, "loss": -4.1335, "rewards/accuracies": 0.4375, "rewards/chosen": -26.45871925354004, "rewards/margins": -0.03600626066327095, "rewards/rejected": -26.42270851135254, "step": 2400 }, { "epoch": 2.49, "learning_rate": 9.499235474006116e-08, "logits/chosen": -1.0558453798294067, "logits/rejected": -0.9320958256721497, "logps/chosen": -537.9478759765625, "logps/rejected": -526.4865112304688, "loss": -5.638, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -22.561458587646484, "rewards/margins": 8.904586791992188, "rewards/rejected": -31.466039657592773, "step": 2410 }, { "epoch": 2.5, "learning_rate": 9.308103975535168e-08, "logits/chosen": -0.9652633666992188, "logits/rejected": -0.9594426155090332, "logps/chosen": -547.8624877929688, "logps/rejected": -563.6146240234375, "loss": -5.9731, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -25.674243927001953, "rewards/margins": 5.594969749450684, "rewards/rejected": -31.269210815429688, "step": 2420 }, { "epoch": 2.51, "learning_rate": 9.116972477064219e-08, "logits/chosen": -0.9045840501785278, "logits/rejected": -0.9294489622116089, "logps/chosen": -545.46923828125, "logps/rejected": -560.5089111328125, "loss": -4.2549, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -26.388031005859375, "rewards/margins": 3.495013475418091, "rewards/rejected": -29.883047103881836, "step": 2430 }, { "epoch": 2.52, "learning_rate": 8.925840978593272e-08, "logits/chosen": -1.0906932353973389, "logits/rejected": -0.8586239814758301, "logps/chosen": -426.898193359375, "logps/rejected": -520.27099609375, "loss": -5.0199, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -21.479185104370117, "rewards/margins": 6.4797844886779785, "rewards/rejected": -27.958969116210938, "step": 2440 }, { "epoch": 2.53, "learning_rate": 8.734709480122324e-08, "logits/chosen": -0.8275815844535828, "logits/rejected": -0.8221632242202759, "logps/chosen": -514.3942260742188, "logps/rejected": -599.9034423828125, "loss": -5.2968, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -26.718358993530273, "rewards/margins": 10.353950500488281, "rewards/rejected": -37.07230758666992, "step": 2450 }, { "epoch": 2.54, "learning_rate": 8.543577981651376e-08, "logits/chosen": -1.0432653427124023, "logits/rejected": -1.0401585102081299, "logps/chosen": -591.7424926757812, "logps/rejected": -566.1804809570312, "loss": -5.6217, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -23.220060348510742, "rewards/margins": 7.772289276123047, "rewards/rejected": -30.99234962463379, "step": 2460 }, { "epoch": 2.55, "learning_rate": 8.352446483180428e-08, "logits/chosen": -0.9903294444084167, "logits/rejected": -0.9184169769287109, "logps/chosen": -531.7313842773438, "logps/rejected": -534.6137084960938, "loss": -3.808, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -26.629840850830078, "rewards/margins": 6.476201057434082, "rewards/rejected": -33.106040954589844, "step": 2470 }, { "epoch": 2.56, "learning_rate": 8.161314984709481e-08, "logits/chosen": -1.09735107421875, "logits/rejected": -0.9943147897720337, "logps/chosen": -521.5565795898438, "logps/rejected": -527.12744140625, "loss": -4.6607, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -24.24032211303711, "rewards/margins": 6.096768379211426, "rewards/rejected": -30.33709144592285, "step": 2480 }, { "epoch": 2.57, "learning_rate": 7.970183486238531e-08, "logits/chosen": -0.9941670298576355, "logits/rejected": -0.8651212453842163, "logps/chosen": -524.2615356445312, "logps/rejected": -511.6361389160156, "loss": -5.4135, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -26.943328857421875, "rewards/margins": 3.2720611095428467, "rewards/rejected": -30.21539306640625, "step": 2490 }, { "epoch": 2.58, "learning_rate": 7.779051987767583e-08, "logits/chosen": -1.145651936531067, "logits/rejected": -1.0183385610580444, "logps/chosen": -548.2122192382812, "logps/rejected": -548.8924560546875, "loss": -7.1154, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -24.76950454711914, "rewards/margins": 7.515495300292969, "rewards/rejected": -32.284996032714844, "step": 2500 }, { "epoch": 2.59, "learning_rate": 7.587920489296635e-08, "logits/chosen": -1.0196164846420288, "logits/rejected": -1.0715177059173584, "logps/chosen": -547.984375, "logps/rejected": -582.05126953125, "loss": -6.2112, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -26.091842651367188, "rewards/margins": 9.326883316040039, "rewards/rejected": -35.41872787475586, "step": 2510 }, { "epoch": 2.6, "learning_rate": 7.396788990825688e-08, "logits/chosen": -1.106105923652649, "logits/rejected": -0.9713128209114075, "logps/chosen": -485.42083740234375, "logps/rejected": -594.6617431640625, "loss": -6.3354, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -22.46695327758789, "rewards/margins": 16.13218879699707, "rewards/rejected": -38.599143981933594, "step": 2520 }, { "epoch": 2.61, "learning_rate": 7.20565749235474e-08, "logits/chosen": -0.9968252182006836, "logits/rejected": -0.8192588090896606, "logps/chosen": -515.4625854492188, "logps/rejected": -511.76849365234375, "loss": -5.7957, "rewards/accuracies": 0.5625, "rewards/chosen": -26.7257022857666, "rewards/margins": 5.563019752502441, "rewards/rejected": -32.28872299194336, "step": 2530 }, { "epoch": 2.62, "learning_rate": 7.014525993883792e-08, "logits/chosen": -0.9310768246650696, "logits/rejected": -0.9076264500617981, "logps/chosen": -532.5977783203125, "logps/rejected": -534.395263671875, "loss": -6.54, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -25.927881240844727, "rewards/margins": 5.213220119476318, "rewards/rejected": -31.141101837158203, "step": 2540 }, { "epoch": 2.63, "learning_rate": 6.823394495412843e-08, "logits/chosen": -0.8654760122299194, "logits/rejected": -0.9172045588493347, "logps/chosen": -494.37005615234375, "logps/rejected": -517.6791381835938, "loss": -5.034, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -22.28108787536621, "rewards/margins": 8.954570770263672, "rewards/rejected": -31.235660552978516, "step": 2550 }, { "epoch": 2.64, "learning_rate": 6.632262996941895e-08, "logits/chosen": -0.8583803176879883, "logits/rejected": -1.0703628063201904, "logps/chosen": -545.6281127929688, "logps/rejected": -576.9746704101562, "loss": -6.1075, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -29.19540023803711, "rewards/margins": 6.0790910720825195, "rewards/rejected": -35.27449035644531, "step": 2560 }, { "epoch": 2.65, "learning_rate": 6.441131498470948e-08, "logits/chosen": -0.9567610621452332, "logits/rejected": -0.7643688917160034, "logps/chosen": -534.2616577148438, "logps/rejected": -558.9248657226562, "loss": -6.139, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -28.329565048217773, "rewards/margins": 6.173336982727051, "rewards/rejected": -34.50290298461914, "step": 2570 }, { "epoch": 2.66, "learning_rate": 6.25e-08, "logits/chosen": -0.973718523979187, "logits/rejected": -0.9818305969238281, "logps/chosen": -575.3428955078125, "logps/rejected": -579.9434814453125, "loss": -5.6596, "rewards/accuracies": 0.5625, "rewards/chosen": -26.38543701171875, "rewards/margins": 7.383551120758057, "rewards/rejected": -33.76898956298828, "step": 2580 }, { "epoch": 2.67, "learning_rate": 6.058868501529052e-08, "logits/chosen": -1.0347440242767334, "logits/rejected": -1.049570083618164, "logps/chosen": -574.5046997070312, "logps/rejected": -509.9305114746094, "loss": -5.3415, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -28.74808692932129, "rewards/margins": -0.5448096394538879, "rewards/rejected": -28.203277587890625, "step": 2590 }, { "epoch": 2.68, "learning_rate": 5.8677370030581035e-08, "logits/chosen": -1.0313562154769897, "logits/rejected": -0.7958860397338867, "logps/chosen": -527.613525390625, "logps/rejected": -525.542724609375, "loss": -6.3272, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -26.725732803344727, "rewards/margins": 5.203427314758301, "rewards/rejected": -31.92915916442871, "step": 2600 }, { "epoch": 2.69, "learning_rate": 5.6766055045871554e-08, "logits/chosen": -0.9836112260818481, "logits/rejected": -0.9386361241340637, "logps/chosen": -525.5948486328125, "logps/rejected": -588.316162109375, "loss": -7.7597, "rewards/accuracies": 0.5625, "rewards/chosen": -26.01352882385254, "rewards/margins": 9.247634887695312, "rewards/rejected": -35.26116180419922, "step": 2610 }, { "epoch": 2.7, "learning_rate": 5.485474006116208e-08, "logits/chosen": -1.163663387298584, "logits/rejected": -0.8112475275993347, "logps/chosen": -533.2066040039062, "logps/rejected": -540.348388671875, "loss": -5.5581, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -26.54342269897461, "rewards/margins": 6.053246021270752, "rewards/rejected": -32.59667205810547, "step": 2620 }, { "epoch": 2.71, "learning_rate": 5.294342507645259e-08, "logits/chosen": -0.9945865869522095, "logits/rejected": -0.8910030126571655, "logps/chosen": -593.0062866210938, "logps/rejected": -556.135009765625, "loss": -4.082, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -28.179473876953125, "rewards/margins": 6.279976844787598, "rewards/rejected": -34.459449768066406, "step": 2630 }, { "epoch": 2.72, "learning_rate": 5.1032110091743117e-08, "logits/chosen": -0.6196568012237549, "logits/rejected": -0.8046578168869019, "logps/chosen": -520.0942993164062, "logps/rejected": -537.9401245117188, "loss": -4.2575, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -30.394145965576172, "rewards/margins": 2.2538414001464844, "rewards/rejected": -32.64798355102539, "step": 2640 }, { "epoch": 2.73, "learning_rate": 4.9120795107033635e-08, "logits/chosen": -0.9934478998184204, "logits/rejected": -0.9696424603462219, "logps/chosen": -542.2866821289062, "logps/rejected": -604.997314453125, "loss": -7.0483, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -26.58272361755371, "rewards/margins": 10.307984352111816, "rewards/rejected": -36.890708923339844, "step": 2650 }, { "epoch": 2.75, "learning_rate": 4.7209480122324154e-08, "logits/chosen": -1.0938358306884766, "logits/rejected": -0.9137821197509766, "logps/chosen": -557.5704345703125, "logps/rejected": -692.9163818359375, "loss": -4.9773, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -29.691476821899414, "rewards/margins": 14.579462051391602, "rewards/rejected": -44.270938873291016, "step": 2660 }, { "epoch": 2.76, "learning_rate": 4.529816513761467e-08, "logits/chosen": -0.9120942950248718, "logits/rejected": -0.9001785516738892, "logps/chosen": -560.4894409179688, "logps/rejected": -654.8059692382812, "loss": -8.4192, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -25.794815063476562, "rewards/margins": 12.174165725708008, "rewards/rejected": -37.9689826965332, "step": 2670 }, { "epoch": 2.77, "learning_rate": 4.33868501529052e-08, "logits/chosen": -1.0795204639434814, "logits/rejected": -0.798618733882904, "logps/chosen": -502.0621643066406, "logps/rejected": -547.533203125, "loss": -6.4279, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -27.192279815673828, "rewards/margins": 8.032793998718262, "rewards/rejected": -35.225074768066406, "step": 2680 }, { "epoch": 2.78, "learning_rate": 4.147553516819572e-08, "logits/chosen": -0.8620451092720032, "logits/rejected": -0.8486016988754272, "logps/chosen": -493.64447021484375, "logps/rejected": -515.5201416015625, "loss": -7.8372, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -26.775470733642578, "rewards/margins": 3.5873656272888184, "rewards/rejected": -30.362834930419922, "step": 2690 }, { "epoch": 2.79, "learning_rate": 3.9564220183486236e-08, "logits/chosen": -0.9343770146369934, "logits/rejected": -0.7297960519790649, "logps/chosen": -554.4147338867188, "logps/rejected": -525.2438354492188, "loss": -9.0144, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -29.26637840270996, "rewards/margins": 4.40227746963501, "rewards/rejected": -33.668663024902344, "step": 2700 }, { "epoch": 2.8, "learning_rate": 3.7652905198776755e-08, "logits/chosen": -0.9890888333320618, "logits/rejected": -0.8765937685966492, "logps/chosen": -600.9437255859375, "logps/rejected": -544.9365234375, "loss": -5.8302, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -30.97121238708496, "rewards/margins": 1.8391153812408447, "rewards/rejected": -32.810325622558594, "step": 2710 }, { "epoch": 2.81, "learning_rate": 3.574159021406728e-08, "logits/chosen": -0.885094165802002, "logits/rejected": -1.0295897722244263, "logps/chosen": -576.9892578125, "logps/rejected": -676.5888671875, "loss": -3.6913, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -30.517303466796875, "rewards/margins": 8.666280746459961, "rewards/rejected": -39.1835823059082, "step": 2720 }, { "epoch": 2.82, "learning_rate": 3.383027522935779e-08, "logits/chosen": -0.9966446161270142, "logits/rejected": -0.8098469972610474, "logps/chosen": -550.4933471679688, "logps/rejected": -582.1474609375, "loss": -5.0871, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -28.010635375976562, "rewards/margins": 6.666192054748535, "rewards/rejected": -34.67682647705078, "step": 2730 }, { "epoch": 2.83, "learning_rate": 3.191896024464832e-08, "logits/chosen": -0.9043565988540649, "logits/rejected": -0.7680369019508362, "logps/chosen": -609.8352661132812, "logps/rejected": -591.0311279296875, "loss": -7.8269, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -32.64293670654297, "rewards/margins": 3.9754230976104736, "rewards/rejected": -36.618350982666016, "step": 2740 }, { "epoch": 2.84, "learning_rate": 3.0007645259938836e-08, "logits/chosen": -0.9044156074523926, "logits/rejected": -0.7647527456283569, "logps/chosen": -504.7789611816406, "logps/rejected": -613.888671875, "loss": -8.5506, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -28.10544204711914, "rewards/margins": 12.60815715789795, "rewards/rejected": -40.713600158691406, "step": 2750 }, { "epoch": 2.85, "learning_rate": 2.809633027522936e-08, "logits/chosen": -0.7312633395195007, "logits/rejected": -0.8145195245742798, "logps/chosen": -568.981689453125, "logps/rejected": -555.5551147460938, "loss": -8.1692, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -32.20295715332031, "rewards/margins": 2.2871875762939453, "rewards/rejected": -34.490150451660156, "step": 2760 }, { "epoch": 2.86, "learning_rate": 2.6185015290519877e-08, "logits/chosen": -0.692935049533844, "logits/rejected": -0.9012505412101746, "logps/chosen": -530.6353759765625, "logps/rejected": -661.496826171875, "loss": -7.2198, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -26.056262969970703, "rewards/margins": 15.568461418151855, "rewards/rejected": -41.62472152709961, "step": 2770 }, { "epoch": 2.87, "learning_rate": 2.4273700305810396e-08, "logits/chosen": -1.0233699083328247, "logits/rejected": -0.7373823523521423, "logps/chosen": -615.1844482421875, "logps/rejected": -660.0513305664062, "loss": -4.8984, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -30.922901153564453, "rewards/margins": 10.078287124633789, "rewards/rejected": -41.001190185546875, "step": 2780 }, { "epoch": 2.88, "learning_rate": 2.2362385321100918e-08, "logits/chosen": -1.081015944480896, "logits/rejected": -0.7994831800460815, "logps/chosen": -571.29150390625, "logps/rejected": -558.0635375976562, "loss": -7.6276, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -28.834667205810547, "rewards/margins": 5.563540458679199, "rewards/rejected": -34.39820861816406, "step": 2790 }, { "epoch": 2.89, "learning_rate": 2.0451070336391437e-08, "logits/chosen": -0.8437451124191284, "logits/rejected": -0.8097723722457886, "logps/chosen": -579.05615234375, "logps/rejected": -576.0593872070312, "loss": -7.1202, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -30.914493560791016, "rewards/margins": 7.180275917053223, "rewards/rejected": -38.094764709472656, "step": 2800 }, { "epoch": 2.9, "learning_rate": 1.8539755351681956e-08, "logits/chosen": -0.816103458404541, "logits/rejected": -0.7335480451583862, "logps/chosen": -546.3377685546875, "logps/rejected": -556.5323486328125, "loss": -9.8674, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -28.328052520751953, "rewards/margins": 3.8435778617858887, "rewards/rejected": -32.171630859375, "step": 2810 }, { "epoch": 2.91, "learning_rate": 1.6628440366972478e-08, "logits/chosen": -0.994927704334259, "logits/rejected": -0.8127029538154602, "logps/chosen": -540.6372680664062, "logps/rejected": -544.7251586914062, "loss": -4.5174, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -25.344158172607422, "rewards/margins": 6.365922451019287, "rewards/rejected": -31.710086822509766, "step": 2820 }, { "epoch": 2.92, "learning_rate": 1.4717125382262997e-08, "logits/chosen": -0.900621235370636, "logits/rejected": -0.9584394693374634, "logps/chosen": -547.0264892578125, "logps/rejected": -650.3952026367188, "loss": -7.3594, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -27.896032333374023, "rewards/margins": 12.844169616699219, "rewards/rejected": -40.740196228027344, "step": 2830 }, { "epoch": 2.93, "learning_rate": 1.2805810397553517e-08, "logits/chosen": -0.9609657526016235, "logits/rejected": -0.9351837038993835, "logps/chosen": -620.4771118164062, "logps/rejected": -574.4600830078125, "loss": -4.3283, "rewards/accuracies": 0.5, "rewards/chosen": -34.61457061767578, "rewards/margins": 0.11583442986011505, "rewards/rejected": -34.730403900146484, "step": 2840 }, { "epoch": 2.94, "learning_rate": 1.0894495412844038e-08, "logits/chosen": -0.9170868992805481, "logits/rejected": -0.9585624933242798, "logps/chosen": -573.9271240234375, "logps/rejected": -615.289794921875, "loss": -9.5317, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -31.822189331054688, "rewards/margins": 6.956101894378662, "rewards/rejected": -38.77829360961914, "step": 2850 }, { "epoch": 2.95, "learning_rate": 8.983180428134555e-09, "logits/chosen": -0.9001976847648621, "logits/rejected": -0.7435283660888672, "logps/chosen": -546.4014892578125, "logps/rejected": -503.437255859375, "loss": -7.6183, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -28.65180015563965, "rewards/margins": 3.4450135231018066, "rewards/rejected": -32.0968132019043, "step": 2860 }, { "epoch": 2.96, "learning_rate": 7.071865443425076e-09, "logits/chosen": -0.9058972597122192, "logits/rejected": -0.6541125774383545, "logps/chosen": -654.9110107421875, "logps/rejected": -610.533447265625, "loss": -7.5332, "rewards/accuracies": 0.5625, "rewards/chosen": -37.84512710571289, "rewards/margins": 0.6990224123001099, "rewards/rejected": -38.544151306152344, "step": 2870 }, { "epoch": 2.97, "learning_rate": 5.1605504587155965e-09, "logits/chosen": -1.0033214092254639, "logits/rejected": -1.143033504486084, "logps/chosen": -586.54833984375, "logps/rejected": -587.2291259765625, "loss": -8.2379, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -30.552661895751953, "rewards/margins": 5.087256908416748, "rewards/rejected": -35.63991928100586, "step": 2880 }, { "epoch": 2.98, "learning_rate": 3.249235474006116e-09, "logits/chosen": -0.9559615850448608, "logits/rejected": -0.6920489072799683, "logps/chosen": -566.3262329101562, "logps/rejected": -614.7679443359375, "loss": -6.6734, "rewards/accuracies": 0.625, "rewards/chosen": -29.60939598083496, "rewards/margins": 8.876012802124023, "rewards/rejected": -38.48540496826172, "step": 2890 }, { "epoch": 2.99, "learning_rate": 1.3379204892966359e-09, "logits/chosen": -0.7593010663986206, "logits/rejected": -0.7966611981391907, "logps/chosen": -579.7311401367188, "logps/rejected": -572.9598999023438, "loss": -5.7169, "rewards/accuracies": 0.625, "rewards/chosen": -32.345096588134766, "rewards/margins": 4.745226860046387, "rewards/rejected": -37.09032440185547, "step": 2900 }, { "epoch": 3.0, "eval_logits/chosen": -1.2629982233047485, "eval_logits/rejected": -1.075066089630127, "eval_logps/chosen": -588.9970092773438, "eval_logps/rejected": -633.47216796875, "eval_loss": -7.541553497314453, "eval_rewards/accuracies": 0.6150793433189392, "eval_rewards/chosen": -29.919435501098633, "eval_rewards/margins": 9.934508323669434, "eval_rewards/rejected": -39.853946685791016, "eval_runtime": 238.4669, "eval_samples_per_second": 8.387, "eval_steps_per_second": 0.264, "step": 2907 }, { "epoch": 3.0, "step": 2907, "total_flos": 0.0, "train_loss": -1.932115889119454, "train_runtime": 45081.596, "train_samples_per_second": 4.124, "train_steps_per_second": 0.064 } ], "logging_steps": 10, "max_steps": 2907, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }