{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3333333333333334e-08, "logits/chosen": -2.2416834831237793, "logits/rejected": -2.1367297172546387, "logps/chosen": -309.5174865722656, "logps/rejected": -533.4085693359375, "loss": 0.2593, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.3333333333333336e-07, "logits/chosen": -1.8251832723617554, "logits/rejected": -1.0621190071105957, "logps/chosen": -543.5095825195312, "logps/rejected": -825.732177734375, "loss": 0.1824, "rewards/accuracies": 0.4722222089767456, "rewards/chosen": 0.00025084687513299286, "rewards/margins": 0.0002259216271340847, "rewards/rejected": 2.4925222533056512e-05, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.666666666666667e-07, "logits/chosen": -1.6684499979019165, "logits/rejected": -1.289470911026001, "logps/chosen": -504.35736083984375, "logps/rejected": -862.5172729492188, "loss": 0.214, "rewards/accuracies": 0.625, "rewards/chosen": 0.00039741364889778197, "rewards/margins": 0.0008992180228233337, "rewards/rejected": -0.0005018044030293822, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -1.448547601699829, "logits/rejected": -1.2029615640640259, "logps/chosen": -427.83740234375, "logps/rejected": -854.1618041992188, "loss": 0.2082, "rewards/accuracies": 0.75, "rewards/chosen": 0.0018184988293796778, "rewards/margins": 0.002095351228490472, "rewards/rejected": -0.000276852457318455, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.333333333333335e-07, "logits/chosen": -1.6658061742782593, "logits/rejected": -1.1971313953399658, "logps/chosen": -429.181884765625, "logps/rejected": -865.9625854492188, "loss": 0.193, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.005454375874251127, "rewards/margins": 0.008814454078674316, "rewards/rejected": -0.0033600772731006145, "step": 40 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "logits/chosen": -1.4225283861160278, "logits/rejected": -1.2158631086349487, "logps/chosen": -430.7893981933594, "logps/rejected": -787.4990234375, "loss": 0.1899, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.007644618395715952, "rewards/margins": 0.01243924256414175, "rewards/rejected": -0.004794624168425798, "step": 50 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -1.47100830078125, "logits/rejected": -0.9200455546379089, "logps/chosen": -466.3695373535156, "logps/rejected": -906.1533203125, "loss": 0.1733, "rewards/accuracies": 0.875, "rewards/chosen": 0.008568339981138706, "rewards/margins": 0.03259299322962761, "rewards/rejected": -0.024024656042456627, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.333333333333334e-07, "logits/chosen": -1.579919457435608, "logits/rejected": -0.9830185174942017, "logps/chosen": -454.715087890625, "logps/rejected": -852.3912353515625, "loss": 0.1825, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.01390122901648283, "rewards/margins": 0.05136318877339363, "rewards/rejected": -0.037461958825588226, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.066666666666667e-06, "logits/chosen": -1.6900907754898071, "logits/rejected": -1.3661834001541138, "logps/chosen": -492.48748779296875, "logps/rejected": -974.8717651367188, "loss": 0.1312, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.010218140669167042, "rewards/margins": 0.07319202274084091, "rewards/rejected": -0.06297388672828674, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -1.6498056650161743, "logits/rejected": -1.0119965076446533, "logps/chosen": -440.96490478515625, "logps/rejected": -855.2099609375, "loss": 0.1564, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.006226236931979656, "rewards/margins": 0.08165968954563141, "rewards/rejected": -0.07543345540761948, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.3333333333333334e-06, "logits/chosen": -1.9902693033218384, "logits/rejected": -1.1803243160247803, "logps/chosen": -578.6461791992188, "logps/rejected": -930.4148559570312, "loss": 0.1433, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.012515301816165447, "rewards/margins": 0.10045032203197479, "rewards/rejected": -0.11296562105417252, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.4666666666666669e-06, "logits/chosen": -1.6414697170257568, "logits/rejected": -0.9264998435974121, "logps/chosen": -572.1052856445312, "logps/rejected": -975.2772216796875, "loss": 0.117, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08218317478895187, "rewards/margins": 0.11511914432048798, "rewards/rejected": -0.19730232656002045, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -1.7165985107421875, "logits/rejected": -1.1280840635299683, "logps/chosen": -655.0675048828125, "logps/rejected": -1007.7223510742188, "loss": 0.1456, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11872265487909317, "rewards/margins": 0.10497407615184784, "rewards/rejected": -0.2236967384815216, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.7333333333333336e-06, "logits/chosen": -1.721414566040039, "logits/rejected": -1.0421576499938965, "logps/chosen": -637.6653442382812, "logps/rejected": -1049.671630859375, "loss": 0.1214, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1268424093723297, "rewards/margins": 0.13156402111053467, "rewards/rejected": -0.2584064304828644, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.8666666666666669e-06, "logits/chosen": -1.8063952922821045, "logits/rejected": -1.1648313999176025, "logps/chosen": -533.8194580078125, "logps/rejected": -1127.229248046875, "loss": 0.0913, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06718692928552628, "rewards/margins": 0.20027296245098114, "rewards/rejected": -0.2674598693847656, "step": 140 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.8062896728515625, "logits/rejected": -1.1342815160751343, "logps/chosen": -531.7640380859375, "logps/rejected": -1056.5418701171875, "loss": 0.1041, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09493765234947205, "rewards/margins": 0.18374750018119812, "rewards/rejected": -0.27868515253067017, "step": 150 }, { "epoch": 0.04, "learning_rate": 2.133333333333334e-06, "logits/chosen": -1.5977694988250732, "logits/rejected": -1.2945902347564697, "logps/chosen": -567.9004516601562, "logps/rejected": -1177.0767822265625, "loss": 0.0735, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10163917392492294, "rewards/margins": 0.22462336719036102, "rewards/rejected": -0.32626253366470337, "step": 160 }, { "epoch": 0.05, "learning_rate": 2.266666666666667e-06, "logits/chosen": -1.5195014476776123, "logits/rejected": -0.7815272212028503, "logps/chosen": -565.3408203125, "logps/rejected": -1178.865478515625, "loss": 0.0674, "rewards/accuracies": 0.875, "rewards/chosen": -0.09116648882627487, "rewards/margins": 0.2727690637111664, "rewards/rejected": -0.36393555998802185, "step": 170 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -1.8510783910751343, "logits/rejected": -1.1616629362106323, "logps/chosen": -542.5670166015625, "logps/rejected": -1118.016357421875, "loss": 0.0656, "rewards/accuracies": 0.875, "rewards/chosen": -0.09122536331415176, "rewards/margins": 0.22348365187644958, "rewards/rejected": -0.3147090673446655, "step": 180 }, { "epoch": 0.05, "learning_rate": 2.5333333333333338e-06, "logits/chosen": -1.9452266693115234, "logits/rejected": -1.231264352798462, "logps/chosen": -637.9483032226562, "logps/rejected": -1175.119140625, "loss": 0.1008, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12329447269439697, "rewards/margins": 0.2058517038822174, "rewards/rejected": -0.3291461765766144, "step": 190 }, { "epoch": 0.05, "learning_rate": 2.666666666666667e-06, "logits/chosen": -1.4857038259506226, "logits/rejected": -0.7614498138427734, "logps/chosen": -659.7203979492188, "logps/rejected": -1250.89892578125, "loss": 0.1015, "rewards/accuracies": 0.875, "rewards/chosen": -0.16732044517993927, "rewards/margins": 0.22920766472816467, "rewards/rejected": -0.39652806520462036, "step": 200 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -1.6152054071426392, "logits/rejected": -1.2735153436660767, "logps/chosen": -557.020751953125, "logps/rejected": -1197.393798828125, "loss": 0.0721, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14088398218154907, "rewards/margins": 0.26476138830184937, "rewards/rejected": -0.40564537048339844, "step": 210 }, { "epoch": 0.06, "learning_rate": 2.9333333333333338e-06, "logits/chosen": -1.4580497741699219, "logits/rejected": -0.8967218399047852, "logps/chosen": -538.6234130859375, "logps/rejected": -1041.23291015625, "loss": 0.1479, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08591003715991974, "rewards/margins": 0.20331135392189026, "rewards/rejected": -0.2892213761806488, "step": 220 }, { "epoch": 0.06, "learning_rate": 3.066666666666667e-06, "logits/chosen": -1.6280606985092163, "logits/rejected": -1.1948456764221191, "logps/chosen": -464.53472900390625, "logps/rejected": -1004.7527465820312, "loss": 0.0994, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04852370172739029, "rewards/margins": 0.19602426886558533, "rewards/rejected": -0.24454793334007263, "step": 230 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -2.0036234855651855, "logits/rejected": -1.0634952783584595, "logps/chosen": -654.42626953125, "logps/rejected": -1130.1927490234375, "loss": 0.0947, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14522799849510193, "rewards/margins": 0.20732179284095764, "rewards/rejected": -0.35254979133605957, "step": 240 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -1.987884759902954, "logits/rejected": -1.166095495223999, "logps/chosen": -544.5479736328125, "logps/rejected": -1122.8720703125, "loss": 0.0906, "rewards/accuracies": 0.75, "rewards/chosen": -0.12993717193603516, "rewards/margins": 0.21453383564949036, "rewards/rejected": -0.3444710373878479, "step": 250 }, { "epoch": 0.07, "learning_rate": 3.4666666666666672e-06, "logits/chosen": -1.532130479812622, "logits/rejected": -0.9931814074516296, "logps/chosen": -557.2303466796875, "logps/rejected": -1242.306396484375, "loss": 0.0831, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10561565309762955, "rewards/margins": 0.23905417323112488, "rewards/rejected": -0.3446698486804962, "step": 260 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -1.8610786199569702, "logits/rejected": -1.1762195825576782, "logps/chosen": -624.46728515625, "logps/rejected": -1214.777099609375, "loss": 0.0777, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10222941637039185, "rewards/margins": 0.23841162025928497, "rewards/rejected": -0.3406410217285156, "step": 270 }, { "epoch": 0.07, "learning_rate": 3.7333333333333337e-06, "logits/chosen": -1.6525980234146118, "logits/rejected": -1.1402348279953003, "logps/chosen": -689.7327880859375, "logps/rejected": -1228.2315673828125, "loss": 0.0904, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1671145260334015, "rewards/margins": 0.22532522678375244, "rewards/rejected": -0.39243969321250916, "step": 280 }, { "epoch": 0.08, "learning_rate": 3.866666666666667e-06, "logits/chosen": -1.7951831817626953, "logits/rejected": -1.0651658773422241, "logps/chosen": -521.4989013671875, "logps/rejected": -1288.53466796875, "loss": 0.0653, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18049229681491852, "rewards/margins": 0.28950944542884827, "rewards/rejected": -0.4700016975402832, "step": 290 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.5762369632720947, "logits/rejected": -1.2923007011413574, "logps/chosen": -643.3726806640625, "logps/rejected": -1266.006591796875, "loss": 0.0959, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2561195194721222, "rewards/margins": 0.247961163520813, "rewards/rejected": -0.5040806531906128, "step": 300 }, { "epoch": 0.08, "learning_rate": 4.133333333333333e-06, "logits/chosen": -1.8708035945892334, "logits/rejected": -0.9332900047302246, "logps/chosen": -730.3931274414062, "logps/rejected": -1277.650390625, "loss": 0.1103, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2379586398601532, "rewards/margins": 0.25126343965530396, "rewards/rejected": -0.48922213912010193, "step": 310 }, { "epoch": 0.09, "learning_rate": 4.266666666666668e-06, "logits/chosen": -1.7914050817489624, "logits/rejected": -1.1263148784637451, "logps/chosen": -617.783203125, "logps/rejected": -1344.7216796875, "loss": 0.0582, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18687450885772705, "rewards/margins": 0.3094720244407654, "rewards/rejected": -0.49634653329849243, "step": 320 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -1.8126357793807983, "logits/rejected": -1.3105145692825317, "logps/chosen": -567.7543334960938, "logps/rejected": -1125.550048828125, "loss": 0.117, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17715778946876526, "rewards/margins": 0.23828192055225372, "rewards/rejected": -0.4154396951198578, "step": 330 }, { "epoch": 0.09, "learning_rate": 4.533333333333334e-06, "logits/chosen": -1.7919952869415283, "logits/rejected": -1.0027551651000977, "logps/chosen": -792.9563598632812, "logps/rejected": -1416.2335205078125, "loss": 0.0952, "rewards/accuracies": 0.875, "rewards/chosen": -0.27687594294548035, "rewards/margins": 0.27830666303634644, "rewards/rejected": -0.5551826357841492, "step": 340 }, { "epoch": 0.09, "learning_rate": 4.666666666666667e-06, "logits/chosen": -1.93537175655365, "logits/rejected": -1.2520567178726196, "logps/chosen": -673.220947265625, "logps/rejected": -1093.1484375, "loss": 0.1006, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20018038153648376, "rewards/margins": 0.18651911616325378, "rewards/rejected": -0.38669952750205994, "step": 350 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -1.802390456199646, "logits/rejected": -1.3325846195220947, "logps/chosen": -649.5408935546875, "logps/rejected": -1129.62890625, "loss": 0.1359, "rewards/accuracies": 0.75, "rewards/chosen": -0.18187561631202698, "rewards/margins": 0.17299222946166992, "rewards/rejected": -0.3548678159713745, "step": 360 }, { "epoch": 0.1, "learning_rate": 4.933333333333334e-06, "logits/chosen": -1.8715474605560303, "logits/rejected": -1.3402589559555054, "logps/chosen": -546.8035888671875, "logps/rejected": -1010.1611328125, "loss": 0.1034, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05026025325059891, "rewards/margins": 0.22379866242408752, "rewards/rejected": -0.27405890822410583, "step": 370 }, { "epoch": 0.1, "learning_rate": 4.999972922944898e-06, "logits/chosen": -1.9571716785430908, "logits/rejected": -1.1372935771942139, "logps/chosen": -733.9254150390625, "logps/rejected": -1304.736328125, "loss": 0.0984, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15234608948230743, "rewards/margins": 0.2319423407316208, "rewards/rejected": -0.3842884600162506, "step": 380 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -1.851304292678833, "logits/rejected": -1.3916990756988525, "logps/chosen": -548.0775146484375, "logps/rejected": -1033.42578125, "loss": 0.0848, "rewards/accuracies": 0.75, "rewards/chosen": -0.11450278759002686, "rewards/margins": 0.22328679263591766, "rewards/rejected": -0.3377895951271057, "step": 390 }, { "epoch": 0.11, "learning_rate": 4.999323102948655e-06, "logits/chosen": -2.0076701641082764, "logits/rejected": -1.2286336421966553, "logps/chosen": -721.4017333984375, "logps/rejected": -1361.4373779296875, "loss": 0.077, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20651724934577942, "rewards/margins": 0.26063308119773865, "rewards/rejected": -0.46715036034584045, "step": 400 }, { "epoch": 0.11, "learning_rate": 4.998673339256785e-06, "logits/chosen": -2.0088391304016113, "logits/rejected": -1.2929044961929321, "logps/chosen": -666.7005615234375, "logps/rejected": -1329.128173828125, "loss": 0.0905, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21113541722297668, "rewards/margins": 0.27807140350341797, "rewards/rejected": -0.48920679092407227, "step": 410 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -2.1807756423950195, "logits/rejected": -1.3019901514053345, "logps/chosen": -720.1719360351562, "logps/rejected": -1275.56982421875, "loss": 0.0888, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1774640679359436, "rewards/margins": 0.26608842611312866, "rewards/rejected": -0.44355249404907227, "step": 420 }, { "epoch": 0.11, "learning_rate": 4.996724385978142e-06, "logits/chosen": -2.1555614471435547, "logits/rejected": -1.3447341918945312, "logps/chosen": -620.5142822265625, "logps/rejected": -1299.186279296875, "loss": 0.0505, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.08281116932630539, "rewards/margins": 0.3319624364376068, "rewards/rejected": -0.4147736430168152, "step": 430 }, { "epoch": 0.12, "learning_rate": 4.995425365260585e-06, "logits/chosen": -2.018123149871826, "logits/rejected": -1.2482590675354004, "logps/chosen": -659.6309814453125, "logps/rejected": -1221.164794921875, "loss": 0.0884, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15910136699676514, "rewards/margins": 0.2621740400791168, "rewards/rejected": -0.42127543687820435, "step": 440 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -1.6636863946914673, "logits/rejected": -0.9911340475082397, "logps/chosen": -693.9474487304688, "logps/rejected": -1334.8740234375, "loss": 0.0902, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23787899315357208, "rewards/margins": 0.24970480799674988, "rewards/rejected": -0.48758387565612793, "step": 450 }, { "epoch": 0.12, "learning_rate": 4.992178798434684e-06, "logits/chosen": -2.022722005844116, "logits/rejected": -1.2033276557922363, "logps/chosen": -636.1743774414062, "logps/rejected": -1268.32568359375, "loss": 0.0852, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19134658575057983, "rewards/margins": 0.29803937673568726, "rewards/rejected": -0.4893859326839447, "step": 460 }, { "epoch": 0.13, "learning_rate": 4.990231533628719e-06, "logits/chosen": -1.8332548141479492, "logits/rejected": -1.0606578588485718, "logps/chosen": -568.590576171875, "logps/rejected": -1158.369873046875, "loss": 0.0795, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.09971268475055695, "rewards/margins": 0.267979234457016, "rewards/rejected": -0.36769190430641174, "step": 470 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -2.086530923843384, "logits/rejected": -1.4134924411773682, "logps/chosen": -491.255126953125, "logps/rejected": -967.7403564453125, "loss": 0.0949, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09277771413326263, "rewards/margins": 0.21773691475391388, "rewards/rejected": -0.3105146288871765, "step": 480 }, { "epoch": 0.13, "learning_rate": 4.985689884830711e-06, "logits/chosen": -1.7672725915908813, "logits/rejected": -1.2025644779205322, "logps/chosen": -747.850341796875, "logps/rejected": -1293.3529052734375, "loss": 0.1002, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22476276755332947, "rewards/margins": 0.23183217644691467, "rewards/rejected": -0.45659494400024414, "step": 490 }, { "epoch": 0.13, "learning_rate": 4.983095894354858e-06, "logits/chosen": -1.6667953729629517, "logits/rejected": -1.2720074653625488, "logps/chosen": -679.2232666015625, "logps/rejected": -1342.873291015625, "loss": 0.1095, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20331433415412903, "rewards/margins": 0.3078593611717224, "rewards/rejected": -0.511173665523529, "step": 500 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -1.801032304763794, "logits/rejected": -1.2592867612838745, "logps/chosen": -528.0743408203125, "logps/rejected": -1059.638427734375, "loss": 0.0847, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10437086969614029, "rewards/margins": 0.2568288743495941, "rewards/rejected": -0.3611997663974762, "step": 510 }, { "epoch": 0.14, "learning_rate": 4.97726270502586e-06, "logits/chosen": -1.8762857913970947, "logits/rejected": -1.14815354347229, "logps/chosen": -567.5826416015625, "logps/rejected": -1179.177490234375, "loss": 0.0764, "rewards/accuracies": 0.875, "rewards/chosen": -0.09893319755792618, "rewards/margins": 0.27982866764068604, "rewards/rejected": -0.3787618577480316, "step": 520 }, { "epoch": 0.14, "learning_rate": 4.974024011595864e-06, "logits/chosen": -1.9012178182601929, "logits/rejected": -1.498219609260559, "logps/chosen": -575.654541015625, "logps/rejected": -1156.733642578125, "loss": 0.0812, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1486634910106659, "rewards/margins": 0.2521246075630188, "rewards/rejected": -0.40078815817832947, "step": 530 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -1.5046392679214478, "logits/rejected": -1.2080037593841553, "logps/chosen": -609.6295166015625, "logps/rejected": -1234.922607421875, "loss": 0.0969, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19593383371829987, "rewards/margins": 0.2632136940956116, "rewards/rejected": -0.45914751291275024, "step": 540 }, { "epoch": 0.15, "learning_rate": 4.966903830281449e-06, "logits/chosen": -1.8756754398345947, "logits/rejected": -1.3740646839141846, "logps/chosen": -685.7990112304688, "logps/rejected": -1199.182373046875, "loss": 0.1186, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20507201552391052, "rewards/margins": 0.22510738670825958, "rewards/rejected": -0.4301794171333313, "step": 550 }, { "epoch": 0.15, "learning_rate": 4.9630229593330226e-06, "logits/chosen": -2.1258938312530518, "logits/rejected": -1.3714039325714111, "logps/chosen": -674.0800170898438, "logps/rejected": -1057.264892578125, "loss": 0.0883, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14157623052597046, "rewards/margins": 0.20917348563671112, "rewards/rejected": -0.35074976086616516, "step": 560 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -1.9999202489852905, "logits/rejected": -1.2103183269500732, "logps/chosen": -599.47607421875, "logps/rejected": -1122.523681640625, "loss": 0.0788, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1143900528550148, "rewards/margins": 0.2687007486820221, "rewards/rejected": -0.3830908238887787, "step": 570 }, { "epoch": 0.15, "learning_rate": 4.954621338136399e-06, "logits/chosen": -1.8329057693481445, "logits/rejected": -1.1794686317443848, "logps/chosen": -716.9681396484375, "logps/rejected": -1392.547607421875, "loss": 0.1106, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2248903512954712, "rewards/margins": 0.31769150495529175, "rewards/rejected": -0.5425819158554077, "step": 580 }, { "epoch": 0.16, "learning_rate": 4.95010131585597e-06, "logits/chosen": -1.878209114074707, "logits/rejected": -1.0662002563476562, "logps/chosen": -822.8294677734375, "logps/rejected": -1354.05810546875, "loss": 0.0824, "rewards/accuracies": 0.875, "rewards/chosen": -0.24139878153800964, "rewards/margins": 0.24748913943767548, "rewards/rejected": -0.48888787627220154, "step": 590 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.870165228843689, "logits/rejected": -1.3282101154327393, "logps/chosen": -545.804931640625, "logps/rejected": -1157.7216796875, "loss": 0.0724, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13343581557273865, "rewards/margins": 0.23632793128490448, "rewards/rejected": -0.3697637617588043, "step": 600 }, { "epoch": 0.16, "learning_rate": 4.940424806108619e-06, "logits/chosen": -1.7240148782730103, "logits/rejected": -1.2849372625350952, "logps/chosen": -562.0394897460938, "logps/rejected": -1218.740478515625, "loss": 0.0821, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11796098947525024, "rewards/margins": 0.26410388946533203, "rewards/rejected": -0.38206490874290466, "step": 610 }, { "epoch": 0.17, "learning_rate": 4.935269157073597e-06, "logits/chosen": -1.7999210357666016, "logits/rejected": -1.4692919254302979, "logps/chosen": -535.56396484375, "logps/rejected": -1111.559814453125, "loss": 0.1083, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1319592297077179, "rewards/margins": 0.24687273800373077, "rewards/rejected": -0.3788319528102875, "step": 620 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -1.8443183898925781, "logits/rejected": -1.105686902999878, "logps/chosen": -678.8682250976562, "logps/rejected": -1240.9027099609375, "loss": 0.0645, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14497962594032288, "rewards/margins": 0.2707517147064209, "rewards/rejected": -0.415731281042099, "step": 630 }, { "epoch": 0.17, "learning_rate": 4.924325304226745e-06, "logits/chosen": -1.597224235534668, "logits/rejected": -1.1997989416122437, "logps/chosen": -519.6995239257812, "logps/rejected": -1027.214599609375, "loss": 0.1139, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1587638556957245, "rewards/margins": 0.2005012482404709, "rewards/rejected": -0.3592650890350342, "step": 640 }, { "epoch": 0.17, "learning_rate": 4.91853804865716e-06, "logits/chosen": -1.8366111516952515, "logits/rejected": -1.1595910787582397, "logps/chosen": -564.30615234375, "logps/rejected": -1044.134521484375, "loss": 0.1318, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09679403156042099, "rewards/margins": 0.2052040547132492, "rewards/rejected": -0.3019980788230896, "step": 650 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -1.5248780250549316, "logits/rejected": -0.9694509506225586, "logps/chosen": -725.3447265625, "logps/rejected": -1283.6407470703125, "loss": 0.077, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18108052015304565, "rewards/margins": 0.2481127232313156, "rewards/rejected": -0.42919325828552246, "step": 660 }, { "epoch": 0.18, "learning_rate": 4.9063353863980565e-06, "logits/chosen": -1.7408733367919922, "logits/rejected": -1.133147954940796, "logps/chosen": -621.7525024414062, "logps/rejected": -1341.664306640625, "loss": 0.0597, "rewards/accuracies": 0.875, "rewards/chosen": -0.18977642059326172, "rewards/margins": 0.3114860951900482, "rewards/rejected": -0.5012625455856323, "step": 670 }, { "epoch": 0.18, "learning_rate": 4.899921037021719e-06, "logits/chosen": -1.7900583744049072, "logits/rejected": -1.2211220264434814, "logps/chosen": -654.8604736328125, "logps/rejected": -1338.195068359375, "loss": 0.0749, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2140086591243744, "rewards/margins": 0.278323233127594, "rewards/rejected": -0.4923318922519684, "step": 680 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -1.8528478145599365, "logits/rejected": -1.1774396896362305, "logps/chosen": -580.0806884765625, "logps/rejected": -1167.764892578125, "loss": 0.0921, "rewards/accuracies": 0.75, "rewards/chosen": -0.12002040445804596, "rewards/margins": 0.2513282895088196, "rewards/rejected": -0.37134867906570435, "step": 690 }, { "epoch": 0.19, "learning_rate": 4.88646908061933e-06, "logits/chosen": -1.7654540538787842, "logits/rejected": -1.2601468563079834, "logps/chosen": -561.857177734375, "logps/rejected": -1196.891357421875, "loss": 0.0861, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06450385600328445, "rewards/margins": 0.24975624680519104, "rewards/rejected": -0.3142600357532501, "step": 700 }, { "epoch": 0.19, "learning_rate": 4.879432639152935e-06, "logits/chosen": -1.7489614486694336, "logits/rejected": -1.418021559715271, "logps/chosen": -545.885498046875, "logps/rejected": -1274.466552734375, "loss": 0.0804, "rewards/accuracies": 0.875, "rewards/chosen": -0.11761901527643204, "rewards/margins": 0.29888078570365906, "rewards/rejected": -0.4164997935295105, "step": 710 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -1.6508852243423462, "logits/rejected": -1.3089258670806885, "logps/chosen": -873.3531494140625, "logps/rejected": -1446.792724609375, "loss": 0.0813, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3387971818447113, "rewards/margins": 0.23475828766822815, "rewards/rejected": -0.5735554695129395, "step": 720 }, { "epoch": 0.19, "learning_rate": 4.864741878038218e-06, "logits/chosen": -1.5085474252700806, "logits/rejected": -0.964281439781189, "logps/chosen": -629.9556274414062, "logps/rejected": -1125.6422119140625, "loss": 0.1154, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21656934916973114, "rewards/margins": 0.25458166003227234, "rewards/rejected": -0.47115105390548706, "step": 730 }, { "epoch": 0.2, "learning_rate": 4.857088831287158e-06, "logits/chosen": -1.6169427633285522, "logits/rejected": -1.1799370050430298, "logps/chosen": -644.1641235351562, "logps/rejected": -1156.3399658203125, "loss": 0.1243, "rewards/accuracies": 0.75, "rewards/chosen": -0.20531603693962097, "rewards/margins": 0.21790286898612976, "rewards/rejected": -0.4232189655303955, "step": 740 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.7949107885360718, "logits/rejected": -0.9273164868354797, "logps/chosen": -694.5661010742188, "logps/rejected": -1367.0455322265625, "loss": 0.0694, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15553930401802063, "rewards/margins": 0.3378245532512665, "rewards/rejected": -0.4933638572692871, "step": 750 }, { "epoch": 0.2, "learning_rate": 4.841170720873723e-06, "logits/chosen": -1.671508550643921, "logits/rejected": -1.0756802558898926, "logps/chosen": -755.0061645507812, "logps/rejected": -1223.784423828125, "loss": 0.0906, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.28831586241722107, "rewards/margins": 0.23300468921661377, "rewards/rejected": -0.521320641040802, "step": 760 }, { "epoch": 0.21, "learning_rate": 4.832907036453647e-06, "logits/chosen": -1.6868644952774048, "logits/rejected": -1.017174482345581, "logps/chosen": -857.76904296875, "logps/rejected": -1345.37841796875, "loss": 0.1096, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31581613421440125, "rewards/margins": 0.24452456831932068, "rewards/rejected": -0.5603407621383667, "step": 770 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -1.5865137577056885, "logits/rejected": -0.9322856068611145, "logps/chosen": -661.6602783203125, "logps/rejected": -1249.6767578125, "loss": 0.0862, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2079945057630539, "rewards/margins": 0.2843298017978668, "rewards/rejected": -0.4923242926597595, "step": 780 }, { "epoch": 0.21, "learning_rate": 4.815773989205165e-06, "logits/chosen": -1.4541298151016235, "logits/rejected": -1.0230770111083984, "logps/chosen": -536.982421875, "logps/rejected": -1293.7628173828125, "loss": 0.0569, "rewards/accuracies": 0.875, "rewards/chosen": -0.1561233103275299, "rewards/margins": 0.27445393800735474, "rewards/rejected": -0.43057721853256226, "step": 790 }, { "epoch": 0.21, "learning_rate": 4.806906110888606e-06, "logits/chosen": -1.5984938144683838, "logits/rejected": -1.1344892978668213, "logps/chosen": -560.5667114257812, "logps/rejected": -1207.4886474609375, "loss": 0.0874, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1664171814918518, "rewards/margins": 0.2653278708457947, "rewards/rejected": -0.4317450523376465, "step": 800 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -1.690313696861267, "logits/rejected": -1.2708024978637695, "logps/chosen": -635.1510009765625, "logps/rejected": -1258.609619140625, "loss": 0.0729, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19007566571235657, "rewards/margins": 0.27011603116989136, "rewards/rejected": -0.4601917266845703, "step": 810 }, { "epoch": 0.22, "learning_rate": 4.788571486639948e-06, "logits/chosen": -1.736071228981018, "logits/rejected": -1.4028418064117432, "logps/chosen": -652.5921630859375, "logps/rejected": -1381.756591796875, "loss": 0.0832, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20194599032402039, "rewards/margins": 0.2887413501739502, "rewards/rejected": -0.49068737030029297, "step": 820 }, { "epoch": 0.22, "learning_rate": 4.779106329331665e-06, "logits/chosen": -1.542184829711914, "logits/rejected": -0.9706169962882996, "logps/chosen": -619.546875, "logps/rejected": -1174.707763671875, "loss": 0.0978, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16596826910972595, "rewards/margins": 0.2696087956428528, "rewards/rejected": -0.43557706475257874, "step": 830 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -1.812591552734375, "logits/rejected": -1.1759769916534424, "logps/chosen": -690.1990966796875, "logps/rejected": -1262.889892578125, "loss": 0.0952, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21801619231700897, "rewards/margins": 0.2809485197067261, "rewards/rejected": -0.49896472692489624, "step": 840 }, { "epoch": 0.23, "learning_rate": 4.759584424871302e-06, "logits/chosen": -1.5376830101013184, "logits/rejected": -0.7281585931777954, "logps/chosen": -729.5716552734375, "logps/rejected": -1302.743408203125, "loss": 0.0844, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.26298269629478455, "rewards/margins": 0.28789180517196655, "rewards/rejected": -0.5508745908737183, "step": 850 }, { "epoch": 0.23, "learning_rate": 4.749529369216246e-06, "logits/chosen": -1.5037004947662354, "logits/rejected": -0.9839785695075989, "logps/chosen": -823.6188354492188, "logps/rejected": -1403.5616455078125, "loss": 0.0976, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2846857011318207, "rewards/margins": 0.2837271988391876, "rewards/rejected": -0.5684128403663635, "step": 860 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -1.379028081893921, "logits/rejected": -1.1316661834716797, "logps/chosen": -523.5051879882812, "logps/rejected": -1148.9351806640625, "loss": 0.0857, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17531812191009521, "rewards/margins": 0.21767202019691467, "rewards/rejected": -0.3929901719093323, "step": 870 }, { "epoch": 0.23, "learning_rate": 4.7288354071380415e-06, "logits/chosen": -1.7908474206924438, "logits/rejected": -1.2674061059951782, "logps/chosen": -596.2501831054688, "logps/rejected": -1295.179931640625, "loss": 0.0668, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1408284455537796, "rewards/margins": 0.2789645195007324, "rewards/rejected": -0.4197929799556732, "step": 880 }, { "epoch": 0.24, "learning_rate": 4.7181982937661485e-06, "logits/chosen": -1.5081275701522827, "logits/rejected": -1.0480270385742188, "logps/chosen": -599.3560791015625, "logps/rejected": -1368.0018310546875, "loss": 0.0594, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17826338112354279, "rewards/margins": 0.3195931613445282, "rewards/rejected": -0.497856467962265, "step": 890 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.6069328784942627, "logits/rejected": -0.8594322204589844, "logps/chosen": -767.88427734375, "logps/rejected": -1367.8349609375, "loss": 0.0836, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22526466846466064, "rewards/margins": 0.2738983929157257, "rewards/rejected": -0.49916306138038635, "step": 900 }, { "epoch": 0.24, "learning_rate": 4.696348410599244e-06, "logits/chosen": -1.690263032913208, "logits/rejected": -1.122013807296753, "logps/chosen": -767.4993896484375, "logps/rejected": -1443.0506591796875, "loss": 0.08, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24120143055915833, "rewards/margins": 0.29539158940315247, "rewards/rejected": -0.5365930199623108, "step": 910 }, { "epoch": 0.25, "learning_rate": 4.685137534011549e-06, "logits/chosen": -1.711660385131836, "logits/rejected": -1.2863471508026123, "logps/chosen": -588.4500732421875, "logps/rejected": -1289.240966796875, "loss": 0.0833, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14869460463523865, "rewards/margins": 0.28291845321655273, "rewards/rejected": -0.4316130578517914, "step": 920 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -1.8320610523223877, "logits/rejected": -1.292765736579895, "logps/chosen": -668.5866088867188, "logps/rejected": -1182.257080078125, "loss": 0.1009, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18477411568164825, "rewards/margins": 0.22763219475746155, "rewards/rejected": -0.4124062955379486, "step": 930 }, { "epoch": 0.25, "learning_rate": 4.662148767637578e-06, "logits/chosen": -1.5611059665679932, "logits/rejected": -1.1849793195724487, "logps/chosen": -463.08355712890625, "logps/rejected": -1084.614501953125, "loss": 0.1011, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08577910810709, "rewards/margins": 0.24440805613994598, "rewards/rejected": -0.3301871418952942, "step": 940 }, { "epoch": 0.25, "learning_rate": 4.650372869738415e-06, "logits/chosen": -1.511456847190857, "logits/rejected": -1.0551631450653076, "logps/chosen": -556.8165283203125, "logps/rejected": -1228.0035400390625, "loss": 0.0704, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1265622079372406, "rewards/margins": 0.3069656193256378, "rewards/rejected": -0.4335278570652008, "step": 950 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -1.5442140102386475, "logits/rejected": -0.9868549108505249, "logps/chosen": -613.2291259765625, "logps/rejected": -1180.44384765625, "loss": 0.0921, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14740554988384247, "rewards/margins": 0.27208468317985535, "rewards/rejected": -0.419490247964859, "step": 960 }, { "epoch": 0.26, "learning_rate": 4.626263146105875e-06, "logits/chosen": -1.7330728769302368, "logits/rejected": -1.2073631286621094, "logps/chosen": -700.1824340820312, "logps/rejected": -1393.9658203125, "loss": 0.0826, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21020905673503876, "rewards/margins": 0.29664525389671326, "rewards/rejected": -0.5068542957305908, "step": 970 }, { "epoch": 0.26, "learning_rate": 4.613931409386196e-06, "logits/chosen": -1.6415268182754517, "logits/rejected": -1.1623541116714478, "logps/chosen": -577.8115844726562, "logps/rejected": -1281.671875, "loss": 0.062, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.13329057395458221, "rewards/margins": 0.31404128670692444, "rewards/rejected": -0.44733184576034546, "step": 980 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -1.5495585203170776, "logits/rejected": -1.219543695449829, "logps/chosen": -611.6688842773438, "logps/rejected": -1210.188720703125, "loss": 0.0883, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16439898312091827, "rewards/margins": 0.22104132175445557, "rewards/rejected": -0.38544028997421265, "step": 990 }, { "epoch": 0.27, "learning_rate": 4.588719528532342e-06, "logits/chosen": -1.6590620279312134, "logits/rejected": -1.1156672239303589, "logps/chosen": -558.9346923828125, "logps/rejected": -1250.8326416015625, "loss": 0.0613, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11309070885181427, "rewards/margins": 0.27457505464553833, "rewards/rejected": -0.3876657485961914, "step": 1000 }, { "epoch": 0.27, "learning_rate": 4.575841568909494e-06, "logits/chosen": -1.72207772731781, "logits/rejected": -0.9709069132804871, "logps/chosen": -574.8037719726562, "logps/rejected": -1254.651123046875, "loss": 0.0493, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08526522666215897, "rewards/margins": 0.31907790899276733, "rewards/rejected": -0.4043431282043457, "step": 1010 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -1.6100561618804932, "logits/rejected": -1.0836453437805176, "logps/chosen": -605.432373046875, "logps/rejected": -1389.6484375, "loss": 0.0562, "rewards/accuracies": 0.875, "rewards/chosen": -0.13646581768989563, "rewards/margins": 0.3396046459674835, "rewards/rejected": -0.47607049345970154, "step": 1020 }, { "epoch": 0.27, "learning_rate": 4.549547190300622e-06, "logits/chosen": -1.5626428127288818, "logits/rejected": -1.1030738353729248, "logps/chosen": -767.6092529296875, "logps/rejected": -1497.570068359375, "loss": 0.0747, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20658251643180847, "rewards/margins": 0.3094042241573334, "rewards/rejected": -0.5159868001937866, "step": 1030 }, { "epoch": 0.28, "learning_rate": 4.536133049620143e-06, "logits/chosen": -1.67703378200531, "logits/rejected": -1.1302894353866577, "logps/chosen": -506.2029724121094, "logps/rejected": -1050.456298828125, "loss": 0.1074, "rewards/accuracies": 0.75, "rewards/chosen": -0.10386884212493896, "rewards/margins": 0.24720358848571777, "rewards/rejected": -0.35107240080833435, "step": 1040 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.510943055152893, "logits/rejected": -1.2439063787460327, "logps/chosen": -546.3123168945312, "logps/rejected": -1226.2244873046875, "loss": 0.0807, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15128368139266968, "rewards/margins": 0.2694355547428131, "rewards/rejected": -0.4207192063331604, "step": 1050 }, { "epoch": 0.28, "learning_rate": 4.508776676821739e-06, "logits/chosen": -1.4208545684814453, "logits/rejected": -0.8101575970649719, "logps/chosen": -645.05126953125, "logps/rejected": -1315.7305908203125, "loss": 0.0778, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21077755093574524, "rewards/margins": 0.2958206832408905, "rewards/rejected": -0.5065982937812805, "step": 1060 }, { "epoch": 0.29, "learning_rate": 4.494836815027022e-06, "logits/chosen": -1.7293838262557983, "logits/rejected": -1.1680035591125488, "logps/chosen": -792.54443359375, "logps/rejected": -1469.5382080078125, "loss": 0.0713, "rewards/accuracies": 0.875, "rewards/chosen": -0.2889425456523895, "rewards/margins": 0.28303924202919006, "rewards/rejected": -0.5719817876815796, "step": 1070 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -1.5097590684890747, "logits/rejected": -0.8481209874153137, "logps/chosen": -801.5206298828125, "logps/rejected": -1431.248291015625, "loss": 0.0753, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.29523470997810364, "rewards/margins": 0.2885613441467285, "rewards/rejected": -0.5837960243225098, "step": 1080 }, { "epoch": 0.29, "learning_rate": 4.466439779715696e-06, "logits/chosen": -1.5163007974624634, "logits/rejected": -0.9659198522567749, "logps/chosen": -658.9117431640625, "logps/rejected": -1309.037841796875, "loss": 0.0852, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21787042915821075, "rewards/margins": 0.2800406515598297, "rewards/rejected": -0.49791112542152405, "step": 1090 }, { "epoch": 0.29, "learning_rate": 4.451985066691649e-06, "logits/chosen": -1.6100610494613647, "logits/rejected": -1.1183403730392456, "logps/chosen": -612.2716064453125, "logps/rejected": -1260.2725830078125, "loss": 0.0752, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21446070075035095, "rewards/margins": 0.3018341362476349, "rewards/rejected": -0.5162948369979858, "step": 1100 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -1.4113812446594238, "logits/rejected": -0.932741641998291, "logps/chosen": -581.6453247070312, "logps/rejected": -1137.978515625, "loss": 0.1072, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20348218083381653, "rewards/margins": 0.25568509101867676, "rewards/rejected": -0.4591673016548157, "step": 1110 }, { "epoch": 0.3, "learning_rate": 4.422569512021332e-06, "logits/chosen": -1.6003286838531494, "logits/rejected": -0.9033063054084778, "logps/chosen": -878.2449340820312, "logps/rejected": -1371.249267578125, "loss": 0.0921, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.29766175150871277, "rewards/margins": 0.27674609422683716, "rewards/rejected": -0.5744079351425171, "step": 1120 }, { "epoch": 0.3, "learning_rate": 4.407611219118363e-06, "logits/chosen": -1.6345365047454834, "logits/rejected": -1.1004862785339355, "logps/chosen": -627.5486450195312, "logps/rejected": -1306.81640625, "loss": 0.0544, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20319166779518127, "rewards/margins": 0.3195189833641052, "rewards/rejected": -0.5227106809616089, "step": 1130 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -1.518014669418335, "logits/rejected": -0.9323236346244812, "logps/chosen": -658.4776611328125, "logps/rejected": -1171.27587890625, "loss": 0.1018, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22144591808319092, "rewards/margins": 0.2691202759742737, "rewards/rejected": -0.4905661642551422, "step": 1140 }, { "epoch": 0.31, "learning_rate": 4.377200082453748e-06, "logits/chosen": -1.5906239748001099, "logits/rejected": -0.9715790748596191, "logps/chosen": -688.2633056640625, "logps/rejected": -1256.56494140625, "loss": 0.0857, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2001594752073288, "rewards/margins": 0.25535720586776733, "rewards/rejected": -0.45551663637161255, "step": 1150 }, { "epoch": 0.31, "learning_rate": 4.361749873698707e-06, "logits/chosen": -1.8183555603027344, "logits/rejected": -1.1526719331741333, "logps/chosen": -603.915771484375, "logps/rejected": -1199.3839111328125, "loss": 0.0831, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1569216549396515, "rewards/margins": 0.24373742938041687, "rewards/rejected": -0.40065908432006836, "step": 1160 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -1.5570250749588013, "logits/rejected": -1.029444932937622, "logps/chosen": -720.4706420898438, "logps/rejected": -1287.0992431640625, "loss": 0.0828, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22045788168907166, "rewards/margins": 0.23490801453590393, "rewards/rejected": -0.4553658962249756, "step": 1170 }, { "epoch": 0.31, "learning_rate": 4.330366868729376e-06, "logits/chosen": -1.524060845375061, "logits/rejected": -1.06089186668396, "logps/chosen": -683.9163208007812, "logps/rejected": -1294.5540771484375, "loss": 0.0878, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21835967898368835, "rewards/margins": 0.2567465901374817, "rewards/rejected": -0.4751063287258148, "step": 1180 }, { "epoch": 0.32, "learning_rate": 4.3144367917302964e-06, "logits/chosen": -1.5951565504074097, "logits/rejected": -1.2337514162063599, "logps/chosen": -591.2457885742188, "logps/rejected": -1147.8511962890625, "loss": 0.0876, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21049340069293976, "rewards/margins": 0.20914044976234436, "rewards/rejected": -0.41963380575180054, "step": 1190 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.3378808498382568, "logits/rejected": -0.8554127812385559, "logps/chosen": -704.482177734375, "logps/rejected": -1244.0657958984375, "loss": 0.0835, "rewards/accuracies": 0.875, "rewards/chosen": -0.2313729077577591, "rewards/margins": 0.24450743198394775, "rewards/rejected": -0.47588032484054565, "step": 1200 }, { "epoch": 0.32, "learning_rate": 4.2821063899795015e-06, "logits/chosen": -1.5431578159332275, "logits/rejected": -1.1451054811477661, "logps/chosen": -551.98486328125, "logps/rejected": -1086.984130859375, "loss": 0.1082, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17247965931892395, "rewards/margins": 0.2379414588212967, "rewards/rejected": -0.41042113304138184, "step": 1210 }, { "epoch": 0.33, "learning_rate": 4.265708866531238e-06, "logits/chosen": -1.3170068264007568, "logits/rejected": -1.3953243494033813, "logps/chosen": -555.5161743164062, "logps/rejected": -1217.95556640625, "loss": 0.0874, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1994590014219284, "rewards/margins": 0.24560394883155823, "rewards/rejected": -0.4450629651546478, "step": 1220 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -1.5873005390167236, "logits/rejected": -0.9497630000114441, "logps/chosen": -762.7379150390625, "logps/rejected": -1492.514404296875, "loss": 0.0591, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26952821016311646, "rewards/margins": 0.34387946128845215, "rewards/rejected": -0.6134077310562134, "step": 1230 }, { "epoch": 0.33, "learning_rate": 4.232456278273743e-06, "logits/chosen": -1.6160411834716797, "logits/rejected": -0.9002545475959778, "logps/chosen": -827.9026489257812, "logps/rejected": -1476.774169921875, "loss": 0.0731, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2839387059211731, "rewards/margins": 0.3188818097114563, "rewards/rejected": -0.6028205156326294, "step": 1240 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -1.7747631072998047, "logits/rejected": -1.4146007299423218, "logps/chosen": -544.9703369140625, "logps/rejected": -1178.328369140625, "loss": 0.0946, "rewards/accuracies": 0.75, "rewards/chosen": -0.151317298412323, "rewards/margins": 0.26069843769073486, "rewards/rejected": -0.41201576590538025, "step": 1250 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -1.6157176494598389, "logits/rejected": -0.8367627859115601, "logps/chosen": -626.7408447265625, "logps/rejected": -1291.121826171875, "loss": 0.0867, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1371547281742096, "rewards/margins": 0.3223341405391693, "rewards/rejected": -0.45948880910873413, "step": 1260 }, { "epoch": 0.34, "learning_rate": 4.181455249275701e-06, "logits/chosen": -1.7481143474578857, "logits/rejected": -1.12859308719635, "logps/chosen": -634.04638671875, "logps/rejected": -1124.959716796875, "loss": 0.0954, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16867594420909882, "rewards/margins": 0.2416352778673172, "rewards/rejected": -0.410311222076416, "step": 1270 }, { "epoch": 0.34, "learning_rate": 4.1641615463459926e-06, "logits/chosen": -1.5910767316818237, "logits/rejected": -1.0931599140167236, "logps/chosen": -631.27392578125, "logps/rejected": -1231.7950439453125, "loss": 0.0834, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16265997290611267, "rewards/margins": 0.24569034576416016, "rewards/rejected": -0.4083503186702728, "step": 1280 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -1.6471401453018188, "logits/rejected": -1.2424445152282715, "logps/chosen": -468.88739013671875, "logps/rejected": -1020.91259765625, "loss": 0.102, "rewards/accuracies": 0.75, "rewards/chosen": -0.10470066964626312, "rewards/margins": 0.23985891044139862, "rewards/rejected": -0.34455958008766174, "step": 1290 }, { "epoch": 0.35, "learning_rate": 4.129143072053639e-06, "logits/chosen": -1.608716607093811, "logits/rejected": -0.9606212377548218, "logps/chosen": -669.7889404296875, "logps/rejected": -1358.1666259765625, "loss": 0.0559, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17117540538311005, "rewards/margins": 0.31088918447494507, "rewards/rejected": -0.4820645749568939, "step": 1300 }, { "epoch": 0.35, "learning_rate": 4.111421334905468e-06, "logits/chosen": -1.5793676376342773, "logits/rejected": -1.0094425678253174, "logps/chosen": -710.6974487304688, "logps/rejected": -1337.674560546875, "loss": 0.0837, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2793545424938202, "rewards/margins": 0.28449904918670654, "rewards/rejected": -0.5638536214828491, "step": 1310 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -1.3306772708892822, "logits/rejected": -1.1824491024017334, "logps/chosen": -800.3571166992188, "logps/rejected": -1588.775146484375, "loss": 0.088, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.32887041568756104, "rewards/margins": 0.32484978437423706, "rewards/rejected": -0.6537202596664429, "step": 1320 }, { "epoch": 0.35, "learning_rate": 4.075560538069767e-06, "logits/chosen": -1.5261657238006592, "logits/rejected": -1.0766921043395996, "logps/chosen": -690.2559814453125, "logps/rejected": -1296.5557861328125, "loss": 0.0804, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21848633885383606, "rewards/margins": 0.2872334122657776, "rewards/rejected": -0.505719780921936, "step": 1330 }, { "epoch": 0.36, "learning_rate": 4.05742458558068e-06, "logits/chosen": -1.416123628616333, "logits/rejected": -1.1054461002349854, "logps/chosen": -764.1392822265625, "logps/rejected": -1332.025634765625, "loss": 0.1048, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22151347994804382, "rewards/margins": 0.24781036376953125, "rewards/rejected": -0.4693238139152527, "step": 1340 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.4527040719985962, "logits/rejected": -0.6857194900512695, "logps/chosen": -812.2103271484375, "logps/rejected": -1516.066650390625, "loss": 0.0609, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3405809998512268, "rewards/margins": 0.31844016909599304, "rewards/rejected": -0.6590211987495422, "step": 1350 }, { "epoch": 0.36, "learning_rate": 4.020749429372286e-06, "logits/chosen": -1.446703314781189, "logits/rejected": -0.9012134671211243, "logps/chosen": -742.4305419921875, "logps/rejected": -1499.476806640625, "loss": 0.069, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3392742872238159, "rewards/margins": 0.3239519000053406, "rewards/rejected": -0.6632262468338013, "step": 1360 }, { "epoch": 0.37, "learning_rate": 4.002213403412492e-06, "logits/chosen": -1.3960011005401611, "logits/rejected": -0.9817187190055847, "logps/chosen": -778.759033203125, "logps/rejected": -1387.777099609375, "loss": 0.0727, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3149294853210449, "rewards/margins": 0.27625852823257446, "rewards/rejected": -0.5911880731582642, "step": 1370 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -1.5987809896469116, "logits/rejected": -0.7307044863700867, "logps/chosen": -858.3521728515625, "logps/rejected": -1550.783935546875, "loss": 0.0618, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2980232834815979, "rewards/margins": 0.3726673722267151, "rewards/rejected": -0.670690655708313, "step": 1380 }, { "epoch": 0.37, "learning_rate": 3.964752486015001e-06, "logits/chosen": -1.7145130634307861, "logits/rejected": -1.0826901197433472, "logps/chosen": -551.7413330078125, "logps/rejected": -973.1448364257812, "loss": 0.1367, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17761428654193878, "rewards/margins": 0.18822605907917023, "rewards/rejected": -0.365840345621109, "step": 1390 }, { "epoch": 0.37, "learning_rate": 3.945830840419966e-06, "logits/chosen": -1.7292487621307373, "logits/rejected": -1.03867506980896, "logps/chosen": -610.347900390625, "logps/rejected": -1196.255126953125, "loss": 0.0801, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19123509526252747, "rewards/margins": 0.27820879220962524, "rewards/rejected": -0.4694438874721527, "step": 1400 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -1.7660369873046875, "logits/rejected": -0.8002158403396606, "logps/chosen": -790.7001342773438, "logps/rejected": -1427.8797607421875, "loss": 0.0402, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24115972220897675, "rewards/margins": 0.34107840061187744, "rewards/rejected": -0.5822380781173706, "step": 1410 }, { "epoch": 0.38, "learning_rate": 3.907613372729916e-06, "logits/chosen": -1.5305942296981812, "logits/rejected": -0.9681123495101929, "logps/chosen": -683.839599609375, "logps/rejected": -1229.334228515625, "loss": 0.1139, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26777878403663635, "rewards/margins": 0.2556554675102234, "rewards/rejected": -0.5234342813491821, "step": 1420 }, { "epoch": 0.38, "learning_rate": 3.888320862029699e-06, "logits/chosen": -1.5409247875213623, "logits/rejected": -1.1824085712432861, "logps/chosen": -565.3055419921875, "logps/rejected": -1225.6197509765625, "loss": 0.0719, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21753540635108948, "rewards/margins": 0.262325644493103, "rewards/rejected": -0.4798610210418701, "step": 1430 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -1.6002075672149658, "logits/rejected": -0.9055458903312683, "logps/chosen": -626.1796264648438, "logps/rejected": -1374.696044921875, "loss": 0.045, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20963218808174133, "rewards/margins": 0.36331892013549805, "rewards/rejected": -0.5729510188102722, "step": 1440 }, { "epoch": 0.39, "learning_rate": 3.849376644878783e-06, "logits/chosen": -1.5136663913726807, "logits/rejected": -1.2368497848510742, "logps/chosen": -744.4407958984375, "logps/rejected": -1368.80126953125, "loss": 0.0722, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24185235798358917, "rewards/margins": 0.2773440182209015, "rewards/rejected": -0.5191963911056519, "step": 1450 }, { "epoch": 0.39, "learning_rate": 3.829728312792895e-06, "logits/chosen": -1.6058063507080078, "logits/rejected": -1.3173209428787231, "logps/chosen": -614.1032104492188, "logps/rejected": -1252.304443359375, "loss": 0.0718, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1905815303325653, "rewards/margins": 0.2669087052345276, "rewards/rejected": -0.45749014616012573, "step": 1460 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -1.5442336797714233, "logits/rejected": -1.108355164527893, "logps/chosen": -711.9778442382812, "logps/rejected": -1236.2525634765625, "loss": 0.1105, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17289568483829498, "rewards/margins": 0.2372523844242096, "rewards/rejected": -0.41014808416366577, "step": 1470 }, { "epoch": 0.39, "learning_rate": 3.790087713710179e-06, "logits/chosen": -1.7366338968276978, "logits/rejected": -1.3570505380630493, "logps/chosen": -617.22607421875, "logps/rejected": -1209.1883544921875, "loss": 0.072, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14606155455112457, "rewards/margins": 0.2611353397369385, "rewards/rejected": -0.40719684958457947, "step": 1480 }, { "epoch": 0.4, "learning_rate": 3.770098881416945e-06, "logits/chosen": -1.6808090209960938, "logits/rejected": -0.7971758842468262, "logps/chosen": -862.0647583007812, "logps/rejected": -1464.6036376953125, "loss": 0.0635, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.23483994603157043, "rewards/margins": 0.32541361451148987, "rewards/rejected": -0.5602535009384155, "step": 1490 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.6327816247940063, "logits/rejected": -0.9372482299804688, "logps/chosen": -734.8026123046875, "logps/rejected": -1427.211181640625, "loss": 0.0614, "rewards/accuracies": 0.875, "rewards/chosen": -0.2201705276966095, "rewards/margins": 0.31630367040634155, "rewards/rejected": -0.5364742279052734, "step": 1500 }, { "epoch": 0.4, "learning_rate": 3.7297928109491765e-06, "logits/chosen": -1.4493783712387085, "logits/rejected": -1.1883541345596313, "logps/chosen": -693.4669189453125, "logps/rejected": -1344.7279052734375, "loss": 0.0707, "rewards/accuracies": 0.875, "rewards/chosen": -0.2357136309146881, "rewards/margins": 0.28132572770118713, "rewards/rejected": -0.5170393586158752, "step": 1510 }, { "epoch": 0.41, "learning_rate": 3.7094790651387414e-06, "logits/chosen": -1.6289390325546265, "logits/rejected": -1.1786553859710693, "logps/chosen": -669.4554443359375, "logps/rejected": -1307.296875, "loss": 0.0715, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2002047747373581, "rewards/margins": 0.3151922821998596, "rewards/rejected": -0.5153970122337341, "step": 1520 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -1.5846166610717773, "logits/rejected": -0.9855213165283203, "logps/chosen": -595.7760620117188, "logps/rejected": -1270.208740234375, "loss": 0.0617, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1446508765220642, "rewards/margins": 0.3013971149921417, "rewards/rejected": -0.4460480213165283, "step": 1530 }, { "epoch": 0.41, "learning_rate": 3.668538952747236e-06, "logits/chosen": -1.6458574533462524, "logits/rejected": -1.1102992296218872, "logps/chosen": -588.1439208984375, "logps/rejected": -1164.717529296875, "loss": 0.0999, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1480761170387268, "rewards/margins": 0.22489504516124725, "rewards/rejected": -0.37297114729881287, "step": 1540 }, { "epoch": 0.41, "learning_rate": 3.6479161334675294e-06, "logits/chosen": -1.7304718494415283, "logits/rejected": -1.0173327922821045, "logps/chosen": -598.8756713867188, "logps/rejected": -1213.0777587890625, "loss": 0.064, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13348576426506042, "rewards/margins": 0.30749550461769104, "rewards/rejected": -0.44098129868507385, "step": 1550 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -1.7607488632202148, "logits/rejected": -1.0212706327438354, "logps/chosen": -677.742431640625, "logps/rejected": -1323.628173828125, "loss": 0.0535, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1735551357269287, "rewards/margins": 0.3078446090221405, "rewards/rejected": -0.4813997745513916, "step": 1560 }, { "epoch": 0.42, "learning_rate": 3.6063739030204226e-06, "logits/chosen": -1.6025089025497437, "logits/rejected": -0.9767768979072571, "logps/chosen": -597.2946166992188, "logps/rejected": -1132.9073486328125, "loss": 0.1038, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16409102082252502, "rewards/margins": 0.2722373902797699, "rewards/rejected": -0.4363284111022949, "step": 1570 }, { "epoch": 0.42, "learning_rate": 3.5854580913255706e-06, "logits/chosen": -1.616591215133667, "logits/rejected": -1.1780548095703125, "logps/chosen": -675.4401245117188, "logps/rejected": -1367.3082275390625, "loss": 0.0635, "rewards/accuracies": 0.875, "rewards/chosen": -0.22119805216789246, "rewards/margins": 0.2958839535713196, "rewards/rejected": -0.5170820951461792, "step": 1580 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -1.5432292222976685, "logits/rejected": -0.871782660484314, "logps/chosen": -773.683837890625, "logps/rejected": -1428.926025390625, "loss": 0.0403, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21216030418872833, "rewards/margins": 0.35337987542152405, "rewards/rejected": -0.5655401349067688, "step": 1590 }, { "epoch": 0.43, "learning_rate": 3.543346136204545e-06, "logits/chosen": -1.3973724842071533, "logits/rejected": -1.1439921855926514, "logps/chosen": -646.6536254882812, "logps/rejected": -1378.410400390625, "loss": 0.0582, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18542486429214478, "rewards/margins": 0.3139794170856476, "rewards/rejected": -0.49940428137779236, "step": 1600 }, { "epoch": 0.43, "learning_rate": 3.522153641615345e-06, "logits/chosen": -1.7498506307601929, "logits/rejected": -0.853812038898468, "logps/chosen": -691.226806640625, "logps/rejected": -1190.421630859375, "loss": 0.0798, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1312675178050995, "rewards/margins": 0.2993599772453308, "rewards/rejected": -0.4306275248527527, "step": 1610 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -1.6955890655517578, "logits/rejected": -1.0088529586791992, "logps/chosen": -625.9053955078125, "logps/rejected": -1268.4305419921875, "loss": 0.0572, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15290668606758118, "rewards/margins": 0.2966083288192749, "rewards/rejected": -0.44951504468917847, "step": 1620 }, { "epoch": 0.43, "learning_rate": 3.4795047994562463e-06, "logits/chosen": -1.787287950515747, "logits/rejected": -0.991382896900177, "logps/chosen": -736.6607666015625, "logps/rejected": -1425.2071533203125, "loss": 0.041, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2185276746749878, "rewards/margins": 0.3424827456474304, "rewards/rejected": -0.561010479927063, "step": 1630 }, { "epoch": 0.44, "learning_rate": 3.458052147242494e-06, "logits/chosen": -1.5612847805023193, "logits/rejected": -0.9777131080627441, "logps/chosen": -677.7218627929688, "logps/rejected": -1298.963623046875, "loss": 0.0625, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19684460759162903, "rewards/margins": 0.29719001054763794, "rewards/rejected": -0.49403461813926697, "step": 1640 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.522048830986023, "logits/rejected": -0.9393981099128723, "logps/chosen": -657.0848388671875, "logps/rejected": -1337.720458984375, "loss": 0.0442, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20744872093200684, "rewards/margins": 0.3454239070415497, "rewards/rejected": -0.5528727173805237, "step": 1650 }, { "epoch": 0.44, "learning_rate": 3.4148996743295305e-06, "logits/chosen": -1.6736905574798584, "logits/rejected": -1.3306677341461182, "logps/chosen": -594.0120849609375, "logps/rejected": -1347.886474609375, "loss": 0.0643, "rewards/accuracies": 0.875, "rewards/chosen": -0.16157297790050507, "rewards/margins": 0.33852246403694153, "rewards/rejected": -0.5000954270362854, "step": 1660 }, { "epoch": 0.45, "learning_rate": 3.3932035926241103e-06, "logits/chosen": -1.5001100301742554, "logits/rejected": -1.099002718925476, "logps/chosen": -574.8411254882812, "logps/rejected": -1262.277099609375, "loss": 0.071, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17917487025260925, "rewards/margins": 0.3023154139518738, "rewards/rejected": -0.4814903140068054, "step": 1670 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -1.5629947185516357, "logits/rejected": -1.0339363813400269, "logps/chosen": -695.541748046875, "logps/rejected": -1296.75830078125, "loss": 0.0871, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19980263710021973, "rewards/margins": 0.25737708806991577, "rewards/rejected": -0.4571797847747803, "step": 1680 }, { "epoch": 0.45, "learning_rate": 3.349581137957604e-06, "logits/chosen": -1.7300357818603516, "logits/rejected": -0.9519279599189758, "logps/chosen": -630.9297485351562, "logps/rejected": -1248.482421875, "loss": 0.0576, "rewards/accuracies": 0.875, "rewards/chosen": -0.1405705362558365, "rewards/margins": 0.3395325243473053, "rewards/rejected": -0.4801030158996582, "step": 1690 }, { "epoch": 0.45, "learning_rate": 3.3276585447123957e-06, "logits/chosen": -1.7029545307159424, "logits/rejected": -0.8141438364982605, "logps/chosen": -717.7041625976562, "logps/rejected": -1194.781005859375, "loss": 0.0961, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1431819498538971, "rewards/margins": 0.2582007944583893, "rewards/rejected": -0.40138277411460876, "step": 1700 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -1.7564830780029297, "logits/rejected": -1.1706587076187134, "logps/chosen": -668.6366577148438, "logps/rejected": -1349.9146728515625, "loss": 0.0828, "rewards/accuracies": 0.875, "rewards/chosen": -0.15937338769435883, "rewards/margins": 0.3159894347190857, "rewards/rejected": -0.4753628373146057, "step": 1710 }, { "epoch": 0.46, "learning_rate": 3.2836001237702993e-06, "logits/chosen": -1.8231847286224365, "logits/rejected": -1.20169198513031, "logps/chosen": -600.72265625, "logps/rejected": -1295.0966796875, "loss": 0.0506, "rewards/accuracies": 0.875, "rewards/chosen": -0.13170823454856873, "rewards/margins": 0.3157060444355011, "rewards/rejected": -0.4474143087863922, "step": 1720 }, { "epoch": 0.46, "learning_rate": 3.2614681135640696e-06, "logits/chosen": -1.424872636795044, "logits/rejected": -1.0434539318084717, "logps/chosen": -765.4076538085938, "logps/rejected": -1424.029052734375, "loss": 0.088, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22880463302135468, "rewards/margins": 0.25481894612312317, "rewards/rejected": -0.48362359404563904, "step": 1730 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -1.6727033853530884, "logits/rejected": -0.9850581884384155, "logps/chosen": -603.7737426757812, "logps/rejected": -1283.9576416015625, "loss": 0.0851, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1411423683166504, "rewards/margins": 0.2962447702884674, "rewards/rejected": -0.4373871386051178, "step": 1740 }, { "epoch": 0.47, "learning_rate": 3.217008081777726e-06, "logits/chosen": -1.4920157194137573, "logits/rejected": -1.0782114267349243, "logps/chosen": -520.2071533203125, "logps/rejected": -1167.503662109375, "loss": 0.0687, "rewards/accuracies": 0.875, "rewards/chosen": -0.12038830667734146, "rewards/margins": 0.26983264088630676, "rewards/rejected": -0.3902209401130676, "step": 1750 }, { "epoch": 0.47, "learning_rate": 3.1946839124862873e-06, "logits/chosen": -1.59346342086792, "logits/rejected": -0.8561047315597534, "logps/chosen": -620.8773803710938, "logps/rejected": -1345.0489501953125, "loss": 0.0611, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1517111212015152, "rewards/margins": 0.29772210121154785, "rewards/rejected": -0.44943323731422424, "step": 1760 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -1.590381383895874, "logits/rejected": -1.1091078519821167, "logps/chosen": -734.8489379882812, "logps/rejected": -1222.6331787109375, "loss": 0.0966, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19709806144237518, "rewards/margins": 0.24125704169273376, "rewards/rejected": -0.43835514783859253, "step": 1770 }, { "epoch": 0.47, "learning_rate": 3.149856938451094e-06, "logits/chosen": -1.6151930093765259, "logits/rejected": -0.9260737299919128, "logps/chosen": -644.0718383789062, "logps/rejected": -1129.24267578125, "loss": 0.0756, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14687219262123108, "rewards/margins": 0.2772686779499054, "rewards/rejected": -0.4241408407688141, "step": 1780 }, { "epoch": 0.48, "learning_rate": 3.127358017790132e-06, "logits/chosen": -1.8206260204315186, "logits/rejected": -1.036929965019226, "logps/chosen": -636.3726806640625, "logps/rejected": -1280.237060546875, "loss": 0.0717, "rewards/accuracies": 0.875, "rewards/chosen": -0.1703665554523468, "rewards/margins": 0.33507412672042847, "rewards/rejected": -0.5054406523704529, "step": 1790 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.7902650833129883, "logits/rejected": -1.1972464323043823, "logps/chosen": -671.567138671875, "logps/rejected": -1139.710205078125, "loss": 0.096, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17933420836925507, "rewards/margins": 0.23233267664909363, "rewards/rejected": -0.4116668701171875, "step": 1800 }, { "epoch": 0.48, "learning_rate": 3.082199056232015e-06, "logits/chosen": -1.74044668674469, "logits/rejected": -1.0097100734710693, "logps/chosen": -597.4201049804688, "logps/rejected": -1311.656494140625, "loss": 0.0724, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1604226678609848, "rewards/margins": 0.33031123876571655, "rewards/rejected": -0.4907340109348297, "step": 1810 }, { "epoch": 0.49, "learning_rate": 3.059542928183079e-06, "logits/chosen": -1.7327849864959717, "logits/rejected": -1.105128526687622, "logps/chosen": -666.2789306640625, "logps/rejected": -1354.0238037109375, "loss": 0.0725, "rewards/accuracies": 0.875, "rewards/chosen": -0.1905040591955185, "rewards/margins": 0.3310181498527527, "rewards/rejected": -0.5215222239494324, "step": 1820 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -1.4599992036819458, "logits/rejected": -0.9781301617622375, "logps/chosen": -612.1408081054688, "logps/rejected": -1311.3360595703125, "loss": 0.0769, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21572145819664001, "rewards/margins": 0.2840631902217865, "rewards/rejected": -0.4997846484184265, "step": 1830 }, { "epoch": 0.49, "learning_rate": 3.0140871927018466e-06, "logits/chosen": -1.6299057006835938, "logits/rejected": -1.1118695735931396, "logps/chosen": -627.4758911132812, "logps/rejected": -1280.96630859375, "loss": 0.0673, "rewards/accuracies": 0.875, "rewards/chosen": -0.1827511489391327, "rewards/margins": 0.30515769124031067, "rewards/rejected": -0.4879087805747986, "step": 1840 }, { "epoch": 0.49, "learning_rate": 2.9912915238320755e-06, "logits/chosen": -1.7462234497070312, "logits/rejected": -1.0023224353790283, "logps/chosen": -662.7495727539062, "logps/rejected": -1253.028076171875, "loss": 0.0537, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1878022700548172, "rewards/margins": 0.30388498306274414, "rewards/rejected": -0.4916872978210449, "step": 1850 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -1.6837208271026611, "logits/rejected": -1.037295937538147, "logps/chosen": -592.4058837890625, "logps/rejected": -1251.274658203125, "loss": 0.065, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.164472758769989, "rewards/margins": 0.3051846921443939, "rewards/rejected": -0.4696574807167053, "step": 1860 }, { "epoch": 0.5, "learning_rate": 2.945574459442917e-06, "logits/chosen": -1.7075077295303345, "logits/rejected": -1.0973665714263916, "logps/chosen": -623.5327758789062, "logps/rejected": -1248.406494140625, "loss": 0.0519, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14970310032367706, "rewards/margins": 0.3357471525669098, "rewards/rejected": -0.48545026779174805, "step": 1870 }, { "epoch": 0.5, "learning_rate": 2.922657025129185e-06, "logits/chosen": -1.659570336341858, "logits/rejected": -1.0617914199829102, "logps/chosen": -783.8493041992188, "logps/rejected": -1338.663818359375, "loss": 0.1171, "rewards/accuracies": 0.75, "rewards/chosen": -0.24820463359355927, "rewards/margins": 0.2960302531719208, "rewards/rejected": -0.5442348718643188, "step": 1880 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -1.685340166091919, "logits/rejected": -1.1272087097167969, "logps/chosen": -648.3568115234375, "logps/rejected": -1290.0946044921875, "loss": 0.0795, "rewards/accuracies": 0.875, "rewards/chosen": -0.2036975622177124, "rewards/margins": 0.29076558351516724, "rewards/rejected": -0.49446314573287964, "step": 1890 }, { "epoch": 0.51, "learning_rate": 2.876714280623708e-06, "logits/chosen": -1.5509741306304932, "logits/rejected": -1.353191614151001, "logps/chosen": -530.915771484375, "logps/rejected": -1135.705322265625, "loss": 0.1021, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18661265075206757, "rewards/margins": 0.2390277087688446, "rewards/rejected": -0.425640344619751, "step": 1900 }, { "epoch": 0.51, "learning_rate": 2.8536929511919227e-06, "logits/chosen": -1.5140665769577026, "logits/rejected": -0.9606078267097473, "logps/chosen": -781.3890380859375, "logps/rejected": -1375.6937255859375, "loss": 0.0884, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2563932538032532, "rewards/margins": 0.2651718854904175, "rewards/rejected": -0.5215650796890259, "step": 1910 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -1.6241651773452759, "logits/rejected": -0.8508566617965698, "logps/chosen": -628.0801391601562, "logps/rejected": -1286.9000244140625, "loss": 0.0711, "rewards/accuracies": 0.875, "rewards/chosen": -0.14530445635318756, "rewards/margins": 0.3342815041542053, "rewards/rejected": -0.4795859754085541, "step": 1920 }, { "epoch": 0.51, "learning_rate": 2.807560351340302e-06, "logits/chosen": -1.8166322708129883, "logits/rejected": -1.1172343492507935, "logps/chosen": -561.2113647460938, "logps/rejected": -1348.037841796875, "loss": 0.0472, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.12384496629238129, "rewards/margins": 0.3702481985092163, "rewards/rejected": -0.4940931797027588, "step": 1930 }, { "epoch": 0.52, "learning_rate": 2.7844530781306544e-06, "logits/chosen": -1.6891119480133057, "logits/rejected": -1.0480057001113892, "logps/chosen": -686.9342041015625, "logps/rejected": -1289.268310546875, "loss": 0.0736, "rewards/accuracies": 0.875, "rewards/chosen": -0.17461568117141724, "rewards/margins": 0.27256545424461365, "rewards/rejected": -0.44718116521835327, "step": 1940 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.7340322732925415, "logits/rejected": -1.1662895679473877, "logps/chosen": -579.2034912109375, "logps/rejected": -1151.33154296875, "loss": 0.1019, "rewards/accuracies": 0.75, "rewards/chosen": -0.1479378640651703, "rewards/margins": 0.24325743317604065, "rewards/rejected": -0.3911953270435333, "step": 1950 }, { "epoch": 0.52, "learning_rate": 2.738166595746554e-06, "logits/chosen": -1.6452268362045288, "logits/rejected": -1.2167978286743164, "logps/chosen": -568.6005859375, "logps/rejected": -1262.2685546875, "loss": 0.0716, "rewards/accuracies": 0.875, "rewards/chosen": -0.15343192219734192, "rewards/margins": 0.26729413866996765, "rewards/rejected": -0.42072606086730957, "step": 1960 }, { "epoch": 0.53, "learning_rate": 2.7149913971156105e-06, "logits/chosen": -1.818169355392456, "logits/rejected": -1.1683781147003174, "logps/chosen": -652.8570556640625, "logps/rejected": -1269.952392578125, "loss": 0.0727, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1778111755847931, "rewards/margins": 0.29952138662338257, "rewards/rejected": -0.47733253240585327, "step": 1970 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -1.3900476694107056, "logits/rejected": -0.9688823819160461, "logps/chosen": -659.0379028320312, "logps/rejected": -1403.9931640625, "loss": 0.0587, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21347947418689728, "rewards/margins": 0.3426669239997864, "rewards/rejected": -0.5561463832855225, "step": 1980 }, { "epoch": 0.53, "learning_rate": 2.668587125005663e-06, "logits/chosen": -1.7847397327423096, "logits/rejected": -1.250427007675171, "logps/chosen": -552.1531982421875, "logps/rejected": -1304.26123046875, "loss": 0.0691, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15698403120040894, "rewards/margins": 0.30882400274276733, "rewards/rejected": -0.46580806374549866, "step": 1990 }, { "epoch": 0.53, "learning_rate": 2.6453620722761897e-06, "logits/chosen": -1.4962866306304932, "logits/rejected": -1.1884623765945435, "logps/chosen": -508.5406188964844, "logps/rejected": -1144.9775390625, "loss": 0.0703, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14970794320106506, "rewards/margins": 0.26972299814224243, "rewards/rejected": -0.41943103075027466, "step": 2000 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -1.4763376712799072, "logits/rejected": -1.208345890045166, "logps/chosen": -659.405029296875, "logps/rejected": -1311.456298828125, "loss": 0.0641, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21015486121177673, "rewards/margins": 0.261810302734375, "rewards/rejected": -0.47196516394615173, "step": 2010 }, { "epoch": 0.54, "learning_rate": 2.5988761950959133e-06, "logits/chosen": -1.6231094598770142, "logits/rejected": -1.3810958862304688, "logps/chosen": -578.0490112304688, "logps/rejected": -1218.637451171875, "loss": 0.0877, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15429143607616425, "rewards/margins": 0.29225456714630127, "rewards/rejected": -0.4465459883213043, "step": 2020 }, { "epoch": 0.54, "learning_rate": 2.575619398465402e-06, "logits/chosen": -1.6102511882781982, "logits/rejected": -0.9045922160148621, "logps/chosen": -626.107666015625, "logps/rejected": -1319.302978515625, "loss": 0.0697, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14732882380485535, "rewards/margins": 0.3220018446445465, "rewards/rejected": -0.46933069825172424, "step": 2030 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -1.7386415004730225, "logits/rejected": -1.0501739978790283, "logps/chosen": -698.34765625, "logps/rejected": -1343.9432373046875, "loss": 0.0424, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20332041382789612, "rewards/margins": 0.3136293292045593, "rewards/rejected": -0.5169497728347778, "step": 2040 }, { "epoch": 0.55, "learning_rate": 2.5290881645034932e-06, "logits/chosen": -1.7993240356445312, "logits/rejected": -0.9968295097351074, "logps/chosen": -737.5123901367188, "logps/rejected": -1283.9423828125, "loss": 0.0839, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2413882464170456, "rewards/margins": 0.25109177827835083, "rewards/rejected": -0.4924800395965576, "step": 2050 }, { "epoch": 0.55, "learning_rate": 2.5058177589223766e-06, "logits/chosen": -1.6581776142120361, "logits/rejected": -1.2274023294448853, "logps/chosen": -621.2349853515625, "logps/rejected": -1304.399169921875, "loss": 0.0534, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15700002014636993, "rewards/margins": 0.3114224672317505, "rewards/rejected": -0.4684225022792816, "step": 2060 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -1.822373628616333, "logits/rejected": -0.9462094306945801, "logps/chosen": -783.3508911132812, "logps/rejected": -1461.9649658203125, "loss": 0.0723, "rewards/accuracies": 0.875, "rewards/chosen": -0.2161974459886551, "rewards/margins": 0.3368082642555237, "rewards/rejected": -0.5530056953430176, "step": 2070 }, { "epoch": 0.55, "learning_rate": 2.4592774518353858e-06, "logits/chosen": -1.8345363140106201, "logits/rejected": -1.388167381286621, "logps/chosen": -656.7705688476562, "logps/rejected": -1437.629638671875, "loss": 0.0601, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1848996877670288, "rewards/margins": 0.3406218886375427, "rewards/rejected": -0.5255215764045715, "step": 2080 }, { "epoch": 0.56, "learning_rate": 2.436011582865945e-06, "logits/chosen": -1.5363919734954834, "logits/rejected": -1.1726293563842773, "logps/chosen": -633.4727172851562, "logps/rejected": -1333.555908203125, "loss": 0.0652, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17989441752433777, "rewards/margins": 0.3212878704071045, "rewards/rejected": -0.5011822581291199, "step": 2090 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.5197073221206665, "logits/rejected": -0.9411822557449341, "logps/chosen": -578.3282470703125, "logps/rejected": -1179.3984375, "loss": 0.0888, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18851898610591888, "rewards/margins": 0.27369505167007446, "rewards/rejected": -0.46221405267715454, "step": 2100 }, { "epoch": 0.56, "learning_rate": 2.3894984933853734e-06, "logits/chosen": -1.599889874458313, "logits/rejected": -0.9709933996200562, "logps/chosen": -711.7459716796875, "logps/rejected": -1445.8603515625, "loss": 0.0469, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21319825947284698, "rewards/margins": 0.34705930948257446, "rewards/rejected": -0.5602575540542603, "step": 2110 }, { "epoch": 0.57, "learning_rate": 2.366255303052377e-06, "logits/chosen": -1.5162818431854248, "logits/rejected": -0.9374685287475586, "logps/chosen": -663.5987548828125, "logps/rejected": -1244.1162109375, "loss": 0.0774, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20607724785804749, "rewards/margins": 0.28292593359947205, "rewards/rejected": -0.48900318145751953, "step": 2120 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -1.9861704111099243, "logits/rejected": -1.1126511096954346, "logps/chosen": -685.7115478515625, "logps/rejected": -1307.9677734375, "loss": 0.0711, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15337470173835754, "rewards/margins": 0.3153078854084015, "rewards/rejected": -0.46868258714675903, "step": 2130 }, { "epoch": 0.57, "learning_rate": 2.319805700686257e-06, "logits/chosen": -1.466512680053711, "logits/rejected": -0.9953567385673523, "logps/chosen": -656.9564819335938, "logps/rejected": -1330.86572265625, "loss": 0.0865, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20789778232574463, "rewards/margins": 0.27107080817222595, "rewards/rejected": -0.4789685606956482, "step": 2140 }, { "epoch": 0.57, "learning_rate": 2.296603313330355e-06, "logits/chosen": -1.7993675470352173, "logits/rejected": -0.795965313911438, "logps/chosen": -716.7975463867188, "logps/rejected": -1341.012451171875, "loss": 0.0413, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.16811136901378632, "rewards/margins": 0.3270968794822693, "rewards/rejected": -0.4952082633972168, "step": 2150 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -1.7564256191253662, "logits/rejected": -1.0065138339996338, "logps/chosen": -605.0843505859375, "logps/rejected": -1259.658447265625, "loss": 0.0639, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13863322138786316, "rewards/margins": 0.3194582462310791, "rewards/rejected": -0.4580914378166199, "step": 2160 }, { "epoch": 0.58, "learning_rate": 2.250253418081373e-06, "logits/chosen": -1.7674392461776733, "logits/rejected": -1.2567976713180542, "logps/chosen": -696.3572998046875, "logps/rejected": -1424.5084228515625, "loss": 0.0523, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20887522399425507, "rewards/margins": 0.35076838731765747, "rewards/rejected": -0.5596436262130737, "step": 2170 }, { "epoch": 0.58, "learning_rate": 2.22710992622628e-06, "logits/chosen": -1.7430893182754517, "logits/rejected": -1.2086124420166016, "logps/chosen": -602.26513671875, "logps/rejected": -1213.3702392578125, "loss": 0.0766, "rewards/accuracies": 0.75, "rewards/chosen": -0.17254558205604553, "rewards/margins": 0.29195210337638855, "rewards/rejected": -0.46449774503707886, "step": 2180 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -1.5437920093536377, "logits/rejected": -1.2289955615997314, "logps/chosen": -678.7044677734375, "logps/rejected": -1379.6451416015625, "loss": 0.0519, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19320425391197205, "rewards/margins": 0.32549750804901123, "rewards/rejected": -0.5187016725540161, "step": 2190 }, { "epoch": 0.59, "learning_rate": 2.1808958803485134e-06, "logits/chosen": -1.5962891578674316, "logits/rejected": -1.3936455249786377, "logps/chosen": -606.307373046875, "logps/rejected": -1305.8978271484375, "loss": 0.0898, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2137691229581833, "rewards/margins": 0.2590574324131012, "rewards/rejected": -0.4728265702724457, "step": 2200 }, { "epoch": 0.59, "learning_rate": 2.157829330593008e-06, "logits/chosen": -1.7511285543441772, "logits/rejected": -0.974310576915741, "logps/chosen": -633.572509765625, "logps/rejected": -1414.367431640625, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -0.18712595105171204, "rewards/margins": 0.37235841155052185, "rewards/rejected": -0.5594843626022339, "step": 2210 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -1.644934058189392, "logits/rejected": -1.11379075050354, "logps/chosen": -639.0684814453125, "logps/rejected": -1331.093505859375, "loss": 0.0665, "rewards/accuracies": 0.875, "rewards/chosen": -0.2170993536710739, "rewards/margins": 0.3065374493598938, "rewards/rejected": -0.5236367583274841, "step": 2220 }, { "epoch": 0.59, "learning_rate": 2.1117871704092818e-06, "logits/chosen": -1.7506024837493896, "logits/rejected": -0.7958993911743164, "logps/chosen": -705.9612426757812, "logps/rejected": -1230.204833984375, "loss": 0.074, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18289552628993988, "rewards/margins": 0.2924391031265259, "rewards/rejected": -0.47533464431762695, "step": 2230 }, { "epoch": 0.6, "learning_rate": 2.0888155493550027e-06, "logits/chosen": -1.74947190284729, "logits/rejected": -1.175241231918335, "logps/chosen": -695.1935424804688, "logps/rejected": -1281.6820068359375, "loss": 0.0659, "rewards/accuracies": 0.75, "rewards/chosen": -0.227014422416687, "rewards/margins": 0.3012952506542206, "rewards/rejected": -0.5283096432685852, "step": 2240 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.3126758337020874, "logits/rejected": -0.9601501226425171, "logps/chosen": -722.9898681640625, "logps/rejected": -1265.3829345703125, "loss": 0.0976, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26479509472846985, "rewards/margins": 0.2494693100452423, "rewards/rejected": -0.5142643451690674, "step": 2250 }, { "epoch": 0.6, "learning_rate": 2.0429811771568468e-06, "logits/chosen": -1.9693552255630493, "logits/rejected": -1.2665364742279053, "logps/chosen": -614.6461181640625, "logps/rejected": -1254.004150390625, "loss": 0.0682, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18582487106323242, "rewards/margins": 0.31714367866516113, "rewards/rejected": -0.5029684901237488, "step": 2260 }, { "epoch": 0.61, "learning_rate": 2.0201223973828917e-06, "logits/chosen": -1.4698238372802734, "logits/rejected": -0.9227995872497559, "logps/chosen": -671.4839477539062, "logps/rejected": -1292.746826171875, "loss": 0.089, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26453524827957153, "rewards/margins": 0.270897775888443, "rewards/rejected": -0.5354331135749817, "step": 2270 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -1.694977045059204, "logits/rejected": -1.0786057710647583, "logps/chosen": -716.204833984375, "logps/rejected": -1146.710205078125, "loss": 0.0988, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23150472342967987, "rewards/margins": 0.21505391597747803, "rewards/rejected": -0.4465586245059967, "step": 2280 }, { "epoch": 0.61, "learning_rate": 1.9745315534350157e-06, "logits/chosen": -1.5376017093658447, "logits/rejected": -1.0097434520721436, "logps/chosen": -613.5394287109375, "logps/rejected": -1213.8609619140625, "loss": 0.0891, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1808188557624817, "rewards/margins": 0.28589263558387756, "rewards/rejected": -0.46671146154403687, "step": 2290 }, { "epoch": 0.61, "learning_rate": 1.9518034395302413e-06, "logits/chosen": -1.6548999547958374, "logits/rejected": -1.14958655834198, "logps/chosen": -715.3568115234375, "logps/rejected": -1187.842529296875, "loss": 0.0824, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22143769264221191, "rewards/margins": 0.2518152594566345, "rewards/rejected": -0.47325292229652405, "step": 2300 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -1.728780746459961, "logits/rejected": -1.0694595575332642, "logps/chosen": -720.1282958984375, "logps/rejected": -1408.6763916015625, "loss": 0.1004, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22856228053569794, "rewards/margins": 0.35453858971595764, "rewards/rejected": -0.5831009149551392, "step": 2310 }, { "epoch": 0.62, "learning_rate": 1.9064916742013515e-06, "logits/chosen": -1.483896017074585, "logits/rejected": -0.8495844602584839, "logps/chosen": -686.5977783203125, "logps/rejected": -1382.664794921875, "loss": 0.0613, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24766767024993896, "rewards/margins": 0.31919145584106445, "rewards/rejected": -0.5668591260910034, "step": 2320 }, { "epoch": 0.62, "learning_rate": 1.883911948865306e-06, "logits/chosen": -1.490492582321167, "logits/rejected": -0.8722866177558899, "logps/chosen": -710.7138671875, "logps/rejected": -1393.343994140625, "loss": 0.1017, "rewards/accuracies": 0.875, "rewards/chosen": -0.2688906192779541, "rewards/margins": 0.2782156765460968, "rewards/rejected": -0.5471062660217285, "step": 2330 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -1.5532639026641846, "logits/rejected": -1.1383737325668335, "logps/chosen": -775.1503295898438, "logps/rejected": -1363.999267578125, "loss": 0.094, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2418668568134308, "rewards/margins": 0.28089746832847595, "rewards/rejected": -0.5227643251419067, "step": 2340 }, { "epoch": 0.63, "learning_rate": 1.8389145949069953e-06, "logits/chosen": -1.7076427936553955, "logits/rejected": -1.0624377727508545, "logps/chosen": -605.94287109375, "logps/rejected": -1299.9039306640625, "loss": 0.0606, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1737835705280304, "rewards/margins": 0.30216971039772034, "rewards/rejected": -0.47595319151878357, "step": 2350 }, { "epoch": 0.63, "learning_rate": 1.816500865130279e-06, "logits/chosen": -1.704677939414978, "logits/rejected": -1.0432106256484985, "logps/chosen": -584.9574584960938, "logps/rejected": -1062.80078125, "loss": 0.1013, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17347513139247894, "rewards/margins": 0.24399061501026154, "rewards/rejected": -0.4174656867980957, "step": 2360 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -1.4847257137298584, "logits/rejected": -1.036319375038147, "logps/chosen": -565.1887817382812, "logps/rejected": -1178.5665283203125, "loss": 0.0822, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18391203880310059, "rewards/margins": 0.2579399347305298, "rewards/rejected": -0.44185200333595276, "step": 2370 }, { "epoch": 0.63, "learning_rate": 1.7718530101256115e-06, "logits/chosen": -1.7321197986602783, "logits/rejected": -1.0332145690917969, "logps/chosen": -719.0687866210938, "logps/rejected": -1425.6676025390625, "loss": 0.052, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23235881328582764, "rewards/margins": 0.32954907417297363, "rewards/rejected": -0.5619078874588013, "step": 2380 }, { "epoch": 0.64, "learning_rate": 1.7496227534604859e-06, "logits/chosen": -1.789006233215332, "logits/rejected": -1.0403592586517334, "logps/chosen": -728.0673828125, "logps/rejected": -1402.5009765625, "loss": 0.0629, "rewards/accuracies": 0.875, "rewards/chosen": -0.19960185885429382, "rewards/margins": 0.30768799781799316, "rewards/rejected": -0.5072898864746094, "step": 2390 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.9194923639297485, "logits/rejected": -1.3303556442260742, "logps/chosen": -637.9463500976562, "logps/rejected": -1268.9755859375, "loss": 0.0732, "rewards/accuracies": 0.875, "rewards/chosen": -0.1636119931936264, "rewards/margins": 0.2945595383644104, "rewards/rejected": -0.458171546459198, "step": 2400 }, { "epoch": 0.64, "learning_rate": 1.7053592124637557e-06, "logits/chosen": -1.7786033153533936, "logits/rejected": -1.103823184967041, "logps/chosen": -694.8350219726562, "logps/rejected": -1348.7509765625, "loss": 0.0788, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20321345329284668, "rewards/margins": 0.2868782877922058, "rewards/rejected": -0.49009180068969727, "step": 2410 }, { "epoch": 0.65, "learning_rate": 1.6833297633956647e-06, "logits/chosen": -1.6945937871932983, "logits/rejected": -1.1909369230270386, "logps/chosen": -574.365966796875, "logps/rejected": -1239.800537109375, "loss": 0.0705, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1648532599210739, "rewards/margins": 0.2992625832557678, "rewards/rejected": -0.46411579847335815, "step": 2420 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -1.5424587726593018, "logits/rejected": -1.3184611797332764, "logps/chosen": -526.9766845703125, "logps/rejected": -1158.5274658203125, "loss": 0.1027, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16265107691287994, "rewards/margins": 0.26259398460388184, "rewards/rejected": -0.42524510622024536, "step": 2430 }, { "epoch": 0.65, "learning_rate": 1.6394850517846621e-06, "logits/chosen": -1.6765334606170654, "logits/rejected": -0.9357019662857056, "logps/chosen": -637.3072509765625, "logps/rejected": -1350.0201416015625, "loss": 0.0483, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1886545568704605, "rewards/margins": 0.32397735118865967, "rewards/rejected": -0.5126319527626038, "step": 2440 }, { "epoch": 0.65, "learning_rate": 1.6176735882153284e-06, "logits/chosen": -1.4673728942871094, "logits/rejected": -0.7847996950149536, "logps/chosen": -721.7418212890625, "logps/rejected": -1300.6024169921875, "loss": 0.07, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20094910264015198, "rewards/margins": 0.28207510709762573, "rewards/rejected": -0.4830242097377777, "step": 2450 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -1.8030465841293335, "logits/rejected": -1.1619970798492432, "logps/chosen": -672.4512329101562, "logps/rejected": -1321.999267578125, "loss": 0.0729, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20753267407417297, "rewards/margins": 0.27218765020370483, "rewards/rejected": -0.4797203540802002, "step": 2460 }, { "epoch": 0.66, "learning_rate": 1.5742818947772875e-06, "logits/chosen": -1.4533730745315552, "logits/rejected": -0.9818112254142761, "logps/chosen": -737.9328002929688, "logps/rejected": -1423.608154296875, "loss": 0.0509, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23550765216350555, "rewards/margins": 0.3242724537849426, "rewards/rejected": -0.5597800612449646, "step": 2470 }, { "epoch": 0.66, "learning_rate": 1.552705424629898e-06, "logits/chosen": -1.8896198272705078, "logits/rejected": -1.0289218425750732, "logps/chosen": -772.728515625, "logps/rejected": -1228.538818359375, "loss": 0.0765, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2038073092699051, "rewards/margins": 0.26559290289878845, "rewards/rejected": -0.4694002568721771, "step": 2480 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -1.698685646057129, "logits/rejected": -1.106320858001709, "logps/chosen": -706.8604736328125, "logps/rejected": -1375.6136474609375, "loss": 0.0583, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21833567321300507, "rewards/margins": 0.31459516286849976, "rewards/rejected": -0.5329307317733765, "step": 2490 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -1.5073596239089966, "logits/rejected": -0.9609388113021851, "logps/chosen": -698.6984252929688, "logps/rejected": -1362.0128173828125, "loss": 0.0613, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20453910529613495, "rewards/margins": 0.3083449900150299, "rewards/rejected": -0.5128840208053589, "step": 2500 }, { "epoch": 0.67, "learning_rate": 1.4884759328590476e-06, "logits/chosen": -1.6541383266448975, "logits/rejected": -1.2222260236740112, "logps/chosen": -686.1484375, "logps/rejected": -1445.005615234375, "loss": 0.0586, "rewards/accuracies": 0.875, "rewards/chosen": -0.20519807934761047, "rewards/margins": 0.32793715596199036, "rewards/rejected": -0.5331352353096008, "step": 2510 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -1.5963326692581177, "logits/rejected": -1.0982022285461426, "logps/chosen": -682.7689208984375, "logps/rejected": -1145.491943359375, "loss": 0.1334, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21956536173820496, "rewards/margins": 0.209082692861557, "rewards/rejected": -0.42864808440208435, "step": 2520 }, { "epoch": 0.67, "learning_rate": 1.446091402744923e-06, "logits/chosen": -1.4184305667877197, "logits/rejected": -0.9969242215156555, "logps/chosen": -631.0663452148438, "logps/rejected": -1298.8023681640625, "loss": 0.0789, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2030848264694214, "rewards/margins": 0.2959844470024109, "rewards/rejected": -0.4990692138671875, "step": 2530 }, { "epoch": 0.68, "learning_rate": 1.4250351971283937e-06, "logits/chosen": -1.6301358938217163, "logits/rejected": -1.1058179140090942, "logps/chosen": -733.7390747070312, "logps/rejected": -1312.249267578125, "loss": 0.0789, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2173445224761963, "rewards/margins": 0.2981737554073334, "rewards/rejected": -0.515518307685852, "step": 2540 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.737184762954712, "logits/rejected": -0.8671928644180298, "logps/chosen": -759.1162719726562, "logps/rejected": -1388.3258056640625, "loss": 0.0713, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2206466943025589, "rewards/margins": 0.301429808139801, "rewards/rejected": -0.5220764875411987, "step": 2550 }, { "epoch": 0.68, "learning_rate": 1.3832040268095589e-06, "logits/chosen": -1.6039609909057617, "logits/rejected": -0.8329612612724304, "logps/chosen": -676.49755859375, "logps/rejected": -1311.5169677734375, "loss": 0.0776, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1940799504518509, "rewards/margins": 0.3204619288444519, "rewards/rejected": -0.5145418643951416, "step": 2560 }, { "epoch": 0.69, "learning_rate": 1.362432686615316e-06, "logits/chosen": -1.7901986837387085, "logits/rejected": -1.0667396783828735, "logps/chosen": -656.2354736328125, "logps/rejected": -1316.062744140625, "loss": 0.0758, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1804797351360321, "rewards/margins": 0.30354127287864685, "rewards/rejected": -0.48402100801467896, "step": 2570 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -1.5734020471572876, "logits/rejected": -1.0475807189941406, "logps/chosen": -566.615966796875, "logps/rejected": -1251.5736083984375, "loss": 0.0652, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16305221617221832, "rewards/margins": 0.3017520308494568, "rewards/rejected": -0.4648042619228363, "step": 2580 }, { "epoch": 0.69, "learning_rate": 1.3211874947800747e-06, "logits/chosen": -1.821344017982483, "logits/rejected": -1.1782556772232056, "logps/chosen": -690.4012451171875, "logps/rejected": -1318.4063720703125, "loss": 0.0693, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21744892001152039, "rewards/margins": 0.29652294516563416, "rewards/rejected": -0.5139719247817993, "step": 2590 }, { "epoch": 0.69, "learning_rate": 1.3007172168743854e-06, "logits/chosen": -1.438684105873108, "logits/rejected": -0.9294763803482056, "logps/chosen": -742.8309326171875, "logps/rejected": -1415.3411865234375, "loss": 0.0694, "rewards/accuracies": 0.875, "rewards/chosen": -0.2644263803958893, "rewards/margins": 0.3054746091365814, "rewards/rejected": -0.5699009895324707, "step": 2600 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -1.6277799606323242, "logits/rejected": -1.0877095460891724, "logps/chosen": -703.9315795898438, "logps/rejected": -1340.6427001953125, "loss": 0.0651, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23966872692108154, "rewards/margins": 0.2736600339412689, "rewards/rejected": -0.5133287310600281, "step": 2610 }, { "epoch": 0.7, "learning_rate": 1.260090165282645e-06, "logits/chosen": -1.6082645654678345, "logits/rejected": -0.9585866928100586, "logps/chosen": -733.3088989257812, "logps/rejected": -1261.9248046875, "loss": 0.0997, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23271843791007996, "rewards/margins": 0.248284250497818, "rewards/rejected": -0.48100265860557556, "step": 2620 }, { "epoch": 0.7, "learning_rate": 1.2399369117724582e-06, "logits/chosen": -1.703330397605896, "logits/rejected": -0.9837586283683777, "logps/chosen": -662.8977661132812, "logps/rejected": -1302.231201171875, "loss": 0.0872, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17935553193092346, "rewards/margins": 0.30029359459877014, "rewards/rejected": -0.4796491265296936, "step": 2630 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -1.6038057804107666, "logits/rejected": -1.2413532733917236, "logps/chosen": -518.2135009765625, "logps/rejected": -1082.8677978515625, "loss": 0.1043, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16320912539958954, "rewards/margins": 0.2511638402938843, "rewards/rejected": -0.414372980594635, "step": 2640 }, { "epoch": 0.71, "learning_rate": 1.1999596801769617e-06, "logits/chosen": -1.6547168493270874, "logits/rejected": -1.0749003887176514, "logps/chosen": -674.54541015625, "logps/rejected": -1409.031982421875, "loss": 0.0616, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18622934818267822, "rewards/margins": 0.3446107506752014, "rewards/rejected": -0.5308400988578796, "step": 2650 }, { "epoch": 0.71, "learning_rate": 1.1801391659631423e-06, "logits/chosen": -1.5979634523391724, "logits/rejected": -1.0661733150482178, "logps/chosen": -460.7833557128906, "logps/rejected": -1178.478759765625, "loss": 0.055, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1438596248626709, "rewards/margins": 0.30475491285324097, "rewards/rejected": -0.4486145079135895, "step": 2660 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -1.8192787170410156, "logits/rejected": -1.0415807962417603, "logps/chosen": -610.6654052734375, "logps/rejected": -1276.691162109375, "loss": 0.0536, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15597601234912872, "rewards/margins": 0.3177304267883301, "rewards/rejected": -0.4737063944339752, "step": 2670 }, { "epoch": 0.71, "learning_rate": 1.1408429274065418e-06, "logits/chosen": -1.4305180311203003, "logits/rejected": -1.164957880973816, "logps/chosen": -624.7916870117188, "logps/rejected": -1304.414306640625, "loss": 0.0804, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18183110654354095, "rewards/margins": 0.29839175939559937, "rewards/rejected": -0.4802228808403015, "step": 2680 }, { "epoch": 0.72, "learning_rate": 1.1213706079298566e-06, "logits/chosen": -1.6688525676727295, "logits/rejected": -1.1245152950286865, "logps/chosen": -636.1717529296875, "logps/rejected": -1271.97802734375, "loss": 0.0642, "rewards/accuracies": 0.875, "rewards/chosen": -0.19803071022033691, "rewards/margins": 0.29812103509902954, "rewards/rejected": -0.49615174531936646, "step": 2690 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.4946445226669312, "logits/rejected": -0.8901990056037903, "logps/chosen": -699.8790893554688, "logps/rejected": -1338.4324951171875, "loss": 0.0797, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2173963338136673, "rewards/margins": 0.31975775957107544, "rewards/rejected": -0.5371540784835815, "step": 2700 }, { "epoch": 0.72, "learning_rate": 1.0827860044369226e-06, "logits/chosen": -1.8231594562530518, "logits/rejected": -0.8806201815605164, "logps/chosen": -766.8648071289062, "logps/rejected": -1401.1787109375, "loss": 0.05, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21393120288848877, "rewards/margins": 0.3332614004611969, "rewards/rejected": -0.5471926331520081, "step": 2710 }, { "epoch": 0.73, "learning_rate": 1.06367706362636e-06, "logits/chosen": -1.6795213222503662, "logits/rejected": -1.0750644207000732, "logps/chosen": -631.3731689453125, "logps/rejected": -1225.7191162109375, "loss": 0.0755, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18206752836704254, "rewards/margins": 0.2682720720767975, "rewards/rejected": -0.45033949613571167, "step": 2720 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -1.4441897869110107, "logits/rejected": -0.9866140484809875, "logps/chosen": -579.9114379882812, "logps/rejected": -1187.9873046875, "loss": 0.0804, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16464915871620178, "rewards/margins": 0.2846793234348297, "rewards/rejected": -0.4493285119533539, "step": 2730 }, { "epoch": 0.73, "learning_rate": 1.0258341823102418e-06, "logits/chosen": -1.6576883792877197, "logits/rejected": -1.0028798580169678, "logps/chosen": -635.6063232421875, "logps/rejected": -1112.1954345703125, "loss": 0.0729, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16143657267093658, "rewards/margins": 0.22869448363780975, "rewards/rejected": -0.39013105630874634, "step": 2740 }, { "epoch": 0.73, "learning_rate": 1.0071035207430352e-06, "logits/chosen": -1.6806875467300415, "logits/rejected": -0.9319907426834106, "logps/chosen": -598.1236572265625, "logps/rejected": -1265.469970703125, "loss": 0.0684, "rewards/accuracies": 0.875, "rewards/chosen": -0.14364568889141083, "rewards/margins": 0.3332282602787018, "rewards/rejected": -0.4768740236759186, "step": 2750 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -1.6131244897842407, "logits/rejected": -1.3068736791610718, "logps/chosen": -547.2989501953125, "logps/rejected": -1120.500244140625, "loss": 0.0879, "rewards/accuracies": 0.75, "rewards/chosen": -0.1456841677427292, "rewards/margins": 0.27742549777030945, "rewards/rejected": -0.42310968041419983, "step": 2760 }, { "epoch": 0.74, "learning_rate": 9.700318703442437e-07, "logits/chosen": -1.7162357568740845, "logits/rejected": -1.1048786640167236, "logps/chosen": -623.9015502929688, "logps/rejected": -1287.5198974609375, "loss": 0.0766, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15075866878032684, "rewards/margins": 0.31179898977279663, "rewards/rejected": -0.4625576436519623, "step": 2770 }, { "epoch": 0.74, "learning_rate": 9.516940936268504e-07, "logits/chosen": -1.7830989360809326, "logits/rejected": -1.1697231531143188, "logps/chosen": -662.9132080078125, "logps/rejected": -1347.837646484375, "loss": 0.0518, "rewards/accuracies": 0.875, "rewards/chosen": -0.21208259463310242, "rewards/margins": 0.3107925057411194, "rewards/rejected": -0.5228751301765442, "step": 2780 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -1.7367998361587524, "logits/rejected": -0.8941909670829773, "logps/chosen": -727.5262451171875, "logps/rejected": -1364.0263671875, "loss": 0.072, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1890576183795929, "rewards/margins": 0.31730249524116516, "rewards/rejected": -0.5063600540161133, "step": 2790 }, { "epoch": 0.75, "learning_rate": 9.154225815032242e-07, "logits/chosen": -1.7351534366607666, "logits/rejected": -0.9410954713821411, "logps/chosen": -572.9583740234375, "logps/rejected": -1397.9078369140625, "loss": 0.0414, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14058896899223328, "rewards/margins": 0.37250643968582153, "rewards/rejected": -0.5130953788757324, "step": 2800 }, { "epoch": 0.75, "learning_rate": 8.974919888823164e-07, "logits/chosen": -1.6442314386367798, "logits/rejected": -1.245091438293457, "logps/chosen": -625.5496215820312, "logps/rejected": -1145.61865234375, "loss": 0.1189, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1761692762374878, "rewards/margins": 0.21311041712760925, "rewards/rejected": -0.38927969336509705, "step": 2810 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -1.5787712335586548, "logits/rejected": -1.1393609046936035, "logps/chosen": -564.6102905273438, "logps/rejected": -1168.3687744140625, "loss": 0.0749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15366871654987335, "rewards/margins": 0.2722090780735016, "rewards/rejected": -0.42587780952453613, "step": 2820 }, { "epoch": 0.75, "learning_rate": 8.620488984679378e-07, "logits/chosen": -1.747091293334961, "logits/rejected": -1.1368491649627686, "logps/chosen": -637.8484497070312, "logps/rejected": -1327.956298828125, "loss": 0.0344, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18866987526416779, "rewards/margins": 0.32480621337890625, "rewards/rejected": -0.5134760737419128, "step": 2830 }, { "epoch": 0.76, "learning_rate": 8.445394716802754e-07, "logits/chosen": -1.7131919860839844, "logits/rejected": -1.1550548076629639, "logps/chosen": -685.2410888671875, "logps/rejected": -1205.682861328125, "loss": 0.097, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20656362175941467, "rewards/margins": 0.2589021325111389, "rewards/rejected": -0.4654656946659088, "step": 2840 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.637920618057251, "logits/rejected": -1.3879964351654053, "logps/chosen": -607.6511840820312, "logps/rejected": -1199.6259765625, "loss": 0.0784, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1954393833875656, "rewards/margins": 0.25878021121025085, "rewards/rejected": -0.4542195796966553, "step": 2850 }, { "epoch": 0.76, "learning_rate": 8.099524404308948e-07, "logits/chosen": -1.4263249635696411, "logits/rejected": -1.002600908279419, "logps/chosen": -636.8242797851562, "logps/rejected": -1328.989501953125, "loss": 0.0539, "rewards/accuracies": 0.875, "rewards/chosen": -0.2097894698381424, "rewards/margins": 0.3159825801849365, "rewards/rejected": -0.5257720351219177, "step": 2860 }, { "epoch": 0.77, "learning_rate": 7.928778328007918e-07, "logits/chosen": -1.884615182876587, "logits/rejected": -0.9678564071655273, "logps/chosen": -658.4131469726562, "logps/rejected": -1356.146240234375, "loss": 0.0639, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20153768360614777, "rewards/margins": 0.3334410786628723, "rewards/rejected": -0.5349787473678589, "step": 2870 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -1.3170682191848755, "logits/rejected": -0.7453212141990662, "logps/chosen": -650.6473999023438, "logps/rejected": -1283.6512451171875, "loss": 0.0787, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20364892482757568, "rewards/margins": 0.28083834052085876, "rewards/rejected": -0.48448723554611206, "step": 2880 }, { "epoch": 0.77, "learning_rate": 7.591738306429769e-07, "logits/chosen": -1.6163628101348877, "logits/rejected": -0.9143250584602356, "logps/chosen": -624.8411865234375, "logps/rejected": -1358.2152099609375, "loss": 0.05, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17616188526153564, "rewards/margins": 0.3604372441768646, "rewards/rejected": -0.5365991592407227, "step": 2890 }, { "epoch": 0.77, "learning_rate": 7.425473564358457e-07, "logits/chosen": -1.734678030014038, "logits/rejected": -1.1536650657653809, "logps/chosen": -641.8426513671875, "logps/rejected": -1439.745849609375, "loss": 0.0529, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19275933504104614, "rewards/margins": 0.3525656759738922, "rewards/rejected": -0.5453251004219055, "step": 2900 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -1.6836907863616943, "logits/rejected": -0.9171245694160461, "logps/chosen": -621.8990478515625, "logps/rejected": -1194.991943359375, "loss": 0.0742, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15242193639278412, "rewards/margins": 0.2880454659461975, "rewards/rejected": -0.4404674172401428, "step": 2910 }, { "epoch": 0.78, "learning_rate": 7.097526647366379e-07, "logits/chosen": -1.5863702297210693, "logits/rejected": -1.0138695240020752, "logps/chosen": -717.5474853515625, "logps/rejected": -1396.6239013671875, "loss": 0.0631, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19577325880527496, "rewards/margins": 0.3056796193122864, "rewards/rejected": -0.5014528632164001, "step": 2920 }, { "epoch": 0.78, "learning_rate": 6.935872887769299e-07, "logits/chosen": -1.5059714317321777, "logits/rejected": -0.9991394281387329, "logps/chosen": -704.1346435546875, "logps/rejected": -1267.742919921875, "loss": 0.0853, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2067212611436844, "rewards/margins": 0.26990193128585815, "rewards/rejected": -0.4766232371330261, "step": 2930 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -1.5541185140609741, "logits/rejected": -1.0399705171585083, "logps/chosen": -592.094482421875, "logps/rejected": -1375.5118408203125, "loss": 0.0491, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19767038524150848, "rewards/margins": 0.3308340013027191, "rewards/rejected": -0.5285044312477112, "step": 2940 }, { "epoch": 0.79, "learning_rate": 6.617274798504286e-07, "logits/chosen": -1.4966031312942505, "logits/rejected": -0.8529815673828125, "logps/chosen": -752.8843994140625, "logps/rejected": -1323.464599609375, "loss": 0.0798, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24442438781261444, "rewards/margins": 0.27123716473579407, "rewards/rejected": -0.5156615972518921, "step": 2950 }, { "epoch": 0.79, "learning_rate": 6.460358074120518e-07, "logits/chosen": -1.9015638828277588, "logits/rejected": -1.1131356954574585, "logps/chosen": -598.8160400390625, "logps/rejected": -1302.127197265625, "loss": 0.0637, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15112538635730743, "rewards/margins": 0.34708380699157715, "rewards/rejected": -0.4982091784477234, "step": 2960 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -1.577651858329773, "logits/rejected": -1.0997803211212158, "logps/chosen": -706.3680419921875, "logps/rejected": -1251.1690673828125, "loss": 0.0795, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22735171020030975, "rewards/margins": 0.2633035480976105, "rewards/rejected": -0.4906553328037262, "step": 2970 }, { "epoch": 0.79, "learning_rate": 6.151357245788917e-07, "logits/chosen": -1.5803357362747192, "logits/rejected": -1.0134937763214111, "logps/chosen": -787.1359252929688, "logps/rejected": -1268.341064453125, "loss": 0.0776, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2360551357269287, "rewards/margins": 0.2515987157821655, "rewards/rejected": -0.4876538813114166, "step": 2980 }, { "epoch": 0.8, "learning_rate": 5.999299915559956e-07, "logits/chosen": -1.863478422164917, "logits/rejected": -1.186255693435669, "logps/chosen": -670.4616088867188, "logps/rejected": -1253.962646484375, "loss": 0.0807, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18092487752437592, "rewards/margins": 0.2903370261192322, "rewards/rejected": -0.4712619185447693, "step": 2990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.4436417818069458, "logits/rejected": -1.0620644092559814, "logps/chosen": -752.1005859375, "logps/rejected": -1337.766357421875, "loss": 0.0742, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.252055823802948, "rewards/margins": 0.2509405016899109, "rewards/rejected": -0.5029963254928589, "step": 3000 }, { "epoch": 0.8, "learning_rate": 5.700137297712749e-07, "logits/chosen": -1.4030582904815674, "logits/rejected": -1.0030864477157593, "logps/chosen": -657.2237548828125, "logps/rejected": -1292.746826171875, "loss": 0.0651, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.227573424577713, "rewards/margins": 0.301101952791214, "rewards/rejected": -0.5286754369735718, "step": 3010 }, { "epoch": 0.81, "learning_rate": 5.553057931370729e-07, "logits/chosen": -1.7350914478302002, "logits/rejected": -1.2158474922180176, "logps/chosen": -638.9827270507812, "logps/rejected": -1461.6263427734375, "loss": 0.0521, "rewards/accuracies": 0.875, "rewards/chosen": -0.19272522628307343, "rewards/margins": 0.3386714458465576, "rewards/rejected": -0.5313966274261475, "step": 3020 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -1.6435270309448242, "logits/rejected": -1.0143921375274658, "logps/chosen": -617.9176635742188, "logps/rejected": -1159.302734375, "loss": 0.0887, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19033944606781006, "rewards/margins": 0.2596290409564972, "rewards/rejected": -0.44996848702430725, "step": 3030 }, { "epoch": 0.81, "learning_rate": 5.263966802018275e-07, "logits/chosen": -1.5586180686950684, "logits/rejected": -0.9451137781143188, "logps/chosen": -577.8426513671875, "logps/rejected": -1369.449951171875, "loss": 0.0498, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17990294098854065, "rewards/margins": 0.33183303475379944, "rewards/rejected": -0.5117359757423401, "step": 3040 }, { "epoch": 0.81, "learning_rate": 5.121980087628802e-07, "logits/chosen": -1.7278461456298828, "logits/rejected": -1.0558230876922607, "logps/chosen": -643.5891723632812, "logps/rejected": -1279.1890869140625, "loss": 0.0769, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19444116950035095, "rewards/margins": 0.27763885259628296, "rewards/rejected": -0.4720799922943115, "step": 3050 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -1.3910518884658813, "logits/rejected": -0.9968549609184265, "logps/chosen": -648.2385864257812, "logps/rejected": -1314.0189208984375, "loss": 0.0556, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20168296992778778, "rewards/margins": 0.27340278029441833, "rewards/rejected": -0.4750857949256897, "step": 3060 }, { "epoch": 0.82, "learning_rate": 4.843185871337722e-07, "logits/chosen": -1.7175220251083374, "logits/rejected": -1.0445207357406616, "logps/chosen": -652.8839111328125, "logps/rejected": -1321.618896484375, "loss": 0.0565, "rewards/accuracies": 0.875, "rewards/chosen": -0.16258391737937927, "rewards/margins": 0.3201899528503418, "rewards/rejected": -0.48277387022972107, "step": 3070 }, { "epoch": 0.82, "learning_rate": 4.706402525869633e-07, "logits/chosen": -1.6909739971160889, "logits/rejected": -1.2913880348205566, "logps/chosen": -679.966796875, "logps/rejected": -1319.14794921875, "loss": 0.0825, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1967785656452179, "rewards/margins": 0.2842392921447754, "rewards/rejected": -0.4810178279876709, "step": 3080 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -1.450941801071167, "logits/rejected": -1.0710570812225342, "logps/chosen": -729.031494140625, "logps/rejected": -1396.8394775390625, "loss": 0.0777, "rewards/accuracies": 0.875, "rewards/chosen": -0.23177771270275116, "rewards/margins": 0.28798457980155945, "rewards/rejected": -0.5197622776031494, "step": 3090 }, { "epoch": 0.83, "learning_rate": 4.438122617983442e-07, "logits/chosen": -1.2658835649490356, "logits/rejected": -0.6509383916854858, "logps/chosen": -678.7276611328125, "logps/rejected": -1277.664794921875, "loss": 0.1047, "rewards/accuracies": 0.875, "rewards/chosen": -0.1934930384159088, "rewards/margins": 0.27904012799263, "rewards/rejected": -0.47253313660621643, "step": 3100 }, { "epoch": 0.83, "learning_rate": 4.3066493009749853e-07, "logits/chosen": -1.8485597372055054, "logits/rejected": -1.3042134046554565, "logps/chosen": -598.8069458007812, "logps/rejected": -1223.86767578125, "loss": 0.0587, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16556212306022644, "rewards/margins": 0.29780706763267517, "rewards/rejected": -0.4633691906929016, "step": 3110 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -1.780917763710022, "logits/rejected": -1.0629148483276367, "logps/chosen": -598.9677734375, "logps/rejected": -1247.2830810546875, "loss": 0.0715, "rewards/accuracies": 0.875, "rewards/chosen": -0.17943796515464783, "rewards/margins": 0.29289960861206055, "rewards/rejected": -0.472337543964386, "step": 3120 }, { "epoch": 0.83, "learning_rate": 4.049092898095816e-07, "logits/chosen": -1.6170413494110107, "logits/rejected": -1.0848805904388428, "logps/chosen": -687.8775634765625, "logps/rejected": -1320.7880859375, "loss": 0.0641, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16832710802555084, "rewards/margins": 0.3181244730949402, "rewards/rejected": -0.4864516258239746, "step": 3130 }, { "epoch": 0.84, "learning_rate": 3.9230321284847856e-07, "logits/chosen": -1.835370659828186, "logits/rejected": -0.9373771548271179, "logps/chosen": -725.0065307617188, "logps/rejected": -1372.621337890625, "loss": 0.0804, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18989510834217072, "rewards/margins": 0.32323604822158813, "rewards/rejected": -0.5131311416625977, "step": 3140 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.5447750091552734, "logits/rejected": -0.8559864163398743, "logps/chosen": -728.7130126953125, "logps/rejected": -1214.0345458984375, "loss": 0.0969, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2228674590587616, "rewards/margins": 0.24048006534576416, "rewards/rejected": -0.46334752440452576, "step": 3150 }, { "epoch": 0.84, "learning_rate": 3.6764000653481263e-07, "logits/chosen": -1.5830670595169067, "logits/rejected": -1.0614241361618042, "logps/chosen": -623.8796997070312, "logps/rejected": -1382.993408203125, "loss": 0.0584, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17642198503017426, "rewards/margins": 0.3370228409767151, "rewards/rejected": -0.5134447813034058, "step": 3160 }, { "epoch": 0.85, "learning_rate": 3.555850141530659e-07, "logits/chosen": -1.6500043869018555, "logits/rejected": -0.9364662170410156, "logps/chosen": -665.5390014648438, "logps/rejected": -1273.1171875, "loss": 0.054, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14827661216259003, "rewards/margins": 0.31309622526168823, "rewards/rejected": -0.46137291193008423, "step": 3170 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -1.5176864862442017, "logits/rejected": -1.098508596420288, "logps/chosen": -559.7102661132812, "logps/rejected": -1274.8511962890625, "loss": 0.0613, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16253405809402466, "rewards/margins": 0.30766361951828003, "rewards/rejected": -0.4701976180076599, "step": 3180 }, { "epoch": 0.85, "learning_rate": 3.3203347344004737e-07, "logits/chosen": -1.6515905857086182, "logits/rejected": -1.023119568824768, "logps/chosen": -841.9296875, "logps/rejected": -1383.3951416015625, "loss": 0.0841, "rewards/accuracies": 0.875, "rewards/chosen": -0.2267489731311798, "rewards/margins": 0.2902681827545166, "rewards/rejected": -0.517017126083374, "step": 3190 }, { "epoch": 0.85, "learning_rate": 3.2053896575809426e-07, "logits/chosen": -1.6936334371566772, "logits/rejected": -1.1252939701080322, "logps/chosen": -680.880859375, "logps/rejected": -1214.6029052734375, "loss": 0.0888, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20804938673973083, "rewards/margins": 0.2518993020057678, "rewards/rejected": -0.45994871854782104, "step": 3200 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -1.7081215381622314, "logits/rejected": -0.7063247561454773, "logps/chosen": -839.2431640625, "logps/rejected": -1423.2156982421875, "loss": 0.053, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2406737506389618, "rewards/margins": 0.3139367997646332, "rewards/rejected": -0.5546106100082397, "step": 3210 }, { "epoch": 0.86, "learning_rate": 2.981174554287239e-07, "logits/chosen": -1.6795532703399658, "logits/rejected": -1.0844824314117432, "logps/chosen": -483.9825134277344, "logps/rejected": -1096.248779296875, "loss": 0.0943, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1309995800256729, "rewards/margins": 0.2878738045692444, "rewards/rejected": -0.4188733994960785, "step": 3220 }, { "epoch": 0.86, "learning_rate": 2.871923955178918e-07, "logits/chosen": -1.651178002357483, "logits/rejected": -0.8967401385307312, "logps/chosen": -714.80810546875, "logps/rejected": -1329.046630859375, "loss": 0.0553, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21358323097229004, "rewards/margins": 0.28250735998153687, "rewards/rejected": -0.4960905909538269, "step": 3230 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -1.6797590255737305, "logits/rejected": -1.2603545188903809, "logps/chosen": -557.3433227539062, "logps/rejected": -1234.2099609375, "loss": 0.0775, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14634376764297485, "rewards/margins": 0.3086283206939697, "rewards/rejected": -0.4549720883369446, "step": 3240 }, { "epoch": 0.87, "learning_rate": 2.6591839919146963e-07, "logits/chosen": -1.477281928062439, "logits/rejected": -0.9903861880302429, "logps/chosen": -665.1192626953125, "logps/rejected": -1360.6173095703125, "loss": 0.0868, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20300845801830292, "rewards/margins": 0.2980322539806366, "rewards/rejected": -0.5010407567024231, "step": 3250 }, { "epoch": 0.87, "learning_rate": 2.555713060848433e-07, "logits/chosen": -1.2929660081863403, "logits/rejected": -1.0018800497055054, "logps/chosen": -685.843017578125, "logps/rejected": -1353.837158203125, "loss": 0.0817, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21414248645305634, "rewards/margins": 0.30294984579086304, "rewards/rejected": -0.5170923471450806, "step": 3260 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -1.6706600189208984, "logits/rejected": -1.0511361360549927, "logps/chosen": -549.2362670898438, "logps/rejected": -1177.3765869140625, "loss": 0.0754, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1762518584728241, "rewards/margins": 0.275063693523407, "rewards/rejected": -0.45131558179855347, "step": 3270 }, { "epoch": 0.87, "learning_rate": 2.3546141258376786e-07, "logits/chosen": -1.6843605041503906, "logits/rejected": -1.0689982175827026, "logps/chosen": -718.2612915039062, "logps/rejected": -1315.400634765625, "loss": 0.1008, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20562107861042023, "rewards/margins": 0.2962108850479126, "rewards/rejected": -0.5018320083618164, "step": 3280 }, { "epoch": 0.88, "learning_rate": 2.257003546333042e-07, "logits/chosen": -1.6436039209365845, "logits/rejected": -1.0740540027618408, "logps/chosen": -530.2700805664062, "logps/rejected": -1222.441650390625, "loss": 0.0701, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17040416598320007, "rewards/margins": 0.3154822885990143, "rewards/rejected": -0.48588642477989197, "step": 3290 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.7491300106048584, "logits/rejected": -0.9703881144523621, "logps/chosen": -634.4661865234375, "logps/rejected": -1327.0771484375, "loss": 0.0478, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15670377016067505, "rewards/margins": 0.3363407254219055, "rewards/rejected": -0.49304452538490295, "step": 3300 }, { "epoch": 0.88, "learning_rate": 2.0677024504760752e-07, "logits/chosen": -1.5196778774261475, "logits/rejected": -1.040668249130249, "logps/chosen": -678.7161865234375, "logps/rejected": -1322.600830078125, "loss": 0.0634, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19689127802848816, "rewards/margins": 0.32300588488578796, "rewards/rejected": -0.5198971033096313, "step": 3310 }, { "epoch": 0.89, "learning_rate": 1.9760283363267684e-07, "logits/chosen": -1.5344351530075073, "logits/rejected": -1.066190481185913, "logps/chosen": -577.383056640625, "logps/rejected": -1262.2083740234375, "loss": 0.0653, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14838716387748718, "rewards/margins": 0.31335335969924927, "rewards/rejected": -0.4617405831813812, "step": 3320 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -1.5965193510055542, "logits/rejected": -1.0383847951889038, "logps/chosen": -608.2281494140625, "logps/rejected": -1189.1614990234375, "loss": 0.0814, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1726662814617157, "rewards/margins": 0.26928216218948364, "rewards/rejected": -0.44194841384887695, "step": 3330 }, { "epoch": 0.89, "learning_rate": 1.798672690923828e-07, "logits/chosen": -1.3265117406845093, "logits/rejected": -0.8752411007881165, "logps/chosen": -684.5750732421875, "logps/rejected": -1294.5103759765625, "loss": 0.0898, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21347585320472717, "rewards/margins": 0.2741771936416626, "rewards/rejected": -0.4876530170440674, "step": 3340 }, { "epoch": 0.89, "learning_rate": 1.713006526846439e-07, "logits/chosen": -1.5635359287261963, "logits/rejected": -1.1060292720794678, "logps/chosen": -639.0496826171875, "logps/rejected": -1266.63720703125, "loss": 0.0836, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2038009911775589, "rewards/margins": 0.26461145281791687, "rewards/rejected": -0.4684123992919922, "step": 3350 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -1.5050264596939087, "logits/rejected": -1.0238497257232666, "logps/chosen": -667.7071533203125, "logps/rejected": -1466.681884765625, "loss": 0.0488, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21334633231163025, "rewards/margins": 0.3355127274990082, "rewards/rejected": -0.5488591194152832, "step": 3360 }, { "epoch": 0.9, "learning_rate": 1.5477346284948292e-07, "logits/chosen": -1.8201059103012085, "logits/rejected": -1.0349600315093994, "logps/chosen": -649.684326171875, "logps/rejected": -1250.92236328125, "loss": 0.053, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1781463921070099, "rewards/margins": 0.3080436587333679, "rewards/rejected": -0.4861900210380554, "step": 3370 }, { "epoch": 0.9, "learning_rate": 1.4681432143872133e-07, "logits/chosen": -1.6552870273590088, "logits/rejected": -1.2580300569534302, "logps/chosen": -656.0045166015625, "logps/rejected": -1372.251953125, "loss": 0.0669, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19065485894680023, "rewards/margins": 0.3371616005897522, "rewards/rejected": -0.5278164744377136, "step": 3380 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -1.679107666015625, "logits/rejected": -1.0372817516326904, "logps/chosen": -586.6466674804688, "logps/rejected": -1267.5791015625, "loss": 0.0689, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18233391642570496, "rewards/margins": 0.279081255197525, "rewards/rejected": -0.4614151418209076, "step": 3390 }, { "epoch": 0.91, "learning_rate": 1.31508393714177e-07, "logits/chosen": -1.4867794513702393, "logits/rejected": -0.8477977514266968, "logps/chosen": -554.5364990234375, "logps/rejected": -1226.228271484375, "loss": 0.0688, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16267123818397522, "rewards/margins": 0.3219824433326721, "rewards/rejected": -0.4846537113189697, "step": 3400 }, { "epoch": 0.91, "learning_rate": 1.241629335994471e-07, "logits/chosen": -1.8708140850067139, "logits/rejected": -1.0227950811386108, "logps/chosen": -799.6752319335938, "logps/rejected": -1433.6239013671875, "loss": 0.0827, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2366192787885666, "rewards/margins": 0.31682151556015015, "rewards/rejected": -0.5534407496452332, "step": 3410 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -1.6258529424667358, "logits/rejected": -1.1475646495819092, "logps/chosen": -691.2940063476562, "logps/rejected": -1324.8748779296875, "loss": 0.0872, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20850765705108643, "rewards/margins": 0.28260093927383423, "rewards/rejected": -0.49110865592956543, "step": 3420 }, { "epoch": 0.91, "learning_rate": 1.1009020308754587e-07, "logits/chosen": -1.414668083190918, "logits/rejected": -1.0075281858444214, "logps/chosen": -676.2430419921875, "logps/rejected": -1345.2977294921875, "loss": 0.0937, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22368064522743225, "rewards/margins": 0.27526089549064636, "rewards/rejected": -0.498941570520401, "step": 3430 }, { "epoch": 0.92, "learning_rate": 1.0336415203768962e-07, "logits/chosen": -1.6306703090667725, "logits/rejected": -1.1734403371810913, "logps/chosen": -728.4344482421875, "logps/rejected": -1319.7030029296875, "loss": 0.068, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21984486281871796, "rewards/margins": 0.26837268471717834, "rewards/rejected": -0.4882175326347351, "step": 3440 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.685302734375, "logits/rejected": -1.3321112394332886, "logps/chosen": -745.2166748046875, "logps/rejected": -1243.215087890625, "loss": 0.0751, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1889590322971344, "rewards/margins": 0.25796008110046387, "rewards/rejected": -0.44691914319992065, "step": 3450 }, { "epoch": 0.92, "learning_rate": 9.053559223036746e-08, "logits/chosen": -1.6785064935684204, "logits/rejected": -1.05990731716156, "logps/chosen": -662.5062255859375, "logps/rejected": -1327.50390625, "loss": 0.0552, "rewards/accuracies": 0.875, "rewards/chosen": -0.21094217896461487, "rewards/margins": 0.28824371099472046, "rewards/rejected": -0.49918586015701294, "step": 3460 }, { "epoch": 0.93, "learning_rate": 8.44341950176683e-08, "logits/chosen": -1.6093642711639404, "logits/rejected": -0.9099162817001343, "logps/chosen": -605.3573608398438, "logps/rejected": -1294.749755859375, "loss": 0.0693, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16440364718437195, "rewards/margins": 0.3216980993747711, "rewards/rejected": -0.4861017167568207, "step": 3470 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -1.6602537631988525, "logits/rejected": -1.277896761894226, "logps/chosen": -698.0342407226562, "logps/rejected": -1383.7066650390625, "loss": 0.0846, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2171173393726349, "rewards/margins": 0.3016865849494934, "rewards/rejected": -0.5188038945198059, "step": 3480 }, { "epoch": 0.93, "learning_rate": 7.285980923996989e-08, "logits/chosen": -1.7102686166763306, "logits/rejected": -1.154813528060913, "logps/chosen": -705.587158203125, "logps/rejected": -1409.553955078125, "loss": 0.088, "rewards/accuracies": 0.875, "rewards/chosen": -0.23871013522148132, "rewards/margins": 0.29305535554885864, "rewards/rejected": -0.5317655205726624, "step": 3490 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "logits/chosen": -1.7222769260406494, "logits/rejected": -1.0485639572143555, "logps/chosen": -677.2478637695312, "logps/rejected": -1264.8682861328125, "loss": 0.0694, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21438893675804138, "rewards/margins": 0.27774578332901, "rewards/rejected": -0.4921347498893738, "step": 3500 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -1.6767486333847046, "logits/rejected": -0.995303750038147, "logps/chosen": -749.8323974609375, "logps/rejected": -1487.445068359375, "loss": 0.0432, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.21066558361053467, "rewards/margins": 0.3550676703453064, "rewards/rejected": -0.5657332539558411, "step": 3510 }, { "epoch": 0.94, "learning_rate": 5.707663716023021e-08, "logits/chosen": -1.4784590005874634, "logits/rejected": -0.6975718140602112, "logps/chosen": -666.94189453125, "logps/rejected": -1402.3831787109375, "loss": 0.0401, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20545247197151184, "rewards/margins": 0.34110763669013977, "rewards/rejected": -0.5465600490570068, "step": 3520 }, { "epoch": 0.94, "learning_rate": 5.22383298837098e-08, "logits/chosen": -1.527578353881836, "logits/rejected": -0.8852685689926147, "logps/chosen": -677.0675659179688, "logps/rejected": -1252.460205078125, "loss": 0.0794, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20508520305156708, "rewards/margins": 0.2905604839324951, "rewards/rejected": -0.4956456124782562, "step": 3530 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -1.4756338596343994, "logits/rejected": -0.9907282590866089, "logps/chosen": -628.2028198242188, "logps/rejected": -1525.0224609375, "loss": 0.0322, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21087181568145752, "rewards/margins": 0.3604298532009125, "rewards/rejected": -0.5713016390800476, "step": 3540 }, { "epoch": 0.95, "learning_rate": 4.319838323396691e-08, "logits/chosen": -1.6325428485870361, "logits/rejected": -1.2529757022857666, "logps/chosen": -640.9552612304688, "logps/rejected": -1428.263916015625, "loss": 0.0422, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1934538334608078, "rewards/margins": 0.32024726271629333, "rewards/rejected": -0.5137011408805847, "step": 3550 }, { "epoch": 0.95, "learning_rate": 3.8997527136930004e-08, "logits/chosen": -1.505150556564331, "logits/rejected": -0.8225961923599243, "logps/chosen": -707.0218505859375, "logps/rejected": -1327.9573974609375, "loss": 0.0923, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2203945368528366, "rewards/margins": 0.2966047525405884, "rewards/rejected": -0.5169993042945862, "step": 3560 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -1.5440739393234253, "logits/rejected": -0.8097125887870789, "logps/chosen": -710.9814453125, "logps/rejected": -1306.008056640625, "loss": 0.0667, "rewards/accuracies": 0.875, "rewards/chosen": -0.21110442280769348, "rewards/margins": 0.3028753995895386, "rewards/rejected": -0.5139797925949097, "step": 3570 }, { "epoch": 0.95, "learning_rate": 3.1235869306123766e-08, "logits/chosen": -1.7210094928741455, "logits/rejected": -1.13853120803833, "logps/chosen": -723.6201782226562, "logps/rejected": -1426.6231689453125, "loss": 0.062, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2129667103290558, "rewards/margins": 0.31347301602363586, "rewards/rejected": -0.5264397263526917, "step": 3580 }, { "epoch": 0.96, "learning_rate": 2.767574008979007e-08, "logits/chosen": -1.492082953453064, "logits/rejected": -0.8750694990158081, "logps/chosen": -754.5972290039062, "logps/rejected": -1386.701904296875, "loss": 0.0783, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23436252772808075, "rewards/margins": 0.2830619215965271, "rewards/rejected": -0.517424464225769, "step": 3590 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.768261194229126, "logits/rejected": -1.1734797954559326, "logps/chosen": -634.6853637695312, "logps/rejected": -1281.8953857421875, "loss": 0.0754, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19491219520568848, "rewards/margins": 0.29776549339294434, "rewards/rejected": -0.4926777482032776, "step": 3600 }, { "epoch": 0.96, "learning_rate": 2.1198423385220822e-08, "logits/chosen": -1.518048882484436, "logits/rejected": -1.1481419801712036, "logps/chosen": -578.7421875, "logps/rejected": -1115.6094970703125, "loss": 0.1095, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18905951082706451, "rewards/margins": 0.2177170068025589, "rewards/rejected": -0.4067765176296234, "step": 3610 }, { "epoch": 0.97, "learning_rate": 1.82817971312621e-08, "logits/chosen": -1.632956862449646, "logits/rejected": -1.2727057933807373, "logps/chosen": -638.421875, "logps/rejected": -1247.95751953125, "loss": 0.0849, "rewards/accuracies": 0.75, "rewards/chosen": -0.20175893604755402, "rewards/margins": 0.25679153203964233, "rewards/rejected": -0.45855045318603516, "step": 3620 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -1.6741609573364258, "logits/rejected": -1.1588261127471924, "logps/chosen": -625.3444213867188, "logps/rejected": -1219.2210693359375, "loss": 0.0667, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15933595597743988, "rewards/margins": 0.29343315958976746, "rewards/rejected": -0.4527691900730133, "step": 3630 }, { "epoch": 0.97, "learning_rate": 1.3093872369654148e-08, "logits/chosen": -1.5549055337905884, "logits/rejected": -1.1253769397735596, "logps/chosen": -622.0947265625, "logps/rejected": -1193.876220703125, "loss": 0.1058, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21242782473564148, "rewards/margins": 0.2279575765132904, "rewards/rejected": -0.4403854012489319, "step": 3640 }, { "epoch": 0.97, "learning_rate": 1.0823023375489128e-08, "logits/chosen": -1.463275671005249, "logits/rejected": -0.7773112058639526, "logps/chosen": -759.4178466796875, "logps/rejected": -1502.75732421875, "loss": 0.0479, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24037644267082214, "rewards/margins": 0.34176188707351685, "rewards/rejected": -0.5821383595466614, "step": 3650 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -1.8317701816558838, "logits/rejected": -0.975128173828125, "logps/chosen": -716.7714233398438, "logps/rejected": -1325.99072265625, "loss": 0.0745, "rewards/accuracies": 0.875, "rewards/chosen": -0.19285240769386292, "rewards/margins": 0.2963181734085083, "rewards/rejected": -0.48917055130004883, "step": 3660 }, { "epoch": 0.98, "learning_rate": 6.9285359445145366e-09, "logits/chosen": -1.6910631656646729, "logits/rejected": -1.0203049182891846, "logps/chosen": -723.9457397460938, "logps/rejected": -1435.2896728515625, "loss": 0.0543, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20491650700569153, "rewards/margins": 0.3260103464126587, "rewards/rejected": -0.5309268832206726, "step": 3670 }, { "epoch": 0.98, "learning_rate": 5.305234949880001e-09, "logits/chosen": -1.5100289583206177, "logits/rejected": -0.9951759576797485, "logps/chosen": -733.7612915039062, "logps/rejected": -1351.5802001953125, "loss": 0.0599, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2286415845155716, "rewards/margins": 0.2934814393520355, "rewards/rejected": -0.5221229791641235, "step": 3680 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -1.6468843221664429, "logits/rejected": -0.8552689552307129, "logps/chosen": -657.6549072265625, "logps/rejected": -1334.9576416015625, "loss": 0.0688, "rewards/accuracies": 0.875, "rewards/chosen": -0.1897544115781784, "rewards/margins": 0.32274651527404785, "rewards/rejected": -0.5125009417533875, "step": 3690 }, { "epoch": 0.99, "learning_rate": 2.7072216536885855e-09, "logits/chosen": -1.424248218536377, "logits/rejected": -0.8001385927200317, "logps/chosen": -590.4542846679688, "logps/rejected": -1241.249267578125, "loss": 0.0646, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16355466842651367, "rewards/margins": 0.31166428327560425, "rewards/rejected": -0.47521892189979553, "step": 3700 }, { "epoch": 0.99, "learning_rate": 1.7327344598702667e-09, "logits/chosen": -1.6445577144622803, "logits/rejected": -0.9098326563835144, "logps/chosen": -725.5911865234375, "logps/rejected": -1269.808349609375, "loss": 0.0797, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19787754118442535, "rewards/margins": 0.27285638451576233, "rewards/rejected": -0.47073397040367126, "step": 3710 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -1.4280940294265747, "logits/rejected": -0.8096100687980652, "logps/chosen": -702.1490478515625, "logps/rejected": -1242.6480712890625, "loss": 0.0495, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1968768984079361, "rewards/margins": 0.2870542109012604, "rewards/rejected": -0.48393112421035767, "step": 3720 }, { "epoch": 0.99, "learning_rate": 4.332211510807427e-10, "logits/chosen": -1.5423029661178589, "logits/rejected": -1.002629041671753, "logps/chosen": -668.7158813476562, "logps/rejected": -1202.1529541015625, "loss": 0.0746, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20696516335010529, "rewards/margins": 0.26598578691482544, "rewards/rejected": -0.47295087575912476, "step": 3730 }, { "epoch": 1.0, "learning_rate": 1.0830763387897902e-10, "logits/chosen": -1.3063008785247803, "logits/rejected": -1.1998459100723267, "logps/chosen": -585.5635375976562, "logps/rejected": -1489.9176025390625, "loss": 0.0559, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20169933140277863, "rewards/margins": 0.3584030270576477, "rewards/rejected": -0.5601023435592651, "step": 3740 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.7910579442977905, "logits/rejected": -1.1715670824050903, "logps/chosen": -708.5948486328125, "logps/rejected": -1328.570556640625, "loss": 0.0773, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21266476809978485, "rewards/margins": 0.2909637689590454, "rewards/rejected": -0.5036285519599915, "step": 3750 }, { "epoch": 1.0, "step": 3750, "total_flos": 0.0, "train_loss": 0.0801518168369929, "train_runtime": 15780.2418, "train_samples_per_second": 0.951, "train_steps_per_second": 0.238 } ], "logging_steps": 10, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }