|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 500, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.617801047120419e-08, |
|
"logits/chosen": 0.8436492085456848, |
|
"logits/rejected": 1.1560968160629272, |
|
"logps/chosen": -330.2955322265625, |
|
"logps/rejected": -239.8994140625, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.617801047120419e-07, |
|
"logits/chosen": 1.0090492963790894, |
|
"logits/rejected": 1.0627849102020264, |
|
"logps/chosen": -279.4153137207031, |
|
"logps/rejected": -249.27322387695312, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -8.76396952662617e-05, |
|
"rewards/margins": -9.456619591219351e-05, |
|
"rewards/rejected": 6.926496553205652e-06, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.235602094240838e-07, |
|
"logits/chosen": 1.0303412675857544, |
|
"logits/rejected": 1.0532195568084717, |
|
"logps/chosen": -321.72723388671875, |
|
"logps/rejected": -270.56353759765625, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -6.834287341916934e-05, |
|
"rewards/margins": -4.8897858505370095e-05, |
|
"rewards/rejected": -1.9445011275820434e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.853403141361258e-07, |
|
"logits/chosen": 1.002454400062561, |
|
"logits/rejected": 1.06557297706604, |
|
"logps/chosen": -252.0704345703125, |
|
"logps/rejected": -246.32705688476562, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 1.5753510524518788e-05, |
|
"rewards/margins": 5.4146301408763975e-05, |
|
"rewards/rejected": -3.83927981602028e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0471204188481676e-06, |
|
"logits/chosen": 1.0041682720184326, |
|
"logits/rejected": 1.1504443883895874, |
|
"logps/chosen": -235.38217163085938, |
|
"logps/rejected": -230.2617645263672, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": 7.3400560722802766e-06, |
|
"rewards/margins": 2.9947289021947654e-06, |
|
"rewards/rejected": 4.3453355829115026e-06, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3089005235602096e-06, |
|
"logits/chosen": 0.9595837593078613, |
|
"logits/rejected": 1.0130202770233154, |
|
"logps/chosen": -294.26007080078125, |
|
"logps/rejected": -249.2256317138672, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00017269175441469997, |
|
"rewards/margins": 9.17307916097343e-05, |
|
"rewards/rejected": 8.096096280496567e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5706806282722515e-06, |
|
"logits/chosen": 0.9245076179504395, |
|
"logits/rejected": 1.023485779762268, |
|
"logps/chosen": -242.47689819335938, |
|
"logps/rejected": -230.57373046875, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0002746728132478893, |
|
"rewards/margins": 0.00012865502503700554, |
|
"rewards/rejected": 0.00014601778821088374, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8324607329842933e-06, |
|
"logits/chosen": 0.9357272386550903, |
|
"logits/rejected": 1.0410839319229126, |
|
"logps/chosen": -257.8460388183594, |
|
"logps/rejected": -238.37973022460938, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00047300319420173764, |
|
"rewards/margins": 0.00019578025967348367, |
|
"rewards/rejected": 0.0002772229490801692, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.094240837696335e-06, |
|
"logits/chosen": 1.0097007751464844, |
|
"logits/rejected": 1.0268934965133667, |
|
"logps/chosen": -263.69903564453125, |
|
"logps/rejected": -256.5643615722656, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0005936628440394998, |
|
"rewards/margins": 0.00022301140415947884, |
|
"rewards/rejected": 0.0003706514835357666, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.356020942408377e-06, |
|
"logits/chosen": 0.9857368469238281, |
|
"logits/rejected": 1.050782561302185, |
|
"logps/chosen": -252.1823272705078, |
|
"logps/rejected": -253.6891326904297, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.000997263239696622, |
|
"rewards/margins": 0.00041304732440039515, |
|
"rewards/rejected": 0.0005842159152962267, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.617801047120419e-06, |
|
"logits/chosen": 1.0416964292526245, |
|
"logits/rejected": 1.0389362573623657, |
|
"logps/chosen": -254.76235961914062, |
|
"logps/rejected": -224.39559936523438, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.0013410584069788456, |
|
"rewards/margins": 0.0005450797034427524, |
|
"rewards/rejected": 0.0007959787035360932, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8795811518324613e-06, |
|
"logits/chosen": 1.0654562711715698, |
|
"logits/rejected": 1.1301515102386475, |
|
"logps/chosen": -294.14031982421875, |
|
"logps/rejected": -258.11077880859375, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.001866974518634379, |
|
"rewards/margins": 0.0006641600048169494, |
|
"rewards/rejected": 0.0012028145138174295, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.141361256544503e-06, |
|
"logits/chosen": 0.9807151556015015, |
|
"logits/rejected": 1.125035285949707, |
|
"logps/chosen": -303.8504943847656, |
|
"logps/rejected": -249.7647705078125, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.002772308187559247, |
|
"rewards/margins": 0.0014164599124342203, |
|
"rewards/rejected": 0.001355848042294383, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.403141361256545e-06, |
|
"logits/chosen": 1.096975564956665, |
|
"logits/rejected": 1.1348248720169067, |
|
"logps/chosen": -278.3834533691406, |
|
"logps/rejected": -245.82968139648438, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.003960819449275732, |
|
"rewards/margins": 0.0022526984103024006, |
|
"rewards/rejected": 0.0017081208061426878, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6649214659685865e-06, |
|
"logits/chosen": 1.0514932870864868, |
|
"logits/rejected": 1.1338948011398315, |
|
"logps/chosen": -275.76031494140625, |
|
"logps/rejected": -258.5254821777344, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.004108738619834185, |
|
"rewards/margins": 0.001808557310141623, |
|
"rewards/rejected": 0.0023001814261078835, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.926701570680629e-06, |
|
"logits/chosen": 0.9971652030944824, |
|
"logits/rejected": 1.0917918682098389, |
|
"logps/chosen": -291.89044189453125, |
|
"logps/rejected": -254.80679321289062, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0049656108021736145, |
|
"rewards/margins": 0.002658768789842725, |
|
"rewards/rejected": 0.0023068420123308897, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.18848167539267e-06, |
|
"logits/chosen": 1.0320146083831787, |
|
"logits/rejected": 1.053504228591919, |
|
"logps/chosen": -285.04559326171875, |
|
"logps/rejected": -244.1322784423828, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.005488889757543802, |
|
"rewards/margins": 0.002789679216220975, |
|
"rewards/rejected": 0.00269921007566154, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.450261780104713e-06, |
|
"logits/chosen": 1.0273762941360474, |
|
"logits/rejected": 1.062558650970459, |
|
"logps/chosen": -287.9652099609375, |
|
"logps/rejected": -232.247314453125, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.007104066200554371, |
|
"rewards/margins": 0.0033009883482009172, |
|
"rewards/rejected": 0.0038030785508453846, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.712041884816754e-06, |
|
"logits/chosen": 1.0532909631729126, |
|
"logits/rejected": 1.1673284769058228, |
|
"logps/chosen": -274.5193786621094, |
|
"logps/rejected": -238.21286010742188, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.00742004532366991, |
|
"rewards/margins": 0.003918725997209549, |
|
"rewards/rejected": 0.003501318860799074, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9738219895287965e-06, |
|
"logits/chosen": 1.1504939794540405, |
|
"logits/rejected": 1.1638376712799072, |
|
"logps/chosen": -237.76797485351562, |
|
"logps/rejected": -211.50613403320312, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.007698298431932926, |
|
"rewards/margins": 0.003723274450749159, |
|
"rewards/rejected": 0.00397502351552248, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999661831436499e-06, |
|
"logits/chosen": 1.0712188482284546, |
|
"logits/rejected": 1.0771671533584595, |
|
"logps/chosen": -288.3528747558594, |
|
"logps/rejected": -265.5425109863281, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.008598363026976585, |
|
"rewards/margins": 0.005429488606750965, |
|
"rewards/rejected": 0.003168874653056264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9984929711403395e-06, |
|
"logits/chosen": 1.1236344575881958, |
|
"logits/rejected": 1.2009334564208984, |
|
"logps/chosen": -254.3011932373047, |
|
"logps/rejected": -224.9448699951172, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.007906198501586914, |
|
"rewards/margins": 0.005368872079998255, |
|
"rewards/rejected": 0.0025373264215886593, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996489634487865e-06, |
|
"logits/chosen": 1.0867538452148438, |
|
"logits/rejected": 1.2004356384277344, |
|
"logps/chosen": -258.08062744140625, |
|
"logps/rejected": -240.8439483642578, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.007928581908345222, |
|
"rewards/margins": 0.004648840986192226, |
|
"rewards/rejected": 0.003279739525169134, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9936524905772466e-06, |
|
"logits/chosen": 1.0192543268203735, |
|
"logits/rejected": 1.2005066871643066, |
|
"logps/chosen": -274.07037353515625, |
|
"logps/rejected": -256.2618713378906, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.006800153758376837, |
|
"rewards/margins": 0.003369166050106287, |
|
"rewards/rejected": 0.0034309872426092625, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9899824869915e-06, |
|
"logits/chosen": 1.111426830291748, |
|
"logits/rejected": 1.1554086208343506, |
|
"logps/chosen": -243.208984375, |
|
"logps/rejected": -205.7252655029297, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.007739508058875799, |
|
"rewards/margins": 0.0072770556434988976, |
|
"rewards/rejected": 0.0004624520370271057, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985480849482012e-06, |
|
"logits/chosen": 1.1005799770355225, |
|
"logits/rejected": 1.230799913406372, |
|
"logps/chosen": -272.18597412109375, |
|
"logps/rejected": -257.9790954589844, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.005292638670653105, |
|
"rewards/margins": 0.0029095064383000135, |
|
"rewards/rejected": 0.0023831322323530912, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980149081559142e-06, |
|
"logits/chosen": 1.0777183771133423, |
|
"logits/rejected": 1.155970573425293, |
|
"logps/chosen": -294.93328857421875, |
|
"logps/rejected": -261.9263610839844, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.009707033634185791, |
|
"rewards/margins": 0.008258306421339512, |
|
"rewards/rejected": 0.00144872663076967, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9739889639900655e-06, |
|
"logits/chosen": 1.1088669300079346, |
|
"logits/rejected": 1.1434690952301025, |
|
"logps/chosen": -254.5012664794922, |
|
"logps/rejected": -254.6510009765625, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.009705386124551296, |
|
"rewards/margins": 0.009683574549853802, |
|
"rewards/rejected": 2.181164381909184e-05, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967002554204009e-06, |
|
"logits/chosen": 1.0548467636108398, |
|
"logits/rejected": 1.1509649753570557, |
|
"logps/chosen": -245.9481964111328, |
|
"logps/rejected": -229.8827362060547, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.009255246259272099, |
|
"rewards/margins": 0.006528814323246479, |
|
"rewards/rejected": 0.0027264312375336885, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.959192185605089e-06, |
|
"logits/chosen": 1.0842396020889282, |
|
"logits/rejected": 1.1220932006835938, |
|
"logps/chosen": -266.4988708496094, |
|
"logps/rejected": -246.9526824951172, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.009208474308252335, |
|
"rewards/margins": 0.007726150564849377, |
|
"rewards/rejected": 0.0014823225792497396, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.950560466792969e-06, |
|
"logits/chosen": 1.1049131155014038, |
|
"logits/rejected": 1.1441484689712524, |
|
"logps/chosen": -275.13421630859375, |
|
"logps/rejected": -246.1587677001953, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.006010602228343487, |
|
"rewards/margins": 0.00892153661698103, |
|
"rewards/rejected": -0.0029109339229762554, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9411102806916185e-06, |
|
"logits/chosen": 1.021583080291748, |
|
"logits/rejected": 1.047163963317871, |
|
"logps/chosen": -323.06097412109375, |
|
"logps/rejected": -254.7588653564453, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.008619217202067375, |
|
"rewards/margins": 0.012051543220877647, |
|
"rewards/rejected": -0.003432326018810272, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.930844783586424e-06, |
|
"logits/chosen": 1.024611473083496, |
|
"logits/rejected": 1.0655776262283325, |
|
"logps/chosen": -238.3491668701172, |
|
"logps/rejected": -231.0393829345703, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.006629918701946735, |
|
"rewards/margins": 0.010882768779993057, |
|
"rewards/rejected": -0.004252850078046322, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.919767404070033e-06, |
|
"logits/chosen": 1.04720139503479, |
|
"logits/rejected": 1.0630711317062378, |
|
"logps/chosen": -261.62982177734375, |
|
"logps/rejected": -247.97607421875, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0036115895491093397, |
|
"rewards/margins": 0.009240304119884968, |
|
"rewards/rejected": -0.005628715269267559, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.907881841897216e-06, |
|
"logits/chosen": 1.0087223052978516, |
|
"logits/rejected": 1.059715986251831, |
|
"logps/chosen": -314.62408447265625, |
|
"logps/rejected": -248.10879516601562, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.003107100958004594, |
|
"rewards/margins": 0.013965976424515247, |
|
"rewards/rejected": -0.010858876630663872, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.89519206674919e-06, |
|
"logits/chosen": 0.9633463621139526, |
|
"logits/rejected": 1.0100409984588623, |
|
"logps/chosen": -241.84793090820312, |
|
"logps/rejected": -252.7783203125, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0028146414551883936, |
|
"rewards/margins": 0.013054436072707176, |
|
"rewards/rejected": -0.010239794850349426, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.881702316907769e-06, |
|
"logits/chosen": 0.9069837331771851, |
|
"logits/rejected": 1.0270668268203735, |
|
"logps/chosen": -210.9730987548828, |
|
"logps/rejected": -243.6437225341797, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0008282591588795185, |
|
"rewards/margins": 0.010188087821006775, |
|
"rewards/rejected": -0.009359828196465969, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.86741709783982e-06, |
|
"logits/chosen": 0.8630668520927429, |
|
"logits/rejected": 0.9914480447769165, |
|
"logps/chosen": -332.7330627441406, |
|
"logps/rejected": -281.46807861328125, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0035103503614664078, |
|
"rewards/margins": 0.01303508598357439, |
|
"rewards/rejected": -0.009524735622107983, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.852341180692471e-06, |
|
"logits/chosen": 0.9135398864746094, |
|
"logits/rejected": 0.9984884262084961, |
|
"logps/chosen": -284.92620849609375, |
|
"logps/rejected": -252.03970336914062, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0040648458525538445, |
|
"rewards/margins": 0.0157476756721735, |
|
"rewards/rejected": -0.011682827956974506, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.836479600699579e-06, |
|
"logits/chosen": 0.9406082034111023, |
|
"logits/rejected": 0.9047748446464539, |
|
"logps/chosen": -278.61248779296875, |
|
"logps/rejected": -284.1888732910156, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.006144961342215538, |
|
"rewards/margins": 0.017049867659807205, |
|
"rewards/rejected": -0.010904906317591667, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.819837655500014e-06, |
|
"logits/chosen": 0.8400663137435913, |
|
"logits/rejected": 0.9222391843795776, |
|
"logps/chosen": -230.8615264892578, |
|
"logps/rejected": -221.2638397216797, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0005404525436460972, |
|
"rewards/margins": 0.011931750923395157, |
|
"rewards/rejected": -0.011391298845410347, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.802420903368286e-06, |
|
"logits/chosen": 0.8889272809028625, |
|
"logits/rejected": 0.8912805318832397, |
|
"logps/chosen": -268.0902099609375, |
|
"logps/rejected": -250.4331512451172, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0009850022615864873, |
|
"rewards/margins": 0.010063153691589832, |
|
"rewards/rejected": -0.009078151546418667, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.784235161358124e-06, |
|
"logits/chosen": 0.8787338137626648, |
|
"logits/rejected": 0.9284510612487793, |
|
"logps/chosen": -288.6819152832031, |
|
"logps/rejected": -265.958984375, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.0032099136151373386, |
|
"rewards/margins": 0.021029185503721237, |
|
"rewards/rejected": -0.017819274216890335, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.765286503359632e-06, |
|
"logits/chosen": 0.8820232152938843, |
|
"logits/rejected": 0.9475772976875305, |
|
"logps/chosen": -270.6169738769531, |
|
"logps/rejected": -259.78839111328125, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0021267482079565525, |
|
"rewards/margins": 0.019616421312093735, |
|
"rewards/rejected": -0.021743169054389, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.745581258070654e-06, |
|
"logits/chosen": 0.7767919301986694, |
|
"logits/rejected": 0.87933349609375, |
|
"logps/chosen": -254.14315795898438, |
|
"logps/rejected": -252.87222290039062, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0043312786146998405, |
|
"rewards/margins": 0.013770043849945068, |
|
"rewards/rejected": -0.018101321533322334, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.725126006883047e-06, |
|
"logits/chosen": 0.7937654256820679, |
|
"logits/rejected": 0.8364180326461792, |
|
"logps/chosen": -238.3746337890625, |
|
"logps/rejected": -241.1796875, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.00781493354588747, |
|
"rewards/margins": 0.011845615692436695, |
|
"rewards/rejected": -0.019660547375679016, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.70392758168454e-06, |
|
"logits/chosen": 0.7985974550247192, |
|
"logits/rejected": 0.8068701028823853, |
|
"logps/chosen": -345.21343994140625, |
|
"logps/rejected": -304.43817138671875, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.007276026997715235, |
|
"rewards/margins": 0.02650422975420952, |
|
"rewards/rejected": -0.033780258148908615, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68199306257695e-06, |
|
"logits/chosen": 0.7760607004165649, |
|
"logits/rejected": 0.773891806602478, |
|
"logps/chosen": -327.35369873046875, |
|
"logps/rejected": -314.1829528808594, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.014054256491363049, |
|
"rewards/margins": 0.03367748484015465, |
|
"rewards/rejected": -0.04773174598813057, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.659329775511478e-06, |
|
"logits/chosen": 0.7017660140991211, |
|
"logits/rejected": 0.7137667536735535, |
|
"logps/chosen": -287.37652587890625, |
|
"logps/rejected": -271.36358642578125, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.021392906084656715, |
|
"rewards/margins": 0.025359559804201126, |
|
"rewards/rejected": -0.04675246775150299, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.635945289841902e-06, |
|
"logits/chosen": 0.5314046144485474, |
|
"logits/rejected": 0.5452633500099182, |
|
"logps/chosen": -337.0295104980469, |
|
"logps/rejected": -379.64593505859375, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.05135764926671982, |
|
"rewards/margins": 0.04310908168554306, |
|
"rewards/rejected": -0.09446673840284348, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.611847415796476e-06, |
|
"logits/chosen": 0.29375532269477844, |
|
"logits/rejected": 0.2797163724899292, |
|
"logps/chosen": -427.3785095214844, |
|
"logps/rejected": -405.41461181640625, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12762612104415894, |
|
"rewards/margins": 0.03379129245877266, |
|
"rewards/rejected": -0.16141743957996368, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.587044201869378e-06, |
|
"logits/chosen": -0.2227209359407425, |
|
"logits/rejected": -0.20223090052604675, |
|
"logps/chosen": -787.1062622070312, |
|
"logps/rejected": -1045.249267578125, |
|
"loss": 0.4818, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.5222705602645874, |
|
"rewards/margins": 0.26146870851516724, |
|
"rewards/rejected": -0.7837392687797546, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.561543932132574e-06, |
|
"logits/chosen": -0.11980749666690826, |
|
"logits/rejected": -0.12788312137126923, |
|
"logps/chosen": -732.790283203125, |
|
"logps/rejected": -833.4085693359375, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.45401230454444885, |
|
"rewards/margins": 0.1505609005689621, |
|
"rewards/rejected": -0.6045731902122498, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.535355123469009e-06, |
|
"logits/chosen": -0.14909827709197998, |
|
"logits/rejected": -0.18795037269592285, |
|
"logps/chosen": -696.719482421875, |
|
"logps/rejected": -1046.4390869140625, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.42250218987464905, |
|
"rewards/margins": 0.3782690167427063, |
|
"rewards/rejected": -0.8007712364196777, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508486522728037e-06, |
|
"logits/chosen": -0.18408063054084778, |
|
"logits/rejected": -0.14851421117782593, |
|
"logps/chosen": -893.5338134765625, |
|
"logps/rejected": -1111.295654296875, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.629914402961731, |
|
"rewards/margins": 0.23182418942451477, |
|
"rewards/rejected": -0.8617385625839233, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.480947103804044e-06, |
|
"logits/chosen": -0.20195765793323517, |
|
"logits/rejected": -0.2249602973461151, |
|
"logps/chosen": -970.3370971679688, |
|
"logps/rejected": -1377.3724365234375, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7055306434631348, |
|
"rewards/margins": 0.46008825302124023, |
|
"rewards/rejected": -1.165618896484375, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.452746064639239e-06, |
|
"logits/chosen": -0.27148136496543884, |
|
"logits/rejected": -0.24398574233055115, |
|
"logps/chosen": -1213.016357421875, |
|
"logps/rejected": -1345.276123046875, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.8822624087333679, |
|
"rewards/margins": 0.20509858429431915, |
|
"rewards/rejected": -1.0873609781265259, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.423892824151617e-06, |
|
"logits/chosen": -0.32366353273391724, |
|
"logits/rejected": -0.3419601321220398, |
|
"logps/chosen": -1556.5191650390625, |
|
"logps/rejected": -1787.370361328125, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.2833073139190674, |
|
"rewards/margins": 0.28631919622421265, |
|
"rewards/rejected": -1.5696265697479248, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3943970190891164e-06, |
|
"logits/chosen": -0.23246267437934875, |
|
"logits/rejected": -0.25014322996139526, |
|
"logps/chosen": -1218.4473876953125, |
|
"logps/rejected": -1182.8861083984375, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.9385588765144348, |
|
"rewards/margins": 0.02590467967092991, |
|
"rewards/rejected": -0.9644634127616882, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.364268500811025e-06, |
|
"logits/chosen": -0.17416557669639587, |
|
"logits/rejected": -0.17902135848999023, |
|
"logps/chosen": -985.0808715820312, |
|
"logps/rejected": -1348.008056640625, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6908355951309204, |
|
"rewards/margins": 0.41670989990234375, |
|
"rewards/rejected": -1.1075454950332642, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.333517331997704e-06, |
|
"logits/chosen": -0.14922045171260834, |
|
"logits/rejected": -0.19132760167121887, |
|
"logps/chosen": -1128.3173828125, |
|
"logps/rejected": -1489.837158203125, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.8316856622695923, |
|
"rewards/margins": 0.4227636754512787, |
|
"rewards/rejected": -1.2544492483139038, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.302153783289737e-06, |
|
"logits/chosen": -0.1274535059928894, |
|
"logits/rejected": -0.17803938686847687, |
|
"logps/chosen": -1048.890625, |
|
"logps/rejected": -1506.1158447265625, |
|
"loss": 0.4743, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7957647442817688, |
|
"rewards/margins": 0.4669608175754547, |
|
"rewards/rejected": -1.2627254724502563, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.270188329857613e-06, |
|
"logits/chosen": -0.14815063774585724, |
|
"logits/rejected": -0.15499570965766907, |
|
"logps/chosen": -1084.8118896484375, |
|
"logps/rejected": -1618.885009765625, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7856907844543457, |
|
"rewards/margins": 0.5791957974433899, |
|
"rewards/rejected": -1.3648868799209595, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.237631647903115e-06, |
|
"logits/chosen": -0.024261217564344406, |
|
"logits/rejected": -0.038342759013175964, |
|
"logps/chosen": -723.5900268554688, |
|
"logps/rejected": -1155.1717529296875, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.46949324011802673, |
|
"rewards/margins": 0.45854002237319946, |
|
"rewards/rejected": -0.9280332326889038, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.204494611093548e-06, |
|
"logits/chosen": -0.05518772080540657, |
|
"logits/rejected": -0.100825235247612, |
|
"logps/chosen": -1270.6005859375, |
|
"logps/rejected": -1703.8551025390625, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.9448369145393372, |
|
"rewards/margins": 0.4941697120666504, |
|
"rewards/rejected": -1.4390065670013428, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.170788286930024e-06, |
|
"logits/chosen": -0.06449203193187714, |
|
"logits/rejected": -0.1527264416217804, |
|
"logps/chosen": -1250.4991455078125, |
|
"logps/rejected": -1752.0111083984375, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.9863438606262207, |
|
"rewards/margins": 0.5237391591072083, |
|
"rewards/rejected": -1.5100830793380737, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.136523933051005e-06, |
|
"logits/chosen": -0.10980840772390366, |
|
"logits/rejected": -0.13391873240470886, |
|
"logps/chosen": -1053.7823486328125, |
|
"logps/rejected": -1614.2884521484375, |
|
"loss": 0.4762, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.8304306864738464, |
|
"rewards/margins": 0.5787540078163147, |
|
"rewards/rejected": -1.4091846942901611, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.101712993472348e-06, |
|
"logits/chosen": -0.10138118267059326, |
|
"logits/rejected": -0.13220438361167908, |
|
"logps/chosen": -1581.559326171875, |
|
"logps/rejected": -1862.4993896484375, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -1.2885770797729492, |
|
"rewards/margins": 0.32578420639038086, |
|
"rewards/rejected": -1.6143611669540405, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.066367094765091e-06, |
|
"logits/chosen": -0.06212924048304558, |
|
"logits/rejected": -0.09771373122930527, |
|
"logps/chosen": -1470.7352294921875, |
|
"logps/rejected": -1844.652587890625, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.204660177230835, |
|
"rewards/margins": 0.3980388641357422, |
|
"rewards/rejected": -1.6026990413665771, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.030498042172277e-06, |
|
"logits/chosen": 0.01754361391067505, |
|
"logits/rejected": -0.048445507884025574, |
|
"logps/chosen": -979.1268310546875, |
|
"logps/rejected": -1244.6566162109375, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.7009618878364563, |
|
"rewards/margins": 0.3003775477409363, |
|
"rewards/rejected": -1.0013394355773926, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.994117815666095e-06, |
|
"logits/chosen": -0.04728760942816734, |
|
"logits/rejected": -0.0919174998998642, |
|
"logps/chosen": -1344.916015625, |
|
"logps/rejected": -1900.5986328125, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.0581436157226562, |
|
"rewards/margins": 0.5791832208633423, |
|
"rewards/rejected": -1.6373268365859985, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.957238565946672e-06, |
|
"logits/chosen": 0.004687662236392498, |
|
"logits/rejected": -0.06074858829379082, |
|
"logps/chosen": -1193.521240234375, |
|
"logps/rejected": -2065.345947265625, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8945425152778625, |
|
"rewards/margins": 0.8969429731369019, |
|
"rewards/rejected": -1.7914857864379883, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.919872610383831e-06, |
|
"logits/chosen": 0.07505255192518234, |
|
"logits/rejected": -0.015723228454589844, |
|
"logps/chosen": -1065.49365234375, |
|
"logps/rejected": -1707.6328125, |
|
"loss": 0.4739, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.7956100702285767, |
|
"rewards/margins": 0.6891741752624512, |
|
"rewards/rejected": -1.4847842454910278, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.882032428903195e-06, |
|
"logits/chosen": 0.02505052089691162, |
|
"logits/rejected": -0.009700920432806015, |
|
"logps/chosen": -1372.3634033203125, |
|
"logps/rejected": -2129.860595703125, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.0954824686050415, |
|
"rewards/margins": 0.7768491506576538, |
|
"rewards/rejected": -1.8723316192626953, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.84373065981799e-06, |
|
"logits/chosen": 0.1249980553984642, |
|
"logits/rejected": 0.04747745767235756, |
|
"logps/chosen": -956.18115234375, |
|
"logps/rejected": -1541.792724609375, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6708589792251587, |
|
"rewards/margins": 0.6129963994026184, |
|
"rewards/rejected": -1.2838553190231323, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8049800956079552e-06, |
|
"logits/chosen": 0.23526708781719208, |
|
"logits/rejected": 0.19636312127113342, |
|
"logps/chosen": -1106.01513671875, |
|
"logps/rejected": -1326.5162353515625, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.8387149572372437, |
|
"rewards/margins": 0.24964456260204315, |
|
"rewards/rejected": -1.0883597135543823, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.765793678646753e-06, |
|
"logits/chosen": 0.19188269972801208, |
|
"logits/rejected": 0.1782020926475525, |
|
"logps/chosen": -802.7251586914062, |
|
"logps/rejected": -1634.812255859375, |
|
"loss": 0.4647, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5356382727622986, |
|
"rewards/margins": 0.8580523729324341, |
|
"rewards/rejected": -1.3936904668807983, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.726184496879323e-06, |
|
"logits/chosen": 0.14159968495368958, |
|
"logits/rejected": 0.08811040967702866, |
|
"logps/chosen": -1127.4029541015625, |
|
"logps/rejected": -1502.1641845703125, |
|
"loss": 0.4756, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.8773125410079956, |
|
"rewards/margins": 0.3974476158618927, |
|
"rewards/rejected": -1.274760365486145, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.686165779450619e-06, |
|
"logits/chosen": 0.1939581334590912, |
|
"logits/rejected": 0.1522776186466217, |
|
"logps/chosen": -968.0919799804688, |
|
"logps/rejected": -1507.5386962890625, |
|
"loss": 0.4793, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.7149516344070435, |
|
"rewards/margins": 0.5672934055328369, |
|
"rewards/rejected": -1.2822450399398804, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.645750892287178e-06, |
|
"logits/chosen": 0.1306479275226593, |
|
"logits/rejected": 0.05887848883867264, |
|
"logps/chosen": -1289.082275390625, |
|
"logps/rejected": -1864.7164306640625, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.9888286590576172, |
|
"rewards/margins": 0.6331573724746704, |
|
"rewards/rejected": -1.6219860315322876, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.604953333633009e-06, |
|
"logits/chosen": 0.205116868019104, |
|
"logits/rejected": 0.15303435921669006, |
|
"logps/chosen": -848.7705078125, |
|
"logps/rejected": -1336.090576171875, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5991231799125671, |
|
"rewards/margins": 0.5247097015380859, |
|
"rewards/rejected": -1.1238329410552979, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.56378672954129e-06, |
|
"logits/chosen": 0.22229023277759552, |
|
"logits/rejected": 0.17705193161964417, |
|
"logps/chosen": -1094.6126708984375, |
|
"logps/rejected": -1681.7445068359375, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.8267385363578796, |
|
"rewards/margins": 0.6382580995559692, |
|
"rewards/rejected": -1.4649966955184937, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5222648293233806e-06, |
|
"logits/chosen": 0.1940724402666092, |
|
"logits/rejected": 0.1474287211894989, |
|
"logps/chosen": -1133.5128173828125, |
|
"logps/rejected": -1901.333984375, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.8616431951522827, |
|
"rewards/margins": 0.8072026968002319, |
|
"rewards/rejected": -1.6688458919525146, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4804015009566573e-06, |
|
"logits/chosen": 0.14867620170116425, |
|
"logits/rejected": 0.050886522978544235, |
|
"logps/chosen": -1169.879638671875, |
|
"logps/rejected": -2415.080078125, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9197257161140442, |
|
"rewards/margins": 1.2611197233200073, |
|
"rewards/rejected": -2.180845260620117, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4382107264527244e-06, |
|
"logits/chosen": 0.16670770943164825, |
|
"logits/rejected": 0.11358609050512314, |
|
"logps/chosen": -1215.7694091796875, |
|
"logps/rejected": -1938.170654296875, |
|
"loss": 0.4701, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.9481611251831055, |
|
"rewards/margins": 0.7456313967704773, |
|
"rewards/rejected": -1.6937923431396484, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.3957065971875387e-06, |
|
"logits/chosen": 0.24467554688453674, |
|
"logits/rejected": 0.1815129816532135, |
|
"logps/chosen": -1700.726806640625, |
|
"logps/rejected": -2238.2724609375, |
|
"loss": 0.4738, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1.4334746599197388, |
|
"rewards/margins": 0.5637392997741699, |
|
"rewards/rejected": -1.9972139596939087, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.352903309194999e-06, |
|
"logits/chosen": 0.25681573152542114, |
|
"logits/rejected": 0.22445912659168243, |
|
"logps/chosen": -1175.2008056640625, |
|
"logps/rejected": -1852.9886474609375, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9056652784347534, |
|
"rewards/margins": 0.6970826387405396, |
|
"rewards/rejected": -1.6027476787567139, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.309815158425591e-06, |
|
"logits/chosen": 0.35658639669418335, |
|
"logits/rejected": 0.23468701541423798, |
|
"logps/chosen": -1126.1968994140625, |
|
"logps/rejected": -1490.5289306640625, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.8638699650764465, |
|
"rewards/margins": 0.4017399847507477, |
|
"rewards/rejected": -1.265609860420227, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.266456535971654e-06, |
|
"logits/chosen": 0.29603832960128784, |
|
"logits/rejected": 0.2804957330226898, |
|
"logps/chosen": -1391.908447265625, |
|
"logps/rejected": -1630.26220703125, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.103570580482483, |
|
"rewards/margins": 0.2950761914253235, |
|
"rewards/rejected": -1.3986468315124512, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2228419232608692e-06, |
|
"logits/chosen": 0.25324004888534546, |
|
"logits/rejected": 0.19424840807914734, |
|
"logps/chosen": -1254.45947265625, |
|
"logps/rejected": -1625.465087890625, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -1.0103174448013306, |
|
"rewards/margins": 0.3879779279232025, |
|
"rewards/rejected": -1.3982954025268555, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1789858872195888e-06, |
|
"logits/chosen": 0.35612553358078003, |
|
"logits/rejected": 0.2640685737133026, |
|
"logps/chosen": -1018.1083984375, |
|
"logps/rejected": -1447.966796875, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7736637592315674, |
|
"rewards/margins": 0.4553070068359375, |
|
"rewards/rejected": -1.2289707660675049, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1349030754075945e-06, |
|
"logits/chosen": 0.32709187269210815, |
|
"logits/rejected": 0.27523329854011536, |
|
"logps/chosen": -996.7443237304688, |
|
"logps/rejected": -1309.497802734375, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.6882850527763367, |
|
"rewards/margins": 0.3720100224018097, |
|
"rewards/rejected": -1.0602951049804688, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0906082111259313e-06, |
|
"logits/chosen": 0.28385213017463684, |
|
"logits/rejected": 0.26248598098754883, |
|
"logps/chosen": -1238.9512939453125, |
|
"logps/rejected": -1446.0545654296875, |
|
"loss": 0.4729, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.9621122479438782, |
|
"rewards/margins": 0.24497418105602264, |
|
"rewards/rejected": -1.207086443901062, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.046116088499449e-06, |
|
"logits/chosen": 0.20961081981658936, |
|
"logits/rejected": 0.12288858741521835, |
|
"logps/chosen": -1385.43359375, |
|
"logps/rejected": -2388.202392578125, |
|
"loss": 0.4591, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.1082097291946411, |
|
"rewards/margins": 1.0280786752700806, |
|
"rewards/rejected": -2.1362884044647217, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0014415675356813e-06, |
|
"logits/chosen": 0.2143702507019043, |
|
"logits/rejected": 0.12640917301177979, |
|
"logps/chosen": -1842.924072265625, |
|
"logps/rejected": -2572.03759765625, |
|
"loss": 0.4703, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.5414974689483643, |
|
"rewards/margins": 0.7958974838256836, |
|
"rewards/rejected": -2.337394952774048, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9565995691617242e-06, |
|
"logits/chosen": 0.2267983853816986, |
|
"logits/rejected": 0.19906947016716003, |
|
"logps/chosen": -1659.0390625, |
|
"logps/rejected": -1897.612548828125, |
|
"loss": 0.4796, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -1.4338902235031128, |
|
"rewards/margins": 0.23841390013694763, |
|
"rewards/rejected": -1.672304391860962, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9116050702407706e-06, |
|
"logits/chosen": 0.2076607495546341, |
|
"logits/rejected": 0.15953665971755981, |
|
"logps/chosen": -1761.4957275390625, |
|
"logps/rejected": -2119.157470703125, |
|
"loss": 0.4733, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.521126627922058, |
|
"rewards/margins": 0.37375563383102417, |
|
"rewards/rejected": -1.8948824405670166, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8664730985699537e-06, |
|
"logits/chosen": 0.2155609130859375, |
|
"logits/rejected": 0.15363694727420807, |
|
"logps/chosen": -1374.277587890625, |
|
"logps/rejected": -2335.11962890625, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -1.131838083267212, |
|
"rewards/margins": 0.9804404973983765, |
|
"rewards/rejected": -2.112278461456299, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8212187278611907e-06, |
|
"logits/chosen": 0.3766547739505768, |
|
"logits/rejected": 0.23996052145957947, |
|
"logps/chosen": -978.6238403320312, |
|
"logps/rejected": -1637.2352294921875, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6910273432731628, |
|
"rewards/margins": 0.7083319425582886, |
|
"rewards/rejected": -1.3993593454360962, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7758570727066843e-06, |
|
"logits/chosen": 0.3515971899032593, |
|
"logits/rejected": 0.2718420922756195, |
|
"logps/chosen": -945.19921875, |
|
"logps/rejected": -1549.3182373046875, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.6832455396652222, |
|
"rewards/margins": 0.6422259211540222, |
|
"rewards/rejected": -1.3254715204238892, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.730403283530767e-06, |
|
"logits/chosen": 0.3331068158149719, |
|
"logits/rejected": 0.21990351378917694, |
|
"logps/chosen": -957.8298950195312, |
|
"logps/rejected": -1847.413330078125, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6945260167121887, |
|
"rewards/margins": 0.9040031433105469, |
|
"rewards/rejected": -1.5985292196273804, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6848725415297888e-06, |
|
"logits/chosen": 0.24949748814105988, |
|
"logits/rejected": 0.1596693992614746, |
|
"logps/chosen": -1084.4876708984375, |
|
"logps/rejected": -1898.144287109375, |
|
"loss": 0.4618, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8169393539428711, |
|
"rewards/margins": 0.8545991778373718, |
|
"rewards/rejected": -1.6715381145477295, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.639280053601719e-06, |
|
"logits/chosen": 0.22901049256324768, |
|
"logits/rejected": 0.1595744788646698, |
|
"logps/chosen": -1491.752197265625, |
|
"logps/rejected": -2144.299560546875, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2246992588043213, |
|
"rewards/margins": 0.6539346575737, |
|
"rewards/rejected": -1.8786340951919556, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.59364104726716e-06, |
|
"logits/chosen": 0.31597059965133667, |
|
"logits/rejected": 0.21497178077697754, |
|
"logps/chosen": -1171.93212890625, |
|
"logps/rejected": -1925.6861572265625, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.8903388977050781, |
|
"rewards/margins": 0.8044350743293762, |
|
"rewards/rejected": -1.6947739124298096, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.547970765583491e-06, |
|
"logits/chosen": 0.35459914803504944, |
|
"logits/rejected": 0.21209494769573212, |
|
"logps/chosen": -1010.3555908203125, |
|
"logps/rejected": -1694.515869140625, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7338335514068604, |
|
"rewards/margins": 0.7184348106384277, |
|
"rewards/rejected": -1.452268362045288, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.502284462053799e-06, |
|
"logits/chosen": 0.2834840416908264, |
|
"logits/rejected": 0.19832350313663483, |
|
"logps/chosen": -1069.5567626953125, |
|
"logps/rejected": -1713.046142578125, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.781063437461853, |
|
"rewards/margins": 0.6842104196548462, |
|
"rewards/rejected": -1.4652738571166992, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.456597395532338e-06, |
|
"logits/chosen": 0.23369982838630676, |
|
"logits/rejected": 0.15703235566616058, |
|
"logps/chosen": -1476.560546875, |
|
"logps/rejected": -2163.74267578125, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.1821922063827515, |
|
"rewards/margins": 0.7186304330825806, |
|
"rewards/rejected": -1.900822639465332, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4109248251281953e-06, |
|
"logits/chosen": 0.2690127491950989, |
|
"logits/rejected": 0.1083533763885498, |
|
"logps/chosen": -1436.783447265625, |
|
"logps/rejected": -2573.591064453125, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1651248931884766, |
|
"rewards/margins": 1.1516902446746826, |
|
"rewards/rejected": -2.316815137863159, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.365282005108875e-06, |
|
"logits/chosen": 0.2598133087158203, |
|
"logits/rejected": 0.17415449023246765, |
|
"logps/chosen": -1348.472412109375, |
|
"logps/rejected": -1934.5325927734375, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.0647830963134766, |
|
"rewards/margins": 0.6361646056175232, |
|
"rewards/rejected": -1.7009475231170654, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319684179805491e-06, |
|
"logits/chosen": 0.28293731808662415, |
|
"logits/rejected": 0.16613037884235382, |
|
"logps/chosen": -1299.3868408203125, |
|
"logps/rejected": -2169.75830078125, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.0253424644470215, |
|
"rewards/margins": 0.8982712626457214, |
|
"rewards/rejected": -1.9236137866973877, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2741465785212905e-06, |
|
"logits/chosen": 0.3770299553871155, |
|
"logits/rejected": 0.3206137418746948, |
|
"logps/chosen": -845.8580322265625, |
|
"logps/rejected": -1371.0318603515625, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.573067843914032, |
|
"rewards/margins": 0.5587201714515686, |
|
"rewards/rejected": -1.1317881345748901, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2286844104451848e-06, |
|
"logits/chosen": 0.29950836300849915, |
|
"logits/rejected": 0.2572200298309326, |
|
"logps/chosen": -1225.456298828125, |
|
"logps/rejected": -1701.4114990234375, |
|
"loss": 0.4717, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9399654269218445, |
|
"rewards/margins": 0.5411953926086426, |
|
"rewards/rejected": -1.4811608791351318, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.183312859572008e-06, |
|
"logits/chosen": 0.2056627720594406, |
|
"logits/rejected": 0.13243384659290314, |
|
"logps/chosen": -1311.5948486328125, |
|
"logps/rejected": -2090.031494140625, |
|
"loss": 0.473, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.03976309299469, |
|
"rewards/margins": 0.8429223895072937, |
|
"rewards/rejected": -1.8826854228973389, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1380470796311843e-06, |
|
"logits/chosen": 0.26897698640823364, |
|
"logits/rejected": 0.19322913885116577, |
|
"logps/chosen": -1409.248779296875, |
|
"logps/rejected": -1968.114013671875, |
|
"loss": 0.4624, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1417442560195923, |
|
"rewards/margins": 0.591802716255188, |
|
"rewards/rejected": -1.7335469722747803, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.092902189025507e-06, |
|
"logits/chosen": 0.298466295003891, |
|
"logits/rejected": 0.1567627638578415, |
|
"logps/chosen": -1206.5018310546875, |
|
"logps/rejected": -2206.86767578125, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9475823640823364, |
|
"rewards/margins": 1.0231791734695435, |
|
"rewards/rejected": -1.9707612991333008, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0478932657817105e-06, |
|
"logits/chosen": 0.31211769580841064, |
|
"logits/rejected": 0.1320025771856308, |
|
"logps/chosen": -1475.707275390625, |
|
"logps/rejected": -2485.997802734375, |
|
"loss": 0.4686, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.1953158378601074, |
|
"rewards/margins": 1.0596123933792114, |
|
"rewards/rejected": -2.2549283504486084, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0030353425145376e-06, |
|
"logits/chosen": 0.29154402017593384, |
|
"logits/rejected": 0.20484980940818787, |
|
"logps/chosen": -1307.7490234375, |
|
"logps/rejected": -1891.5804443359375, |
|
"loss": 0.475, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.068193793296814, |
|
"rewards/margins": 0.5871996879577637, |
|
"rewards/rejected": -1.6553936004638672, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.958343401405964e-06, |
|
"logits/chosen": 0.2675972282886505, |
|
"logits/rejected": 0.20726804435253143, |
|
"logps/chosen": -1136.7181396484375, |
|
"logps/rejected": -1507.15234375, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.8827505111694336, |
|
"rewards/margins": 0.3945409953594208, |
|
"rewards/rejected": -1.2772915363311768, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9138323692012734e-06, |
|
"logits/chosen": 0.273415207862854, |
|
"logits/rejected": 0.16786028444766998, |
|
"logps/chosen": -1736.076416015625, |
|
"logps/rejected": -2560.149169921875, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.4262146949768066, |
|
"rewards/margins": 0.8943548202514648, |
|
"rewards/rejected": -2.3205695152282715, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8695171122236443e-06, |
|
"logits/chosen": 0.20894399285316467, |
|
"logits/rejected": 0.10228965431451797, |
|
"logps/chosen": -1324.954345703125, |
|
"logps/rejected": -2638.982666015625, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.0546363592147827, |
|
"rewards/margins": 1.3503773212432861, |
|
"rewards/rejected": -2.4050137996673584, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8254124314089225e-06, |
|
"logits/chosen": 0.3430663049221039, |
|
"logits/rejected": 0.2673262655735016, |
|
"logps/chosen": -861.5838623046875, |
|
"logps/rejected": -1974.1458740234375, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5944491624832153, |
|
"rewards/margins": 1.1017727851867676, |
|
"rewards/rejected": -1.696221947669983, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.781533057362221e-06, |
|
"logits/chosen": 0.3156498670578003, |
|
"logits/rejected": 0.185347780585289, |
|
"logps/chosen": -1168.6217041015625, |
|
"logps/rejected": -1924.7115478515625, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8821272850036621, |
|
"rewards/margins": 0.8161047101020813, |
|
"rewards/rejected": -1.6982319355010986, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7378936454380277e-06, |
|
"logits/chosen": 0.36333876848220825, |
|
"logits/rejected": 0.26434630155563354, |
|
"logps/chosen": -1027.678466796875, |
|
"logps/rejected": -1634.684814453125, |
|
"loss": 0.4654, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.7384849786758423, |
|
"rewards/margins": 0.6454702615737915, |
|
"rewards/rejected": -1.383955478668213, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.6945087708454273e-06, |
|
"logits/chosen": 0.27189189195632935, |
|
"logits/rejected": 0.18399885296821594, |
|
"logps/chosen": -1334.14990234375, |
|
"logps/rejected": -1880.106201171875, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -1.090384840965271, |
|
"rewards/margins": 0.5727940797805786, |
|
"rewards/rejected": -1.66317880153656, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.651392923780105e-06, |
|
"logits/chosen": 0.4100673794746399, |
|
"logits/rejected": 0.2657643258571625, |
|
"logps/chosen": -1111.376220703125, |
|
"logps/rejected": -1941.037353515625, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8117152452468872, |
|
"rewards/margins": 0.8833802938461304, |
|
"rewards/rejected": -1.695095419883728, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.608560504584737e-06, |
|
"logits/chosen": 0.301455020904541, |
|
"logits/rejected": 0.22863301634788513, |
|
"logps/chosen": -1159.3509521484375, |
|
"logps/rejected": -2089.1953125, |
|
"loss": 0.4631, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.8735660314559937, |
|
"rewards/margins": 0.9659594297409058, |
|
"rewards/rejected": -1.839525580406189, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5660258189393945e-06, |
|
"logits/chosen": 0.19146260619163513, |
|
"logits/rejected": 0.14353962242603302, |
|
"logps/chosen": -1484.316650390625, |
|
"logps/rejected": -2343.659423828125, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2383463382720947, |
|
"rewards/margins": 0.8734383583068848, |
|
"rewards/rejected": -2.1117844581604004, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5238030730835578e-06, |
|
"logits/chosen": 0.31026071310043335, |
|
"logits/rejected": 0.19475135207176208, |
|
"logps/chosen": -1738.6246337890625, |
|
"logps/rejected": -2328.933349609375, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.462491750717163, |
|
"rewards/margins": 0.6261566281318665, |
|
"rewards/rejected": -2.0886483192443848, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4819063690713565e-06, |
|
"logits/chosen": 0.26937440037727356, |
|
"logits/rejected": 0.15669001638889313, |
|
"logps/chosen": -1396.046630859375, |
|
"logps/rejected": -2102.262939453125, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1547179222106934, |
|
"rewards/margins": 0.7373046278953552, |
|
"rewards/rejected": -1.892022728919983, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4403497000615885e-06, |
|
"logits/chosen": 0.3091123700141907, |
|
"logits/rejected": 0.204463392496109, |
|
"logps/chosen": -1624.702392578125, |
|
"logps/rejected": -2571.47412109375, |
|
"loss": 0.4654, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.3447537422180176, |
|
"rewards/margins": 0.9728642702102661, |
|
"rewards/rejected": -2.3176181316375732, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3991469456441273e-06, |
|
"logits/chosen": 0.31638103723526, |
|
"logits/rejected": 0.23879094421863556, |
|
"logps/chosen": -1413.901123046875, |
|
"logps/rejected": -2330.88330078125, |
|
"loss": 0.4546, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1106030941009521, |
|
"rewards/margins": 0.9651702642440796, |
|
"rewards/rejected": -2.075773239135742, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3583118672042441e-06, |
|
"logits/chosen": 0.274738609790802, |
|
"logits/rejected": 0.18945345282554626, |
|
"logps/chosen": -1652.8541259765625, |
|
"logps/rejected": -2093.89990234375, |
|
"loss": 0.4704, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -1.371697187423706, |
|
"rewards/margins": 0.5045391917228699, |
|
"rewards/rejected": -1.8762363195419312, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3178581033264218e-06, |
|
"logits/chosen": 0.27669447660446167, |
|
"logits/rejected": 0.16615034639835358, |
|
"logps/chosen": -1164.213134765625, |
|
"logps/rejected": -2034.477783203125, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9321501851081848, |
|
"rewards/margins": 0.888770580291748, |
|
"rewards/rejected": -1.820920705795288, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2777991652391757e-06, |
|
"logits/chosen": 0.31228479743003845, |
|
"logits/rejected": 0.21845977008342743, |
|
"logps/chosen": -1202.439697265625, |
|
"logps/rejected": -1930.3785400390625, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.9665547609329224, |
|
"rewards/margins": 0.7526635527610779, |
|
"rewards/rejected": -1.7192184925079346, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2381484323024178e-06, |
|
"logits/chosen": 0.35927221179008484, |
|
"logits/rejected": 0.2287793606519699, |
|
"logps/chosen": -1124.3046875, |
|
"logps/rejected": -2056.501708984375, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8218294382095337, |
|
"rewards/margins": 0.98065185546875, |
|
"rewards/rejected": -1.8024810552597046, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1989191475388518e-06, |
|
"logits/chosen": 0.3698425889015198, |
|
"logits/rejected": 0.2954414486885071, |
|
"logps/chosen": -1166.0611572265625, |
|
"logps/rejected": -1549.630126953125, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8860333561897278, |
|
"rewards/margins": 0.42343559861183167, |
|
"rewards/rejected": -1.3094689846038818, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160124413210918e-06, |
|
"logits/chosen": 0.35506299138069153, |
|
"logits/rejected": 0.2409767210483551, |
|
"logps/chosen": -1092.040283203125, |
|
"logps/rejected": -1912.9970703125, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.802148163318634, |
|
"rewards/margins": 0.8678997755050659, |
|
"rewards/rejected": -1.6700481176376343, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1217771864447396e-06, |
|
"logits/chosen": 0.3243677616119385, |
|
"logits/rejected": 0.17696735262870789, |
|
"logps/chosen": -991.9781494140625, |
|
"logps/rejected": -2300.01806640625, |
|
"loss": 0.4563, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6990408897399902, |
|
"rewards/margins": 1.3517526388168335, |
|
"rewards/rejected": -2.050793409347534, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.08389027490255e-06, |
|
"logits/chosen": 0.27917546033859253, |
|
"logits/rejected": 0.13479743897914886, |
|
"logps/chosen": -1405.369384765625, |
|
"logps/rejected": -2042.785888671875, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.150667428970337, |
|
"rewards/margins": 0.6895908713340759, |
|
"rewards/rejected": -1.8402583599090576, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.046476332505036e-06, |
|
"logits/chosen": 0.3343364894390106, |
|
"logits/rejected": 0.23037847876548767, |
|
"logps/chosen": -1098.7138671875, |
|
"logps/rejected": -2268.34130859375, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8331190347671509, |
|
"rewards/margins": 1.2149721384048462, |
|
"rewards/rejected": -2.048090934753418, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0095478552050348e-06, |
|
"logits/chosen": 0.26566246151924133, |
|
"logits/rejected": 0.2032911777496338, |
|
"logps/chosen": -956.0791015625, |
|
"logps/rejected": -1707.116943359375, |
|
"loss": 0.4575, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7228320837020874, |
|
"rewards/margins": 0.797810435295105, |
|
"rewards/rejected": -1.5206425189971924, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.731171768139808e-07, |
|
"logits/chosen": 0.3556608557701111, |
|
"logits/rejected": 0.28849393129348755, |
|
"logps/chosen": -1063.8748779296875, |
|
"logps/rejected": -1503.830322265625, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8245790600776672, |
|
"rewards/margins": 0.47296270728111267, |
|
"rewards/rejected": -1.2975417375564575, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.371964648825221e-07, |
|
"logits/chosen": 0.3505791425704956, |
|
"logits/rejected": 0.22841492295265198, |
|
"logps/chosen": -1045.915283203125, |
|
"logps/rejected": -2175.98876953125, |
|
"loss": 0.4619, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7385074496269226, |
|
"rewards/margins": 1.1998847723007202, |
|
"rewards/rejected": -1.9383922815322876, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.017977166366445e-07, |
|
"logits/chosen": 0.2420744001865387, |
|
"logits/rejected": 0.17516903579235077, |
|
"logps/chosen": -1474.8148193359375, |
|
"logps/rejected": -1958.3011474609375, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.2239506244659424, |
|
"rewards/margins": 0.5244899988174438, |
|
"rewards/rejected": -1.7484405040740967, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.669327549707096e-07, |
|
"logits/chosen": 0.2893267571926117, |
|
"logits/rejected": 0.18889647722244263, |
|
"logps/chosen": -1455.797607421875, |
|
"logps/rejected": -2097.127685546875, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1924570798873901, |
|
"rewards/margins": 0.6752533912658691, |
|
"rewards/rejected": -1.8677103519439697, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.326132244986932e-07, |
|
"logits/chosen": 0.24378347396850586, |
|
"logits/rejected": 0.050407588481903076, |
|
"logps/chosen": -1516.9559326171875, |
|
"logps/rejected": -2759.12353515625, |
|
"loss": 0.4595, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.2377126216888428, |
|
"rewards/margins": 1.2976287603378296, |
|
"rewards/rejected": -2.535341262817383, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.988505876649863e-07, |
|
"logits/chosen": 0.2632651925086975, |
|
"logits/rejected": 0.18519091606140137, |
|
"logps/chosen": -1460.3145751953125, |
|
"logps/rejected": -1786.594482421875, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.1815235614776611, |
|
"rewards/margins": 0.385366290807724, |
|
"rewards/rejected": -1.566890001296997, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.656561209160248e-07, |
|
"logits/chosen": 0.2761257290840149, |
|
"logits/rejected": 0.16277745366096497, |
|
"logps/chosen": -1284.623291015625, |
|
"logps/rejected": -2513.677734375, |
|
"loss": 0.4624, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9854960441589355, |
|
"rewards/margins": 1.282775640487671, |
|
"rewards/rejected": -2.2682716846466064, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.330409109340563e-07, |
|
"logits/chosen": 0.2461864948272705, |
|
"logits/rejected": 0.16639626026153564, |
|
"logps/chosen": -1436.0191650390625, |
|
"logps/rejected": -2242.687744140625, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.1464489698410034, |
|
"rewards/margins": 0.8480439186096191, |
|
"rewards/rejected": -1.9944928884506226, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.010158509342682e-07, |
|
"logits/chosen": 0.2478228360414505, |
|
"logits/rejected": 0.1436949521303177, |
|
"logps/chosen": -1367.794921875, |
|
"logps/rejected": -2203.85107421875, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.1351174116134644, |
|
"rewards/margins": 0.8538748621940613, |
|
"rewards/rejected": -1.9889923334121704, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.695916370265529e-07, |
|
"logits/chosen": 0.27428361773490906, |
|
"logits/rejected": 0.18057170510292053, |
|
"logps/chosen": -1440.5146484375, |
|
"logps/rejected": -2161.803466796875, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.1880239248275757, |
|
"rewards/margins": 0.7238161563873291, |
|
"rewards/rejected": -1.9118402004241943, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.387787646430854e-07, |
|
"logits/chosen": 0.25450989603996277, |
|
"logits/rejected": 0.1020331159234047, |
|
"logps/chosen": -1392.365966796875, |
|
"logps/rejected": -2656.48974609375, |
|
"loss": 0.4575, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.118877649307251, |
|
"rewards/margins": 1.2837135791778564, |
|
"rewards/rejected": -2.4025912284851074, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.085875250329401e-07, |
|
"logits/chosen": 0.3250389099121094, |
|
"logits/rejected": 0.23088189959526062, |
|
"logps/chosen": -1277.065673828125, |
|
"logps/rejected": -2237.5419921875, |
|
"loss": 0.4588, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.98078453540802, |
|
"rewards/margins": 1.0101826190948486, |
|
"rewards/rejected": -1.9909673929214478, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.79028001824894e-07, |
|
"logits/chosen": 0.34990447759628296, |
|
"logits/rejected": 0.1642770618200302, |
|
"logps/chosen": -1346.687744140625, |
|
"logps/rejected": -3187.396484375, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0415836572647095, |
|
"rewards/margins": 1.8741792440414429, |
|
"rewards/rejected": -2.9157626628875732, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.501100676595761e-07, |
|
"logits/chosen": 0.2536852955818176, |
|
"logits/rejected": 0.1401246041059494, |
|
"logps/chosen": -1562.163818359375, |
|
"logps/rejected": -2294.75732421875, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2489855289459229, |
|
"rewards/margins": 0.7926613092422485, |
|
"rewards/rejected": -2.041646957397461, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.218433808920884e-07, |
|
"logits/chosen": 0.2926151752471924, |
|
"logits/rejected": 0.09962544590234756, |
|
"logps/chosen": -1433.572509765625, |
|
"logps/rejected": -2299.615478515625, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.1709363460540771, |
|
"rewards/margins": 0.9143635630607605, |
|
"rewards/rejected": -2.0853002071380615, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.942373823661928e-07, |
|
"logits/chosen": 0.23216836154460907, |
|
"logits/rejected": 0.19754758477210999, |
|
"logps/chosen": -1521.939208984375, |
|
"logps/rejected": -2178.3291015625, |
|
"loss": 0.4698, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.2342188358306885, |
|
"rewards/margins": 0.6809908151626587, |
|
"rewards/rejected": -1.9152095317840576, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6730129226114363e-07, |
|
"logits/chosen": 0.19226306676864624, |
|
"logits/rejected": 0.13501006364822388, |
|
"logps/chosen": -1532.320068359375, |
|
"logps/rejected": -2355.093505859375, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.2820327281951904, |
|
"rewards/margins": 0.8527010679244995, |
|
"rewards/rejected": -2.1347339153289795, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4104410701222703e-07, |
|
"logits/chosen": 0.15366807579994202, |
|
"logits/rejected": 0.11835174262523651, |
|
"logps/chosen": -1608.6761474609375, |
|
"logps/rejected": -2489.91455078125, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.338335394859314, |
|
"rewards/margins": 0.8876321911811829, |
|
"rewards/rejected": -2.2259676456451416, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.154745963060197e-07, |
|
"logits/chosen": 0.21381524205207825, |
|
"logits/rejected": 0.0645713359117508, |
|
"logps/chosen": -1354.0247802734375, |
|
"logps/rejected": -2909.98828125, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0845643281936646, |
|
"rewards/margins": 1.571176290512085, |
|
"rewards/rejected": -2.655740737915039, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9060130015138863e-07, |
|
"logits/chosen": 0.25924235582351685, |
|
"logits/rejected": 0.1109732836484909, |
|
"logps/chosen": -1437.39501953125, |
|
"logps/rejected": -2759.584228515625, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1647388935089111, |
|
"rewards/margins": 1.3532658815383911, |
|
"rewards/rejected": -2.5180046558380127, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.664325260271953e-07, |
|
"logits/chosen": 0.22887060046195984, |
|
"logits/rejected": 0.09053263813257217, |
|
"logps/chosen": -1473.260009765625, |
|
"logps/rejected": -1995.0269775390625, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -1.2130258083343506, |
|
"rewards/margins": 0.5937215089797974, |
|
"rewards/rejected": -1.8067471981048584, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.429763461076677e-07, |
|
"logits/chosen": 0.1899276226758957, |
|
"logits/rejected": 0.12356813251972198, |
|
"logps/chosen": -1743.064453125, |
|
"logps/rejected": -2304.783203125, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.4444160461425781, |
|
"rewards/margins": 0.6152055859565735, |
|
"rewards/rejected": -2.059621572494507, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.202405945663556e-07, |
|
"logits/chosen": 0.22914421558380127, |
|
"logits/rejected": 0.09422020614147186, |
|
"logps/chosen": -1509.6998291015625, |
|
"logps/rejected": -2195.837646484375, |
|
"loss": 0.4638, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -1.2225978374481201, |
|
"rewards/margins": 0.7319514155387878, |
|
"rewards/rejected": -1.9545494318008423, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.982328649595856e-07, |
|
"logits/chosen": 0.24722608923912048, |
|
"logits/rejected": 0.10591373592615128, |
|
"logps/chosen": -1233.9052734375, |
|
"logps/rejected": -2268.322509765625, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9642356634140015, |
|
"rewards/margins": 1.0845736265182495, |
|
"rewards/rejected": -2.04880952835083, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7696050769026954e-07, |
|
"logits/chosen": 0.21008674800395966, |
|
"logits/rejected": 0.05934596806764603, |
|
"logps/chosen": -1442.0106201171875, |
|
"logps/rejected": -2874.48388671875, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2054154872894287, |
|
"rewards/margins": 1.4088830947875977, |
|
"rewards/rejected": -2.6142985820770264, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.564306275529341e-07, |
|
"logits/chosen": 0.18529877066612244, |
|
"logits/rejected": 0.12559422850608826, |
|
"logps/chosen": -1704.0299072265625, |
|
"logps/rejected": -2808.08349609375, |
|
"loss": 0.4591, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.4349021911621094, |
|
"rewards/margins": 1.1065049171447754, |
|
"rewards/rejected": -2.5414071083068848, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3665008136077332e-07, |
|
"logits/chosen": 0.19881121814250946, |
|
"logits/rejected": 0.17202343046665192, |
|
"logps/chosen": -1710.2633056640625, |
|
"logps/rejected": -1984.5556640625, |
|
"loss": 0.473, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.3971118927001953, |
|
"rewards/margins": 0.33112001419067383, |
|
"rewards/rejected": -1.7282320261001587, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1762547565553293e-07, |
|
"logits/chosen": 0.17657816410064697, |
|
"logits/rejected": 0.11265295743942261, |
|
"logps/chosen": -1725.0482177734375, |
|
"logps/rejected": -1982.76953125, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -1.5021684169769287, |
|
"rewards/margins": 0.2558698058128357, |
|
"rewards/rejected": -1.7580381631851196, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.993631645009747e-07, |
|
"logits/chosen": 0.19522444903850555, |
|
"logits/rejected": 0.058800529688596725, |
|
"logps/chosen": -1578.4208984375, |
|
"logps/rejected": -2554.65185546875, |
|
"loss": 0.4675, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.2845125198364258, |
|
"rewards/margins": 1.006216049194336, |
|
"rewards/rejected": -2.290728807449341, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.818692473606748e-07, |
|
"logits/chosen": 0.2271948605775833, |
|
"logits/rejected": 0.18108686804771423, |
|
"logps/chosen": -1478.1927490234375, |
|
"logps/rejected": -2156.734375, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.2414519786834717, |
|
"rewards/margins": 0.7026728391647339, |
|
"rewards/rejected": -1.9441248178482056, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6514956706084885e-07, |
|
"logits/chosen": 0.23735575377941132, |
|
"logits/rejected": 0.11482490599155426, |
|
"logps/chosen": -1801.324462890625, |
|
"logps/rejected": -2704.887939453125, |
|
"loss": 0.4736, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.552073359489441, |
|
"rewards/margins": 0.8944045901298523, |
|
"rewards/rejected": -2.4464781284332275, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4920970783889737e-07, |
|
"logits/chosen": 0.22280173003673553, |
|
"logits/rejected": 0.11919368803501129, |
|
"logps/chosen": -1566.2589111328125, |
|
"logps/rejected": -2471.8125, |
|
"loss": 0.4684, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2630040645599365, |
|
"rewards/margins": 0.966931164264679, |
|
"rewards/rejected": -2.229935646057129, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.340549934783164e-07, |
|
"logits/chosen": 0.2689998745918274, |
|
"logits/rejected": 0.12245997041463852, |
|
"logps/chosen": -1098.0716552734375, |
|
"logps/rejected": -2332.2578125, |
|
"loss": 0.4659, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8609301447868347, |
|
"rewards/margins": 1.2441167831420898, |
|
"rewards/rejected": -2.1050469875335693, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.196904855305961e-07, |
|
"logits/chosen": 0.2383730709552765, |
|
"logits/rejected": 0.15037932991981506, |
|
"logps/chosen": -1544.904052734375, |
|
"logps/rejected": -2499.219482421875, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.2684749364852905, |
|
"rewards/margins": 0.9886786341667175, |
|
"rewards/rejected": -2.2571537494659424, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0612098162470302e-07, |
|
"logits/chosen": 0.20837631821632385, |
|
"logits/rejected": 0.1260487288236618, |
|
"logps/chosen": -1376.4371337890625, |
|
"logps/rejected": -2311.586669921875, |
|
"loss": 0.4467, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.126542329788208, |
|
"rewards/margins": 0.9472710490226746, |
|
"rewards/rejected": -2.0738134384155273, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.335101386471285e-08, |
|
"logits/chosen": 0.2322504222393036, |
|
"logits/rejected": 0.06627029925584793, |
|
"logps/chosen": -1435.283447265625, |
|
"logps/rejected": -2674.15576171875, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.151064157485962, |
|
"rewards/margins": 1.2704349756240845, |
|
"rewards/rejected": -2.421499252319336, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.138484731612273e-08, |
|
"logits/chosen": 0.2155352383852005, |
|
"logits/rejected": 0.12622274458408356, |
|
"logps/chosen": -1182.394287109375, |
|
"logps/rejected": -2245.24462890625, |
|
"loss": 0.4629, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.9620729684829712, |
|
"rewards/margins": 1.0421679019927979, |
|
"rewards/rejected": -2.0042405128479004, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.022647858135501e-08, |
|
"logits/chosen": 0.30309510231018066, |
|
"logits/rejected": 0.18017789721488953, |
|
"logps/chosen": -1599.3291015625, |
|
"logps/rejected": -2475.38720703125, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.3132779598236084, |
|
"rewards/margins": 0.8995476961135864, |
|
"rewards/rejected": -2.2128255367279053, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.987963446492384e-08, |
|
"logits/chosen": 0.23334476351737976, |
|
"logits/rejected": 0.17126549780368805, |
|
"logps/chosen": -1491.8856201171875, |
|
"logps/rejected": -2064.55078125, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.2208709716796875, |
|
"rewards/margins": 0.6093058586120605, |
|
"rewards/rejected": -1.8301767110824585, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.034777072871394e-08, |
|
"logits/chosen": 0.23894283175468445, |
|
"logits/rejected": 0.16225464642047882, |
|
"logps/chosen": -1209.31494140625, |
|
"logps/rejected": -1923.9974365234375, |
|
"loss": 0.4748, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.9387443661689758, |
|
"rewards/margins": 0.7473281621932983, |
|
"rewards/rejected": -1.6860727071762085, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.163407093778243e-08, |
|
"logits/chosen": 0.30054157972335815, |
|
"logits/rejected": 0.17386284470558167, |
|
"logps/chosen": -1040.991455078125, |
|
"logps/rejected": -2445.29541015625, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7577398419380188, |
|
"rewards/margins": 1.4329960346221924, |
|
"rewards/rejected": -2.1907360553741455, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.37414453970758e-08, |
|
"logits/chosen": 0.303236186504364, |
|
"logits/rejected": 0.1971709430217743, |
|
"logps/chosen": -1248.239501953125, |
|
"logps/rejected": -2541.384033203125, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9271729588508606, |
|
"rewards/margins": 1.3627898693084717, |
|
"rewards/rejected": -2.2899627685546875, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.6672530179410183e-08, |
|
"logits/chosen": 0.25464674830436707, |
|
"logits/rejected": 0.13462017476558685, |
|
"logps/chosen": -1484.759521484375, |
|
"logps/rejected": -2381.3857421875, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2104871273040771, |
|
"rewards/margins": 0.955074667930603, |
|
"rewards/rejected": -2.165562152862549, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.04296862450451e-08, |
|
"logits/chosen": 0.34345191717147827, |
|
"logits/rejected": 0.1766502857208252, |
|
"logps/chosen": -1336.6195068359375, |
|
"logps/rejected": -2531.597412109375, |
|
"loss": 0.4675, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.0585607290267944, |
|
"rewards/margins": 1.2248413562774658, |
|
"rewards/rejected": -2.28340220451355, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.501499865314171e-08, |
|
"logits/chosen": 0.31596893072128296, |
|
"logits/rejected": 0.17752663791179657, |
|
"logps/chosen": -1208.4625244140625, |
|
"logps/rejected": -2460.017578125, |
|
"loss": 0.4534, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9176605939865112, |
|
"rewards/margins": 1.2910888195037842, |
|
"rewards/rejected": -2.208749294281006, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0430275865371265e-08, |
|
"logits/chosen": 0.30796024203300476, |
|
"logits/rejected": 0.15131710469722748, |
|
"logps/chosen": -1164.2542724609375, |
|
"logps/rejected": -2230.33056640625, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8924150466918945, |
|
"rewards/margins": 1.0938717126846313, |
|
"rewards/rejected": -1.9862868785858154, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.677049141901315e-09, |
|
"logits/chosen": 0.26449787616729736, |
|
"logits/rejected": 0.12270595878362656, |
|
"logps/chosen": -1493.645263671875, |
|
"logps/rejected": -2633.17626953125, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.2356170415878296, |
|
"rewards/margins": 1.1605656147003174, |
|
"rewards/rejected": -2.3961825370788574, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.756572029968708e-09, |
|
"logits/chosen": 0.23211045563220978, |
|
"logits/rejected": 0.13400281965732574, |
|
"logps/chosen": -1511.829345703125, |
|
"logps/rejected": -2489.31494140625, |
|
"loss": 0.4594, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2328951358795166, |
|
"rewards/margins": 1.0022671222686768, |
|
"rewards/rejected": -2.2351622581481934, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.6698199452053199e-09, |
|
"logits/chosen": 0.19983918964862823, |
|
"logits/rejected": 0.11516892910003662, |
|
"logps/chosen": -1396.664306640625, |
|
"logps/rejected": -2378.28857421875, |
|
"loss": 0.4543, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.1454143524169922, |
|
"rewards/margins": 1.0044304132461548, |
|
"rewards/rejected": -2.1498446464538574, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.1748984585560094e-10, |
|
"logits/chosen": 0.2773471474647522, |
|
"logits/rejected": 0.1175018697977066, |
|
"logps/chosen": -1402.577392578125, |
|
"logps/rejected": -2661.568603515625, |
|
"loss": 0.4649, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.132912039756775, |
|
"rewards/margins": 1.262406587600708, |
|
"rewards/rejected": -2.3953185081481934, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.21633613109588623, |
|
"logits/rejected": 0.111175537109375, |
|
"logps/chosen": -1688.2955322265625, |
|
"logps/rejected": -2615.567626953125, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -1.4329384565353394, |
|
"rewards/margins": 0.9476302862167358, |
|
"rewards/rejected": -2.380568504333496, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0024642594821790128, |
|
"train_runtime": 126.2817, |
|
"train_samples_per_second": 484.116, |
|
"train_steps_per_second": 15.125 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|