|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -2.6577353477478027, |
|
"logits/rejected": -2.043900489807129, |
|
"logps/chosen": -505.98724365234375, |
|
"logps/rejected": -319.40179443359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -2.477166175842285, |
|
"logits/rejected": -2.134406089782715, |
|
"logps/chosen": -285.34820556640625, |
|
"logps/rejected": -191.54904174804688, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.2777777910232544, |
|
"rewards/chosen": 8.868110307957977e-05, |
|
"rewards/margins": -0.00045536039397120476, |
|
"rewards/rejected": 0.0005440415116026998, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -2.361990213394165, |
|
"logits/rejected": -2.128539562225342, |
|
"logps/chosen": -271.50360107421875, |
|
"logps/rejected": -208.84353637695312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": 0.0001849079126259312, |
|
"rewards/margins": -0.0003974610008299351, |
|
"rewards/rejected": 0.0005823688698001206, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -2.2791178226470947, |
|
"logits/rejected": -2.2503268718719482, |
|
"logps/chosen": -269.80023193359375, |
|
"logps/rejected": -289.0177307128906, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0028744328301399946, |
|
"rewards/margins": 0.002431499771773815, |
|
"rewards/rejected": 0.00044293305836617947, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -2.5082411766052246, |
|
"logits/rejected": -2.340522050857544, |
|
"logps/chosen": -210.58447265625, |
|
"logps/rejected": -181.58480834960938, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0015180089976638556, |
|
"rewards/margins": 0.0007624348509125412, |
|
"rewards/rejected": 0.0007555742631666362, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -2.3627030849456787, |
|
"logits/rejected": -2.3390040397644043, |
|
"logps/chosen": -195.58641052246094, |
|
"logps/rejected": -211.3288116455078, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0013842754997313023, |
|
"rewards/margins": 0.0027850826736539602, |
|
"rewards/rejected": -0.0014008075231686234, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -2.494563341140747, |
|
"logits/rejected": -2.3034842014312744, |
|
"logps/chosen": -244.6390838623047, |
|
"logps/rejected": -277.8663635253906, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.009891030378639698, |
|
"rewards/margins": 0.0002678747696336359, |
|
"rewards/rejected": -0.010158904828131199, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -2.2501957416534424, |
|
"logits/rejected": -2.2676117420196533, |
|
"logps/chosen": -229.9625244140625, |
|
"logps/rejected": -216.9250946044922, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.011323804967105389, |
|
"rewards/margins": 0.008610954508185387, |
|
"rewards/rejected": -0.0199347585439682, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -2.401120901107788, |
|
"logits/rejected": -2.3949601650238037, |
|
"logps/chosen": -265.9338684082031, |
|
"logps/rejected": -262.85272216796875, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.012857760302722454, |
|
"rewards/margins": 0.021094320341944695, |
|
"rewards/rejected": -0.033952079713344574, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -2.330019474029541, |
|
"logits/rejected": -1.993003487586975, |
|
"logps/chosen": -267.9632873535156, |
|
"logps/rejected": -179.67996215820312, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.025740137323737144, |
|
"rewards/margins": 0.021240444853901863, |
|
"rewards/rejected": -0.04698058217763901, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.310814380645752, |
|
"logits/rejected": -2.3143043518066406, |
|
"logps/chosen": -274.63226318359375, |
|
"logps/rejected": -250.58740234375, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.028762299567461014, |
|
"rewards/margins": 0.010499788448214531, |
|
"rewards/rejected": -0.0392620824277401, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -2.2598509788513184, |
|
"logits/rejected": -2.14603853225708, |
|
"logps/chosen": -211.3937225341797, |
|
"logps/rejected": -179.83021545410156, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.006855879910290241, |
|
"rewards/margins": 0.036714259535074234, |
|
"rewards/rejected": -0.029858380556106567, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -2.1621768474578857, |
|
"logits/rejected": -2.172668933868408, |
|
"logps/chosen": -236.74301147460938, |
|
"logps/rejected": -268.79986572265625, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.004106069449335337, |
|
"rewards/margins": 0.048674389719963074, |
|
"rewards/rejected": -0.052780456840991974, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -2.4506375789642334, |
|
"logits/rejected": -2.3234333992004395, |
|
"logps/chosen": -235.06594848632812, |
|
"logps/rejected": -192.08798217773438, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.008781708776950836, |
|
"rewards/margins": 0.04695295915007591, |
|
"rewards/rejected": -0.05573466420173645, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -2.3371706008911133, |
|
"logits/rejected": -2.0340487957000732, |
|
"logps/chosen": -201.59564208984375, |
|
"logps/rejected": -182.42376708984375, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10378706455230713, |
|
"rewards/margins": 0.04835032299160957, |
|
"rewards/rejected": -0.1521373838186264, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -2.3194079399108887, |
|
"logits/rejected": -2.2057042121887207, |
|
"logps/chosen": -232.71102905273438, |
|
"logps/rejected": -202.6046600341797, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04620756581425667, |
|
"rewards/margins": 0.10107270628213882, |
|
"rewards/rejected": -0.1472802758216858, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -2.2133073806762695, |
|
"logits/rejected": -2.286154270172119, |
|
"logps/chosen": -283.87872314453125, |
|
"logps/rejected": -295.348388671875, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.3122519850730896, |
|
"rewards/margins": 0.053290385752916336, |
|
"rewards/rejected": -0.3655424416065216, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -2.431159019470215, |
|
"logits/rejected": -2.0319712162017822, |
|
"logps/chosen": -291.0899353027344, |
|
"logps/rejected": -253.94198608398438, |
|
"loss": 0.6407, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.2418215274810791, |
|
"rewards/margins": 0.04736508056521416, |
|
"rewards/rejected": -0.2891865670681, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -2.2349159717559814, |
|
"logits/rejected": -2.2275729179382324, |
|
"logps/chosen": -238.7818603515625, |
|
"logps/rejected": -243.41622924804688, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.12219883501529694, |
|
"rewards/margins": 0.04631805419921875, |
|
"rewards/rejected": -0.1685168743133545, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -2.2396342754364014, |
|
"logits/rejected": -2.1675162315368652, |
|
"logps/chosen": -255.7926025390625, |
|
"logps/rejected": -241.1310577392578, |
|
"loss": 0.6489, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.19492614269256592, |
|
"rewards/margins": 0.1306600272655487, |
|
"rewards/rejected": -0.325586199760437, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -2.2987706661224365, |
|
"logits/rejected": -2.15234375, |
|
"logps/chosen": -283.39788818359375, |
|
"logps/rejected": -306.2623596191406, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.29035812616348267, |
|
"rewards/margins": 0.15090219676494598, |
|
"rewards/rejected": -0.44126027822494507, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -2.449690818786621, |
|
"logits/rejected": -2.0890920162200928, |
|
"logps/chosen": -297.98785400390625, |
|
"logps/rejected": -251.8286895751953, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.24480652809143066, |
|
"rewards/margins": 0.36872270703315735, |
|
"rewards/rejected": -0.6135291457176208, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -2.3042047023773193, |
|
"logits/rejected": -1.8709608316421509, |
|
"logps/chosen": -323.5066833496094, |
|
"logps/rejected": -318.5735168457031, |
|
"loss": 0.6452, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8693976402282715, |
|
"rewards/margins": 0.33919450640678406, |
|
"rewards/rejected": -1.208592176437378, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -2.284022569656372, |
|
"logits/rejected": -2.3184571266174316, |
|
"logps/chosen": -280.39013671875, |
|
"logps/rejected": -322.4888610839844, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.8154991269111633, |
|
"rewards/margins": 0.15407198667526245, |
|
"rewards/rejected": -0.9695711135864258, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -2.2206528186798096, |
|
"logits/rejected": -2.040665864944458, |
|
"logps/chosen": -288.07427978515625, |
|
"logps/rejected": -295.1524963378906, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.8145530819892883, |
|
"rewards/margins": 0.19842155277729034, |
|
"rewards/rejected": -1.0129746198654175, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -2.190338373184204, |
|
"logits/rejected": -2.0681099891662598, |
|
"logps/chosen": -246.13955688476562, |
|
"logps/rejected": -268.1896667480469, |
|
"loss": 0.6422, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7617325782775879, |
|
"rewards/margins": 0.19349338114261627, |
|
"rewards/rejected": -0.955225944519043, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -2.2392935752868652, |
|
"logits/rejected": -1.9916995763778687, |
|
"logps/chosen": -346.5711669921875, |
|
"logps/rejected": -366.2030944824219, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7316231727600098, |
|
"rewards/margins": 0.35942238569259644, |
|
"rewards/rejected": -1.0910453796386719, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -2.201519012451172, |
|
"logits/rejected": -1.9899609088897705, |
|
"logps/chosen": -315.45465087890625, |
|
"logps/rejected": -312.4045104980469, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6710847020149231, |
|
"rewards/margins": 0.3409194350242615, |
|
"rewards/rejected": -1.0120041370391846, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -1.9985568523406982, |
|
"logits/rejected": -2.0734457969665527, |
|
"logps/chosen": -236.2917022705078, |
|
"logps/rejected": -308.3038024902344, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6992406249046326, |
|
"rewards/margins": 0.36759305000305176, |
|
"rewards/rejected": -1.066833734512329, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -2.3794713020324707, |
|
"logits/rejected": -2.083773136138916, |
|
"logps/chosen": -338.1396789550781, |
|
"logps/rejected": -312.3577880859375, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6138805747032166, |
|
"rewards/margins": 0.2969156801700592, |
|
"rewards/rejected": -0.9107962846755981, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -2.196028470993042, |
|
"logits/rejected": -2.1599950790405273, |
|
"logps/chosen": -352.38922119140625, |
|
"logps/rejected": -310.0411376953125, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.6144166588783264, |
|
"rewards/margins": 0.37919124960899353, |
|
"rewards/rejected": -0.9936079978942871, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -2.0284199714660645, |
|
"logits/rejected": -2.0696732997894287, |
|
"logps/chosen": -299.6443786621094, |
|
"logps/rejected": -365.4646911621094, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -1.0700325965881348, |
|
"rewards/margins": 0.24421080946922302, |
|
"rewards/rejected": -1.3142435550689697, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -2.0705060958862305, |
|
"logits/rejected": -1.7289657592773438, |
|
"logps/chosen": -373.0816955566406, |
|
"logps/rejected": -348.92218017578125, |
|
"loss": 0.6091, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.1900722980499268, |
|
"rewards/margins": 0.5464798212051392, |
|
"rewards/rejected": -1.7365522384643555, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -2.163390636444092, |
|
"logits/rejected": -2.1936163902282715, |
|
"logps/chosen": -382.85137939453125, |
|
"logps/rejected": -361.06494140625, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -1.100768804550171, |
|
"rewards/margins": 0.1708061397075653, |
|
"rewards/rejected": -1.2715749740600586, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -2.121084690093994, |
|
"logits/rejected": -1.8607723712921143, |
|
"logps/chosen": -273.9230651855469, |
|
"logps/rejected": -291.5062561035156, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8008276224136353, |
|
"rewards/margins": 0.5303484201431274, |
|
"rewards/rejected": -1.3311761617660522, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -2.2258460521698, |
|
"logits/rejected": -1.9319546222686768, |
|
"logps/chosen": -454.8755798339844, |
|
"logps/rejected": -460.62982177734375, |
|
"loss": 0.5902, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.990256905555725, |
|
"rewards/margins": 0.35523930191993713, |
|
"rewards/rejected": -2.345496416091919, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -2.1903085708618164, |
|
"logits/rejected": -2.0086958408355713, |
|
"logps/chosen": -601.3458862304688, |
|
"logps/rejected": -612.4051513671875, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.4381797313690186, |
|
"rewards/margins": 0.47519922256469727, |
|
"rewards/rejected": -3.913378953933716, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -2.0529446601867676, |
|
"logits/rejected": -1.9928582906723022, |
|
"logps/chosen": -529.0535278320312, |
|
"logps/rejected": -547.7728271484375, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.4125945568084717, |
|
"rewards/margins": 0.22588801383972168, |
|
"rewards/rejected": -3.6384825706481934, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -1.9475816488265991, |
|
"logits/rejected": -1.9098947048187256, |
|
"logps/chosen": -375.98992919921875, |
|
"logps/rejected": -415.5345153808594, |
|
"loss": 0.5732, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.8701465129852295, |
|
"rewards/margins": 0.3048131763935089, |
|
"rewards/rejected": -2.174959659576416, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -2.0606818199157715, |
|
"logits/rejected": -1.8960349559783936, |
|
"logps/chosen": -316.76715087890625, |
|
"logps/rejected": -320.61114501953125, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2516812086105347, |
|
"rewards/margins": 0.4567112922668457, |
|
"rewards/rejected": -1.7083925008773804, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -2.0786519050598145, |
|
"logits/rejected": -2.10847806930542, |
|
"logps/chosen": -361.7239074707031, |
|
"logps/rejected": -400.70941162109375, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4803402423858643, |
|
"rewards/margins": 0.255945086479187, |
|
"rewards/rejected": -1.7362852096557617, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -2.1878209114074707, |
|
"logits/rejected": -1.96550714969635, |
|
"logps/chosen": -349.62078857421875, |
|
"logps/rejected": -385.7138366699219, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8981186151504517, |
|
"rewards/margins": 0.3005538880825043, |
|
"rewards/rejected": -1.1986725330352783, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -2.3381173610687256, |
|
"logits/rejected": -2.1147091388702393, |
|
"logps/chosen": -364.11090087890625, |
|
"logps/rejected": -345.1046447753906, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.9675353169441223, |
|
"rewards/margins": 0.05901508405804634, |
|
"rewards/rejected": -1.0265504121780396, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -2.1770806312561035, |
|
"logits/rejected": -2.23179292678833, |
|
"logps/chosen": -457.45672607421875, |
|
"logps/rejected": -525.1158447265625, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5764873027801514, |
|
"rewards/margins": 0.5498846769332886, |
|
"rewards/rejected": -2.1263718605041504, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -2.175420045852661, |
|
"logits/rejected": -1.8347841501235962, |
|
"logps/chosen": -312.1004943847656, |
|
"logps/rejected": -321.1251525878906, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.281625509262085, |
|
"rewards/margins": 0.4661393165588379, |
|
"rewards/rejected": -1.7477649450302124, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -2.1562466621398926, |
|
"logits/rejected": -1.9570480585098267, |
|
"logps/chosen": -400.806396484375, |
|
"logps/rejected": -429.9332580566406, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.3771603107452393, |
|
"rewards/margins": 0.6382063031196594, |
|
"rewards/rejected": -2.015366554260254, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -2.1951937675476074, |
|
"logits/rejected": -2.1014106273651123, |
|
"logps/chosen": -404.2888488769531, |
|
"logps/rejected": -422.8614807128906, |
|
"loss": 0.5802, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7574684619903564, |
|
"rewards/margins": 0.48904165625572205, |
|
"rewards/rejected": -2.2465100288391113, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -2.0647380352020264, |
|
"logits/rejected": -1.9699338674545288, |
|
"logps/chosen": -319.4429626464844, |
|
"logps/rejected": -336.6853332519531, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1.5423263311386108, |
|
"rewards/margins": 0.34772855043411255, |
|
"rewards/rejected": -1.8900550603866577, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -2.0104379653930664, |
|
"logits/rejected": -1.9088401794433594, |
|
"logps/chosen": -430.5732421875, |
|
"logps/rejected": -496.94720458984375, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.6480965614318848, |
|
"rewards/margins": 0.45647579431533813, |
|
"rewards/rejected": -3.1045725345611572, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -2.1025550365448, |
|
"logits/rejected": -1.8321536779403687, |
|
"logps/chosen": -518.1032104492188, |
|
"logps/rejected": -518.704345703125, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.717057466506958, |
|
"rewards/margins": 0.5123800039291382, |
|
"rewards/rejected": -3.2294373512268066, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -2.1220908164978027, |
|
"logits/rejected": -2.0493388175964355, |
|
"logps/chosen": -415.57196044921875, |
|
"logps/rejected": -471.4869079589844, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.2129175662994385, |
|
"rewards/margins": 0.5477197766304016, |
|
"rewards/rejected": -2.7606372833251953, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -2.2190887928009033, |
|
"logits/rejected": -2.0425148010253906, |
|
"logps/chosen": -435.57763671875, |
|
"logps/rejected": -455.32757568359375, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.938948392868042, |
|
"rewards/margins": 0.5450895428657532, |
|
"rewards/rejected": -2.4840381145477295, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -2.1195268630981445, |
|
"logits/rejected": -1.9826160669326782, |
|
"logps/chosen": -442.97576904296875, |
|
"logps/rejected": -464.6687927246094, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -2.141632080078125, |
|
"rewards/margins": 0.47049254179000854, |
|
"rewards/rejected": -2.6121246814727783, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -2.1186394691467285, |
|
"logits/rejected": -2.0721774101257324, |
|
"logps/chosen": -458.518310546875, |
|
"logps/rejected": -493.88653564453125, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1196634769439697, |
|
"rewards/margins": 0.24448445439338684, |
|
"rewards/rejected": -2.364147663116455, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -2.1211726665496826, |
|
"logits/rejected": -1.8444690704345703, |
|
"logps/chosen": -473.51470947265625, |
|
"logps/rejected": -447.91400146484375, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1058106422424316, |
|
"rewards/margins": 0.4687918722629547, |
|
"rewards/rejected": -2.5746026039123535, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -2.0425033569335938, |
|
"logits/rejected": -1.8863608837127686, |
|
"logps/chosen": -412.14239501953125, |
|
"logps/rejected": -484.99334716796875, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.103408098220825, |
|
"rewards/margins": 0.9393216967582703, |
|
"rewards/rejected": -3.042729616165161, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -2.1661436557769775, |
|
"logits/rejected": -2.238974094390869, |
|
"logps/chosen": -504.8619689941406, |
|
"logps/rejected": -558.937744140625, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.069854736328125, |
|
"rewards/margins": 0.780713677406311, |
|
"rewards/rejected": -2.8505682945251465, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -2.04276967048645, |
|
"logits/rejected": -2.0428547859191895, |
|
"logps/chosen": -422.66351318359375, |
|
"logps/rejected": -454.6181640625, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.6807305812835693, |
|
"rewards/margins": 0.5961163640022278, |
|
"rewards/rejected": -2.2768468856811523, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -2.0288186073303223, |
|
"logits/rejected": -1.9108963012695312, |
|
"logps/chosen": -413.84832763671875, |
|
"logps/rejected": -435.688232421875, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.9628708362579346, |
|
"rewards/margins": 0.696241557598114, |
|
"rewards/rejected": -2.6591122150421143, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -1.9494431018829346, |
|
"logits/rejected": -1.8113425970077515, |
|
"logps/chosen": -407.6769104003906, |
|
"logps/rejected": -408.3319396972656, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.5669306516647339, |
|
"rewards/margins": 0.4425128400325775, |
|
"rewards/rejected": -2.0094432830810547, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -2.0704987049102783, |
|
"logits/rejected": -1.9060993194580078, |
|
"logps/chosen": -397.5736389160156, |
|
"logps/rejected": -465.2367248535156, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8127038478851318, |
|
"rewards/margins": 0.5960454344749451, |
|
"rewards/rejected": -2.4087491035461426, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -1.999671220779419, |
|
"logits/rejected": -1.9255882501602173, |
|
"logps/chosen": -422.7398986816406, |
|
"logps/rejected": -462.8828125, |
|
"loss": 0.537, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8113523721694946, |
|
"rewards/margins": 0.7707425951957703, |
|
"rewards/rejected": -2.58209490776062, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -2.034332275390625, |
|
"logits/rejected": -1.918892502784729, |
|
"logps/chosen": -454.9341735839844, |
|
"logps/rejected": -488.122314453125, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.0255672931671143, |
|
"rewards/margins": 0.639295220375061, |
|
"rewards/rejected": -2.6648623943328857, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -2.1899733543395996, |
|
"logits/rejected": -2.0211856365203857, |
|
"logps/chosen": -444.3063049316406, |
|
"logps/rejected": -499.0299377441406, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7396934032440186, |
|
"rewards/margins": 0.8382138013839722, |
|
"rewards/rejected": -2.577907085418701, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -2.042299747467041, |
|
"logits/rejected": -1.6256879568099976, |
|
"logps/chosen": -482.6806640625, |
|
"logps/rejected": -522.2861328125, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8350658416748047, |
|
"rewards/margins": 1.0798089504241943, |
|
"rewards/rejected": -2.9148752689361572, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -2.126718044281006, |
|
"logits/rejected": -1.8609893321990967, |
|
"logps/chosen": -578.2748413085938, |
|
"logps/rejected": -569.1837768554688, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -3.034454822540283, |
|
"rewards/margins": 0.5776246786117554, |
|
"rewards/rejected": -3.612079620361328, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -1.7727696895599365, |
|
"logits/rejected": -1.48529052734375, |
|
"logps/chosen": -634.0877685546875, |
|
"logps/rejected": -776.0706176757812, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.8410019874572754, |
|
"rewards/margins": 1.3400521278381348, |
|
"rewards/rejected": -5.181053638458252, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -1.8298507928848267, |
|
"logits/rejected": -1.6536800861358643, |
|
"logps/chosen": -668.0659790039062, |
|
"logps/rejected": -715.1585083007812, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -4.25075101852417, |
|
"rewards/margins": 0.8174532055854797, |
|
"rewards/rejected": -5.068203926086426, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -2.064204692840576, |
|
"logits/rejected": -1.8902099132537842, |
|
"logps/chosen": -402.74334716796875, |
|
"logps/rejected": -417.3500061035156, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.059872627258301, |
|
"rewards/margins": 0.30380430817604065, |
|
"rewards/rejected": -2.3636770248413086, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -2.0144591331481934, |
|
"logits/rejected": -1.7140735387802124, |
|
"logps/chosen": -352.4971008300781, |
|
"logps/rejected": -371.90869140625, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3799242973327637, |
|
"rewards/margins": 0.5062106251716614, |
|
"rewards/rejected": -1.8861348628997803, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -2.0474960803985596, |
|
"logits/rejected": -1.9881216287612915, |
|
"logps/chosen": -443.65185546875, |
|
"logps/rejected": -492.8255310058594, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6040445566177368, |
|
"rewards/margins": 0.7050210237503052, |
|
"rewards/rejected": -2.309065341949463, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -1.8097617626190186, |
|
"logits/rejected": -1.6723369359970093, |
|
"logps/chosen": -468.43475341796875, |
|
"logps/rejected": -514.5213012695312, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4575445652008057, |
|
"rewards/margins": 0.8011848330497742, |
|
"rewards/rejected": -3.2587294578552246, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -1.6928268671035767, |
|
"logits/rejected": -1.4622031450271606, |
|
"logps/chosen": -615.6226806640625, |
|
"logps/rejected": -672.2274169921875, |
|
"loss": 0.4353, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.9083850383758545, |
|
"rewards/margins": 1.1158344745635986, |
|
"rewards/rejected": -5.024219512939453, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -1.6810636520385742, |
|
"logits/rejected": -1.4750381708145142, |
|
"logps/chosen": -718.7698364257812, |
|
"logps/rejected": -769.5067749023438, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -4.622340202331543, |
|
"rewards/margins": 0.9261082410812378, |
|
"rewards/rejected": -5.548448085784912, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -1.858790636062622, |
|
"logits/rejected": -1.5472383499145508, |
|
"logps/chosen": -749.5997924804688, |
|
"logps/rejected": -797.5546875, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -5.035767078399658, |
|
"rewards/margins": 0.9318579435348511, |
|
"rewards/rejected": -5.967624664306641, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.7888858318328857, |
|
"logits/rejected": -1.8512537479400635, |
|
"logps/chosen": -627.5963134765625, |
|
"logps/rejected": -687.7399291992188, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -4.275559425354004, |
|
"rewards/margins": 0.5892196297645569, |
|
"rewards/rejected": -4.864778995513916, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -1.8849258422851562, |
|
"logits/rejected": -1.7010767459869385, |
|
"logps/chosen": -621.4512939453125, |
|
"logps/rejected": -654.05224609375, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -4.103709697723389, |
|
"rewards/margins": 0.48206719756126404, |
|
"rewards/rejected": -4.5857768058776855, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -2.156606674194336, |
|
"logits/rejected": -1.8361657857894897, |
|
"logps/chosen": -640.6911010742188, |
|
"logps/rejected": -605.0956420898438, |
|
"loss": 0.5607, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -3.5032150745391846, |
|
"rewards/margins": 0.49892592430114746, |
|
"rewards/rejected": -4.002140045166016, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -1.863431692123413, |
|
"logits/rejected": -1.6354913711547852, |
|
"logps/chosen": -584.6881713867188, |
|
"logps/rejected": -643.9428100585938, |
|
"loss": 0.5343, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.2495388984680176, |
|
"rewards/margins": 0.8126918077468872, |
|
"rewards/rejected": -4.062230587005615, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -1.978654146194458, |
|
"logits/rejected": -1.9466804265975952, |
|
"logps/chosen": -587.6908569335938, |
|
"logps/rejected": -607.2252807617188, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.702901840209961, |
|
"rewards/margins": 0.33423447608947754, |
|
"rewards/rejected": -4.037137031555176, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.870070457458496, |
|
"logits/rejected": -2.013461112976074, |
|
"logps/chosen": -526.104248046875, |
|
"logps/rejected": -600.121337890625, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.800811767578125, |
|
"rewards/margins": 0.7185395956039429, |
|
"rewards/rejected": -3.5193512439727783, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -1.931865930557251, |
|
"logits/rejected": -1.6386762857437134, |
|
"logps/chosen": -461.39520263671875, |
|
"logps/rejected": -491.856689453125, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.690922975540161, |
|
"rewards/margins": 0.708380401134491, |
|
"rewards/rejected": -3.399303436279297, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -1.916992425918579, |
|
"logits/rejected": -1.8899227380752563, |
|
"logps/chosen": -531.813232421875, |
|
"logps/rejected": -615.9520263671875, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.888909101486206, |
|
"rewards/margins": 0.7070828676223755, |
|
"rewards/rejected": -3.595991849899292, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -2.0526282787323, |
|
"logits/rejected": -1.9669021368026733, |
|
"logps/chosen": -449.2479553222656, |
|
"logps/rejected": -457.72027587890625, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.594003200531006, |
|
"rewards/margins": 0.2246524840593338, |
|
"rewards/rejected": -2.818655490875244, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -2.113966226577759, |
|
"logits/rejected": -1.9181636571884155, |
|
"logps/chosen": -503.37835693359375, |
|
"logps/rejected": -561.77783203125, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3552279472351074, |
|
"rewards/margins": 0.7212526202201843, |
|
"rewards/rejected": -3.0764806270599365, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -1.7894271612167358, |
|
"logits/rejected": -1.5007646083831787, |
|
"logps/chosen": -614.6292724609375, |
|
"logps/rejected": -649.1776123046875, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.184319019317627, |
|
"rewards/margins": 0.7777611613273621, |
|
"rewards/rejected": -3.9620795249938965, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -2.140821933746338, |
|
"logits/rejected": -1.7113529443740845, |
|
"logps/chosen": -523.3508911132812, |
|
"logps/rejected": -562.307861328125, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.6072592735290527, |
|
"rewards/margins": 1.0337148904800415, |
|
"rewards/rejected": -3.6409740447998047, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -1.8685197830200195, |
|
"logits/rejected": -1.9930238723754883, |
|
"logps/chosen": -437.0240783691406, |
|
"logps/rejected": -565.9390258789062, |
|
"loss": 0.5039, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.464498996734619, |
|
"rewards/margins": 0.857032299041748, |
|
"rewards/rejected": -3.3215317726135254, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -1.9025375843048096, |
|
"logits/rejected": -1.7775294780731201, |
|
"logps/chosen": -559.2813720703125, |
|
"logps/rejected": -652.2202758789062, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.108556032180786, |
|
"rewards/margins": 0.7565592527389526, |
|
"rewards/rejected": -3.8651154041290283, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -2.1011109352111816, |
|
"logits/rejected": -2.0423381328582764, |
|
"logps/chosen": -430.05633544921875, |
|
"logps/rejected": -476.6094665527344, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.923673391342163, |
|
"rewards/margins": 0.9791573286056519, |
|
"rewards/rejected": -2.9028310775756836, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -2.0440664291381836, |
|
"logits/rejected": -1.8446366786956787, |
|
"logps/chosen": -435.1307067871094, |
|
"logps/rejected": -506.0008239746094, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.0204269886016846, |
|
"rewards/margins": 1.0044559240341187, |
|
"rewards/rejected": -3.0248827934265137, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -1.8525664806365967, |
|
"logits/rejected": -1.5761830806732178, |
|
"logps/chosen": -413.330810546875, |
|
"logps/rejected": -453.98931884765625, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.897646188735962, |
|
"rewards/margins": 1.0005159378051758, |
|
"rewards/rejected": -2.8981618881225586, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -1.9756139516830444, |
|
"logits/rejected": -1.8542131185531616, |
|
"logps/chosen": -528.9044799804688, |
|
"logps/rejected": -650.6089477539062, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.553828716278076, |
|
"rewards/margins": 1.2451233863830566, |
|
"rewards/rejected": -3.798952102661133, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -1.94060480594635, |
|
"logits/rejected": -1.6692876815795898, |
|
"logps/chosen": -567.3487548828125, |
|
"logps/rejected": -671.7532958984375, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.8961987495422363, |
|
"rewards/margins": 1.0622972249984741, |
|
"rewards/rejected": -3.95849609375, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -1.8798109292984009, |
|
"logits/rejected": -1.8840408325195312, |
|
"logps/chosen": -468.63287353515625, |
|
"logps/rejected": -541.5391845703125, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.1703548431396484, |
|
"rewards/margins": 0.741019606590271, |
|
"rewards/rejected": -2.91137433052063, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.871678113937378, |
|
"logits/rejected": -1.6603199243545532, |
|
"logps/chosen": -525.9596557617188, |
|
"logps/rejected": -581.8895263671875, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.715925931930542, |
|
"rewards/margins": 0.8995075225830078, |
|
"rewards/rejected": -3.6154332160949707, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -1.9854234457015991, |
|
"logits/rejected": -1.8719909191131592, |
|
"logps/chosen": -508.23785400390625, |
|
"logps/rejected": -571.8689575195312, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7327098846435547, |
|
"rewards/margins": 0.7255285978317261, |
|
"rewards/rejected": -3.4582386016845703, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -1.753035545349121, |
|
"logits/rejected": -1.8223479986190796, |
|
"logps/chosen": -362.6646423339844, |
|
"logps/rejected": -436.86102294921875, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.3551478385925293, |
|
"rewards/margins": 0.7355124950408936, |
|
"rewards/rejected": -3.090660572052002, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -2.150531768798828, |
|
"logits/rejected": -1.819373369216919, |
|
"logps/chosen": -458.1045837402344, |
|
"logps/rejected": -522.545166015625, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1924614906311035, |
|
"rewards/margins": 1.1207162141799927, |
|
"rewards/rejected": -3.3131778240203857, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -1.841164231300354, |
|
"logits/rejected": -1.885084867477417, |
|
"logps/chosen": -514.1463623046875, |
|
"logps/rejected": -619.3209228515625, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.1367721557617188, |
|
"rewards/margins": 0.8921027183532715, |
|
"rewards/rejected": -4.028874397277832, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.8358408212661743, |
|
"logits/rejected": -2.0529000759124756, |
|
"logps/chosen": -559.2037353515625, |
|
"logps/rejected": -580.3322143554688, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.9438185691833496, |
|
"rewards/margins": 0.5294431447982788, |
|
"rewards/rejected": -3.473261594772339, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -2.139596462249756, |
|
"logits/rejected": -1.9000869989395142, |
|
"logps/chosen": -503.7278747558594, |
|
"logps/rejected": -551.3563842773438, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.439404010772705, |
|
"rewards/margins": 0.9661655426025391, |
|
"rewards/rejected": -3.405569553375244, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -2.013249397277832, |
|
"logits/rejected": -1.9738517999649048, |
|
"logps/chosen": -589.2969970703125, |
|
"logps/rejected": -724.22998046875, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.0665476322174072, |
|
"rewards/margins": 1.2686010599136353, |
|
"rewards/rejected": -4.335148811340332, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -1.6897433996200562, |
|
"logits/rejected": -1.7765251398086548, |
|
"logps/chosen": -498.42156982421875, |
|
"logps/rejected": -617.593994140625, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.009658098220825, |
|
"rewards/margins": 1.1272680759429932, |
|
"rewards/rejected": -4.136926174163818, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -1.6748046875, |
|
"logits/rejected": -1.8121490478515625, |
|
"logps/chosen": -465.87432861328125, |
|
"logps/rejected": -581.8338623046875, |
|
"loss": 0.583, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -3.0733983516693115, |
|
"rewards/margins": 0.6260702013969421, |
|
"rewards/rejected": -3.6994686126708984, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -1.9456278085708618, |
|
"logits/rejected": -1.7044868469238281, |
|
"logps/chosen": -526.3280639648438, |
|
"logps/rejected": -614.6798095703125, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9347662925720215, |
|
"rewards/margins": 1.119192123413086, |
|
"rewards/rejected": -4.053957939147949, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -2.0055367946624756, |
|
"logits/rejected": -1.8538860082626343, |
|
"logps/chosen": -615.2640380859375, |
|
"logps/rejected": -729.064453125, |
|
"loss": 0.4626, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.3482022285461426, |
|
"rewards/margins": 0.9904724955558777, |
|
"rewards/rejected": -4.338675022125244, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -1.9933702945709229, |
|
"logits/rejected": -2.050975799560547, |
|
"logps/chosen": -535.1612548828125, |
|
"logps/rejected": -645.9298706054688, |
|
"loss": 0.4793, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.8539366722106934, |
|
"rewards/margins": 1.0242040157318115, |
|
"rewards/rejected": -3.878140926361084, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -1.8224666118621826, |
|
"logits/rejected": -1.5814918279647827, |
|
"logps/chosen": -508.09332275390625, |
|
"logps/rejected": -499.42742919921875, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -3.324652910232544, |
|
"rewards/margins": 0.2990475594997406, |
|
"rewards/rejected": -3.6237003803253174, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -1.8880449533462524, |
|
"logits/rejected": -1.7552686929702759, |
|
"logps/chosen": -522.8394775390625, |
|
"logps/rejected": -642.8104858398438, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.2225208282470703, |
|
"rewards/margins": 1.2935682535171509, |
|
"rewards/rejected": -4.51608943939209, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -1.9811900854110718, |
|
"logits/rejected": -1.7504934072494507, |
|
"logps/chosen": -492.2660217285156, |
|
"logps/rejected": -541.9775390625, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.6576216220855713, |
|
"rewards/margins": 0.7241417169570923, |
|
"rewards/rejected": -3.381763458251953, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -1.9917253255844116, |
|
"logits/rejected": -1.6974796056747437, |
|
"logps/chosen": -522.3150634765625, |
|
"logps/rejected": -547.88330078125, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.875357151031494, |
|
"rewards/margins": 0.8617655038833618, |
|
"rewards/rejected": -3.7371230125427246, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -1.8854055404663086, |
|
"logits/rejected": -1.7443320751190186, |
|
"logps/chosen": -544.697509765625, |
|
"logps/rejected": -587.5208129882812, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -3.345867156982422, |
|
"rewards/margins": 0.8077778816223145, |
|
"rewards/rejected": -4.1536455154418945, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -1.8521480560302734, |
|
"logits/rejected": -1.633465051651001, |
|
"logps/chosen": -494.1001892089844, |
|
"logps/rejected": -566.96826171875, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.8628900051116943, |
|
"rewards/margins": 1.034968614578247, |
|
"rewards/rejected": -3.8978583812713623, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -1.7924169301986694, |
|
"logits/rejected": -1.5471246242523193, |
|
"logps/chosen": -587.1184692382812, |
|
"logps/rejected": -650.1336669921875, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.2694034576416016, |
|
"rewards/margins": 1.0973501205444336, |
|
"rewards/rejected": -4.366753101348877, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -1.751611351966858, |
|
"logits/rejected": -1.576918363571167, |
|
"logps/chosen": -530.2127075195312, |
|
"logps/rejected": -614.9729614257812, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.8580543994903564, |
|
"rewards/margins": 0.9249665141105652, |
|
"rewards/rejected": -3.7830207347869873, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -1.7469266653060913, |
|
"logits/rejected": -1.668701171875, |
|
"logps/chosen": -585.7467041015625, |
|
"logps/rejected": -588.47509765625, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.5444464683532715, |
|
"rewards/margins": 0.3376322388648987, |
|
"rewards/rejected": -3.8820786476135254, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -1.7435801029205322, |
|
"logits/rejected": -1.680567741394043, |
|
"logps/chosen": -615.296142578125, |
|
"logps/rejected": -718.3519287109375, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.011544704437256, |
|
"rewards/margins": 0.8902410268783569, |
|
"rewards/rejected": -4.901785850524902, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -1.855932593345642, |
|
"logits/rejected": -1.8077094554901123, |
|
"logps/chosen": -531.5975952148438, |
|
"logps/rejected": -581.9424438476562, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.0784988403320312, |
|
"rewards/margins": 0.6800780892372131, |
|
"rewards/rejected": -3.7585768699645996, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -1.838123083114624, |
|
"logits/rejected": -1.5591208934783936, |
|
"logps/chosen": -556.9913330078125, |
|
"logps/rejected": -647.0695190429688, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -3.241734027862549, |
|
"rewards/margins": 1.0456501245498657, |
|
"rewards/rejected": -4.287384033203125, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -2.1346633434295654, |
|
"logits/rejected": -1.826059341430664, |
|
"logps/chosen": -596.9236450195312, |
|
"logps/rejected": -622.0565185546875, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -3.315636396408081, |
|
"rewards/margins": 0.8735532760620117, |
|
"rewards/rejected": -4.189189910888672, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -1.944238305091858, |
|
"logits/rejected": -1.891898512840271, |
|
"logps/chosen": -647.6207275390625, |
|
"logps/rejected": -664.7337036132812, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -3.0909292697906494, |
|
"rewards/margins": 0.758102536201477, |
|
"rewards/rejected": -3.849031448364258, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -1.6966060400009155, |
|
"logits/rejected": -1.641953706741333, |
|
"logps/chosen": -548.3276977539062, |
|
"logps/rejected": -660.7442626953125, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -3.2273573875427246, |
|
"rewards/margins": 1.0470120906829834, |
|
"rewards/rejected": -4.274369239807129, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -1.908227562904358, |
|
"logits/rejected": -1.7419946193695068, |
|
"logps/chosen": -513.4429931640625, |
|
"logps/rejected": -574.7293701171875, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.947870969772339, |
|
"rewards/margins": 0.8125208616256714, |
|
"rewards/rejected": -3.7603917121887207, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -2.0077996253967285, |
|
"logits/rejected": -1.8994073867797852, |
|
"logps/chosen": -504.7936096191406, |
|
"logps/rejected": -594.3426513671875, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.80145001411438, |
|
"rewards/margins": 0.9755638241767883, |
|
"rewards/rejected": -3.7770137786865234, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.8658443689346313, |
|
"logits/rejected": -1.699480414390564, |
|
"logps/chosen": -557.4786987304688, |
|
"logps/rejected": -628.1792602539062, |
|
"loss": 0.4848, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.1368298530578613, |
|
"rewards/margins": 0.8951441049575806, |
|
"rewards/rejected": -4.031973838806152, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1250, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5286114639282227, |
|
"train_runtime": 12048.4045, |
|
"train_samples_per_second": 1.245, |
|
"train_steps_per_second": 0.104 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|