|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 1000, |
|
"global_step": 1145, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004366812227074236, |
|
"grad_norm": 0.49208763779092174, |
|
"learning_rate": 4.347826086956522e-08, |
|
"logits/chosen": -1.130352258682251, |
|
"logits/rejected": -0.9433857798576355, |
|
"logps/chosen": -272.3143005371094, |
|
"logps/rejected": -290.848388671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.043668122270742356, |
|
"grad_norm": 0.5373741627747036, |
|
"learning_rate": 4.347826086956522e-07, |
|
"logits/chosen": -1.0119584798812866, |
|
"logits/rejected": -1.054663062095642, |
|
"logps/chosen": -300.1561279296875, |
|
"logps/rejected": -261.20513916015625, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.0015893004601821303, |
|
"rewards/margins": 0.001330445520579815, |
|
"rewards/rejected": 0.0002588547649793327, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08733624454148471, |
|
"grad_norm": 0.518120805736266, |
|
"learning_rate": 8.695652173913044e-07, |
|
"logits/chosen": -1.1330419778823853, |
|
"logits/rejected": -1.0284273624420166, |
|
"logps/chosen": -283.1369934082031, |
|
"logps/rejected": -323.2412109375, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0005519447731785476, |
|
"rewards/margins": 0.0006424236344173551, |
|
"rewards/rejected": -0.0011943683493882418, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13100436681222707, |
|
"grad_norm": 0.6136347394482697, |
|
"learning_rate": 1.3043478260869566e-06, |
|
"logits/chosen": -1.0650444030761719, |
|
"logits/rejected": -1.107439637184143, |
|
"logps/chosen": -290.29168701171875, |
|
"logps/rejected": -270.15887451171875, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0012172337155789137, |
|
"rewards/margins": 0.0061761606484651566, |
|
"rewards/rejected": -0.007393394596874714, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17467248908296942, |
|
"grad_norm": 0.5452485335548993, |
|
"learning_rate": 1.7391304347826088e-06, |
|
"logits/chosen": -1.0981855392456055, |
|
"logits/rejected": -1.1204359531402588, |
|
"logps/chosen": -288.19488525390625, |
|
"logps/rejected": -279.60968017578125, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.01171443797647953, |
|
"rewards/margins": 0.023151502013206482, |
|
"rewards/rejected": -0.03486593812704086, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2183406113537118, |
|
"grad_norm": 0.5716667587131498, |
|
"learning_rate": 2.173913043478261e-06, |
|
"logits/chosen": -1.1297352313995361, |
|
"logits/rejected": -1.0235356092453003, |
|
"logps/chosen": -267.85931396484375, |
|
"logps/rejected": -315.6715393066406, |
|
"loss": 0.6681, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.03383685275912285, |
|
"rewards/margins": 0.050773195922374725, |
|
"rewards/rejected": -0.08461005985736847, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.26200873362445415, |
|
"grad_norm": 0.7397729774530086, |
|
"learning_rate": 2.6086956521739132e-06, |
|
"logits/chosen": -1.0748536586761475, |
|
"logits/rejected": -1.096380352973938, |
|
"logps/chosen": -298.35894775390625, |
|
"logps/rejected": -278.65167236328125, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.06509696692228317, |
|
"rewards/margins": 0.13354261219501495, |
|
"rewards/rejected": -0.19863960146903992, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3056768558951965, |
|
"grad_norm": 0.9490199807243085, |
|
"learning_rate": 3.043478260869566e-06, |
|
"logits/chosen": -1.1220731735229492, |
|
"logits/rejected": -1.1358802318572998, |
|
"logps/chosen": -287.66119384765625, |
|
"logps/rejected": -300.89215087890625, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.05601518228650093, |
|
"rewards/margins": 0.2802619934082031, |
|
"rewards/rejected": -0.33627718687057495, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.34934497816593885, |
|
"grad_norm": 1.1366581667233986, |
|
"learning_rate": 3.4782608695652175e-06, |
|
"logits/chosen": -1.1631652116775513, |
|
"logits/rejected": -1.0547783374786377, |
|
"logps/chosen": -273.6378173828125, |
|
"logps/rejected": -378.0707702636719, |
|
"loss": 0.4241, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0387813039124012, |
|
"rewards/margins": 0.6967908143997192, |
|
"rewards/rejected": -0.7355720400810242, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3930131004366812, |
|
"grad_norm": 0.699317306849894, |
|
"learning_rate": 3.91304347826087e-06, |
|
"logits/chosen": -1.13710355758667, |
|
"logits/rejected": -1.1224480867385864, |
|
"logps/chosen": -286.89630126953125, |
|
"logps/rejected": -425.4947204589844, |
|
"loss": 0.1966, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.010465627536177635, |
|
"rewards/margins": 1.6259124279022217, |
|
"rewards/rejected": -1.6154468059539795, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4366812227074236, |
|
"grad_norm": 0.20225227555832068, |
|
"learning_rate": 4.347826086956522e-06, |
|
"logits/chosen": -1.162099003791809, |
|
"logits/rejected": -1.0710939168930054, |
|
"logps/chosen": -254.20108032226562, |
|
"logps/rejected": -621.758544921875, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06020994111895561, |
|
"rewards/margins": 3.4915261268615723, |
|
"rewards/rejected": -3.431316375732422, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.48034934497816595, |
|
"grad_norm": 0.09010131899726684, |
|
"learning_rate": 4.782608695652174e-06, |
|
"logits/chosen": -1.0987495183944702, |
|
"logits/rejected": -1.049612283706665, |
|
"logps/chosen": -281.94390869140625, |
|
"logps/rejected": -799.0904541015625, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1645246148109436, |
|
"rewards/margins": 5.45929479598999, |
|
"rewards/rejected": -5.29477071762085, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5240174672489083, |
|
"grad_norm": 0.030005828505106115, |
|
"learning_rate": 4.999709285361594e-06, |
|
"logits/chosen": -1.1363656520843506, |
|
"logits/rejected": -1.0334757566452026, |
|
"logps/chosen": -270.98944091796875, |
|
"logps/rejected": -936.4185791015625, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.26095929741859436, |
|
"rewards/margins": 6.916310787200928, |
|
"rewards/rejected": -6.655351161956787, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5676855895196506, |
|
"grad_norm": 0.019361577478786484, |
|
"learning_rate": 4.997383973910631e-06, |
|
"logits/chosen": -1.0980539321899414, |
|
"logits/rejected": -0.9340838193893433, |
|
"logps/chosen": -266.50250244140625, |
|
"logps/rejected": -1020.0845947265625, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.24470993876457214, |
|
"rewards/margins": 7.702264308929443, |
|
"rewards/rejected": -7.457553863525391, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.611353711790393, |
|
"grad_norm": 0.012282637611773778, |
|
"learning_rate": 4.9927355140895775e-06, |
|
"logits/chosen": -1.1392152309417725, |
|
"logits/rejected": -0.8957275152206421, |
|
"logps/chosen": -242.6056365966797, |
|
"logps/rejected": -1173.87158203125, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2704493999481201, |
|
"rewards/margins": 8.908761978149414, |
|
"rewards/rejected": -8.638312339782715, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6550218340611353, |
|
"grad_norm": 0.04700706955431712, |
|
"learning_rate": 4.985768230048011e-06, |
|
"logits/chosen": -1.1246144771575928, |
|
"logits/rejected": -0.9621224403381348, |
|
"logps/chosen": -273.8687438964844, |
|
"logps/rejected": -1090.0859375, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17607423663139343, |
|
"rewards/margins": 8.613072395324707, |
|
"rewards/rejected": -8.43699836730957, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6986899563318777, |
|
"grad_norm": 0.01943896178498091, |
|
"learning_rate": 4.976488602981748e-06, |
|
"logits/chosen": -1.145508885383606, |
|
"logits/rejected": -0.9405732154846191, |
|
"logps/chosen": -249.9766387939453, |
|
"logps/rejected": -1222.8212890625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.33843302726745605, |
|
"rewards/margins": 9.41877269744873, |
|
"rewards/rejected": -9.080339431762695, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.74235807860262, |
|
"grad_norm": 0.019746462175002255, |
|
"learning_rate": 4.9649052651038255e-06, |
|
"logits/chosen": -1.0897105932235718, |
|
"logits/rejected": -0.9191669225692749, |
|
"logps/chosen": -267.56549072265625, |
|
"logps/rejected": -1116.246826171875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28829699754714966, |
|
"rewards/margins": 8.88953971862793, |
|
"rewards/rejected": -8.601243019104004, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7860262008733624, |
|
"grad_norm": 0.014841671172711, |
|
"learning_rate": 4.9510289916145295e-06, |
|
"logits/chosen": -1.0843827724456787, |
|
"logits/rejected": -0.8706234693527222, |
|
"logps/chosen": -254.61972045898438, |
|
"logps/rejected": -1236.5650634765625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22450891137123108, |
|
"rewards/margins": 9.739435195922852, |
|
"rewards/rejected": -9.514925956726074, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8296943231441049, |
|
"grad_norm": 0.012319918167579146, |
|
"learning_rate": 4.934872690677953e-06, |
|
"logits/chosen": -1.0835936069488525, |
|
"logits/rejected": -0.8566627502441406, |
|
"logps/chosen": -267.7489013671875, |
|
"logps/rejected": -1272.4853515625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1923988163471222, |
|
"rewards/margins": 9.936999320983887, |
|
"rewards/rejected": -9.744600296020508, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8733624454148472, |
|
"grad_norm": 0.005430948770812638, |
|
"learning_rate": 4.9164513914144005e-06, |
|
"logits/chosen": -1.0162463188171387, |
|
"logits/rejected": -0.7294031381607056, |
|
"logps/chosen": -249.8229522705078, |
|
"logps/rejected": -1294.401611328125, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20589938759803772, |
|
"rewards/margins": 10.359135627746582, |
|
"rewards/rejected": -10.153237342834473, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9170305676855895, |
|
"grad_norm": 0.009500725575344962, |
|
"learning_rate": 4.8957822299198045e-06, |
|
"logits/chosen": -1.1154823303222656, |
|
"logits/rejected": -0.8606049418449402, |
|
"logps/chosen": -262.8265686035156, |
|
"logps/rejected": -1394.8551025390625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19020363688468933, |
|
"rewards/margins": 11.131672859191895, |
|
"rewards/rejected": -10.941469192504883, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.9606986899563319, |
|
"grad_norm": 0.02542499541949849, |
|
"learning_rate": 4.872884433325169e-06, |
|
"logits/chosen": -1.0547072887420654, |
|
"logits/rejected": -0.8354827165603638, |
|
"logps/chosen": -282.1262512207031, |
|
"logps/rejected": -1277.065673828125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10952471196651459, |
|
"rewards/margins": 10.300054550170898, |
|
"rewards/rejected": -10.190529823303223, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0043668122270741, |
|
"grad_norm": 0.019502802530769667, |
|
"learning_rate": 4.847779301910868e-06, |
|
"logits/chosen": -1.088416576385498, |
|
"logits/rejected": -0.8197258114814758, |
|
"logps/chosen": -240.4183807373047, |
|
"logps/rejected": -1398.536376953125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14008644223213196, |
|
"rewards/margins": 11.213750839233398, |
|
"rewards/rejected": -11.073664665222168, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0480349344978166, |
|
"grad_norm": 0.025635188925498188, |
|
"learning_rate": 4.820490189292415e-06, |
|
"logits/chosen": -1.019892930984497, |
|
"logits/rejected": -0.7206239700317383, |
|
"logps/chosen": -249.9215087890625, |
|
"logps/rejected": -1368.528564453125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11964831501245499, |
|
"rewards/margins": 11.160282135009766, |
|
"rewards/rejected": -11.040634155273438, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.091703056768559, |
|
"grad_norm": 0.0019862259316801, |
|
"learning_rate": 4.791042480696179e-06, |
|
"logits/chosen": -1.1091744899749756, |
|
"logits/rejected": -0.8607629537582397, |
|
"logps/chosen": -280.6003112792969, |
|
"logps/rejected": -1413.723388671875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14324969053268433, |
|
"rewards/margins": 11.427762985229492, |
|
"rewards/rejected": -11.284514427185059, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1353711790393013, |
|
"grad_norm": 0.003461068195787667, |
|
"learning_rate": 4.759463569345205e-06, |
|
"logits/chosen": -1.0466573238372803, |
|
"logits/rejected": -0.8044939041137695, |
|
"logps/chosen": -261.8208312988281, |
|
"logps/rejected": -1364.37255859375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16325196623802185, |
|
"rewards/margins": 11.088302612304688, |
|
"rewards/rejected": -10.925050735473633, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.1790393013100438, |
|
"grad_norm": 0.005672203724021556, |
|
"learning_rate": 4.725782830977145e-06, |
|
"logits/chosen": -1.0772409439086914, |
|
"logits/rejected": -0.7674180865287781, |
|
"logps/chosen": -244.04861450195312, |
|
"logps/rejected": -1468.4453125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1920471489429474, |
|
"rewards/margins": 11.930693626403809, |
|
"rewards/rejected": -11.738645553588867, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.222707423580786, |
|
"grad_norm": 0.00517545244251001, |
|
"learning_rate": 4.690031596517984e-06, |
|
"logits/chosen": -1.0256779193878174, |
|
"logits/rejected": -0.7417846918106079, |
|
"logps/chosen": -246.1106414794922, |
|
"logps/rejected": -1458.6060791015625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17811021208763123, |
|
"rewards/margins": 11.871198654174805, |
|
"rewards/rejected": -11.69308853149414, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2663755458515285, |
|
"grad_norm": 0.0052311080547555655, |
|
"learning_rate": 4.652243122936987e-06, |
|
"logits/chosen": -1.0737969875335693, |
|
"logits/rejected": -0.8021048307418823, |
|
"logps/chosen": -262.6719665527344, |
|
"logps/rejected": -1506.664306640625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19292786717414856, |
|
"rewards/margins": 12.288152694702148, |
|
"rewards/rejected": -12.09522533416748, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.3100436681222707, |
|
"grad_norm": 0.007845893197308343, |
|
"learning_rate": 4.612452562309975e-06, |
|
"logits/chosen": -1.0275319814682007, |
|
"logits/rejected": -0.6741601228713989, |
|
"logps/chosen": -233.83407592773438, |
|
"logps/rejected": -1481.06640625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20325830578804016, |
|
"rewards/margins": 12.083272933959961, |
|
"rewards/rejected": -11.88001537322998, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3537117903930131, |
|
"grad_norm": 0.001639011685908484, |
|
"learning_rate": 4.570696929119717e-06, |
|
"logits/chosen": -1.0229878425598145, |
|
"logits/rejected": -0.7648278474807739, |
|
"logps/chosen": -259.880615234375, |
|
"logps/rejected": -1439.419189453125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13743802905082703, |
|
"rewards/margins": 11.878623962402344, |
|
"rewards/rejected": -11.741185188293457, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.3973799126637554, |
|
"grad_norm": 0.002036377670452987, |
|
"learning_rate": 4.527015065823841e-06, |
|
"logits/chosen": -1.013264536857605, |
|
"logits/rejected": -0.7761918306350708, |
|
"logps/chosen": -272.4781799316406, |
|
"logps/rejected": -1432.2052001953125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1656205952167511, |
|
"rewards/margins": 11.849295616149902, |
|
"rewards/rejected": -11.683676719665527, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.4410480349344978, |
|
"grad_norm": 0.00825121579599894, |
|
"learning_rate": 4.481447606722309e-06, |
|
"logits/chosen": -1.0379178524017334, |
|
"logits/rejected": -0.7940360903739929, |
|
"logps/chosen": -274.08929443359375, |
|
"logps/rejected": -1472.6800537109375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19115516543388367, |
|
"rewards/margins": 12.22935962677002, |
|
"rewards/rejected": -12.03820514678955, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.48471615720524, |
|
"grad_norm": 0.006624246041316681, |
|
"learning_rate": 4.434036940158062e-06, |
|
"logits/chosen": -1.0243127346038818, |
|
"logits/rejected": -0.6224126219749451, |
|
"logps/chosen": -222.64013671875, |
|
"logps/rejected": -1671.427001953125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19593994319438934, |
|
"rewards/margins": 13.684160232543945, |
|
"rewards/rejected": -13.48822021484375, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.5283842794759825, |
|
"grad_norm": 0.00969023506429811, |
|
"learning_rate": 4.384827169085993e-06, |
|
"logits/chosen": -1.0768173933029175, |
|
"logits/rejected": -0.9027138948440552, |
|
"logps/chosen": -304.2896728515625, |
|
"logps/rejected": -1465.677001953125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06724857538938522, |
|
"rewards/margins": 12.110160827636719, |
|
"rewards/rejected": -12.042912483215332, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.572052401746725, |
|
"grad_norm": 0.008449136082982325, |
|
"learning_rate": 4.333864070046938e-06, |
|
"logits/chosen": -0.9996398687362671, |
|
"logits/rejected": -0.6732652187347412, |
|
"logps/chosen": -264.0496520996094, |
|
"logps/rejected": -1532.588134765625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09769190102815628, |
|
"rewards/margins": 12.633995056152344, |
|
"rewards/rejected": -12.53630256652832, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.6157205240174672, |
|
"grad_norm": 0.0026627154109082445, |
|
"learning_rate": 4.28119505058483e-06, |
|
"logits/chosen": -1.0139929056167603, |
|
"logits/rejected": -0.7123194336891174, |
|
"logps/chosen": -275.62481689453125, |
|
"logps/rejected": -1458.732421875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08639346808195114, |
|
"rewards/margins": 12.07685661315918, |
|
"rewards/rejected": -11.990463256835938, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.6593886462882095, |
|
"grad_norm": 0.004704630681169572, |
|
"learning_rate": 4.226869105146658e-06, |
|
"logits/chosen": -1.0525470972061157, |
|
"logits/rejected": -0.7499625086784363, |
|
"logps/chosen": -256.189453125, |
|
"logps/rejected": -1665.451904296875, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15123505890369415, |
|
"rewards/margins": 13.766995429992676, |
|
"rewards/rejected": -13.615760803222656, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.703056768558952, |
|
"grad_norm": 0.008451726735303476, |
|
"learning_rate": 4.170936769506222e-06, |
|
"logits/chosen": -1.0494039058685303, |
|
"logits/rejected": -0.7635387182235718, |
|
"logps/chosen": -272.2103576660156, |
|
"logps/rejected": -1517.40625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11954255402088165, |
|
"rewards/margins": 12.597132682800293, |
|
"rewards/rejected": -12.477590560913086, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.7467248908296944, |
|
"grad_norm": 0.004007124213956819, |
|
"learning_rate": 4.1134500737541026e-06, |
|
"logits/chosen": -1.0403097867965698, |
|
"logits/rejected": -0.7625882029533386, |
|
"logps/chosen": -270.08642578125, |
|
"logps/rejected": -1559.534423828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14756450057029724, |
|
"rewards/margins": 12.893702507019043, |
|
"rewards/rejected": -12.746137619018555, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.7903930131004366, |
|
"grad_norm": 0.008071006947580056, |
|
"learning_rate": 4.054462493897569e-06, |
|
"logits/chosen": -1.0538218021392822, |
|
"logits/rejected": -0.6631166934967041, |
|
"logps/chosen": -230.2433319091797, |
|
"logps/rejected": -1751.548095703125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1488143503665924, |
|
"rewards/margins": 14.362360000610352, |
|
"rewards/rejected": -14.213546752929688, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.8340611353711789, |
|
"grad_norm": 0.0018837924584018762, |
|
"learning_rate": 3.994028902115439e-06, |
|
"logits/chosen": -1.0209208726882935, |
|
"logits/rejected": -0.6195182204246521, |
|
"logps/chosen": -235.20816040039062, |
|
"logps/rejected": -1623.1568603515625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12359651178121567, |
|
"rewards/margins": 13.363885879516602, |
|
"rewards/rejected": -13.240289688110352, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.8777292576419216, |
|
"grad_norm": 0.0024505034148971385, |
|
"learning_rate": 3.932205515714189e-06, |
|
"logits/chosen": -1.0572965145111084, |
|
"logits/rejected": -0.764691948890686, |
|
"logps/chosen": -299.56732177734375, |
|
"logps/rejected": -1628.056884765625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.033301644027233124, |
|
"rewards/margins": 13.467303276062012, |
|
"rewards/rejected": -13.500605583190918, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.9213973799126638, |
|
"grad_norm": 0.006019680096008564, |
|
"learning_rate": 3.86904984483277e-06, |
|
"logits/chosen": -1.0619957447052002, |
|
"logits/rejected": -0.7923606634140015, |
|
"logps/chosen": -298.5255432128906, |
|
"logps/rejected": -1547.7294921875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.007502234075218439, |
|
"rewards/margins": 12.943402290344238, |
|
"rewards/rejected": -12.93589973449707, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.965065502183406, |
|
"grad_norm": 0.0039079828979289005, |
|
"learning_rate": 3.8046206389447916e-06, |
|
"logits/chosen": -1.0510538816452026, |
|
"logits/rejected": -0.7345406413078308, |
|
"logps/chosen": -275.7720947265625, |
|
"logps/rejected": -1645.208251953125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0038163154385983944, |
|
"rewards/margins": 13.638483047485352, |
|
"rewards/rejected": -13.634668350219727, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.0087336244541483, |
|
"grad_norm": 0.0017708249899463602, |
|
"learning_rate": 3.738977832207839e-06, |
|
"logits/chosen": -1.0177559852600098, |
|
"logits/rejected": -0.7104192972183228, |
|
"logps/chosen": -263.3643798828125, |
|
"logps/rejected": -1611.4786376953125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02570909820497036, |
|
"rewards/margins": 13.336071968078613, |
|
"rewards/rejected": -13.3103609085083, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.052401746724891, |
|
"grad_norm": 0.000997282058931975, |
|
"learning_rate": 3.6721824877107588e-06, |
|
"logits/chosen": -1.0775004625320435, |
|
"logits/rejected": -0.8090157508850098, |
|
"logps/chosen": -305.11627197265625, |
|
"logps/rejected": -1616.4625244140625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.022619858384132385, |
|
"rewards/margins": 13.472871780395508, |
|
"rewards/rejected": -13.450250625610352, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.096069868995633, |
|
"grad_norm": 0.0066682395247295335, |
|
"learning_rate": 3.604296740670768e-06, |
|
"logits/chosen": -1.0786068439483643, |
|
"logits/rejected": -0.6820305585861206, |
|
"logps/chosen": -257.0702819824219, |
|
"logps/rejected": -1786.6517333984375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09799555689096451, |
|
"rewards/margins": 14.890355110168457, |
|
"rewards/rejected": -14.7923583984375, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.1397379912663754, |
|
"grad_norm": 0.004948689561737302, |
|
"learning_rate": 3.5353837406332464e-06, |
|
"logits/chosen": -1.0740149021148682, |
|
"logits/rejected": -0.7413342595100403, |
|
"logps/chosen": -247.671630859375, |
|
"logps/rejected": -1785.2659912109375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1334933489561081, |
|
"rewards/margins": 14.81743335723877, |
|
"rewards/rejected": -14.683941841125488, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.183406113537118, |
|
"grad_norm": 0.0032297799668046237, |
|
"learning_rate": 3.4655075927279576e-06, |
|
"logits/chosen": -1.0715500116348267, |
|
"logits/rejected": -0.7507299184799194, |
|
"logps/chosen": -257.148681640625, |
|
"logps/rejected": -1643.170654296875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04553316906094551, |
|
"rewards/margins": 13.757342338562012, |
|
"rewards/rejected": -13.711812019348145, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.2270742358078603, |
|
"grad_norm": 0.0018344305263622298, |
|
"learning_rate": 3.3947332980363552e-06, |
|
"logits/chosen": -0.9531366229057312, |
|
"logits/rejected": -0.5800803303718567, |
|
"logps/chosen": -267.22320556640625, |
|
"logps/rejected": -1683.7496337890625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01091139018535614, |
|
"rewards/margins": 14.13904094696045, |
|
"rewards/rejected": -14.128130912780762, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.2707423580786026, |
|
"grad_norm": 0.0008743192695472521, |
|
"learning_rate": 3.3231266931254546e-06, |
|
"logits/chosen": -1.0591729879379272, |
|
"logits/rejected": -0.7667123079299927, |
|
"logps/chosen": -288.68048095703125, |
|
"logps/rejected": -1706.4267578125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.048968568444252014, |
|
"rewards/margins": 14.315594673156738, |
|
"rewards/rejected": -14.266626358032227, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.314410480349345, |
|
"grad_norm": 0.0006214259809055745, |
|
"learning_rate": 3.250754388804495e-06, |
|
"logits/chosen": -1.0545308589935303, |
|
"logits/rejected": -0.6721210479736328, |
|
"logps/chosen": -258.18707275390625, |
|
"logps/rejected": -1773.6435546875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016913337633013725, |
|
"rewards/margins": 14.813578605651855, |
|
"rewards/rejected": -14.796663284301758, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.3580786026200875, |
|
"grad_norm": 0.0009986936165298707, |
|
"learning_rate": 3.1776837081613893e-06, |
|
"logits/chosen": -0.9897233247756958, |
|
"logits/rejected": -0.6502217650413513, |
|
"logps/chosen": -265.50994873046875, |
|
"logps/rejected": -1707.3509521484375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05818755552172661, |
|
"rewards/margins": 14.338269233703613, |
|
"rewards/rejected": -14.280080795288086, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.4017467248908297, |
|
"grad_norm": 0.00984202902471183, |
|
"learning_rate": 3.1039826239365754e-06, |
|
"logits/chosen": -1.026111364364624, |
|
"logits/rejected": -0.7162208557128906, |
|
"logps/chosen": -260.0814514160156, |
|
"logps/rejected": -1701.0667724609375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.060255538672208786, |
|
"rewards/margins": 14.235310554504395, |
|
"rewards/rejected": -14.175054550170898, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.445414847161572, |
|
"grad_norm": 0.0006428554350249238, |
|
"learning_rate": 3.0297196952925533e-06, |
|
"logits/chosen": -1.0254216194152832, |
|
"logits/rejected": -0.7658672332763672, |
|
"logps/chosen": -304.80194091796875, |
|
"logps/rejected": -1645.176025390625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.019381849095225334, |
|
"rewards/margins": 13.853315353393555, |
|
"rewards/rejected": -13.872695922851562, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.489082969432314, |
|
"grad_norm": 0.0006532892925360383, |
|
"learning_rate": 2.9549640040379043e-06, |
|
"logits/chosen": -1.0421395301818848, |
|
"logits/rejected": -0.7338224649429321, |
|
"logps/chosen": -289.6170349121094, |
|
"logps/rejected": -1806.13671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016583764925599098, |
|
"rewards/margins": 15.094789505004883, |
|
"rewards/rejected": -15.078204154968262, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.532751091703057, |
|
"grad_norm": 0.0006677213433183142, |
|
"learning_rate": 2.8797850903651274e-06, |
|
"logits/chosen": -1.0362173318862915, |
|
"logits/rejected": -0.716796875, |
|
"logps/chosen": -265.3944091796875, |
|
"logps/rejected": -1734.1741943359375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07199420034885406, |
|
"rewards/margins": 14.672683715820312, |
|
"rewards/rejected": -14.600687980651855, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.576419213973799, |
|
"grad_norm": 0.0017150618685269877, |
|
"learning_rate": 2.804252888162079e-06, |
|
"logits/chosen": -1.1411523818969727, |
|
"logits/rejected": -0.7371311187744141, |
|
"logps/chosen": -265.2336120605469, |
|
"logps/rejected": -1891.736572265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.006541900336742401, |
|
"rewards/margins": 15.778755187988281, |
|
"rewards/rejected": -15.772212028503418, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.6200873362445414, |
|
"grad_norm": 0.0014112958209940734, |
|
"learning_rate": 2.7284376599571776e-06, |
|
"logits/chosen": -1.054970622062683, |
|
"logits/rejected": -0.7131912112236023, |
|
"logps/chosen": -264.7176818847656, |
|
"logps/rejected": -1720.33984375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05957148224115372, |
|
"rewards/margins": 14.534815788269043, |
|
"rewards/rejected": -14.475244522094727, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.6637554585152836, |
|
"grad_norm": 0.0006698680618471773, |
|
"learning_rate": 2.652409931558898e-06, |
|
"logits/chosen": -1.053966760635376, |
|
"logits/rejected": -0.7491308450698853, |
|
"logps/chosen": -280.0963134765625, |
|
"logps/rejected": -1777.6761474609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.008257086388766766, |
|
"rewards/margins": 14.989705085754395, |
|
"rewards/rejected": -14.98144817352295, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.7074235807860263, |
|
"grad_norm": 0.00042568362060489706, |
|
"learning_rate": 2.5762404264503538e-06, |
|
"logits/chosen": -0.9900799989700317, |
|
"logits/rejected": -0.6423364281654358, |
|
"logps/chosen": -269.9592590332031, |
|
"logps/rejected": -1832.5003662109375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.042371202260255814, |
|
"rewards/margins": 15.435081481933594, |
|
"rewards/rejected": -15.392709732055664, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.7510917030567685, |
|
"grad_norm": 0.006748202311280723, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -1.0268778800964355, |
|
"logits/rejected": -0.7622432708740234, |
|
"logps/chosen": -285.1914978027344, |
|
"logps/rejected": -1769.4827880859375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.037803106009960175, |
|
"rewards/margins": 14.83598804473877, |
|
"rewards/rejected": -14.873788833618164, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.7947598253275108, |
|
"grad_norm": 0.0002702166687339878, |
|
"learning_rate": 2.423759573549647e-06, |
|
"logits/chosen": -1.0029267072677612, |
|
"logits/rejected": -0.6316828727722168, |
|
"logps/chosen": -260.8511962890625, |
|
"logps/rejected": -1813.537841796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.039741456508636475, |
|
"rewards/margins": 15.310914993286133, |
|
"rewards/rejected": -15.271173477172852, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.8384279475982535, |
|
"grad_norm": 0.0006646355698341602, |
|
"learning_rate": 2.3475900684411027e-06, |
|
"logits/chosen": -1.006449818611145, |
|
"logits/rejected": -0.6019884943962097, |
|
"logps/chosen": -250.7771453857422, |
|
"logps/rejected": -1806.491455078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.015752162784337997, |
|
"rewards/margins": 15.226987838745117, |
|
"rewards/rejected": -15.2427396774292, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.8820960698689957, |
|
"grad_norm": 0.00047399361927177104, |
|
"learning_rate": 2.2715623400428228e-06, |
|
"logits/chosen": -1.0256198644638062, |
|
"logits/rejected": -0.7082680463790894, |
|
"logps/chosen": -287.2723083496094, |
|
"logps/rejected": -1791.8154296875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.028542857617139816, |
|
"rewards/margins": 15.111543655395508, |
|
"rewards/rejected": -15.14008617401123, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.925764192139738, |
|
"grad_norm": 0.054595514895715194, |
|
"learning_rate": 2.1957471118379213e-06, |
|
"logits/chosen": -1.1037609577178955, |
|
"logits/rejected": -0.7577636241912842, |
|
"logps/chosen": -303.9775390625, |
|
"logps/rejected": -1865.5306396484375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.036632128059864044, |
|
"rewards/margins": 15.698193550109863, |
|
"rewards/rejected": -15.734827041625977, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.96943231441048, |
|
"grad_norm": 0.0032068293343592976, |
|
"learning_rate": 2.120214909634873e-06, |
|
"logits/chosen": -1.0517921447753906, |
|
"logits/rejected": -0.770795464515686, |
|
"logps/chosen": -298.4246520996094, |
|
"logps/rejected": -1795.1331787109375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.014761227183043957, |
|
"rewards/margins": 15.1719970703125, |
|
"rewards/rejected": -15.186758041381836, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.013100436681223, |
|
"grad_norm": 0.0011657499181104522, |
|
"learning_rate": 2.045035995962097e-06, |
|
"logits/chosen": -1.1193125247955322, |
|
"logits/rejected": -0.7743921279907227, |
|
"logps/chosen": -279.6126403808594, |
|
"logps/rejected": -1916.1976318359375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.046258725225925446, |
|
"rewards/margins": 16.126333236694336, |
|
"rewards/rejected": -16.172592163085938, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.056768558951965, |
|
"grad_norm": 0.005031019960938767, |
|
"learning_rate": 1.970280304707447e-06, |
|
"logits/chosen": -1.0536220073699951, |
|
"logits/rejected": -0.6994236707687378, |
|
"logps/chosen": -274.2372131347656, |
|
"logps/rejected": -1818.864013671875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.030473018065094948, |
|
"rewards/margins": 15.32104206085205, |
|
"rewards/rejected": -15.351513862609863, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.1004366812227073, |
|
"grad_norm": 0.0003812225537618957, |
|
"learning_rate": 1.8960173760634257e-06, |
|
"logits/chosen": -1.1193227767944336, |
|
"logits/rejected": -0.8343600034713745, |
|
"logps/chosen": -317.20367431640625, |
|
"logps/rejected": -1797.347900390625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.015081268735229969, |
|
"rewards/margins": 15.233189582824707, |
|
"rewards/rejected": -15.248270034790039, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.14410480349345, |
|
"grad_norm": 0.000800716771561942, |
|
"learning_rate": 1.8223162918386122e-06, |
|
"logits/chosen": -1.0911037921905518, |
|
"logits/rejected": -0.7263038158416748, |
|
"logps/chosen": -281.10723876953125, |
|
"logps/rejected": -1893.2275390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07366519421339035, |
|
"rewards/margins": 15.97996711730957, |
|
"rewards/rejected": -16.053630828857422, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.1877729257641922, |
|
"grad_norm": 0.0033435888991239673, |
|
"learning_rate": 1.7492456111955052e-06, |
|
"logits/chosen": -1.0024917125701904, |
|
"logits/rejected": -0.6330265998840332, |
|
"logps/chosen": -266.6875915527344, |
|
"logps/rejected": -1799.3509521484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05265193060040474, |
|
"rewards/margins": 15.307687759399414, |
|
"rewards/rejected": -15.255037307739258, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.2314410480349345, |
|
"grad_norm": 0.003033628662190874, |
|
"learning_rate": 1.6768733068745468e-06, |
|
"logits/chosen": -1.0493130683898926, |
|
"logits/rejected": -0.7165160775184631, |
|
"logps/chosen": -277.95294189453125, |
|
"logps/rejected": -1927.5621337890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.008759565651416779, |
|
"rewards/margins": 16.3278865814209, |
|
"rewards/rejected": -16.336645126342773, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.2751091703056767, |
|
"grad_norm": 0.00030739766784929366, |
|
"learning_rate": 1.6052667019636462e-06, |
|
"logits/chosen": -1.0858592987060547, |
|
"logits/rejected": -0.7486026883125305, |
|
"logps/chosen": -285.6336669921875, |
|
"logps/rejected": -1904.0074462890625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.039479680359363556, |
|
"rewards/margins": 15.980633735656738, |
|
"rewards/rejected": -16.02011489868164, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.3187772925764194, |
|
"grad_norm": 0.0099847141974217, |
|
"learning_rate": 1.5344924072720434e-06, |
|
"logits/chosen": -1.0522048473358154, |
|
"logits/rejected": -0.7434892058372498, |
|
"logps/chosen": -294.0987548828125, |
|
"logps/rejected": -1748.886962890625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06646230816841125, |
|
"rewards/margins": 14.771476745605469, |
|
"rewards/rejected": -14.837939262390137, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.3624454148471616, |
|
"grad_norm": 0.00046452138658813734, |
|
"learning_rate": 1.4646162593667535e-06, |
|
"logits/chosen": -1.0780597925186157, |
|
"logits/rejected": -0.8004827499389648, |
|
"logps/chosen": -309.17120361328125, |
|
"logps/rejected": -1823.7366943359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.043299149721860886, |
|
"rewards/margins": 15.575587272644043, |
|
"rewards/rejected": -15.532289505004883, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.406113537117904, |
|
"grad_norm": 0.0018748318848776301, |
|
"learning_rate": 1.3957032593292319e-06, |
|
"logits/chosen": -1.042887568473816, |
|
"logits/rejected": -0.6965152621269226, |
|
"logps/chosen": -282.84088134765625, |
|
"logps/rejected": -1889.387451171875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.013044556602835655, |
|
"rewards/margins": 15.971423149108887, |
|
"rewards/rejected": -15.958378791809082, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.449781659388646, |
|
"grad_norm": 0.0009191121751634733, |
|
"learning_rate": 1.3278175122892416e-06, |
|
"logits/chosen": -1.029362678527832, |
|
"logits/rejected": -0.6999324560165405, |
|
"logps/chosen": -269.5941467285156, |
|
"logps/rejected": -1812.1949462890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10188305377960205, |
|
"rewards/margins": 15.274584770202637, |
|
"rewards/rejected": -15.376466751098633, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.493449781659389, |
|
"grad_norm": 0.003126132491902847, |
|
"learning_rate": 1.261022167792161e-06, |
|
"logits/chosen": -1.0470654964447021, |
|
"logits/rejected": -0.6794711351394653, |
|
"logps/chosen": -278.49285888671875, |
|
"logps/rejected": -1961.002197265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.009503474459052086, |
|
"rewards/margins": 16.650577545166016, |
|
"rewards/rejected": -16.641077041625977, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.537117903930131, |
|
"grad_norm": 0.0059086305628955095, |
|
"learning_rate": 1.195379361055209e-06, |
|
"logits/chosen": -0.9826908111572266, |
|
"logits/rejected": -0.6099307537078857, |
|
"logps/chosen": -278.61492919921875, |
|
"logps/rejected": -1975.528076171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03355104476213455, |
|
"rewards/margins": 16.752765655517578, |
|
"rewards/rejected": -16.78631591796875, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.5807860262008733, |
|
"grad_norm": 0.003101839804676522, |
|
"learning_rate": 1.1309501551672303e-06, |
|
"logits/chosen": -0.957851231098175, |
|
"logits/rejected": -0.5697217583656311, |
|
"logps/chosen": -272.66534423828125, |
|
"logps/rejected": -1837.3662109375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07275199890136719, |
|
"rewards/margins": 15.55827522277832, |
|
"rewards/rejected": -15.631025314331055, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.6244541484716155, |
|
"grad_norm": 0.006553990432039902, |
|
"learning_rate": 1.0677944842858112e-06, |
|
"logits/chosen": -1.0141226053237915, |
|
"logits/rejected": -0.6257001757621765, |
|
"logps/chosen": -280.70672607421875, |
|
"logps/rejected": -1861.9078369140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05027519538998604, |
|
"rewards/margins": 15.878219604492188, |
|
"rewards/rejected": -15.928494453430176, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.668122270742358, |
|
"grad_norm": 0.0003416983892558372, |
|
"learning_rate": 1.005971097884561e-06, |
|
"logits/chosen": -1.0156476497650146, |
|
"logits/rejected": -0.668128252029419, |
|
"logps/chosen": -302.67767333984375, |
|
"logps/rejected": -1935.9398193359375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08607998490333557, |
|
"rewards/margins": 16.394611358642578, |
|
"rewards/rejected": -16.480690002441406, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.7117903930131004, |
|
"grad_norm": 0.0015327333742183899, |
|
"learning_rate": 9.455375061024319e-07, |
|
"logits/chosen": -1.0618703365325928, |
|
"logits/rejected": -0.7237562537193298, |
|
"logps/chosen": -283.15557861328125, |
|
"logps/rejected": -1865.9681396484375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.054165177047252655, |
|
"rewards/margins": 15.781875610351562, |
|
"rewards/rejected": -15.836041450500488, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.7554585152838427, |
|
"grad_norm": 0.004269773041594158, |
|
"learning_rate": 8.86549926245898e-07, |
|
"logits/chosen": -1.060253381729126, |
|
"logits/rejected": -0.6907247304916382, |
|
"logps/chosen": -279.11090087890625, |
|
"logps/rejected": -1889.4056396484375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0650867372751236, |
|
"rewards/margins": 15.91883659362793, |
|
"rewards/rejected": -15.983922958374023, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.7991266375545854, |
|
"grad_norm": 0.0010427555714299119, |
|
"learning_rate": 8.29063230493779e-07, |
|
"logits/chosen": -1.0654280185699463, |
|
"logits/rejected": -0.6975089907646179, |
|
"logps/chosen": -275.31011962890625, |
|
"logps/rejected": -1959.466796875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10659889876842499, |
|
"rewards/margins": 16.469921112060547, |
|
"rewards/rejected": -16.576520919799805, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.8427947598253276, |
|
"grad_norm": 0.0051211999521691845, |
|
"learning_rate": 7.731308948533431e-07, |
|
"logits/chosen": -1.0260651111602783, |
|
"logits/rejected": -0.7374225854873657, |
|
"logps/chosen": -309.5072937011719, |
|
"logps/rejected": -1826.8656005859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08689963072538376, |
|
"rewards/margins": 15.4946928024292, |
|
"rewards/rejected": -15.58159351348877, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.88646288209607, |
|
"grad_norm": 0.002864032933131896, |
|
"learning_rate": 7.188049494151703e-07, |
|
"logits/chosen": -0.9869282841682434, |
|
"logits/rejected": -0.6170369386672974, |
|
"logps/chosen": -270.72308349609375, |
|
"logps/rejected": -1809.623046875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03863278776407242, |
|
"rewards/margins": 15.385714530944824, |
|
"rewards/rejected": -15.424346923828125, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.930131004366812, |
|
"grad_norm": 0.0005105250492242013, |
|
"learning_rate": 6.661359299530626e-07, |
|
"logits/chosen": -1.0458041429519653, |
|
"logits/rejected": -0.8353503346443176, |
|
"logps/chosen": -325.7237548828125, |
|
"logps/rejected": -1614.521484375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15461190044879913, |
|
"rewards/margins": 13.781686782836914, |
|
"rewards/rejected": -13.936299324035645, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.9737991266375547, |
|
"grad_norm": 0.0019150602563612286, |
|
"learning_rate": 6.151728309140071e-07, |
|
"logits/chosen": -1.0450153350830078, |
|
"logits/rejected": -0.7124982476234436, |
|
"logps/chosen": -285.0978698730469, |
|
"logps/rejected": -1913.976318359375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.026266271248459816, |
|
"rewards/margins": 16.297687530517578, |
|
"rewards/rejected": -16.32395362854004, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.0174672489082965, |
|
"grad_norm": 0.003658964581230378, |
|
"learning_rate": 5.659630598419391e-07, |
|
"logits/chosen": -1.0644596815109253, |
|
"logits/rejected": -0.6967732906341553, |
|
"logps/chosen": -284.8916320800781, |
|
"logps/rejected": -1840.668212890625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.04237327724695206, |
|
"rewards/margins": 15.650690078735352, |
|
"rewards/rejected": -15.69306468963623, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.06113537117904, |
|
"grad_norm": 0.015359415097265612, |
|
"learning_rate": 5.185523932776923e-07, |
|
"logits/chosen": -1.0555784702301025, |
|
"logits/rejected": -0.7345298528671265, |
|
"logps/chosen": -284.2557067871094, |
|
"logps/rejected": -1905.572509765625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.04300800338387489, |
|
"rewards/margins": 16.220272064208984, |
|
"rewards/rejected": -16.26327896118164, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.104803493449782, |
|
"grad_norm": 0.0043225257316477665, |
|
"learning_rate": 4.7298493417616024e-07, |
|
"logits/chosen": -1.024458646774292, |
|
"logits/rejected": -0.7369360327720642, |
|
"logps/chosen": -297.3896484375, |
|
"logps/rejected": -1880.3125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06699924170970917, |
|
"rewards/margins": 15.987092971801758, |
|
"rewards/rejected": -16.05409049987793, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.148471615720524, |
|
"grad_norm": 0.0007120346767669291, |
|
"learning_rate": 4.293030708802834e-07, |
|
"logits/chosen": -1.0639699697494507, |
|
"logits/rejected": -0.7229939699172974, |
|
"logps/chosen": -270.7474060058594, |
|
"logps/rejected": -1974.702392578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.030464956536889076, |
|
"rewards/margins": 16.875165939331055, |
|
"rewards/rejected": -16.84469985961914, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.192139737991266, |
|
"grad_norm": 0.0004859166750622356, |
|
"learning_rate": 3.875474376900254e-07, |
|
"logits/chosen": -0.9850033521652222, |
|
"logits/rejected": -0.6558709144592285, |
|
"logps/chosen": -287.7508544921875, |
|
"logps/rejected": -1824.685791015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10610561072826385, |
|
"rewards/margins": 15.522783279418945, |
|
"rewards/rejected": -15.628889083862305, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.235807860262009, |
|
"grad_norm": 0.002799456551978829, |
|
"learning_rate": 3.4775687706301437e-07, |
|
"logits/chosen": -1.1057827472686768, |
|
"logits/rejected": -0.7324444055557251, |
|
"logps/chosen": -268.9285583496094, |
|
"logps/rejected": -2027.9287109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.012243424542248249, |
|
"rewards/margins": 17.31172752380371, |
|
"rewards/rejected": -17.299488067626953, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 4.279475982532751, |
|
"grad_norm": 0.0022373731136834133, |
|
"learning_rate": 3.0996840348201717e-07, |
|
"logits/chosen": -1.1121981143951416, |
|
"logits/rejected": -0.8360017538070679, |
|
"logps/chosen": -309.40155029296875, |
|
"logps/rejected": -1771.3427734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12068891525268555, |
|
"rewards/margins": 14.9802827835083, |
|
"rewards/rejected": -15.100972175598145, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 4.323144104803493, |
|
"grad_norm": 0.00044941977383980663, |
|
"learning_rate": 2.742171690228562e-07, |
|
"logits/chosen": -1.0458042621612549, |
|
"logits/rejected": -0.7098134756088257, |
|
"logps/chosen": -300.3705749511719, |
|
"logps/rejected": -1846.2822265625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13983380794525146, |
|
"rewards/margins": 15.580610275268555, |
|
"rewards/rejected": -15.720443725585938, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.366812227074236, |
|
"grad_norm": 0.002386856484501028, |
|
"learning_rate": 2.405364306547955e-07, |
|
"logits/chosen": -1.082747459411621, |
|
"logits/rejected": -0.8248895406723022, |
|
"logps/chosen": -306.8914794921875, |
|
"logps/rejected": -1777.709228515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0018977939616888762, |
|
"rewards/margins": 15.146817207336426, |
|
"rewards/rejected": -15.148715019226074, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.4104803493449785, |
|
"grad_norm": 0.0029792452832107918, |
|
"learning_rate": 2.0895751930382125e-07, |
|
"logits/chosen": -1.0091092586517334, |
|
"logits/rejected": -0.6796575784683228, |
|
"logps/chosen": -295.8236389160156, |
|
"logps/rejected": -1882.607421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.007821875624358654, |
|
"rewards/margins": 16.001935958862305, |
|
"rewards/rejected": -16.00975799560547, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.454148471615721, |
|
"grad_norm": 0.0002768349770005794, |
|
"learning_rate": 1.7950981070758488e-07, |
|
"logits/chosen": -1.0478919744491577, |
|
"logits/rejected": -0.7594279050827026, |
|
"logps/chosen": -287.52288818359375, |
|
"logps/rejected": -1790.5562744140625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0713457316160202, |
|
"rewards/margins": 15.24449634552002, |
|
"rewards/rejected": -15.315841674804688, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.497816593886463, |
|
"grad_norm": 0.005069342731202753, |
|
"learning_rate": 1.5222069808913303e-07, |
|
"logits/chosen": -1.0174686908721924, |
|
"logits/rejected": -0.683228611946106, |
|
"logps/chosen": -301.92144775390625, |
|
"logps/rejected": -1872.2666015625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.04044654220342636, |
|
"rewards/margins": 15.973681449890137, |
|
"rewards/rejected": -16.01412582397461, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 4.541484716157205, |
|
"grad_norm": 0.0004959573429988225, |
|
"learning_rate": 1.271155666748311e-07, |
|
"logits/chosen": -1.0908715724945068, |
|
"logits/rejected": -0.7660378217697144, |
|
"logps/chosen": -306.63043212890625, |
|
"logps/rejected": -1871.072509765625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09966543316841125, |
|
"rewards/margins": 15.866052627563477, |
|
"rewards/rejected": -15.965716361999512, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.585152838427947, |
|
"grad_norm": 0.00019462022413360382, |
|
"learning_rate": 1.0421777008019663e-07, |
|
"logits/chosen": -1.0952246189117432, |
|
"logits/rejected": -0.709916353225708, |
|
"logps/chosen": -295.17877197265625, |
|
"logps/rejected": -2033.247314453125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07402383536100388, |
|
"rewards/margins": 17.205215454101562, |
|
"rewards/rejected": -17.279239654541016, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.62882096069869, |
|
"grad_norm": 0.0036632249464871505, |
|
"learning_rate": 8.354860858560021e-08, |
|
"logits/chosen": -1.041784405708313, |
|
"logits/rejected": -0.6548532247543335, |
|
"logps/chosen": -293.025146484375, |
|
"logps/rejected": -2029.672607421875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0596146360039711, |
|
"rewards/margins": 17.3281307220459, |
|
"rewards/rejected": -17.387744903564453, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.672489082969433, |
|
"grad_norm": 0.00032895380717983924, |
|
"learning_rate": 6.512730932204698e-08, |
|
"logits/chosen": -1.0059609413146973, |
|
"logits/rejected": -0.7132547497749329, |
|
"logps/chosen": -317.78680419921875, |
|
"logps/rejected": -1786.2691650390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03703427314758301, |
|
"rewards/margins": 15.275805473327637, |
|
"rewards/rejected": -15.312841415405273, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.716157205240175, |
|
"grad_norm": 0.0031606785955010904, |
|
"learning_rate": 4.897100838547081e-08, |
|
"logits/chosen": -1.0450844764709473, |
|
"logits/rejected": -0.7194000482559204, |
|
"logps/chosen": -301.86541748046875, |
|
"logps/rejected": -1955.65625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05521082133054733, |
|
"rewards/margins": 16.574329376220703, |
|
"rewards/rejected": -16.62953758239746, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.759825327510917, |
|
"grad_norm": 0.000347629698955204, |
|
"learning_rate": 3.5094734896174985e-08, |
|
"logits/chosen": -1.080841302871704, |
|
"logits/rejected": -0.6831713914871216, |
|
"logps/chosen": -268.0194091796875, |
|
"logps/rejected": -1998.539306640625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.04163534566760063, |
|
"rewards/margins": 16.94226837158203, |
|
"rewards/rejected": -16.98390007019043, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.8034934497816595, |
|
"grad_norm": 0.004295417718120041, |
|
"learning_rate": 2.351139701825267e-08, |
|
"logits/chosen": -0.9642395973205566, |
|
"logits/rejected": -0.6139574646949768, |
|
"logps/chosen": -276.0238037109375, |
|
"logps/rejected": -1780.542236328125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0755363255739212, |
|
"rewards/margins": 15.09814167022705, |
|
"rewards/rejected": -15.173677444458008, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.847161572052402, |
|
"grad_norm": 0.00037246559243259353, |
|
"learning_rate": 1.4231769951990326e-08, |
|
"logits/chosen": -1.033734679222107, |
|
"logits/rejected": -0.6827409267425537, |
|
"logps/chosen": -287.67584228515625, |
|
"logps/rejected": -1885.5576171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08407995104789734, |
|
"rewards/margins": 16.050437927246094, |
|
"rewards/rejected": -16.134517669677734, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.890829694323144, |
|
"grad_norm": 0.0003852961387970997, |
|
"learning_rate": 7.264485910423447e-09, |
|
"logits/chosen": -0.9720686078071594, |
|
"logits/rejected": -0.5935501456260681, |
|
"logps/chosen": -265.44622802734375, |
|
"logps/rejected": -1915.1038818359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.024737151339650154, |
|
"rewards/margins": 16.284820556640625, |
|
"rewards/rejected": -16.309558868408203, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.934497816593886, |
|
"grad_norm": 0.015272373299215733, |
|
"learning_rate": 2.6160260893692833e-09, |
|
"logits/chosen": -1.0101922750473022, |
|
"logits/rejected": -0.6879107356071472, |
|
"logps/chosen": -289.36138916015625, |
|
"logps/rejected": -1796.4605712890625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.028253447264432907, |
|
"rewards/margins": 15.371627807617188, |
|
"rewards/rejected": -15.34337329864502, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.978165938864628, |
|
"grad_norm": 0.0002424398537573296, |
|
"learning_rate": 2.9071463840540936e-10, |
|
"logits/chosen": -1.0956311225891113, |
|
"logits/rejected": -0.8314415216445923, |
|
"logps/chosen": -341.66766357421875, |
|
"logps/rejected": -1909.0863037109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11481740325689316, |
|
"rewards/margins": 16.255878448486328, |
|
"rewards/rejected": -16.370695114135742, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 1145, |
|
"total_flos": 0.0, |
|
"train_loss": 0.046525747915877304, |
|
"train_runtime": 10909.6849, |
|
"train_samples_per_second": 6.711, |
|
"train_steps_per_second": 0.105 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1145, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|