|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9946666666666668, |
|
"eval_steps": 1000, |
|
"global_step": 374, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005333333333333333, |
|
"grad_norm": 0.5672486208114706, |
|
"learning_rate": 1.3157894736842107e-07, |
|
"logits/chosen": -0.9279001951217651, |
|
"logits/rejected": -0.858139157295227, |
|
"logps/chosen": -227.95245361328125, |
|
"logps/rejected": -298.680908203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05333333333333334, |
|
"grad_norm": 0.627338544388044, |
|
"learning_rate": 1.3157894736842106e-06, |
|
"logits/chosen": -1.0396056175231934, |
|
"logits/rejected": -1.0286777019500732, |
|
"logps/chosen": -272.0198974609375, |
|
"logps/rejected": -275.8685302734375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 0.0009284570114687085, |
|
"rewards/margins": 0.001860518823377788, |
|
"rewards/rejected": -0.0009320618119090796, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.10666666666666667, |
|
"grad_norm": 0.6142473047006762, |
|
"learning_rate": 2.631578947368421e-06, |
|
"logits/chosen": -0.9923893809318542, |
|
"logits/rejected": -1.010837197303772, |
|
"logps/chosen": -281.47979736328125, |
|
"logps/rejected": -268.1535949707031, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.006842092610895634, |
|
"rewards/margins": 0.010892460122704506, |
|
"rewards/rejected": -0.0040503679774701595, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.7939072580410426, |
|
"learning_rate": 3.947368421052632e-06, |
|
"logits/chosen": -1.0665647983551025, |
|
"logits/rejected": -1.030176043510437, |
|
"logps/chosen": -265.56134033203125, |
|
"logps/rejected": -276.7889404296875, |
|
"loss": 0.658, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03331710770726204, |
|
"rewards/margins": 0.06989365816116333, |
|
"rewards/rejected": -0.03657654672861099, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.21333333333333335, |
|
"grad_norm": 1.582486884512004, |
|
"learning_rate": 4.999562902281866e-06, |
|
"logits/chosen": -1.0957633256912231, |
|
"logits/rejected": -1.0948419570922852, |
|
"logps/chosen": -266.51983642578125, |
|
"logps/rejected": -322.9562072753906, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11751838773488998, |
|
"rewards/margins": 0.3626277446746826, |
|
"rewards/rejected": -0.24510934948921204, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26666666666666666, |
|
"grad_norm": 0.8169663169771063, |
|
"learning_rate": 4.984280524733107e-06, |
|
"logits/chosen": -1.1005247831344604, |
|
"logits/rejected": -1.1034621000289917, |
|
"logps/chosen": -270.61724853515625, |
|
"logps/rejected": -416.21142578125, |
|
"loss": 0.2383, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15088006854057312, |
|
"rewards/margins": 1.5602823495864868, |
|
"rewards/rejected": -1.4094021320343018, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3705635446867794, |
|
"learning_rate": 4.947295864744121e-06, |
|
"logits/chosen": -1.1092312335968018, |
|
"logits/rejected": -1.0661927461624146, |
|
"logps/chosen": -294.2104797363281, |
|
"logps/rejected": -600.4303588867188, |
|
"loss": 0.0671, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08243656903505325, |
|
"rewards/margins": 3.372529983520508, |
|
"rewards/rejected": -3.2900936603546143, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.37333333333333335, |
|
"grad_norm": 0.09049040384751605, |
|
"learning_rate": 4.8889320144653525e-06, |
|
"logits/chosen": -1.0999512672424316, |
|
"logits/rejected": -0.9580685496330261, |
|
"logps/chosen": -330.55194091796875, |
|
"logps/rejected": -985.8513793945312, |
|
"loss": 0.0161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5064759254455566, |
|
"rewards/margins": 6.231157302856445, |
|
"rewards/rejected": -6.73763370513916, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 0.03915438076032923, |
|
"learning_rate": 4.809698831278217e-06, |
|
"logits/chosen": -0.9111706018447876, |
|
"logits/rejected": -0.7140064835548401, |
|
"logps/chosen": -339.89349365234375, |
|
"logps/rejected": -1143.648681640625, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.66388338804245, |
|
"rewards/margins": 7.908216953277588, |
|
"rewards/rejected": -8.572099685668945, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.14297159038439752, |
|
"learning_rate": 4.710288483761524e-06, |
|
"logits/chosen": -0.8663455247879028, |
|
"logits/rejected": -0.5593339800834656, |
|
"logps/chosen": -336.32037353515625, |
|
"logps/rejected": -1434.2740478515625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8545030355453491, |
|
"rewards/margins": 10.544784545898438, |
|
"rewards/rejected": -11.399286270141602, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.040959450492445315, |
|
"learning_rate": 4.59156940501605e-06, |
|
"logits/chosen": -0.9001060724258423, |
|
"logits/rejected": -0.6381432414054871, |
|
"logps/chosen": -347.5130310058594, |
|
"logps/rejected": -1511.1595458984375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7411862015724182, |
|
"rewards/margins": 11.320222854614258, |
|
"rewards/rejected": -12.061409950256348, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5866666666666667, |
|
"grad_norm": 0.010465140313811594, |
|
"learning_rate": 4.454578706170075e-06, |
|
"logits/chosen": -0.8447334170341492, |
|
"logits/rejected": -0.5461128950119019, |
|
"logps/chosen": -324.555908203125, |
|
"logps/rejected": -1563.4332275390625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7364819645881653, |
|
"rewards/margins": 11.867830276489258, |
|
"rewards/rejected": -12.604310989379883, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.018698800124617214, |
|
"learning_rate": 4.300513116340317e-06, |
|
"logits/chosen": -0.869040846824646, |
|
"logits/rejected": -0.6451767086982727, |
|
"logps/chosen": -368.4622497558594, |
|
"logps/rejected": -1524.347412109375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5922040939331055, |
|
"rewards/margins": 11.815667152404785, |
|
"rewards/rejected": -12.407870292663574, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6933333333333334, |
|
"grad_norm": 0.13074428382691303, |
|
"learning_rate": 4.130718528195303e-06, |
|
"logits/chosen": -0.7969690561294556, |
|
"logits/rejected": -0.5475348234176636, |
|
"logps/chosen": -344.4666442871094, |
|
"logps/rejected": -1474.3719482421875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5560614466667175, |
|
"rewards/margins": 11.487146377563477, |
|
"rewards/rejected": -12.043208122253418, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7466666666666667, |
|
"grad_norm": 0.007599436316894573, |
|
"learning_rate": 3.946678240449515e-06, |
|
"logits/chosen": -0.8450958132743835, |
|
"logits/rejected": -0.6068762540817261, |
|
"logps/chosen": -330.353271484375, |
|
"logps/rejected": -1493.345947265625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6169044375419617, |
|
"rewards/margins": 11.569721221923828, |
|
"rewards/rejected": -12.186625480651855, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.007906481570741206, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -0.7317169308662415, |
|
"logits/rejected": -0.472684770822525, |
|
"logps/chosen": -334.15936279296875, |
|
"logps/rejected": -1581.9859619140625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6376131176948547, |
|
"rewards/margins": 12.395392417907715, |
|
"rewards/rejected": -13.03300666809082, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 0.005373942509470849, |
|
"learning_rate": 3.542401956903321e-06, |
|
"logits/chosen": -0.802183985710144, |
|
"logits/rejected": -0.517475962638855, |
|
"logps/chosen": -346.039306640625, |
|
"logps/rejected": -1731.537841796875, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6884077787399292, |
|
"rewards/margins": 13.671854972839355, |
|
"rewards/rejected": -14.360262870788574, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9066666666666666, |
|
"grad_norm": 0.006855807463409515, |
|
"learning_rate": 3.3256976548879183e-06, |
|
"logits/chosen": -0.7976305484771729, |
|
"logits/rejected": -0.48461779952049255, |
|
"logps/chosen": -332.21539306640625, |
|
"logps/rejected": -1720.519775390625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6103914380073547, |
|
"rewards/margins": 13.717634201049805, |
|
"rewards/rejected": -14.328027725219727, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.006530794838523059, |
|
"learning_rate": 3.1017801885224332e-06, |
|
"logits/chosen": -0.8089723587036133, |
|
"logits/rejected": -0.547804594039917, |
|
"logps/chosen": -331.06561279296875, |
|
"logps/rejected": -1655.431640625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4778788685798645, |
|
"rewards/margins": 13.217842102050781, |
|
"rewards/rejected": -13.695721626281738, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0133333333333334, |
|
"grad_norm": 0.002402189687585693, |
|
"learning_rate": 2.872605665440436e-06, |
|
"logits/chosen": -0.8274615406990051, |
|
"logits/rejected": -0.6256132125854492, |
|
"logps/chosen": -343.0153503417969, |
|
"logps/rejected": -1610.4468994140625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.47418123483657837, |
|
"rewards/margins": 12.88371753692627, |
|
"rewards/rejected": -13.357897758483887, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.0030571071844198616, |
|
"learning_rate": 2.6401761180929798e-06, |
|
"logits/chosen": -0.8143685460090637, |
|
"logits/rejected": -0.5041629076004028, |
|
"logps/chosen": -334.1429138183594, |
|
"logps/rejected": -1818.0869140625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.558273434638977, |
|
"rewards/margins": 14.656936645507812, |
|
"rewards/rejected": -15.2152099609375, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.00820790094406569, |
|
"learning_rate": 2.4065220143091863e-06, |
|
"logits/chosen": -0.8139235377311707, |
|
"logits/rejected": -0.564848780632019, |
|
"logps/chosen": -345.9544982910156, |
|
"logps/rejected": -1764.953125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.475874662399292, |
|
"rewards/margins": 14.230936050415039, |
|
"rewards/rejected": -14.706808090209961, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.1733333333333333, |
|
"grad_norm": 0.0043868434266810755, |
|
"learning_rate": 2.173684519449872e-06, |
|
"logits/chosen": -0.7210798263549805, |
|
"logits/rejected": -0.36144906282424927, |
|
"logps/chosen": -327.31622314453125, |
|
"logps/rejected": -1841.2216796875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6532616019248962, |
|
"rewards/margins": 14.736944198608398, |
|
"rewards/rejected": -15.39020824432373, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.2266666666666666, |
|
"grad_norm": 0.0068525897764614785, |
|
"learning_rate": 1.9436976651092143e-06, |
|
"logits/chosen": -0.7221536636352539, |
|
"logits/rejected": -0.5240283012390137, |
|
"logps/chosen": -350.7161865234375, |
|
"logps/rejected": -1619.64599609375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.45207110047340393, |
|
"rewards/margins": 13.1558198928833, |
|
"rewards/rejected": -13.607892990112305, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.0033954692926207583, |
|
"learning_rate": 1.7185705801358892e-06, |
|
"logits/chosen": -0.8645750880241394, |
|
"logits/rejected": -0.6266194581985474, |
|
"logps/chosen": -343.2956848144531, |
|
"logps/rejected": -1709.9945068359375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.46298861503601074, |
|
"rewards/margins": 13.744463920593262, |
|
"rewards/rejected": -14.2074556350708, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.008787649845585386, |
|
"learning_rate": 1.500269939200648e-06, |
|
"logits/chosen": -0.7889136075973511, |
|
"logits/rejected": -0.5392887592315674, |
|
"logps/chosen": -342.34405517578125, |
|
"logps/rejected": -1703.201171875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.380669504404068, |
|
"rewards/margins": 13.953561782836914, |
|
"rewards/rejected": -14.334230422973633, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.3866666666666667, |
|
"grad_norm": 0.0019219492245800027, |
|
"learning_rate": 1.2907027822369006e-06, |
|
"logits/chosen": -0.8140700459480286, |
|
"logits/rejected": -0.5784817337989807, |
|
"logps/chosen": -325.9125061035156, |
|
"logps/rejected": -1678.7923583984375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.44779616594314575, |
|
"rewards/margins": 13.65271282196045, |
|
"rewards/rejected": -14.100509643554688, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.0029707873083702468, |
|
"learning_rate": 1.0916998548409449e-06, |
|
"logits/chosen": -0.7845500111579895, |
|
"logits/rejected": -0.5012301206588745, |
|
"logps/chosen": -358.30419921875, |
|
"logps/rejected": -1745.314697265625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6766600608825684, |
|
"rewards/margins": 13.99413013458252, |
|
"rewards/rejected": -14.67078971862793, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.4933333333333334, |
|
"grad_norm": 0.003009319728743961, |
|
"learning_rate": 9.04999615167479e-07, |
|
"logits/chosen": -0.8230724334716797, |
|
"logits/rejected": -0.550376832485199, |
|
"logps/chosen": -346.7623596191406, |
|
"logps/rejected": -1745.7318115234375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5431645512580872, |
|
"rewards/margins": 14.117253303527832, |
|
"rewards/rejected": -14.660417556762695, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.5466666666666666, |
|
"grad_norm": 0.05446004244419421, |
|
"learning_rate": 7.322330470336314e-07, |
|
"logits/chosen": -0.8042120933532715, |
|
"logits/rejected": -0.4271600842475891, |
|
"logps/chosen": -319.5382080078125, |
|
"logps/rejected": -1869.0816650390625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6211446523666382, |
|
"rewards/margins": 15.130853652954102, |
|
"rewards/rejected": -15.751996994018555, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.0026251482158599366, |
|
"learning_rate": 5.749094119018431e-07, |
|
"logits/chosen": -0.8732158541679382, |
|
"logits/rejected": -0.5424922704696655, |
|
"logps/chosen": -324.2518005371094, |
|
"logps/rejected": -1899.756591796875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.514877438545227, |
|
"rewards/margins": 15.413823127746582, |
|
"rewards/rejected": -15.928703308105469, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6533333333333333, |
|
"grad_norm": 0.0023975764198324104, |
|
"learning_rate": 4.344030642100133e-07, |
|
"logits/chosen": -0.8402504920959473, |
|
"logits/rejected": -0.5483088493347168, |
|
"logps/chosen": -330.42828369140625, |
|
"logps/rejected": -1794.327392578125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4678085446357727, |
|
"rewards/margins": 14.648382186889648, |
|
"rewards/rejected": -15.116189956665039, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 0.0028734478026904357, |
|
"learning_rate": 3.119414452281158e-07, |
|
"logits/chosen": -0.8355986475944519, |
|
"logits/rejected": -0.5001510977745056, |
|
"logps/chosen": -328.2898254394531, |
|
"logps/rejected": -1902.9224853515625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.504935622215271, |
|
"rewards/margins": 15.536231994628906, |
|
"rewards/rejected": -16.041166305541992, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.002690221439778056, |
|
"learning_rate": 2.0859436032505954e-07, |
|
"logits/chosen": -0.896633505821228, |
|
"logits/rejected": -0.6399273872375488, |
|
"logps/chosen": -357.3143615722656, |
|
"logps/rejected": -1739.0318603515625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.39237022399902344, |
|
"rewards/margins": 14.242953300476074, |
|
"rewards/rejected": -14.635324478149414, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.8133333333333335, |
|
"grad_norm": 0.002170040138657762, |
|
"learning_rate": 1.2526463331788503e-07, |
|
"logits/chosen": -0.847479522228241, |
|
"logits/rejected": -0.6152299642562866, |
|
"logps/chosen": -348.79742431640625, |
|
"logps/rejected": -1842.7880859375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.48484006524086, |
|
"rewards/margins": 15.058262825012207, |
|
"rewards/rejected": -15.54310131072998, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.8666666666666667, |
|
"grad_norm": 0.003300394252196583, |
|
"learning_rate": 6.268021954544095e-08, |
|
"logits/chosen": -0.8356849551200867, |
|
"logits/rejected": -0.4748550355434418, |
|
"logps/chosen": -336.01373291015625, |
|
"logps/rejected": -1930.8939208984375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5835164785385132, |
|
"rewards/margins": 15.679295539855957, |
|
"rewards/rejected": -16.2628116607666, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.0026600066446259086, |
|
"learning_rate": 2.1387846565474047e-08, |
|
"logits/chosen": -0.8271343111991882, |
|
"logits/rejected": -0.5607911348342896, |
|
"logps/chosen": -359.8587341308594, |
|
"logps/rejected": -1746.127685546875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4917505383491516, |
|
"rewards/margins": 14.259208679199219, |
|
"rewards/rejected": -14.750958442687988, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.9733333333333334, |
|
"grad_norm": 0.014374372097938156, |
|
"learning_rate": 1.7482380290034795e-09, |
|
"logits/chosen": -0.807357668876648, |
|
"logits/rejected": -0.4851298928260803, |
|
"logps/chosen": -321.13861083984375, |
|
"logps/rejected": -1839.5152587890625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5102803111076355, |
|
"rewards/margins": 15.034269332885742, |
|
"rewards/rejected": -15.544550895690918, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.9946666666666668, |
|
"step": 374, |
|
"total_flos": 0.0, |
|
"train_loss": 0.07791340151829097, |
|
"train_runtime": 6908.9348, |
|
"train_samples_per_second": 3.474, |
|
"train_steps_per_second": 0.054 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 374, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|