Qin Liu
Model save
ba0be3a verified
raw
history blame
20.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9946666666666668,
"eval_steps": 1000,
"global_step": 374,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005333333333333333,
"grad_norm": 0.5672486208114706,
"learning_rate": 1.3157894736842107e-07,
"logits/chosen": -0.9279001951217651,
"logits/rejected": -0.858139157295227,
"logps/chosen": -227.95245361328125,
"logps/rejected": -298.680908203125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.05333333333333334,
"grad_norm": 0.627338544388044,
"learning_rate": 1.3157894736842106e-06,
"logits/chosen": -1.0396056175231934,
"logits/rejected": -1.0286777019500732,
"logps/chosen": -272.0198974609375,
"logps/rejected": -275.8685302734375,
"loss": 0.693,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": 0.0009284570114687085,
"rewards/margins": 0.001860518823377788,
"rewards/rejected": -0.0009320618119090796,
"step": 10
},
{
"epoch": 0.10666666666666667,
"grad_norm": 0.6142473047006762,
"learning_rate": 2.631578947368421e-06,
"logits/chosen": -0.9923893809318542,
"logits/rejected": -1.010837197303772,
"logps/chosen": -281.47979736328125,
"logps/rejected": -268.1535949707031,
"loss": 0.6876,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.006842092610895634,
"rewards/margins": 0.010892460122704506,
"rewards/rejected": -0.0040503679774701595,
"step": 20
},
{
"epoch": 0.16,
"grad_norm": 0.7939072580410426,
"learning_rate": 3.947368421052632e-06,
"logits/chosen": -1.0665647983551025,
"logits/rejected": -1.030176043510437,
"logps/chosen": -265.56134033203125,
"logps/rejected": -276.7889404296875,
"loss": 0.658,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.03331710770726204,
"rewards/margins": 0.06989365816116333,
"rewards/rejected": -0.03657654672861099,
"step": 30
},
{
"epoch": 0.21333333333333335,
"grad_norm": 1.582486884512004,
"learning_rate": 4.999562902281866e-06,
"logits/chosen": -1.0957633256912231,
"logits/rejected": -1.0948419570922852,
"logps/chosen": -266.51983642578125,
"logps/rejected": -322.9562072753906,
"loss": 0.5339,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.11751838773488998,
"rewards/margins": 0.3626277446746826,
"rewards/rejected": -0.24510934948921204,
"step": 40
},
{
"epoch": 0.26666666666666666,
"grad_norm": 0.8169663169771063,
"learning_rate": 4.984280524733107e-06,
"logits/chosen": -1.1005247831344604,
"logits/rejected": -1.1034621000289917,
"logps/chosen": -270.61724853515625,
"logps/rejected": -416.21142578125,
"loss": 0.2383,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.15088006854057312,
"rewards/margins": 1.5602823495864868,
"rewards/rejected": -1.4094021320343018,
"step": 50
},
{
"epoch": 0.32,
"grad_norm": 0.3705635446867794,
"learning_rate": 4.947295864744121e-06,
"logits/chosen": -1.1092312335968018,
"logits/rejected": -1.0661927461624146,
"logps/chosen": -294.2104797363281,
"logps/rejected": -600.4303588867188,
"loss": 0.0671,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.08243656903505325,
"rewards/margins": 3.372529983520508,
"rewards/rejected": -3.2900936603546143,
"step": 60
},
{
"epoch": 0.37333333333333335,
"grad_norm": 0.09049040384751605,
"learning_rate": 4.8889320144653525e-06,
"logits/chosen": -1.0999512672424316,
"logits/rejected": -0.9580685496330261,
"logps/chosen": -330.55194091796875,
"logps/rejected": -985.8513793945312,
"loss": 0.0161,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5064759254455566,
"rewards/margins": 6.231157302856445,
"rewards/rejected": -6.73763370513916,
"step": 70
},
{
"epoch": 0.4266666666666667,
"grad_norm": 0.03915438076032923,
"learning_rate": 4.809698831278217e-06,
"logits/chosen": -0.9111706018447876,
"logits/rejected": -0.7140064835548401,
"logps/chosen": -339.89349365234375,
"logps/rejected": -1143.648681640625,
"loss": 0.0051,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.66388338804245,
"rewards/margins": 7.908216953277588,
"rewards/rejected": -8.572099685668945,
"step": 80
},
{
"epoch": 0.48,
"grad_norm": 0.14297159038439752,
"learning_rate": 4.710288483761524e-06,
"logits/chosen": -0.8663455247879028,
"logits/rejected": -0.5593339800834656,
"logps/chosen": -336.32037353515625,
"logps/rejected": -1434.2740478515625,
"loss": 0.0022,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8545030355453491,
"rewards/margins": 10.544784545898438,
"rewards/rejected": -11.399286270141602,
"step": 90
},
{
"epoch": 0.5333333333333333,
"grad_norm": 0.040959450492445315,
"learning_rate": 4.59156940501605e-06,
"logits/chosen": -0.9001060724258423,
"logits/rejected": -0.6381432414054871,
"logps/chosen": -347.5130310058594,
"logps/rejected": -1511.1595458984375,
"loss": 0.0013,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7411862015724182,
"rewards/margins": 11.320222854614258,
"rewards/rejected": -12.061409950256348,
"step": 100
},
{
"epoch": 0.5866666666666667,
"grad_norm": 0.010465140313811594,
"learning_rate": 4.454578706170075e-06,
"logits/chosen": -0.8447334170341492,
"logits/rejected": -0.5461128950119019,
"logps/chosen": -324.555908203125,
"logps/rejected": -1563.4332275390625,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.7364819645881653,
"rewards/margins": 11.867830276489258,
"rewards/rejected": -12.604310989379883,
"step": 110
},
{
"epoch": 0.64,
"grad_norm": 0.018698800124617214,
"learning_rate": 4.300513116340317e-06,
"logits/chosen": -0.869040846824646,
"logits/rejected": -0.6451767086982727,
"logps/chosen": -368.4622497558594,
"logps/rejected": -1524.347412109375,
"loss": 0.0013,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5922040939331055,
"rewards/margins": 11.815667152404785,
"rewards/rejected": -12.407870292663574,
"step": 120
},
{
"epoch": 0.6933333333333334,
"grad_norm": 0.13074428382691303,
"learning_rate": 4.130718528195303e-06,
"logits/chosen": -0.7969690561294556,
"logits/rejected": -0.5475348234176636,
"logps/chosen": -344.4666442871094,
"logps/rejected": -1474.3719482421875,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5560614466667175,
"rewards/margins": 11.487146377563477,
"rewards/rejected": -12.043208122253418,
"step": 130
},
{
"epoch": 0.7466666666666667,
"grad_norm": 0.007599436316894573,
"learning_rate": 3.946678240449515e-06,
"logits/chosen": -0.8450958132743835,
"logits/rejected": -0.6068762540817261,
"logps/chosen": -330.353271484375,
"logps/rejected": -1493.345947265625,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6169044375419617,
"rewards/margins": 11.569721221923828,
"rewards/rejected": -12.186625480651855,
"step": 140
},
{
"epoch": 0.8,
"grad_norm": 0.007906481570741206,
"learning_rate": 3.7500000000000005e-06,
"logits/chosen": -0.7317169308662415,
"logits/rejected": -0.472684770822525,
"logps/chosen": -334.15936279296875,
"logps/rejected": -1581.9859619140625,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6376131176948547,
"rewards/margins": 12.395392417907715,
"rewards/rejected": -13.03300666809082,
"step": 150
},
{
"epoch": 0.8533333333333334,
"grad_norm": 0.005373942509470849,
"learning_rate": 3.542401956903321e-06,
"logits/chosen": -0.802183985710144,
"logits/rejected": -0.517475962638855,
"logps/chosen": -346.039306640625,
"logps/rejected": -1731.537841796875,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6884077787399292,
"rewards/margins": 13.671854972839355,
"rewards/rejected": -14.360262870788574,
"step": 160
},
{
"epoch": 0.9066666666666666,
"grad_norm": 0.006855807463409515,
"learning_rate": 3.3256976548879183e-06,
"logits/chosen": -0.7976305484771729,
"logits/rejected": -0.48461779952049255,
"logps/chosen": -332.21539306640625,
"logps/rejected": -1720.519775390625,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6103914380073547,
"rewards/margins": 13.717634201049805,
"rewards/rejected": -14.328027725219727,
"step": 170
},
{
"epoch": 0.96,
"grad_norm": 0.006530794838523059,
"learning_rate": 3.1017801885224332e-06,
"logits/chosen": -0.8089723587036133,
"logits/rejected": -0.547804594039917,
"logps/chosen": -331.06561279296875,
"logps/rejected": -1655.431640625,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.4778788685798645,
"rewards/margins": 13.217842102050781,
"rewards/rejected": -13.695721626281738,
"step": 180
},
{
"epoch": 1.0133333333333334,
"grad_norm": 0.002402189687585693,
"learning_rate": 2.872605665440436e-06,
"logits/chosen": -0.8274615406990051,
"logits/rejected": -0.6256132125854492,
"logps/chosen": -343.0153503417969,
"logps/rejected": -1610.4468994140625,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.47418123483657837,
"rewards/margins": 12.88371753692627,
"rewards/rejected": -13.357897758483887,
"step": 190
},
{
"epoch": 1.0666666666666667,
"grad_norm": 0.0030571071844198616,
"learning_rate": 2.6401761180929798e-06,
"logits/chosen": -0.8143685460090637,
"logits/rejected": -0.5041629076004028,
"logps/chosen": -334.1429138183594,
"logps/rejected": -1818.0869140625,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.558273434638977,
"rewards/margins": 14.656936645507812,
"rewards/rejected": -15.2152099609375,
"step": 200
},
{
"epoch": 1.12,
"grad_norm": 0.00820790094406569,
"learning_rate": 2.4065220143091863e-06,
"logits/chosen": -0.8139235377311707,
"logits/rejected": -0.564848780632019,
"logps/chosen": -345.9544982910156,
"logps/rejected": -1764.953125,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.475874662399292,
"rewards/margins": 14.230936050415039,
"rewards/rejected": -14.706808090209961,
"step": 210
},
{
"epoch": 1.1733333333333333,
"grad_norm": 0.0043868434266810755,
"learning_rate": 2.173684519449872e-06,
"logits/chosen": -0.7210798263549805,
"logits/rejected": -0.36144906282424927,
"logps/chosen": -327.31622314453125,
"logps/rejected": -1841.2216796875,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6532616019248962,
"rewards/margins": 14.736944198608398,
"rewards/rejected": -15.39020824432373,
"step": 220
},
{
"epoch": 1.2266666666666666,
"grad_norm": 0.0068525897764614785,
"learning_rate": 1.9436976651092143e-06,
"logits/chosen": -0.7221536636352539,
"logits/rejected": -0.5240283012390137,
"logps/chosen": -350.7161865234375,
"logps/rejected": -1619.64599609375,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.45207110047340393,
"rewards/margins": 13.1558198928833,
"rewards/rejected": -13.607892990112305,
"step": 230
},
{
"epoch": 1.28,
"grad_norm": 0.0033954692926207583,
"learning_rate": 1.7185705801358892e-06,
"logits/chosen": -0.8645750880241394,
"logits/rejected": -0.6266194581985474,
"logps/chosen": -343.2956848144531,
"logps/rejected": -1709.9945068359375,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.46298861503601074,
"rewards/margins": 13.744463920593262,
"rewards/rejected": -14.2074556350708,
"step": 240
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.008787649845585386,
"learning_rate": 1.500269939200648e-06,
"logits/chosen": -0.7889136075973511,
"logits/rejected": -0.5392887592315674,
"logps/chosen": -342.34405517578125,
"logps/rejected": -1703.201171875,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.380669504404068,
"rewards/margins": 13.953561782836914,
"rewards/rejected": -14.334230422973633,
"step": 250
},
{
"epoch": 1.3866666666666667,
"grad_norm": 0.0019219492245800027,
"learning_rate": 1.2907027822369006e-06,
"logits/chosen": -0.8140700459480286,
"logits/rejected": -0.5784817337989807,
"logps/chosen": -325.9125061035156,
"logps/rejected": -1678.7923583984375,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.44779616594314575,
"rewards/margins": 13.65271282196045,
"rewards/rejected": -14.100509643554688,
"step": 260
},
{
"epoch": 1.44,
"grad_norm": 0.0029707873083702468,
"learning_rate": 1.0916998548409449e-06,
"logits/chosen": -0.7845500111579895,
"logits/rejected": -0.5012301206588745,
"logps/chosen": -358.30419921875,
"logps/rejected": -1745.314697265625,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6766600608825684,
"rewards/margins": 13.99413013458252,
"rewards/rejected": -14.67078971862793,
"step": 270
},
{
"epoch": 1.4933333333333334,
"grad_norm": 0.003009319728743961,
"learning_rate": 9.04999615167479e-07,
"logits/chosen": -0.8230724334716797,
"logits/rejected": -0.550376832485199,
"logps/chosen": -346.7623596191406,
"logps/rejected": -1745.7318115234375,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5431645512580872,
"rewards/margins": 14.117253303527832,
"rewards/rejected": -14.660417556762695,
"step": 280
},
{
"epoch": 1.5466666666666666,
"grad_norm": 0.05446004244419421,
"learning_rate": 7.322330470336314e-07,
"logits/chosen": -0.8042120933532715,
"logits/rejected": -0.4271600842475891,
"logps/chosen": -319.5382080078125,
"logps/rejected": -1869.0816650390625,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6211446523666382,
"rewards/margins": 15.130853652954102,
"rewards/rejected": -15.751996994018555,
"step": 290
},
{
"epoch": 1.6,
"grad_norm": 0.0026251482158599366,
"learning_rate": 5.749094119018431e-07,
"logits/chosen": -0.8732158541679382,
"logits/rejected": -0.5424922704696655,
"logps/chosen": -324.2518005371094,
"logps/rejected": -1899.756591796875,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.514877438545227,
"rewards/margins": 15.413823127746582,
"rewards/rejected": -15.928703308105469,
"step": 300
},
{
"epoch": 1.6533333333333333,
"grad_norm": 0.0023975764198324104,
"learning_rate": 4.344030642100133e-07,
"logits/chosen": -0.8402504920959473,
"logits/rejected": -0.5483088493347168,
"logps/chosen": -330.42828369140625,
"logps/rejected": -1794.327392578125,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.4678085446357727,
"rewards/margins": 14.648382186889648,
"rewards/rejected": -15.116189956665039,
"step": 310
},
{
"epoch": 1.7066666666666666,
"grad_norm": 0.0028734478026904357,
"learning_rate": 3.119414452281158e-07,
"logits/chosen": -0.8355986475944519,
"logits/rejected": -0.5001510977745056,
"logps/chosen": -328.2898254394531,
"logps/rejected": -1902.9224853515625,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.504935622215271,
"rewards/margins": 15.536231994628906,
"rewards/rejected": -16.041166305541992,
"step": 320
},
{
"epoch": 1.76,
"grad_norm": 0.002690221439778056,
"learning_rate": 2.0859436032505954e-07,
"logits/chosen": -0.896633505821228,
"logits/rejected": -0.6399273872375488,
"logps/chosen": -357.3143615722656,
"logps/rejected": -1739.0318603515625,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.39237022399902344,
"rewards/margins": 14.242953300476074,
"rewards/rejected": -14.635324478149414,
"step": 330
},
{
"epoch": 1.8133333333333335,
"grad_norm": 0.002170040138657762,
"learning_rate": 1.2526463331788503e-07,
"logits/chosen": -0.847479522228241,
"logits/rejected": -0.6152299642562866,
"logps/chosen": -348.79742431640625,
"logps/rejected": -1842.7880859375,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.48484006524086,
"rewards/margins": 15.058262825012207,
"rewards/rejected": -15.54310131072998,
"step": 340
},
{
"epoch": 1.8666666666666667,
"grad_norm": 0.003300394252196583,
"learning_rate": 6.268021954544095e-08,
"logits/chosen": -0.8356849551200867,
"logits/rejected": -0.4748550355434418,
"logps/chosen": -336.01373291015625,
"logps/rejected": -1930.8939208984375,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5835164785385132,
"rewards/margins": 15.679295539855957,
"rewards/rejected": -16.2628116607666,
"step": 350
},
{
"epoch": 1.92,
"grad_norm": 0.0026600066446259086,
"learning_rate": 2.1387846565474047e-08,
"logits/chosen": -0.8271343111991882,
"logits/rejected": -0.5607911348342896,
"logps/chosen": -359.8587341308594,
"logps/rejected": -1746.127685546875,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.4917505383491516,
"rewards/margins": 14.259208679199219,
"rewards/rejected": -14.750958442687988,
"step": 360
},
{
"epoch": 1.9733333333333334,
"grad_norm": 0.014374372097938156,
"learning_rate": 1.7482380290034795e-09,
"logits/chosen": -0.807357668876648,
"logits/rejected": -0.4851298928260803,
"logps/chosen": -321.13861083984375,
"logps/rejected": -1839.5152587890625,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5102803111076355,
"rewards/margins": 15.034269332885742,
"rewards/rejected": -15.544550895690918,
"step": 370
},
{
"epoch": 1.9946666666666668,
"step": 374,
"total_flos": 0.0,
"train_loss": 0.07791340151829097,
"train_runtime": 6908.9348,
"train_samples_per_second": 3.474,
"train_steps_per_second": 0.054
}
],
"logging_steps": 10,
"max_steps": 374,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}