zephyr-7b-dpo-qlora / trainer_state.json
jikaixuan's picture
Model save
49b9b9f verified
raw
history blame
26.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.998691442030882,
"eval_steps": 100,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -2.856400966644287,
"logits/rejected": -2.6539194583892822,
"logps/chosen": -302.289794921875,
"logps/rejected": -253.04373168945312,
"loss": 2500.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 1.0416666666666667e-06,
"logits/chosen": -2.5851330757141113,
"logits/rejected": -2.6188478469848633,
"logps/chosen": -265.6952209472656,
"logps/rejected": -261.4213562011719,
"loss": 2495.385,
"rewards/accuracies": 0.4375,
"rewards/chosen": 0.005977082531899214,
"rewards/margins": 0.0005994850071147084,
"rewards/rejected": 0.005377596709877253,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 2.0833333333333334e-06,
"logits/chosen": -2.6101512908935547,
"logits/rejected": -2.5939109325408936,
"logps/chosen": -255.68185424804688,
"logps/rejected": -248.1254119873047,
"loss": 2457.86,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": 0.013690793886780739,
"rewards/margins": 0.00916606467217207,
"rewards/rejected": 0.004524729214608669,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 3.125e-06,
"logits/chosen": -2.604323148727417,
"logits/rejected": -2.598053455352783,
"logps/chosen": -254.423095703125,
"logps/rejected": -226.73153686523438,
"loss": 2402.3988,
"rewards/accuracies": 0.703125,
"rewards/chosen": 0.01266755722463131,
"rewards/margins": 0.024019470438361168,
"rewards/rejected": -0.01135191135108471,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 4.166666666666667e-06,
"logits/chosen": -2.6043972969055176,
"logits/rejected": -2.582412004470825,
"logps/chosen": -279.12042236328125,
"logps/rejected": -241.2065887451172,
"loss": 2290.4264,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": 0.024520257487893105,
"rewards/margins": 0.0557018406689167,
"rewards/rejected": -0.031181585043668747,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 4.999731868769027e-06,
"logits/chosen": -2.531161308288574,
"logits/rejected": -2.5264387130737305,
"logps/chosen": -252.51846313476562,
"logps/rejected": -247.7227325439453,
"loss": 2291.9322,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": 0.029673133045434952,
"rewards/margins": 0.08245684206485748,
"rewards/rejected": -0.05278371647000313,
"step": 50
},
{
"epoch": 0.13,
"learning_rate": 4.9903533134293035e-06,
"logits/chosen": -2.545037031173706,
"logits/rejected": -2.5416412353515625,
"logps/chosen": -260.83905029296875,
"logps/rejected": -239.8417205810547,
"loss": 2269.9371,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 0.03231300041079521,
"rewards/margins": 0.09112317860126495,
"rewards/rejected": -0.05881017446517944,
"step": 60
},
{
"epoch": 0.15,
"learning_rate": 4.967625656594782e-06,
"logits/chosen": -2.5832419395446777,
"logits/rejected": -2.564356565475464,
"logps/chosen": -275.95452880859375,
"logps/rejected": -264.7611083984375,
"loss": 2236.1113,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": 0.036882974207401276,
"rewards/margins": 0.08578891307115555,
"rewards/rejected": -0.048905935138463974,
"step": 70
},
{
"epoch": 0.17,
"learning_rate": 4.93167072587771e-06,
"logits/chosen": -2.552919864654541,
"logits/rejected": -2.524970293045044,
"logps/chosen": -257.78448486328125,
"logps/rejected": -262.3812561035156,
"loss": 2220.0893,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.037375591695308685,
"rewards/margins": 0.11339374631643295,
"rewards/rejected": -0.07601816952228546,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 4.882681251368549e-06,
"logits/chosen": -2.56257963180542,
"logits/rejected": -2.5289363861083984,
"logps/chosen": -239.4860382080078,
"logps/rejected": -252.36196899414062,
"loss": 2167.3848,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.04182355850934982,
"rewards/margins": 0.10886694490909576,
"rewards/rejected": -0.06704337894916534,
"step": 90
},
{
"epoch": 0.21,
"learning_rate": 4.8209198325401815e-06,
"logits/chosen": -2.5551962852478027,
"logits/rejected": -2.562063455581665,
"logps/chosen": -266.8739013671875,
"logps/rejected": -269.649169921875,
"loss": 2149.4746,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.04759662598371506,
"rewards/margins": 0.1307816356420517,
"rewards/rejected": -0.08318501710891724,
"step": 100
},
{
"epoch": 0.21,
"eval_logits/chosen": -2.222931385040283,
"eval_logits/rejected": -2.1770126819610596,
"eval_logps/chosen": -260.57818603515625,
"eval_logps/rejected": -253.25228881835938,
"eval_loss": 2190.7666015625,
"eval_rewards/accuracies": 0.7460317611694336,
"eval_rewards/chosen": 0.044464047998189926,
"eval_rewards/margins": 0.12927772104740143,
"eval_rewards/rejected": -0.0848136618733406,
"eval_runtime": 549.355,
"eval_samples_per_second": 3.641,
"eval_steps_per_second": 0.115,
"step": 100
},
{
"epoch": 0.23,
"learning_rate": 4.746717530629565e-06,
"logits/chosen": -2.5229454040527344,
"logits/rejected": -2.5105621814727783,
"logps/chosen": -261.46649169921875,
"logps/rejected": -256.37835693359375,
"loss": 2174.1184,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": 0.03517655283212662,
"rewards/margins": 0.11897265911102295,
"rewards/rejected": -0.08379611372947693,
"step": 110
},
{
"epoch": 0.25,
"learning_rate": 4.660472094042121e-06,
"logits/chosen": -2.5114097595214844,
"logits/rejected": -2.481840133666992,
"logps/chosen": -246.70370483398438,
"logps/rejected": -238.27621459960938,
"loss": 2181.3053,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.044524095952510834,
"rewards/margins": 0.10293309390544891,
"rewards/rejected": -0.05840899422764778,
"step": 120
},
{
"epoch": 0.27,
"learning_rate": 4.5626458262912745e-06,
"logits/chosen": -2.4726600646972656,
"logits/rejected": -2.46514630317688,
"logps/chosen": -271.7862548828125,
"logps/rejected": -260.61676025390625,
"loss": 2175.3252,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.06200919300317764,
"rewards/margins": 0.12613125145435333,
"rewards/rejected": -0.06412206590175629,
"step": 130
},
{
"epoch": 0.29,
"learning_rate": 4.453763107901676e-06,
"logits/chosen": -2.506436586380005,
"logits/rejected": -2.5005128383636475,
"logps/chosen": -237.8655242919922,
"logps/rejected": -249.9298553466797,
"loss": 2167.2516,
"rewards/accuracies": 0.734375,
"rewards/chosen": 0.024008702486753464,
"rewards/margins": 0.1495535969734192,
"rewards/rejected": -0.12554487586021423,
"step": 140
},
{
"epoch": 0.31,
"learning_rate": 4.33440758555951e-06,
"logits/chosen": -2.5227842330932617,
"logits/rejected": -2.536785364151001,
"logps/chosen": -260.7518005371094,
"logps/rejected": -235.9630889892578,
"loss": 2119.4062,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": 0.04733316972851753,
"rewards/margins": 0.12345732748508453,
"rewards/rejected": -0.0761241465806961,
"step": 150
},
{
"epoch": 0.33,
"learning_rate": 4.205219043576955e-06,
"logits/chosen": -2.5534234046936035,
"logits/rejected": -2.4914207458496094,
"logps/chosen": -254.14065551757812,
"logps/rejected": -250.95700073242188,
"loss": 2114.7645,
"rewards/accuracies": 0.778124988079071,
"rewards/chosen": 0.06031092256307602,
"rewards/margins": 0.15202957391738892,
"rewards/rejected": -0.09171866625547409,
"step": 160
},
{
"epoch": 0.36,
"learning_rate": 4.066889974440757e-06,
"logits/chosen": -2.5092320442199707,
"logits/rejected": -2.4965577125549316,
"logps/chosen": -254.91439819335938,
"logps/rejected": -242.8040008544922,
"loss": 2229.8135,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.034448813647031784,
"rewards/margins": 0.12951095402240753,
"rewards/rejected": -0.09506212174892426,
"step": 170
},
{
"epoch": 0.38,
"learning_rate": 3.92016186682789e-06,
"logits/chosen": -2.521221399307251,
"logits/rejected": -2.533686399459839,
"logps/chosen": -251.4235382080078,
"logps/rejected": -259.76220703125,
"loss": 2175.5213,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": 0.04062749817967415,
"rewards/margins": 0.120635487139225,
"rewards/rejected": -0.08000798523426056,
"step": 180
},
{
"epoch": 0.4,
"learning_rate": 3.7658212309857576e-06,
"logits/chosen": -2.5192363262176514,
"logits/rejected": -2.4917151927948,
"logps/chosen": -255.2060089111328,
"logps/rejected": -250.82022094726562,
"loss": 2099.443,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": 0.0492943711578846,
"rewards/margins": 0.14053165912628174,
"rewards/rejected": -0.09123729914426804,
"step": 190
},
{
"epoch": 0.42,
"learning_rate": 3.604695382782159e-06,
"logits/chosen": -2.5251801013946533,
"logits/rejected": -2.5034642219543457,
"logps/chosen": -269.3675537109375,
"logps/rejected": -262.86376953125,
"loss": 2105.1256,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 0.0575677752494812,
"rewards/margins": 0.14340198040008545,
"rewards/rejected": -0.08583419024944305,
"step": 200
},
{
"epoch": 0.42,
"eval_logits/chosen": -2.260270833969116,
"eval_logits/rejected": -2.2073864936828613,
"eval_logps/chosen": -259.5941467285156,
"eval_logps/rejected": -254.3839874267578,
"eval_loss": 2151.155517578125,
"eval_rewards/accuracies": 0.7599206566810608,
"eval_rewards/chosen": 0.05430443957448006,
"eval_rewards/margins": 0.15043501555919647,
"eval_rewards/rejected": -0.09613056480884552,
"eval_runtime": 548.195,
"eval_samples_per_second": 3.648,
"eval_steps_per_second": 0.115,
"step": 200
},
{
"epoch": 0.44,
"learning_rate": 3.437648009023905e-06,
"logits/chosen": -2.533383369445801,
"logits/rejected": -2.4935860633850098,
"logps/chosen": -243.6236114501953,
"logps/rejected": -238.85140991210938,
"loss": 2145.5416,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": 0.06410142779350281,
"rewards/margins": 0.14374245703220367,
"rewards/rejected": -0.07964102178812027,
"step": 210
},
{
"epoch": 0.46,
"learning_rate": 3.265574537815398e-06,
"logits/chosen": -2.554565906524658,
"logits/rejected": -2.56289005279541,
"logps/chosen": -277.4061584472656,
"logps/rejected": -253.40048217773438,
"loss": 2196.8484,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.052330613136291504,
"rewards/margins": 0.11339585483074188,
"rewards/rejected": -0.06106524541974068,
"step": 220
},
{
"epoch": 0.48,
"learning_rate": 3.089397338773569e-06,
"logits/chosen": -2.4857611656188965,
"logits/rejected": -2.473193407058716,
"logps/chosen": -247.3427276611328,
"logps/rejected": -241.8627471923828,
"loss": 2160.1729,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.03845102712512016,
"rewards/margins": 0.11976752430200577,
"rewards/rejected": -0.0813164934515953,
"step": 230
},
{
"epoch": 0.5,
"learning_rate": 2.9100607788275547e-06,
"logits/chosen": -2.5121560096740723,
"logits/rejected": -2.516338586807251,
"logps/chosen": -257.1769714355469,
"logps/rejected": -247.3695068359375,
"loss": 2185.7641,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": 0.0379050187766552,
"rewards/margins": 0.11140499264001846,
"rewards/rejected": -0.07349997013807297,
"step": 240
},
{
"epoch": 0.52,
"learning_rate": 2.72852616010567e-06,
"logits/chosen": -2.5092978477478027,
"logits/rejected": -2.487090826034546,
"logps/chosen": -264.5955505371094,
"logps/rejected": -246.3382110595703,
"loss": 2136.6197,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": 0.039962492883205414,
"rewards/margins": 0.1403963267803192,
"rewards/rejected": -0.1004338413476944,
"step": 250
},
{
"epoch": 0.54,
"learning_rate": 2.5457665670441937e-06,
"logits/chosen": -2.5069711208343506,
"logits/rejected": -2.5030505657196045,
"logps/chosen": -257.4859619140625,
"logps/rejected": -231.91958618164062,
"loss": 2085.2795,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.05723271518945694,
"rewards/margins": 0.15024301409721375,
"rewards/rejected": -0.0930103212594986,
"step": 260
},
{
"epoch": 0.57,
"learning_rate": 2.3627616503391813e-06,
"logits/chosen": -2.525665760040283,
"logits/rejected": -2.5043163299560547,
"logps/chosen": -280.7471618652344,
"logps/rejected": -267.36712646484375,
"loss": 2089.859,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.05569761246442795,
"rewards/margins": 0.179846853017807,
"rewards/rejected": -0.12414924055337906,
"step": 270
},
{
"epoch": 0.59,
"learning_rate": 2.1804923757009885e-06,
"logits/chosen": -2.500837564468384,
"logits/rejected": -2.501950740814209,
"logps/chosen": -270.04193115234375,
"logps/rejected": -248.61978149414062,
"loss": 2111.6906,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.05320361256599426,
"rewards/margins": 0.1410333216190338,
"rewards/rejected": -0.08782971650362015,
"step": 280
},
{
"epoch": 0.61,
"learning_rate": 1.9999357655598894e-06,
"logits/chosen": -2.5122292041778564,
"logits/rejected": -2.50368070602417,
"logps/chosen": -258.72686767578125,
"logps/rejected": -256.91387939453125,
"loss": 2137.0592,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.053160279989242554,
"rewards/margins": 0.15454119443893433,
"rewards/rejected": -0.10138092190027237,
"step": 290
},
{
"epoch": 0.63,
"learning_rate": 1.8220596619089576e-06,
"logits/chosen": -2.471623659133911,
"logits/rejected": -2.4690403938293457,
"logps/chosen": -246.51766967773438,
"logps/rejected": -251.79257202148438,
"loss": 2135.4973,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.0453377440571785,
"rewards/margins": 0.12641170620918274,
"rewards/rejected": -0.08107397705316544,
"step": 300
},
{
"epoch": 0.63,
"eval_logits/chosen": -2.2764506340026855,
"eval_logits/rejected": -2.2231767177581787,
"eval_logps/chosen": -258.7624206542969,
"eval_logps/rejected": -252.75852966308594,
"eval_loss": 2129.089599609375,
"eval_rewards/accuracies": 0.7559523582458496,
"eval_rewards/chosen": 0.06262180209159851,
"eval_rewards/margins": 0.14249789714813232,
"eval_rewards/rejected": -0.07987607270479202,
"eval_runtime": 547.9938,
"eval_samples_per_second": 3.65,
"eval_steps_per_second": 0.115,
"step": 300
},
{
"epoch": 0.65,
"learning_rate": 1.647817538357072e-06,
"logits/chosen": -2.5041086673736572,
"logits/rejected": -2.495436191558838,
"logps/chosen": -264.5109558105469,
"logps/rejected": -248.3275604248047,
"loss": 2107.123,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": 0.05480458214879036,
"rewards/margins": 0.13964474201202393,
"rewards/rejected": -0.08484016358852386,
"step": 310
},
{
"epoch": 0.67,
"learning_rate": 1.4781433892011132e-06,
"logits/chosen": -2.53191876411438,
"logits/rejected": -2.4989166259765625,
"logps/chosen": -242.36599731445312,
"logps/rejected": -243.78067016601562,
"loss": 2076.0621,
"rewards/accuracies": 0.7718750238418579,
"rewards/chosen": 0.05456935614347458,
"rewards/margins": 0.14978976547718048,
"rewards/rejected": -0.0952204093337059,
"step": 320
},
{
"epoch": 0.69,
"learning_rate": 1.3139467229135999e-06,
"logits/chosen": -2.4768006801605225,
"logits/rejected": -2.4569873809814453,
"logps/chosen": -263.0523681640625,
"logps/rejected": -250.5469207763672,
"loss": 2112.1141,
"rewards/accuracies": 0.734375,
"rewards/chosen": 0.044828929007053375,
"rewards/margins": 0.13050048053264618,
"rewards/rejected": -0.0856715738773346,
"step": 330
},
{
"epoch": 0.71,
"learning_rate": 1.1561076868822756e-06,
"logits/chosen": -2.5158028602600098,
"logits/rejected": -2.5096983909606934,
"logps/chosen": -275.6848449707031,
"logps/rejected": -246.7259979248047,
"loss": 2151.2445,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": 0.052164845168590546,
"rewards/margins": 0.15314052999019623,
"rewards/rejected": -0.10097566992044449,
"step": 340
},
{
"epoch": 0.73,
"learning_rate": 1.0054723495346484e-06,
"logits/chosen": -2.518799304962158,
"logits/rejected": -2.4620516300201416,
"logps/chosen": -249.27401733398438,
"logps/rejected": -218.7183074951172,
"loss": 2093.9803,
"rewards/accuracies": 0.7593749761581421,
"rewards/chosen": 0.0662151575088501,
"rewards/margins": 0.14556364715099335,
"rewards/rejected": -0.07934850454330444,
"step": 350
},
{
"epoch": 0.75,
"learning_rate": 8.628481651367876e-07,
"logits/chosen": -2.5340943336486816,
"logits/rejected": -2.5006654262542725,
"logps/chosen": -260.32464599609375,
"logps/rejected": -237.3218536376953,
"loss": 2094.1246,
"rewards/accuracies": 0.765625,
"rewards/chosen": 0.05396001785993576,
"rewards/margins": 0.15317106246948242,
"rewards/rejected": -0.09921105206012726,
"step": 360
},
{
"epoch": 0.77,
"learning_rate": 7.289996455765749e-07,
"logits/chosen": -2.529265880584717,
"logits/rejected": -2.515712261199951,
"logps/chosen": -266.943115234375,
"logps/rejected": -246.0579376220703,
"loss": 2115.357,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.052078358829021454,
"rewards/margins": 0.1462351232767105,
"rewards/rejected": -0.09415675699710846,
"step": 370
},
{
"epoch": 0.8,
"learning_rate": 6.046442623320145e-07,
"logits/chosen": -2.4891440868377686,
"logits/rejected": -2.499753952026367,
"logps/chosen": -253.51632690429688,
"logps/rejected": -245.4505615234375,
"loss": 2082.182,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": 0.051686953753232956,
"rewards/margins": 0.1390691101551056,
"rewards/rejected": -0.08738215267658234,
"step": 380
},
{
"epoch": 0.82,
"learning_rate": 4.904486005914027e-07,
"logits/chosen": -2.532160997390747,
"logits/rejected": -2.5001654624938965,
"logps/chosen": -280.9754333496094,
"logps/rejected": -279.0588684082031,
"loss": 2114.3043,
"rewards/accuracies": 0.7593749761581421,
"rewards/chosen": 0.0547635443508625,
"rewards/margins": 0.14076778292655945,
"rewards/rejected": -0.08600424975156784,
"step": 390
},
{
"epoch": 0.84,
"learning_rate": 3.8702478614051353e-07,
"logits/chosen": -2.4791765213012695,
"logits/rejected": -2.4799935817718506,
"logps/chosen": -246.14102172851562,
"logps/rejected": -251.533447265625,
"loss": 2099.8018,
"rewards/accuracies": 0.703125,
"rewards/chosen": 0.0392024889588356,
"rewards/margins": 0.13221651315689087,
"rewards/rejected": -0.09301402419805527,
"step": 400
},
{
"epoch": 0.84,
"eval_logits/chosen": -2.254145860671997,
"eval_logits/rejected": -2.2016360759735107,
"eval_logps/chosen": -259.64398193359375,
"eval_logps/rejected": -254.3590850830078,
"eval_loss": 2121.667236328125,
"eval_rewards/accuracies": 0.7539682388305664,
"eval_rewards/chosen": 0.05380600318312645,
"eval_rewards/margins": 0.14968746900558472,
"eval_rewards/rejected": -0.09588146954774857,
"eval_runtime": 547.9727,
"eval_samples_per_second": 3.65,
"eval_steps_per_second": 0.115,
"step": 400
},
{
"epoch": 0.86,
"learning_rate": 2.9492720416985004e-07,
"logits/chosen": -2.4832329750061035,
"logits/rejected": -2.463463306427002,
"logps/chosen": -284.7741394042969,
"logps/rejected": -252.4269561767578,
"loss": 2145.448,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.05263269692659378,
"rewards/margins": 0.15021036565303802,
"rewards/rejected": -0.09757767617702484,
"step": 410
},
{
"epoch": 0.88,
"learning_rate": 2.1464952759020857e-07,
"logits/chosen": -2.4804348945617676,
"logits/rejected": -2.457764148712158,
"logps/chosen": -254.78604125976562,
"logps/rejected": -278.61346435546875,
"loss": 2123.6629,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": 0.033899884670972824,
"rewards/margins": 0.11116783320903778,
"rewards/rejected": -0.07726795971393585,
"step": 420
},
{
"epoch": 0.9,
"learning_rate": 1.4662207078575685e-07,
"logits/chosen": -2.4848549365997314,
"logits/rejected": -2.485640048980713,
"logps/chosen": -268.3457336425781,
"logps/rejected": -268.5885925292969,
"loss": 2144.4309,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": 0.03841588646173477,
"rewards/margins": 0.13024446368217468,
"rewards/rejected": -0.09182857722043991,
"step": 430
},
{
"epoch": 0.92,
"learning_rate": 9.120948298936422e-08,
"logits/chosen": -2.457054615020752,
"logits/rejected": -2.4329726696014404,
"logps/chosen": -231.9584197998047,
"logps/rejected": -234.6277313232422,
"loss": 2118.3984,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.038600482046604156,
"rewards/margins": 0.13669805228710175,
"rewards/rejected": -0.09809757024049759,
"step": 440
},
{
"epoch": 0.94,
"learning_rate": 4.870879364444109e-08,
"logits/chosen": -2.5156655311584473,
"logits/rejected": -2.563300848007202,
"logps/chosen": -263.9936218261719,
"logps/rejected": -265.6227722167969,
"loss": 2123.5402,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.04902677983045578,
"rewards/margins": 0.1260160207748413,
"rewards/rejected": -0.07698923349380493,
"step": 450
},
{
"epoch": 0.96,
"learning_rate": 1.93478202307823e-08,
"logits/chosen": -2.470996379852295,
"logits/rejected": -2.4720451831817627,
"logps/chosen": -258.21734619140625,
"logps/rejected": -262.04925537109375,
"loss": 2078.5094,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": 0.04391016811132431,
"rewards/margins": 0.14817874133586884,
"rewards/rejected": -0.10426857322454453,
"step": 460
},
{
"epoch": 0.98,
"learning_rate": 3.283947088983663e-09,
"logits/chosen": -2.513140916824341,
"logits/rejected": -2.535651206970215,
"logps/chosen": -249.6727752685547,
"logps/rejected": -248.2782745361328,
"loss": 2093.2779,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.04741714522242546,
"rewards/margins": 0.143958181142807,
"rewards/rejected": -0.09654103964567184,
"step": 470
},
{
"epoch": 1.0,
"step": 477,
"total_flos": 0.0,
"train_loss": 2164.5614415454666,
"train_runtime": 32346.8016,
"train_samples_per_second": 1.89,
"train_steps_per_second": 0.015
}
],
"logging_steps": 10,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}