zephyr-7b-dpo-lora / trainer_state.json
jiuhai's picture
Training in progress, epoch 2
87c25a7
raw
history blame
24.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 485,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.020408163265306e-08,
"logits/chosen": -3.094454526901245,
"logits/rejected": -3.0498220920562744,
"logps/chosen": -242.99183654785156,
"logps/rejected": -74.66817474365234,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 1.0204081632653061e-07,
"logits/chosen": -3.032047986984253,
"logits/rejected": -3.029446840286255,
"logps/chosen": -290.1824645996094,
"logps/rejected": -75.82839965820312,
"loss": 0.6935,
"rewards/accuracies": 0.4027777910232544,
"rewards/chosen": -0.007104851305484772,
"rewards/margins": -0.0044839149340987206,
"rewards/rejected": -0.0026209354400634766,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 2.0408163265306121e-07,
"logits/chosen": -2.9773757457733154,
"logits/rejected": -2.967517852783203,
"logps/chosen": -297.57342529296875,
"logps/rejected": -77.62318420410156,
"loss": 0.692,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.00020697650325018913,
"rewards/margins": 0.003021990181878209,
"rewards/rejected": -0.0028150142170488834,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 3.0612244897959183e-07,
"logits/chosen": -2.983607769012451,
"logits/rejected": -2.9363152980804443,
"logps/chosen": -288.51458740234375,
"logps/rejected": -75.65086364746094,
"loss": 0.6892,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.0037677965592592955,
"rewards/margins": 0.004846884869039059,
"rewards/rejected": -0.008614679798483849,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 4.0816326530612243e-07,
"logits/chosen": -3.0467514991760254,
"logits/rejected": -3.010239362716675,
"logps/chosen": -243.7971954345703,
"logps/rejected": -81.06056213378906,
"loss": 0.685,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0063628097996115685,
"rewards/margins": 0.02118637040257454,
"rewards/rejected": -0.014823561534285545,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 4.988532110091743e-07,
"logits/chosen": -3.0095317363739014,
"logits/rejected": -3.0367846488952637,
"logps/chosen": -251.5819854736328,
"logps/rejected": -78.19547271728516,
"loss": 0.6784,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.005416669882833958,
"rewards/margins": 0.023932188749313354,
"rewards/rejected": -0.018515516072511673,
"step": 50
},
{
"epoch": 0.12,
"learning_rate": 4.873853211009174e-07,
"logits/chosen": -3.0116028785705566,
"logits/rejected": -3.0300631523132324,
"logps/chosen": -281.01361083984375,
"logps/rejected": -75.49365997314453,
"loss": 0.6715,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.015385298058390617,
"rewards/margins": 0.050571341067552567,
"rewards/rejected": -0.0351860448718071,
"step": 60
},
{
"epoch": 0.14,
"learning_rate": 4.7591743119266054e-07,
"logits/chosen": -3.0327250957489014,
"logits/rejected": -3.0184121131896973,
"logps/chosen": -262.8722229003906,
"logps/rejected": -71.65990447998047,
"loss": 0.6649,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": 0.016824517399072647,
"rewards/margins": 0.06025807186961174,
"rewards/rejected": -0.043433547019958496,
"step": 70
},
{
"epoch": 0.16,
"learning_rate": 4.644495412844037e-07,
"logits/chosen": -3.0364532470703125,
"logits/rejected": -2.988002300262451,
"logps/chosen": -254.49423217773438,
"logps/rejected": -70.27412414550781,
"loss": 0.6556,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.022701723501086235,
"rewards/margins": 0.07623252272605896,
"rewards/rejected": -0.05353079363703728,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 4.5298165137614677e-07,
"logits/chosen": -3.068497657775879,
"logits/rejected": -3.0402565002441406,
"logps/chosen": -266.61614990234375,
"logps/rejected": -81.87393951416016,
"loss": 0.6455,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 0.026070792227983475,
"rewards/margins": 0.10358123481273651,
"rewards/rejected": -0.07751044631004333,
"step": 90
},
{
"epoch": 0.21,
"learning_rate": 4.4151376146788986e-07,
"logits/chosen": -3.0521655082702637,
"logits/rejected": -3.057821750640869,
"logps/chosen": -286.0577087402344,
"logps/rejected": -77.96414947509766,
"loss": 0.6336,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.033475782722234726,
"rewards/margins": 0.14013811945915222,
"rewards/rejected": -0.10666234791278839,
"step": 100
},
{
"epoch": 0.23,
"learning_rate": 4.30045871559633e-07,
"logits/chosen": -3.003532886505127,
"logits/rejected": -2.995978355407715,
"logps/chosen": -276.5457458496094,
"logps/rejected": -80.02079010009766,
"loss": 0.6234,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.0331401564180851,
"rewards/margins": 0.14480046927928925,
"rewards/rejected": -0.11166031658649445,
"step": 110
},
{
"epoch": 0.25,
"learning_rate": 4.1857798165137613e-07,
"logits/chosen": -3.0330376625061035,
"logits/rejected": -3.030214548110962,
"logps/chosen": -276.41632080078125,
"logps/rejected": -77.67643737792969,
"loss": 0.6164,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.043682295829057693,
"rewards/margins": 0.177944153547287,
"rewards/rejected": -0.1342618763446808,
"step": 120
},
{
"epoch": 0.27,
"learning_rate": 4.071100917431192e-07,
"logits/chosen": -2.9754703044891357,
"logits/rejected": -2.9898681640625,
"logps/chosen": -283.3277587890625,
"logps/rejected": -83.87138366699219,
"loss": 0.6121,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 0.048630841076374054,
"rewards/margins": 0.19439519941806793,
"rewards/rejected": -0.14576435089111328,
"step": 130
},
{
"epoch": 0.29,
"learning_rate": 3.9564220183486236e-07,
"logits/chosen": -3.0477757453918457,
"logits/rejected": -3.0237550735473633,
"logps/chosen": -291.98065185546875,
"logps/rejected": -82.53144073486328,
"loss": 0.5997,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.034745730459690094,
"rewards/margins": 0.20989501476287842,
"rewards/rejected": -0.17514929175376892,
"step": 140
},
{
"epoch": 0.31,
"learning_rate": 3.841743119266055e-07,
"logits/chosen": -3.033001661300659,
"logits/rejected": -3.015845775604248,
"logps/chosen": -289.15582275390625,
"logps/rejected": -76.08447265625,
"loss": 0.5925,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 0.0425817035138607,
"rewards/margins": 0.21189098060131073,
"rewards/rejected": -0.16930925846099854,
"step": 150
},
{
"epoch": 0.33,
"learning_rate": 3.7270642201834864e-07,
"logits/chosen": -3.0720551013946533,
"logits/rejected": -3.0518932342529297,
"logps/chosen": -271.08258056640625,
"logps/rejected": -75.97576141357422,
"loss": 0.5874,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.03000471368432045,
"rewards/margins": 0.20934228599071503,
"rewards/rejected": -0.17933759093284607,
"step": 160
},
{
"epoch": 0.35,
"learning_rate": 3.612385321100918e-07,
"logits/chosen": -3.026865243911743,
"logits/rejected": -3.030813455581665,
"logps/chosen": -287.5133361816406,
"logps/rejected": -77.84892272949219,
"loss": 0.5811,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.050167638808488846,
"rewards/margins": 0.24577708542346954,
"rewards/rejected": -0.1956094205379486,
"step": 170
},
{
"epoch": 0.37,
"learning_rate": 3.497706422018348e-07,
"logits/chosen": -3.064037322998047,
"logits/rejected": -3.0434131622314453,
"logps/chosen": -270.81378173828125,
"logps/rejected": -78.64222717285156,
"loss": 0.5708,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 0.0572846345603466,
"rewards/margins": 0.27750909328460693,
"rewards/rejected": -0.2202244997024536,
"step": 180
},
{
"epoch": 0.39,
"learning_rate": 3.3830275229357795e-07,
"logits/chosen": -3.0381369590759277,
"logits/rejected": -3.031832456588745,
"logps/chosen": -273.7306823730469,
"logps/rejected": -79.31744384765625,
"loss": 0.5604,
"rewards/accuracies": 0.96875,
"rewards/chosen": 0.05553610250353813,
"rewards/margins": 0.29081013798713684,
"rewards/rejected": -0.2352740317583084,
"step": 190
},
{
"epoch": 0.41,
"learning_rate": 3.268348623853211e-07,
"logits/chosen": -3.036811113357544,
"logits/rejected": -3.0287680625915527,
"logps/chosen": -266.4691467285156,
"logps/rejected": -77.38215637207031,
"loss": 0.5504,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.08118367195129395,
"rewards/margins": 0.3425747752189636,
"rewards/rejected": -0.2613911032676697,
"step": 200
},
{
"epoch": 0.43,
"learning_rate": 3.1536697247706423e-07,
"logits/chosen": -3.061699867248535,
"logits/rejected": -3.042888641357422,
"logps/chosen": -269.961181640625,
"logps/rejected": -89.21647644042969,
"loss": 0.5501,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.07142322510480881,
"rewards/margins": 0.3240587115287781,
"rewards/rejected": -0.25263547897338867,
"step": 210
},
{
"epoch": 0.45,
"learning_rate": 3.038990825688073e-07,
"logits/chosen": -3.04771089553833,
"logits/rejected": -3.018721103668213,
"logps/chosen": -250.44091796875,
"logps/rejected": -72.33317565917969,
"loss": 0.5488,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 0.06637217104434967,
"rewards/margins": 0.3276647627353668,
"rewards/rejected": -0.26129260659217834,
"step": 220
},
{
"epoch": 0.47,
"learning_rate": 2.9243119266055045e-07,
"logits/chosen": -2.9626972675323486,
"logits/rejected": -2.9827158451080322,
"logps/chosen": -293.9212646484375,
"logps/rejected": -72.2821044921875,
"loss": 0.5313,
"rewards/accuracies": 0.981249988079071,
"rewards/chosen": 0.08349540829658508,
"rewards/margins": 0.3892216682434082,
"rewards/rejected": -0.30572623014450073,
"step": 230
},
{
"epoch": 0.49,
"learning_rate": 2.809633027522936e-07,
"logits/chosen": -3.034790277481079,
"logits/rejected": -3.016634225845337,
"logps/chosen": -280.6105651855469,
"logps/rejected": -76.09197235107422,
"loss": 0.5333,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.08378176391124725,
"rewards/margins": 0.4068339467048645,
"rewards/rejected": -0.32305219769477844,
"step": 240
},
{
"epoch": 0.52,
"learning_rate": 2.6949541284403673e-07,
"logits/chosen": -3.0789849758148193,
"logits/rejected": -3.0785841941833496,
"logps/chosen": -264.5536804199219,
"logps/rejected": -82.22047424316406,
"loss": 0.5282,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 0.06328760087490082,
"rewards/margins": 0.40200409293174744,
"rewards/rejected": -0.3387165069580078,
"step": 250
},
{
"epoch": 0.54,
"learning_rate": 2.5802752293577976e-07,
"logits/chosen": -2.9741625785827637,
"logits/rejected": -2.9866743087768555,
"logps/chosen": -282.30902099609375,
"logps/rejected": -70.76858520507812,
"loss": 0.5277,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 0.10191468149423599,
"rewards/margins": 0.39590951800346375,
"rewards/rejected": -0.29399481415748596,
"step": 260
},
{
"epoch": 0.56,
"learning_rate": 2.465596330275229e-07,
"logits/chosen": -3.032557964324951,
"logits/rejected": -3.03240704536438,
"logps/chosen": -274.0851135253906,
"logps/rejected": -86.98384094238281,
"loss": 0.5135,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.07479412853717804,
"rewards/margins": 0.4109489321708679,
"rewards/rejected": -0.3361548185348511,
"step": 270
},
{
"epoch": 0.58,
"learning_rate": 2.3509174311926604e-07,
"logits/chosen": -3.060285806655884,
"logits/rejected": -2.9775302410125732,
"logps/chosen": -253.785888671875,
"logps/rejected": -70.39444732666016,
"loss": 0.5183,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 0.07235217839479446,
"rewards/margins": 0.3860532343387604,
"rewards/rejected": -0.31370100378990173,
"step": 280
},
{
"epoch": 0.6,
"learning_rate": 2.2362385321100916e-07,
"logits/chosen": -3.029343843460083,
"logits/rejected": -3.0406129360198975,
"logps/chosen": -276.57196044921875,
"logps/rejected": -84.54597473144531,
"loss": 0.5107,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 0.08857797086238861,
"rewards/margins": 0.4803849756717682,
"rewards/rejected": -0.3918069899082184,
"step": 290
},
{
"epoch": 0.62,
"learning_rate": 2.121559633027523e-07,
"logits/chosen": -2.9938578605651855,
"logits/rejected": -2.9954426288604736,
"logps/chosen": -273.7822265625,
"logps/rejected": -77.98421478271484,
"loss": 0.5079,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.08799968659877777,
"rewards/margins": 0.40502768754959106,
"rewards/rejected": -0.3170279860496521,
"step": 300
},
{
"epoch": 0.64,
"learning_rate": 2.0068807339449538e-07,
"logits/chosen": -3.052614212036133,
"logits/rejected": -3.0461201667785645,
"logps/chosen": -281.28814697265625,
"logps/rejected": -81.84606170654297,
"loss": 0.5038,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.05326849967241287,
"rewards/margins": 0.46244749426841736,
"rewards/rejected": -0.4091789722442627,
"step": 310
},
{
"epoch": 0.66,
"learning_rate": 1.8922018348623852e-07,
"logits/chosen": -3.031501054763794,
"logits/rejected": -3.042961597442627,
"logps/chosen": -271.274658203125,
"logps/rejected": -87.3827133178711,
"loss": 0.5003,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 0.07084844261407852,
"rewards/margins": 0.445441871881485,
"rewards/rejected": -0.37459343671798706,
"step": 320
},
{
"epoch": 0.68,
"learning_rate": 1.7775229357798163e-07,
"logits/chosen": -3.0476019382476807,
"logits/rejected": -3.0447893142700195,
"logps/chosen": -249.735595703125,
"logps/rejected": -73.10395812988281,
"loss": 0.4976,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.06198754906654358,
"rewards/margins": 0.43834322690963745,
"rewards/rejected": -0.37635567784309387,
"step": 330
},
{
"epoch": 0.7,
"learning_rate": 1.6628440366972477e-07,
"logits/chosen": -3.055901288986206,
"logits/rejected": -3.0517029762268066,
"logps/chosen": -273.3477478027344,
"logps/rejected": -85.53290557861328,
"loss": 0.496,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.08338963240385056,
"rewards/margins": 0.5042273998260498,
"rewards/rejected": -0.42083778977394104,
"step": 340
},
{
"epoch": 0.72,
"learning_rate": 1.5481651376146786e-07,
"logits/chosen": -3.063744306564331,
"logits/rejected": -3.066366195678711,
"logps/chosen": -277.1488952636719,
"logps/rejected": -88.2572250366211,
"loss": 0.4931,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.07289155572652817,
"rewards/margins": 0.5126849412918091,
"rewards/rejected": -0.4397934079170227,
"step": 350
},
{
"epoch": 0.74,
"learning_rate": 1.43348623853211e-07,
"logits/chosen": -3.0237436294555664,
"logits/rejected": -3.0258359909057617,
"logps/chosen": -292.0096740722656,
"logps/rejected": -81.93167114257812,
"loss": 0.4951,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": 0.07367613166570663,
"rewards/margins": 0.49797001481056213,
"rewards/rejected": -0.4242939352989197,
"step": 360
},
{
"epoch": 0.76,
"learning_rate": 1.318807339449541e-07,
"logits/chosen": -2.9882092475891113,
"logits/rejected": -2.9637956619262695,
"logps/chosen": -274.551513671875,
"logps/rejected": -73.8973388671875,
"loss": 0.496,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.0880483016371727,
"rewards/margins": 0.49274787306785583,
"rewards/rejected": -0.4046996533870697,
"step": 370
},
{
"epoch": 0.78,
"learning_rate": 1.2041284403669725e-07,
"logits/chosen": -3.070621967315674,
"logits/rejected": -3.0683789253234863,
"logps/chosen": -266.607177734375,
"logps/rejected": -81.02775573730469,
"loss": 0.493,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 0.10891600698232651,
"rewards/margins": 0.5303564071655273,
"rewards/rejected": -0.42144036293029785,
"step": 380
},
{
"epoch": 0.8,
"learning_rate": 1.0894495412844036e-07,
"logits/chosen": -3.0497114658355713,
"logits/rejected": -3.053192615509033,
"logps/chosen": -280.43218994140625,
"logps/rejected": -80.42735290527344,
"loss": 0.4892,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.10893626511096954,
"rewards/margins": 0.5605167746543884,
"rewards/rejected": -0.4515805244445801,
"step": 390
},
{
"epoch": 0.82,
"learning_rate": 9.747706422018348e-08,
"logits/chosen": -3.002933979034424,
"logits/rejected": -3.0063657760620117,
"logps/chosen": -241.24276733398438,
"logps/rejected": -75.92924499511719,
"loss": 0.4833,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.07781459391117096,
"rewards/margins": 0.46425342559814453,
"rewards/rejected": -0.38643890619277954,
"step": 400
},
{
"epoch": 0.85,
"learning_rate": 8.60091743119266e-08,
"logits/chosen": -3.0454163551330566,
"logits/rejected": -3.035583972930908,
"logps/chosen": -264.18585205078125,
"logps/rejected": -78.031982421875,
"loss": 0.4744,
"rewards/accuracies": 0.987500011920929,
"rewards/chosen": 0.09802711009979248,
"rewards/margins": 0.5436574816703796,
"rewards/rejected": -0.44563040137290955,
"step": 410
},
{
"epoch": 0.87,
"learning_rate": 7.454128440366971e-08,
"logits/chosen": -3.0196666717529297,
"logits/rejected": -3.0026302337646484,
"logps/chosen": -272.02630615234375,
"logps/rejected": -82.01240539550781,
"loss": 0.481,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.08279488980770111,
"rewards/margins": 0.5704164505004883,
"rewards/rejected": -0.48762160539627075,
"step": 420
},
{
"epoch": 0.89,
"learning_rate": 6.307339449541284e-08,
"logits/chosen": -3.0509345531463623,
"logits/rejected": -3.0137345790863037,
"logps/chosen": -262.2018127441406,
"logps/rejected": -77.63418579101562,
"loss": 0.4731,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 0.1073322519659996,
"rewards/margins": 0.5776056051254272,
"rewards/rejected": -0.4702734053134918,
"step": 430
},
{
"epoch": 0.91,
"learning_rate": 5.1605504587155966e-08,
"logits/chosen": -3.0285000801086426,
"logits/rejected": -3.0236475467681885,
"logps/chosen": -266.83599853515625,
"logps/rejected": -77.38362121582031,
"loss": 0.476,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 0.08291526138782501,
"rewards/margins": 0.4984784722328186,
"rewards/rejected": -0.41556310653686523,
"step": 440
},
{
"epoch": 0.93,
"learning_rate": 4.0137614678899086e-08,
"logits/chosen": -3.02640438079834,
"logits/rejected": -3.011373996734619,
"logps/chosen": -295.5868835449219,
"logps/rejected": -80.76414489746094,
"loss": 0.4707,
"rewards/accuracies": 0.96875,
"rewards/chosen": 0.09663239866495132,
"rewards/margins": 0.5815601944923401,
"rewards/rejected": -0.48492780327796936,
"step": 450
},
{
"epoch": 0.95,
"learning_rate": 2.86697247706422e-08,
"logits/chosen": -3.0195059776306152,
"logits/rejected": -2.988323926925659,
"logps/chosen": -300.5026550292969,
"logps/rejected": -86.79838562011719,
"loss": 0.4808,
"rewards/accuracies": 0.96875,
"rewards/chosen": 0.11054690927267075,
"rewards/margins": 0.5899176001548767,
"rewards/rejected": -0.47937074303627014,
"step": 460
},
{
"epoch": 0.97,
"learning_rate": 1.720183486238532e-08,
"logits/chosen": -3.0426931381225586,
"logits/rejected": -3.0394179821014404,
"logps/chosen": -235.52706909179688,
"logps/rejected": -73.9857406616211,
"loss": 0.4819,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 0.08785500377416611,
"rewards/margins": 0.5274263620376587,
"rewards/rejected": -0.4395713806152344,
"step": 470
},
{
"epoch": 0.99,
"learning_rate": 5.73394495412844e-09,
"logits/chosen": -3.0092616081237793,
"logits/rejected": -2.972731590270996,
"logps/chosen": -249.88876342773438,
"logps/rejected": -85.80451965332031,
"loss": 0.482,
"rewards/accuracies": 0.96875,
"rewards/chosen": 0.07512323558330536,
"rewards/margins": 0.5230099558830261,
"rewards/rejected": -0.44788676500320435,
"step": 480
},
{
"epoch": 1.0,
"eval_logits/chosen": -3.034407377243042,
"eval_logits/rejected": -3.069913864135742,
"eval_logps/chosen": -271.40020751953125,
"eval_logps/rejected": -175.5244140625,
"eval_loss": 0.5650191903114319,
"eval_rewards/accuracies": 0.76953125,
"eval_rewards/chosen": 0.08157022297382355,
"eval_rewards/margins": 0.33799096941947937,
"eval_rewards/rejected": -0.25642073154449463,
"eval_runtime": 256.4523,
"eval_samples_per_second": 7.799,
"eval_steps_per_second": 0.062,
"step": 485
},
{
"epoch": 1.0,
"step": 485,
"total_flos": 0.0,
"train_loss": 0.5539181610972611,
"train_runtime": 15602.6148,
"train_samples_per_second": 3.978,
"train_steps_per_second": 0.031
}
],
"logging_steps": 10,
"max_steps": 485,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}