|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 485, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.020408163265306e-08, |
|
"logits/chosen": -3.094454526901245, |
|
"logits/rejected": -3.0498220920562744, |
|
"logps/chosen": -242.99183654785156, |
|
"logps/rejected": -74.66817474365234, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0204081632653061e-07, |
|
"logits/chosen": -3.032047986984253, |
|
"logits/rejected": -3.029446840286255, |
|
"logps/chosen": -290.1824645996094, |
|
"logps/rejected": -75.82839965820312, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/chosen": -0.007104851305484772, |
|
"rewards/margins": -0.0044839149340987206, |
|
"rewards/rejected": -0.0026209354400634766, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0408163265306121e-07, |
|
"logits/chosen": -2.9773757457733154, |
|
"logits/rejected": -2.967517852783203, |
|
"logps/chosen": -297.57342529296875, |
|
"logps/rejected": -77.62318420410156, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00020697650325018913, |
|
"rewards/margins": 0.003021990181878209, |
|
"rewards/rejected": -0.0028150142170488834, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0612244897959183e-07, |
|
"logits/chosen": -2.983607769012451, |
|
"logits/rejected": -2.9363152980804443, |
|
"logps/chosen": -288.51458740234375, |
|
"logps/rejected": -75.65086364746094, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.0037677965592592955, |
|
"rewards/margins": 0.004846884869039059, |
|
"rewards/rejected": -0.008614679798483849, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.0816326530612243e-07, |
|
"logits/chosen": -3.0467514991760254, |
|
"logits/rejected": -3.010239362716675, |
|
"logps/chosen": -243.7971954345703, |
|
"logps/rejected": -81.06056213378906, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0063628097996115685, |
|
"rewards/margins": 0.02118637040257454, |
|
"rewards/rejected": -0.014823561534285545, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.988532110091743e-07, |
|
"logits/chosen": -3.0095317363739014, |
|
"logits/rejected": -3.0367846488952637, |
|
"logps/chosen": -251.5819854736328, |
|
"logps/rejected": -78.19547271728516, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.005416669882833958, |
|
"rewards/margins": 0.023932188749313354, |
|
"rewards/rejected": -0.018515516072511673, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.873853211009174e-07, |
|
"logits/chosen": -3.0116028785705566, |
|
"logits/rejected": -3.0300631523132324, |
|
"logps/chosen": -281.01361083984375, |
|
"logps/rejected": -75.49365997314453, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.015385298058390617, |
|
"rewards/margins": 0.050571341067552567, |
|
"rewards/rejected": -0.0351860448718071, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7591743119266054e-07, |
|
"logits/chosen": -3.0327250957489014, |
|
"logits/rejected": -3.0184121131896973, |
|
"logps/chosen": -262.8722229003906, |
|
"logps/rejected": -71.65990447998047, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.016824517399072647, |
|
"rewards/margins": 0.06025807186961174, |
|
"rewards/rejected": -0.043433547019958496, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.644495412844037e-07, |
|
"logits/chosen": -3.0364532470703125, |
|
"logits/rejected": -2.988002300262451, |
|
"logps/chosen": -254.49423217773438, |
|
"logps/rejected": -70.27412414550781, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.022701723501086235, |
|
"rewards/margins": 0.07623252272605896, |
|
"rewards/rejected": -0.05353079363703728, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5298165137614677e-07, |
|
"logits/chosen": -3.068497657775879, |
|
"logits/rejected": -3.0402565002441406, |
|
"logps/chosen": -266.61614990234375, |
|
"logps/rejected": -81.87393951416016, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.026070792227983475, |
|
"rewards/margins": 0.10358123481273651, |
|
"rewards/rejected": -0.07751044631004333, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.4151376146788986e-07, |
|
"logits/chosen": -3.0521655082702637, |
|
"logits/rejected": -3.057821750640869, |
|
"logps/chosen": -286.0577087402344, |
|
"logps/rejected": -77.96414947509766, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.033475782722234726, |
|
"rewards/margins": 0.14013811945915222, |
|
"rewards/rejected": -0.10666234791278839, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.30045871559633e-07, |
|
"logits/chosen": -3.003532886505127, |
|
"logits/rejected": -2.995978355407715, |
|
"logps/chosen": -276.5457458496094, |
|
"logps/rejected": -80.02079010009766, |
|
"loss": 0.6234, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0331401564180851, |
|
"rewards/margins": 0.14480046927928925, |
|
"rewards/rejected": -0.11166031658649445, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1857798165137613e-07, |
|
"logits/chosen": -3.0330376625061035, |
|
"logits/rejected": -3.030214548110962, |
|
"logps/chosen": -276.41632080078125, |
|
"logps/rejected": -77.67643737792969, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.043682295829057693, |
|
"rewards/margins": 0.177944153547287, |
|
"rewards/rejected": -0.1342618763446808, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.071100917431192e-07, |
|
"logits/chosen": -2.9754703044891357, |
|
"logits/rejected": -2.9898681640625, |
|
"logps/chosen": -283.3277587890625, |
|
"logps/rejected": -83.87138366699219, |
|
"loss": 0.6121, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.048630841076374054, |
|
"rewards/margins": 0.19439519941806793, |
|
"rewards/rejected": -0.14576435089111328, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9564220183486236e-07, |
|
"logits/chosen": -3.0477757453918457, |
|
"logits/rejected": -3.0237550735473633, |
|
"logps/chosen": -291.98065185546875, |
|
"logps/rejected": -82.53144073486328, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.034745730459690094, |
|
"rewards/margins": 0.20989501476287842, |
|
"rewards/rejected": -0.17514929175376892, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.841743119266055e-07, |
|
"logits/chosen": -3.033001661300659, |
|
"logits/rejected": -3.015845775604248, |
|
"logps/chosen": -289.15582275390625, |
|
"logps/rejected": -76.08447265625, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.0425817035138607, |
|
"rewards/margins": 0.21189098060131073, |
|
"rewards/rejected": -0.16930925846099854, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7270642201834864e-07, |
|
"logits/chosen": -3.0720551013946533, |
|
"logits/rejected": -3.0518932342529297, |
|
"logps/chosen": -271.08258056640625, |
|
"logps/rejected": -75.97576141357422, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.03000471368432045, |
|
"rewards/margins": 0.20934228599071503, |
|
"rewards/rejected": -0.17933759093284607, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.612385321100918e-07, |
|
"logits/chosen": -3.026865243911743, |
|
"logits/rejected": -3.030813455581665, |
|
"logps/chosen": -287.5133361816406, |
|
"logps/rejected": -77.84892272949219, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.050167638808488846, |
|
"rewards/margins": 0.24577708542346954, |
|
"rewards/rejected": -0.1956094205379486, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.497706422018348e-07, |
|
"logits/chosen": -3.064037322998047, |
|
"logits/rejected": -3.0434131622314453, |
|
"logps/chosen": -270.81378173828125, |
|
"logps/rejected": -78.64222717285156, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0572846345603466, |
|
"rewards/margins": 0.27750909328460693, |
|
"rewards/rejected": -0.2202244997024536, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3830275229357795e-07, |
|
"logits/chosen": -3.0381369590759277, |
|
"logits/rejected": -3.031832456588745, |
|
"logps/chosen": -273.7306823730469, |
|
"logps/rejected": -79.31744384765625, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.05553610250353813, |
|
"rewards/margins": 0.29081013798713684, |
|
"rewards/rejected": -0.2352740317583084, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.268348623853211e-07, |
|
"logits/chosen": -3.036811113357544, |
|
"logits/rejected": -3.0287680625915527, |
|
"logps/chosen": -266.4691467285156, |
|
"logps/rejected": -77.38215637207031, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.08118367195129395, |
|
"rewards/margins": 0.3425747752189636, |
|
"rewards/rejected": -0.2613911032676697, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.1536697247706423e-07, |
|
"logits/chosen": -3.061699867248535, |
|
"logits/rejected": -3.042888641357422, |
|
"logps/chosen": -269.961181640625, |
|
"logps/rejected": -89.21647644042969, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.07142322510480881, |
|
"rewards/margins": 0.3240587115287781, |
|
"rewards/rejected": -0.25263547897338867, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.038990825688073e-07, |
|
"logits/chosen": -3.04771089553833, |
|
"logits/rejected": -3.018721103668213, |
|
"logps/chosen": -250.44091796875, |
|
"logps/rejected": -72.33317565917969, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.06637217104434967, |
|
"rewards/margins": 0.3276647627353668, |
|
"rewards/rejected": -0.26129260659217834, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9243119266055045e-07, |
|
"logits/chosen": -2.9626972675323486, |
|
"logits/rejected": -2.9827158451080322, |
|
"logps/chosen": -293.9212646484375, |
|
"logps/rejected": -72.2821044921875, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 0.08349540829658508, |
|
"rewards/margins": 0.3892216682434082, |
|
"rewards/rejected": -0.30572623014450073, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.809633027522936e-07, |
|
"logits/chosen": -3.034790277481079, |
|
"logits/rejected": -3.016634225845337, |
|
"logps/chosen": -280.6105651855469, |
|
"logps/rejected": -76.09197235107422, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.08378176391124725, |
|
"rewards/margins": 0.4068339467048645, |
|
"rewards/rejected": -0.32305219769477844, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6949541284403673e-07, |
|
"logits/chosen": -3.0789849758148193, |
|
"logits/rejected": -3.0785841941833496, |
|
"logps/chosen": -264.5536804199219, |
|
"logps/rejected": -82.22047424316406, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.06328760087490082, |
|
"rewards/margins": 0.40200409293174744, |
|
"rewards/rejected": -0.3387165069580078, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5802752293577976e-07, |
|
"logits/chosen": -2.9741625785827637, |
|
"logits/rejected": -2.9866743087768555, |
|
"logps/chosen": -282.30902099609375, |
|
"logps/rejected": -70.76858520507812, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.10191468149423599, |
|
"rewards/margins": 0.39590951800346375, |
|
"rewards/rejected": -0.29399481415748596, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.465596330275229e-07, |
|
"logits/chosen": -3.032557964324951, |
|
"logits/rejected": -3.03240704536438, |
|
"logps/chosen": -274.0851135253906, |
|
"logps/rejected": -86.98384094238281, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.07479412853717804, |
|
"rewards/margins": 0.4109489321708679, |
|
"rewards/rejected": -0.3361548185348511, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3509174311926604e-07, |
|
"logits/chosen": -3.060285806655884, |
|
"logits/rejected": -2.9775302410125732, |
|
"logps/chosen": -253.785888671875, |
|
"logps/rejected": -70.39444732666016, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.07235217839479446, |
|
"rewards/margins": 0.3860532343387604, |
|
"rewards/rejected": -0.31370100378990173, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2362385321100916e-07, |
|
"logits/chosen": -3.029343843460083, |
|
"logits/rejected": -3.0406129360198975, |
|
"logps/chosen": -276.57196044921875, |
|
"logps/rejected": -84.54597473144531, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.08857797086238861, |
|
"rewards/margins": 0.4803849756717682, |
|
"rewards/rejected": -0.3918069899082184, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.121559633027523e-07, |
|
"logits/chosen": -2.9938578605651855, |
|
"logits/rejected": -2.9954426288604736, |
|
"logps/chosen": -273.7822265625, |
|
"logps/rejected": -77.98421478271484, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.08799968659877777, |
|
"rewards/margins": 0.40502768754959106, |
|
"rewards/rejected": -0.3170279860496521, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0068807339449538e-07, |
|
"logits/chosen": -3.052614212036133, |
|
"logits/rejected": -3.0461201667785645, |
|
"logps/chosen": -281.28814697265625, |
|
"logps/rejected": -81.84606170654297, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.05326849967241287, |
|
"rewards/margins": 0.46244749426841736, |
|
"rewards/rejected": -0.4091789722442627, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8922018348623852e-07, |
|
"logits/chosen": -3.031501054763794, |
|
"logits/rejected": -3.042961597442627, |
|
"logps/chosen": -271.274658203125, |
|
"logps/rejected": -87.3827133178711, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.07084844261407852, |
|
"rewards/margins": 0.445441871881485, |
|
"rewards/rejected": -0.37459343671798706, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7775229357798163e-07, |
|
"logits/chosen": -3.0476019382476807, |
|
"logits/rejected": -3.0447893142700195, |
|
"logps/chosen": -249.735595703125, |
|
"logps/rejected": -73.10395812988281, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.06198754906654358, |
|
"rewards/margins": 0.43834322690963745, |
|
"rewards/rejected": -0.37635567784309387, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6628440366972477e-07, |
|
"logits/chosen": -3.055901288986206, |
|
"logits/rejected": -3.0517029762268066, |
|
"logps/chosen": -273.3477478027344, |
|
"logps/rejected": -85.53290557861328, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.08338963240385056, |
|
"rewards/margins": 0.5042273998260498, |
|
"rewards/rejected": -0.42083778977394104, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5481651376146786e-07, |
|
"logits/chosen": -3.063744306564331, |
|
"logits/rejected": -3.066366195678711, |
|
"logps/chosen": -277.1488952636719, |
|
"logps/rejected": -88.2572250366211, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.07289155572652817, |
|
"rewards/margins": 0.5126849412918091, |
|
"rewards/rejected": -0.4397934079170227, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.43348623853211e-07, |
|
"logits/chosen": -3.0237436294555664, |
|
"logits/rejected": -3.0258359909057617, |
|
"logps/chosen": -292.0096740722656, |
|
"logps/rejected": -81.93167114257812, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 0.07367613166570663, |
|
"rewards/margins": 0.49797001481056213, |
|
"rewards/rejected": -0.4242939352989197, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.318807339449541e-07, |
|
"logits/chosen": -2.9882092475891113, |
|
"logits/rejected": -2.9637956619262695, |
|
"logps/chosen": -274.551513671875, |
|
"logps/rejected": -73.8973388671875, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0880483016371727, |
|
"rewards/margins": 0.49274787306785583, |
|
"rewards/rejected": -0.4046996533870697, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2041284403669725e-07, |
|
"logits/chosen": -3.070621967315674, |
|
"logits/rejected": -3.0683789253234863, |
|
"logps/chosen": -266.607177734375, |
|
"logps/rejected": -81.02775573730469, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.10891600698232651, |
|
"rewards/margins": 0.5303564071655273, |
|
"rewards/rejected": -0.42144036293029785, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0894495412844036e-07, |
|
"logits/chosen": -3.0497114658355713, |
|
"logits/rejected": -3.053192615509033, |
|
"logps/chosen": -280.43218994140625, |
|
"logps/rejected": -80.42735290527344, |
|
"loss": 0.4892, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.10893626511096954, |
|
"rewards/margins": 0.5605167746543884, |
|
"rewards/rejected": -0.4515805244445801, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.747706422018348e-08, |
|
"logits/chosen": -3.002933979034424, |
|
"logits/rejected": -3.0063657760620117, |
|
"logps/chosen": -241.24276733398438, |
|
"logps/rejected": -75.92924499511719, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.07781459391117096, |
|
"rewards/margins": 0.46425342559814453, |
|
"rewards/rejected": -0.38643890619277954, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.60091743119266e-08, |
|
"logits/chosen": -3.0454163551330566, |
|
"logits/rejected": -3.035583972930908, |
|
"logps/chosen": -264.18585205078125, |
|
"logps/rejected": -78.031982421875, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.09802711009979248, |
|
"rewards/margins": 0.5436574816703796, |
|
"rewards/rejected": -0.44563040137290955, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.454128440366971e-08, |
|
"logits/chosen": -3.0196666717529297, |
|
"logits/rejected": -3.0026302337646484, |
|
"logps/chosen": -272.02630615234375, |
|
"logps/rejected": -82.01240539550781, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.08279488980770111, |
|
"rewards/margins": 0.5704164505004883, |
|
"rewards/rejected": -0.48762160539627075, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.307339449541284e-08, |
|
"logits/chosen": -3.0509345531463623, |
|
"logits/rejected": -3.0137345790863037, |
|
"logps/chosen": -262.2018127441406, |
|
"logps/rejected": -77.63418579101562, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1073322519659996, |
|
"rewards/margins": 0.5776056051254272, |
|
"rewards/rejected": -0.4702734053134918, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.1605504587155966e-08, |
|
"logits/chosen": -3.0285000801086426, |
|
"logits/rejected": -3.0236475467681885, |
|
"logps/chosen": -266.83599853515625, |
|
"logps/rejected": -77.38362121582031, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.08291526138782501, |
|
"rewards/margins": 0.4984784722328186, |
|
"rewards/rejected": -0.41556310653686523, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.0137614678899086e-08, |
|
"logits/chosen": -3.02640438079834, |
|
"logits/rejected": -3.011373996734619, |
|
"logps/chosen": -295.5868835449219, |
|
"logps/rejected": -80.76414489746094, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.09663239866495132, |
|
"rewards/margins": 0.5815601944923401, |
|
"rewards/rejected": -0.48492780327796936, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.86697247706422e-08, |
|
"logits/chosen": -3.0195059776306152, |
|
"logits/rejected": -2.988323926925659, |
|
"logps/chosen": -300.5026550292969, |
|
"logps/rejected": -86.79838562011719, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.11054690927267075, |
|
"rewards/margins": 0.5899176001548767, |
|
"rewards/rejected": -0.47937074303627014, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.720183486238532e-08, |
|
"logits/chosen": -3.0426931381225586, |
|
"logits/rejected": -3.0394179821014404, |
|
"logps/chosen": -235.52706909179688, |
|
"logps/rejected": -73.9857406616211, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.08785500377416611, |
|
"rewards/margins": 0.5274263620376587, |
|
"rewards/rejected": -0.4395713806152344, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.73394495412844e-09, |
|
"logits/chosen": -3.0092616081237793, |
|
"logits/rejected": -2.972731590270996, |
|
"logps/chosen": -249.88876342773438, |
|
"logps/rejected": -85.80451965332031, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.07512323558330536, |
|
"rewards/margins": 0.5230099558830261, |
|
"rewards/rejected": -0.44788676500320435, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -3.034407377243042, |
|
"eval_logits/rejected": -3.069913864135742, |
|
"eval_logps/chosen": -271.40020751953125, |
|
"eval_logps/rejected": -175.5244140625, |
|
"eval_loss": 0.5650191903114319, |
|
"eval_rewards/accuracies": 0.76953125, |
|
"eval_rewards/chosen": 0.08157022297382355, |
|
"eval_rewards/margins": 0.33799096941947937, |
|
"eval_rewards/rejected": -0.25642073154449463, |
|
"eval_runtime": 256.4523, |
|
"eval_samples_per_second": 7.799, |
|
"eval_steps_per_second": 0.062, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 485, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5539181610972611, |
|
"train_runtime": 15602.6148, |
|
"train_samples_per_second": 3.978, |
|
"train_steps_per_second": 0.031 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 485, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|