|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 500, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 31.324190504537746, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -0.49775856733322144, |
|
"logits/rejected": -0.5134874582290649, |
|
"logps/chosen": -1.1746575832366943, |
|
"logps/rejected": -1.3592634201049805, |
|
"loss": 2.1738, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1746575832366943, |
|
"rewards/margins": 0.18460586667060852, |
|
"rewards/rejected": -1.3592634201049805, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 17.522763098577006, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -0.5211091637611389, |
|
"logits/rejected": -0.49808019399642944, |
|
"logps/chosen": -1.1585900783538818, |
|
"logps/rejected": -1.2622541189193726, |
|
"loss": 2.1407, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.1585900783538818, |
|
"rewards/margins": 0.10366388410329819, |
|
"rewards/rejected": -1.2622541189193726, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 25.192278194697494, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -0.461596816778183, |
|
"logits/rejected": -0.45038098096847534, |
|
"logps/chosen": -1.1062204837799072, |
|
"logps/rejected": -1.3620827198028564, |
|
"loss": 2.1074, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1062204837799072, |
|
"rewards/margins": 0.255862295627594, |
|
"rewards/rejected": -1.3620827198028564, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 44.544789847879194, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.4408242106437683, |
|
"logits/rejected": -0.45246267318725586, |
|
"logps/chosen": -1.1579445600509644, |
|
"logps/rejected": -1.2627536058425903, |
|
"loss": 2.1651, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1579445600509644, |
|
"rewards/margins": 0.10480908304452896, |
|
"rewards/rejected": -1.2627536058425903, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 11.346692540130856, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -0.5032289028167725, |
|
"logits/rejected": -0.4789913296699524, |
|
"logps/chosen": -1.166441559791565, |
|
"logps/rejected": -1.2368651628494263, |
|
"loss": 2.1373, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.166441559791565, |
|
"rewards/margins": 0.07042353600263596, |
|
"rewards/rejected": -1.2368651628494263, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 28.570034370144306, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": -0.49172288179397583, |
|
"logits/rejected": -0.4948248267173767, |
|
"logps/chosen": -1.1403913497924805, |
|
"logps/rejected": -1.275451898574829, |
|
"loss": 2.163, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.1403913497924805, |
|
"rewards/margins": 0.13506053388118744, |
|
"rewards/rejected": -1.275451898574829, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 19.91642226793408, |
|
"learning_rate": 7.291666666666666e-07, |
|
"logits/chosen": -0.47831740975379944, |
|
"logits/rejected": -0.4338778853416443, |
|
"logps/chosen": -1.1529806852340698, |
|
"logps/rejected": -1.3276116847991943, |
|
"loss": 2.1154, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.1529806852340698, |
|
"rewards/margins": 0.1746309995651245, |
|
"rewards/rejected": -1.3276116847991943, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 26.52326580399366, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -0.4782256484031677, |
|
"logits/rejected": -0.4668501019477844, |
|
"logps/chosen": -1.108135461807251, |
|
"logps/rejected": -1.4614675045013428, |
|
"loss": 2.0666, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.108135461807251, |
|
"rewards/margins": 0.353331983089447, |
|
"rewards/rejected": -1.4614675045013428, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 13.796799671660693, |
|
"learning_rate": 9.374999999999999e-07, |
|
"logits/chosen": -0.44356870651245117, |
|
"logits/rejected": -0.4471743702888489, |
|
"logps/chosen": -1.0965029001235962, |
|
"logps/rejected": -1.3664577007293701, |
|
"loss": 2.0864, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.0965029001235962, |
|
"rewards/margins": 0.26995497941970825, |
|
"rewards/rejected": -1.3664577007293701, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 30.371297005919416, |
|
"learning_rate": 9.999463737538052e-07, |
|
"logits/chosen": -0.461489200592041, |
|
"logits/rejected": -0.4655645489692688, |
|
"logps/chosen": -1.1575626134872437, |
|
"logps/rejected": -1.4973771572113037, |
|
"loss": 2.1199, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1575626134872437, |
|
"rewards/margins": 0.3398147225379944, |
|
"rewards/rejected": -1.4973771572113037, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 26.67718500476433, |
|
"learning_rate": 9.993432105822034e-07, |
|
"logits/chosen": -0.4001489281654358, |
|
"logits/rejected": -0.37682315707206726, |
|
"logps/chosen": -1.1248127222061157, |
|
"logps/rejected": -1.4001871347427368, |
|
"loss": 2.0897, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1248127222061157, |
|
"rewards/margins": 0.27537447214126587, |
|
"rewards/rejected": -1.4001871347427368, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 15.812441875154704, |
|
"learning_rate": 9.980706626858607e-07, |
|
"logits/chosen": -0.43878427147865295, |
|
"logits/rejected": -0.4231850504875183, |
|
"logps/chosen": -1.2165329456329346, |
|
"logps/rejected": -1.3715764284133911, |
|
"loss": 2.0665, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2165329456329346, |
|
"rewards/margins": 0.1550435572862625, |
|
"rewards/rejected": -1.3715764284133911, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 32.69892893599103, |
|
"learning_rate": 9.961304359538434e-07, |
|
"logits/chosen": -0.38188332319259644, |
|
"logits/rejected": -0.30855393409729004, |
|
"logps/chosen": -1.1145586967468262, |
|
"logps/rejected": -1.7429344654083252, |
|
"loss": 2.0414, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1145586967468262, |
|
"rewards/margins": 0.6283758878707886, |
|
"rewards/rejected": -1.7429344654083252, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 44.90817025126785, |
|
"learning_rate": 9.935251313189563e-07, |
|
"logits/chosen": -0.27111151814460754, |
|
"logits/rejected": -0.24608612060546875, |
|
"logps/chosen": -1.1660597324371338, |
|
"logps/rejected": -1.5309925079345703, |
|
"loss": 2.0234, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.1660597324371338, |
|
"rewards/margins": 0.3649328947067261, |
|
"rewards/rejected": -1.5309925079345703, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 38.268073195027156, |
|
"learning_rate": 9.902582412711118e-07, |
|
"logits/chosen": -0.28683459758758545, |
|
"logits/rejected": -0.25514599680900574, |
|
"logps/chosen": -1.1409043073654175, |
|
"logps/rejected": -1.5740129947662354, |
|
"loss": 2.0488, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1409043073654175, |
|
"rewards/margins": 0.4331088066101074, |
|
"rewards/rejected": -1.5740129947662354, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 23.626638109106274, |
|
"learning_rate": 9.86334145175542e-07, |
|
"logits/chosen": -0.40040236711502075, |
|
"logits/rejected": -0.3598732650279999, |
|
"logps/chosen": -1.1197240352630615, |
|
"logps/rejected": -1.6543350219726562, |
|
"loss": 2.0889, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.1197240352630615, |
|
"rewards/margins": 0.5346111059188843, |
|
"rewards/rejected": -1.6543350219726562, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 21.67675055841775, |
|
"learning_rate": 9.817581034021272e-07, |
|
"logits/chosen": -0.4968738555908203, |
|
"logits/rejected": -0.4568953514099121, |
|
"logps/chosen": -1.1042544841766357, |
|
"logps/rejected": -1.4778095483779907, |
|
"loss": 2.0732, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.1042544841766357, |
|
"rewards/margins": 0.37355509400367737, |
|
"rewards/rejected": -1.4778095483779907, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 32.61370646153053, |
|
"learning_rate": 9.765362502737097e-07, |
|
"logits/chosen": -0.4779502749443054, |
|
"logits/rejected": -0.44491392374038696, |
|
"logps/chosen": -1.144523024559021, |
|
"logps/rejected": -1.4939491748809814, |
|
"loss": 2.0171, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.144523024559021, |
|
"rewards/margins": 0.3494262099266052, |
|
"rewards/rejected": -1.4939491748809814, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 29.010011255606027, |
|
"learning_rate": 9.706755858428485e-07, |
|
"logits/chosen": -0.4942244589328766, |
|
"logits/rejected": -0.39027169346809387, |
|
"logps/chosen": -1.2216947078704834, |
|
"logps/rejected": -1.6423091888427734, |
|
"loss": 2.0511, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2216947078704834, |
|
"rewards/margins": 0.4206143319606781, |
|
"rewards/rejected": -1.6423091888427734, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 25.050588840086288, |
|
"learning_rate": 9.641839665080363e-07, |
|
"logits/chosen": -0.46108850836753845, |
|
"logits/rejected": -0.423541396856308, |
|
"logps/chosen": -1.1832860708236694, |
|
"logps/rejected": -1.7398521900177002, |
|
"loss": 2.0554, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.1832860708236694, |
|
"rewards/margins": 0.5565661787986755, |
|
"rewards/rejected": -1.7398521900177002, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 76.09509812922548, |
|
"learning_rate": 9.570700944819582e-07, |
|
"logits/chosen": -0.48844489455223083, |
|
"logits/rejected": -0.47664815187454224, |
|
"logps/chosen": -1.065321683883667, |
|
"logps/rejected": -1.5008853673934937, |
|
"loss": 2.0306, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.065321683883667, |
|
"rewards/margins": 0.4355636537075043, |
|
"rewards/rejected": -1.5008853673934937, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 87.9539848283412, |
|
"learning_rate": 9.493435061259129e-07, |
|
"logits/chosen": -0.5218511819839478, |
|
"logits/rejected": -0.49293455481529236, |
|
"logps/chosen": -1.0804827213287354, |
|
"logps/rejected": -1.5555989742279053, |
|
"loss": 2.0182, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0804827213287354, |
|
"rewards/margins": 0.4751162528991699, |
|
"rewards/rejected": -1.5555989742279053, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 24.95587592194343, |
|
"learning_rate": 9.4101455916603e-07, |
|
"logits/chosen": -0.4004356265068054, |
|
"logits/rejected": -0.34801220893859863, |
|
"logps/chosen": -1.1054725646972656, |
|
"logps/rejected": -1.7531585693359375, |
|
"loss": 1.9992, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1054725646972656, |
|
"rewards/margins": 0.6476858854293823, |
|
"rewards/rejected": -1.7531585693359375, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 53.12789958164912, |
|
"learning_rate": 9.320944188084241e-07, |
|
"logits/chosen": -0.3867969810962677, |
|
"logits/rejected": -0.3542706072330475, |
|
"logps/chosen": -1.3296326398849487, |
|
"logps/rejected": -1.7101236581802368, |
|
"loss": 2.069, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.3296326398849487, |
|
"rewards/margins": 0.3804909884929657, |
|
"rewards/rejected": -1.7101236581802368, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 25.68062394354381, |
|
"learning_rate": 9.225950427718974e-07, |
|
"logits/chosen": -0.4343915581703186, |
|
"logits/rejected": -0.40751656889915466, |
|
"logps/chosen": -1.1859281063079834, |
|
"logps/rejected": -1.5661814212799072, |
|
"loss": 2.0229, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.1859281063079834, |
|
"rewards/margins": 0.3802531659603119, |
|
"rewards/rejected": -1.5661814212799072, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 146.99732744643043, |
|
"learning_rate": 9.125291652582547e-07, |
|
"logits/chosen": -0.43255624175071716, |
|
"logits/rejected": -0.42008519172668457, |
|
"logps/chosen": -1.1270229816436768, |
|
"logps/rejected": -1.3844034671783447, |
|
"loss": 2.0368, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1270229816436768, |
|
"rewards/margins": 0.2573803663253784, |
|
"rewards/rejected": -1.3844034671783447, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 42.69972929682183, |
|
"learning_rate": 9.019102798817195e-07, |
|
"logits/chosen": -0.5087494254112244, |
|
"logits/rejected": -0.4200964570045471, |
|
"logps/chosen": -1.1956226825714111, |
|
"logps/rejected": -1.9745105504989624, |
|
"loss": 1.9952, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.1956226825714111, |
|
"rewards/margins": 0.7788880467414856, |
|
"rewards/rejected": -1.9745105504989624, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 19.87017547277629, |
|
"learning_rate": 8.90752621580335e-07, |
|
"logits/chosen": -0.4257656931877136, |
|
"logits/rejected": -0.364449143409729, |
|
"logps/chosen": -1.2079570293426514, |
|
"logps/rejected": -1.8338918685913086, |
|
"loss": 1.9605, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2079570293426514, |
|
"rewards/margins": 0.6259347200393677, |
|
"rewards/rejected": -1.8338918685913086, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 15.24234201577276, |
|
"learning_rate": 8.79071147533597e-07, |
|
"logits/chosen": -0.47194284200668335, |
|
"logits/rejected": -0.44540295004844666, |
|
"logps/chosen": -1.2036808729171753, |
|
"logps/rejected": -1.6797609329223633, |
|
"loss": 2.0129, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.2036808729171753, |
|
"rewards/margins": 0.4760800004005432, |
|
"rewards/rejected": -1.6797609329223633, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 41.583916372931604, |
|
"learning_rate": 8.668815171119019e-07, |
|
"logits/chosen": -0.4502836763858795, |
|
"logits/rejected": -0.416980117559433, |
|
"logps/chosen": -1.0764203071594238, |
|
"logps/rejected": -1.5866191387176514, |
|
"loss": 1.9679, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.0764203071594238, |
|
"rewards/margins": 0.5101990699768066, |
|
"rewards/rejected": -1.5866191387176514, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 17.97044676211115, |
|
"learning_rate": 8.54200070884685e-07, |
|
"logits/chosen": -0.4577752947807312, |
|
"logits/rejected": -0.4022301733493805, |
|
"logps/chosen": -1.1599218845367432, |
|
"logps/rejected": -1.6104686260223389, |
|
"loss": 1.9736, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1599218845367432, |
|
"rewards/margins": 0.45054665207862854, |
|
"rewards/rejected": -1.6104686260223389, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 37.67621637306142, |
|
"learning_rate": 8.410438087153911e-07, |
|
"logits/chosen": -0.33586519956588745, |
|
"logits/rejected": -0.2821674942970276, |
|
"logps/chosen": -1.2303192615509033, |
|
"logps/rejected": -1.7895514965057373, |
|
"loss": 2.0104, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2303192615509033, |
|
"rewards/margins": 0.5592321753501892, |
|
"rewards/rejected": -1.7895514965057373, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 16.05482538779056, |
|
"learning_rate": 8.274303669726426e-07, |
|
"logits/chosen": -0.4002958834171295, |
|
"logits/rejected": -0.34722983837127686, |
|
"logps/chosen": -1.1306252479553223, |
|
"logps/rejected": -1.6940090656280518, |
|
"loss": 2.0112, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1306252479553223, |
|
"rewards/margins": 0.5633838176727295, |
|
"rewards/rejected": -1.6940090656280518, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 15.217072980607172, |
|
"learning_rate": 8.133779948881513e-07, |
|
"logits/chosen": -0.45079272985458374, |
|
"logits/rejected": -0.37534087896347046, |
|
"logps/chosen": -1.1774274110794067, |
|
"logps/rejected": -1.6361265182495117, |
|
"loss": 2.0148, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.1774274110794067, |
|
"rewards/margins": 0.4586990773677826, |
|
"rewards/rejected": -1.6361265182495117, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 19.691952142371672, |
|
"learning_rate": 7.989055300930704e-07, |
|
"logits/chosen": -0.42495885491371155, |
|
"logits/rejected": -0.3137228488922119, |
|
"logps/chosen": -1.2254281044006348, |
|
"logps/rejected": -1.73735773563385, |
|
"loss": 2.0104, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2254281044006348, |
|
"rewards/margins": 0.5119296312332153, |
|
"rewards/rejected": -1.73735773563385, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 30.34827875211837, |
|
"learning_rate": 7.840323733655778e-07, |
|
"logits/chosen": -0.3100610673427582, |
|
"logits/rejected": -0.25817859172821045, |
|
"logps/chosen": -1.2358551025390625, |
|
"logps/rejected": -1.8043813705444336, |
|
"loss": 1.9916, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2358551025390625, |
|
"rewards/margins": 0.5685264468193054, |
|
"rewards/rejected": -1.8043813705444336, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 21.54243489627896, |
|
"learning_rate": 7.687784626235447e-07, |
|
"logits/chosen": -0.24814710021018982, |
|
"logits/rejected": -0.12512032687664032, |
|
"logps/chosen": -1.2242952585220337, |
|
"logps/rejected": -1.974454641342163, |
|
"loss": 1.9456, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2242952585220337, |
|
"rewards/margins": 0.7501593828201294, |
|
"rewards/rejected": -1.974454641342163, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 27.936855442626964, |
|
"learning_rate": 7.531642461971514e-07, |
|
"logits/chosen": -0.2731862962245941, |
|
"logits/rejected": -0.18622538447380066, |
|
"logps/chosen": -1.176733136177063, |
|
"logps/rejected": -1.7295942306518555, |
|
"loss": 2.0622, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.176733136177063, |
|
"rewards/margins": 0.5528609752655029, |
|
"rewards/rejected": -1.7295942306518555, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 18.211412725741514, |
|
"learning_rate": 7.372106554172801e-07, |
|
"logits/chosen": -0.21031120419502258, |
|
"logits/rejected": -0.14914147555828094, |
|
"logps/chosen": -1.2273377180099487, |
|
"logps/rejected": -1.6471458673477173, |
|
"loss": 1.9975, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2273377180099487, |
|
"rewards/margins": 0.41980820894241333, |
|
"rewards/rejected": -1.6471458673477173, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 28.304585509307277, |
|
"learning_rate": 7.209390765564318e-07, |
|
"logits/chosen": -0.13628198206424713, |
|
"logits/rejected": -0.0973358079791069, |
|
"logps/chosen": -1.2455083131790161, |
|
"logps/rejected": -1.753761649131775, |
|
"loss": 2.0029, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2455083131790161, |
|
"rewards/margins": 0.5082534551620483, |
|
"rewards/rejected": -1.753761649131775, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 23.204471353515586, |
|
"learning_rate": 7.043713221597773e-07, |
|
"logits/chosen": -0.07737751305103302, |
|
"logits/rejected": -0.005436101462692022, |
|
"logps/chosen": -1.0530147552490234, |
|
"logps/rejected": -1.7120428085327148, |
|
"loss": 1.9468, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0530147552490234, |
|
"rewards/margins": 0.6590279340744019, |
|
"rewards/rejected": -1.7120428085327148, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 19.22100285707222, |
|
"learning_rate": 6.875296018047809e-07, |
|
"logits/chosen": -0.14544904232025146, |
|
"logits/rejected": -0.09322938323020935, |
|
"logps/chosen": -1.25759756565094, |
|
"logps/rejected": -1.6059818267822266, |
|
"loss": 2.0319, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.25759756565094, |
|
"rewards/margins": 0.3483843505382538, |
|
"rewards/rejected": -1.6059818267822266, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 28.866239067564, |
|
"learning_rate": 6.704364923285857e-07, |
|
"logits/chosen": -0.21608710289001465, |
|
"logits/rejected": -0.135384202003479, |
|
"logps/chosen": -1.1534065008163452, |
|
"logps/rejected": -1.7110164165496826, |
|
"loss": 1.9831, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.1534065008163452, |
|
"rewards/margins": 0.5576101541519165, |
|
"rewards/rejected": -1.7110164165496826, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 83.00316897734959, |
|
"learning_rate": 6.531149075630796e-07, |
|
"logits/chosen": -0.22518062591552734, |
|
"logits/rejected": -0.04796000197529793, |
|
"logps/chosen": -1.2540584802627563, |
|
"logps/rejected": -1.8683173656463623, |
|
"loss": 1.9781, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.2540584802627563, |
|
"rewards/margins": 0.6142589449882507, |
|
"rewards/rejected": -1.8683173656463623, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 15.63146505822897, |
|
"learning_rate": 6.355880676182085e-07, |
|
"logits/chosen": -0.24729761481285095, |
|
"logits/rejected": -0.10253201425075531, |
|
"logps/chosen": -1.148567795753479, |
|
"logps/rejected": -1.861864447593689, |
|
"loss": 1.9337, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.148567795753479, |
|
"rewards/margins": 0.7132967114448547, |
|
"rewards/rejected": -1.861864447593689, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 1092.6523686417404, |
|
"learning_rate": 6.178794677547137e-07, |
|
"logits/chosen": -0.33141931891441345, |
|
"logits/rejected": -0.1571967899799347, |
|
"logps/chosen": -1.125832200050354, |
|
"logps/rejected": -1.9030935764312744, |
|
"loss": 1.9444, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.125832200050354, |
|
"rewards/margins": 0.7772611379623413, |
|
"rewards/rejected": -1.9030935764312744, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 18.438187215474308, |
|
"learning_rate": 6.000128468880222e-07, |
|
"logits/chosen": -0.19492967426776886, |
|
"logits/rejected": -0.088912233710289, |
|
"logps/chosen": -1.1279089450836182, |
|
"logps/rejected": -1.7057428359985352, |
|
"loss": 1.9794, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1279089450836182, |
|
"rewards/margins": 0.5778340101242065, |
|
"rewards/rejected": -1.7057428359985352, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 45.11647792728952, |
|
"learning_rate": 5.820121557655108e-07, |
|
"logits/chosen": -0.17841561138629913, |
|
"logits/rejected": -0.08987215161323547, |
|
"logps/chosen": -1.1346948146820068, |
|
"logps/rejected": -1.8120676279067993, |
|
"loss": 1.9898, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1346948146820068, |
|
"rewards/margins": 0.6773727536201477, |
|
"rewards/rejected": -1.8120676279067993, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 53.80136713305279, |
|
"learning_rate": 5.639015248598023e-07, |
|
"logits/chosen": -0.2315063774585724, |
|
"logits/rejected": -0.11919162422418594, |
|
"logps/chosen": -1.254396677017212, |
|
"logps/rejected": -1.7449557781219482, |
|
"loss": 1.9968, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.254396677017212, |
|
"rewards/margins": 0.49055904150009155, |
|
"rewards/rejected": -1.7449557781219482, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 30.376240963875347, |
|
"learning_rate": 5.457052320211339e-07, |
|
"logits/chosen": -0.2132711410522461, |
|
"logits/rejected": -0.11911521106958389, |
|
"logps/chosen": -1.1606347560882568, |
|
"logps/rejected": -1.8521320819854736, |
|
"loss": 1.9963, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1606347560882568, |
|
"rewards/margins": 0.6914970874786377, |
|
"rewards/rejected": -1.8521320819854736, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 24.612321850210826, |
|
"learning_rate": 5.274476699321637e-07, |
|
"logits/chosen": -0.17434340715408325, |
|
"logits/rejected": -0.02575433813035488, |
|
"logps/chosen": -1.2206462621688843, |
|
"logps/rejected": -1.893471121788025, |
|
"loss": 1.9294, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2206462621688843, |
|
"rewards/margins": 0.6728248000144958, |
|
"rewards/rejected": -1.893471121788025, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 23.578174980485148, |
|
"learning_rate": 5.091533134088387e-07, |
|
"logits/chosen": -0.19827161729335785, |
|
"logits/rejected": -0.10442183911800385, |
|
"logps/chosen": -1.1325616836547852, |
|
"logps/rejected": -1.894374132156372, |
|
"loss": 1.9889, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1325616836547852, |
|
"rewards/margins": 0.7618124485015869, |
|
"rewards/rejected": -1.894374132156372, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 23.363765551953982, |
|
"learning_rate": 4.908466865911614e-07, |
|
"logits/chosen": -0.22801117599010468, |
|
"logits/rejected": -0.15166376531124115, |
|
"logps/chosen": -1.2147762775421143, |
|
"logps/rejected": -1.6708816289901733, |
|
"loss": 1.9391, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2147762775421143, |
|
"rewards/margins": 0.45610541105270386, |
|
"rewards/rejected": -1.6708816289901733, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 20.86303085584383, |
|
"learning_rate": 4.7255233006783624e-07, |
|
"logits/chosen": -0.22982990741729736, |
|
"logits/rejected": -0.13931187987327576, |
|
"logps/chosen": -1.2865099906921387, |
|
"logps/rejected": -1.766331434249878, |
|
"loss": 1.9878, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2865099906921387, |
|
"rewards/margins": 0.47982144355773926, |
|
"rewards/rejected": -1.766331434249878, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 12.144303285220628, |
|
"learning_rate": 4.5429476797886617e-07, |
|
"logits/chosen": -0.2274014949798584, |
|
"logits/rejected": -0.07431206852197647, |
|
"logps/chosen": -1.1824675798416138, |
|
"logps/rejected": -1.998253583908081, |
|
"loss": 1.962, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1824675798416138, |
|
"rewards/margins": 0.8157860040664673, |
|
"rewards/rejected": -1.998253583908081, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 37.56330617572613, |
|
"learning_rate": 4.3609847514019763e-07, |
|
"logits/chosen": -0.2594318687915802, |
|
"logits/rejected": -0.14403223991394043, |
|
"logps/chosen": -1.1071598529815674, |
|
"logps/rejected": -1.610290765762329, |
|
"loss": 1.957, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1071598529815674, |
|
"rewards/margins": 0.5031307935714722, |
|
"rewards/rejected": -1.610290765762329, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 55.56290292891477, |
|
"learning_rate": 4.179878442344892e-07, |
|
"logits/chosen": -0.2227039635181427, |
|
"logits/rejected": -0.1900090128183365, |
|
"logps/chosen": -1.1886059045791626, |
|
"logps/rejected": -1.7931125164031982, |
|
"loss": 1.9481, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1886059045791626, |
|
"rewards/margins": 0.60450679063797, |
|
"rewards/rejected": -1.7931125164031982, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 24.48468402537705, |
|
"learning_rate": 3.9998715311197783e-07, |
|
"logits/chosen": -0.26827192306518555, |
|
"logits/rejected": -0.17545387148857117, |
|
"logps/chosen": -1.1850652694702148, |
|
"logps/rejected": -1.8715204000473022, |
|
"loss": 1.9349, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1850652694702148, |
|
"rewards/margins": 0.6864550709724426, |
|
"rewards/rejected": -1.8715204000473022, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 19.0989416435893, |
|
"learning_rate": 3.821205322452863e-07, |
|
"logits/chosen": -0.2373635321855545, |
|
"logits/rejected": -0.1607808768749237, |
|
"logps/chosen": -1.1796191930770874, |
|
"logps/rejected": -1.9065383672714233, |
|
"loss": 1.9901, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1796191930770874, |
|
"rewards/margins": 0.7269191741943359, |
|
"rewards/rejected": -1.9065383672714233, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 35.51594817128474, |
|
"learning_rate": 3.6441193238179146e-07, |
|
"logits/chosen": -0.28120699524879456, |
|
"logits/rejected": -0.2147771418094635, |
|
"logps/chosen": -1.2024883031845093, |
|
"logps/rejected": -1.7524086236953735, |
|
"loss": 1.9577, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2024883031845093, |
|
"rewards/margins": 0.5499202013015747, |
|
"rewards/rejected": -1.7524086236953735, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 19.807409901213642, |
|
"learning_rate": 3.4688509243692034e-07, |
|
"logits/chosen": -0.1579556167125702, |
|
"logits/rejected": -0.09319324791431427, |
|
"logps/chosen": -1.2312943935394287, |
|
"logps/rejected": -1.9326064586639404, |
|
"loss": 1.9317, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2312943935394287, |
|
"rewards/margins": 0.7013120055198669, |
|
"rewards/rejected": -1.9326064586639404, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 26.79163246884692, |
|
"learning_rate": 3.295635076714144e-07, |
|
"logits/chosen": -0.13611330091953278, |
|
"logits/rejected": -0.1433105766773224, |
|
"logps/chosen": -1.1258060932159424, |
|
"logps/rejected": -1.763738989830017, |
|
"loss": 1.9276, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1258060932159424, |
|
"rewards/margins": 0.6379327774047852, |
|
"rewards/rejected": -1.763738989830017, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 26.007353485880714, |
|
"learning_rate": 3.12470398195219e-07, |
|
"logits/chosen": -0.1855328381061554, |
|
"logits/rejected": -0.06350420415401459, |
|
"logps/chosen": -1.1226041316986084, |
|
"logps/rejected": -1.979421854019165, |
|
"loss": 1.9461, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.1226041316986084, |
|
"rewards/margins": 0.8568177223205566, |
|
"rewards/rejected": -1.979421854019165, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 25.93600538288609, |
|
"learning_rate": 2.956286778402226e-07, |
|
"logits/chosen": -0.16057109832763672, |
|
"logits/rejected": -0.10531453043222427, |
|
"logps/chosen": -1.1869053840637207, |
|
"logps/rejected": -1.7816956043243408, |
|
"loss": 1.8982, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.1869053840637207, |
|
"rewards/margins": 0.5947902798652649, |
|
"rewards/rejected": -1.7816956043243408, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 41.1877461903664, |
|
"learning_rate": 2.7906092344356826e-07, |
|
"logits/chosen": -0.16566753387451172, |
|
"logits/rejected": -0.06549857556819916, |
|
"logps/chosen": -1.1580512523651123, |
|
"logps/rejected": -1.8924694061279297, |
|
"loss": 1.9157, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1580512523651123, |
|
"rewards/margins": 0.7344181537628174, |
|
"rewards/rejected": -1.8924694061279297, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 13.497224748766067, |
|
"learning_rate": 2.6278934458271996e-07, |
|
"logits/chosen": -0.09990070015192032, |
|
"logits/rejected": -0.019180208444595337, |
|
"logps/chosen": -1.1130152940750122, |
|
"logps/rejected": -1.6457436084747314, |
|
"loss": 1.9451, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.1130152940750122, |
|
"rewards/margins": 0.5327284932136536, |
|
"rewards/rejected": -1.6457436084747314, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 20.73440619316291, |
|
"learning_rate": 2.468357538028487e-07, |
|
"logits/chosen": -0.17166391015052795, |
|
"logits/rejected": -0.08680696785449982, |
|
"logps/chosen": -1.109227180480957, |
|
"logps/rejected": -1.7418838739395142, |
|
"loss": 1.9573, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.109227180480957, |
|
"rewards/margins": 0.6326566934585571, |
|
"rewards/rejected": -1.7418838739395142, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 20.25166204813565, |
|
"learning_rate": 2.312215373764551e-07, |
|
"logits/chosen": -0.155477374792099, |
|
"logits/rejected": -0.05189569666981697, |
|
"logps/chosen": -1.3119245767593384, |
|
"logps/rejected": -1.9228538274765015, |
|
"loss": 1.9728, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3119245767593384, |
|
"rewards/margins": 0.6109293103218079, |
|
"rewards/rejected": -1.9228538274765015, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 35.62472752098736, |
|
"learning_rate": 2.1596762663442213e-07, |
|
"logits/chosen": -0.18124118447303772, |
|
"logits/rejected": -0.04932355508208275, |
|
"logps/chosen": -1.2099921703338623, |
|
"logps/rejected": -1.9292633533477783, |
|
"loss": 1.9751, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2099921703338623, |
|
"rewards/margins": 0.719271183013916, |
|
"rewards/rejected": -1.9292633533477783, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 19.36102520036485, |
|
"learning_rate": 2.0109446990692963e-07, |
|
"logits/chosen": -0.048113010823726654, |
|
"logits/rejected": -0.02143859677016735, |
|
"logps/chosen": -1.227217197418213, |
|
"logps/rejected": -1.7735779285430908, |
|
"loss": 2.0111, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.227217197418213, |
|
"rewards/margins": 0.5463606715202332, |
|
"rewards/rejected": -1.7735779285430908, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 16.299019547207138, |
|
"learning_rate": 1.8662200511184872e-07, |
|
"logits/chosen": -0.09398343414068222, |
|
"logits/rejected": -0.01715996116399765, |
|
"logps/chosen": -1.061127781867981, |
|
"logps/rejected": -1.851822853088379, |
|
"loss": 1.8894, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.061127781867981, |
|
"rewards/margins": 0.7906948328018188, |
|
"rewards/rejected": -1.851822853088379, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 21.325612236488393, |
|
"learning_rate": 1.725696330273575e-07, |
|
"logits/chosen": -0.19810739159584045, |
|
"logits/rejected": -0.09949172288179398, |
|
"logps/chosen": -1.0794689655303955, |
|
"logps/rejected": -1.6091794967651367, |
|
"loss": 1.8836, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0794689655303955, |
|
"rewards/margins": 0.529710590839386, |
|
"rewards/rejected": -1.6091794967651367, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 17.67539053293725, |
|
"learning_rate": 1.589561912846089e-07, |
|
"logits/chosen": -0.19371333718299866, |
|
"logits/rejected": -0.06843050569295883, |
|
"logps/chosen": -1.2321817874908447, |
|
"logps/rejected": -1.8411308526992798, |
|
"loss": 1.9833, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2321817874908447, |
|
"rewards/margins": 0.6089491844177246, |
|
"rewards/rejected": -1.8411308526992798, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 23.235373655195016, |
|
"learning_rate": 1.4579992911531496e-07, |
|
"logits/chosen": -0.11578913033008575, |
|
"logits/rejected": -0.025940338149666786, |
|
"logps/chosen": -1.196590781211853, |
|
"logps/rejected": -1.895391821861267, |
|
"loss": 1.9263, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.196590781211853, |
|
"rewards/margins": 0.6988012790679932, |
|
"rewards/rejected": -1.895391821861267, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 19.259561354186946, |
|
"learning_rate": 1.3311848288809813e-07, |
|
"logits/chosen": -0.11768321692943573, |
|
"logits/rejected": -0.1705169379711151, |
|
"logps/chosen": -1.2138588428497314, |
|
"logps/rejected": -1.7918386459350586, |
|
"loss": 1.9695, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.2138588428497314, |
|
"rewards/margins": 0.5779798030853271, |
|
"rewards/rejected": -1.7918386459350586, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 19.09434464976567, |
|
"learning_rate": 1.209288524664029e-07, |
|
"logits/chosen": -0.1390591561794281, |
|
"logits/rejected": -0.08628968149423599, |
|
"logps/chosen": -1.211247444152832, |
|
"logps/rejected": -1.7502481937408447, |
|
"loss": 1.9086, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.211247444152832, |
|
"rewards/margins": 0.5390007495880127, |
|
"rewards/rejected": -1.7502481937408447, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 22.75496669970745, |
|
"learning_rate": 1.0924737841966497e-07, |
|
"logits/chosen": -0.14960381388664246, |
|
"logits/rejected": -0.08989100158214569, |
|
"logps/chosen": -1.1806560754776, |
|
"logps/rejected": -1.799631118774414, |
|
"loss": 1.9473, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1806560754776, |
|
"rewards/margins": 0.6189749240875244, |
|
"rewards/rejected": -1.799631118774414, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 21.199803415422714, |
|
"learning_rate": 9.808972011828054e-08, |
|
"logits/chosen": -0.13692599534988403, |
|
"logits/rejected": -0.04226923733949661, |
|
"logps/chosen": -1.1819908618927002, |
|
"logps/rejected": -1.9731757640838623, |
|
"loss": 1.9367, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.1819908618927002, |
|
"rewards/margins": 0.7911848425865173, |
|
"rewards/rejected": -1.9731757640838623, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 33.49806758309421, |
|
"learning_rate": 8.747083474174527e-08, |
|
"logits/chosen": -0.13622619211673737, |
|
"logits/rejected": 0.037842754274606705, |
|
"logps/chosen": -1.2155778408050537, |
|
"logps/rejected": -1.890428900718689, |
|
"loss": 1.9388, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2155778408050537, |
|
"rewards/margins": 0.6748510599136353, |
|
"rewards/rejected": -1.890428900718689, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 15.890713698381443, |
|
"learning_rate": 7.740495722810269e-08, |
|
"logits/chosen": -0.05593853071331978, |
|
"logits/rejected": -0.004029959440231323, |
|
"logps/chosen": -1.112066388130188, |
|
"logps/rejected": -1.8403129577636719, |
|
"loss": 1.9207, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.112066388130188, |
|
"rewards/margins": 0.7282465696334839, |
|
"rewards/rejected": -1.8403129577636719, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 19.88967649390424, |
|
"learning_rate": 6.790558119157597e-08, |
|
"logits/chosen": -0.18492689728736877, |
|
"logits/rejected": -0.10850385576486588, |
|
"logps/chosen": -1.2788586616516113, |
|
"logps/rejected": -2.0290207862854004, |
|
"loss": 1.9523, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2788586616516113, |
|
"rewards/margins": 0.7501621842384338, |
|
"rewards/rejected": -2.0290207862854004, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 22.56741853126592, |
|
"learning_rate": 5.898544083397e-08, |
|
"logits/chosen": -0.14272233843803406, |
|
"logits/rejected": -0.0651661604642868, |
|
"logps/chosen": -1.1273430585861206, |
|
"logps/rejected": -1.6827017068862915, |
|
"loss": 1.9304, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.1273430585861206, |
|
"rewards/margins": 0.5553585290908813, |
|
"rewards/rejected": -1.6827017068862915, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 15.199788752886258, |
|
"learning_rate": 5.065649387408705e-08, |
|
"logits/chosen": -0.14387831091880798, |
|
"logits/rejected": -0.009860972873866558, |
|
"logps/chosen": -1.161084771156311, |
|
"logps/rejected": -1.8390836715698242, |
|
"loss": 1.9141, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.161084771156311, |
|
"rewards/margins": 0.6779987812042236, |
|
"rewards/rejected": -1.8390836715698242, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 14.485810825336134, |
|
"learning_rate": 4.292990551804171e-08, |
|
"logits/chosen": -0.12360888719558716, |
|
"logits/rejected": -0.05216851085424423, |
|
"logps/chosen": -1.1394500732421875, |
|
"logps/rejected": -1.831883192062378, |
|
"loss": 1.9578, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.1394500732421875, |
|
"rewards/margins": 0.69243323802948, |
|
"rewards/rejected": -1.831883192062378, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 22.957524299991945, |
|
"learning_rate": 3.581603349196371e-08, |
|
"logits/chosen": -0.08880945295095444, |
|
"logits/rejected": -0.02426137961447239, |
|
"logps/chosen": -1.296489953994751, |
|
"logps/rejected": -1.8570985794067383, |
|
"loss": 1.9254, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.296489953994751, |
|
"rewards/margins": 0.5606086254119873, |
|
"rewards/rejected": -1.8570985794067383, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 17.939695720657745, |
|
"learning_rate": 2.9324414157151367e-08, |
|
"logits/chosen": -0.10626481473445892, |
|
"logits/rejected": -0.055657435208559036, |
|
"logps/chosen": -1.219440221786499, |
|
"logps/rejected": -1.922663688659668, |
|
"loss": 1.9204, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.219440221786499, |
|
"rewards/margins": 0.7032233476638794, |
|
"rewards/rejected": -1.922663688659668, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 19.609830420854962, |
|
"learning_rate": 2.3463749726290284e-08, |
|
"logits/chosen": -0.14449790120124817, |
|
"logits/rejected": -0.08098597824573517, |
|
"logps/chosen": -1.1550737619400024, |
|
"logps/rejected": -1.9791103601455688, |
|
"loss": 1.9163, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1550737619400024, |
|
"rewards/margins": 0.8240365982055664, |
|
"rewards/rejected": -1.9791103601455688, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 31.437744158726638, |
|
"learning_rate": 1.824189659787284e-08, |
|
"logits/chosen": 0.0060030072927474976, |
|
"logits/rejected": 0.009024476632475853, |
|
"logps/chosen": -1.1824986934661865, |
|
"logps/rejected": -1.7867063283920288, |
|
"loss": 1.9724, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1824986934661865, |
|
"rewards/margins": 0.6042075157165527, |
|
"rewards/rejected": -1.7867063283920288, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 34.49114038658599, |
|
"learning_rate": 1.3665854824458035e-08, |
|
"logits/chosen": -0.15822723507881165, |
|
"logits/rejected": -0.08658315241336823, |
|
"logps/chosen": -1.1747385263442993, |
|
"logps/rejected": -1.7831497192382812, |
|
"loss": 1.9708, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1747385263442993, |
|
"rewards/margins": 0.6084113121032715, |
|
"rewards/rejected": -1.7831497192382812, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 22.736368343788918, |
|
"learning_rate": 9.741758728888217e-09, |
|
"logits/chosen": -0.05001335218548775, |
|
"logits/rejected": -0.013674241490662098, |
|
"logps/chosen": -1.179164171218872, |
|
"logps/rejected": -1.8373947143554688, |
|
"loss": 1.909, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.179164171218872, |
|
"rewards/margins": 0.6582303643226624, |
|
"rewards/rejected": -1.8373947143554688, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 21.67558731525575, |
|
"learning_rate": 6.474868681043577e-09, |
|
"logits/chosen": -0.10400988906621933, |
|
"logits/rejected": -0.05608060210943222, |
|
"logps/chosen": -1.3397135734558105, |
|
"logps/rejected": -1.716301679611206, |
|
"loss": 1.9844, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3397135734558105, |
|
"rewards/margins": 0.3765881657600403, |
|
"rewards/rejected": -1.716301679611206, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 22.99781560062592, |
|
"learning_rate": 3.869564046156459e-09, |
|
"logits/chosen": -0.06456808745861053, |
|
"logits/rejected": -0.012792855501174927, |
|
"logps/chosen": -1.0940654277801514, |
|
"logps/rejected": -1.805354356765747, |
|
"loss": 1.8916, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0940654277801514, |
|
"rewards/margins": 0.7112888097763062, |
|
"rewards/rejected": -1.805354356765747, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 17.285058450470018, |
|
"learning_rate": 1.929337314139412e-09, |
|
"logits/chosen": -0.19403138756752014, |
|
"logits/rejected": -0.07949899882078171, |
|
"logps/chosen": -1.2133488655090332, |
|
"logps/rejected": -1.8430767059326172, |
|
"loss": 1.9376, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2133488655090332, |
|
"rewards/margins": 0.6297277808189392, |
|
"rewards/rejected": -1.8430767059326172, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 19.37788975464885, |
|
"learning_rate": 6.567894177967325e-10, |
|
"logits/chosen": -0.1643257737159729, |
|
"logits/rejected": -0.06100650504231453, |
|
"logps/chosen": -1.181461215019226, |
|
"logps/rejected": -1.707772970199585, |
|
"loss": 1.9914, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.181461215019226, |
|
"rewards/margins": 0.5263119339942932, |
|
"rewards/rejected": -1.707772970199585, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 24.408366857719134, |
|
"learning_rate": 5.3626246194704575e-11, |
|
"logits/chosen": -0.20142404735088348, |
|
"logits/rejected": -0.07068441808223724, |
|
"logps/chosen": -1.2009718418121338, |
|
"logps/rejected": -1.803815245628357, |
|
"loss": 1.9479, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2009718418121338, |
|
"rewards/margins": 0.6028433442115784, |
|
"rewards/rejected": -1.803815245628357, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.998691442030882, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 4.3143, |
|
"train_samples_per_second": 14170.447, |
|
"train_steps_per_second": 110.564 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |