|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.235602094240838e-09, |
|
"logits/chosen": -1.3201165199279785, |
|
"logits/rejected": -1.2275193929672241, |
|
"logps/chosen": -2993.4990234375, |
|
"logps/rejected": -2222.55078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.2356020942408376e-08, |
|
"logits/chosen": -1.2813271284103394, |
|
"logits/rejected": -1.2465020418167114, |
|
"logps/chosen": -3047.636474609375, |
|
"logps/rejected": -2742.105712890625, |
|
"loss": 0.6973, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 0.00026022063684649765, |
|
"rewards/margins": 0.0008929346804507077, |
|
"rewards/rejected": -0.0006327141309157014, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.0471204188481675e-07, |
|
"logits/chosen": -1.2586185932159424, |
|
"logits/rejected": -1.1957629919052124, |
|
"logps/chosen": -2689.84716796875, |
|
"logps/rejected": -2126.1083984375, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.014919064939022064, |
|
"rewards/margins": 0.006186266429722309, |
|
"rewards/rejected": 0.008732798509299755, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.5706806282722514e-07, |
|
"logits/chosen": -1.175875186920166, |
|
"logits/rejected": -1.1656105518341064, |
|
"logps/chosen": -2198.431640625, |
|
"logps/rejected": -2021.9176025390625, |
|
"loss": 0.7049, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0053156702779233456, |
|
"rewards/margins": -0.05735307186841965, |
|
"rewards/rejected": 0.05203740671277046, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.094240837696335e-07, |
|
"logits/chosen": -1.1858023405075073, |
|
"logits/rejected": -1.1230406761169434, |
|
"logps/chosen": -2056.973388671875, |
|
"logps/rejected": -2170.3056640625, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.1324843466281891, |
|
"rewards/margins": -0.016001610085368156, |
|
"rewards/rejected": 0.1484859436750412, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.6178010471204185e-07, |
|
"logits/chosen": -1.2066991329193115, |
|
"logits/rejected": -1.15940260887146, |
|
"logps/chosen": -2678.28515625, |
|
"logps/rejected": -2157.86376953125, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.32956749200820923, |
|
"rewards/margins": 0.08421512693166733, |
|
"rewards/rejected": 0.2453523427248001, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.1413612565445027e-07, |
|
"logits/chosen": -1.2342027425765991, |
|
"logits/rejected": -1.1995573043823242, |
|
"logps/chosen": -2410.271484375, |
|
"logps/rejected": -2036.266845703125, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.37566477060317993, |
|
"rewards/margins": 0.07754239439964294, |
|
"rewards/rejected": 0.2981223464012146, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.6649214659685864e-07, |
|
"logits/chosen": -1.1794008016586304, |
|
"logits/rejected": -1.1591062545776367, |
|
"logps/chosen": -2638.678955078125, |
|
"logps/rejected": -2372.677001953125, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.43634381890296936, |
|
"rewards/margins": 0.0520954504609108, |
|
"rewards/rejected": 0.38424837589263916, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.18848167539267e-07, |
|
"logits/chosen": -1.2023160457611084, |
|
"logits/rejected": -1.1861956119537354, |
|
"logps/chosen": -2399.763671875, |
|
"logps/rejected": -2263.85888671875, |
|
"loss": 0.6818, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.452880322933197, |
|
"rewards/margins": 0.04662833362817764, |
|
"rewards/rejected": 0.4062519967556, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.712041884816754e-07, |
|
"logits/chosen": -1.2319462299346924, |
|
"logits/rejected": -1.2353641986846924, |
|
"logps/chosen": -2180.666259765625, |
|
"logps/rejected": -2063.204345703125, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.36227527260780334, |
|
"rewards/margins": 0.02720705047249794, |
|
"rewards/rejected": 0.3350681960582733, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.235602094240837e-07, |
|
"logits/chosen": -1.2101176977157593, |
|
"logits/rejected": -1.1575647592544556, |
|
"logps/chosen": -2522.456298828125, |
|
"logps/rejected": -2253.9931640625, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.5727291703224182, |
|
"rewards/margins": 0.10190355777740479, |
|
"rewards/rejected": 0.47082558274269104, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -1.2241016626358032, |
|
"eval_logits/rejected": -1.182218313217163, |
|
"eval_logps/chosen": -2595.654296875, |
|
"eval_logps/rejected": -2172.529052734375, |
|
"eval_loss": 0.6526807546615601, |
|
"eval_rewards/accuracies": 0.5740000009536743, |
|
"eval_rewards/chosen": 0.7712106108665466, |
|
"eval_rewards/margins": 0.1913326531648636, |
|
"eval_rewards/rejected": 0.5798779726028442, |
|
"eval_runtime": 302.6088, |
|
"eval_samples_per_second": 6.609, |
|
"eval_steps_per_second": 0.413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.759162303664922e-07, |
|
"logits/chosen": -1.162023901939392, |
|
"logits/rejected": -1.1786675453186035, |
|
"logps/chosen": -2315.97216796875, |
|
"logps/rejected": -2253.127685546875, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7014600038528442, |
|
"rewards/margins": 0.1181831955909729, |
|
"rewards/rejected": 0.5832767486572266, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.282722513089005e-07, |
|
"logits/chosen": -1.2144238948822021, |
|
"logits/rejected": -1.1650540828704834, |
|
"logps/chosen": -2668.5830078125, |
|
"logps/rejected": -1998.516845703125, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6677217483520508, |
|
"rewards/margins": 0.20832547545433044, |
|
"rewards/rejected": 0.45939627289772034, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.806282722513089e-07, |
|
"logits/chosen": -1.220961332321167, |
|
"logits/rejected": -1.1595335006713867, |
|
"logps/chosen": -2847.095458984375, |
|
"logps/rejected": -2245.98828125, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8143318891525269, |
|
"rewards/margins": 0.25173696875572205, |
|
"rewards/rejected": 0.5625948905944824, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.329842931937173e-07, |
|
"logits/chosen": -1.1750261783599854, |
|
"logits/rejected": -1.1362488269805908, |
|
"logps/chosen": -2556.08349609375, |
|
"logps/rejected": -2165.498779296875, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6861199140548706, |
|
"rewards/margins": 0.11765004694461823, |
|
"rewards/rejected": 0.5684698820114136, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 7.853403141361256e-07, |
|
"logits/chosen": -1.213008165359497, |
|
"logits/rejected": -1.1688684225082397, |
|
"logps/chosen": -2662.8193359375, |
|
"logps/rejected": -2211.24072265625, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.870284914970398, |
|
"rewards/margins": 0.22113271057605743, |
|
"rewards/rejected": 0.6491522192955017, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.37696335078534e-07, |
|
"logits/chosen": -1.1444575786590576, |
|
"logits/rejected": -1.091567039489746, |
|
"logps/chosen": -2689.31298828125, |
|
"logps/rejected": -2391.873291015625, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.5730727314949036, |
|
"rewards/margins": 0.2371658980846405, |
|
"rewards/rejected": 0.33590689301490784, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.900523560209424e-07, |
|
"logits/chosen": -1.1294758319854736, |
|
"logits/rejected": -1.178647756576538, |
|
"logps/chosen": -2683.22509765625, |
|
"logps/rejected": -2484.3818359375, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.4668382704257965, |
|
"rewards/margins": 0.08485493808984756, |
|
"rewards/rejected": 0.38198333978652954, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.424083769633508e-07, |
|
"logits/chosen": -1.2192734479904175, |
|
"logits/rejected": -1.1568591594696045, |
|
"logps/chosen": -2561.9091796875, |
|
"logps/rejected": -2213.013916015625, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.9690437316894531, |
|
"rewards/margins": 0.3352271616458893, |
|
"rewards/rejected": 0.6338165998458862, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.947643979057591e-07, |
|
"logits/chosen": -1.184699535369873, |
|
"logits/rejected": -1.1766315698623657, |
|
"logps/chosen": -2123.99072265625, |
|
"logps/rejected": -2111.645751953125, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6798163652420044, |
|
"rewards/margins": 0.07367928326129913, |
|
"rewards/rejected": 0.6061369776725769, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.999323662872996e-07, |
|
"logits/chosen": -1.2072479724884033, |
|
"logits/rejected": -1.1839154958724976, |
|
"logps/chosen": -2698.072998046875, |
|
"logps/rejected": -2592.82861328125, |
|
"loss": 0.6404, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6077369451522827, |
|
"rewards/margins": 0.17234833538532257, |
|
"rewards/rejected": 0.4353886544704437, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -1.2423152923583984, |
|
"eval_logits/rejected": -1.201860785484314, |
|
"eval_logps/chosen": -2626.8759765625, |
|
"eval_logps/rejected": -2203.748291015625, |
|
"eval_loss": 0.6911113858222961, |
|
"eval_rewards/accuracies": 0.5860000252723694, |
|
"eval_rewards/chosen": 0.45899277925491333, |
|
"eval_rewards/margins": 0.19130723178386688, |
|
"eval_rewards/rejected": 0.26768550276756287, |
|
"eval_runtime": 302.3649, |
|
"eval_samples_per_second": 6.615, |
|
"eval_steps_per_second": 0.413, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.996985942280678e-07, |
|
"logits/chosen": -1.2993234395980835, |
|
"logits/rejected": -1.2211077213287354, |
|
"logps/chosen": -2626.205810546875, |
|
"logps/rejected": -1850.9456787109375, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.6171352863311768, |
|
"rewards/margins": 0.32769179344177246, |
|
"rewards/rejected": 0.2894434928894043, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.99297926897573e-07, |
|
"logits/chosen": -1.249463438987732, |
|
"logits/rejected": -1.2620993852615356, |
|
"logps/chosen": -2312.38427734375, |
|
"logps/rejected": -2108.46826171875, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.7376146912574768, |
|
"rewards/margins": 0.25427359342575073, |
|
"rewards/rejected": 0.48334112763404846, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.987304981154493e-07, |
|
"logits/chosen": -1.2905672788619995, |
|
"logits/rejected": -1.2782526016235352, |
|
"logps/chosen": -2793.2978515625, |
|
"logps/rejected": -2365.16552734375, |
|
"loss": 0.7268, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6005491018295288, |
|
"rewards/margins": 0.08131317794322968, |
|
"rewards/rejected": 0.5192359685897827, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.979964973983e-07, |
|
"logits/chosen": -1.402222752571106, |
|
"logits/rejected": -1.3204929828643799, |
|
"logps/chosen": -2332.16650390625, |
|
"logps/rejected": -1890.1295166015625, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3606724441051483, |
|
"rewards/margins": 0.234793022274971, |
|
"rewards/rejected": 0.1258794367313385, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.970961698964024e-07, |
|
"logits/chosen": -1.399332046508789, |
|
"logits/rejected": -1.3611127138137817, |
|
"logps/chosen": -2618.633056640625, |
|
"logps/rejected": -2216.18505859375, |
|
"loss": 0.7038, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7694709897041321, |
|
"rewards/margins": 0.19202515482902527, |
|
"rewards/rejected": 0.577445924282074, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.960298163118284e-07, |
|
"logits/chosen": -1.4756546020507812, |
|
"logits/rejected": -1.3830201625823975, |
|
"logps/chosen": -2662.10986328125, |
|
"logps/rejected": -2112.115478515625, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.9105646014213562, |
|
"rewards/margins": 0.19633980095386505, |
|
"rewards/rejected": 0.7142248749732971, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.94797792798013e-07, |
|
"logits/chosen": -1.4841511249542236, |
|
"logits/rejected": -1.4767415523529053, |
|
"logps/chosen": -2305.857177734375, |
|
"logps/rejected": -2128.56396484375, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.2341788113117218, |
|
"rewards/margins": 0.13304655253887177, |
|
"rewards/rejected": 0.10113225132226944, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.934005108408016e-07, |
|
"logits/chosen": -1.4331722259521484, |
|
"logits/rejected": -1.3947049379348755, |
|
"logps/chosen": -2292.278564453125, |
|
"logps/rejected": -1913.346435546875, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.8132773637771606, |
|
"rewards/margins": 0.22855396568775177, |
|
"rewards/rejected": 0.5847233533859253, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.918384371210175e-07, |
|
"logits/chosen": -1.4025981426239014, |
|
"logits/rejected": -1.3736456632614136, |
|
"logps/chosen": -2201.71044921875, |
|
"logps/rejected": -2091.62255859375, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7386767268180847, |
|
"rewards/margins": 0.228462815284729, |
|
"rewards/rejected": 0.5102138519287109, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.901120933585937e-07, |
|
"logits/chosen": -1.3154966831207275, |
|
"logits/rejected": -1.326516032218933, |
|
"logps/chosen": -2670.81201171875, |
|
"logps/rejected": -2235.08349609375, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.7127049565315247, |
|
"rewards/margins": 0.18496084213256836, |
|
"rewards/rejected": 0.5277441143989563, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -1.3645591735839844, |
|
"eval_logits/rejected": -1.314851999282837, |
|
"eval_logps/chosen": -2591.692138671875, |
|
"eval_logps/rejected": -2178.205810546875, |
|
"eval_loss": 0.6602776050567627, |
|
"eval_rewards/accuracies": 0.6320000290870667, |
|
"eval_rewards/chosen": 0.8108287453651428, |
|
"eval_rewards/margins": 0.28771865367889404, |
|
"eval_rewards/rejected": 0.5231101512908936, |
|
"eval_runtime": 302.3737, |
|
"eval_samples_per_second": 6.614, |
|
"eval_steps_per_second": 0.413, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.882220561383237e-07, |
|
"logits/chosen": -1.3421976566314697, |
|
"logits/rejected": -1.2967360019683838, |
|
"logps/chosen": -2590.6484375, |
|
"logps/rejected": -2214.814208984375, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.7196224331855774, |
|
"rewards/margins": 0.18787309527397156, |
|
"rewards/rejected": 0.5317493081092834, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.861689567172849e-07, |
|
"logits/chosen": -1.3033558130264282, |
|
"logits/rejected": -1.2557708024978638, |
|
"logps/chosen": -2364.27587890625, |
|
"logps/rejected": -2370.61865234375, |
|
"loss": 0.7144, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6515111923217773, |
|
"rewards/margins": 0.11765609681606293, |
|
"rewards/rejected": 0.5338551998138428, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.839534808140065e-07, |
|
"logits/chosen": -1.2571797370910645, |
|
"logits/rejected": -1.2486730813980103, |
|
"logps/chosen": -2348.859130859375, |
|
"logps/rejected": -1969.1402587890625, |
|
"loss": 0.7502, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.6785815954208374, |
|
"rewards/margins": 0.024524565786123276, |
|
"rewards/rejected": 0.6540570259094238, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.815763683794431e-07, |
|
"logits/chosen": -1.2969481945037842, |
|
"logits/rejected": -1.2044627666473389, |
|
"logps/chosen": -2964.642578125, |
|
"logps/rejected": -2117.79150390625, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.3430386185646057, |
|
"rewards/margins": 0.15022581815719604, |
|
"rewards/rejected": 0.19281277060508728, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.790384133498377e-07, |
|
"logits/chosen": -1.3875682353973389, |
|
"logits/rejected": -1.3528212308883667, |
|
"logps/chosen": -2609.759765625, |
|
"logps/rejected": -2217.990234375, |
|
"loss": 0.65, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.2748018503189087, |
|
"rewards/margins": 0.3255355954170227, |
|
"rewards/rejected": 0.9492664337158203, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.763404633815536e-07, |
|
"logits/chosen": -1.4445443153381348, |
|
"logits/rejected": -1.409148931503296, |
|
"logps/chosen": -2325.73095703125, |
|
"logps/rejected": -2067.62646484375, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9282833337783813, |
|
"rewards/margins": 0.2425541877746582, |
|
"rewards/rejected": 0.6857292056083679, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.73483419567964e-07, |
|
"logits/chosen": -1.5681045055389404, |
|
"logits/rejected": -1.47848379611969, |
|
"logps/chosen": -2851.124267578125, |
|
"logps/rejected": -2266.677734375, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.8608830571174622, |
|
"rewards/margins": 0.2859550416469574, |
|
"rewards/rejected": 0.5749280452728271, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.70468236138494e-07, |
|
"logits/chosen": -1.5734655857086182, |
|
"logits/rejected": -1.4612947702407837, |
|
"logps/chosen": -2619.15576171875, |
|
"logps/rejected": -1996.1292724609375, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.7406389117240906, |
|
"rewards/margins": 0.2604018747806549, |
|
"rewards/rejected": 0.4802371561527252, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.672959201399155e-07, |
|
"logits/chosen": -1.4863954782485962, |
|
"logits/rejected": -1.4341216087341309, |
|
"logps/chosen": -2418.91748046875, |
|
"logps/rejected": -2210.710205078125, |
|
"loss": 0.6831, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9080713987350464, |
|
"rewards/margins": 0.19638116657733917, |
|
"rewards/rejected": 0.7116903066635132, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.639675311000027e-07, |
|
"logits/chosen": -1.478477120399475, |
|
"logits/rejected": -1.4470995664596558, |
|
"logps/chosen": -2378.759521484375, |
|
"logps/rejected": -2213.616455078125, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.5797199606895447, |
|
"rewards/margins": 0.15609867870807648, |
|
"rewards/rejected": 0.4236213266849518, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.5029045343399048, |
|
"eval_logits/rejected": -1.4427672624588013, |
|
"eval_logps/chosen": -2591.764892578125, |
|
"eval_logps/rejected": -2180.5830078125, |
|
"eval_loss": 0.6528961658477783, |
|
"eval_rewards/accuracies": 0.628000020980835, |
|
"eval_rewards/chosen": 0.8101032376289368, |
|
"eval_rewards/margins": 0.31076449155807495, |
|
"eval_rewards/rejected": 0.49933871626853943, |
|
"eval_runtime": 300.9467, |
|
"eval_samples_per_second": 6.646, |
|
"eval_steps_per_second": 0.415, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.60484180673657e-07, |
|
"logits/chosen": -1.4771575927734375, |
|
"logits/rejected": -1.449158787727356, |
|
"logps/chosen": -2471.6416015625, |
|
"logps/rejected": -2168.50439453125, |
|
"loss": 0.7235, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.5357500314712524, |
|
"rewards/margins": 0.03546437621116638, |
|
"rewards/rejected": 0.5002856254577637, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.568470322716246e-07, |
|
"logits/chosen": -1.461313247680664, |
|
"logits/rejected": -1.3947060108184814, |
|
"logps/chosen": -2724.66748046875, |
|
"logps/rejected": -2191.56787109375, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.7562235593795776, |
|
"rewards/margins": 0.328954815864563, |
|
"rewards/rejected": 0.4272686541080475, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.530573006719263e-07, |
|
"logits/chosen": -1.5015565156936646, |
|
"logits/rejected": -1.4776034355163574, |
|
"logps/chosen": -2666.500732421875, |
|
"logps/rejected": -2279.621826171875, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5253168344497681, |
|
"rewards/margins": 0.28119999170303345, |
|
"rewards/rejected": 0.24411681294441223, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.491162516141307e-07, |
|
"logits/chosen": -1.4172331094741821, |
|
"logits/rejected": -1.422502040863037, |
|
"logps/chosen": -2282.531005859375, |
|
"logps/rejected": -2387.561767578125, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.860162615776062, |
|
"rewards/margins": 0.07978199422359467, |
|
"rewards/rejected": 0.7803806662559509, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.450252013766092e-07, |
|
"logits/chosen": -1.3361685276031494, |
|
"logits/rejected": -1.2606579065322876, |
|
"logps/chosen": -2627.769775390625, |
|
"logps/rejected": -2308.65380859375, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.5678683519363403, |
|
"rewards/margins": 0.21432606875896454, |
|
"rewards/rejected": 0.3535422682762146, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.407855163369078e-07, |
|
"logits/chosen": -1.306783676147461, |
|
"logits/rejected": -1.2825387716293335, |
|
"logps/chosen": -2633.41162109375, |
|
"logps/rejected": -2218.27294921875, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.9192908406257629, |
|
"rewards/margins": 0.24978260695934296, |
|
"rewards/rejected": 0.669508159160614, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.3639861251539e-07, |
|
"logits/chosen": -1.2543857097625732, |
|
"logits/rejected": -1.195093035697937, |
|
"logps/chosen": -2341.584228515625, |
|
"logps/rejected": -1947.591796875, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5097464919090271, |
|
"rewards/margins": 0.36333781480789185, |
|
"rewards/rejected": 0.14640869200229645, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.318659551022955e-07, |
|
"logits/chosen": -1.3397210836410522, |
|
"logits/rejected": -1.281937837600708, |
|
"logps/chosen": -2238.00732421875, |
|
"logps/rejected": -1736.181640625, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.786676287651062, |
|
"rewards/margins": 0.2685468792915344, |
|
"rewards/rejected": 0.5181293487548828, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.271890579683804e-07, |
|
"logits/chosen": -1.4926373958587646, |
|
"logits/rejected": -1.4876558780670166, |
|
"logps/chosen": -2662.705322265625, |
|
"logps/rejected": -2349.420166015625, |
|
"loss": 0.7143, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.7370970845222473, |
|
"rewards/margins": 0.34762194752693176, |
|
"rewards/rejected": 0.38947516679763794, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.223694831592952e-07, |
|
"logits/chosen": -1.5373231172561646, |
|
"logits/rejected": -1.4849967956542969, |
|
"logps/chosen": -2402.5634765625, |
|
"logps/rejected": -2132.68701171875, |
|
"loss": 0.6682, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7446134090423584, |
|
"rewards/margins": 0.32214781641960144, |
|
"rewards/rejected": 0.42246556282043457, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -1.5664644241333008, |
|
"eval_logits/rejected": -1.5148077011108398, |
|
"eval_logps/chosen": -2576.100830078125, |
|
"eval_logps/rejected": -2169.265380859375, |
|
"eval_loss": 0.6673685312271118, |
|
"eval_rewards/accuracies": 0.6420000195503235, |
|
"eval_rewards/chosen": 0.966746985912323, |
|
"eval_rewards/margins": 0.3542312681674957, |
|
"eval_rewards/rejected": 0.6125158071517944, |
|
"eval_runtime": 302.6642, |
|
"eval_samples_per_second": 6.608, |
|
"eval_steps_per_second": 0.413, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.174088403738755e-07, |
|
"logits/chosen": -1.5560601949691772, |
|
"logits/rejected": -1.5580723285675049, |
|
"logps/chosen": -2103.93310546875, |
|
"logps/rejected": -2181.848876953125, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.6312128305435181, |
|
"rewards/margins": 0.3005504906177521, |
|
"rewards/rejected": 0.330662339925766, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.123087864265147e-07, |
|
"logits/chosen": -1.543971061706543, |
|
"logits/rejected": -1.5191954374313354, |
|
"logps/chosen": -2323.391357421875, |
|
"logps/rejected": -2031.1025390625, |
|
"loss": 0.6736, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.41579127311706543, |
|
"rewards/margins": 0.1768406629562378, |
|
"rewards/rejected": 0.23895065486431122, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.070710246938016e-07, |
|
"logits/chosen": -1.5579715967178345, |
|
"logits/rejected": -1.5655916929244995, |
|
"logps/chosen": -2268.76318359375, |
|
"logps/rejected": -2190.51318359375, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.6393724083900452, |
|
"rewards/margins": 0.3283298909664154, |
|
"rewards/rejected": 0.3110424876213074, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.016973045456073e-07, |
|
"logits/chosen": -1.6396840810775757, |
|
"logits/rejected": -1.6098705530166626, |
|
"logps/chosen": -2668.9462890625, |
|
"logps/rejected": -2160.803955078125, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8928348422050476, |
|
"rewards/margins": 0.47784289717674255, |
|
"rewards/rejected": 0.41499200463294983, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.961894207608087e-07, |
|
"logits/chosen": -1.6586135625839233, |
|
"logits/rejected": -1.6290054321289062, |
|
"logps/chosen": -2212.68994140625, |
|
"logps/rejected": -2054.17626953125, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.49731844663619995, |
|
"rewards/margins": 0.16844932734966278, |
|
"rewards/rejected": 0.32886913418769836, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.905492129278477e-07, |
|
"logits/chosen": -1.6478192806243896, |
|
"logits/rejected": -1.5791934728622437, |
|
"logps/chosen": -2915.1103515625, |
|
"logps/rejected": -2492.820068359375, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6440809965133667, |
|
"rewards/margins": 0.29220613837242126, |
|
"rewards/rejected": 0.35187482833862305, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.847785648303233e-07, |
|
"logits/chosen": -1.648879051208496, |
|
"logits/rejected": -1.5808627605438232, |
|
"logps/chosen": -2345.06787109375, |
|
"logps/rejected": -1874.7965087890625, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.5588332414627075, |
|
"rewards/margins": 0.2794465720653534, |
|
"rewards/rejected": 0.2793866991996765, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.788794038178232e-07, |
|
"logits/chosen": -1.646813154220581, |
|
"logits/rejected": -1.5900137424468994, |
|
"logps/chosen": -2427.92822265625, |
|
"logps/rejected": -1974.943359375, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.0534051656723022, |
|
"rewards/margins": 0.35476142168045044, |
|
"rewards/rejected": 0.6986437439918518, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.728537001622049e-07, |
|
"logits/chosen": -1.6359336376190186, |
|
"logits/rejected": -1.5665844678878784, |
|
"logps/chosen": -2346.7265625, |
|
"logps/rejected": -1916.209716796875, |
|
"loss": 0.6555, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.7451139092445374, |
|
"rewards/margins": 0.24112704396247864, |
|
"rewards/rejected": 0.5039868354797363, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.667034663995408e-07, |
|
"logits/chosen": -1.6207376718521118, |
|
"logits/rejected": -1.5811537504196167, |
|
"logps/chosen": -2380.62939453125, |
|
"logps/rejected": -2060.835205078125, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.8570950627326965, |
|
"rewards/margins": 0.32400840520858765, |
|
"rewards/rejected": 0.5330866575241089, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -1.6448516845703125, |
|
"eval_logits/rejected": -1.588512897491455, |
|
"eval_logps/chosen": -2589.297119140625, |
|
"eval_logps/rejected": -2183.78515625, |
|
"eval_loss": 0.6445065140724182, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": 0.834783673286438, |
|
"eval_rewards/margins": 0.3674681782722473, |
|
"eval_rewards/rejected": 0.4673156440258026, |
|
"eval_runtime": 306.3454, |
|
"eval_samples_per_second": 6.529, |
|
"eval_steps_per_second": 0.408, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.604307566579472e-07, |
|
"logits/chosen": -1.5816807746887207, |
|
"logits/rejected": -1.6054216623306274, |
|
"logps/chosen": -2258.828857421875, |
|
"logps/rejected": -2473.440185546875, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3196907639503479, |
|
"rewards/margins": 0.4189208149909973, |
|
"rewards/rejected": -0.09922999143600464, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.540376659715225e-07, |
|
"logits/chosen": -1.6599409580230713, |
|
"logits/rejected": -1.5913432836532593, |
|
"logps/chosen": -2412.462890625, |
|
"logps/rejected": -2083.058837890625, |
|
"loss": 0.6291, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.8317147493362427, |
|
"rewards/margins": 0.3438655138015747, |
|
"rewards/rejected": 0.48784923553466797, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.47526329580623e-07, |
|
"logits/chosen": -1.535036325454712, |
|
"logits/rejected": -1.5678516626358032, |
|
"logps/chosen": -2142.04931640625, |
|
"logps/rejected": -2099.13720703125, |
|
"loss": 0.633, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9528681635856628, |
|
"rewards/margins": 0.24734528362751007, |
|
"rewards/rejected": 0.7055227756500244, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.408989222187096e-07, |
|
"logits/chosen": -1.5995115041732788, |
|
"logits/rejected": -1.5139375925064087, |
|
"logps/chosen": -3065.62451171875, |
|
"logps/rejected": -2365.10107421875, |
|
"loss": 0.6969, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.9795970916748047, |
|
"rewards/margins": 0.47979211807250977, |
|
"rewards/rejected": 0.49980488419532776, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.341576573860047e-07, |
|
"logits/chosen": -1.5332003831863403, |
|
"logits/rejected": -1.4982550144195557, |
|
"logps/chosen": -2392.21728515625, |
|
"logps/rejected": -1984.2425537109375, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8843706846237183, |
|
"rewards/margins": 0.32931455969810486, |
|
"rewards/rejected": 0.5550561547279358, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.27304786610201e-07, |
|
"logits/chosen": -1.5626050233840942, |
|
"logits/rejected": -1.5275344848632812, |
|
"logps/chosen": -2318.65625, |
|
"logps/rejected": -1863.1956787109375, |
|
"loss": 0.6323, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.8664724230766296, |
|
"rewards/margins": 0.5049992799758911, |
|
"rewards/rejected": 0.3614731729030609, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.203425986944696e-07, |
|
"logits/chosen": -1.5559314489364624, |
|
"logits/rejected": -1.5068961381912231, |
|
"logps/chosen": -2837.03369140625, |
|
"logps/rejected": -2028.3587646484375, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.7081668972969055, |
|
"rewards/margins": 0.37415772676467896, |
|
"rewards/rejected": 0.3340091109275818, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.132734189530182e-07, |
|
"logits/chosen": -1.569585919380188, |
|
"logits/rejected": -1.5583667755126953, |
|
"logps/chosen": -2081.708984375, |
|
"logps/rejected": -2073.14892578125, |
|
"loss": 0.7058, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.22897915542125702, |
|
"rewards/margins": 0.06144998222589493, |
|
"rewards/rejected": 0.1675291508436203, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.060996084344553e-07, |
|
"logits/chosen": -1.6668421030044556, |
|
"logits/rejected": -1.6300331354141235, |
|
"logps/chosen": -2808.94140625, |
|
"logps/rejected": -2424.194580078125, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.9484899640083313, |
|
"rewards/margins": 0.38452741503715515, |
|
"rewards/rejected": 0.5639625787734985, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.98823563133219e-07, |
|
"logits/chosen": -1.6251919269561768, |
|
"logits/rejected": -1.6152589321136475, |
|
"logps/chosen": -2532.464111328125, |
|
"logps/rejected": -2264.97802734375, |
|
"loss": 0.6467, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.8040878176689148, |
|
"rewards/margins": 0.3939053416252136, |
|
"rewards/rejected": 0.4101824164390564, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -1.7105224132537842, |
|
"eval_logits/rejected": -1.6561530828475952, |
|
"eval_logps/chosen": -2584.251220703125, |
|
"eval_logps/rejected": -2175.965087890625, |
|
"eval_loss": 0.6481595635414124, |
|
"eval_rewards/accuracies": 0.6240000128746033, |
|
"eval_rewards/chosen": 0.8852397799491882, |
|
"eval_rewards/margins": 0.3397220969200134, |
|
"eval_rewards/rejected": 0.54551762342453, |
|
"eval_runtime": 303.8379, |
|
"eval_samples_per_second": 6.582, |
|
"eval_steps_per_second": 0.411, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.914477131893342e-07, |
|
"logits/chosen": -1.71377432346344, |
|
"logits/rejected": -1.708833932876587, |
|
"logps/chosen": -2544.854248046875, |
|
"logps/rejected": -2375.308349609375, |
|
"loss": 0.6722, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6972166299819946, |
|
"rewards/margins": 0.06426803767681122, |
|
"rewards/rejected": 0.6329485774040222, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.839745220767661e-07, |
|
"logits/chosen": -1.694154143333435, |
|
"logits/rejected": -1.669390320777893, |
|
"logps/chosen": -2534.442626953125, |
|
"logps/rejected": -2229.87158203125, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.48106852173805237, |
|
"rewards/margins": 0.24986381828784943, |
|
"rewards/rejected": 0.23120474815368652, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.764064857806389e-07, |
|
"logits/chosen": -1.6268012523651123, |
|
"logits/rejected": -1.575046420097351, |
|
"logps/chosen": -2722.456298828125, |
|
"logps/rejected": -2351.8857421875, |
|
"loss": 0.643, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7691014409065247, |
|
"rewards/margins": 0.34405142068862915, |
|
"rewards/rejected": 0.42504996061325073, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.68746131963598e-07, |
|
"logits/chosen": -1.6478900909423828, |
|
"logits/rejected": -1.597701072692871, |
|
"logps/chosen": -2222.41259765625, |
|
"logps/rejected": -1990.4273681640625, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.6448198556900024, |
|
"rewards/margins": 0.2648247182369232, |
|
"rewards/rejected": 0.37999510765075684, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.609960191215909e-07, |
|
"logits/chosen": -1.6781095266342163, |
|
"logits/rejected": -1.6269840002059937, |
|
"logps/chosen": -2453.95068359375, |
|
"logps/rejected": -2161.110595703125, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.6921306252479553, |
|
"rewards/margins": 0.11647888273000717, |
|
"rewards/rejected": 0.5756517648696899, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.531587357293505e-07, |
|
"logits/chosen": -1.6048580408096313, |
|
"logits/rejected": -1.6003602743148804, |
|
"logps/chosen": -2562.139404296875, |
|
"logps/rejected": -2293.66943359375, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.7623199820518494, |
|
"rewards/margins": 0.2832568287849426, |
|
"rewards/rejected": 0.4790631830692291, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.452368993758645e-07, |
|
"logits/chosen": -1.585092544555664, |
|
"logits/rejected": -1.557943344116211, |
|
"logps/chosen": -2426.169677734375, |
|
"logps/rejected": -2058.61083984375, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.553870677947998, |
|
"rewards/margins": 0.39466503262519836, |
|
"rewards/rejected": 0.15920567512512207, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.372331558901237e-07, |
|
"logits/chosen": -1.5951181650161743, |
|
"logits/rejected": -1.55776846408844, |
|
"logps/chosen": -2530.603515625, |
|
"logps/rejected": -2058.31494140625, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.5011290311813354, |
|
"rewards/margins": 0.12420739978551865, |
|
"rewards/rejected": 0.3769216239452362, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.291501784574355e-07, |
|
"logits/chosen": -1.7254797220230103, |
|
"logits/rejected": -1.6313526630401611, |
|
"logps/chosen": -2754.68408203125, |
|
"logps/rejected": -2185.399169921875, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6229848265647888, |
|
"rewards/margins": 0.35090917348861694, |
|
"rewards/rejected": 0.27207568287849426, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.209906667266017e-07, |
|
"logits/chosen": -1.7093772888183594, |
|
"logits/rejected": -1.6865718364715576, |
|
"logps/chosen": -2462.615478515625, |
|
"logps/rejected": -2213.93798828125, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 1.1175382137298584, |
|
"rewards/margins": 0.40151238441467285, |
|
"rewards/rejected": 0.7160258293151855, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.7084823846817017, |
|
"eval_logits/rejected": -1.6541036367416382, |
|
"eval_logps/chosen": -2563.754638671875, |
|
"eval_logps/rejected": -2162.267822265625, |
|
"eval_loss": 0.6452978253364563, |
|
"eval_rewards/accuracies": 0.6380000114440918, |
|
"eval_rewards/chosen": 1.0902061462402344, |
|
"eval_rewards/margins": 0.4077164828777313, |
|
"eval_rewards/rejected": 0.6824895739555359, |
|
"eval_runtime": 301.7419, |
|
"eval_samples_per_second": 6.628, |
|
"eval_steps_per_second": 0.414, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.12757345908258e-07, |
|
"logits/chosen": -1.7412763833999634, |
|
"logits/rejected": -1.6791282892227173, |
|
"logps/chosen": -2606.15283203125, |
|
"logps/rejected": -1956.8831787109375, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.9316846132278442, |
|
"rewards/margins": 0.45442262291908264, |
|
"rewards/rejected": 0.47726184129714966, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.044529658646761e-07, |
|
"logits/chosen": -1.710146188735962, |
|
"logits/rejected": -1.7056090831756592, |
|
"logps/chosen": -2651.176513671875, |
|
"logps/rejected": -2550.99755859375, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6641424298286438, |
|
"rewards/margins": 0.2352844774723053, |
|
"rewards/rejected": 0.4288579821586609, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.960803001913314e-07, |
|
"logits/chosen": -1.6102991104125977, |
|
"logits/rejected": -1.5880324840545654, |
|
"logps/chosen": -1818.771484375, |
|
"logps/rejected": -1763.439208984375, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.4126269817352295, |
|
"rewards/margins": 0.2377271205186844, |
|
"rewards/rejected": 0.1748998463153839, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.876421452905448e-07, |
|
"logits/chosen": -1.6048507690429688, |
|
"logits/rejected": -1.5550066232681274, |
|
"logps/chosen": -2419.88818359375, |
|
"logps/rejected": -1979.8333740234375, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 1.1539905071258545, |
|
"rewards/margins": 0.44714298844337463, |
|
"rewards/rejected": 0.7068475484848022, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.791413194375076e-07, |
|
"logits/chosen": -1.5756229162216187, |
|
"logits/rejected": -1.5317662954330444, |
|
"logps/chosen": -2326.3671875, |
|
"logps/rejected": -2082.76123046875, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.8061001896858215, |
|
"rewards/margins": 0.20508570969104767, |
|
"rewards/rejected": 0.6010144948959351, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.705806618389997e-07, |
|
"logits/chosen": -1.6245572566986084, |
|
"logits/rejected": -1.6081863641738892, |
|
"logps/chosen": -2542.473876953125, |
|
"logps/rejected": -2442.247314453125, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.8134121894836426, |
|
"rewards/margins": 0.18775935471057892, |
|
"rewards/rejected": 0.6256529092788696, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.619630316851182e-07, |
|
"logits/chosen": -1.6937329769134521, |
|
"logits/rejected": -1.6594982147216797, |
|
"logps/chosen": -2513.98046875, |
|
"logps/rejected": -2264.63623046875, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.6390259861946106, |
|
"rewards/margins": 0.21240201592445374, |
|
"rewards/rejected": 0.4266239106655121, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.532913071943307e-07, |
|
"logits/chosen": -1.6279165744781494, |
|
"logits/rejected": -1.5716134309768677, |
|
"logps/chosen": -2358.2890625, |
|
"logps/rejected": -2005.8092041015625, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.1509922742843628, |
|
"rewards/margins": 0.4300170838832855, |
|
"rewards/rejected": 0.7209752798080444, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 6.445683846521738e-07, |
|
"logits/chosen": -1.458832025527954, |
|
"logits/rejected": -1.3705499172210693, |
|
"logps/chosen": -2031.3890380859375, |
|
"logps/rejected": -1786.692626953125, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.4857109487056732, |
|
"rewards/margins": 0.11415307223796844, |
|
"rewards/rejected": 0.3715578615665436, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 6.357971774439177e-07, |
|
"logits/chosen": -1.446877360343933, |
|
"logits/rejected": -1.4010428190231323, |
|
"logps/chosen": -2083.528564453125, |
|
"logps/rejected": -2091.34228515625, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3722456991672516, |
|
"rewards/margins": 0.18528583645820618, |
|
"rewards/rejected": 0.18695983290672302, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -1.5652438402175903, |
|
"eval_logits/rejected": -1.5145412683486938, |
|
"eval_logps/chosen": -2594.7568359375, |
|
"eval_logps/rejected": -2185.613525390625, |
|
"eval_loss": 0.6415941119194031, |
|
"eval_rewards/accuracies": 0.6439999938011169, |
|
"eval_rewards/chosen": 0.780185878276825, |
|
"eval_rewards/margins": 0.33115366101264954, |
|
"eval_rewards/rejected": 0.44903212785720825, |
|
"eval_runtime": 290.6591, |
|
"eval_samples_per_second": 6.881, |
|
"eval_steps_per_second": 0.43, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.269806150815187e-07, |
|
"logits/chosen": -1.580451250076294, |
|
"logits/rejected": -1.5398848056793213, |
|
"logps/chosen": -2756.412109375, |
|
"logps/rejected": -2110.937255859375, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 1.1578181982040405, |
|
"rewards/margins": 0.4512609839439392, |
|
"rewards/rejected": 0.7065572738647461, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.181216422251862e-07, |
|
"logits/chosen": -1.6002380847930908, |
|
"logits/rejected": -1.5482442378997803, |
|
"logps/chosen": -2669.18408203125, |
|
"logps/rejected": -2383.2392578125, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.3831857442855835, |
|
"rewards/margins": 0.3688461184501648, |
|
"rewards/rejected": 1.014339566230774, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 6.092232176998897e-07, |
|
"logits/chosen": -1.5446488857269287, |
|
"logits/rejected": -1.5036358833312988, |
|
"logps/chosen": -2283.471923828125, |
|
"logps/rejected": -2156.527587890625, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.8198372721672058, |
|
"rewards/margins": 0.23020341992378235, |
|
"rewards/rejected": 0.5896340012550354, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 6.002883135071362e-07, |
|
"logits/chosen": -1.4674952030181885, |
|
"logits/rejected": -1.3860971927642822, |
|
"logps/chosen": -2495.39794921875, |
|
"logps/rejected": -2081.33544921875, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.46686476469039917, |
|
"rewards/margins": 0.33061760663986206, |
|
"rewards/rejected": 0.1362471729516983, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.913199138323448e-07, |
|
"logits/chosen": -1.5902820825576782, |
|
"logits/rejected": -1.5817844867706299, |
|
"logps/chosen": -2237.93603515625, |
|
"logps/rejected": -2165.838623046875, |
|
"loss": 0.699, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.5283821821212769, |
|
"rewards/margins": 0.3398032486438751, |
|
"rewards/rejected": 0.18857893347740173, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.82321014048154e-07, |
|
"logits/chosen": -1.5519543886184692, |
|
"logits/rejected": -1.5687713623046875, |
|
"logps/chosen": -2170.23583984375, |
|
"logps/rejected": -2091.04248046875, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2492622435092926, |
|
"rewards/margins": 0.2455929070711136, |
|
"rewards/rejected": 0.00366935133934021, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.732946197139906e-07, |
|
"logits/chosen": -1.5598348379135132, |
|
"logits/rejected": -1.5337880849838257, |
|
"logps/chosen": -2266.143310546875, |
|
"logps/rejected": -2009.6168212890625, |
|
"loss": 0.6497, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.789315402507782, |
|
"rewards/margins": 0.16782251000404358, |
|
"rewards/rejected": 0.6214929223060608, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.642437455722381e-07, |
|
"logits/chosen": -1.5074641704559326, |
|
"logits/rejected": -1.4456851482391357, |
|
"logps/chosen": -2503.286865234375, |
|
"logps/rejected": -2021.8304443359375, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 1.141226053237915, |
|
"rewards/margins": 0.3969436287879944, |
|
"rewards/rejected": 0.7442826628684998, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.551714145413368e-07, |
|
"logits/chosen": -1.468330979347229, |
|
"logits/rejected": -1.3824667930603027, |
|
"logps/chosen": -2575.858154296875, |
|
"logps/rejected": -1971.8447265625, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.726246178150177, |
|
"rewards/margins": 0.32752370834350586, |
|
"rewards/rejected": 0.39872246980667114, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.460806567061533e-07, |
|
"logits/chosen": -1.5170243978500366, |
|
"logits/rejected": -1.4751875400543213, |
|
"logps/chosen": -2752.580322265625, |
|
"logps/rejected": -2291.04833984375, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8746698498725891, |
|
"rewards/margins": 0.38163238763809204, |
|
"rewards/rejected": 0.4930374026298523, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -1.5046511888504028, |
|
"eval_logits/rejected": -1.4505603313446045, |
|
"eval_logps/chosen": -2602.00390625, |
|
"eval_logps/rejected": -2193.728515625, |
|
"eval_loss": 0.6499609351158142, |
|
"eval_rewards/accuracies": 0.6399999856948853, |
|
"eval_rewards/chosen": 0.7077119946479797, |
|
"eval_rewards/margins": 0.3398290276527405, |
|
"eval_rewards/rejected": 0.36788299679756165, |
|
"eval_runtime": 299.5822, |
|
"eval_samples_per_second": 6.676, |
|
"eval_steps_per_second": 0.417, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.369745083059577e-07, |
|
"logits/chosen": -1.490482211112976, |
|
"logits/rejected": -1.424222707748413, |
|
"logps/chosen": -2471.395263671875, |
|
"logps/rejected": -1937.520751953125, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.47275876998901367, |
|
"rewards/margins": 0.2599312365055084, |
|
"rewards/rejected": 0.21282756328582764, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.278560107203437e-07, |
|
"logits/chosen": -1.459146499633789, |
|
"logits/rejected": -1.4577230215072632, |
|
"logps/chosen": -2559.42724609375, |
|
"logps/rejected": -2042.339599609375, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7685127258300781, |
|
"rewards/margins": 0.3085792660713196, |
|
"rewards/rejected": 0.45993345975875854, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.18728209453432e-07, |
|
"logits/chosen": -1.5719316005706787, |
|
"logits/rejected": -1.5082643032073975, |
|
"logps/chosen": -2554.538818359375, |
|
"logps/rejected": -2257.06201171875, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.9037872552871704, |
|
"rewards/margins": 0.3130941092967987, |
|
"rewards/rejected": 0.5906931161880493, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.095941531166982e-07, |
|
"logits/chosen": -1.5710715055465698, |
|
"logits/rejected": -1.5428146123886108, |
|
"logps/chosen": -2587.89111328125, |
|
"logps/rejected": -2198.08056640625, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7090158462524414, |
|
"rewards/margins": 0.3786148130893707, |
|
"rewards/rejected": 0.33040106296539307, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5.004568924107598e-07, |
|
"logits/chosen": -1.6318562030792236, |
|
"logits/rejected": -1.5859413146972656, |
|
"logps/chosen": -2931.807373046875, |
|
"logps/rejected": -2507.31298828125, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.7447463274002075, |
|
"rewards/margins": 0.2536779046058655, |
|
"rewards/rejected": 0.49106842279434204, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.913194791064675e-07, |
|
"logits/chosen": -1.639493703842163, |
|
"logits/rejected": -1.5823523998260498, |
|
"logps/chosen": -2601.8447265625, |
|
"logps/rejected": -2357.34814453125, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.7931571006774902, |
|
"rewards/margins": 0.5028332471847534, |
|
"rewards/rejected": 0.2903238832950592, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.82184965025639e-07, |
|
"logits/chosen": -1.5899850130081177, |
|
"logits/rejected": -1.5473779439926147, |
|
"logps/chosen": -2727.800537109375, |
|
"logps/rejected": -2362.034423828125, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.9983813166618347, |
|
"rewards/margins": 0.39955899119377136, |
|
"rewards/rejected": 0.5988222360610962, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.73056401021775e-07, |
|
"logits/chosen": -1.5197970867156982, |
|
"logits/rejected": -1.4553916454315186, |
|
"logps/chosen": -2388.419921875, |
|
"logps/rejected": -2081.69775390625, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.601919949054718, |
|
"rewards/margins": 0.227634459733963, |
|
"rewards/rejected": 0.374285489320755, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.639368359610982e-07, |
|
"logits/chosen": -1.4987363815307617, |
|
"logits/rejected": -1.4325814247131348, |
|
"logps/chosen": -2522.322509765625, |
|
"logps/rejected": -2121.84912109375, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.5968645215034485, |
|
"rewards/margins": 0.3043002486228943, |
|
"rewards/rejected": 0.2925642132759094, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.5482931570425803e-07, |
|
"logits/chosen": -1.5703797340393066, |
|
"logits/rejected": -1.5181505680084229, |
|
"logps/chosen": -2581.994140625, |
|
"logps/rejected": -2270.20166015625, |
|
"loss": 0.6539, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6347614526748657, |
|
"rewards/margins": 0.321241557598114, |
|
"rewards/rejected": 0.31352001428604126, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.5226702690124512, |
|
"eval_logits/rejected": -1.4696787595748901, |
|
"eval_logps/chosen": -2588.0068359375, |
|
"eval_logps/rejected": -2181.99365234375, |
|
"eval_loss": 0.6389243006706238, |
|
"eval_rewards/accuracies": 0.6499999761581421, |
|
"eval_rewards/chosen": 0.8476871848106384, |
|
"eval_rewards/margins": 0.362454891204834, |
|
"eval_rewards/rejected": 0.4852323532104492, |
|
"eval_runtime": 301.2203, |
|
"eval_samples_per_second": 6.64, |
|
"eval_steps_per_second": 0.415, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4573688208903686e-07, |
|
"logits/chosen": -1.4915900230407715, |
|
"logits/rejected": -1.3990033864974976, |
|
"logps/chosen": -2177.49169921875, |
|
"logps/rejected": -1711.8460693359375, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.6701298356056213, |
|
"rewards/margins": 0.3176502585411072, |
|
"rewards/rejected": 0.3524795174598694, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.366625719144016e-07, |
|
"logits/chosen": -1.5326006412506104, |
|
"logits/rejected": -1.4640724658966064, |
|
"logps/chosen": -2241.04052734375, |
|
"logps/rejected": -1938.517822265625, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.9225455522537231, |
|
"rewards/margins": 0.3196006417274475, |
|
"rewards/rejected": 0.6029448509216309, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.276094159262368e-07, |
|
"logits/chosen": -1.459031343460083, |
|
"logits/rejected": -1.4118678569793701, |
|
"logps/chosen": -2329.41943359375, |
|
"logps/rejected": -2065.614501953125, |
|
"loss": 0.6114, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 1.0603306293487549, |
|
"rewards/margins": 0.38362884521484375, |
|
"rewards/rejected": 0.6767016649246216, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.1858043780510135e-07, |
|
"logits/chosen": -1.4943807125091553, |
|
"logits/rejected": -1.4440956115722656, |
|
"logps/chosen": -2648.4462890625, |
|
"logps/rejected": -2317.19970703125, |
|
"loss": 0.6521, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.9549520611763, |
|
"rewards/margins": 0.1597224771976471, |
|
"rewards/rejected": 0.7952295541763306, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0957865315634204e-07, |
|
"logits/chosen": -1.4685379266738892, |
|
"logits/rejected": -1.4013986587524414, |
|
"logps/chosen": -2750.71142578125, |
|
"logps/rejected": -2100.20068359375, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.7830246686935425, |
|
"rewards/margins": 0.5725045204162598, |
|
"rewards/rejected": 0.2105201780796051, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.006070685029075e-07, |
|
"logits/chosen": -1.484535813331604, |
|
"logits/rejected": -1.4587595462799072, |
|
"logps/chosen": -2228.81787109375, |
|
"logps/rejected": -2157.81298828125, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.4148440957069397, |
|
"rewards/margins": 0.10307104885578156, |
|
"rewards/rejected": 0.3117729723453522, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.916686802811927e-07, |
|
"logits/chosen": -1.3863401412963867, |
|
"logits/rejected": -1.4270175695419312, |
|
"logps/chosen": -2092.947998046875, |
|
"logps/rejected": -2140.6953125, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.6797593832015991, |
|
"rewards/margins": 0.16529296338558197, |
|
"rewards/rejected": 0.514466404914856, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.8276647384025467e-07, |
|
"logits/chosen": -1.4469492435455322, |
|
"logits/rejected": -1.3607311248779297, |
|
"logps/chosen": -2557.885009765625, |
|
"logps/rejected": -2165.09033203125, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.6221494078636169, |
|
"rewards/margins": 0.2990773320198059, |
|
"rewards/rejected": 0.3230721354484558, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.7390342244472883e-07, |
|
"logits/chosen": -1.5888515710830688, |
|
"logits/rejected": -1.5609667301177979, |
|
"logps/chosen": -2778.28515625, |
|
"logps/rejected": -2496.6396484375, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.9935188293457031, |
|
"rewards/margins": 0.3621361255645752, |
|
"rewards/rejected": 0.6313827037811279, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.6508248628178446e-07, |
|
"logits/chosen": -1.6396839618682861, |
|
"logits/rejected": -1.5974278450012207, |
|
"logps/chosen": -2493.72216796875, |
|
"logps/rejected": -2359.435791015625, |
|
"loss": 0.7267, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.9704666137695312, |
|
"rewards/margins": 0.3994936943054199, |
|
"rewards/rejected": 0.5709729790687561, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -1.6800066232681274, |
|
"eval_logits/rejected": -1.6292266845703125, |
|
"eval_logps/chosen": -2618.873779296875, |
|
"eval_logps/rejected": -2207.94384765625, |
|
"eval_loss": 0.6421077847480774, |
|
"eval_rewards/accuracies": 0.6620000004768372, |
|
"eval_rewards/chosen": 0.5390151143074036, |
|
"eval_rewards/margins": 0.3132854104042053, |
|
"eval_rewards/rejected": 0.22572976350784302, |
|
"eval_runtime": 304.4335, |
|
"eval_samples_per_second": 6.57, |
|
"eval_steps_per_second": 0.411, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.563066114724441e-07, |
|
"logits/chosen": -1.6271164417266846, |
|
"logits/rejected": -1.5858738422393799, |
|
"logps/chosen": -2807.364990234375, |
|
"logps/rejected": -2029.6510009765625, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6177263855934143, |
|
"rewards/margins": 0.27368754148483276, |
|
"rewards/rejected": 0.3440387547016144, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.475787290876055e-07, |
|
"logits/chosen": -1.5973155498504639, |
|
"logits/rejected": -1.558475375175476, |
|
"logps/chosen": -2490.0703125, |
|
"logps/rejected": -2087.466064453125, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8565654754638672, |
|
"rewards/margins": 0.4143308699131012, |
|
"rewards/rejected": 0.4422345757484436, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.389017541690854e-07, |
|
"logits/chosen": -1.5630786418914795, |
|
"logits/rejected": -1.548064947128296, |
|
"logps/chosen": -2276.59619140625, |
|
"logps/rejected": -1839.0726318359375, |
|
"loss": 0.6357, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.7428416609764099, |
|
"rewards/margins": 0.3907639980316162, |
|
"rewards/rejected": 0.35207757353782654, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.30278584756021e-07, |
|
"logits/chosen": -1.548689365386963, |
|
"logits/rejected": -1.4891592264175415, |
|
"logps/chosen": -2640.1591796875, |
|
"logps/rejected": -2317.181396484375, |
|
"loss": 0.6184, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.910789966583252, |
|
"rewards/margins": 0.37699228525161743, |
|
"rewards/rejected": 0.5337976217269897, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.2171210091694735e-07, |
|
"logits/chosen": -1.608028769493103, |
|
"logits/rejected": -1.5826674699783325, |
|
"logps/chosen": -2531.904296875, |
|
"logps/rejected": -2342.30419921875, |
|
"loss": 0.6087, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.7222377061843872, |
|
"rewards/margins": 0.4060022830963135, |
|
"rewards/rejected": 0.3162355422973633, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.132051637878789e-07, |
|
"logits/chosen": -1.5921976566314697, |
|
"logits/rejected": -1.4880411624908447, |
|
"logps/chosen": -2295.463134765625, |
|
"logps/rejected": -1800.047119140625, |
|
"loss": 0.6709, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8092087507247925, |
|
"rewards/margins": 0.39788728952407837, |
|
"rewards/rejected": 0.4113215506076813, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.0476061461671155e-07, |
|
"logits/chosen": -1.5929429531097412, |
|
"logits/rejected": -1.560585856437683, |
|
"logps/chosen": -2178.914306640625, |
|
"logps/rejected": -2029.7672119140625, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.8661308288574219, |
|
"rewards/margins": 0.3666331171989441, |
|
"rewards/rejected": 0.4994977116584778, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.9638127381427127e-07, |
|
"logits/chosen": -1.4586659669876099, |
|
"logits/rejected": -1.4546220302581787, |
|
"logps/chosen": -2244.927978515625, |
|
"logps/rejected": -2030.598876953125, |
|
"loss": 0.5909, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7213354110717773, |
|
"rewards/margins": 0.37115171551704407, |
|
"rewards/rejected": 0.3501836955547333, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.8806994001231766e-07, |
|
"logits/chosen": -1.462428092956543, |
|
"logits/rejected": -1.4601207971572876, |
|
"logps/chosen": -2553.372314453125, |
|
"logps/rejected": -2366.053955078125, |
|
"loss": 0.6324, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.9080332517623901, |
|
"rewards/margins": 0.3693556487560272, |
|
"rewards/rejected": 0.5386777520179749, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.7982938912882544e-07, |
|
"logits/chosen": -1.5518906116485596, |
|
"logits/rejected": -1.47800874710083, |
|
"logps/chosen": -2843.82421875, |
|
"logps/rejected": -2309.199951171875, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.102667212486267, |
|
"rewards/margins": 0.6155067682266235, |
|
"rewards/rejected": 0.48716044425964355, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_logits/chosen": -1.5460779666900635, |
|
"eval_logits/rejected": -1.4993510246276855, |
|
"eval_logps/chosen": -2582.20947265625, |
|
"eval_logps/rejected": -2181.592041015625, |
|
"eval_loss": 0.6300790905952454, |
|
"eval_rewards/accuracies": 0.6660000085830688, |
|
"eval_rewards/chosen": 0.9056587815284729, |
|
"eval_rewards/margins": 0.41641080379486084, |
|
"eval_rewards/rejected": 0.48924797773361206, |
|
"eval_runtime": 299.2617, |
|
"eval_samples_per_second": 6.683, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.716623734408488e-07, |
|
"logits/chosen": -1.5478688478469849, |
|
"logits/rejected": -1.509421944618225, |
|
"logps/chosen": -2733.4658203125, |
|
"logps/rejected": -2210.788330078125, |
|
"loss": 0.676, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.8321071863174438, |
|
"rewards/margins": 0.17042401432991028, |
|
"rewards/rejected": 0.661683201789856, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.635716206652843e-07, |
|
"logits/chosen": -1.51913321018219, |
|
"logits/rejected": -1.5177617073059082, |
|
"logps/chosen": -2348.56005859375, |
|
"logps/rejected": -2216.1884765625, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6093713045120239, |
|
"rewards/margins": 0.3202818036079407, |
|
"rewards/rejected": 0.28908950090408325, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.5555983304783515e-07, |
|
"logits/chosen": -1.4471040964126587, |
|
"logits/rejected": -1.4324887990951538, |
|
"logps/chosen": -2042.9017333984375, |
|
"logps/rejected": -1859.039306640625, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.49268943071365356, |
|
"rewards/margins": 0.3076168894767761, |
|
"rewards/rejected": 0.18507252633571625, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.4762968646048356e-07, |
|
"logits/chosen": -1.4452800750732422, |
|
"logits/rejected": -1.3810513019561768, |
|
"logps/chosen": -2950.53271484375, |
|
"logps/rejected": -2301.14892578125, |
|
"loss": 0.6184, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.9146178364753723, |
|
"rewards/margins": 0.5878747701644897, |
|
"rewards/rejected": 0.326742947101593, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.397838295077703e-07, |
|
"logits/chosen": -1.4514172077178955, |
|
"logits/rejected": -1.430443525314331, |
|
"logps/chosen": -2407.11181640625, |
|
"logps/rejected": -2338.7666015625, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.6004685163497925, |
|
"rewards/margins": 0.15282198786735535, |
|
"rewards/rejected": 0.44764652848243713, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.3202488264218357e-07, |
|
"logits/chosen": -1.4685500860214233, |
|
"logits/rejected": -1.3829035758972168, |
|
"logps/chosen": -2675.003173828125, |
|
"logps/rejected": -2091.812744140625, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.828966498374939, |
|
"rewards/margins": 0.3263750672340393, |
|
"rewards/rejected": 0.5025915503501892, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.243554372889479e-07, |
|
"logits/chosen": -1.4399888515472412, |
|
"logits/rejected": -1.3919384479522705, |
|
"logps/chosen": -2576.9365234375, |
|
"logps/rejected": -2010.0601806640625, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.9715896844863892, |
|
"rewards/margins": 0.460097074508667, |
|
"rewards/rejected": 0.5114925503730774, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.1677805498050998e-07, |
|
"logits/chosen": -1.3894431591033936, |
|
"logits/rejected": -1.3669414520263672, |
|
"logps/chosen": -1986.740966796875, |
|
"logps/rejected": -1580.8253173828125, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.5728658437728882, |
|
"rewards/margins": 0.245010107755661, |
|
"rewards/rejected": 0.32785576581954956, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.0929526650100716e-07, |
|
"logits/chosen": -1.4540735483169556, |
|
"logits/rejected": -1.3499418497085571, |
|
"logps/chosen": -2753.11669921875, |
|
"logps/rejected": -2095.53466796875, |
|
"loss": 0.6456, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8334699869155884, |
|
"rewards/margins": 0.7317672967910767, |
|
"rewards/rejected": 0.10170261561870575, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.0190957104100692e-07, |
|
"logits/chosen": -1.4822982549667358, |
|
"logits/rejected": -1.4137917757034302, |
|
"logps/chosen": -2363.976806640625, |
|
"logps/rejected": -1997.6536865234375, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.7185107469558716, |
|
"rewards/margins": 0.401099294424057, |
|
"rewards/rejected": 0.3174114525318146, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -1.4891161918640137, |
|
"eval_logits/rejected": -1.4439697265625, |
|
"eval_logps/chosen": -2585.19140625, |
|
"eval_logps/rejected": -2184.890869140625, |
|
"eval_loss": 0.6342132091522217, |
|
"eval_rewards/accuracies": 0.6660000085830688, |
|
"eval_rewards/chosen": 0.8758403062820435, |
|
"eval_rewards/margins": 0.4195804297924042, |
|
"eval_rewards/rejected": 0.4562598764896393, |
|
"eval_runtime": 299.1063, |
|
"eval_samples_per_second": 6.687, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.9462343536279612e-07, |
|
"logits/chosen": -1.475975751876831, |
|
"logits/rejected": -1.4379873275756836, |
|
"logps/chosen": -2481.176025390625, |
|
"logps/rejected": -2232.84912109375, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.9434836506843567, |
|
"rewards/margins": 0.4186176657676697, |
|
"rewards/rejected": 0.524865984916687, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.874392929765044e-07, |
|
"logits/chosen": -1.4733283519744873, |
|
"logits/rejected": -1.3902546167373657, |
|
"logps/chosen": -2782.106689453125, |
|
"logps/rejected": -2127.639404296875, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.1233876943588257, |
|
"rewards/margins": 0.5207871198654175, |
|
"rewards/rejected": 0.6026005148887634, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.8035954332732889e-07, |
|
"logits/chosen": -1.4501025676727295, |
|
"logits/rejected": -1.4023559093475342, |
|
"logps/chosen": -2202.23974609375, |
|
"logps/rejected": -1934.811279296875, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.644204318523407, |
|
"rewards/margins": 0.34255489706993103, |
|
"rewards/rejected": 0.30164945125579834, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.733865509941419e-07, |
|
"logits/chosen": -1.4848979711532593, |
|
"logits/rejected": -1.445502519607544, |
|
"logps/chosen": -2633.660888671875, |
|
"logps/rejected": -2392.826416015625, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8459588885307312, |
|
"rewards/margins": 0.4044179916381836, |
|
"rewards/rejected": 0.4415409564971924, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6652264489973861e-07, |
|
"logits/chosen": -1.4826475381851196, |
|
"logits/rejected": -1.426309585571289, |
|
"logps/chosen": -2556.17626953125, |
|
"logps/rejected": -1992.7232666015625, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6768069267272949, |
|
"rewards/margins": 0.32333052158355713, |
|
"rewards/rejected": 0.3534763753414154, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.5977011753299724e-07, |
|
"logits/chosen": -1.5091631412506104, |
|
"logits/rejected": -1.4753676652908325, |
|
"logps/chosen": -2201.044921875, |
|
"logps/rejected": -1877.4302978515625, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7366055250167847, |
|
"rewards/margins": 0.2949199378490448, |
|
"rewards/rejected": 0.44168558716773987, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.5313122418320496e-07, |
|
"logits/chosen": -1.5059702396392822, |
|
"logits/rejected": -1.4471460580825806, |
|
"logps/chosen": -2972.50439453125, |
|
"logps/rejected": -2307.0458984375, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.0683201551437378, |
|
"rewards/margins": 0.5777542591094971, |
|
"rewards/rejected": 0.49056586623191833, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4660818218681125e-07, |
|
"logits/chosen": -1.4828715324401855, |
|
"logits/rejected": -1.4702181816101074, |
|
"logps/chosen": -2593.748046875, |
|
"logps/rejected": -2591.448974609375, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.9806830286979675, |
|
"rewards/margins": 0.4084799885749817, |
|
"rewards/rejected": 0.5722029805183411, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4020317018685362e-07, |
|
"logits/chosen": -1.456514596939087, |
|
"logits/rejected": -1.390700101852417, |
|
"logps/chosen": -2405.19482421875, |
|
"logps/rejected": -1981.04296875, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.614848256111145, |
|
"rewards/margins": 0.3052050471305847, |
|
"rewards/rejected": 0.3096432089805603, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.3391832740531055e-07, |
|
"logits/chosen": -1.4236390590667725, |
|
"logits/rejected": -1.3956820964813232, |
|
"logps/chosen": -2446.695068359375, |
|
"logps/rejected": -2376.41259765625, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.7929419875144958, |
|
"rewards/margins": 0.35024353861808777, |
|
"rewards/rejected": 0.44269853830337524, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -1.4759258031845093, |
|
"eval_logits/rejected": -1.4282684326171875, |
|
"eval_logps/chosen": -2592.221923828125, |
|
"eval_logps/rejected": -2190.57958984375, |
|
"eval_loss": 0.6323803663253784, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": 0.8055330514907837, |
|
"eval_rewards/margins": 0.40616247057914734, |
|
"eval_rewards/rejected": 0.39937061071395874, |
|
"eval_runtime": 299.6311, |
|
"eval_samples_per_second": 6.675, |
|
"eval_steps_per_second": 0.417, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.2775575292861707e-07, |
|
"logits/chosen": -1.4745705127716064, |
|
"logits/rejected": -1.4221175909042358, |
|
"logps/chosen": -2639.8076171875, |
|
"logps/rejected": -2123.642578125, |
|
"loss": 0.6056, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.9188385009765625, |
|
"rewards/margins": 0.5551499128341675, |
|
"rewards/rejected": 0.3636886477470398, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.21717505006588e-07, |
|
"logits/chosen": -1.4603058099746704, |
|
"logits/rejected": -1.4439467191696167, |
|
"logps/chosen": -2664.22119140625, |
|
"logps/rejected": -2496.781005859375, |
|
"loss": 0.6213, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.9416143298149109, |
|
"rewards/margins": 0.3402588963508606, |
|
"rewards/rejected": 0.6013555526733398, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1580560036497877e-07, |
|
"logits/chosen": -1.473534345626831, |
|
"logits/rejected": -1.4060730934143066, |
|
"logps/chosen": -2819.74462890625, |
|
"logps/rejected": -2299.840576171875, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.8828972578048706, |
|
"rewards/margins": 0.5179694294929504, |
|
"rewards/rejected": 0.3649279475212097, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.1002201353191521e-07, |
|
"logits/chosen": -1.4415251016616821, |
|
"logits/rejected": -1.461745023727417, |
|
"logps/chosen": -2390.272705078125, |
|
"logps/rejected": -2447.08642578125, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.6030459403991699, |
|
"rewards/margins": 0.1989385038614273, |
|
"rewards/rejected": 0.4041074216365814, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0436867617841766e-07, |
|
"logits/chosen": -1.4779837131500244, |
|
"logits/rejected": -1.443192958831787, |
|
"logps/chosen": -2101.65771484375, |
|
"logps/rejected": -1614.459228515625, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.38758862018585205, |
|
"rewards/margins": 0.41923385858535767, |
|
"rewards/rejected": -0.03164520859718323, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.884747647323854e-08, |
|
"logits/chosen": -1.4118781089782715, |
|
"logits/rejected": -1.398271083831787, |
|
"logps/chosen": -2657.19287109375, |
|
"logps/rejected": -2414.64990234375, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.6775075793266296, |
|
"rewards/margins": 0.22664561867713928, |
|
"rewards/rejected": 0.4508620798587799, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.346025845222871e-08, |
|
"logits/chosen": -1.4589564800262451, |
|
"logits/rejected": -1.4241827726364136, |
|
"logps/chosen": -2566.69384765625, |
|
"logps/rejected": -2381.8310546875, |
|
"loss": 0.6699, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.7393044829368591, |
|
"rewards/margins": 0.26446717977523804, |
|
"rewards/rejected": 0.4748373031616211, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.82088214024454e-08, |
|
"logits/chosen": -1.4593846797943115, |
|
"logits/rejected": -1.4349015951156616, |
|
"logps/chosen": -2314.169189453125, |
|
"logps/rejected": -2187.58544921875, |
|
"loss": 0.6497, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.4934845566749573, |
|
"rewards/margins": 0.21952751278877258, |
|
"rewards/rejected": 0.2739570140838623, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.309491926120393e-08, |
|
"logits/chosen": -1.4479442834854126, |
|
"logits/rejected": -1.388183832168579, |
|
"logps/chosen": -2701.14111328125, |
|
"logps/rejected": -2293.677001953125, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6762970089912415, |
|
"rewards/margins": 0.43245062232017517, |
|
"rewards/rejected": 0.2438463717699051, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.812026003027771e-08, |
|
"logits/chosen": -1.2826584577560425, |
|
"logits/rejected": -1.2632884979248047, |
|
"logps/chosen": -2654.244873046875, |
|
"logps/rejected": -2260.9638671875, |
|
"loss": 0.6326, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.40709176659584045, |
|
"rewards/margins": 0.25365540385246277, |
|
"rewards/rejected": 0.15343639254570007, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.4959200620651245, |
|
"eval_logits/rejected": -1.450128436088562, |
|
"eval_logps/chosen": -2627.5283203125, |
|
"eval_logps/rejected": -2220.19970703125, |
|
"eval_loss": 0.6391750574111938, |
|
"eval_rewards/accuracies": 0.656000018119812, |
|
"eval_rewards/chosen": 0.4524710476398468, |
|
"eval_rewards/margins": 0.3492998778820038, |
|
"eval_rewards/rejected": 0.10317116975784302, |
|
"eval_runtime": 302.5644, |
|
"eval_samples_per_second": 6.61, |
|
"eval_steps_per_second": 0.413, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.328650520543906e-08, |
|
"logits/chosen": -1.4119188785552979, |
|
"logits/rejected": -1.2946244478225708, |
|
"logps/chosen": -2411.543701171875, |
|
"logps/rejected": -1841.427978515625, |
|
"loss": 0.6211, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.3078997731208801, |
|
"rewards/margins": 0.17288625240325928, |
|
"rewards/rejected": 0.13501352071762085, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.859526922153352e-08, |
|
"logits/chosen": -1.4251132011413574, |
|
"logits/rejected": -1.3843073844909668, |
|
"logps/chosen": -2429.940185546875, |
|
"logps/rejected": -1990.4915771484375, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.5606139898300171, |
|
"rewards/margins": 0.2745349407196045, |
|
"rewards/rejected": 0.286079078912735, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.40481189132711e-08, |
|
"logits/chosen": -1.4726622104644775, |
|
"logits/rejected": -1.4261372089385986, |
|
"logps/chosen": -2766.93115234375, |
|
"logps/rejected": -2061.09912109375, |
|
"loss": 0.6425, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5808910131454468, |
|
"rewards/margins": 0.43574967980384827, |
|
"rewards/rejected": 0.1451413631439209, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.964657299191711e-08, |
|
"logits/chosen": -1.4473376274108887, |
|
"logits/rejected": -1.4126627445220947, |
|
"logps/chosen": -2487.42919921875, |
|
"logps/rejected": -2065.8955078125, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.8268505930900574, |
|
"rewards/margins": 0.4533798098564148, |
|
"rewards/rejected": 0.37347084283828735, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.53921015380539e-08, |
|
"logits/chosen": -1.428260087966919, |
|
"logits/rejected": -1.4423437118530273, |
|
"logps/chosen": -2295.45556640625, |
|
"logps/rejected": -2376.85595703125, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.6111637353897095, |
|
"rewards/margins": 0.18756714463233948, |
|
"rewards/rejected": 0.4235965311527252, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 5.1286125510586805e-08, |
|
"logits/chosen": -1.462693452835083, |
|
"logits/rejected": -1.4421815872192383, |
|
"logps/chosen": -2543.067626953125, |
|
"logps/rejected": -2478.81494140625, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8606179356575012, |
|
"rewards/margins": 0.37834784388542175, |
|
"rewards/rejected": 0.48227009177207947, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.733001627215466e-08, |
|
"logits/chosen": -1.4652189016342163, |
|
"logits/rejected": -1.4526941776275635, |
|
"logps/chosen": -2576.45556640625, |
|
"logps/rejected": -2486.090576171875, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.7547820210456848, |
|
"rewards/margins": 0.23100514709949493, |
|
"rewards/rejected": 0.5237768292427063, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.352509513110658e-08, |
|
"logits/chosen": -1.4286987781524658, |
|
"logits/rejected": -1.4079492092132568, |
|
"logps/chosen": -2363.428955078125, |
|
"logps/rejected": -2208.08740234375, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.574379026889801, |
|
"rewards/margins": 0.24226748943328857, |
|
"rewards/rejected": 0.33211153745651245, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9872632900194936e-08, |
|
"logits/chosen": -1.4842069149017334, |
|
"logits/rejected": -1.415021300315857, |
|
"logps/chosen": -2913.2490234375, |
|
"logps/rejected": -2346.609619140625, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.6599145531654358, |
|
"rewards/margins": 0.26596465706825256, |
|
"rewards/rejected": 0.3939499258995056, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.6373849472134954e-08, |
|
"logits/chosen": -1.4031012058258057, |
|
"logits/rejected": -1.3779500722885132, |
|
"logps/chosen": -2266.2158203125, |
|
"logps/rejected": -1981.5833740234375, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5494025945663452, |
|
"rewards/margins": 0.21553239226341248, |
|
"rewards/rejected": 0.33387020230293274, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -1.4758340120315552, |
|
"eval_logits/rejected": -1.4289432764053345, |
|
"eval_logps/chosen": -2598.2412109375, |
|
"eval_logps/rejected": -2195.535888671875, |
|
"eval_loss": 0.6306354403495789, |
|
"eval_rewards/accuracies": 0.6660000085830688, |
|
"eval_rewards/chosen": 0.7453421354293823, |
|
"eval_rewards/margins": 0.3955351710319519, |
|
"eval_rewards/rejected": 0.3498069643974304, |
|
"eval_runtime": 295.7456, |
|
"eval_samples_per_second": 6.763, |
|
"eval_steps_per_second": 0.423, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.302991341216976e-08, |
|
"logits/chosen": -1.4159257411956787, |
|
"logits/rejected": -1.392617106437683, |
|
"logps/chosen": -2077.9482421875, |
|
"logps/rejected": -1972.2515869140625, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.5421566367149353, |
|
"rewards/margins": 0.2578433156013489, |
|
"rewards/rejected": 0.28431329131126404, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9841941567779474e-08, |
|
"logits/chosen": -1.4799764156341553, |
|
"logits/rejected": -1.4051799774169922, |
|
"logps/chosen": -2897.63232421875, |
|
"logps/rejected": -2480.90625, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.8838424682617188, |
|
"rewards/margins": 0.3801085352897644, |
|
"rewards/rejected": 0.5037339925765991, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.681099869566328e-08, |
|
"logits/chosen": -1.4630422592163086, |
|
"logits/rejected": -1.4653818607330322, |
|
"logps/chosen": -2166.15966796875, |
|
"logps/rejected": -2133.84326171875, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.5266101956367493, |
|
"rewards/margins": 0.21218034625053406, |
|
"rewards/rejected": 0.3144298195838928, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.3938097106119216e-08, |
|
"logits/chosen": -1.4574975967407227, |
|
"logits/rejected": -1.4154255390167236, |
|
"logps/chosen": -2208.398681640625, |
|
"logps/rejected": -1935.158203125, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6478286981582642, |
|
"rewards/margins": 0.3098670542240143, |
|
"rewards/rejected": 0.33796167373657227, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.12241963249406e-08, |
|
"logits/chosen": -1.4689569473266602, |
|
"logits/rejected": -1.4307196140289307, |
|
"logps/chosen": -2519.071044921875, |
|
"logps/rejected": -2212.586181640625, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6326580047607422, |
|
"rewards/margins": 0.3157083988189697, |
|
"rewards/rejected": 0.31694963574409485, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.8670202772942568e-08, |
|
"logits/chosen": -1.4382356405258179, |
|
"logits/rejected": -1.3769454956054688, |
|
"logps/chosen": -2694.0830078125, |
|
"logps/rejected": -2166.41845703125, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.7784560322761536, |
|
"rewards/margins": 0.3015449643135071, |
|
"rewards/rejected": 0.4769110679626465, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6276969463224545e-08, |
|
"logits/chosen": -1.4650015830993652, |
|
"logits/rejected": -1.463744878768921, |
|
"logps/chosen": -2586.126220703125, |
|
"logps/rejected": -2591.75439453125, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.6145761609077454, |
|
"rewards/margins": 0.4303979277610779, |
|
"rewards/rejected": 0.18417824804782867, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.4045295716271e-08, |
|
"logits/chosen": -1.4920063018798828, |
|
"logits/rejected": -1.450634241104126, |
|
"logps/chosen": -2605.60986328125, |
|
"logps/rejected": -2116.304931640625, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.645778477191925, |
|
"rewards/margins": 0.3450910151004791, |
|
"rewards/rejected": 0.30068737268447876, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1975926892984766e-08, |
|
"logits/chosen": -1.4100964069366455, |
|
"logits/rejected": -1.3769333362579346, |
|
"logps/chosen": -2435.0087890625, |
|
"logps/rejected": -2033.880126953125, |
|
"loss": 0.6496, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.6221305727958679, |
|
"rewards/margins": 0.3294012248516083, |
|
"rewards/rejected": 0.29272931814193726, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0069554145742787e-08, |
|
"logits/chosen": -1.395265817642212, |
|
"logits/rejected": -1.3731589317321777, |
|
"logps/chosen": -2578.064697265625, |
|
"logps/rejected": -2280.887451171875, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6557528972625732, |
|
"rewards/margins": 0.4573606848716736, |
|
"rewards/rejected": 0.1983920931816101, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -1.4769095182418823, |
|
"eval_logits/rejected": -1.4307643175125122, |
|
"eval_logps/chosen": -2607.336669921875, |
|
"eval_logps/rejected": -2203.039306640625, |
|
"eval_loss": 0.6322839260101318, |
|
"eval_rewards/accuracies": 0.6600000262260437, |
|
"eval_rewards/chosen": 0.6543857455253601, |
|
"eval_rewards/margins": 0.3796128034591675, |
|
"eval_rewards/rejected": 0.2747729420661926, |
|
"eval_runtime": 293.77, |
|
"eval_samples_per_second": 6.808, |
|
"eval_steps_per_second": 0.426, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.326814187556485e-09, |
|
"logits/chosen": -1.4078927040100098, |
|
"logits/rejected": -1.380299687385559, |
|
"logps/chosen": -2524.50439453125, |
|
"logps/rejected": -2226.43994140625, |
|
"loss": 0.6208, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.5907411575317383, |
|
"rewards/margins": 0.25163906812667847, |
|
"rewards/rejected": 0.3391020894050598, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.7482890794151594e-09, |
|
"logits/chosen": -1.4838191270828247, |
|
"logits/rejected": -1.4362868070602417, |
|
"logps/chosen": -2814.218017578125, |
|
"logps/rejected": -2245.9033203125, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.8923807144165039, |
|
"rewards/margins": 0.4581494927406311, |
|
"rewards/rejected": 0.4342312812805176, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.334506035882036e-09, |
|
"logits/chosen": -1.370774507522583, |
|
"logits/rejected": -1.3359023332595825, |
|
"logps/chosen": -2687.776123046875, |
|
"logps/rejected": -2035.099609375, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6979535818099976, |
|
"rewards/margins": 0.42525219917297363, |
|
"rewards/rejected": 0.27270132303237915, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.0859372490090194e-09, |
|
"logits/chosen": -1.4562771320343018, |
|
"logits/rejected": -1.4093388319015503, |
|
"logps/chosen": -2788.104248046875, |
|
"logps/rejected": -2335.853759765625, |
|
"loss": 0.6116, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7869713306427002, |
|
"rewards/margins": 0.4374913275241852, |
|
"rewards/rejected": 0.3494799733161926, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.0029997306283416e-09, |
|
"logits/chosen": -1.4756406545639038, |
|
"logits/rejected": -1.3986704349517822, |
|
"logps/chosen": -2574.64111328125, |
|
"logps/rejected": -1893.6328125, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.6360118389129639, |
|
"rewards/margins": 0.3881533145904541, |
|
"rewards/rejected": 0.24785849452018738, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.0860551730742526e-09, |
|
"logits/chosen": -1.4544508457183838, |
|
"logits/rejected": -1.419983983039856, |
|
"logps/chosen": -2375.126220703125, |
|
"logps/rejected": -2017.3466796875, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.8510934710502625, |
|
"rewards/margins": 0.6245936155319214, |
|
"rewards/rejected": 0.22649994492530823, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3354098283802628e-09, |
|
"logits/chosen": -1.4696677923202515, |
|
"logits/rejected": -1.4230769872665405, |
|
"logps/chosen": -2438.054931640625, |
|
"logps/rejected": -2103.46044921875, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7339269518852234, |
|
"rewards/margins": 0.35110199451446533, |
|
"rewards/rejected": 0.38282495737075806, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.513144059937415e-10, |
|
"logits/chosen": -1.4952335357666016, |
|
"logits/rejected": -1.442657232284546, |
|
"logps/chosen": -2848.296630859375, |
|
"logps/rejected": -2374.80126953125, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.7486821413040161, |
|
"rewards/margins": 0.31393861770629883, |
|
"rewards/rejected": 0.43474358320236206, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.3396398904106393e-10, |
|
"logits/chosen": -1.4425480365753174, |
|
"logits/rejected": -1.4436792135238647, |
|
"logps/chosen": -2551.7880859375, |
|
"logps/rejected": -2169.797607421875, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5796520709991455, |
|
"rewards/margins": 0.4224782884120941, |
|
"rewards/rejected": 0.15717382729053497, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.349796917112018e-11, |
|
"logits/chosen": -1.4112383127212524, |
|
"logits/rejected": -1.3823628425598145, |
|
"logps/chosen": -2330.736083984375, |
|
"logps/rejected": -2090.098876953125, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.5138527750968933, |
|
"rewards/margins": 0.193558931350708, |
|
"rewards/rejected": 0.3202938437461853, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -1.4753704071044922, |
|
"eval_logits/rejected": -1.4289445877075195, |
|
"eval_logps/chosen": -2603.777587890625, |
|
"eval_logps/rejected": -2200.1181640625, |
|
"eval_loss": 0.6316895484924316, |
|
"eval_rewards/accuracies": 0.6639999747276306, |
|
"eval_rewards/chosen": 0.6899767518043518, |
|
"eval_rewards/margins": 0.38598912954330444, |
|
"eval_rewards/rejected": 0.30398762226104736, |
|
"eval_runtime": 302.6434, |
|
"eval_samples_per_second": 6.608, |
|
"eval_steps_per_second": 0.413, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.4598416090011597, |
|
"logits/rejected": -1.4293019771575928, |
|
"logps/chosen": -2462.09912109375, |
|
"logps/rejected": -2050.02490234375, |
|
"loss": 0.6322, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.5636069178581238, |
|
"rewards/margins": 0.25135958194732666, |
|
"rewards/rejected": 0.3122473955154419, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6480738864519209, |
|
"train_runtime": 26013.0665, |
|
"train_samples_per_second": 2.35, |
|
"train_steps_per_second": 0.073 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100000000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|