|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333334e-08, |
|
"logits/chosen": -1.5177760124206543, |
|
"logits/rejected": -1.1611042022705078, |
|
"logps/chosen": -309.02911376953125, |
|
"logps/rejected": -848.8409423828125, |
|
"loss": 0.2593, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333336e-07, |
|
"logits/chosen": -1.5949721336364746, |
|
"logits/rejected": -1.165027379989624, |
|
"logps/chosen": -451.9952697753906, |
|
"logps/rejected": -786.9351806640625, |
|
"loss": 0.2269, |
|
"rewards/accuracies": 0.2777777910232544, |
|
"rewards/chosen": -0.0005774286109954119, |
|
"rewards/margins": -0.000440336880274117, |
|
"rewards/rejected": -0.00013709173072129488, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.666666666666667e-07, |
|
"logits/chosen": -1.557755470275879, |
|
"logits/rejected": -1.188490629196167, |
|
"logps/chosen": -457.94342041015625, |
|
"logps/rejected": -653.2659912109375, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0007440468179993331, |
|
"rewards/margins": 0.000692694156896323, |
|
"rewards/rejected": 5.135267929290421e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -1.8446645736694336, |
|
"logits/rejected": -1.1848350763320923, |
|
"logps/chosen": -542.8465576171875, |
|
"logps/rejected": -840.0565185546875, |
|
"loss": 0.1931, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.001833869144320488, |
|
"rewards/margins": 0.002176427748054266, |
|
"rewards/rejected": -0.000342558283591643, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.333333333333335e-07, |
|
"logits/chosen": -1.3947551250457764, |
|
"logits/rejected": -1.0338518619537354, |
|
"logps/chosen": -513.7941284179688, |
|
"logps/rejected": -851.7515869140625, |
|
"loss": 0.1978, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0025823800824582577, |
|
"rewards/margins": 0.007740010507404804, |
|
"rewards/rejected": -0.005157629959285259, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.666666666666667e-07, |
|
"logits/chosen": -1.544039249420166, |
|
"logits/rejected": -1.1640592813491821, |
|
"logps/chosen": -444.1375427246094, |
|
"logps/rejected": -751.6806640625, |
|
"loss": 0.1943, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.006201503798365593, |
|
"rewards/margins": 0.012399530969560146, |
|
"rewards/rejected": -0.006198027171194553, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -1.7549737691879272, |
|
"logits/rejected": -1.134901523590088, |
|
"logps/chosen": -524.1077880859375, |
|
"logps/rejected": -897.7693481445312, |
|
"loss": 0.1816, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.010963965207338333, |
|
"rewards/margins": 0.028727427124977112, |
|
"rewards/rejected": -0.01776346191763878, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.333333333333334e-07, |
|
"logits/chosen": -1.707073450088501, |
|
"logits/rejected": -0.9422414898872375, |
|
"logps/chosen": -418.4791564941406, |
|
"logps/rejected": -887.1856689453125, |
|
"loss": 0.166, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.011874416843056679, |
|
"rewards/margins": 0.05441901832818985, |
|
"rewards/rejected": -0.042544592171907425, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.066666666666667e-06, |
|
"logits/chosen": -1.7464405298233032, |
|
"logits/rejected": -1.3240439891815186, |
|
"logps/chosen": -393.67059326171875, |
|
"logps/rejected": -797.724609375, |
|
"loss": 0.1765, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.014196820557117462, |
|
"rewards/margins": 0.05819591134786606, |
|
"rewards/rejected": -0.043999094516038895, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -1.47357976436615, |
|
"logits/rejected": -0.9985636472702026, |
|
"logps/chosen": -445.72979736328125, |
|
"logps/rejected": -712.3211059570312, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.003458250779658556, |
|
"rewards/margins": 0.06728541851043701, |
|
"rewards/rejected": -0.06382717192173004, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"logits/chosen": -1.5050207376480103, |
|
"logits/rejected": -1.0137097835540771, |
|
"logps/chosen": -409.77252197265625, |
|
"logps/rejected": -882.01025390625, |
|
"loss": 0.1755, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.011769925244152546, |
|
"rewards/margins": 0.09516488015651703, |
|
"rewards/rejected": -0.1069348007440567, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4666666666666669e-06, |
|
"logits/chosen": -1.508277177810669, |
|
"logits/rejected": -0.9909588098526001, |
|
"logps/chosen": -446.08514404296875, |
|
"logps/rejected": -970.3818359375, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.039314642548561096, |
|
"rewards/margins": 0.12522205710411072, |
|
"rewards/rejected": -0.16453669965267181, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -1.5001076459884644, |
|
"logits/rejected": -1.1417639255523682, |
|
"logps/chosen": -595.5568237304688, |
|
"logps/rejected": -1075.8310546875, |
|
"loss": 0.1178, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11702040582895279, |
|
"rewards/margins": 0.1300191581249237, |
|
"rewards/rejected": -0.2470395863056183, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7333333333333336e-06, |
|
"logits/chosen": -1.6354900598526, |
|
"logits/rejected": -1.1173183917999268, |
|
"logps/chosen": -596.342041015625, |
|
"logps/rejected": -1051.9910888671875, |
|
"loss": 0.122, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1303187757730484, |
|
"rewards/margins": 0.14471343159675598, |
|
"rewards/rejected": -0.2750321924686432, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"logits/chosen": -1.857448935508728, |
|
"logits/rejected": -1.1995853185653687, |
|
"logps/chosen": -488.5594177246094, |
|
"logps/rejected": -1067.145263671875, |
|
"loss": 0.1382, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11596964299678802, |
|
"rewards/margins": 0.1565937101840973, |
|
"rewards/rejected": -0.2725633978843689, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -1.6261777877807617, |
|
"logits/rejected": -1.1077944040298462, |
|
"logps/chosen": -564.3380737304688, |
|
"logps/rejected": -1317.8497314453125, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14445793628692627, |
|
"rewards/margins": 0.2764219641685486, |
|
"rewards/rejected": -0.42087993025779724, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.133333333333334e-06, |
|
"logits/chosen": -1.4915621280670166, |
|
"logits/rejected": -1.1237232685089111, |
|
"logps/chosen": -487.80487060546875, |
|
"logps/rejected": -1048.151611328125, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14958631992340088, |
|
"rewards/margins": 0.19907937943935394, |
|
"rewards/rejected": -0.3486657440662384, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.266666666666667e-06, |
|
"logits/chosen": -1.462720274925232, |
|
"logits/rejected": -1.1023520231246948, |
|
"logps/chosen": -552.3968505859375, |
|
"logps/rejected": -1198.3912353515625, |
|
"loss": 0.0924, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1677219718694687, |
|
"rewards/margins": 0.21208930015563965, |
|
"rewards/rejected": -0.37981128692626953, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -1.6897376775741577, |
|
"logits/rejected": -0.9965440630912781, |
|
"logps/chosen": -626.109619140625, |
|
"logps/rejected": -1155.2620849609375, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15819820761680603, |
|
"rewards/margins": 0.23869426548480988, |
|
"rewards/rejected": -0.3968924880027771, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5333333333333338e-06, |
|
"logits/chosen": -1.8232837915420532, |
|
"logits/rejected": -0.823337197303772, |
|
"logps/chosen": -675.5474853515625, |
|
"logps/rejected": -1195.9923095703125, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12152433395385742, |
|
"rewards/margins": 0.2081596404314041, |
|
"rewards/rejected": -0.32968395948410034, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.666666666666667e-06, |
|
"logits/chosen": -1.9476888179779053, |
|
"logits/rejected": -1.0242823362350464, |
|
"logps/chosen": -624.99951171875, |
|
"logps/rejected": -1197.134521484375, |
|
"loss": 0.0905, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12548482418060303, |
|
"rewards/margins": 0.23341400921344757, |
|
"rewards/rejected": -0.3588988482952118, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -1.518812894821167, |
|
"logits/rejected": -1.046945571899414, |
|
"logps/chosen": -764.122314453125, |
|
"logps/rejected": -1280.217041015625, |
|
"loss": 0.1437, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.23460201919078827, |
|
"rewards/margins": 0.19938690960407257, |
|
"rewards/rejected": -0.4339889585971832, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"logits/chosen": -1.6284335851669312, |
|
"logits/rejected": -1.1508177518844604, |
|
"logps/chosen": -523.3719482421875, |
|
"logps/rejected": -1222.5562744140625, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14848622679710388, |
|
"rewards/margins": 0.24298810958862305, |
|
"rewards/rejected": -0.39147430658340454, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.066666666666667e-06, |
|
"logits/chosen": -1.6719776391983032, |
|
"logits/rejected": -0.9572904706001282, |
|
"logps/chosen": -747.065185546875, |
|
"logps/rejected": -1382.8427734375, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23779627680778503, |
|
"rewards/margins": 0.2256799042224884, |
|
"rewards/rejected": -0.4634762406349182, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -1.840018630027771, |
|
"logits/rejected": -1.1092523336410522, |
|
"logps/chosen": -612.7644653320312, |
|
"logps/rejected": -1217.589111328125, |
|
"loss": 0.0733, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16975006461143494, |
|
"rewards/margins": 0.2756669223308563, |
|
"rewards/rejected": -0.44541701674461365, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -1.574760913848877, |
|
"logits/rejected": -1.1205968856811523, |
|
"logps/chosen": -629.0726318359375, |
|
"logps/rejected": -1156.187255859375, |
|
"loss": 0.0998, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13886727392673492, |
|
"rewards/margins": 0.202668234705925, |
|
"rewards/rejected": -0.3415355086326599, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"logits/chosen": -1.7936065196990967, |
|
"logits/rejected": -0.9479697942733765, |
|
"logps/chosen": -538.048583984375, |
|
"logps/rejected": -1271.905029296875, |
|
"loss": 0.063, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.10844652354717255, |
|
"rewards/margins": 0.2883257269859314, |
|
"rewards/rejected": -0.39677220582962036, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -1.7110416889190674, |
|
"logits/rejected": -1.0976063013076782, |
|
"logps/chosen": -601.929443359375, |
|
"logps/rejected": -1087.264404296875, |
|
"loss": 0.074, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13733436167240143, |
|
"rewards/margins": 0.2208215445280075, |
|
"rewards/rejected": -0.35815590620040894, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"logits/chosen": -1.684920310974121, |
|
"logits/rejected": -1.0355182886123657, |
|
"logps/chosen": -502.43438720703125, |
|
"logps/rejected": -1141.5850830078125, |
|
"loss": 0.0975, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10886510461568832, |
|
"rewards/margins": 0.2565527558326721, |
|
"rewards/rejected": -0.3654178977012634, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.866666666666667e-06, |
|
"logits/chosen": -1.7269914150238037, |
|
"logits/rejected": -0.9788557887077332, |
|
"logps/chosen": -717.2500610351562, |
|
"logps/rejected": -1336.142333984375, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.23452234268188477, |
|
"rewards/margins": 0.29266995191574097, |
|
"rewards/rejected": -0.527192234992981, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.7060441970825195, |
|
"logits/rejected": -1.1475598812103271, |
|
"logps/chosen": -730.9443969726562, |
|
"logps/rejected": -1244.9676513671875, |
|
"loss": 0.1341, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.255375474691391, |
|
"rewards/margins": 0.19869598746299744, |
|
"rewards/rejected": -0.4540714621543884, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.133333333333333e-06, |
|
"logits/chosen": -1.7061704397201538, |
|
"logits/rejected": -1.1434520483016968, |
|
"logps/chosen": -705.079833984375, |
|
"logps/rejected": -1187.599365234375, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13966450095176697, |
|
"rewards/margins": 0.22368240356445312, |
|
"rewards/rejected": -0.3633468747138977, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.266666666666668e-06, |
|
"logits/chosen": -2.0864200592041016, |
|
"logits/rejected": -1.0598593950271606, |
|
"logps/chosen": -621.1461181640625, |
|
"logps/rejected": -1218.3201904296875, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12446784973144531, |
|
"rewards/margins": 0.285147488117218, |
|
"rewards/rejected": -0.40961527824401855, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -1.633683204650879, |
|
"logits/rejected": -1.259135127067566, |
|
"logps/chosen": -594.0384521484375, |
|
"logps/rejected": -1040.244873046875, |
|
"loss": 0.1407, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13469278812408447, |
|
"rewards/margins": 0.18073201179504395, |
|
"rewards/rejected": -0.3154247999191284, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.533333333333334e-06, |
|
"logits/chosen": -1.5713467597961426, |
|
"logits/rejected": -1.1714986562728882, |
|
"logps/chosen": -552.28515625, |
|
"logps/rejected": -954.9786376953125, |
|
"loss": 0.102, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16208061575889587, |
|
"rewards/margins": 0.17426642775535583, |
|
"rewards/rejected": -0.3363470435142517, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.666666666666667e-06, |
|
"logits/chosen": -1.383143424987793, |
|
"logits/rejected": -1.1191000938415527, |
|
"logps/chosen": -495.08245849609375, |
|
"logps/rejected": -1157.8853759765625, |
|
"loss": 0.0813, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1548539251089096, |
|
"rewards/margins": 0.23339009284973145, |
|
"rewards/rejected": -0.38824400305747986, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -1.7334524393081665, |
|
"logits/rejected": -1.072506070137024, |
|
"logps/chosen": -742.59814453125, |
|
"logps/rejected": -1228.1810302734375, |
|
"loss": 0.128, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15262000262737274, |
|
"rewards/margins": 0.23166091740131378, |
|
"rewards/rejected": -0.3842809200286865, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.933333333333334e-06, |
|
"logits/chosen": -1.8477518558502197, |
|
"logits/rejected": -1.3124161958694458, |
|
"logps/chosen": -677.6192626953125, |
|
"logps/rejected": -1154.7989501953125, |
|
"loss": 0.1155, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12368879467248917, |
|
"rewards/margins": 0.20320534706115723, |
|
"rewards/rejected": -0.32689422369003296, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999972922944898e-06, |
|
"logits/chosen": -1.8957774639129639, |
|
"logits/rejected": -1.3899834156036377, |
|
"logps/chosen": -547.0092163085938, |
|
"logps/rejected": -1174.024658203125, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06287173926830292, |
|
"rewards/margins": 0.2412436455488205, |
|
"rewards/rejected": -0.304115355014801, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -1.791933298110962, |
|
"logits/rejected": -1.1157985925674438, |
|
"logps/chosen": -516.9517211914062, |
|
"logps/rejected": -1007.5950317382812, |
|
"loss": 0.0993, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06450649350881577, |
|
"rewards/margins": 0.22859111428260803, |
|
"rewards/rejected": -0.293097585439682, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999323102948655e-06, |
|
"logits/chosen": -1.6634111404418945, |
|
"logits/rejected": -0.9339207410812378, |
|
"logps/chosen": -541.6902465820312, |
|
"logps/rejected": -1222.6890869140625, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11366814374923706, |
|
"rewards/margins": 0.3344195783138275, |
|
"rewards/rejected": -0.44808775186538696, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998673339256785e-06, |
|
"logits/chosen": -1.7227275371551514, |
|
"logits/rejected": -1.4248206615447998, |
|
"logps/chosen": -612.4100341796875, |
|
"logps/rejected": -1344.530517578125, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16698965430259705, |
|
"rewards/margins": 0.24890287220478058, |
|
"rewards/rejected": -0.41589251160621643, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -1.7536413669586182, |
|
"logits/rejected": -1.3619048595428467, |
|
"logps/chosen": -464.01678466796875, |
|
"logps/rejected": -1163.01953125, |
|
"loss": 0.0692, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.021548744291067123, |
|
"rewards/margins": 0.24685168266296387, |
|
"rewards/rejected": -0.2684004306793213, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.996724385978142e-06, |
|
"logits/chosen": -2.028895139694214, |
|
"logits/rejected": -1.2840532064437866, |
|
"logps/chosen": -478.65533447265625, |
|
"logps/rejected": -1112.1922607421875, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.00986658688634634, |
|
"rewards/margins": 0.23762169480323792, |
|
"rewards/rejected": -0.24748826026916504, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995425365260585e-06, |
|
"logits/chosen": -1.9320755004882812, |
|
"logits/rejected": -1.3463362455368042, |
|
"logps/chosen": -468.8946838378906, |
|
"logps/rejected": -1079.24951171875, |
|
"loss": 0.0844, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.05752667784690857, |
|
"rewards/margins": 0.25381767749786377, |
|
"rewards/rejected": -0.31134432554244995, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -1.8947498798370361, |
|
"logits/rejected": -1.4494032859802246, |
|
"logps/chosen": -612.285400390625, |
|
"logps/rejected": -1175.610595703125, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20282471179962158, |
|
"rewards/margins": 0.24255582690238953, |
|
"rewards/rejected": -0.4453805387020111, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992178798434684e-06, |
|
"logits/chosen": -1.8909610509872437, |
|
"logits/rejected": -1.2547855377197266, |
|
"logps/chosen": -703.7716064453125, |
|
"logps/rejected": -1249.4072265625, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18841485679149628, |
|
"rewards/margins": 0.24599456787109375, |
|
"rewards/rejected": -0.4344094693660736, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990231533628719e-06, |
|
"logits/chosen": -1.9798517227172852, |
|
"logits/rejected": -1.4687498807907104, |
|
"logps/chosen": -451.41192626953125, |
|
"logps/rejected": -1110.53466796875, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.05847315117716789, |
|
"rewards/margins": 0.2581016421318054, |
|
"rewards/rejected": -0.3165748119354248, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -1.9682140350341797, |
|
"logits/rejected": -1.161273717880249, |
|
"logps/chosen": -534.8321533203125, |
|
"logps/rejected": -1069.1895751953125, |
|
"loss": 0.0861, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08595879375934601, |
|
"rewards/margins": 0.2487233430147171, |
|
"rewards/rejected": -0.3346821367740631, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985689884830711e-06, |
|
"logits/chosen": -1.800855040550232, |
|
"logits/rejected": -1.0105341672897339, |
|
"logps/chosen": -645.858154296875, |
|
"logps/rejected": -1335.704833984375, |
|
"loss": 0.06, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17263874411582947, |
|
"rewards/margins": 0.28426748514175415, |
|
"rewards/rejected": -0.4569062292575836, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -1.845642328262329, |
|
"logits/rejected": -1.124245285987854, |
|
"logps/chosen": -684.5682373046875, |
|
"logps/rejected": -1378.302490234375, |
|
"loss": 0.0689, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19359903037548065, |
|
"rewards/margins": 0.3165056109428406, |
|
"rewards/rejected": -0.5101046562194824, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -1.7543941736221313, |
|
"logits/rejected": -1.139478087425232, |
|
"logps/chosen": -591.703857421875, |
|
"logps/rejected": -1196.9752197265625, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17276380956172943, |
|
"rewards/margins": 0.274630606174469, |
|
"rewards/rejected": -0.44739437103271484, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97726270502586e-06, |
|
"logits/chosen": -1.7981364727020264, |
|
"logits/rejected": -1.2439225912094116, |
|
"logps/chosen": -622.7135620117188, |
|
"logps/rejected": -1289.652587890625, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16895990073680878, |
|
"rewards/margins": 0.27986788749694824, |
|
"rewards/rejected": -0.4488278329372406, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974024011595864e-06, |
|
"logits/chosen": -1.7756052017211914, |
|
"logits/rejected": -1.273600697517395, |
|
"logps/chosen": -779.8721923828125, |
|
"logps/rejected": -1319.716064453125, |
|
"loss": 0.0889, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19681531190872192, |
|
"rewards/margins": 0.24762988090515137, |
|
"rewards/rejected": -0.4444451928138733, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -1.7580140829086304, |
|
"logits/rejected": -1.2909038066864014, |
|
"logps/chosen": -611.9743041992188, |
|
"logps/rejected": -1292.6343994140625, |
|
"loss": 0.0655, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14458617568016052, |
|
"rewards/margins": 0.3040325939655304, |
|
"rewards/rejected": -0.44861873984336853, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966903830281449e-06, |
|
"logits/chosen": -2.1057114601135254, |
|
"logits/rejected": -1.096806287765503, |
|
"logps/chosen": -525.5113525390625, |
|
"logps/rejected": -1102.981689453125, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09377844631671906, |
|
"rewards/margins": 0.2827639877796173, |
|
"rewards/rejected": -0.3765423893928528, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9630229593330226e-06, |
|
"logits/chosen": -1.8463417291641235, |
|
"logits/rejected": -1.2035672664642334, |
|
"logps/chosen": -658.9556884765625, |
|
"logps/rejected": -1261.522705078125, |
|
"loss": 0.0586, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1989162266254425, |
|
"rewards/margins": 0.25805556774139404, |
|
"rewards/rejected": -0.45697179436683655, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -1.9065221548080444, |
|
"logits/rejected": -1.3213990926742554, |
|
"logps/chosen": -802.2817993164062, |
|
"logps/rejected": -1280.823486328125, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2689030170440674, |
|
"rewards/margins": 0.23530542850494385, |
|
"rewards/rejected": -0.5042084455490112, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.954621338136399e-06, |
|
"logits/chosen": -1.796940803527832, |
|
"logits/rejected": -1.1448333263397217, |
|
"logps/chosen": -800.59619140625, |
|
"logps/rejected": -1366.584716796875, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.21505789458751678, |
|
"rewards/margins": 0.30636119842529297, |
|
"rewards/rejected": -0.5214190483093262, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.95010131585597e-06, |
|
"logits/chosen": -1.6272817850112915, |
|
"logits/rejected": -0.8004865646362305, |
|
"logps/chosen": -684.5340576171875, |
|
"logps/rejected": -1158.3621826171875, |
|
"loss": 0.1052, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14372439682483673, |
|
"rewards/margins": 0.24541731178760529, |
|
"rewards/rejected": -0.389141708612442, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -1.791548490524292, |
|
"logits/rejected": -1.2827670574188232, |
|
"logps/chosen": -447.36956787109375, |
|
"logps/rejected": -1077.491943359375, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08956549316644669, |
|
"rewards/margins": 0.26021477580070496, |
|
"rewards/rejected": -0.34978026151657104, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.940424806108619e-06, |
|
"logits/chosen": -1.989205002784729, |
|
"logits/rejected": -1.3596338033676147, |
|
"logps/chosen": -683.2862548828125, |
|
"logps/rejected": -1159.7490234375, |
|
"loss": 0.1118, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.177694171667099, |
|
"rewards/margins": 0.19292449951171875, |
|
"rewards/rejected": -0.37061864137649536, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.935269157073597e-06, |
|
"logits/chosen": -1.8252586126327515, |
|
"logits/rejected": -1.4199360609054565, |
|
"logps/chosen": -499.2095642089844, |
|
"logps/rejected": -1207.395263671875, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09857722371816635, |
|
"rewards/margins": 0.3041486144065857, |
|
"rewards/rejected": -0.40272584557533264, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -1.646740198135376, |
|
"logits/rejected": -0.9128702878952026, |
|
"logps/chosen": -546.8827514648438, |
|
"logps/rejected": -1260.8980712890625, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10494896024465561, |
|
"rewards/margins": 0.3301486670970917, |
|
"rewards/rejected": -0.4350976347923279, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924325304226745e-06, |
|
"logits/chosen": -1.8604828119277954, |
|
"logits/rejected": -1.0761983394622803, |
|
"logps/chosen": -693.2246704101562, |
|
"logps/rejected": -1271.934326171875, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19305351376533508, |
|
"rewards/margins": 0.2916993200778961, |
|
"rewards/rejected": -0.4847528040409088, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.91853804865716e-06, |
|
"logits/chosen": -1.38680100440979, |
|
"logits/rejected": -0.9548083543777466, |
|
"logps/chosen": -604.3922729492188, |
|
"logps/rejected": -1208.889404296875, |
|
"loss": 0.09, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18794074654579163, |
|
"rewards/margins": 0.24451354146003723, |
|
"rewards/rejected": -0.43245425820350647, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -1.663114309310913, |
|
"logits/rejected": -1.3176881074905396, |
|
"logps/chosen": -644.1502685546875, |
|
"logps/rejected": -1255.1209716796875, |
|
"loss": 0.107, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18079988658428192, |
|
"rewards/margins": 0.23088447749614716, |
|
"rewards/rejected": -0.4116843640804291, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9063353863980565e-06, |
|
"logits/chosen": -1.651368498802185, |
|
"logits/rejected": -1.0333479642868042, |
|
"logps/chosen": -622.9571533203125, |
|
"logps/rejected": -1196.442138671875, |
|
"loss": 0.1069, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13679654896259308, |
|
"rewards/margins": 0.2643422484397888, |
|
"rewards/rejected": -0.4011387825012207, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.899921037021719e-06, |
|
"logits/chosen": -1.8946233987808228, |
|
"logits/rejected": -1.4026496410369873, |
|
"logps/chosen": -608.56005859375, |
|
"logps/rejected": -1102.85107421875, |
|
"loss": 0.1145, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18631377816200256, |
|
"rewards/margins": 0.20509126782417297, |
|
"rewards/rejected": -0.39140504598617554, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -1.664089560508728, |
|
"logits/rejected": -1.1638383865356445, |
|
"logps/chosen": -681.1607055664062, |
|
"logps/rejected": -1325.0721435546875, |
|
"loss": 0.0936, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.213485985994339, |
|
"rewards/margins": 0.2793218493461609, |
|
"rewards/rejected": -0.4928078055381775, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.88646908061933e-06, |
|
"logits/chosen": -1.7145121097564697, |
|
"logits/rejected": -1.0264990329742432, |
|
"logps/chosen": -640.181396484375, |
|
"logps/rejected": -1224.787841796875, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20638792216777802, |
|
"rewards/margins": 0.2602555751800537, |
|
"rewards/rejected": -0.46664348244667053, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.879432639152935e-06, |
|
"logits/chosen": -1.5630238056182861, |
|
"logits/rejected": -0.8563889265060425, |
|
"logps/chosen": -688.0330810546875, |
|
"logps/rejected": -1333.184326171875, |
|
"loss": 0.087, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20561043918132782, |
|
"rewards/margins": 0.285735547542572, |
|
"rewards/rejected": -0.4913460314273834, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -1.7675155401229858, |
|
"logits/rejected": -1.3769733905792236, |
|
"logps/chosen": -606.588134765625, |
|
"logps/rejected": -1178.3841552734375, |
|
"loss": 0.0998, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10641028732061386, |
|
"rewards/margins": 0.26885437965393066, |
|
"rewards/rejected": -0.3752647042274475, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.864741878038218e-06, |
|
"logits/chosen": -1.5133328437805176, |
|
"logits/rejected": -1.1089714765548706, |
|
"logps/chosen": -560.0144653320312, |
|
"logps/rejected": -1235.3402099609375, |
|
"loss": 0.085, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12044046074151993, |
|
"rewards/margins": 0.26784801483154297, |
|
"rewards/rejected": -0.3882884979248047, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.857088831287158e-06, |
|
"logits/chosen": -1.8833305835723877, |
|
"logits/rejected": -1.1213592290878296, |
|
"logps/chosen": -614.7459716796875, |
|
"logps/rejected": -1231.1275634765625, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14084835350513458, |
|
"rewards/margins": 0.25714007019996643, |
|
"rewards/rejected": -0.3979884088039398, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.6487013101577759, |
|
"logits/rejected": -1.0574976205825806, |
|
"logps/chosen": -638.1517333984375, |
|
"logps/rejected": -1221.0208740234375, |
|
"loss": 0.0773, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15978939831256866, |
|
"rewards/margins": 0.2666565477848053, |
|
"rewards/rejected": -0.4264459013938904, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.841170720873723e-06, |
|
"logits/chosen": -1.6006218194961548, |
|
"logits/rejected": -1.0765124559402466, |
|
"logps/chosen": -648.8009643554688, |
|
"logps/rejected": -1193.5694580078125, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20380504429340363, |
|
"rewards/margins": 0.2502138018608093, |
|
"rewards/rejected": -0.45401889085769653, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.832907036453647e-06, |
|
"logits/chosen": -1.6672321557998657, |
|
"logits/rejected": -1.2181921005249023, |
|
"logps/chosen": -757.1592407226562, |
|
"logps/rejected": -1409.3095703125, |
|
"loss": 0.0809, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.24005849659442902, |
|
"rewards/margins": 0.2847335636615753, |
|
"rewards/rejected": -0.5247920155525208, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -1.3765209913253784, |
|
"logits/rejected": -1.0029934644699097, |
|
"logps/chosen": -498.0054626464844, |
|
"logps/rejected": -1238.66943359375, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15500691533088684, |
|
"rewards/margins": 0.3080350160598755, |
|
"rewards/rejected": -0.4630419611930847, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.815773989205165e-06, |
|
"logits/chosen": -1.7891244888305664, |
|
"logits/rejected": -1.2491934299468994, |
|
"logps/chosen": -556.8863525390625, |
|
"logps/rejected": -1332.5364990234375, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13201333582401276, |
|
"rewards/margins": 0.3241254687309265, |
|
"rewards/rejected": -0.4561387896537781, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.806906110888606e-06, |
|
"logits/chosen": -1.6729551553726196, |
|
"logits/rejected": -1.187744379043579, |
|
"logps/chosen": -529.7684936523438, |
|
"logps/rejected": -1193.1453857421875, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1251310557126999, |
|
"rewards/margins": 0.2839392423629761, |
|
"rewards/rejected": -0.4090702533721924, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -1.6085048913955688, |
|
"logits/rejected": -1.2299137115478516, |
|
"logps/chosen": -579.7472534179688, |
|
"logps/rejected": -1075.484130859375, |
|
"loss": 0.0994, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15328553318977356, |
|
"rewards/margins": 0.2056526243686676, |
|
"rewards/rejected": -0.3589381277561188, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.788571486639948e-06, |
|
"logits/chosen": -1.4437693357467651, |
|
"logits/rejected": -0.9531752467155457, |
|
"logps/chosen": -721.705078125, |
|
"logps/rejected": -1422.717529296875, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19354596734046936, |
|
"rewards/margins": 0.2650890648365021, |
|
"rewards/rejected": -0.45863503217697144, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.779106329331665e-06, |
|
"logits/chosen": -1.7850446701049805, |
|
"logits/rejected": -1.1975328922271729, |
|
"logps/chosen": -639.4754028320312, |
|
"logps/rejected": -1183.6280517578125, |
|
"loss": 0.1136, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18970054388046265, |
|
"rewards/margins": 0.22527900338172913, |
|
"rewards/rejected": -0.4149795472621918, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -1.6451295614242554, |
|
"logits/rejected": -0.9056906700134277, |
|
"logps/chosen": -704.8148193359375, |
|
"logps/rejected": -1420.8524169921875, |
|
"loss": 0.0473, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17515410482883453, |
|
"rewards/margins": 0.3173523545265198, |
|
"rewards/rejected": -0.4925064444541931, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.759584424871302e-06, |
|
"logits/chosen": -1.5979700088500977, |
|
"logits/rejected": -1.1091766357421875, |
|
"logps/chosen": -686.29052734375, |
|
"logps/rejected": -1212.19580078125, |
|
"loss": 0.1128, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15797309577465057, |
|
"rewards/margins": 0.21523161232471466, |
|
"rewards/rejected": -0.3732047379016876, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.749529369216246e-06, |
|
"logits/chosen": -1.7736440896987915, |
|
"logits/rejected": -1.172586441040039, |
|
"logps/chosen": -660.5985107421875, |
|
"logps/rejected": -1275.8603515625, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.12020470947027206, |
|
"rewards/margins": 0.2714688181877136, |
|
"rewards/rejected": -0.39167362451553345, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -1.8436565399169922, |
|
"logits/rejected": -1.348578929901123, |
|
"logps/chosen": -471.02703857421875, |
|
"logps/rejected": -1074.3665771484375, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.061982929706573486, |
|
"rewards/margins": 0.23231768608093262, |
|
"rewards/rejected": -0.2943006157875061, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7288354071380415e-06, |
|
"logits/chosen": -1.6346375942230225, |
|
"logits/rejected": -1.053733468055725, |
|
"logps/chosen": -518.6777954101562, |
|
"logps/rejected": -1128.2366943359375, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11276010423898697, |
|
"rewards/margins": 0.24085621535778046, |
|
"rewards/rejected": -0.35361629724502563, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7181982937661485e-06, |
|
"logits/chosen": -1.5796483755111694, |
|
"logits/rejected": -0.9548759460449219, |
|
"logps/chosen": -694.7703857421875, |
|
"logps/rejected": -1379.165771484375, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21314816176891327, |
|
"rewards/margins": 0.28819718956947327, |
|
"rewards/rejected": -0.5013453364372253, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -1.6242942810058594, |
|
"logits/rejected": -1.100235104560852, |
|
"logps/chosen": -656.1912841796875, |
|
"logps/rejected": -1320.0205078125, |
|
"loss": 0.0818, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20513398945331573, |
|
"rewards/margins": 0.2904512286186218, |
|
"rewards/rejected": -0.49558526277542114, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.696348410599244e-06, |
|
"logits/chosen": -1.6266180276870728, |
|
"logits/rejected": -1.112475872039795, |
|
"logps/chosen": -615.5531005859375, |
|
"logps/rejected": -1176.023681640625, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.21413663029670715, |
|
"rewards/margins": 0.24315333366394043, |
|
"rewards/rejected": -0.45728999376296997, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.685137534011549e-06, |
|
"logits/chosen": -1.8083276748657227, |
|
"logits/rejected": -0.9349889755249023, |
|
"logps/chosen": -659.9114990234375, |
|
"logps/rejected": -1210.7021484375, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1867416799068451, |
|
"rewards/margins": 0.27049291133880615, |
|
"rewards/rejected": -0.45723456144332886, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -1.5211584568023682, |
|
"logits/rejected": -0.8713824152946472, |
|
"logps/chosen": -557.5753784179688, |
|
"logps/rejected": -1095.0653076171875, |
|
"loss": 0.1147, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1781322956085205, |
|
"rewards/margins": 0.23243267834186554, |
|
"rewards/rejected": -0.41056495904922485, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662148767637578e-06, |
|
"logits/chosen": -1.6907579898834229, |
|
"logits/rejected": -0.92974454164505, |
|
"logps/chosen": -672.3154907226562, |
|
"logps/rejected": -1263.9849853515625, |
|
"loss": 0.0513, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.14491698145866394, |
|
"rewards/margins": 0.2988959848880768, |
|
"rewards/rejected": -0.44381293654441833, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.650372869738415e-06, |
|
"logits/chosen": -1.713449239730835, |
|
"logits/rejected": -1.046890139579773, |
|
"logps/chosen": -656.3117065429688, |
|
"logps/rejected": -1259.781982421875, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1550595909357071, |
|
"rewards/margins": 0.27939510345458984, |
|
"rewards/rejected": -0.43445467948913574, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -1.6114223003387451, |
|
"logits/rejected": -0.8841923475265503, |
|
"logps/chosen": -619.8568725585938, |
|
"logps/rejected": -1200.001953125, |
|
"loss": 0.092, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18295657634735107, |
|
"rewards/margins": 0.27934443950653076, |
|
"rewards/rejected": -0.46230101585388184, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626263146105875e-06, |
|
"logits/chosen": -1.8829456567764282, |
|
"logits/rejected": -1.1874229907989502, |
|
"logps/chosen": -612.0319213867188, |
|
"logps/rejected": -1310.8118896484375, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15665313601493835, |
|
"rewards/margins": 0.32128262519836426, |
|
"rewards/rejected": -0.4779357314109802, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613931409386196e-06, |
|
"logits/chosen": -1.7148971557617188, |
|
"logits/rejected": -1.1706236600875854, |
|
"logps/chosen": -651.0546875, |
|
"logps/rejected": -1184.420654296875, |
|
"loss": 0.0975, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15911589562892914, |
|
"rewards/margins": 0.26245635747909546, |
|
"rewards/rejected": -0.4215722680091858, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -1.7349941730499268, |
|
"logits/rejected": -1.0320522785186768, |
|
"logps/chosen": -641.8563842773438, |
|
"logps/rejected": -1191.799072265625, |
|
"loss": 0.0864, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12740136682987213, |
|
"rewards/margins": 0.27434709668159485, |
|
"rewards/rejected": -0.4017484784126282, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -1.7186208963394165, |
|
"logits/rejected": -1.139203429222107, |
|
"logps/chosen": -608.3776245117188, |
|
"logps/rejected": -1194.766845703125, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12015128135681152, |
|
"rewards/margins": 0.28770390152931213, |
|
"rewards/rejected": -0.4078551232814789, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.575841568909494e-06, |
|
"logits/chosen": -1.9281619787216187, |
|
"logits/rejected": -1.021177887916565, |
|
"logps/chosen": -606.2479248046875, |
|
"logps/rejected": -1277.761474609375, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12076227366924286, |
|
"rewards/margins": 0.31744498014450073, |
|
"rewards/rejected": -0.4382072389125824, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -1.772539734840393, |
|
"logits/rejected": -1.1384470462799072, |
|
"logps/chosen": -584.2857666015625, |
|
"logps/rejected": -1120.206787109375, |
|
"loss": 0.0924, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10691050440073013, |
|
"rewards/margins": 0.25287091732025146, |
|
"rewards/rejected": -0.359781414270401, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549547190300622e-06, |
|
"logits/chosen": -1.6101019382476807, |
|
"logits/rejected": -0.995293915271759, |
|
"logps/chosen": -663.1490478515625, |
|
"logps/rejected": -1269.482666015625, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19160196185112, |
|
"rewards/margins": 0.2655636966228485, |
|
"rewards/rejected": -0.4571656584739685, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536133049620143e-06, |
|
"logits/chosen": -1.6123756170272827, |
|
"logits/rejected": -0.8887365460395813, |
|
"logps/chosen": -645.6787719726562, |
|
"logps/rejected": -1266.5006103515625, |
|
"loss": 0.0808, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13368944823741913, |
|
"rewards/margins": 0.28590840101242065, |
|
"rewards/rejected": -0.4195978045463562, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -1.7597821950912476, |
|
"logits/rejected": -1.0739284753799438, |
|
"logps/chosen": -670.746337890625, |
|
"logps/rejected": -1303.915771484375, |
|
"loss": 0.0671, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15525345504283905, |
|
"rewards/margins": 0.28055456280708313, |
|
"rewards/rejected": -0.435808002948761, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508776676821739e-06, |
|
"logits/chosen": -1.6723902225494385, |
|
"logits/rejected": -1.1365679502487183, |
|
"logps/chosen": -659.1893310546875, |
|
"logps/rejected": -1257.1162109375, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18045826256275177, |
|
"rewards/margins": 0.28194642066955566, |
|
"rewards/rejected": -0.46240463852882385, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.494836815027022e-06, |
|
"logits/chosen": -1.674168348312378, |
|
"logits/rejected": -0.8119763135910034, |
|
"logps/chosen": -675.2777099609375, |
|
"logps/rejected": -1269.997802734375, |
|
"loss": 0.1022, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2055848389863968, |
|
"rewards/margins": 0.2812694013118744, |
|
"rewards/rejected": -0.48685422539711, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -1.5376824140548706, |
|
"logits/rejected": -0.9127294421195984, |
|
"logps/chosen": -633.2178344726562, |
|
"logps/rejected": -1178.2454833984375, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18428584933280945, |
|
"rewards/margins": 0.24417026340961456, |
|
"rewards/rejected": -0.4284561276435852, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.466439779715696e-06, |
|
"logits/chosen": -1.4871512651443481, |
|
"logits/rejected": -0.8935056924819946, |
|
"logps/chosen": -606.7117309570312, |
|
"logps/rejected": -1183.592041015625, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1557384431362152, |
|
"rewards/margins": 0.2602311670780182, |
|
"rewards/rejected": -0.4159695506095886, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.451985066691649e-06, |
|
"logits/chosen": -1.5563310384750366, |
|
"logits/rejected": -1.0307669639587402, |
|
"logps/chosen": -558.4649047851562, |
|
"logps/rejected": -1085.895751953125, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1063220277428627, |
|
"rewards/margins": 0.26596084237098694, |
|
"rewards/rejected": -0.37228289246559143, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -1.692957878112793, |
|
"logits/rejected": -1.1781022548675537, |
|
"logps/chosen": -514.2180786132812, |
|
"logps/rejected": -1231.850341796875, |
|
"loss": 0.0484, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.07757623493671417, |
|
"rewards/margins": 0.30450260639190674, |
|
"rewards/rejected": -0.3820788860321045, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.422569512021332e-06, |
|
"logits/chosen": -1.6798826456069946, |
|
"logits/rejected": -1.1389153003692627, |
|
"logps/chosen": -585.5572509765625, |
|
"logps/rejected": -1191.422607421875, |
|
"loss": 0.083, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06024783104658127, |
|
"rewards/margins": 0.2825511693954468, |
|
"rewards/rejected": -0.34279894828796387, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.407611219118363e-06, |
|
"logits/chosen": -1.574668526649475, |
|
"logits/rejected": -1.1745796203613281, |
|
"logps/chosen": -573.3482666015625, |
|
"logps/rejected": -1237.1324462890625, |
|
"loss": 0.0851, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14945417642593384, |
|
"rewards/margins": 0.26941633224487305, |
|
"rewards/rejected": -0.4188705384731293, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -1.5106923580169678, |
|
"logits/rejected": -1.0384470224380493, |
|
"logps/chosen": -653.4393920898438, |
|
"logps/rejected": -1249.9561767578125, |
|
"loss": 0.0827, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21140387654304504, |
|
"rewards/margins": 0.26517191529273987, |
|
"rewards/rejected": -0.4765757620334625, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.377200082453748e-06, |
|
"logits/chosen": -1.553951621055603, |
|
"logits/rejected": -1.1727259159088135, |
|
"logps/chosen": -584.9124145507812, |
|
"logps/rejected": -1274.189453125, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17560866475105286, |
|
"rewards/margins": 0.3020893931388855, |
|
"rewards/rejected": -0.47769802808761597, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.361749873698707e-06, |
|
"logits/chosen": -1.6106698513031006, |
|
"logits/rejected": -0.9710136651992798, |
|
"logps/chosen": -683.4093017578125, |
|
"logps/rejected": -1351.5721435546875, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1945325881242752, |
|
"rewards/margins": 0.3134706914424896, |
|
"rewards/rejected": -0.5080032348632812, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -1.4298365116119385, |
|
"logits/rejected": -0.9186006784439087, |
|
"logps/chosen": -629.0538940429688, |
|
"logps/rejected": -1190.7537841796875, |
|
"loss": 0.1065, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1838223785161972, |
|
"rewards/margins": 0.25950607657432556, |
|
"rewards/rejected": -0.44332846999168396, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.330366868729376e-06, |
|
"logits/chosen": -1.5331902503967285, |
|
"logits/rejected": -0.8202164769172668, |
|
"logps/chosen": -719.1092529296875, |
|
"logps/rejected": -1225.5712890625, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.23043549060821533, |
|
"rewards/margins": 0.24460101127624512, |
|
"rewards/rejected": -0.47503647208213806, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3144367917302964e-06, |
|
"logits/chosen": -1.5329627990722656, |
|
"logits/rejected": -1.0226593017578125, |
|
"logps/chosen": -558.7479248046875, |
|
"logps/rejected": -1248.3746337890625, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17867226898670197, |
|
"rewards/margins": 0.27305805683135986, |
|
"rewards/rejected": -0.451730340719223, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -1.4446375370025635, |
|
"logits/rejected": -1.2019789218902588, |
|
"logps/chosen": -629.9727172851562, |
|
"logps/rejected": -1310.1802978515625, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19203418493270874, |
|
"rewards/margins": 0.26955386996269226, |
|
"rewards/rejected": -0.461588054895401, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2821063899795015e-06, |
|
"logits/chosen": -1.5882418155670166, |
|
"logits/rejected": -0.9619570970535278, |
|
"logps/chosen": -633.544921875, |
|
"logps/rejected": -1342.188720703125, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15442287921905518, |
|
"rewards/margins": 0.3408200740814209, |
|
"rewards/rejected": -0.49524298310279846, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.265708866531238e-06, |
|
"logits/chosen": -1.6361687183380127, |
|
"logits/rejected": -1.160875916481018, |
|
"logps/chosen": -604.972412109375, |
|
"logps/rejected": -1180.0421142578125, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15669772028923035, |
|
"rewards/margins": 0.2758365571498871, |
|
"rewards/rejected": -0.43253427743911743, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -1.4963274002075195, |
|
"logits/rejected": -1.0508579015731812, |
|
"logps/chosen": -670.7274169921875, |
|
"logps/rejected": -1422.38037109375, |
|
"loss": 0.0547, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.18853971362113953, |
|
"rewards/margins": 0.3481212556362152, |
|
"rewards/rejected": -0.5366610288619995, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.232456278273743e-06, |
|
"logits/chosen": -1.5074360370635986, |
|
"logits/rejected": -1.1942028999328613, |
|
"logps/chosen": -621.916015625, |
|
"logps/rejected": -1097.22705078125, |
|
"loss": 0.1119, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20438826084136963, |
|
"rewards/margins": 0.19910171627998352, |
|
"rewards/rejected": -0.40348997712135315, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -1.7932571172714233, |
|
"logits/rejected": -1.156964659690857, |
|
"logps/chosen": -605.7149047851562, |
|
"logps/rejected": -1170.5318603515625, |
|
"loss": 0.0823, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13441742956638336, |
|
"rewards/margins": 0.26707369089126587, |
|
"rewards/rejected": -0.4014911651611328, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -1.5639320611953735, |
|
"logits/rejected": -1.011678695678711, |
|
"logps/chosen": -570.6861572265625, |
|
"logps/rejected": -1016.75390625, |
|
"loss": 0.1338, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.09839320182800293, |
|
"rewards/margins": 0.16072291135787964, |
|
"rewards/rejected": -0.25911611318588257, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.181455249275701e-06, |
|
"logits/chosen": -1.3490540981292725, |
|
"logits/rejected": -0.8460888862609863, |
|
"logps/chosen": -604.7997436523438, |
|
"logps/rejected": -1211.717529296875, |
|
"loss": 0.0948, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16084125638008118, |
|
"rewards/margins": 0.24433521926403046, |
|
"rewards/rejected": -0.40517646074295044, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1641615463459926e-06, |
|
"logits/chosen": -1.4548208713531494, |
|
"logits/rejected": -0.9700274467468262, |
|
"logps/chosen": -662.6041259765625, |
|
"logps/rejected": -1517.4892578125, |
|
"loss": 0.043, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.16144345700740814, |
|
"rewards/margins": 0.34403008222579956, |
|
"rewards/rejected": -0.5054734945297241, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -1.5642893314361572, |
|
"logits/rejected": -1.012499213218689, |
|
"logps/chosen": -522.3748168945312, |
|
"logps/rejected": -1114.373779296875, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11404868215322495, |
|
"rewards/margins": 0.27995288372039795, |
|
"rewards/rejected": -0.3940015733242035, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.129143072053639e-06, |
|
"logits/chosen": -1.8208191394805908, |
|
"logits/rejected": -1.0344860553741455, |
|
"logps/chosen": -717.9315185546875, |
|
"logps/rejected": -1288.3831787109375, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1353410929441452, |
|
"rewards/margins": 0.2894688546657562, |
|
"rewards/rejected": -0.42480993270874023, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.111421334905468e-06, |
|
"logits/chosen": -1.67376708984375, |
|
"logits/rejected": -0.7968643307685852, |
|
"logps/chosen": -653.73193359375, |
|
"logps/rejected": -1237.788330078125, |
|
"loss": 0.0799, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10021106898784637, |
|
"rewards/margins": 0.2965288758277893, |
|
"rewards/rejected": -0.3967399299144745, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -1.5515210628509521, |
|
"logits/rejected": -1.0218132734298706, |
|
"logps/chosen": -771.3948974609375, |
|
"logps/rejected": -1335.147705078125, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16722288727760315, |
|
"rewards/margins": 0.2642812132835388, |
|
"rewards/rejected": -0.4315040707588196, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.075560538069767e-06, |
|
"logits/chosen": -1.6814053058624268, |
|
"logits/rejected": -1.175160527229309, |
|
"logps/chosen": -561.907470703125, |
|
"logps/rejected": -1060.85498046875, |
|
"loss": 0.1004, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06503216922283173, |
|
"rewards/margins": 0.25370270013809204, |
|
"rewards/rejected": -0.3187348544597626, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.05742458558068e-06, |
|
"logits/chosen": -1.8086637258529663, |
|
"logits/rejected": -1.3191715478897095, |
|
"logps/chosen": -529.0259399414062, |
|
"logps/rejected": -1079.9012451171875, |
|
"loss": 0.0936, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0950629860162735, |
|
"rewards/margins": 0.24811288714408875, |
|
"rewards/rejected": -0.34317582845687866, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -1.6986970901489258, |
|
"logits/rejected": -1.07126784324646, |
|
"logps/chosen": -537.1072387695312, |
|
"logps/rejected": -1259.658935546875, |
|
"loss": 0.0665, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07069256901741028, |
|
"rewards/margins": 0.353458434343338, |
|
"rewards/rejected": -0.4241510331630707, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020749429372286e-06, |
|
"logits/chosen": -1.6649013757705688, |
|
"logits/rejected": -1.1139470338821411, |
|
"logps/chosen": -582.5765991210938, |
|
"logps/rejected": -1291.3966064453125, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08450852334499359, |
|
"rewards/margins": 0.30345186591148376, |
|
"rewards/rejected": -0.38796037435531616, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.002213403412492e-06, |
|
"logits/chosen": -1.6457083225250244, |
|
"logits/rejected": -1.1551355123519897, |
|
"logps/chosen": -443.95770263671875, |
|
"logps/rejected": -1125.097412109375, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.04620728641748428, |
|
"rewards/margins": 0.29895836114883423, |
|
"rewards/rejected": -0.3451656699180603, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -1.8475234508514404, |
|
"logits/rejected": -0.9538863897323608, |
|
"logps/chosen": -532.26171875, |
|
"logps/rejected": -1173.6441650390625, |
|
"loss": 0.0721, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07093639671802521, |
|
"rewards/margins": 0.30083587765693665, |
|
"rewards/rejected": -0.37177228927612305, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.964752486015001e-06, |
|
"logits/chosen": -1.702235460281372, |
|
"logits/rejected": -1.0087345838546753, |
|
"logps/chosen": -570.5701904296875, |
|
"logps/rejected": -1237.5167236328125, |
|
"loss": 0.0554, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.10062988102436066, |
|
"rewards/margins": 0.29185742139816284, |
|
"rewards/rejected": -0.3924873471260071, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.945830840419966e-06, |
|
"logits/chosen": -1.8062200546264648, |
|
"logits/rejected": -1.2370173931121826, |
|
"logps/chosen": -563.0610961914062, |
|
"logps/rejected": -1202.4017333984375, |
|
"loss": 0.1014, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08197510987520218, |
|
"rewards/margins": 0.284410685300827, |
|
"rewards/rejected": -0.3663857579231262, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -1.685450792312622, |
|
"logits/rejected": -1.268028974533081, |
|
"logps/chosen": -474.2938537597656, |
|
"logps/rejected": -1090.5147705078125, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0882890596985817, |
|
"rewards/margins": 0.26188915967941284, |
|
"rewards/rejected": -0.35017821192741394, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.907613372729916e-06, |
|
"logits/chosen": -1.5756769180297852, |
|
"logits/rejected": -1.1073580980300903, |
|
"logps/chosen": -508.64654541015625, |
|
"logps/rejected": -1192.768798828125, |
|
"loss": 0.0905, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11147113889455795, |
|
"rewards/margins": 0.2684895396232605, |
|
"rewards/rejected": -0.37996068596839905, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.888320862029699e-06, |
|
"logits/chosen": -1.970336675643921, |
|
"logits/rejected": -1.1683611869812012, |
|
"logps/chosen": -608.2377319335938, |
|
"logps/rejected": -1272.260498046875, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08651524782180786, |
|
"rewards/margins": 0.3126547038555145, |
|
"rewards/rejected": -0.399169921875, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -1.7017894983291626, |
|
"logits/rejected": -0.9617505073547363, |
|
"logps/chosen": -684.0576782226562, |
|
"logps/rejected": -1245.509765625, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09601768106222153, |
|
"rewards/margins": 0.29448553919792175, |
|
"rewards/rejected": -0.3905032277107239, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.849376644878783e-06, |
|
"logits/chosen": -1.536409616470337, |
|
"logits/rejected": -1.0880365371704102, |
|
"logps/chosen": -533.0728149414062, |
|
"logps/rejected": -1243.9488525390625, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07541215419769287, |
|
"rewards/margins": 0.28547203540802, |
|
"rewards/rejected": -0.3608841896057129, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.829728312792895e-06, |
|
"logits/chosen": -1.8492475748062134, |
|
"logits/rejected": -1.256415605545044, |
|
"logps/chosen": -434.9085388183594, |
|
"logps/rejected": -1020.18798828125, |
|
"loss": 0.0619, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.04833076149225235, |
|
"rewards/margins": 0.2557259202003479, |
|
"rewards/rejected": -0.30405664443969727, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -1.5395265817642212, |
|
"logits/rejected": -1.087846040725708, |
|
"logps/chosen": -591.6812744140625, |
|
"logps/rejected": -1221.6868896484375, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10671044886112213, |
|
"rewards/margins": 0.2588750422000885, |
|
"rewards/rejected": -0.36558544635772705, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.790087713710179e-06, |
|
"logits/chosen": -1.5241081714630127, |
|
"logits/rejected": -0.8990601301193237, |
|
"logps/chosen": -703.971923828125, |
|
"logps/rejected": -1318.2982177734375, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16658341884613037, |
|
"rewards/margins": 0.270012766122818, |
|
"rewards/rejected": -0.436596155166626, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.770098881416945e-06, |
|
"logits/chosen": -1.489585041999817, |
|
"logits/rejected": -1.3004378080368042, |
|
"logps/chosen": -654.8056030273438, |
|
"logps/rejected": -1286.8641357421875, |
|
"loss": 0.0956, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15169434249401093, |
|
"rewards/margins": 0.25045520067214966, |
|
"rewards/rejected": -0.40214958786964417, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.6937404870986938, |
|
"logits/rejected": -1.0904152393341064, |
|
"logps/chosen": -521.1519775390625, |
|
"logps/rejected": -1150.9736328125, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09178796410560608, |
|
"rewards/margins": 0.2572135031223297, |
|
"rewards/rejected": -0.3490014672279358, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7297928109491765e-06, |
|
"logits/chosen": -1.5384725332260132, |
|
"logits/rejected": -1.1151206493377686, |
|
"logps/chosen": -500.30340576171875, |
|
"logps/rejected": -1171.871337890625, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09594957530498505, |
|
"rewards/margins": 0.31415387988090515, |
|
"rewards/rejected": -0.4101034700870514, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7094790651387414e-06, |
|
"logits/chosen": -1.750771164894104, |
|
"logits/rejected": -1.0950592756271362, |
|
"logps/chosen": -557.678466796875, |
|
"logps/rejected": -1188.5867919921875, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14062079787254333, |
|
"rewards/margins": 0.282297819852829, |
|
"rewards/rejected": -0.4229187071323395, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -1.5061167478561401, |
|
"logits/rejected": -1.0993683338165283, |
|
"logps/chosen": -719.0147705078125, |
|
"logps/rejected": -1315.07421875, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22822144627571106, |
|
"rewards/margins": 0.24384041130542755, |
|
"rewards/rejected": -0.47206181287765503, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668538952747236e-06, |
|
"logits/chosen": -1.7315492630004883, |
|
"logits/rejected": -1.098783254623413, |
|
"logps/chosen": -702.6656494140625, |
|
"logps/rejected": -1301.657958984375, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16757100820541382, |
|
"rewards/margins": 0.2810649871826172, |
|
"rewards/rejected": -0.4486359655857086, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6479161334675294e-06, |
|
"logits/chosen": -1.6738449335098267, |
|
"logits/rejected": -1.0924396514892578, |
|
"logps/chosen": -584.9110107421875, |
|
"logps/rejected": -1123.946533203125, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09195319563150406, |
|
"rewards/margins": 0.2546834349632263, |
|
"rewards/rejected": -0.3466365933418274, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -1.5191009044647217, |
|
"logits/rejected": -1.2671594619750977, |
|
"logps/chosen": -644.867431640625, |
|
"logps/rejected": -1194.0931396484375, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.133761465549469, |
|
"rewards/margins": 0.24754054844379425, |
|
"rewards/rejected": -0.38130199909210205, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6063739030204226e-06, |
|
"logits/chosen": -1.7906509637832642, |
|
"logits/rejected": -1.3401691913604736, |
|
"logps/chosen": -475.7640075683594, |
|
"logps/rejected": -1105.63623046875, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06556513905525208, |
|
"rewards/margins": 0.2934816777706146, |
|
"rewards/rejected": -0.3590467870235443, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5854580913255706e-06, |
|
"logits/chosen": -1.6345583200454712, |
|
"logits/rejected": -0.9686171412467957, |
|
"logps/chosen": -546.1632080078125, |
|
"logps/rejected": -1137.38232421875, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09245268255472183, |
|
"rewards/margins": 0.2836568355560303, |
|
"rewards/rejected": -0.3761095106601715, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -1.774083137512207, |
|
"logits/rejected": -1.0874212980270386, |
|
"logps/chosen": -627.0970458984375, |
|
"logps/rejected": -1292.3609619140625, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14228971302509308, |
|
"rewards/margins": 0.29837566614151, |
|
"rewards/rejected": -0.4406653940677643, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543346136204545e-06, |
|
"logits/chosen": -1.492363691329956, |
|
"logits/rejected": -0.9753513336181641, |
|
"logps/chosen": -715.6846923828125, |
|
"logps/rejected": -1311.7313232421875, |
|
"loss": 0.1043, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19200663268566132, |
|
"rewards/margins": 0.2355610430240631, |
|
"rewards/rejected": -0.427567720413208, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.522153641615345e-06, |
|
"logits/chosen": -1.6796743869781494, |
|
"logits/rejected": -1.0669097900390625, |
|
"logps/chosen": -651.3643798828125, |
|
"logps/rejected": -1211.4693603515625, |
|
"loss": 0.0636, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12537550926208496, |
|
"rewards/margins": 0.29044246673583984, |
|
"rewards/rejected": -0.4158180356025696, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -1.53739333152771, |
|
"logits/rejected": -1.009767770767212, |
|
"logps/chosen": -771.9571533203125, |
|
"logps/rejected": -1199.3785400390625, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17814821004867554, |
|
"rewards/margins": 0.24210956692695618, |
|
"rewards/rejected": -0.4202577471733093, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4795047994562463e-06, |
|
"logits/chosen": -1.4524638652801514, |
|
"logits/rejected": -1.051928162574768, |
|
"logps/chosen": -690.6948852539062, |
|
"logps/rejected": -1226.161865234375, |
|
"loss": 0.0876, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18693287670612335, |
|
"rewards/margins": 0.25937145948410034, |
|
"rewards/rejected": -0.4463043212890625, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.458052147242494e-06, |
|
"logits/chosen": -1.8221120834350586, |
|
"logits/rejected": -1.1258487701416016, |
|
"logps/chosen": -630.0181274414062, |
|
"logps/rejected": -1044.5679931640625, |
|
"loss": 0.1046, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09280923753976822, |
|
"rewards/margins": 0.23150058090686798, |
|
"rewards/rejected": -0.3243098556995392, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -1.8554388284683228, |
|
"logits/rejected": -1.3803541660308838, |
|
"logps/chosen": -633.9047241210938, |
|
"logps/rejected": -1229.6116943359375, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13664793968200684, |
|
"rewards/margins": 0.2661344110965729, |
|
"rewards/rejected": -0.4027823805809021, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4148996743295305e-06, |
|
"logits/chosen": -1.9048486948013306, |
|
"logits/rejected": -1.058411955833435, |
|
"logps/chosen": -720.1047973632812, |
|
"logps/rejected": -1232.9039306640625, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12227736413478851, |
|
"rewards/margins": 0.2787812352180481, |
|
"rewards/rejected": -0.4010585844516754, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3932035926241103e-06, |
|
"logits/chosen": -1.6380399465560913, |
|
"logits/rejected": -1.262209177017212, |
|
"logps/chosen": -482.85662841796875, |
|
"logps/rejected": -1295.7718505859375, |
|
"loss": 0.0462, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08021150529384613, |
|
"rewards/margins": 0.3560211658477783, |
|
"rewards/rejected": -0.43623265624046326, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -1.595003366470337, |
|
"logits/rejected": -1.2377384901046753, |
|
"logps/chosen": -547.7657470703125, |
|
"logps/rejected": -1363.28125, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12761729955673218, |
|
"rewards/margins": 0.29965031147003174, |
|
"rewards/rejected": -0.4272676110267639, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.349581137957604e-06, |
|
"logits/chosen": -1.8540780544281006, |
|
"logits/rejected": -1.0956978797912598, |
|
"logps/chosen": -570.6678466796875, |
|
"logps/rejected": -1214.4659423828125, |
|
"loss": 0.0458, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08135120570659637, |
|
"rewards/margins": 0.32362592220306396, |
|
"rewards/rejected": -0.40497714281082153, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3276585447123957e-06, |
|
"logits/chosen": -1.4734654426574707, |
|
"logits/rejected": -1.1689575910568237, |
|
"logps/chosen": -471.4612731933594, |
|
"logps/rejected": -1102.623046875, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06378956139087677, |
|
"rewards/margins": 0.2605707049369812, |
|
"rewards/rejected": -0.32436028122901917, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -1.6254606246948242, |
|
"logits/rejected": -0.9264505505561829, |
|
"logps/chosen": -598.3116455078125, |
|
"logps/rejected": -1273.1226806640625, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10875244438648224, |
|
"rewards/margins": 0.3382043242454529, |
|
"rewards/rejected": -0.4469567835330963, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2836001237702993e-06, |
|
"logits/chosen": -1.7050098180770874, |
|
"logits/rejected": -1.1736676692962646, |
|
"logps/chosen": -605.455078125, |
|
"logps/rejected": -1361.8411865234375, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09871726483106613, |
|
"rewards/margins": 0.318486750125885, |
|
"rewards/rejected": -0.41720399260520935, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2614681135640696e-06, |
|
"logits/chosen": -1.666524887084961, |
|
"logits/rejected": -1.0706026554107666, |
|
"logps/chosen": -633.6300048828125, |
|
"logps/rejected": -1287.6302490234375, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08346323668956757, |
|
"rewards/margins": 0.29316291213035583, |
|
"rewards/rejected": -0.376626193523407, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -1.6395193338394165, |
|
"logits/rejected": -1.1502461433410645, |
|
"logps/chosen": -596.9259033203125, |
|
"logps/rejected": -1209.7242431640625, |
|
"loss": 0.0985, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13397231698036194, |
|
"rewards/margins": 0.2410469949245453, |
|
"rewards/rejected": -0.3750193417072296, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -1.6936429738998413, |
|
"logits/rejected": -1.1541160345077515, |
|
"logps/chosen": -687.9554443359375, |
|
"logps/rejected": -1373.260986328125, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19113728404045105, |
|
"rewards/margins": 0.3089086413383484, |
|
"rewards/rejected": -0.500045895576477, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1946839124862873e-06, |
|
"logits/chosen": -1.499732255935669, |
|
"logits/rejected": -1.073246955871582, |
|
"logps/chosen": -632.1316528320312, |
|
"logps/rejected": -1340.867919921875, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14432887732982635, |
|
"rewards/margins": 0.3110666275024414, |
|
"rewards/rejected": -0.45539551973342896, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -1.6296335458755493, |
|
"logits/rejected": -0.9428736567497253, |
|
"logps/chosen": -613.3892822265625, |
|
"logps/rejected": -1241.071533203125, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1409432291984558, |
|
"rewards/margins": 0.28491806983947754, |
|
"rewards/rejected": -0.42586126923561096, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.149856938451094e-06, |
|
"logits/chosen": -1.9275051355361938, |
|
"logits/rejected": -0.9073979258537292, |
|
"logps/chosen": -685.4188842773438, |
|
"logps/rejected": -1194.86767578125, |
|
"loss": 0.0869, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15064950287342072, |
|
"rewards/margins": 0.25674349069595337, |
|
"rewards/rejected": -0.4073929786682129, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.127358017790132e-06, |
|
"logits/chosen": -1.5247657299041748, |
|
"logits/rejected": -0.8251350522041321, |
|
"logps/chosen": -658.7485961914062, |
|
"logps/rejected": -1302.989013671875, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15536119043827057, |
|
"rewards/margins": 0.306736022233963, |
|
"rewards/rejected": -0.46209725737571716, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -1.5092096328735352, |
|
"logits/rejected": -0.8953585624694824, |
|
"logps/chosen": -563.54638671875, |
|
"logps/rejected": -1277.382080078125, |
|
"loss": 0.0635, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12220227718353271, |
|
"rewards/margins": 0.32350245118141174, |
|
"rewards/rejected": -0.44570469856262207, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.082199056232015e-06, |
|
"logits/chosen": -1.555259108543396, |
|
"logits/rejected": -1.2513010501861572, |
|
"logps/chosen": -576.5824584960938, |
|
"logps/rejected": -1193.6871337890625, |
|
"loss": 0.1014, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14716719090938568, |
|
"rewards/margins": 0.2701808214187622, |
|
"rewards/rejected": -0.4173479974269867, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.059542928183079e-06, |
|
"logits/chosen": -1.2370166778564453, |
|
"logits/rejected": -0.862767219543457, |
|
"logps/chosen": -630.3663330078125, |
|
"logps/rejected": -1336.5428466796875, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1622113287448883, |
|
"rewards/margins": 0.32105860114097595, |
|
"rewards/rejected": -0.48326998949050903, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -1.385969877243042, |
|
"logits/rejected": -1.0176479816436768, |
|
"logps/chosen": -719.9691772460938, |
|
"logps/rejected": -1240.3895263671875, |
|
"loss": 0.1078, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19016364216804504, |
|
"rewards/margins": 0.2406831681728363, |
|
"rewards/rejected": -0.43084684014320374, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0140871927018466e-06, |
|
"logits/chosen": -1.6181665658950806, |
|
"logits/rejected": -1.2087422609329224, |
|
"logps/chosen": -704.0010986328125, |
|
"logps/rejected": -1382.859619140625, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19687633216381073, |
|
"rewards/margins": 0.28999894857406616, |
|
"rewards/rejected": -0.4868752360343933, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9912915238320755e-06, |
|
"logits/chosen": -1.4043359756469727, |
|
"logits/rejected": -1.1318188905715942, |
|
"logps/chosen": -555.2379760742188, |
|
"logps/rejected": -1354.341796875, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.16941991448402405, |
|
"rewards/margins": 0.33636412024497986, |
|
"rewards/rejected": -0.5057840347290039, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -1.7377849817276, |
|
"logits/rejected": -1.16031014919281, |
|
"logps/chosen": -689.5167236328125, |
|
"logps/rejected": -1164.914794921875, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.21689483523368835, |
|
"rewards/margins": 0.22367532551288605, |
|
"rewards/rejected": -0.440570205450058, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.945574459442917e-06, |
|
"logits/chosen": -1.6468284130096436, |
|
"logits/rejected": -1.190332055091858, |
|
"logps/chosen": -639.2093505859375, |
|
"logps/rejected": -1291.997802734375, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1794385313987732, |
|
"rewards/margins": 0.31004798412323, |
|
"rewards/rejected": -0.4894865155220032, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.922657025129185e-06, |
|
"logits/chosen": -1.5556018352508545, |
|
"logits/rejected": -1.0828845500946045, |
|
"logps/chosen": -718.0322875976562, |
|
"logps/rejected": -1318.2115478515625, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.27457764744758606, |
|
"rewards/margins": 0.23540663719177246, |
|
"rewards/rejected": -0.5099843144416809, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -1.66329026222229, |
|
"logits/rejected": -1.0540707111358643, |
|
"logps/chosen": -647.1304931640625, |
|
"logps/rejected": -1394.345703125, |
|
"loss": 0.0644, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16953504085540771, |
|
"rewards/margins": 0.3324953019618988, |
|
"rewards/rejected": -0.5020303726196289, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.876714280623708e-06, |
|
"logits/chosen": -1.3675628900527954, |
|
"logits/rejected": -0.910226047039032, |
|
"logps/chosen": -656.5228881835938, |
|
"logps/rejected": -1325.244140625, |
|
"loss": 0.0585, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18668699264526367, |
|
"rewards/margins": 0.3124103248119354, |
|
"rewards/rejected": -0.4990972876548767, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8536929511919227e-06, |
|
"logits/chosen": -1.5113346576690674, |
|
"logits/rejected": -0.917544960975647, |
|
"logps/chosen": -621.9835815429688, |
|
"logps/rejected": -1159.2508544921875, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17902755737304688, |
|
"rewards/margins": 0.25234436988830566, |
|
"rewards/rejected": -0.43137192726135254, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -1.56842041015625, |
|
"logits/rejected": -0.9856363534927368, |
|
"logps/chosen": -565.5784301757812, |
|
"logps/rejected": -1276.6402587890625, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.158840149641037, |
|
"rewards/margins": 0.29064539074897766, |
|
"rewards/rejected": -0.4494854807853699, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.807560351340302e-06, |
|
"logits/chosen": -1.5603129863739014, |
|
"logits/rejected": -1.0442321300506592, |
|
"logps/chosen": -594.1895751953125, |
|
"logps/rejected": -1224.7371826171875, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17820130288600922, |
|
"rewards/margins": 0.2835688889026642, |
|
"rewards/rejected": -0.4617701470851898, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7844530781306544e-06, |
|
"logits/chosen": -1.436647653579712, |
|
"logits/rejected": -0.8475536108016968, |
|
"logps/chosen": -703.9013061523438, |
|
"logps/rejected": -1465.6171875, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.22263050079345703, |
|
"rewards/margins": 0.3488808274269104, |
|
"rewards/rejected": -0.5715113282203674, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -1.6537196636199951, |
|
"logits/rejected": -1.1116868257522583, |
|
"logps/chosen": -717.2203369140625, |
|
"logps/rejected": -1459.016357421875, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19066976010799408, |
|
"rewards/margins": 0.35232046246528625, |
|
"rewards/rejected": -0.5429901480674744, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.738166595746554e-06, |
|
"logits/chosen": -1.709814429283142, |
|
"logits/rejected": -0.9134047627449036, |
|
"logps/chosen": -705.0501708984375, |
|
"logps/rejected": -1441.9495849609375, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.172596737742424, |
|
"rewards/margins": 0.34574776887893677, |
|
"rewards/rejected": -0.518344521522522, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7149913971156105e-06, |
|
"logits/chosen": -1.6743762493133545, |
|
"logits/rejected": -1.134342908859253, |
|
"logps/chosen": -682.0380859375, |
|
"logps/rejected": -1288.8563232421875, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21863889694213867, |
|
"rewards/margins": 0.30050721764564514, |
|
"rewards/rejected": -0.5191460847854614, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -1.384412407875061, |
|
"logits/rejected": -0.9328360557556152, |
|
"logps/chosen": -756.0421752929688, |
|
"logps/rejected": -1321.971923828125, |
|
"loss": 0.0844, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2494826316833496, |
|
"rewards/margins": 0.2922648787498474, |
|
"rewards/rejected": -0.541747510433197, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.668587125005663e-06, |
|
"logits/chosen": -1.573412299156189, |
|
"logits/rejected": -0.9849382638931274, |
|
"logps/chosen": -653.5593872070312, |
|
"logps/rejected": -1219.503173828125, |
|
"loss": 0.0674, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22859041392803192, |
|
"rewards/margins": 0.2729692757129669, |
|
"rewards/rejected": -0.5015596747398376, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -1.6217581033706665, |
|
"logits/rejected": -0.829363226890564, |
|
"logps/chosen": -687.14794921875, |
|
"logps/rejected": -1367.03515625, |
|
"loss": 0.0515, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.2579112648963928, |
|
"rewards/margins": 0.3186108469963074, |
|
"rewards/rejected": -0.5765220522880554, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -1.4822559356689453, |
|
"logits/rejected": -0.8854954838752747, |
|
"logps/chosen": -803.3583984375, |
|
"logps/rejected": -1373.9749755859375, |
|
"loss": 0.0823, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2859230637550354, |
|
"rewards/margins": 0.2797744870185852, |
|
"rewards/rejected": -0.5656975507736206, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5988761950959133e-06, |
|
"logits/chosen": -1.405379056930542, |
|
"logits/rejected": -0.980889618396759, |
|
"logps/chosen": -653.2147216796875, |
|
"logps/rejected": -1412.9344482421875, |
|
"loss": 0.0489, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19988027215003967, |
|
"rewards/margins": 0.3394158184528351, |
|
"rewards/rejected": -0.5392960906028748, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.575619398465402e-06, |
|
"logits/chosen": -1.5412838459014893, |
|
"logits/rejected": -0.9163180589675903, |
|
"logps/chosen": -539.6043090820312, |
|
"logps/rejected": -1107.312255859375, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1628035455942154, |
|
"rewards/margins": 0.2565108835697174, |
|
"rewards/rejected": -0.419314444065094, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -1.482742428779602, |
|
"logits/rejected": -1.0331344604492188, |
|
"logps/chosen": -751.7644653320312, |
|
"logps/rejected": -1281.9117431640625, |
|
"loss": 0.0847, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.21723918616771698, |
|
"rewards/margins": 0.26914697885513306, |
|
"rewards/rejected": -0.48638615012168884, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5290881645034932e-06, |
|
"logits/chosen": -1.6871871948242188, |
|
"logits/rejected": -0.9969019889831543, |
|
"logps/chosen": -712.149169921875, |
|
"logps/rejected": -1383.6920166015625, |
|
"loss": 0.0813, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2281813621520996, |
|
"rewards/margins": 0.3213092088699341, |
|
"rewards/rejected": -0.5494905710220337, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5058177589223766e-06, |
|
"logits/chosen": -1.4727680683135986, |
|
"logits/rejected": -1.048156976699829, |
|
"logps/chosen": -657.0037231445312, |
|
"logps/rejected": -1363.59814453125, |
|
"loss": 0.0745, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2256421595811844, |
|
"rewards/margins": 0.31263765692710876, |
|
"rewards/rejected": -0.5382798314094543, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -1.5077242851257324, |
|
"logits/rejected": -0.8984912633895874, |
|
"logps/chosen": -722.7063598632812, |
|
"logps/rejected": -1281.8148193359375, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.23482950031757355, |
|
"rewards/margins": 0.291878879070282, |
|
"rewards/rejected": -0.5267083644866943, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4592774518353858e-06, |
|
"logits/chosen": -1.6423757076263428, |
|
"logits/rejected": -0.9732748866081238, |
|
"logps/chosen": -625.0535888671875, |
|
"logps/rejected": -1348.394287109375, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19351795315742493, |
|
"rewards/margins": 0.3293379247188568, |
|
"rewards/rejected": -0.5228558778762817, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.436011582865945e-06, |
|
"logits/chosen": -1.7477773427963257, |
|
"logits/rejected": -1.0844796895980835, |
|
"logps/chosen": -786.804443359375, |
|
"logps/rejected": -1510.250732421875, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.24655666947364807, |
|
"rewards/margins": 0.34261250495910645, |
|
"rewards/rejected": -0.5891691446304321, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -1.1730453968048096, |
|
"logits/rejected": -1.0297441482543945, |
|
"logps/chosen": -610.2826538085938, |
|
"logps/rejected": -1334.4935302734375, |
|
"loss": 0.075, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21944193542003632, |
|
"rewards/margins": 0.28680866956710815, |
|
"rewards/rejected": -0.5062506198883057, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3894984933853734e-06, |
|
"logits/chosen": -1.6909167766571045, |
|
"logits/rejected": -1.2207863330841064, |
|
"logps/chosen": -736.7841796875, |
|
"logps/rejected": -1436.5059814453125, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2114332616329193, |
|
"rewards/margins": 0.30715304613113403, |
|
"rewards/rejected": -0.518586277961731, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.366255303052377e-06, |
|
"logits/chosen": -1.7539796829223633, |
|
"logits/rejected": -1.0083894729614258, |
|
"logps/chosen": -702.0449829101562, |
|
"logps/rejected": -1368.424072265625, |
|
"loss": 0.0588, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17488941550254822, |
|
"rewards/margins": 0.3086225390434265, |
|
"rewards/rejected": -0.4835119843482971, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -1.783517837524414, |
|
"logits/rejected": -0.9859122037887573, |
|
"logps/chosen": -631.6397705078125, |
|
"logps/rejected": -1418.53857421875, |
|
"loss": 0.0386, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.13998395204544067, |
|
"rewards/margins": 0.3510037064552307, |
|
"rewards/rejected": -0.490987628698349, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319805700686257e-06, |
|
"logits/chosen": -1.5874156951904297, |
|
"logits/rejected": -1.0402486324310303, |
|
"logps/chosen": -647.2991943359375, |
|
"logps/rejected": -1264.6134033203125, |
|
"loss": 0.0663, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15432874858379364, |
|
"rewards/margins": 0.29131144285202026, |
|
"rewards/rejected": -0.4456402361392975, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.296603313330355e-06, |
|
"logits/chosen": -1.6740115880966187, |
|
"logits/rejected": -1.049090027809143, |
|
"logps/chosen": -650.9720458984375, |
|
"logps/rejected": -1069.4693603515625, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1620035022497177, |
|
"rewards/margins": 0.2206643521785736, |
|
"rewards/rejected": -0.3826678991317749, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -1.4526993036270142, |
|
"logits/rejected": -1.0033073425292969, |
|
"logps/chosen": -464.20257568359375, |
|
"logps/rejected": -1249.659423828125, |
|
"loss": 0.0672, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11725373566150665, |
|
"rewards/margins": 0.32836082577705383, |
|
"rewards/rejected": -0.4456145763397217, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.250253418081373e-06, |
|
"logits/chosen": -1.6637146472930908, |
|
"logits/rejected": -1.0717463493347168, |
|
"logps/chosen": -662.2470703125, |
|
"logps/rejected": -1223.6536865234375, |
|
"loss": 0.0843, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1466962993144989, |
|
"rewards/margins": 0.2738208770751953, |
|
"rewards/rejected": -0.4205172061920166, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.22710992622628e-06, |
|
"logits/chosen": -1.7047134637832642, |
|
"logits/rejected": -0.7803869843482971, |
|
"logps/chosen": -681.6057739257812, |
|
"logps/rejected": -1309.5865478515625, |
|
"loss": 0.057, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15952114760875702, |
|
"rewards/margins": 0.309850811958313, |
|
"rewards/rejected": -0.4693719744682312, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -1.5044130086898804, |
|
"logits/rejected": -0.787899374961853, |
|
"logps/chosen": -632.465576171875, |
|
"logps/rejected": -1316.4517822265625, |
|
"loss": 0.0515, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18237130343914032, |
|
"rewards/margins": 0.32328280806541443, |
|
"rewards/rejected": -0.5056540369987488, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1808958803485134e-06, |
|
"logits/chosen": -1.6702648401260376, |
|
"logits/rejected": -0.990136444568634, |
|
"logps/chosen": -455.564208984375, |
|
"logps/rejected": -1168.5498046875, |
|
"loss": 0.0599, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12168209254741669, |
|
"rewards/margins": 0.3131280541419983, |
|
"rewards/rejected": -0.434810072183609, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.157829330593008e-06, |
|
"logits/chosen": -1.6868867874145508, |
|
"logits/rejected": -0.9523127675056458, |
|
"logps/chosen": -733.1408081054688, |
|
"logps/rejected": -1337.63818359375, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21179482340812683, |
|
"rewards/margins": 0.30666384100914, |
|
"rewards/rejected": -0.5184586644172668, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -1.6914653778076172, |
|
"logits/rejected": -1.2282737493515015, |
|
"logps/chosen": -614.1090087890625, |
|
"logps/rejected": -1088.0286865234375, |
|
"loss": 0.1059, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1738576740026474, |
|
"rewards/margins": 0.21695072948932648, |
|
"rewards/rejected": -0.3908084034919739, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1117871704092818e-06, |
|
"logits/chosen": -1.6489229202270508, |
|
"logits/rejected": -0.9123425483703613, |
|
"logps/chosen": -679.4093017578125, |
|
"logps/rejected": -1336.1690673828125, |
|
"loss": 0.0813, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1528438925743103, |
|
"rewards/margins": 0.3031119108200073, |
|
"rewards/rejected": -0.4559558033943176, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0888155493550027e-06, |
|
"logits/chosen": -1.658093810081482, |
|
"logits/rejected": -1.2635515928268433, |
|
"logps/chosen": -563.3775634765625, |
|
"logps/rejected": -1217.8135986328125, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12988874316215515, |
|
"rewards/margins": 0.31268182396888733, |
|
"rewards/rejected": -0.44257062673568726, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.5542079210281372, |
|
"logits/rejected": -0.9705120921134949, |
|
"logps/chosen": -641.7821655273438, |
|
"logps/rejected": -1335.944091796875, |
|
"loss": 0.0586, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1482844054698944, |
|
"rewards/margins": 0.3350493013858795, |
|
"rewards/rejected": -0.48333367705345154, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0429811771568468e-06, |
|
"logits/chosen": -1.6625282764434814, |
|
"logits/rejected": -0.9575098752975464, |
|
"logps/chosen": -673.6990966796875, |
|
"logps/rejected": -1288.328369140625, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1419149488210678, |
|
"rewards/margins": 0.2989000678062439, |
|
"rewards/rejected": -0.4408150315284729, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0201223973828917e-06, |
|
"logits/chosen": -1.7629448175430298, |
|
"logits/rejected": -1.1014915704727173, |
|
"logps/chosen": -609.2691650390625, |
|
"logps/rejected": -1323.014404296875, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12834301590919495, |
|
"rewards/margins": 0.31924059987068176, |
|
"rewards/rejected": -0.4475835859775543, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -1.5687427520751953, |
|
"logits/rejected": -0.9063301086425781, |
|
"logps/chosen": -674.9013671875, |
|
"logps/rejected": -1229.5550537109375, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12829624116420746, |
|
"rewards/margins": 0.24914589524269104, |
|
"rewards/rejected": -0.3774421215057373, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9745315534350157e-06, |
|
"logits/chosen": -1.6142327785491943, |
|
"logits/rejected": -1.2041738033294678, |
|
"logps/chosen": -527.4874877929688, |
|
"logps/rejected": -1276.542724609375, |
|
"loss": 0.0724, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11953765153884888, |
|
"rewards/margins": 0.3008989989757538, |
|
"rewards/rejected": -0.42043668031692505, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9518034395302413e-06, |
|
"logits/chosen": -1.584478497505188, |
|
"logits/rejected": -0.8746352195739746, |
|
"logps/chosen": -507.75128173828125, |
|
"logps/rejected": -1205.164794921875, |
|
"loss": 0.0654, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12713788449764252, |
|
"rewards/margins": 0.3278200626373291, |
|
"rewards/rejected": -0.4549580216407776, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -1.452108383178711, |
|
"logits/rejected": -0.9603986740112305, |
|
"logps/chosen": -750.004638671875, |
|
"logps/rejected": -1415.546875, |
|
"loss": 0.0642, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.22353100776672363, |
|
"rewards/margins": 0.3011419475078583, |
|
"rewards/rejected": -0.5246729254722595, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9064916742013515e-06, |
|
"logits/chosen": -1.5349475145339966, |
|
"logits/rejected": -0.8534983396530151, |
|
"logps/chosen": -691.6678466796875, |
|
"logps/rejected": -1337.88671875, |
|
"loss": 0.0564, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19090059399604797, |
|
"rewards/margins": 0.3062039911746979, |
|
"rewards/rejected": -0.49710458517074585, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.883911948865306e-06, |
|
"logits/chosen": -1.6528412103652954, |
|
"logits/rejected": -1.132430076599121, |
|
"logps/chosen": -626.3675537109375, |
|
"logps/rejected": -1352.5123291015625, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1741064339876175, |
|
"rewards/margins": 0.31939199566841125, |
|
"rewards/rejected": -0.49349841475486755, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -1.8713703155517578, |
|
"logits/rejected": -1.2219023704528809, |
|
"logps/chosen": -479.8798828125, |
|
"logps/rejected": -1113.4814453125, |
|
"loss": 0.0658, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10413724184036255, |
|
"rewards/margins": 0.322685569524765, |
|
"rewards/rejected": -0.42682284116744995, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8389145949069953e-06, |
|
"logits/chosen": -1.5313884019851685, |
|
"logits/rejected": -1.042482614517212, |
|
"logps/chosen": -540.1700439453125, |
|
"logps/rejected": -1220.364990234375, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13704395294189453, |
|
"rewards/margins": 0.29186248779296875, |
|
"rewards/rejected": -0.4289064407348633, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.816500865130279e-06, |
|
"logits/chosen": -1.495516300201416, |
|
"logits/rejected": -1.306896448135376, |
|
"logps/chosen": -562.7477416992188, |
|
"logps/rejected": -1185.534912109375, |
|
"loss": 0.1051, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14714586734771729, |
|
"rewards/margins": 0.26410627365112305, |
|
"rewards/rejected": -0.4112521708011627, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -1.6695563793182373, |
|
"logits/rejected": -0.9721006155014038, |
|
"logps/chosen": -601.9846801757812, |
|
"logps/rejected": -1367.9365234375, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1521584689617157, |
|
"rewards/margins": 0.35412856936454773, |
|
"rewards/rejected": -0.5062869787216187, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7718530101256115e-06, |
|
"logits/chosen": -1.6784439086914062, |
|
"logits/rejected": -1.0285909175872803, |
|
"logps/chosen": -690.5152587890625, |
|
"logps/rejected": -1287.234130859375, |
|
"loss": 0.083, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.17937234044075012, |
|
"rewards/margins": 0.27991387248039246, |
|
"rewards/rejected": -0.4592861533164978, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7496227534604859e-06, |
|
"logits/chosen": -1.7998254299163818, |
|
"logits/rejected": -1.1072345972061157, |
|
"logps/chosen": -648.5956420898438, |
|
"logps/rejected": -1205.2398681640625, |
|
"loss": 0.0657, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1724121868610382, |
|
"rewards/margins": 0.29253289103507996, |
|
"rewards/rejected": -0.46494507789611816, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.6273527145385742, |
|
"logits/rejected": -1.183037519454956, |
|
"logps/chosen": -620.6856689453125, |
|
"logps/rejected": -1215.266845703125, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17143599689006805, |
|
"rewards/margins": 0.2781030535697937, |
|
"rewards/rejected": -0.44953903555870056, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7053592124637557e-06, |
|
"logits/chosen": -1.8067405223846436, |
|
"logits/rejected": -1.2858269214630127, |
|
"logps/chosen": -659.517822265625, |
|
"logps/rejected": -1250.8367919921875, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17256858944892883, |
|
"rewards/margins": 0.2582642436027527, |
|
"rewards/rejected": -0.43083280324935913, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6833297633956647e-06, |
|
"logits/chosen": -1.443641185760498, |
|
"logits/rejected": -1.0768052339553833, |
|
"logps/chosen": -559.7144775390625, |
|
"logps/rejected": -1292.55859375, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.11608362197875977, |
|
"rewards/margins": 0.34168118238449097, |
|
"rewards/rejected": -0.45776480436325073, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -1.6469463109970093, |
|
"logits/rejected": -1.0461533069610596, |
|
"logps/chosen": -603.0146484375, |
|
"logps/rejected": -1207.7100830078125, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.10989055782556534, |
|
"rewards/margins": 0.2882770299911499, |
|
"rewards/rejected": -0.39816758036613464, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6394850517846621e-06, |
|
"logits/chosen": -1.6739981174468994, |
|
"logits/rejected": -1.2458163499832153, |
|
"logps/chosen": -687.8931884765625, |
|
"logps/rejected": -1236.5269775390625, |
|
"loss": 0.0977, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.148878276348114, |
|
"rewards/margins": 0.2945893704891205, |
|
"rewards/rejected": -0.4434676170349121, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6176735882153284e-06, |
|
"logits/chosen": -1.6518714427947998, |
|
"logits/rejected": -0.9769365191459656, |
|
"logps/chosen": -496.71661376953125, |
|
"logps/rejected": -1090.7801513671875, |
|
"loss": 0.081, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.12074653804302216, |
|
"rewards/margins": 0.26993122696876526, |
|
"rewards/rejected": -0.3906777799129486, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -1.5575945377349854, |
|
"logits/rejected": -1.1600332260131836, |
|
"logps/chosen": -541.9180908203125, |
|
"logps/rejected": -1182.4320068359375, |
|
"loss": 0.0797, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16873383522033691, |
|
"rewards/margins": 0.2602773606777191, |
|
"rewards/rejected": -0.42901119589805603, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5742818947772875e-06, |
|
"logits/chosen": -1.6753427982330322, |
|
"logits/rejected": -0.9124045372009277, |
|
"logps/chosen": -678.1805419921875, |
|
"logps/rejected": -1311.1553955078125, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17256096005439758, |
|
"rewards/margins": 0.3086041510105133, |
|
"rewards/rejected": -0.4811651110649109, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.552705424629898e-06, |
|
"logits/chosen": -1.5884169340133667, |
|
"logits/rejected": -0.9173520803451538, |
|
"logps/chosen": -705.35009765625, |
|
"logps/rejected": -1284.0096435546875, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16716155409812927, |
|
"rewards/margins": 0.28985482454299927, |
|
"rewards/rejected": -0.4570164084434509, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -1.533384084701538, |
|
"logits/rejected": -1.1220409870147705, |
|
"logps/chosen": -613.6298828125, |
|
"logps/rejected": -1082.766357421875, |
|
"loss": 0.1039, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17001187801361084, |
|
"rewards/margins": 0.20055198669433594, |
|
"rewards/rejected": -0.3705638647079468, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -1.4955171346664429, |
|
"logits/rejected": -0.8390592336654663, |
|
"logps/chosen": -611.5145263671875, |
|
"logps/rejected": -1197.4530029296875, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12041016668081284, |
|
"rewards/margins": 0.2896363139152527, |
|
"rewards/rejected": -0.4100464880466461, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4884759328590476e-06, |
|
"logits/chosen": -1.914384126663208, |
|
"logits/rejected": -1.2168171405792236, |
|
"logps/chosen": -600.9666137695312, |
|
"logps/rejected": -1124.942626953125, |
|
"loss": 0.0888, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11489248275756836, |
|
"rewards/margins": 0.26221710443496704, |
|
"rewards/rejected": -0.3771095871925354, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -1.738040566444397, |
|
"logits/rejected": -0.8857895731925964, |
|
"logps/chosen": -679.0386962890625, |
|
"logps/rejected": -1410.933837890625, |
|
"loss": 0.0558, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.16262464225292206, |
|
"rewards/margins": 0.339643657207489, |
|
"rewards/rejected": -0.5022683143615723, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.446091402744923e-06, |
|
"logits/chosen": -1.3981173038482666, |
|
"logits/rejected": -0.8543848991394043, |
|
"logps/chosen": -590.76513671875, |
|
"logps/rejected": -1206.71240234375, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15895266830921173, |
|
"rewards/margins": 0.2688734829425812, |
|
"rewards/rejected": -0.4278261661529541, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4250351971283937e-06, |
|
"logits/chosen": -1.6803276538848877, |
|
"logits/rejected": -1.0602971315383911, |
|
"logps/chosen": -679.5074462890625, |
|
"logps/rejected": -1229.011962890625, |
|
"loss": 0.0871, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19001373648643494, |
|
"rewards/margins": 0.26307451725006104, |
|
"rewards/rejected": -0.45308828353881836, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -1.5601686239242554, |
|
"logits/rejected": -0.9407947659492493, |
|
"logps/chosen": -616.7720336914062, |
|
"logps/rejected": -1353.0308837890625, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16183273494243622, |
|
"rewards/margins": 0.3334823250770569, |
|
"rewards/rejected": -0.4953150749206543, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3832040268095589e-06, |
|
"logits/chosen": -1.52444589138031, |
|
"logits/rejected": -0.7717920541763306, |
|
"logps/chosen": -666.0655517578125, |
|
"logps/rejected": -1330.985595703125, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1790360063314438, |
|
"rewards/margins": 0.3166579306125641, |
|
"rewards/rejected": -0.49569398164749146, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.362432686615316e-06, |
|
"logits/chosen": -1.511685848236084, |
|
"logits/rejected": -1.2630524635314941, |
|
"logps/chosen": -488.4208068847656, |
|
"logps/rejected": -1233.045166015625, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12492994964122772, |
|
"rewards/margins": 0.30061617493629456, |
|
"rewards/rejected": -0.4255460798740387, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -1.624969720840454, |
|
"logits/rejected": -0.8495733141899109, |
|
"logps/chosen": -633.3519897460938, |
|
"logps/rejected": -1253.824951171875, |
|
"loss": 0.0632, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18349340558052063, |
|
"rewards/margins": 0.2837851643562317, |
|
"rewards/rejected": -0.4672785699367523, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3211874947800747e-06, |
|
"logits/chosen": -1.4943301677703857, |
|
"logits/rejected": -1.0596059560775757, |
|
"logps/chosen": -581.8253784179688, |
|
"logps/rejected": -1226.0418701171875, |
|
"loss": 0.061, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1564798802137375, |
|
"rewards/margins": 0.30731362104415894, |
|
"rewards/rejected": -0.4637935161590576, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3007172168743854e-06, |
|
"logits/chosen": -1.4282827377319336, |
|
"logits/rejected": -1.0970618724822998, |
|
"logps/chosen": -622.9385375976562, |
|
"logps/rejected": -1321.9234619140625, |
|
"loss": 0.0718, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18767477571964264, |
|
"rewards/margins": 0.32170000672340393, |
|
"rewards/rejected": -0.5093748569488525, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -1.4260807037353516, |
|
"logits/rejected": -0.8716268539428711, |
|
"logps/chosen": -704.5125732421875, |
|
"logps/rejected": -1340.787353515625, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22893400490283966, |
|
"rewards/margins": 0.28585508465766907, |
|
"rewards/rejected": -0.5147891044616699, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.260090165282645e-06, |
|
"logits/chosen": -1.5481798648834229, |
|
"logits/rejected": -1.048405647277832, |
|
"logps/chosen": -643.6202392578125, |
|
"logps/rejected": -1242.826416015625, |
|
"loss": 0.0806, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2178986817598343, |
|
"rewards/margins": 0.2719099521636963, |
|
"rewards/rejected": -0.4898086488246918, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2399369117724582e-06, |
|
"logits/chosen": -1.650472640991211, |
|
"logits/rejected": -0.9782799482345581, |
|
"logps/chosen": -707.5630493164062, |
|
"logps/rejected": -1298.345458984375, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16964152455329895, |
|
"rewards/margins": 0.2897980809211731, |
|
"rewards/rejected": -0.45943960547447205, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -1.6851444244384766, |
|
"logits/rejected": -0.92662113904953, |
|
"logps/chosen": -619.037841796875, |
|
"logps/rejected": -1287.4412841796875, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13626527786254883, |
|
"rewards/margins": 0.3253551125526428, |
|
"rewards/rejected": -0.46162039041519165, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1999596801769617e-06, |
|
"logits/chosen": -1.8352587223052979, |
|
"logits/rejected": -1.171582818031311, |
|
"logps/chosen": -616.7615966796875, |
|
"logps/rejected": -1335.4404296875, |
|
"loss": 0.049, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1257292926311493, |
|
"rewards/margins": 0.32309678196907043, |
|
"rewards/rejected": -0.4488261342048645, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1801391659631423e-06, |
|
"logits/chosen": -1.6399614810943604, |
|
"logits/rejected": -1.0923852920532227, |
|
"logps/chosen": -635.6760864257812, |
|
"logps/rejected": -1325.152099609375, |
|
"loss": 0.0653, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16147668659687042, |
|
"rewards/margins": 0.3237590193748474, |
|
"rewards/rejected": -0.48523569107055664, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -1.5931923389434814, |
|
"logits/rejected": -1.2261526584625244, |
|
"logps/chosen": -603.7941284179688, |
|
"logps/rejected": -1107.156982421875, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14636698365211487, |
|
"rewards/margins": 0.22241589426994324, |
|
"rewards/rejected": -0.3687829077243805, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1408429274065418e-06, |
|
"logits/chosen": -1.481740117073059, |
|
"logits/rejected": -1.1304103136062622, |
|
"logps/chosen": -606.6870727539062, |
|
"logps/rejected": -1188.147216796875, |
|
"loss": 0.0763, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1330951750278473, |
|
"rewards/margins": 0.2732751667499542, |
|
"rewards/rejected": -0.4063703119754791, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1213706079298566e-06, |
|
"logits/chosen": -1.6204341650009155, |
|
"logits/rejected": -1.0541805028915405, |
|
"logps/chosen": -554.864990234375, |
|
"logps/rejected": -1167.746826171875, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11585495620965958, |
|
"rewards/margins": 0.30027204751968384, |
|
"rewards/rejected": -0.4161270260810852, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -1.6089589595794678, |
|
"logits/rejected": -1.0122666358947754, |
|
"logps/chosen": -685.2811889648438, |
|
"logps/rejected": -1211.408935546875, |
|
"loss": 0.0745, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16823996603488922, |
|
"rewards/margins": 0.2715032994747162, |
|
"rewards/rejected": -0.4397433400154114, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0827860044369226e-06, |
|
"logits/chosen": -1.7601515054702759, |
|
"logits/rejected": -1.007900595664978, |
|
"logps/chosen": -704.5645751953125, |
|
"logps/rejected": -1274.189453125, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17606690526008606, |
|
"rewards/margins": 0.30847105383872986, |
|
"rewards/rejected": -0.48453792929649353, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.06367706362636e-06, |
|
"logits/chosen": -1.5721882581710815, |
|
"logits/rejected": -1.0723769664764404, |
|
"logps/chosen": -665.5977783203125, |
|
"logps/rejected": -1278.2569580078125, |
|
"loss": 0.0799, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18703334033489227, |
|
"rewards/margins": 0.2840210795402527, |
|
"rewards/rejected": -0.47105446457862854, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -1.6236509084701538, |
|
"logits/rejected": -1.0335582494735718, |
|
"logps/chosen": -735.1541137695312, |
|
"logps/rejected": -1341.9544677734375, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1772700995206833, |
|
"rewards/margins": 0.31488290429115295, |
|
"rewards/rejected": -0.49215301871299744, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0258341823102418e-06, |
|
"logits/chosen": -1.6834526062011719, |
|
"logits/rejected": -1.0599212646484375, |
|
"logps/chosen": -650.5093994140625, |
|
"logps/rejected": -1288.187744140625, |
|
"loss": 0.0623, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14554272592067719, |
|
"rewards/margins": 0.3072082996368408, |
|
"rewards/rejected": -0.4527510106563568, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -1.7902591228485107, |
|
"logits/rejected": -1.289333462715149, |
|
"logps/chosen": -654.4462890625, |
|
"logps/rejected": -1320.8380126953125, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16027647256851196, |
|
"rewards/margins": 0.28841906785964966, |
|
"rewards/rejected": -0.44869551062583923, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -1.528241515159607, |
|
"logits/rejected": -1.140417218208313, |
|
"logps/chosen": -584.2600708007812, |
|
"logps/rejected": -1204.6785888671875, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12145284563302994, |
|
"rewards/margins": 0.2952510714530945, |
|
"rewards/rejected": -0.4167039394378662, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.700318703442437e-07, |
|
"logits/chosen": -1.6165825128555298, |
|
"logits/rejected": -1.2629040479660034, |
|
"logps/chosen": -692.536865234375, |
|
"logps/rejected": -1356.869384765625, |
|
"loss": 0.0861, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16812773048877716, |
|
"rewards/margins": 0.2822396159172058, |
|
"rewards/rejected": -0.4503673017024994, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.516940936268504e-07, |
|
"logits/chosen": -1.4886250495910645, |
|
"logits/rejected": -0.8492704629898071, |
|
"logps/chosen": -567.9114379882812, |
|
"logps/rejected": -1223.29931640625, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15036030113697052, |
|
"rewards/margins": 0.3028646409511566, |
|
"rewards/rejected": -0.45322495698928833, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -1.6458749771118164, |
|
"logits/rejected": -1.2635785341262817, |
|
"logps/chosen": -567.9539794921875, |
|
"logps/rejected": -1301.9921875, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1548917442560196, |
|
"rewards/margins": 0.31059738993644714, |
|
"rewards/rejected": -0.46548905968666077, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.154225815032242e-07, |
|
"logits/chosen": -1.5953037738800049, |
|
"logits/rejected": -1.0339276790618896, |
|
"logps/chosen": -624.7017822265625, |
|
"logps/rejected": -1236.400634765625, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17240996658802032, |
|
"rewards/margins": 0.28697705268859863, |
|
"rewards/rejected": -0.45938700437545776, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.974919888823164e-07, |
|
"logits/chosen": -1.4576761722564697, |
|
"logits/rejected": -1.1351208686828613, |
|
"logps/chosen": -624.9636840820312, |
|
"logps/rejected": -1355.374755859375, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19172172248363495, |
|
"rewards/margins": 0.3108896315097809, |
|
"rewards/rejected": -0.5026113390922546, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -1.5604914426803589, |
|
"logits/rejected": -0.9569327235221863, |
|
"logps/chosen": -538.5881958007812, |
|
"logps/rejected": -1219.4466552734375, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15165778994560242, |
|
"rewards/margins": 0.28516024351119995, |
|
"rewards/rejected": -0.4368179738521576, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.620488984679378e-07, |
|
"logits/chosen": -1.852301836013794, |
|
"logits/rejected": -1.1759949922561646, |
|
"logps/chosen": -659.7407836914062, |
|
"logps/rejected": -1306.58203125, |
|
"loss": 0.066, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16961313784122467, |
|
"rewards/margins": 0.3043574392795563, |
|
"rewards/rejected": -0.47397056221961975, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.445394716802754e-07, |
|
"logits/chosen": -1.6400655508041382, |
|
"logits/rejected": -1.1373300552368164, |
|
"logps/chosen": -735.5645751953125, |
|
"logps/rejected": -1331.341064453125, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1878480315208435, |
|
"rewards/margins": 0.27262082695961, |
|
"rewards/rejected": -0.4604688286781311, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.4932782649993896, |
|
"logits/rejected": -0.9225826263427734, |
|
"logps/chosen": -594.31982421875, |
|
"logps/rejected": -1214.7799072265625, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1466977298259735, |
|
"rewards/margins": 0.30178767442703247, |
|
"rewards/rejected": -0.4484853744506836, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.099524404308948e-07, |
|
"logits/chosen": -1.5658118724822998, |
|
"logits/rejected": -0.9562853574752808, |
|
"logps/chosen": -645.6381225585938, |
|
"logps/rejected": -1404.0450439453125, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.15565729141235352, |
|
"rewards/margins": 0.3485789895057678, |
|
"rewards/rejected": -0.5042362809181213, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.928778328007918e-07, |
|
"logits/chosen": -1.867717981338501, |
|
"logits/rejected": -0.9512616991996765, |
|
"logps/chosen": -674.2090454101562, |
|
"logps/rejected": -1350.9642333984375, |
|
"loss": 0.0573, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.13902226090431213, |
|
"rewards/margins": 0.3449219763278961, |
|
"rewards/rejected": -0.48394423723220825, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -1.5874018669128418, |
|
"logits/rejected": -1.1192893981933594, |
|
"logps/chosen": -586.0441284179688, |
|
"logps/rejected": -1302.760986328125, |
|
"loss": 0.0788, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13455680012702942, |
|
"rewards/margins": 0.2815057635307312, |
|
"rewards/rejected": -0.416062593460083, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.591738306429769e-07, |
|
"logits/chosen": -1.5606791973114014, |
|
"logits/rejected": -1.0470914840698242, |
|
"logps/chosen": -744.95263671875, |
|
"logps/rejected": -1285.8818359375, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18279020488262177, |
|
"rewards/margins": 0.2564167082309723, |
|
"rewards/rejected": -0.43920689821243286, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.425473564358457e-07, |
|
"logits/chosen": -1.6006968021392822, |
|
"logits/rejected": -1.0099936723709106, |
|
"logps/chosen": -720.3036499023438, |
|
"logps/rejected": -1374.542236328125, |
|
"loss": 0.0453, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18247629702091217, |
|
"rewards/margins": 0.3243308663368225, |
|
"rewards/rejected": -0.5068072080612183, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -1.6800225973129272, |
|
"logits/rejected": -0.9249428510665894, |
|
"logps/chosen": -472.9820251464844, |
|
"logps/rejected": -1168.8466796875, |
|
"loss": 0.0482, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1155017763376236, |
|
"rewards/margins": 0.33621880412101746, |
|
"rewards/rejected": -0.45172062516212463, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.097526647366379e-07, |
|
"logits/chosen": -1.7830703258514404, |
|
"logits/rejected": -1.3318331241607666, |
|
"logps/chosen": -588.818359375, |
|
"logps/rejected": -1168.9097900390625, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1334531605243683, |
|
"rewards/margins": 0.28530946373939514, |
|
"rewards/rejected": -0.41876259446144104, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.935872887769299e-07, |
|
"logits/chosen": -1.593977928161621, |
|
"logits/rejected": -1.0323081016540527, |
|
"logps/chosen": -722.8409423828125, |
|
"logps/rejected": -1217.73681640625, |
|
"loss": 0.0888, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1773270219564438, |
|
"rewards/margins": 0.2691357731819153, |
|
"rewards/rejected": -0.4464627802371979, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -1.6172115802764893, |
|
"logits/rejected": -1.0769383907318115, |
|
"logps/chosen": -590.12939453125, |
|
"logps/rejected": -1277.718505859375, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1619381308555603, |
|
"rewards/margins": 0.30093225836753845, |
|
"rewards/rejected": -0.46287041902542114, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.617274798504286e-07, |
|
"logits/chosen": -1.748478651046753, |
|
"logits/rejected": -0.9003183245658875, |
|
"logps/chosen": -748.8468017578125, |
|
"logps/rejected": -1387.6318359375, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17095907032489777, |
|
"rewards/margins": 0.3251902461051941, |
|
"rewards/rejected": -0.49614930152893066, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.460358074120518e-07, |
|
"logits/chosen": -1.5600354671478271, |
|
"logits/rejected": -1.1617937088012695, |
|
"logps/chosen": -635.3890380859375, |
|
"logps/rejected": -1242.238525390625, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16152323782444, |
|
"rewards/margins": 0.26770225167274475, |
|
"rewards/rejected": -0.42922544479370117, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -1.6008501052856445, |
|
"logits/rejected": -1.1396461725234985, |
|
"logps/chosen": -604.7008666992188, |
|
"logps/rejected": -1217.7623291015625, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1483077108860016, |
|
"rewards/margins": 0.29934391379356384, |
|
"rewards/rejected": -0.44765162467956543, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.151357245788917e-07, |
|
"logits/chosen": -1.6701332330703735, |
|
"logits/rejected": -0.9330341219902039, |
|
"logps/chosen": -628.5372924804688, |
|
"logps/rejected": -1414.3253173828125, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.154875710606575, |
|
"rewards/margins": 0.35905200242996216, |
|
"rewards/rejected": -0.513927698135376, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.999299915559956e-07, |
|
"logits/chosen": -1.4873206615447998, |
|
"logits/rejected": -0.9139396548271179, |
|
"logps/chosen": -534.3379516601562, |
|
"logps/rejected": -1303.0247802734375, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1485694944858551, |
|
"rewards/margins": 0.3279011845588684, |
|
"rewards/rejected": -0.4764706492424011, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.5426976680755615, |
|
"logits/rejected": -1.0285556316375732, |
|
"logps/chosen": -576.2730102539062, |
|
"logps/rejected": -1104.698974609375, |
|
"loss": 0.0913, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1463908702135086, |
|
"rewards/margins": 0.2579698860645294, |
|
"rewards/rejected": -0.40436071157455444, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.700137297712749e-07, |
|
"logits/chosen": -1.5507534742355347, |
|
"logits/rejected": -1.1646429300308228, |
|
"logps/chosen": -678.5272216796875, |
|
"logps/rejected": -1307.9720458984375, |
|
"loss": 0.0558, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19729594886302948, |
|
"rewards/margins": 0.2910037338733673, |
|
"rewards/rejected": -0.488299697637558, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.553057931370729e-07, |
|
"logits/chosen": -1.7067492008209229, |
|
"logits/rejected": -1.3142516613006592, |
|
"logps/chosen": -630.6482543945312, |
|
"logps/rejected": -1377.3018798828125, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1730308085680008, |
|
"rewards/margins": 0.33525392413139343, |
|
"rewards/rejected": -0.5082847476005554, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -1.5402835607528687, |
|
"logits/rejected": -0.9547150731086731, |
|
"logps/chosen": -717.29833984375, |
|
"logps/rejected": -1315.943115234375, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18772569298744202, |
|
"rewards/margins": 0.29532501101493835, |
|
"rewards/rejected": -0.48305076360702515, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.263966802018275e-07, |
|
"logits/chosen": -1.8520797491073608, |
|
"logits/rejected": -1.0401822328567505, |
|
"logps/chosen": -717.2581176757812, |
|
"logps/rejected": -1253.8289794921875, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16455858945846558, |
|
"rewards/margins": 0.29715651273727417, |
|
"rewards/rejected": -0.4617151618003845, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.121980087628802e-07, |
|
"logits/chosen": -1.7092409133911133, |
|
"logits/rejected": -1.2182010412216187, |
|
"logps/chosen": -586.7152099609375, |
|
"logps/rejected": -1214.601318359375, |
|
"loss": 0.051, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.137488454580307, |
|
"rewards/margins": 0.3142798840999603, |
|
"rewards/rejected": -0.45176833868026733, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -1.5101208686828613, |
|
"logits/rejected": -0.8277866244316101, |
|
"logps/chosen": -569.005615234375, |
|
"logps/rejected": -1231.107177734375, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13659389317035675, |
|
"rewards/margins": 0.32126671075820923, |
|
"rewards/rejected": -0.45786052942276, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.843185871337722e-07, |
|
"logits/chosen": -1.4731261730194092, |
|
"logits/rejected": -1.047473669052124, |
|
"logps/chosen": -693.8050537109375, |
|
"logps/rejected": -1432.60009765625, |
|
"loss": 0.0881, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19450101256370544, |
|
"rewards/margins": 0.3186071217060089, |
|
"rewards/rejected": -0.5131081342697144, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.706402525869633e-07, |
|
"logits/chosen": -1.3473070859909058, |
|
"logits/rejected": -1.2000024318695068, |
|
"logps/chosen": -589.5697021484375, |
|
"logps/rejected": -1317.3638916015625, |
|
"loss": 0.072, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16462837159633636, |
|
"rewards/margins": 0.2750544846057892, |
|
"rewards/rejected": -0.43968287110328674, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -1.6565711498260498, |
|
"logits/rejected": -1.1259280443191528, |
|
"logps/chosen": -551.4010009765625, |
|
"logps/rejected": -1178.7850341796875, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13680130243301392, |
|
"rewards/margins": 0.30534160137176514, |
|
"rewards/rejected": -0.44214290380477905, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.438122617983442e-07, |
|
"logits/chosen": -1.5101524591445923, |
|
"logits/rejected": -0.8424856066703796, |
|
"logps/chosen": -591.2506103515625, |
|
"logps/rejected": -1273.046142578125, |
|
"loss": 0.0566, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13511498272418976, |
|
"rewards/margins": 0.33536994457244873, |
|
"rewards/rejected": -0.4704849123954773, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.3066493009749853e-07, |
|
"logits/chosen": -1.7471688985824585, |
|
"logits/rejected": -1.1047070026397705, |
|
"logps/chosen": -767.1864013671875, |
|
"logps/rejected": -1380.8409423828125, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20385906100273132, |
|
"rewards/margins": 0.27676570415496826, |
|
"rewards/rejected": -0.4806247651576996, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -1.4887802600860596, |
|
"logits/rejected": -1.02260422706604, |
|
"logps/chosen": -691.14990234375, |
|
"logps/rejected": -1373.417724609375, |
|
"loss": 0.0965, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20843443274497986, |
|
"rewards/margins": 0.2790736258029938, |
|
"rewards/rejected": -0.4875081181526184, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.049092898095816e-07, |
|
"logits/chosen": -1.5980995893478394, |
|
"logits/rejected": -1.023189902305603, |
|
"logps/chosen": -595.4110107421875, |
|
"logps/rejected": -1312.7945556640625, |
|
"loss": 0.0653, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16371554136276245, |
|
"rewards/margins": 0.2928038537502289, |
|
"rewards/rejected": -0.4565194249153137, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9230321284847856e-07, |
|
"logits/chosen": -1.5363506078720093, |
|
"logits/rejected": -1.0504465103149414, |
|
"logps/chosen": -489.7978515625, |
|
"logps/rejected": -1119.5257568359375, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11015214771032333, |
|
"rewards/margins": 0.2800517678260803, |
|
"rewards/rejected": -0.39020389318466187, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -1.6714694499969482, |
|
"logits/rejected": -1.125106692314148, |
|
"logps/chosen": -604.4114379882812, |
|
"logps/rejected": -1297.808349609375, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12939824163913727, |
|
"rewards/margins": 0.35474586486816406, |
|
"rewards/rejected": -0.4841441214084625, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6764000653481263e-07, |
|
"logits/chosen": -1.7083511352539062, |
|
"logits/rejected": -1.0139703750610352, |
|
"logps/chosen": -604.1871337890625, |
|
"logps/rejected": -1156.0162353515625, |
|
"loss": 0.0919, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1465051770210266, |
|
"rewards/margins": 0.27671653032302856, |
|
"rewards/rejected": -0.4232216775417328, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.555850141530659e-07, |
|
"logits/chosen": -1.7717567682266235, |
|
"logits/rejected": -1.1824371814727783, |
|
"logps/chosen": -566.564453125, |
|
"logps/rejected": -1101.685302734375, |
|
"loss": 0.0733, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14669661223888397, |
|
"rewards/margins": 0.26966187357902527, |
|
"rewards/rejected": -0.41635847091674805, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -1.4069181680679321, |
|
"logits/rejected": -1.0164799690246582, |
|
"logps/chosen": -516.7618408203125, |
|
"logps/rejected": -1247.140380859375, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1548655778169632, |
|
"rewards/margins": 0.2954062819480896, |
|
"rewards/rejected": -0.4502718448638916, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3203347344004737e-07, |
|
"logits/chosen": -1.332287073135376, |
|
"logits/rejected": -1.1523934602737427, |
|
"logps/chosen": -562.6694946289062, |
|
"logps/rejected": -1197.0635986328125, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19367225468158722, |
|
"rewards/margins": 0.24201972782611847, |
|
"rewards/rejected": -0.4356919825077057, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2053896575809426e-07, |
|
"logits/chosen": -1.4262222051620483, |
|
"logits/rejected": -1.026829719543457, |
|
"logps/chosen": -569.9625854492188, |
|
"logps/rejected": -1329.806884765625, |
|
"loss": 0.0553, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15987573564052582, |
|
"rewards/margins": 0.3234714865684509, |
|
"rewards/rejected": -0.48334717750549316, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -1.403305172920227, |
|
"logits/rejected": -1.228492021560669, |
|
"logps/chosen": -531.6251220703125, |
|
"logps/rejected": -1268.7862548828125, |
|
"loss": 0.0586, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14155253767967224, |
|
"rewards/margins": 0.29640236496925354, |
|
"rewards/rejected": -0.4379549026489258, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.981174554287239e-07, |
|
"logits/chosen": -1.4185243844985962, |
|
"logits/rejected": -1.2006936073303223, |
|
"logps/chosen": -635.041259765625, |
|
"logps/rejected": -1473.925537109375, |
|
"loss": 0.0452, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.16828233003616333, |
|
"rewards/margins": 0.3489801287651062, |
|
"rewards/rejected": -0.5172623991966248, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.871923955178918e-07, |
|
"logits/chosen": -1.4334993362426758, |
|
"logits/rejected": -0.9838508367538452, |
|
"logps/chosen": -607.728271484375, |
|
"logps/rejected": -1310.9564208984375, |
|
"loss": 0.0531, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18386729061603546, |
|
"rewards/margins": 0.3068189322948456, |
|
"rewards/rejected": -0.49068623781204224, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -1.6019694805145264, |
|
"logits/rejected": -1.1419106721878052, |
|
"logps/chosen": -588.9683837890625, |
|
"logps/rejected": -1284.370849609375, |
|
"loss": 0.0586, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15091760456562042, |
|
"rewards/margins": 0.30733171105384827, |
|
"rewards/rejected": -0.4582493305206299, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -1.6173986196517944, |
|
"logits/rejected": -1.1335291862487793, |
|
"logps/chosen": -654.0939331054688, |
|
"logps/rejected": -1284.885986328125, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1872612088918686, |
|
"rewards/margins": 0.3091946244239807, |
|
"rewards/rejected": -0.4964558482170105, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.555713060848433e-07, |
|
"logits/chosen": -1.4421133995056152, |
|
"logits/rejected": -1.1747627258300781, |
|
"logps/chosen": -585.655517578125, |
|
"logps/rejected": -1284.943115234375, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1650674194097519, |
|
"rewards/margins": 0.31287819147109985, |
|
"rewards/rejected": -0.47794562578201294, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -1.7081034183502197, |
|
"logits/rejected": -0.94395911693573, |
|
"logps/chosen": -646.4754028320312, |
|
"logps/rejected": -1342.545166015625, |
|
"loss": 0.0709, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18191269040107727, |
|
"rewards/margins": 0.326556921005249, |
|
"rewards/rejected": -0.5084696412086487, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3546141258376786e-07, |
|
"logits/chosen": -1.3628816604614258, |
|
"logits/rejected": -0.9335969686508179, |
|
"logps/chosen": -603.8056640625, |
|
"logps/rejected": -1245.411376953125, |
|
"loss": 0.0594, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1723943054676056, |
|
"rewards/margins": 0.3125647008419037, |
|
"rewards/rejected": -0.48495903611183167, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.257003546333042e-07, |
|
"logits/chosen": -1.6881519556045532, |
|
"logits/rejected": -1.116393804550171, |
|
"logps/chosen": -680.4428100585938, |
|
"logps/rejected": -1417.369384765625, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21765998005867004, |
|
"rewards/margins": 0.3092319667339325, |
|
"rewards/rejected": -0.5268920063972473, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -1.7809503078460693, |
|
"logits/rejected": -1.235079050064087, |
|
"logps/chosen": -521.1368408203125, |
|
"logps/rejected": -1202.243408203125, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13693402707576752, |
|
"rewards/margins": 0.2993922233581543, |
|
"rewards/rejected": -0.43632620573043823, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0677024504760752e-07, |
|
"logits/chosen": -1.5252535343170166, |
|
"logits/rejected": -1.2412452697753906, |
|
"logps/chosen": -526.3180541992188, |
|
"logps/rejected": -1315.9249267578125, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13880392909049988, |
|
"rewards/margins": 0.35248517990112305, |
|
"rewards/rejected": -0.4912891387939453, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9760283363267684e-07, |
|
"logits/chosen": -1.778957724571228, |
|
"logits/rejected": -1.0381158590316772, |
|
"logps/chosen": -641.0303955078125, |
|
"logps/rejected": -1229.374267578125, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13701531291007996, |
|
"rewards/margins": 0.3019945025444031, |
|
"rewards/rejected": -0.43900981545448303, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -1.4902263879776, |
|
"logits/rejected": -0.8488144874572754, |
|
"logps/chosen": -537.8624267578125, |
|
"logps/rejected": -1138.487060546875, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12021678686141968, |
|
"rewards/margins": 0.3042137622833252, |
|
"rewards/rejected": -0.4244305491447449, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.798672690923828e-07, |
|
"logits/chosen": -1.604090929031372, |
|
"logits/rejected": -1.0566972494125366, |
|
"logps/chosen": -546.621826171875, |
|
"logps/rejected": -1305.0101318359375, |
|
"loss": 0.048, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1309491991996765, |
|
"rewards/margins": 0.3338525891304016, |
|
"rewards/rejected": -0.4648017883300781, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.713006526846439e-07, |
|
"logits/chosen": -1.6890567541122437, |
|
"logits/rejected": -1.0597703456878662, |
|
"logps/chosen": -638.17626953125, |
|
"logps/rejected": -1403.3868408203125, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14803606271743774, |
|
"rewards/margins": 0.36922168731689453, |
|
"rewards/rejected": -0.517257809638977, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -1.772783637046814, |
|
"logits/rejected": -1.145141363143921, |
|
"logps/chosen": -645.2138671875, |
|
"logps/rejected": -1247.31005859375, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16153082251548767, |
|
"rewards/margins": 0.27567344903945923, |
|
"rewards/rejected": -0.4372042715549469, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5477346284948292e-07, |
|
"logits/chosen": -1.6137062311172485, |
|
"logits/rejected": -1.2323498725891113, |
|
"logps/chosen": -652.45068359375, |
|
"logps/rejected": -1297.9696044921875, |
|
"loss": 0.1, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18204265832901, |
|
"rewards/margins": 0.2792537212371826, |
|
"rewards/rejected": -0.4612963795661926, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4681432143872133e-07, |
|
"logits/chosen": -1.5417966842651367, |
|
"logits/rejected": -0.92780601978302, |
|
"logps/chosen": -655.7986450195312, |
|
"logps/rejected": -1352.8719482421875, |
|
"loss": 0.0452, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1652592271566391, |
|
"rewards/margins": 0.32993632555007935, |
|
"rewards/rejected": -0.49519556760787964, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -1.7428719997406006, |
|
"logits/rejected": -1.234071969985962, |
|
"logps/chosen": -683.9173583984375, |
|
"logps/rejected": -1301.064208984375, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1897701770067215, |
|
"rewards/margins": 0.26576271653175354, |
|
"rewards/rejected": -0.45553287863731384, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.31508393714177e-07, |
|
"logits/chosen": -1.6525071859359741, |
|
"logits/rejected": -1.269195795059204, |
|
"logps/chosen": -573.1322631835938, |
|
"logps/rejected": -1224.781005859375, |
|
"loss": 0.0717, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1454993188381195, |
|
"rewards/margins": 0.29666373133659363, |
|
"rewards/rejected": -0.44216299057006836, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.241629335994471e-07, |
|
"logits/chosen": -1.4832892417907715, |
|
"logits/rejected": -0.7595690488815308, |
|
"logps/chosen": -546.4700927734375, |
|
"logps/rejected": -1105.534912109375, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12580379843711853, |
|
"rewards/margins": 0.2655871510505676, |
|
"rewards/rejected": -0.39139097929000854, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -1.729821801185608, |
|
"logits/rejected": -1.004158854484558, |
|
"logps/chosen": -648.3139038085938, |
|
"logps/rejected": -1181.37939453125, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17357492446899414, |
|
"rewards/margins": 0.2692447900772095, |
|
"rewards/rejected": -0.4428196847438812, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1009020308754587e-07, |
|
"logits/chosen": -1.6045188903808594, |
|
"logits/rejected": -1.1483290195465088, |
|
"logps/chosen": -791.281494140625, |
|
"logps/rejected": -1432.56591796875, |
|
"loss": 0.0588, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20312626659870148, |
|
"rewards/margins": 0.30023887753486633, |
|
"rewards/rejected": -0.503365159034729, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0336415203768962e-07, |
|
"logits/chosen": -1.4623007774353027, |
|
"logits/rejected": -0.9743086695671082, |
|
"logps/chosen": -684.1002807617188, |
|
"logps/rejected": -1355.83935546875, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15205714106559753, |
|
"rewards/margins": 0.3153363764286041, |
|
"rewards/rejected": -0.4673934876918793, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -1.5630353689193726, |
|
"logits/rejected": -1.1269890069961548, |
|
"logps/chosen": -500.76934814453125, |
|
"logps/rejected": -1174.6031494140625, |
|
"loss": 0.0748, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13523253798484802, |
|
"rewards/margins": 0.2928512990474701, |
|
"rewards/rejected": -0.4280838370323181, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.053559223036746e-08, |
|
"logits/chosen": -1.4134055376052856, |
|
"logits/rejected": -0.9375116229057312, |
|
"logps/chosen": -684.4464111328125, |
|
"logps/rejected": -1184.0179443359375, |
|
"loss": 0.097, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1784881353378296, |
|
"rewards/margins": 0.25100329518318176, |
|
"rewards/rejected": -0.42949143052101135, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.44341950176683e-08, |
|
"logits/chosen": -1.7058902978897095, |
|
"logits/rejected": -1.1464042663574219, |
|
"logps/chosen": -518.6105346679688, |
|
"logps/rejected": -1139.312744140625, |
|
"loss": 0.0706, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11180339008569717, |
|
"rewards/margins": 0.3046559691429138, |
|
"rewards/rejected": -0.4164593815803528, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -1.7066657543182373, |
|
"logits/rejected": -1.1052082777023315, |
|
"logps/chosen": -634.6749267578125, |
|
"logps/rejected": -1268.662353515625, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1490454524755478, |
|
"rewards/margins": 0.32269707322120667, |
|
"rewards/rejected": -0.4717424809932709, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.285980923996989e-08, |
|
"logits/chosen": -1.583821415901184, |
|
"logits/rejected": -0.9543322324752808, |
|
"logps/chosen": -620.9078979492188, |
|
"logps/rejected": -1256.222900390625, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14574182033538818, |
|
"rewards/margins": 0.3196600079536438, |
|
"rewards/rejected": -0.465401828289032, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -1.5047067403793335, |
|
"logits/rejected": -1.0737859010696411, |
|
"logps/chosen": -520.77294921875, |
|
"logps/rejected": -1155.83740234375, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1430043876171112, |
|
"rewards/margins": 0.28466781973838806, |
|
"rewards/rejected": -0.42767223715782166, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -1.3737636804580688, |
|
"logits/rejected": -1.1867458820343018, |
|
"logps/chosen": -623.9158935546875, |
|
"logps/rejected": -1239.9605712890625, |
|
"loss": 0.078, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1589566022157669, |
|
"rewards/margins": 0.27907633781433105, |
|
"rewards/rejected": -0.43803295493125916, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.707663716023021e-08, |
|
"logits/chosen": -1.5748332738876343, |
|
"logits/rejected": -1.063622236251831, |
|
"logps/chosen": -616.1275634765625, |
|
"logps/rejected": -1155.8143310546875, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15737947821617126, |
|
"rewards/margins": 0.2580162584781647, |
|
"rewards/rejected": -0.4153957962989807, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.22383298837098e-08, |
|
"logits/chosen": -1.6702196598052979, |
|
"logits/rejected": -0.9411935806274414, |
|
"logps/chosen": -713.735107421875, |
|
"logps/rejected": -1223.2811279296875, |
|
"loss": 0.0824, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17610540986061096, |
|
"rewards/margins": 0.29836469888687134, |
|
"rewards/rejected": -0.4744700491428375, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -1.500201940536499, |
|
"logits/rejected": -1.2098219394683838, |
|
"logps/chosen": -633.7230224609375, |
|
"logps/rejected": -1120.678466796875, |
|
"loss": 0.0984, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.17905206978321075, |
|
"rewards/margins": 0.2320644110441208, |
|
"rewards/rejected": -0.41111645102500916, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.319838323396691e-08, |
|
"logits/chosen": -1.6483008861541748, |
|
"logits/rejected": -1.2190577983856201, |
|
"logps/chosen": -614.4102783203125, |
|
"logps/rejected": -1260.265625, |
|
"loss": 0.0958, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16128340363502502, |
|
"rewards/margins": 0.26488471031188965, |
|
"rewards/rejected": -0.4261681139469147, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8997527136930004e-08, |
|
"logits/chosen": -1.429495930671692, |
|
"logits/rejected": -1.0348641872406006, |
|
"logps/chosen": -632.0252685546875, |
|
"logps/rejected": -1319.2584228515625, |
|
"loss": 0.0798, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17518481612205505, |
|
"rewards/margins": 0.2713126540184021, |
|
"rewards/rejected": -0.44649749994277954, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -1.551414132118225, |
|
"logits/rejected": -1.2644479274749756, |
|
"logps/chosen": -570.0910034179688, |
|
"logps/rejected": -1210.5382080078125, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1442852020263672, |
|
"rewards/margins": 0.26128098368644714, |
|
"rewards/rejected": -0.4055662155151367, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.1235869306123766e-08, |
|
"logits/chosen": -1.8781211376190186, |
|
"logits/rejected": -1.116159439086914, |
|
"logps/chosen": -654.27685546875, |
|
"logps/rejected": -1360.783447265625, |
|
"loss": 0.0586, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15850645303726196, |
|
"rewards/margins": 0.32171380519866943, |
|
"rewards/rejected": -0.480220228433609, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.767574008979007e-08, |
|
"logits/chosen": -1.7552858591079712, |
|
"logits/rejected": -1.1240081787109375, |
|
"logps/chosen": -649.2583618164062, |
|
"logps/rejected": -1285.60986328125, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1459151804447174, |
|
"rewards/margins": 0.29276043176651, |
|
"rewards/rejected": -0.43867558240890503, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -1.680267333984375, |
|
"logits/rejected": -0.990991473197937, |
|
"logps/chosen": -702.8885498046875, |
|
"logps/rejected": -1430.7728271484375, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.16736450791358948, |
|
"rewards/margins": 0.3372777998447418, |
|
"rewards/rejected": -0.5046423673629761, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1198423385220822e-08, |
|
"logits/chosen": -1.5231233835220337, |
|
"logits/rejected": -0.983518123626709, |
|
"logps/chosen": -574.9891967773438, |
|
"logps/rejected": -1192.521728515625, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13129422068595886, |
|
"rewards/margins": 0.2925337255001068, |
|
"rewards/rejected": -0.42382797598838806, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.82817971312621e-08, |
|
"logits/chosen": -1.689100980758667, |
|
"logits/rejected": -1.1573827266693115, |
|
"logps/chosen": -602.2514038085938, |
|
"logps/rejected": -1330.6248779296875, |
|
"loss": 0.0539, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1489819586277008, |
|
"rewards/margins": 0.32134801149368286, |
|
"rewards/rejected": -0.47032999992370605, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -1.7418603897094727, |
|
"logits/rejected": -1.2287579774856567, |
|
"logps/chosen": -539.5288696289062, |
|
"logps/rejected": -1092.644775390625, |
|
"loss": 0.084, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1297844648361206, |
|
"rewards/margins": 0.25757455825805664, |
|
"rewards/rejected": -0.38735905289649963, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3093872369654148e-08, |
|
"logits/chosen": -1.6315152645111084, |
|
"logits/rejected": -0.7913056015968323, |
|
"logps/chosen": -566.9249267578125, |
|
"logps/rejected": -1232.651611328125, |
|
"loss": 0.0596, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15806104242801666, |
|
"rewards/margins": 0.31945645809173584, |
|
"rewards/rejected": -0.4775174558162689, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0823023375489128e-08, |
|
"logits/chosen": -1.8176482915878296, |
|
"logits/rejected": -1.307217001914978, |
|
"logps/chosen": -596.6251220703125, |
|
"logps/rejected": -1259.1907958984375, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13308800756931305, |
|
"rewards/margins": 0.29899168014526367, |
|
"rewards/rejected": -0.4320797026157379, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -1.3138879537582397, |
|
"logits/rejected": -1.0358647108078003, |
|
"logps/chosen": -606.9632568359375, |
|
"logps/rejected": -1166.726806640625, |
|
"loss": 0.126, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17711606621742249, |
|
"rewards/margins": 0.24026331305503845, |
|
"rewards/rejected": -0.41737937927246094, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.9285359445145366e-09, |
|
"logits/chosen": -1.6356595754623413, |
|
"logits/rejected": -1.0119235515594482, |
|
"logps/chosen": -606.1302490234375, |
|
"logps/rejected": -1273.985595703125, |
|
"loss": 0.0883, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15847013890743256, |
|
"rewards/margins": 0.29371243715286255, |
|
"rewards/rejected": -0.4521825909614563, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.305234949880001e-09, |
|
"logits/chosen": -1.659597396850586, |
|
"logits/rejected": -1.0647908449172974, |
|
"logps/chosen": -593.581787109375, |
|
"logps/rejected": -1240.505615234375, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15421947836875916, |
|
"rewards/margins": 0.2849588096141815, |
|
"rewards/rejected": -0.4391782879829407, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -1.4947640895843506, |
|
"logits/rejected": -1.1128860712051392, |
|
"logps/chosen": -551.9004516601562, |
|
"logps/rejected": -1167.104736328125, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13484862446784973, |
|
"rewards/margins": 0.2813241481781006, |
|
"rewards/rejected": -0.4161728024482727, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7072216536885855e-09, |
|
"logits/chosen": -1.6360046863555908, |
|
"logits/rejected": -1.196569800376892, |
|
"logps/chosen": -659.7826538085938, |
|
"logps/rejected": -1305.6195068359375, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17655882239341736, |
|
"rewards/margins": 0.3209526538848877, |
|
"rewards/rejected": -0.49751147627830505, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.7327344598702667e-09, |
|
"logits/chosen": -1.4936860799789429, |
|
"logits/rejected": -1.0485936403274536, |
|
"logps/chosen": -584.678955078125, |
|
"logps/rejected": -1314.91650390625, |
|
"loss": 0.071, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14625641703605652, |
|
"rewards/margins": 0.3208056390285492, |
|
"rewards/rejected": -0.4670620858669281, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -1.4201769828796387, |
|
"logits/rejected": -1.045179009437561, |
|
"logps/chosen": -620.8831787109375, |
|
"logps/rejected": -1206.128662109375, |
|
"loss": 0.0691, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15646150708198547, |
|
"rewards/margins": 0.29528263211250305, |
|
"rewards/rejected": -0.4517441391944885, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.332211510807427e-10, |
|
"logits/chosen": -1.530029296875, |
|
"logits/rejected": -1.3592134714126587, |
|
"logps/chosen": -523.2725219726562, |
|
"logps/rejected": -1284.657958984375, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.13241766393184662, |
|
"rewards/margins": 0.30461427569389343, |
|
"rewards/rejected": -0.43703192472457886, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0830763387897902e-10, |
|
"logits/chosen": -1.5114389657974243, |
|
"logits/rejected": -0.964741587638855, |
|
"logps/chosen": -626.4848022460938, |
|
"logps/rejected": -1381.5963134765625, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.16040828824043274, |
|
"rewards/margins": 0.3570694923400879, |
|
"rewards/rejected": -0.517477810382843, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.877239465713501, |
|
"logits/rejected": -1.3499794006347656, |
|
"logps/chosen": -648.46875, |
|
"logps/rejected": -1231.8519287109375, |
|
"loss": 0.0717, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18231454491615295, |
|
"rewards/margins": 0.2813524603843689, |
|
"rewards/rejected": -0.46366700530052185, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3750, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0800992045879364, |
|
"train_runtime": 15706.9626, |
|
"train_samples_per_second": 0.955, |
|
"train_steps_per_second": 0.239 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|