|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.666666666666667e-08, |
|
"logits/chosen": -1.8473987579345703, |
|
"logits/rejected": -1.8474841117858887, |
|
"logps/chosen": -628.1939086914062, |
|
"logps/rejected": -1017.0994873046875, |
|
"loss": 0.1606, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6666666666666668e-07, |
|
"logits/chosen": -1.4316620826721191, |
|
"logits/rejected": -0.9766409397125244, |
|
"logps/chosen": -518.1865234375, |
|
"logps/rejected": -875.9132690429688, |
|
"loss": 0.2429, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -6.813897016400006e-06, |
|
"rewards/margins": 6.345117071759887e-06, |
|
"rewards/rejected": -1.3159031368559226e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.3333333333333335e-07, |
|
"logits/chosen": -1.6879074573516846, |
|
"logits/rejected": -1.2949936389923096, |
|
"logps/chosen": -449.43621826171875, |
|
"logps/rejected": -834.7189331054688, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00029412214644253254, |
|
"rewards/margins": 0.00020992716599721462, |
|
"rewards/rejected": 8.419497316936031e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.000000000000001e-07, |
|
"logits/chosen": -1.7546484470367432, |
|
"logits/rejected": -1.0084898471832275, |
|
"logps/chosen": -496.73309326171875, |
|
"logps/rejected": -889.1583862304688, |
|
"loss": 0.2307, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0016126197297126055, |
|
"rewards/margins": 0.004062547814100981, |
|
"rewards/rejected": -0.002449928317219019, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.666666666666667e-07, |
|
"logits/chosen": -1.8569256067276, |
|
"logits/rejected": -1.1899112462997437, |
|
"logps/chosen": -448.177490234375, |
|
"logps/rejected": -852.029296875, |
|
"loss": 0.2095, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.003779575927183032, |
|
"rewards/margins": 0.012063580565154552, |
|
"rewards/rejected": -0.008284004405140877, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": -1.5573432445526123, |
|
"logits/rejected": -1.0242127180099487, |
|
"logps/chosen": -441.22540283203125, |
|
"logps/rejected": -806.1510009765625, |
|
"loss": 0.1942, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.011921902187168598, |
|
"rewards/margins": 0.029057607054710388, |
|
"rewards/rejected": -0.017135705798864365, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"logits/chosen": -1.7608438730239868, |
|
"logits/rejected": -1.3249375820159912, |
|
"logps/chosen": -435.998779296875, |
|
"logps/rejected": -766.541015625, |
|
"loss": 0.1739, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.01454467885196209, |
|
"rewards/margins": 0.036728017032146454, |
|
"rewards/rejected": -0.022183334454894066, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1666666666666668e-06, |
|
"logits/chosen": -1.7882951498031616, |
|
"logits/rejected": -0.9916883707046509, |
|
"logps/chosen": -463.21746826171875, |
|
"logps/rejected": -798.3153076171875, |
|
"loss": 0.1613, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.01092723198235035, |
|
"rewards/margins": 0.05515831708908081, |
|
"rewards/rejected": -0.04423108696937561, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"logits/chosen": -1.6877580881118774, |
|
"logits/rejected": -1.1066850423812866, |
|
"logps/chosen": -412.07342529296875, |
|
"logps/rejected": -912.7047119140625, |
|
"loss": 0.1534, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0010939339408650994, |
|
"rewards/margins": 0.0824560895562172, |
|
"rewards/rejected": -0.08355002105236053, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5e-06, |
|
"logits/chosen": -1.5878902673721313, |
|
"logits/rejected": -1.1904621124267578, |
|
"logps/chosen": -521.6202392578125, |
|
"logps/rejected": -905.14794921875, |
|
"loss": 0.1709, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05081214755773544, |
|
"rewards/margins": 0.08134042471647263, |
|
"rewards/rejected": -0.13215258717536926, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"logits/chosen": -1.6791446208953857, |
|
"logits/rejected": -1.182875394821167, |
|
"logps/chosen": -579.6087036132812, |
|
"logps/rejected": -993.3094482421875, |
|
"loss": 0.149, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09510762244462967, |
|
"rewards/margins": 0.09448549151420593, |
|
"rewards/rejected": -0.1895931363105774, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8333333333333333e-06, |
|
"logits/chosen": -1.6629585027694702, |
|
"logits/rejected": -1.0604465007781982, |
|
"logps/chosen": -638.2183837890625, |
|
"logps/rejected": -1162.047119140625, |
|
"loss": 0.1087, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12550528347492218, |
|
"rewards/margins": 0.14394623041152954, |
|
"rewards/rejected": -0.2694514989852905, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -1.751172423362732, |
|
"logits/rejected": -1.2760882377624512, |
|
"logps/chosen": -573.3961181640625, |
|
"logps/rejected": -981.23486328125, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09364907443523407, |
|
"rewards/margins": 0.13833266496658325, |
|
"rewards/rejected": -0.23198175430297852, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.166666666666667e-06, |
|
"logits/chosen": -1.813155174255371, |
|
"logits/rejected": -1.2474279403686523, |
|
"logps/chosen": -560.3488159179688, |
|
"logps/rejected": -1005.7208251953125, |
|
"loss": 0.1478, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08083701133728027, |
|
"rewards/margins": 0.14707627892494202, |
|
"rewards/rejected": -0.2279132902622223, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3333333333333336e-06, |
|
"logits/chosen": -1.9064534902572632, |
|
"logits/rejected": -1.087799072265625, |
|
"logps/chosen": -576.6889038085938, |
|
"logps/rejected": -1124.29345703125, |
|
"loss": 0.1001, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11049157381057739, |
|
"rewards/margins": 0.1889657974243164, |
|
"rewards/rejected": -0.2994573712348938, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -1.498268723487854, |
|
"logits/rejected": -0.9696500897407532, |
|
"logps/chosen": -623.1941528320312, |
|
"logps/rejected": -1241.6993408203125, |
|
"loss": 0.0989, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18307581543922424, |
|
"rewards/margins": 0.2211984097957611, |
|
"rewards/rejected": -0.40427422523498535, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.666666666666667e-06, |
|
"logits/chosen": -1.6134288311004639, |
|
"logits/rejected": -0.7816992998123169, |
|
"logps/chosen": -624.9563598632812, |
|
"logps/rejected": -1204.363037109375, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17922857403755188, |
|
"rewards/margins": 0.23642221093177795, |
|
"rewards/rejected": -0.41565078496932983, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8333333333333335e-06, |
|
"logits/chosen": -1.5277217626571655, |
|
"logits/rejected": -1.1026668548583984, |
|
"logps/chosen": -653.5545654296875, |
|
"logps/rejected": -1159.62841796875, |
|
"loss": 0.0954, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13700352609157562, |
|
"rewards/margins": 0.22152018547058105, |
|
"rewards/rejected": -0.35852372646331787, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3e-06, |
|
"logits/chosen": -1.6350257396697998, |
|
"logits/rejected": -1.2443767786026, |
|
"logps/chosen": -666.1326904296875, |
|
"logps/rejected": -1370.72412109375, |
|
"loss": 0.0655, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19158519804477692, |
|
"rewards/margins": 0.27321988344192505, |
|
"rewards/rejected": -0.46480506658554077, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1666666666666667e-06, |
|
"logits/chosen": -1.6068347692489624, |
|
"logits/rejected": -1.0125865936279297, |
|
"logps/chosen": -535.7831420898438, |
|
"logps/rejected": -1120.1278076171875, |
|
"loss": 0.1016, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1586967408657074, |
|
"rewards/margins": 0.20938825607299805, |
|
"rewards/rejected": -0.36808499693870544, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -1.7533979415893555, |
|
"logits/rejected": -1.1328980922698975, |
|
"logps/chosen": -582.9685668945312, |
|
"logps/rejected": -1131.854736328125, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11236470937728882, |
|
"rewards/margins": 0.22253580391407013, |
|
"rewards/rejected": -0.33490046858787537, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.5e-06, |
|
"logits/chosen": -1.719315528869629, |
|
"logits/rejected": -0.9589029550552368, |
|
"logps/chosen": -651.3718872070312, |
|
"logps/rejected": -1270.794677734375, |
|
"loss": 0.1192, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1939801275730133, |
|
"rewards/margins": 0.23451688885688782, |
|
"rewards/rejected": -0.4284970164299011, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6666666666666666e-06, |
|
"logits/chosen": -1.8104807138442993, |
|
"logits/rejected": -1.117309808731079, |
|
"logps/chosen": -525.1863403320312, |
|
"logps/rejected": -1155.667724609375, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10046984255313873, |
|
"rewards/margins": 0.22820088267326355, |
|
"rewards/rejected": -0.3286706805229187, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.833333333333334e-06, |
|
"logits/chosen": -1.6729333400726318, |
|
"logits/rejected": -1.3031710386276245, |
|
"logps/chosen": -526.7484741210938, |
|
"logps/rejected": -1042.1409912109375, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13879971206188202, |
|
"rewards/margins": 0.17394515872001648, |
|
"rewards/rejected": -0.3127448856830597, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.7155532836914062, |
|
"logits/rejected": -1.3224143981933594, |
|
"logps/chosen": -568.672607421875, |
|
"logps/rejected": -1282.6646728515625, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1310817301273346, |
|
"rewards/margins": 0.2731386721134186, |
|
"rewards/rejected": -0.4042204022407532, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -1.694848656654358, |
|
"logits/rejected": -1.1057406663894653, |
|
"logps/chosen": -608.3328247070312, |
|
"logps/rejected": -1172.0714111328125, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1783088594675064, |
|
"rewards/margins": 0.22445425391197205, |
|
"rewards/rejected": -0.40276312828063965, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.333333333333334e-06, |
|
"logits/chosen": -1.4572826623916626, |
|
"logits/rejected": -0.9631636738777161, |
|
"logps/chosen": -678.6790161132812, |
|
"logps/rejected": -1274.690185546875, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18504425883293152, |
|
"rewards/margins": 0.24944782257080078, |
|
"rewards/rejected": -0.4344921112060547, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5e-06, |
|
"logits/chosen": -1.5380445718765259, |
|
"logits/rejected": -0.9687334299087524, |
|
"logps/chosen": -598.2360229492188, |
|
"logps/rejected": -1067.4052734375, |
|
"loss": 0.1052, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15487895905971527, |
|
"rewards/margins": 0.20970329642295837, |
|
"rewards/rejected": -0.36458227038383484, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.666666666666667e-06, |
|
"logits/chosen": -1.5993750095367432, |
|
"logits/rejected": -1.2316395044326782, |
|
"logps/chosen": -559.2089233398438, |
|
"logps/rejected": -1135.50048828125, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16124552488327026, |
|
"rewards/margins": 0.21940717101097107, |
|
"rewards/rejected": -0.38065269589424133, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.833333333333333e-06, |
|
"logits/chosen": -1.785839319229126, |
|
"logits/rejected": -1.2803369760513306, |
|
"logps/chosen": -712.4893188476562, |
|
"logps/rejected": -1194.8753662109375, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2521515488624573, |
|
"rewards/margins": 0.1936561018228531, |
|
"rewards/rejected": -0.44580763578414917, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -1.8430877923965454, |
|
"logits/rejected": -1.1291439533233643, |
|
"logps/chosen": -703.2359619140625, |
|
"logps/rejected": -1348.4500732421875, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2468656599521637, |
|
"rewards/margins": 0.2653115391731262, |
|
"rewards/rejected": -0.5121771693229675, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999830770009406e-06, |
|
"logits/chosen": -1.5511696338653564, |
|
"logits/rejected": -0.8779551386833191, |
|
"logps/chosen": -645.1519775390625, |
|
"logps/rejected": -1245.902587890625, |
|
"loss": 0.1052, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16054175794124603, |
|
"rewards/margins": 0.2655673027038574, |
|
"rewards/rejected": -0.4261090159416199, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999323102948655e-06, |
|
"logits/chosen": -1.9738438129425049, |
|
"logits/rejected": -1.4079262018203735, |
|
"logps/chosen": -632.3164672851562, |
|
"logps/rejected": -1090.2205810546875, |
|
"loss": 0.1445, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1474119871854782, |
|
"rewards/margins": 0.1809072643518448, |
|
"rewards/rejected": -0.328319251537323, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99847706754774e-06, |
|
"logits/chosen": -1.6942188739776611, |
|
"logits/rejected": -1.3499834537506104, |
|
"logps/chosen": -545.2244262695312, |
|
"logps/rejected": -1028.6619873046875, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09895402193069458, |
|
"rewards/margins": 0.21848241984844208, |
|
"rewards/rejected": -0.31743642687797546, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997292778346312e-06, |
|
"logits/chosen": -1.8349497318267822, |
|
"logits/rejected": -1.173274278640747, |
|
"logps/chosen": -638.5476684570312, |
|
"logps/rejected": -1342.106201171875, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15458893775939941, |
|
"rewards/margins": 0.2725256085395813, |
|
"rewards/rejected": -0.4271145761013031, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995770395678171e-06, |
|
"logits/chosen": -1.6314140558242798, |
|
"logits/rejected": -1.0475269556045532, |
|
"logps/chosen": -508.9056701660156, |
|
"logps/rejected": -1216.2740478515625, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08030419796705246, |
|
"rewards/margins": 0.28404951095581055, |
|
"rewards/rejected": -0.3643537163734436, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -1.792074203491211, |
|
"logits/rejected": -1.2048676013946533, |
|
"logps/chosen": -606.4808959960938, |
|
"logps/rejected": -1278.70703125, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14861299097537994, |
|
"rewards/margins": 0.27981966733932495, |
|
"rewards/rejected": -0.4284326136112213, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.991712220111265e-06, |
|
"logits/chosen": -1.8713674545288086, |
|
"logits/rejected": -1.2337167263031006, |
|
"logps/chosen": -554.6063232421875, |
|
"logps/rejected": -996.7093505859375, |
|
"loss": 0.1127, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12071527540683746, |
|
"rewards/margins": 0.2035401612520218, |
|
"rewards/rejected": -0.32425546646118164, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989176976624511e-06, |
|
"logits/chosen": -1.7384974956512451, |
|
"logits/rejected": -1.1386005878448486, |
|
"logps/chosen": -556.4752807617188, |
|
"logps/rejected": -1155.403564453125, |
|
"loss": 0.0799, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13758501410484314, |
|
"rewards/margins": 0.2377336472272873, |
|
"rewards/rejected": -0.3753186762332916, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.986304738420684e-06, |
|
"logits/chosen": -1.8614715337753296, |
|
"logits/rejected": -1.1824796199798584, |
|
"logps/chosen": -628.7012329101562, |
|
"logps/rejected": -1232.29296875, |
|
"loss": 0.0766, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13570387661457062, |
|
"rewards/margins": 0.2571316361427307, |
|
"rewards/rejected": -0.3928355276584625, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -1.7244367599487305, |
|
"logits/rejected": -1.2317596673965454, |
|
"logps/chosen": -643.6784057617188, |
|
"logps/rejected": -1263.03173828125, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20891329646110535, |
|
"rewards/margins": 0.2505062222480774, |
|
"rewards/rejected": -0.4594195485115051, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.979550878853154e-06, |
|
"logits/chosen": -1.7407877445220947, |
|
"logits/rejected": -0.9090331196784973, |
|
"logps/chosen": -650.4755859375, |
|
"logps/rejected": -1323.707763671875, |
|
"loss": 0.0489, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15291151404380798, |
|
"rewards/margins": 0.3299728333950043, |
|
"rewards/rejected": -0.48288434743881226, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.975670171853926e-06, |
|
"logits/chosen": -1.802283525466919, |
|
"logits/rejected": -1.0241405963897705, |
|
"logps/chosen": -637.089599609375, |
|
"logps/rejected": -1355.4974365234375, |
|
"loss": 0.0541, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.1302250325679779, |
|
"rewards/margins": 0.318683922290802, |
|
"rewards/rejected": -0.4489089548587799, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.971454298742779e-06, |
|
"logits/chosen": -1.6149762868881226, |
|
"logits/rejected": -1.1530131101608276, |
|
"logps/chosen": -572.8499755859375, |
|
"logps/rejected": -1171.6473388671875, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14183470606803894, |
|
"rewards/margins": 0.26052767038345337, |
|
"rewards/rejected": -0.4023623466491699, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966903830281449e-06, |
|
"logits/chosen": -1.600284218788147, |
|
"logits/rejected": -1.0997772216796875, |
|
"logps/chosen": -637.7472534179688, |
|
"logps/rejected": -1324.5953369140625, |
|
"loss": 0.073, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16721096634864807, |
|
"rewards/margins": 0.30269843339920044, |
|
"rewards/rejected": -0.4699093699455261, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.962019382530521e-06, |
|
"logits/chosen": -1.4888650178909302, |
|
"logits/rejected": -0.9744981527328491, |
|
"logps/chosen": -489.33538818359375, |
|
"logps/rejected": -1064.3255615234375, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1382354497909546, |
|
"rewards/margins": 0.23734359443187714, |
|
"rewards/rejected": -0.3755790591239929, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.956801616766033e-06, |
|
"logits/chosen": -1.4963598251342773, |
|
"logits/rejected": -1.1484572887420654, |
|
"logps/chosen": -499.119384765625, |
|
"logps/rejected": -1084.587158203125, |
|
"loss": 0.1161, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14311179518699646, |
|
"rewards/margins": 0.24761180579662323, |
|
"rewards/rejected": -0.3907236158847809, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951251239389949e-06, |
|
"logits/chosen": -1.7252105474472046, |
|
"logits/rejected": -1.16670823097229, |
|
"logps/chosen": -446.770263671875, |
|
"logps/rejected": -1193.3294677734375, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07284434884786606, |
|
"rewards/margins": 0.26454949378967285, |
|
"rewards/rejected": -0.3373938202857971, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -1.9960441589355469, |
|
"logits/rejected": -1.3124287128448486, |
|
"logps/chosen": -519.2340087890625, |
|
"logps/rejected": -1184.5723876953125, |
|
"loss": 0.064, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0516168549656868, |
|
"rewards/margins": 0.296741783618927, |
|
"rewards/rejected": -0.3483586013317108, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.939155700460536e-06, |
|
"logits/chosen": -1.7496163845062256, |
|
"logits/rejected": -1.1100032329559326, |
|
"logps/chosen": -532.7080078125, |
|
"logps/rejected": -1199.1121826171875, |
|
"loss": 0.1005, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13813085854053497, |
|
"rewards/margins": 0.2582959532737732, |
|
"rewards/rejected": -0.396426796913147, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.93261217644956e-06, |
|
"logits/chosen": -1.8717424869537354, |
|
"logits/rejected": -1.1161689758300781, |
|
"logps/chosen": -709.0521240234375, |
|
"logps/rejected": -1247.1685791015625, |
|
"loss": 0.0938, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21577858924865723, |
|
"rewards/margins": 0.2319638729095459, |
|
"rewards/rejected": -0.44774240255355835, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.925739315689991e-06, |
|
"logits/chosen": -1.7890201807022095, |
|
"logits/rejected": -1.2021349668502808, |
|
"logps/chosen": -548.541748046875, |
|
"logps/rejected": -1329.703857421875, |
|
"loss": 0.0815, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.11823161691427231, |
|
"rewards/margins": 0.31565752625465393, |
|
"rewards/rejected": -0.43388909101486206, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.91853804865716e-06, |
|
"logits/chosen": -2.0370450019836426, |
|
"logits/rejected": -1.1039373874664307, |
|
"logps/chosen": -516.0086669921875, |
|
"logps/rejected": -1140.8289794921875, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.031420618295669556, |
|
"rewards/margins": 0.28368836641311646, |
|
"rewards/rejected": -0.3151089549064636, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.911009350287348e-06, |
|
"logits/chosen": -2.096604824066162, |
|
"logits/rejected": -1.4794657230377197, |
|
"logps/chosen": -484.7252502441406, |
|
"logps/rejected": -1161.0413818359375, |
|
"loss": 0.0601, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.031848691403865814, |
|
"rewards/margins": 0.29393482208251953, |
|
"rewards/rejected": -0.32578352093696594, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.903154239845798e-06, |
|
"logits/chosen": -1.6835973262786865, |
|
"logits/rejected": -1.0456864833831787, |
|
"logps/chosen": -528.0047607421875, |
|
"logps/rejected": -1127.9058837890625, |
|
"loss": 0.1008, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.11576949059963226, |
|
"rewards/margins": 0.23724015057086945, |
|
"rewards/rejected": -0.3530096709728241, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.894973780788722e-06, |
|
"logits/chosen": -1.7007629871368408, |
|
"logits/rejected": -1.2928742170333862, |
|
"logps/chosen": -464.3915100097656, |
|
"logps/rejected": -941.9600830078125, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10153596103191376, |
|
"rewards/margins": 0.20636403560638428, |
|
"rewards/rejected": -0.30790001153945923, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.88646908061933e-06, |
|
"logits/chosen": -1.7848955392837524, |
|
"logits/rejected": -1.275628924369812, |
|
"logps/chosen": -571.953369140625, |
|
"logps/rejected": -1022.10595703125, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12155290693044662, |
|
"rewards/margins": 0.1930900365114212, |
|
"rewards/rejected": -0.3146429657936096, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"logits/chosen": -1.714331030845642, |
|
"logits/rejected": -1.2135049104690552, |
|
"logps/chosen": -649.1993408203125, |
|
"logps/rejected": -1231.960205078125, |
|
"loss": 0.0826, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.11209288984537125, |
|
"rewards/margins": 0.2815489172935486, |
|
"rewards/rejected": -0.39364179968833923, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.868491606285823e-06, |
|
"logits/chosen": -1.8973315954208374, |
|
"logits/rejected": -1.380531907081604, |
|
"logps/chosen": -590.170166015625, |
|
"logps/rejected": -1119.1627197265625, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.12205713987350464, |
|
"rewards/margins": 0.2438439577817917, |
|
"rewards/rejected": -0.36590105295181274, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.859021265983959e-06, |
|
"logits/chosen": -1.8590736389160156, |
|
"logits/rejected": -1.3631782531738281, |
|
"logps/chosen": -622.0940551757812, |
|
"logps/rejected": -1332.389404296875, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.13945552706718445, |
|
"rewards/margins": 0.28879013657569885, |
|
"rewards/rejected": -0.4282456338405609, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.587874174118042, |
|
"logits/rejected": -1.3274825811386108, |
|
"logps/chosen": -697.1937255859375, |
|
"logps/rejected": -1232.4580078125, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2024591863155365, |
|
"rewards/margins": 0.22971025109291077, |
|
"rewards/rejected": -0.43216943740844727, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.839123789598829e-06, |
|
"logits/chosen": -1.5191724300384521, |
|
"logits/rejected": -1.2849657535552979, |
|
"logps/chosen": -543.1875610351562, |
|
"logps/rejected": -1340.810791015625, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11399342864751816, |
|
"rewards/margins": 0.3168831467628479, |
|
"rewards/rejected": -0.43087658286094666, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828699347315357e-06, |
|
"logits/chosen": -1.8808469772338867, |
|
"logits/rejected": -1.1103613376617432, |
|
"logps/chosen": -547.0265502929688, |
|
"logps/rejected": -1164.1422119140625, |
|
"loss": 0.0665, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07433022558689117, |
|
"rewards/margins": 0.2945881187915802, |
|
"rewards/rejected": -0.36891835927963257, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.817959636416969e-06, |
|
"logits/chosen": -1.5736801624298096, |
|
"logits/rejected": -0.8019571304321289, |
|
"logps/chosen": -586.7124633789062, |
|
"logps/rejected": -1286.5223388671875, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1401849091053009, |
|
"rewards/margins": 0.3048226535320282, |
|
"rewards/rejected": -0.4450075030326843, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.806906110888606e-06, |
|
"logits/chosen": -1.6860675811767578, |
|
"logits/rejected": -1.2921956777572632, |
|
"logps/chosen": -588.60986328125, |
|
"logps/rejected": -1157.620361328125, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1366177797317505, |
|
"rewards/margins": 0.2525423467159271, |
|
"rewards/rejected": -0.3891601264476776, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7955402672006855e-06, |
|
"logits/chosen": -1.8331724405288696, |
|
"logits/rejected": -1.2644875049591064, |
|
"logps/chosen": -634.9255981445312, |
|
"logps/rejected": -1305.0550537109375, |
|
"loss": 0.084, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14361020922660828, |
|
"rewards/margins": 0.3017338216304779, |
|
"rewards/rejected": -0.4453439712524414, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.783863644106502e-06, |
|
"logits/chosen": -1.7160533666610718, |
|
"logits/rejected": -1.132188320159912, |
|
"logps/chosen": -611.103759765625, |
|
"logps/rejected": -1045.3682861328125, |
|
"loss": 0.1126, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1587737798690796, |
|
"rewards/margins": 0.1975192278623581, |
|
"rewards/rejected": -0.3562930226325989, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7718778224339115e-06, |
|
"logits/chosen": -1.7684398889541626, |
|
"logits/rejected": -1.1700496673583984, |
|
"logps/chosen": -532.6051025390625, |
|
"logps/rejected": -1173.128662109375, |
|
"loss": 0.0925, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08183015137910843, |
|
"rewards/margins": 0.2822849452495575, |
|
"rewards/rejected": -0.3641151189804077, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.759584424871302e-06, |
|
"logits/chosen": -1.9278764724731445, |
|
"logits/rejected": -0.9649882316589355, |
|
"logps/chosen": -626.4827270507812, |
|
"logps/rejected": -1222.0137939453125, |
|
"loss": 0.0645, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.106475330889225, |
|
"rewards/margins": 0.29441386461257935, |
|
"rewards/rejected": -0.40088921785354614, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.746985115747918e-06, |
|
"logits/chosen": -1.6022624969482422, |
|
"logits/rejected": -1.3960731029510498, |
|
"logps/chosen": -610.5640869140625, |
|
"logps/rejected": -1229.7557373046875, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1899951994419098, |
|
"rewards/margins": 0.2415318489074707, |
|
"rewards/rejected": -0.4315270483493805, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.734081600808531e-06, |
|
"logits/chosen": -1.6148655414581299, |
|
"logits/rejected": -1.0519297122955322, |
|
"logps/chosen": -587.4749755859375, |
|
"logps/rejected": -1153.4019775390625, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1272963136434555, |
|
"rewards/margins": 0.2538054585456848, |
|
"rewards/rejected": -0.3811017572879791, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.720875626982511e-06, |
|
"logits/chosen": -1.5970103740692139, |
|
"logits/rejected": -1.1024632453918457, |
|
"logps/chosen": -622.9388427734375, |
|
"logps/rejected": -1346.39453125, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17155757546424866, |
|
"rewards/margins": 0.3096774220466614, |
|
"rewards/rejected": -0.4812350273132324, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -1.564493179321289, |
|
"logits/rejected": -1.182403564453125, |
|
"logps/chosen": -557.9251098632812, |
|
"logps/rejected": -1197.14697265625, |
|
"loss": 0.085, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13098809123039246, |
|
"rewards/margins": 0.27282702922821045, |
|
"rewards/rejected": -0.4038150906562805, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.693563494886455e-06, |
|
"logits/chosen": -1.5731178522109985, |
|
"logits/rejected": -1.2045758962631226, |
|
"logps/chosen": -589.6763916015625, |
|
"logps/rejected": -1163.5670166015625, |
|
"loss": 0.0924, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1421278566122055, |
|
"rewards/margins": 0.26834186911582947, |
|
"rewards/rejected": -0.4104697108268738, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.679461034241906e-06, |
|
"logits/chosen": -1.5476421117782593, |
|
"logits/rejected": -1.2368953227996826, |
|
"logps/chosen": -624.2233276367188, |
|
"logps/rejected": -1224.5426025390625, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1516999900341034, |
|
"rewards/margins": 0.26089781522750854, |
|
"rewards/rejected": -0.41259780526161194, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits/chosen": -1.521240234375, |
|
"logits/rejected": -0.895469069480896, |
|
"logps/chosen": -604.10205078125, |
|
"logps/rejected": -1188.8580322265625, |
|
"loss": 0.0574, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1560906320810318, |
|
"rewards/margins": 0.2723260521888733, |
|
"rewards/rejected": -0.4284166693687439, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.650372869738415e-06, |
|
"logits/chosen": -1.7068440914154053, |
|
"logits/rejected": -1.068950891494751, |
|
"logps/chosen": -696.2512817382812, |
|
"logps/rejected": -1256.27001953125, |
|
"loss": 0.0588, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15982361137866974, |
|
"rewards/margins": 0.2860753834247589, |
|
"rewards/rejected": -0.44589900970458984, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.635391103951315e-06, |
|
"logits/chosen": -1.7902898788452148, |
|
"logits/rejected": -1.3396075963974, |
|
"logps/chosen": -573.7864990234375, |
|
"logps/rejected": -1184.572998046875, |
|
"loss": 0.086, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14987358450889587, |
|
"rewards/margins": 0.27520817518234253, |
|
"rewards/rejected": -0.4250817894935608, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.620120240391065e-06, |
|
"logits/chosen": -1.7508132457733154, |
|
"logits/rejected": -1.1751254796981812, |
|
"logps/chosen": -639.8759765625, |
|
"logps/rejected": -1238.5482177734375, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18339064717292786, |
|
"rewards/margins": 0.2874487638473511, |
|
"rewards/rejected": -0.47083941102027893, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.604562346488144e-06, |
|
"logits/chosen": -1.7333307266235352, |
|
"logits/rejected": -1.1350371837615967, |
|
"logps/chosen": -610.3933715820312, |
|
"logps/rejected": -1281.648193359375, |
|
"loss": 0.066, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15978248417377472, |
|
"rewards/margins": 0.2836895287036896, |
|
"rewards/rejected": -0.4434719681739807, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -1.5033605098724365, |
|
"logits/rejected": -1.08058500289917, |
|
"logps/chosen": -483.33892822265625, |
|
"logps/rejected": -1186.935546875, |
|
"loss": 0.0829, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12199674546718597, |
|
"rewards/margins": 0.32258424162864685, |
|
"rewards/rejected": -0.44458097219467163, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.572593931387604e-06, |
|
"logits/chosen": -1.4822036027908325, |
|
"logits/rejected": -1.1556123495101929, |
|
"logps/chosen": -739.7813110351562, |
|
"logps/rejected": -1216.9873046875, |
|
"loss": 0.1328, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.24871714413166046, |
|
"rewards/margins": 0.21633216738700867, |
|
"rewards/rejected": -0.4650493264198303, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.556187738201656e-06, |
|
"logits/chosen": -1.767778754234314, |
|
"logits/rejected": -1.1542942523956299, |
|
"logps/chosen": -614.69677734375, |
|
"logps/rejected": -1348.5400390625, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.14709432423114777, |
|
"rewards/margins": 0.3316415846347809, |
|
"rewards/rejected": -0.47873586416244507, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.539503170110431e-06, |
|
"logits/chosen": -1.6926486492156982, |
|
"logits/rejected": -1.1664977073669434, |
|
"logps/chosen": -735.2862548828125, |
|
"logps/rejected": -1265.770263671875, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18908649682998657, |
|
"rewards/margins": 0.24448461830615997, |
|
"rewards/rejected": -0.4335711598396301, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -1.3090674877166748, |
|
"logits/rejected": -1.0646418333053589, |
|
"logps/chosen": -558.6735229492188, |
|
"logps/rejected": -1291.3309326171875, |
|
"loss": 0.073, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1368141919374466, |
|
"rewards/margins": 0.3053056597709656, |
|
"rewards/rejected": -0.44211989641189575, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.50530798188761e-06, |
|
"logits/chosen": -1.625422477722168, |
|
"logits/rejected": -0.9810975790023804, |
|
"logps/chosen": -609.5338134765625, |
|
"logps/rejected": -1308.818115234375, |
|
"loss": 0.0728, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.09720011800527573, |
|
"rewards/margins": 0.30527880787849426, |
|
"rewards/rejected": -0.4024789333343506, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.48780199123712e-06, |
|
"logits/chosen": -1.5866591930389404, |
|
"logits/rejected": -1.1791462898254395, |
|
"logps/chosen": -654.497314453125, |
|
"logps/rejected": -1295.8955078125, |
|
"loss": 0.1033, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1393161565065384, |
|
"rewards/margins": 0.2792035937309265, |
|
"rewards/rejected": -0.4185197353363037, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.470026884016805e-06, |
|
"logits/chosen": -1.6597150564193726, |
|
"logits/rejected": -1.071593999862671, |
|
"logps/chosen": -675.8863525390625, |
|
"logps/rejected": -1333.055908203125, |
|
"loss": 0.0584, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16408918797969818, |
|
"rewards/margins": 0.300179660320282, |
|
"rewards/rejected": -0.46426883339881897, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.451985066691649e-06, |
|
"logits/chosen": -1.533385992050171, |
|
"logits/rejected": -0.9318403005599976, |
|
"logps/chosen": -672.769287109375, |
|
"logps/rejected": -1254.0054931640625, |
|
"loss": 0.0782, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18980053067207336, |
|
"rewards/margins": 0.24227580428123474, |
|
"rewards/rejected": -0.4320763647556305, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4336789818349105e-06, |
|
"logits/chosen": -1.9593079090118408, |
|
"logits/rejected": -1.2628549337387085, |
|
"logps/chosen": -551.6669311523438, |
|
"logps/rejected": -1144.910888671875, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11993531137704849, |
|
"rewards/margins": 0.2696130871772766, |
|
"rewards/rejected": -0.3895483613014221, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.415111107797445e-06, |
|
"logits/chosen": -1.7251970767974854, |
|
"logits/rejected": -1.1421195268630981, |
|
"logps/chosen": -561.43212890625, |
|
"logps/rejected": -1138.663818359375, |
|
"loss": 0.0933, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08620895445346832, |
|
"rewards/margins": 0.272499144077301, |
|
"rewards/rejected": -0.35870811343193054, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.396283958372173e-06, |
|
"logits/chosen": -1.675585150718689, |
|
"logits/rejected": -1.1132071018218994, |
|
"logps/chosen": -531.9155883789062, |
|
"logps/rejected": -1153.8314208984375, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06681212037801743, |
|
"rewards/margins": 0.28186994791030884, |
|
"rewards/rejected": -0.34868210554122925, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.377200082453748e-06, |
|
"logits/chosen": -1.6496893167495728, |
|
"logits/rejected": -1.0332069396972656, |
|
"logps/chosen": -485.20947265625, |
|
"logps/rejected": -1017.17138671875, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.057542771100997925, |
|
"rewards/margins": 0.2658589780330658, |
|
"rewards/rejected": -0.3234017789363861, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.357862063693486e-06, |
|
"logits/chosen": -1.699328064918518, |
|
"logits/rejected": -0.9203550219535828, |
|
"logps/chosen": -631.9972534179688, |
|
"logps/rejected": -1354.697509765625, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.16203594207763672, |
|
"rewards/margins": 0.3373655676841736, |
|
"rewards/rejected": -0.4994015097618103, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.338272520149572e-06, |
|
"logits/chosen": -1.18538498878479, |
|
"logits/rejected": -0.7905349731445312, |
|
"logps/chosen": -693.11474609375, |
|
"logps/rejected": -1318.9244384765625, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20799028873443604, |
|
"rewards/margins": 0.2950304448604584, |
|
"rewards/rejected": -0.5030207633972168, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.318434103932622e-06, |
|
"logits/chosen": -1.5692521333694458, |
|
"logits/rejected": -1.009714961051941, |
|
"logps/chosen": -633.1151123046875, |
|
"logps/rejected": -1297.8974609375, |
|
"loss": 0.0691, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19843876361846924, |
|
"rewards/margins": 0.31439024209976196, |
|
"rewards/rejected": -0.5128290057182312, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -1.5247453451156616, |
|
"logits/rejected": -0.8545368313789368, |
|
"logps/chosen": -540.1340942382812, |
|
"logps/rejected": -1173.1444091796875, |
|
"loss": 0.0566, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09455517679452896, |
|
"rewards/margins": 0.2999194264411926, |
|
"rewards/rejected": -0.394474595785141, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.278021430025343e-06, |
|
"logits/chosen": -1.7150371074676514, |
|
"logits/rejected": -0.8671354055404663, |
|
"logps/chosen": -531.0009155273438, |
|
"logps/rejected": -1244.0396728515625, |
|
"loss": 0.0531, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11200281232595444, |
|
"rewards/margins": 0.33397001028060913, |
|
"rewards/rejected": -0.44597283005714417, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.257452643564155e-06, |
|
"logits/chosen": -1.7381508350372314, |
|
"logits/rejected": -0.9717987179756165, |
|
"logps/chosen": -660.3062744140625, |
|
"logps/rejected": -1395.575927734375, |
|
"loss": 0.0381, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.16878202557563782, |
|
"rewards/margins": 0.35611775517463684, |
|
"rewards/rejected": -0.5248997807502747, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.236645926147493e-06, |
|
"logits/chosen": -1.6484124660491943, |
|
"logits/rejected": -1.1722246408462524, |
|
"logps/chosen": -589.8314208984375, |
|
"logps/rejected": -1167.5753173828125, |
|
"loss": 0.0772, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18166238069534302, |
|
"rewards/margins": 0.2549310624599457, |
|
"rewards/rejected": -0.4365933835506439, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -1.650120496749878, |
|
"logits/rejected": -1.1718031167984009, |
|
"logps/chosen": -673.8374633789062, |
|
"logps/rejected": -1340.0517578125, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21558384597301483, |
|
"rewards/margins": 0.26856911182403564, |
|
"rewards/rejected": -0.4841529428958893, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.194329997864331e-06, |
|
"logits/chosen": -1.612789511680603, |
|
"logits/rejected": -1.0092923641204834, |
|
"logps/chosen": -659.1913452148438, |
|
"logps/rejected": -1288.3807373046875, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1437804251909256, |
|
"rewards/margins": 0.31068724393844604, |
|
"rewards/rejected": -0.45446768403053284, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.172826515897146e-06, |
|
"logits/chosen": -1.5820199251174927, |
|
"logits/rejected": -1.1678297519683838, |
|
"logps/chosen": -549.3736572265625, |
|
"logps/rejected": -1264.0751953125, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12197370827198029, |
|
"rewards/margins": 0.32163676619529724, |
|
"rewards/rejected": -0.4436105191707611, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.151096559997519e-06, |
|
"logits/chosen": -1.5460295677185059, |
|
"logits/rejected": -0.9339001774787903, |
|
"logps/chosen": -657.1842041015625, |
|
"logps/rejected": -1248.517822265625, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17015226185321808, |
|
"rewards/margins": 0.2676629424095154, |
|
"rewards/rejected": -0.43781518936157227, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.129143072053639e-06, |
|
"logits/chosen": -1.3850871324539185, |
|
"logits/rejected": -0.6763152480125427, |
|
"logps/chosen": -712.0982666015625, |
|
"logps/rejected": -1295.492431640625, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.24367225170135498, |
|
"rewards/margins": 0.27682679891586304, |
|
"rewards/rejected": -0.5204989910125732, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.106969024216348e-06, |
|
"logits/chosen": -1.5604678392410278, |
|
"logits/rejected": -1.1585630178451538, |
|
"logps/chosen": -631.67041015625, |
|
"logps/rejected": -1394.1226806640625, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22048361599445343, |
|
"rewards/margins": 0.31476694345474243, |
|
"rewards/rejected": -0.5352505445480347, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.084577418496775e-06, |
|
"logits/chosen": -1.5729732513427734, |
|
"logits/rejected": -0.983799159526825, |
|
"logps/chosen": -644.2813720703125, |
|
"logps/rejected": -1280.1788330078125, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21176350116729736, |
|
"rewards/margins": 0.33453696966171265, |
|
"rewards/rejected": -0.54630047082901, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0619712863599005e-06, |
|
"logits/chosen": -1.5669440031051636, |
|
"logits/rejected": -1.1483709812164307, |
|
"logps/chosen": -664.9472045898438, |
|
"logps/rejected": -1134.748046875, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2157289683818817, |
|
"rewards/margins": 0.22911766171455383, |
|
"rewards/rejected": -0.44484663009643555, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -1.594029188156128, |
|
"logits/rejected": -1.106323003768921, |
|
"logps/chosen": -597.1380615234375, |
|
"logps/rejected": -1102.068115234375, |
|
"loss": 0.1373, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17422744631767273, |
|
"rewards/margins": 0.21986624598503113, |
|
"rewards/rejected": -0.39409369230270386, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.016127713497034e-06, |
|
"logits/chosen": -1.6402870416641235, |
|
"logits/rejected": -0.9547089338302612, |
|
"logps/chosen": -685.6248168945312, |
|
"logps/rejected": -1294.0582275390625, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16791805624961853, |
|
"rewards/margins": 0.27242207527160645, |
|
"rewards/rejected": -0.440340131521225, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.992896479256966e-06, |
|
"logits/chosen": -1.5232279300689697, |
|
"logits/rejected": -1.086789608001709, |
|
"logps/chosen": -680.0406494140625, |
|
"logps/rejected": -1135.125, |
|
"loss": 0.1149, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.22198696434497833, |
|
"rewards/margins": 0.21400491893291473, |
|
"rewards/rejected": -0.43599191308021545, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.969463130731183e-06, |
|
"logits/chosen": -1.3749518394470215, |
|
"logits/rejected": -0.9445620775222778, |
|
"logps/chosen": -614.1600952148438, |
|
"logps/rejected": -1332.16650390625, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16799691319465637, |
|
"rewards/margins": 0.3212730288505554, |
|
"rewards/rejected": -0.4892699122428894, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.945830840419966e-06, |
|
"logits/chosen": -1.5307501554489136, |
|
"logits/rejected": -0.9613451957702637, |
|
"logps/chosen": -572.4707641601562, |
|
"logps/rejected": -1237.0767822265625, |
|
"loss": 0.0536, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14543718099594116, |
|
"rewards/margins": 0.29924580454826355, |
|
"rewards/rejected": -0.4446829855442047, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92200280775713e-06, |
|
"logits/chosen": -1.3332005739212036, |
|
"logits/rejected": -0.8951355218887329, |
|
"logps/chosen": -686.0225830078125, |
|
"logps/rejected": -1341.870361328125, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.22055836021900177, |
|
"rewards/margins": 0.28799134492874146, |
|
"rewards/rejected": -0.508549690246582, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.897982258676867e-06, |
|
"logits/chosen": -1.2923948764801025, |
|
"logits/rejected": -1.0148731470108032, |
|
"logps/chosen": -460.26336669921875, |
|
"logps/rejected": -1217.972412109375, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09934146702289581, |
|
"rewards/margins": 0.30596452951431274, |
|
"rewards/rejected": -0.40530601143836975, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8737724451770155e-06, |
|
"logits/chosen": -1.660675287246704, |
|
"logits/rejected": -0.7988176345825195, |
|
"logps/chosen": -664.8593139648438, |
|
"logps/rejected": -1191.18017578125, |
|
"loss": 0.0708, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12432744354009628, |
|
"rewards/margins": 0.2866395115852356, |
|
"rewards/rejected": -0.4109669625759125, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.849376644878783e-06, |
|
"logits/chosen": -1.7713876962661743, |
|
"logits/rejected": -0.9808756113052368, |
|
"logps/chosen": -621.3865356445312, |
|
"logps/rejected": -1367.19482421875, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1509043574333191, |
|
"rewards/margins": 0.34973520040512085, |
|
"rewards/rejected": -0.5006396174430847, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.824798160583012e-06, |
|
"logits/chosen": -1.5198707580566406, |
|
"logits/rejected": -0.90589439868927, |
|
"logps/chosen": -679.7938232421875, |
|
"logps/rejected": -1217.0797119140625, |
|
"loss": 0.0979, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.20723366737365723, |
|
"rewards/margins": 0.2676594853401184, |
|
"rewards/rejected": -0.47489315271377563, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8000403198230385e-06, |
|
"logits/chosen": -1.8150522708892822, |
|
"logits/rejected": -0.9454204440116882, |
|
"logps/chosen": -640.8989868164062, |
|
"logps/rejected": -1308.7498779296875, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12217221409082413, |
|
"rewards/margins": 0.3217760920524597, |
|
"rewards/rejected": -0.44394832849502563, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7751064744141886e-06, |
|
"logits/chosen": -1.1891100406646729, |
|
"logits/rejected": -1.013519525527954, |
|
"logps/chosen": -462.0289001464844, |
|
"logps/rejected": -1144.2987060546875, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.057515986263751984, |
|
"rewards/margins": 0.3108125925064087, |
|
"rewards/rejected": -0.36832860112190247, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.53715980052948, |
|
"logits/rejected": -1.0485864877700806, |
|
"logps/chosen": -620.6614379882812, |
|
"logps/rejected": -1361.7210693359375, |
|
"loss": 0.0534, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14007523655891418, |
|
"rewards/margins": 0.3020576536655426, |
|
"rewards/rejected": -0.4421328604221344, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.724724295595218e-06, |
|
"logits/chosen": -1.4643341302871704, |
|
"logits/rejected": -0.7922446131706238, |
|
"logps/chosen": -477.1775817871094, |
|
"logps/rejected": -1089.298583984375, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0700678676366806, |
|
"rewards/margins": 0.27943548560142517, |
|
"rewards/rejected": -0.3495033383369446, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.699282783125616e-06, |
|
"logits/chosen": -1.6988475322723389, |
|
"logits/rejected": -1.08052396774292, |
|
"logps/chosen": -519.7030029296875, |
|
"logps/rejected": -1169.624755859375, |
|
"loss": 0.0815, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10375889390707016, |
|
"rewards/margins": 0.2844247817993164, |
|
"rewards/rejected": -0.3881836533546448, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6736789069647273e-06, |
|
"logits/chosen": -1.5467779636383057, |
|
"logits/rejected": -0.878455638885498, |
|
"logps/chosen": -689.0154418945312, |
|
"logps/rejected": -1260.757568359375, |
|
"loss": 0.0964, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20392462611198425, |
|
"rewards/margins": 0.26304885745048523, |
|
"rewards/rejected": -0.46697354316711426, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6479161334675294e-06, |
|
"logits/chosen": -1.4715616703033447, |
|
"logits/rejected": -0.9548036456108093, |
|
"logps/chosen": -503.00982666015625, |
|
"logps/rejected": -1227.5513916015625, |
|
"loss": 0.06, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09338453412055969, |
|
"rewards/margins": 0.32899850606918335, |
|
"rewards/rejected": -0.42238301038742065, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.621997950501156e-06, |
|
"logits/chosen": -1.564557433128357, |
|
"logits/rejected": -1.0119483470916748, |
|
"logps/chosen": -533.0374145507812, |
|
"logps/rejected": -1133.299072265625, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.12650765478610992, |
|
"rewards/margins": 0.2633581757545471, |
|
"rewards/rejected": -0.38986584544181824, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.595927866972694e-06, |
|
"logits/chosen": -1.5929651260375977, |
|
"logits/rejected": -0.946239173412323, |
|
"logps/chosen": -796.1531982421875, |
|
"logps/rejected": -1373.961181640625, |
|
"loss": 0.1067, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.258171945810318, |
|
"rewards/margins": 0.25018230080604553, |
|
"rewards/rejected": -0.5083541870117188, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5697094123541357e-06, |
|
"logits/chosen": -1.3169076442718506, |
|
"logits/rejected": -0.6554785370826721, |
|
"logps/chosen": -671.5099487304688, |
|
"logps/rejected": -1274.65576171875, |
|
"loss": 0.072, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.23343141376972198, |
|
"rewards/margins": 0.258681058883667, |
|
"rewards/rejected": -0.49211248755455017, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543346136204545e-06, |
|
"logits/chosen": -1.5627593994140625, |
|
"logits/rejected": -1.0817673206329346, |
|
"logps/chosen": -626.169189453125, |
|
"logps/rejected": -1331.075439453125, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.23857741057872772, |
|
"rewards/margins": 0.29800939559936523, |
|
"rewards/rejected": -0.5365867614746094, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.516841607689501e-06, |
|
"logits/chosen": -1.4650442600250244, |
|
"logits/rejected": -0.9328089952468872, |
|
"logps/chosen": -621.0172729492188, |
|
"logps/rejected": -1351.593017578125, |
|
"loss": 0.0719, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.204871267080307, |
|
"rewards/margins": 0.3157797157764435, |
|
"rewards/rejected": -0.5206509828567505, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4901994150978926e-06, |
|
"logits/chosen": -1.4772984981536865, |
|
"logits/rejected": -1.0536553859710693, |
|
"logps/chosen": -532.893310546875, |
|
"logps/rejected": -1081.2454833984375, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09208095073699951, |
|
"rewards/margins": 0.2642812132835388, |
|
"rewards/rejected": -0.35636216402053833, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4634231653561213e-06, |
|
"logits/chosen": -1.5390266180038452, |
|
"logits/rejected": -0.9685665965080261, |
|
"logps/chosen": -569.3391723632812, |
|
"logps/rejected": -1077.9581298828125, |
|
"loss": 0.0826, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08572982251644135, |
|
"rewards/margins": 0.25851181149482727, |
|
"rewards/rejected": -0.3442416191101074, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -1.7010656595230103, |
|
"logits/rejected": -1.0056097507476807, |
|
"logps/chosen": -629.4781494140625, |
|
"logps/rejected": -1221.6732177734375, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13440915942192078, |
|
"rewards/margins": 0.266291081905365, |
|
"rewards/rejected": -0.40070027112960815, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.409483012382879e-06, |
|
"logits/chosen": -1.8139673471450806, |
|
"logits/rejected": -1.0937250852584839, |
|
"logps/chosen": -542.4208984375, |
|
"logps/rejected": -1275.978271484375, |
|
"loss": 0.0647, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12924353778362274, |
|
"rewards/margins": 0.3123559355735779, |
|
"rewards/rejected": -0.44159942865371704, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3823264117846722e-06, |
|
"logits/chosen": -1.34089195728302, |
|
"logits/rejected": -1.0331387519836426, |
|
"logps/chosen": -669.740966796875, |
|
"logps/rejected": -1183.39404296875, |
|
"loss": 0.0927, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16455522179603577, |
|
"rewards/margins": 0.24129804968833923, |
|
"rewards/rejected": -0.4058533310890198, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -1.4803988933563232, |
|
"logits/rejected": -1.2195205688476562, |
|
"logps/chosen": -631.5969848632812, |
|
"logps/rejected": -1318.831787109375, |
|
"loss": 0.0847, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.24165017902851105, |
|
"rewards/margins": 0.2722802758216858, |
|
"rewards/rejected": -0.5139304399490356, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3276585447123957e-06, |
|
"logits/chosen": -1.698537826538086, |
|
"logits/rejected": -1.1106715202331543, |
|
"logps/chosen": -758.1787719726562, |
|
"logps/rejected": -1358.7789306640625, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2571624517440796, |
|
"rewards/margins": 0.247967928647995, |
|
"rewards/rejected": -0.5051303505897522, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.300154679392429e-06, |
|
"logits/chosen": -1.565079927444458, |
|
"logits/rejected": -0.8933359980583191, |
|
"logps/chosen": -655.6455078125, |
|
"logps/rejected": -1246.6724853515625, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2172234058380127, |
|
"rewards/margins": 0.259296715259552, |
|
"rewards/rejected": -0.4765201508998871, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.272542485937369e-06, |
|
"logits/chosen": -1.7766815423965454, |
|
"logits/rejected": -1.0995599031448364, |
|
"logps/chosen": -622.3171997070312, |
|
"logps/rejected": -1246.709716796875, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16058366000652313, |
|
"rewards/margins": 0.2945863604545593, |
|
"rewards/rejected": -0.45517000555992126, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.244825702596205e-06, |
|
"logits/chosen": -1.6319786310195923, |
|
"logits/rejected": -1.2775378227233887, |
|
"logps/chosen": -543.9849853515625, |
|
"logps/rejected": -1321.574462890625, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13428989052772522, |
|
"rewards/margins": 0.3316313922405243, |
|
"rewards/rejected": -0.4659213125705719, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -1.6392805576324463, |
|
"logits/rejected": -1.2436825037002563, |
|
"logps/chosen": -662.4228515625, |
|
"logps/rejected": -1163.439208984375, |
|
"loss": 0.1084, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1475670039653778, |
|
"rewards/margins": 0.2356584370136261, |
|
"rewards/rejected": -0.38322538137435913, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.189093389542498e-06, |
|
"logits/chosen": -1.8173353672027588, |
|
"logits/rejected": -0.9919673204421997, |
|
"logps/chosen": -665.50537109375, |
|
"logps/rejected": -1430.367431640625, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.16980603337287903, |
|
"rewards/margins": 0.3760393559932709, |
|
"rewards/rejected": -0.5458453893661499, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1610854050930063e-06, |
|
"logits/chosen": -1.5377254486083984, |
|
"logits/rejected": -0.8976453542709351, |
|
"logps/chosen": -662.4138793945312, |
|
"logps/rejected": -1309.8470458984375, |
|
"loss": 0.0547, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20192308723926544, |
|
"rewards/margins": 0.30307039618492126, |
|
"rewards/rejected": -0.5049934983253479, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.132987920262005e-06, |
|
"logits/chosen": -1.5729992389678955, |
|
"logits/rejected": -0.892600417137146, |
|
"logps/chosen": -780.0401000976562, |
|
"logps/rejected": -1402.983154296875, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18477649986743927, |
|
"rewards/margins": 0.33369770646095276, |
|
"rewards/rejected": -0.5184742212295532, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -1.5943243503570557, |
|
"logits/rejected": -0.9319471120834351, |
|
"logps/chosen": -730.2401733398438, |
|
"logps/rejected": -1355.317626953125, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1811259537935257, |
|
"rewards/margins": 0.3043223023414612, |
|
"rewards/rejected": -0.48544827103614807, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0765396768561005e-06, |
|
"logits/chosen": -1.8524019718170166, |
|
"logits/rejected": -0.9115484356880188, |
|
"logps/chosen": -643.4202270507812, |
|
"logps/rejected": -1369.478271484375, |
|
"loss": 0.0631, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1561841517686844, |
|
"rewards/margins": 0.35857564210891724, |
|
"rewards/rejected": -0.5147597193717957, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0481965604697582e-06, |
|
"logits/chosen": -1.8506492376327515, |
|
"logits/rejected": -1.3686341047286987, |
|
"logps/chosen": -625.1241455078125, |
|
"logps/rejected": -1145.7611083984375, |
|
"loss": 0.0856, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14623615145683289, |
|
"rewards/margins": 0.25150930881500244, |
|
"rewards/rejected": -0.39774543046951294, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.019779227044398e-06, |
|
"logits/chosen": -1.5452914237976074, |
|
"logits/rejected": -1.0970354080200195, |
|
"logps/chosen": -576.4892578125, |
|
"logps/rejected": -1214.838623046875, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15811112523078918, |
|
"rewards/margins": 0.29070132970809937, |
|
"rewards/rejected": -0.44881248474121094, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9912915238320755e-06, |
|
"logits/chosen": -1.5780893564224243, |
|
"logits/rejected": -1.1927622556686401, |
|
"logps/chosen": -574.8433227539062, |
|
"logps/rejected": -1284.197998046875, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14699247479438782, |
|
"rewards/margins": 0.30907437205314636, |
|
"rewards/rejected": -0.4560668468475342, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9627373076117864e-06, |
|
"logits/chosen": -1.6259567737579346, |
|
"logits/rejected": -1.0762062072753906, |
|
"logps/chosen": -532.4714965820312, |
|
"logps/rejected": -1246.502685546875, |
|
"loss": 0.0554, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15746064484119415, |
|
"rewards/margins": 0.3116622865200043, |
|
"rewards/rejected": -0.4691229462623596, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -1.4439929723739624, |
|
"logits/rejected": -1.0250179767608643, |
|
"logps/chosen": -622.338134765625, |
|
"logps/rejected": -1203.972412109375, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17773118615150452, |
|
"rewards/margins": 0.2580625116825104, |
|
"rewards/rejected": -0.4357937276363373, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9054448077639193e-06, |
|
"logits/chosen": -1.5762206315994263, |
|
"logits/rejected": -0.9138490557670593, |
|
"logps/chosen": -586.79345703125, |
|
"logps/rejected": -1128.914794921875, |
|
"loss": 0.1417, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16046829521656036, |
|
"rewards/margins": 0.23752140998840332, |
|
"rewards/rejected": -0.3979896903038025, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.876714280623708e-06, |
|
"logits/chosen": -1.5804052352905273, |
|
"logits/rejected": -1.3289867639541626, |
|
"logps/chosen": -565.5819091796875, |
|
"logps/rejected": -1220.959228515625, |
|
"loss": 0.0819, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16751018166542053, |
|
"rewards/margins": 0.2462599277496338, |
|
"rewards/rejected": -0.4137701392173767, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.847932752400164e-06, |
|
"logits/chosen": -1.6598694324493408, |
|
"logits/rejected": -1.195133090019226, |
|
"logps/chosen": -587.2601318359375, |
|
"logps/rejected": -1277.15283203125, |
|
"loss": 0.0684, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12616164982318878, |
|
"rewards/margins": 0.29237666726112366, |
|
"rewards/rejected": -0.41853827238082886, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8191041196514874e-06, |
|
"logits/chosen": -1.658347725868225, |
|
"logits/rejected": -0.909757137298584, |
|
"logps/chosen": -614.3128662109375, |
|
"logps/rejected": -1271.3431396484375, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14127101004123688, |
|
"rewards/margins": 0.2985311448574066, |
|
"rewards/rejected": -0.43980222940444946, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7902322853130758e-06, |
|
"logits/chosen": -1.7248843908309937, |
|
"logits/rejected": -1.4133408069610596, |
|
"logps/chosen": -530.6253662109375, |
|
"logps/rejected": -1091.8160400390625, |
|
"loss": 0.0698, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12663719058036804, |
|
"rewards/margins": 0.252753883600235, |
|
"rewards/rejected": -0.3793911337852478, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -1.8168293237686157, |
|
"logits/rejected": -1.3112378120422363, |
|
"logps/chosen": -532.3922119140625, |
|
"logps/rejected": -1184.728515625, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11639068275690079, |
|
"rewards/margins": 0.30524441599845886, |
|
"rewards/rejected": -0.42163509130477905, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7323746523234813e-06, |
|
"logits/chosen": -1.6596038341522217, |
|
"logits/rejected": -1.0099501609802246, |
|
"logps/chosen": -620.3221435546875, |
|
"logps/rejected": -1408.0771484375, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.11354754865169525, |
|
"rewards/margins": 0.3631085455417633, |
|
"rewards/rejected": -0.47665613889694214, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.703396686669646e-06, |
|
"logits/chosen": -1.677464246749878, |
|
"logits/rejected": -1.2843691110610962, |
|
"logps/chosen": -601.0008544921875, |
|
"logps/rejected": -1275.202880859375, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11773763597011566, |
|
"rewards/margins": 0.30820992588996887, |
|
"rewards/rejected": -0.4259475767612457, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6743911843603134e-06, |
|
"logits/chosen": -1.915787696838379, |
|
"logits/rejected": -0.9031229019165039, |
|
"logps/chosen": -596.1331176757812, |
|
"logps/rejected": -1234.94482421875, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11484452337026596, |
|
"rewards/margins": 0.32129010558128357, |
|
"rewards/rejected": -0.4361346364021301, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -1.800848364830017, |
|
"logits/rejected": -1.0119121074676514, |
|
"logps/chosen": -777.0930786132812, |
|
"logps/rejected": -1372.353271484375, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.13788454234600067, |
|
"rewards/margins": 0.31275519728660583, |
|
"rewards/rejected": -0.4506397843360901, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6163132804943675e-06, |
|
"logits/chosen": -1.7606815099716187, |
|
"logits/rejected": -1.1106458902359009, |
|
"logps/chosen": -619.3434448242188, |
|
"logps/rejected": -1193.679931640625, |
|
"loss": 0.0785, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1508094221353531, |
|
"rewards/margins": 0.26593416929244995, |
|
"rewards/rejected": -0.41674357652664185, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.587248741756253e-06, |
|
"logits/chosen": -1.8017308712005615, |
|
"logits/rejected": -1.0231897830963135, |
|
"logps/chosen": -573.2968139648438, |
|
"logps/rejected": -1028.872802734375, |
|
"loss": 0.0863, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14274278283119202, |
|
"rewards/margins": 0.22178173065185547, |
|
"rewards/rejected": -0.3645245134830475, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.558172390935141e-06, |
|
"logits/chosen": -1.8495060205459595, |
|
"logits/rejected": -0.9550938606262207, |
|
"logps/chosen": -701.3528442382812, |
|
"logps/rejected": -1321.211181640625, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20430326461791992, |
|
"rewards/margins": 0.292900025844574, |
|
"rewards/rejected": -0.4972032606601715, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5290881645034932e-06, |
|
"logits/chosen": -1.613519310951233, |
|
"logits/rejected": -1.0299922227859497, |
|
"logps/chosen": -682.4573974609375, |
|
"logps/rejected": -1327.379150390625, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2211301326751709, |
|
"rewards/margins": 0.2834627032279968, |
|
"rewards/rejected": -0.5045928359031677, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5e-06, |
|
"logits/chosen": -1.8198554515838623, |
|
"logits/rejected": -1.2243192195892334, |
|
"logps/chosen": -673.7959594726562, |
|
"logps/rejected": -1209.7906494140625, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15559658408164978, |
|
"rewards/margins": 0.2655668258666992, |
|
"rewards/rejected": -0.4211633801460266, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.470911835496508e-06, |
|
"logits/chosen": -1.6637722253799438, |
|
"logits/rejected": -1.15647292137146, |
|
"logps/chosen": -611.1282348632812, |
|
"logps/rejected": -1223.1854248046875, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1450941264629364, |
|
"rewards/margins": 0.2773655652999878, |
|
"rewards/rejected": -0.4224596917629242, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4418276090648596e-06, |
|
"logits/chosen": -1.8464033603668213, |
|
"logits/rejected": -1.052440881729126, |
|
"logps/chosen": -644.9616088867188, |
|
"logps/rejected": -1258.923095703125, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1209816113114357, |
|
"rewards/margins": 0.2808608412742615, |
|
"rewards/rejected": -0.40184250473976135, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -1.685725212097168, |
|
"logits/rejected": -1.0185719728469849, |
|
"logps/chosen": -620.1656494140625, |
|
"logps/rejected": -1276.2811279296875, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14368347823619843, |
|
"rewards/margins": 0.2920317053794861, |
|
"rewards/rejected": -0.4357151985168457, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3836867195056333e-06, |
|
"logits/chosen": -1.6978428363800049, |
|
"logits/rejected": -1.209702730178833, |
|
"logps/chosen": -633.7965698242188, |
|
"logps/rejected": -1311.2330322265625, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15191814303398132, |
|
"rewards/margins": 0.2970329523086548, |
|
"rewards/rejected": -0.4489511549472809, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3546379277238107e-06, |
|
"logits/chosen": -1.4833507537841797, |
|
"logits/rejected": -1.0119761228561401, |
|
"logps/chosen": -680.2658081054688, |
|
"logps/rejected": -1296.93212890625, |
|
"loss": 0.0718, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.23375864326953888, |
|
"rewards/margins": 0.2688060402870178, |
|
"rewards/rejected": -0.5025646686553955, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.325608815639687e-06, |
|
"logits/chosen": -1.6255954504013062, |
|
"logits/rejected": -0.9894803166389465, |
|
"logps/chosen": -608.648681640625, |
|
"logps/rejected": -1257.4925537109375, |
|
"loss": 0.0806, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.19161316752433777, |
|
"rewards/margins": 0.27543801069259644, |
|
"rewards/rejected": -0.46705111861228943, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.296603313330355e-06, |
|
"logits/chosen": -1.3378230333328247, |
|
"logits/rejected": -0.8268178105354309, |
|
"logps/chosen": -624.6492309570312, |
|
"logps/rejected": -1211.5384521484375, |
|
"loss": 0.1075, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17298360168933868, |
|
"rewards/margins": 0.25485867261886597, |
|
"rewards/rejected": -0.42784222960472107, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.26762534767652e-06, |
|
"logits/chosen": -1.7436068058013916, |
|
"logits/rejected": -1.0120248794555664, |
|
"logps/chosen": -667.2508544921875, |
|
"logps/rejected": -1316.541015625, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.18062946200370789, |
|
"rewards/margins": 0.2949804961681366, |
|
"rewards/rejected": -0.47561001777648926, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.238678841830867e-06, |
|
"logits/chosen": -1.743772268295288, |
|
"logits/rejected": -0.9547621607780457, |
|
"logps/chosen": -719.8886108398438, |
|
"logps/rejected": -1386.740478515625, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1676359474658966, |
|
"rewards/margins": 0.31254929304122925, |
|
"rewards/rejected": -0.48018521070480347, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2097677146869242e-06, |
|
"logits/chosen": -1.7221221923828125, |
|
"logits/rejected": -1.1789166927337646, |
|
"logps/chosen": -515.1943359375, |
|
"logps/rejected": -1109.064453125, |
|
"loss": 0.0696, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11262670904397964, |
|
"rewards/margins": 0.2698899805545807, |
|
"rewards/rejected": -0.38251665234565735, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1808958803485134e-06, |
|
"logits/chosen": -1.7244141101837158, |
|
"logits/rejected": -1.0081803798675537, |
|
"logps/chosen": -617.8436889648438, |
|
"logps/rejected": -1301.739990234375, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.13489536941051483, |
|
"rewards/margins": 0.30581116676330566, |
|
"rewards/rejected": -0.4407065510749817, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1520672475998374e-06, |
|
"logits/chosen": -1.5150662660598755, |
|
"logits/rejected": -1.2830677032470703, |
|
"logps/chosen": -469.53497314453125, |
|
"logps/rejected": -1185.6640625, |
|
"loss": 0.1188, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12466877698898315, |
|
"rewards/margins": 0.25860312581062317, |
|
"rewards/rejected": -0.3832719027996063, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1232857193762923e-06, |
|
"logits/chosen": -1.6666500568389893, |
|
"logits/rejected": -1.1478335857391357, |
|
"logps/chosen": -630.8321533203125, |
|
"logps/rejected": -1219.052978515625, |
|
"loss": 0.0988, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14840959012508392, |
|
"rewards/margins": 0.2695569396018982, |
|
"rewards/rejected": -0.4179665148258209, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.094555192236082e-06, |
|
"logits/chosen": -1.6702663898468018, |
|
"logits/rejected": -1.1331311464309692, |
|
"logps/chosen": -639.830322265625, |
|
"logps/rejected": -1440.0772705078125, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.16856852173805237, |
|
"rewards/margins": 0.34757930040359497, |
|
"rewards/rejected": -0.5161478519439697, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.3389908075332642, |
|
"logits/rejected": -1.1332718133926392, |
|
"logps/chosen": -560.32763671875, |
|
"logps/rejected": -1140.04541015625, |
|
"loss": 0.0962, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18182942271232605, |
|
"rewards/margins": 0.22485823929309845, |
|
"rewards/rejected": -0.4066876769065857, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.037262692388214e-06, |
|
"logits/chosen": -1.5946425199508667, |
|
"logits/rejected": -1.050821304321289, |
|
"logps/chosen": -601.1361083984375, |
|
"logps/rejected": -1288.375244140625, |
|
"loss": 0.0656, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.13252010941505432, |
|
"rewards/margins": 0.3152475953102112, |
|
"rewards/rejected": -0.4477676749229431, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0087084761679245e-06, |
|
"logits/chosen": -1.9240398406982422, |
|
"logits/rejected": -1.0551602840423584, |
|
"logps/chosen": -725.8641967773438, |
|
"logps/rejected": -1185.0716552734375, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1920987367630005, |
|
"rewards/margins": 0.24843771755695343, |
|
"rewards/rejected": -0.4405364394187927, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9802207729556023e-06, |
|
"logits/chosen": -1.827103853225708, |
|
"logits/rejected": -1.2141703367233276, |
|
"logps/chosen": -591.169677734375, |
|
"logps/rejected": -1252.523193359375, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18946315348148346, |
|
"rewards/margins": 0.2804568111896515, |
|
"rewards/rejected": -0.46991997957229614, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9518034395302413e-06, |
|
"logits/chosen": -1.4736969470977783, |
|
"logits/rejected": -0.9016133546829224, |
|
"logps/chosen": -606.1239013671875, |
|
"logps/rejected": -1232.34912109375, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16971653699874878, |
|
"rewards/margins": 0.3104923367500305, |
|
"rewards/rejected": -0.4802088141441345, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9234603231439e-06, |
|
"logits/chosen": -1.7311079502105713, |
|
"logits/rejected": -1.0143775939941406, |
|
"logps/chosen": -625.5157470703125, |
|
"logps/rejected": -1218.10693359375, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12972071766853333, |
|
"rewards/margins": 0.30200275778770447, |
|
"rewards/rejected": -0.4317234456539154, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.895195261000831e-06, |
|
"logits/chosen": -1.702005386352539, |
|
"logits/rejected": -1.2821683883666992, |
|
"logps/chosen": -580.2069091796875, |
|
"logps/rejected": -1129.827880859375, |
|
"loss": 0.092, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12076668441295624, |
|
"rewards/margins": 0.2600669264793396, |
|
"rewards/rejected": -0.38083356618881226, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8670120797379958e-06, |
|
"logits/chosen": -1.5771738290786743, |
|
"logits/rejected": -1.2320917844772339, |
|
"logps/chosen": -590.98046875, |
|
"logps/rejected": -1251.115234375, |
|
"loss": 0.1923, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14493119716644287, |
|
"rewards/margins": 0.3147684633731842, |
|
"rewards/rejected": -0.4596996307373047, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8389145949069953e-06, |
|
"logits/chosen": -1.7696335315704346, |
|
"logits/rejected": -1.096189260482788, |
|
"logps/chosen": -578.0641479492188, |
|
"logps/rejected": -1174.998291015625, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13082124292850494, |
|
"rewards/margins": 0.29184988141059875, |
|
"rewards/rejected": -0.4226710796356201, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8109066104575023e-06, |
|
"logits/chosen": -1.5320732593536377, |
|
"logits/rejected": -0.7170606851577759, |
|
"logps/chosen": -602.5262451171875, |
|
"logps/rejected": -1216.552978515625, |
|
"loss": 0.0593, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13688640296459198, |
|
"rewards/margins": 0.3167448043823242, |
|
"rewards/rejected": -0.4536312222480774, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7829919182222752e-06, |
|
"logits/chosen": -1.6282037496566772, |
|
"logits/rejected": -0.7399900555610657, |
|
"logps/chosen": -665.1109008789062, |
|
"logps/rejected": -1210.0506591796875, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17592889070510864, |
|
"rewards/margins": 0.27498775720596313, |
|
"rewards/rejected": -0.4509166181087494, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.755174297403795e-06, |
|
"logits/chosen": -1.5313676595687866, |
|
"logits/rejected": -1.0872125625610352, |
|
"logps/chosen": -673.0841064453125, |
|
"logps/rejected": -1284.4078369140625, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22001402080059052, |
|
"rewards/margins": 0.2925686240196228, |
|
"rewards/rejected": -0.5125826597213745, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.5627118349075317, |
|
"logits/rejected": -0.9476549029350281, |
|
"logps/chosen": -889.22509765625, |
|
"logps/rejected": -1250.838134765625, |
|
"loss": 0.1224, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.24946339428424835, |
|
"rewards/margins": 0.240512415766716, |
|
"rewards/rejected": -0.4899757504463196, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.699845320607571e-06, |
|
"logits/chosen": -1.3387973308563232, |
|
"logits/rejected": -0.737518310546875, |
|
"logps/chosen": -649.2557373046875, |
|
"logps/rejected": -1256.31103515625, |
|
"loss": 0.0787, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2079259604215622, |
|
"rewards/margins": 0.2849615216255188, |
|
"rewards/rejected": -0.4928874373435974, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6723414552876052e-06, |
|
"logits/chosen": -1.6167535781860352, |
|
"logits/rejected": -1.154432773590088, |
|
"logps/chosen": -748.0882568359375, |
|
"logps/rejected": -1344.591064453125, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2506067454814911, |
|
"rewards/margins": 0.25507742166519165, |
|
"rewards/rejected": -0.5056841969490051, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6449496416858285e-06, |
|
"logits/chosen": -1.4003807306289673, |
|
"logits/rejected": -0.9338014721870422, |
|
"logps/chosen": -809.3727416992188, |
|
"logps/rejected": -1325.3411865234375, |
|
"loss": 0.0916, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.31113409996032715, |
|
"rewards/margins": 0.2267119586467743, |
|
"rewards/rejected": -0.5378460884094238, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6176735882153284e-06, |
|
"logits/chosen": -1.6452484130859375, |
|
"logits/rejected": -0.8376502990722656, |
|
"logps/chosen": -785.1629028320312, |
|
"logps/rejected": -1453.1751708984375, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.24854576587677002, |
|
"rewards/margins": 0.31561970710754395, |
|
"rewards/rejected": -0.564165472984314, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5905169876171223e-06, |
|
"logits/chosen": -1.3292005062103271, |
|
"logits/rejected": -1.1126399040222168, |
|
"logps/chosen": -715.500244140625, |
|
"logps/rejected": -1252.599365234375, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2204395830631256, |
|
"rewards/margins": 0.24764451384544373, |
|
"rewards/rejected": -0.46808409690856934, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.56348351646022e-06, |
|
"logits/chosen": -1.7485458850860596, |
|
"logits/rejected": -1.010534405708313, |
|
"logps/chosen": -690.407958984375, |
|
"logps/rejected": -1369.098876953125, |
|
"loss": 0.068, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21612036228179932, |
|
"rewards/margins": 0.3184736371040344, |
|
"rewards/rejected": -0.534593939781189, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5365768346438798e-06, |
|
"logits/chosen": -1.5913209915161133, |
|
"logits/rejected": -0.9691961407661438, |
|
"logps/chosen": -642.9549560546875, |
|
"logps/rejected": -1401.8271484375, |
|
"loss": 0.0482, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17725911736488342, |
|
"rewards/margins": 0.3471994996070862, |
|
"rewards/rejected": -0.5244585871696472, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -1.489768147468567, |
|
"logits/rejected": -0.9495828747749329, |
|
"logps/chosen": -701.9700317382812, |
|
"logps/rejected": -1363.816650390625, |
|
"loss": 0.1071, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21182182431221008, |
|
"rewards/margins": 0.28239232301712036, |
|
"rewards/rejected": -0.4942142069339752, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4831583923105e-06, |
|
"logits/chosen": -1.5869534015655518, |
|
"logits/rejected": -0.7756304740905762, |
|
"logps/chosen": -702.1505737304688, |
|
"logps/rejected": -1240.6885986328125, |
|
"loss": 0.0858, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17973639070987701, |
|
"rewards/margins": 0.26760077476501465, |
|
"rewards/rejected": -0.44733715057373047, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4566538637954556e-06, |
|
"logits/chosen": -1.632197618484497, |
|
"logits/rejected": -1.0302485227584839, |
|
"logps/chosen": -545.2433471679688, |
|
"logps/rejected": -1228.961669921875, |
|
"loss": 0.0726, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15146581828594208, |
|
"rewards/margins": 0.30513542890548706, |
|
"rewards/rejected": -0.45660123229026794, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.430290587645865e-06, |
|
"logits/chosen": -1.6993017196655273, |
|
"logits/rejected": -1.0094877481460571, |
|
"logps/chosen": -642.3917236328125, |
|
"logps/rejected": -1230.881103515625, |
|
"loss": 0.075, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17171506583690643, |
|
"rewards/margins": 0.2653944194316864, |
|
"rewards/rejected": -0.43710947036743164, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -1.5952852964401245, |
|
"logits/rejected": -1.0270891189575195, |
|
"logps/chosen": -582.1309814453125, |
|
"logps/rejected": -1269.764892578125, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1761622130870819, |
|
"rewards/margins": 0.2915706932544708, |
|
"rewards/rejected": -0.46773290634155273, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3780020494988447e-06, |
|
"logits/chosen": -1.3955180644989014, |
|
"logits/rejected": -0.8177375793457031, |
|
"logps/chosen": -722.6862182617188, |
|
"logps/rejected": -1415.1123046875, |
|
"loss": 0.0664, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.24629633128643036, |
|
"rewards/margins": 0.30118346214294434, |
|
"rewards/rejected": -0.5474798083305359, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3520838665324704e-06, |
|
"logits/chosen": -1.5909086465835571, |
|
"logits/rejected": -1.0436404943466187, |
|
"logps/chosen": -642.248291015625, |
|
"logps/rejected": -1155.293212890625, |
|
"loss": 0.1028, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.233381986618042, |
|
"rewards/margins": 0.2285003662109375, |
|
"rewards/rejected": -0.4618823528289795, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3263210930352737e-06, |
|
"logits/chosen": -1.6532557010650635, |
|
"logits/rejected": -1.1368399858474731, |
|
"logps/chosen": -737.0447387695312, |
|
"logps/rejected": -1343.158203125, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2329857349395752, |
|
"rewards/margins": 0.27646780014038086, |
|
"rewards/rejected": -0.5094535946846008, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3007172168743854e-06, |
|
"logits/chosen": -1.4727094173431396, |
|
"logits/rejected": -0.934604287147522, |
|
"logps/chosen": -640.7996826171875, |
|
"logps/rejected": -1427.772216796875, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19586428999900818, |
|
"rewards/margins": 0.3154672384262085, |
|
"rewards/rejected": -0.5113314986228943, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.275275704404783e-06, |
|
"logits/chosen": -1.6787872314453125, |
|
"logits/rejected": -1.2128015756607056, |
|
"logps/chosen": -691.6954345703125, |
|
"logps/rejected": -1384.380859375, |
|
"loss": 0.0654, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19925867021083832, |
|
"rewards/margins": 0.3383576273918152, |
|
"rewards/rejected": -0.5376163721084595, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"logits/chosen": -1.4302475452423096, |
|
"logits/rejected": -0.8775968551635742, |
|
"logps/chosen": -651.3941040039062, |
|
"logps/rejected": -1211.107177734375, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20647192001342773, |
|
"rewards/margins": 0.2682201862335205, |
|
"rewards/rejected": -0.4746921956539154, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2248935255858118e-06, |
|
"logits/chosen": -1.6478124856948853, |
|
"logits/rejected": -0.8647529482841492, |
|
"logps/chosen": -657.9767456054688, |
|
"logps/rejected": -1445.98291015625, |
|
"loss": 0.0464, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1874004602432251, |
|
"rewards/margins": 0.3412622809410095, |
|
"rewards/rejected": -0.5286628007888794, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1999596801769617e-06, |
|
"logits/chosen": -1.3120667934417725, |
|
"logits/rejected": -0.8835352063179016, |
|
"logps/chosen": -623.9490356445312, |
|
"logps/rejected": -1219.7760009765625, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20036788284778595, |
|
"rewards/margins": 0.27707821130752563, |
|
"rewards/rejected": -0.4774460792541504, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1752018394169882e-06, |
|
"logits/chosen": -1.7081232070922852, |
|
"logits/rejected": -0.9557887315750122, |
|
"logps/chosen": -739.8932495117188, |
|
"logps/rejected": -1354.7886962890625, |
|
"loss": 0.063, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1747320294380188, |
|
"rewards/margins": 0.3161642551422119, |
|
"rewards/rejected": -0.4908962845802307, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1506233551212186e-06, |
|
"logits/chosen": -1.655374526977539, |
|
"logits/rejected": -0.7405561208724976, |
|
"logps/chosen": -661.6314697265625, |
|
"logps/rejected": -1389.5576171875, |
|
"loss": 0.0551, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.17144644260406494, |
|
"rewards/margins": 0.3735453486442566, |
|
"rewards/rejected": -0.5449918508529663, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1262275548229852e-06, |
|
"logits/chosen": -1.257212519645691, |
|
"logits/rejected": -0.9685500860214233, |
|
"logps/chosen": -522.0648193359375, |
|
"logps/rejected": -1224.8739013671875, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16989552974700928, |
|
"rewards/margins": 0.29921796917915344, |
|
"rewards/rejected": -0.4691134989261627, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -1.2228362560272217, |
|
"logits/rejected": -0.7848671674728394, |
|
"logps/chosen": -745.6207275390625, |
|
"logps/rejected": -1389.057373046875, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23939558863639832, |
|
"rewards/margins": 0.28681597113609314, |
|
"rewards/rejected": -0.5262116193771362, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.077997192242871e-06, |
|
"logits/chosen": -1.4660097360610962, |
|
"logits/rejected": -0.974969208240509, |
|
"logps/chosen": -637.7769165039062, |
|
"logps/rejected": -1316.93017578125, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15726247429847717, |
|
"rewards/margins": 0.31098154187202454, |
|
"rewards/rejected": -0.4682440757751465, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0541691595800338e-06, |
|
"logits/chosen": -1.7434594631195068, |
|
"logits/rejected": -0.9831005930900574, |
|
"logps/chosen": -688.5543212890625, |
|
"logps/rejected": -1286.400634765625, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19961665570735931, |
|
"rewards/margins": 0.28607600927352905, |
|
"rewards/rejected": -0.48569273948669434, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0305368692688175e-06, |
|
"logits/chosen": -1.7225663661956787, |
|
"logits/rejected": -1.016959309577942, |
|
"logps/chosen": -655.6326904296875, |
|
"logps/rejected": -1282.568115234375, |
|
"loss": 0.0711, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19480985403060913, |
|
"rewards/margins": 0.30676570534706116, |
|
"rewards/rejected": -0.5015754699707031, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -1.6399486064910889, |
|
"logits/rejected": -1.1826612949371338, |
|
"logps/chosen": -634.7337646484375, |
|
"logps/rejected": -1298.8203125, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1779237538576126, |
|
"rewards/margins": 0.31172335147857666, |
|
"rewards/rejected": -0.4896470904350281, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.838722865029674e-07, |
|
"logits/chosen": -1.6983495950698853, |
|
"logits/rejected": -0.968538761138916, |
|
"logps/chosen": -577.1983642578125, |
|
"logps/rejected": -1137.4327392578125, |
|
"loss": 0.0539, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.150478333234787, |
|
"rewards/margins": 0.2877087891101837, |
|
"rewards/rejected": -0.4381870627403259, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.608463116858544e-07, |
|
"logits/chosen": -1.341863989830017, |
|
"logits/rejected": -0.9842678904533386, |
|
"logps/chosen": -667.66015625, |
|
"logps/rejected": -1293.984130859375, |
|
"loss": 0.0759, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21305403113365173, |
|
"rewards/margins": 0.2678312361240387, |
|
"rewards/rejected": -0.48088526725769043, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.380287136401001e-07, |
|
"logits/chosen": -1.5788919925689697, |
|
"logits/rejected": -0.9648571014404297, |
|
"logps/chosen": -630.4158325195312, |
|
"logps/rejected": -1360.427001953125, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1882987916469574, |
|
"rewards/margins": 0.3067074418067932, |
|
"rewards/rejected": -0.495006263256073, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.154225815032242e-07, |
|
"logits/chosen": -1.4030269384384155, |
|
"logits/rejected": -0.967923641204834, |
|
"logps/chosen": -672.2672119140625, |
|
"logps/rejected": -1179.1063232421875, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.20492562651634216, |
|
"rewards/margins": 0.24575969576835632, |
|
"rewards/rejected": -0.4506853520870209, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.930309757836517e-07, |
|
"logits/chosen": -1.5156638622283936, |
|
"logits/rejected": -1.0182697772979736, |
|
"logps/chosen": -763.292724609375, |
|
"logps/rejected": -1367.725341796875, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2593090534210205, |
|
"rewards/margins": 0.27271029353141785, |
|
"rewards/rejected": -0.532019317150116, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.708569279463622e-07, |
|
"logits/chosen": -1.521528959274292, |
|
"logits/rejected": -0.7515259981155396, |
|
"logps/chosen": -701.5262451171875, |
|
"logps/rejected": -1299.10693359375, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2057270109653473, |
|
"rewards/margins": 0.3127078413963318, |
|
"rewards/rejected": -0.5184348821640015, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.489034400024812e-07, |
|
"logits/chosen": -1.5848454236984253, |
|
"logits/rejected": -1.0163071155548096, |
|
"logps/chosen": -675.6258544921875, |
|
"logps/rejected": -1327.443603515625, |
|
"loss": 0.0724, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2220022976398468, |
|
"rewards/margins": 0.297424852848053, |
|
"rewards/rejected": -0.5194271802902222, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.6320521831512451, |
|
"logits/rejected": -0.905319094657898, |
|
"logps/chosen": -691.1963500976562, |
|
"logps/rejected": -1361.6270751953125, |
|
"loss": 0.0634, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1820472925901413, |
|
"rewards/margins": 0.337322473526001, |
|
"rewards/rejected": -0.5193697810173035, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.056700021356695e-07, |
|
"logits/chosen": -1.7158523797988892, |
|
"logits/rejected": -1.1264632940292358, |
|
"logps/chosen": -621.7196044921875, |
|
"logps/rejected": -1224.8692626953125, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17123040556907654, |
|
"rewards/margins": 0.28949856758117676, |
|
"rewards/rejected": -0.4607289731502533, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.843959053281663e-07, |
|
"logits/chosen": -1.6252124309539795, |
|
"logits/rejected": -1.1049168109893799, |
|
"logps/chosen": -624.00390625, |
|
"logps/rejected": -1279.696044921875, |
|
"loss": 0.0827, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18267154693603516, |
|
"rewards/margins": 0.31361207365989685, |
|
"rewards/rejected": -0.496283620595932, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.633540738525066e-07, |
|
"logits/chosen": -1.5565481185913086, |
|
"logits/rejected": -1.1449733972549438, |
|
"logps/chosen": -699.10400390625, |
|
"logps/rejected": -1397.216796875, |
|
"loss": 0.0733, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18508002161979675, |
|
"rewards/margins": 0.33612412214279175, |
|
"rewards/rejected": -0.5212041735649109, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.425473564358457e-07, |
|
"logits/chosen": -1.5639346837997437, |
|
"logits/rejected": -1.0611830949783325, |
|
"logps/chosen": -636.5604248046875, |
|
"logps/rejected": -1440.516357421875, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18250158429145813, |
|
"rewards/margins": 0.3447728157043457, |
|
"rewards/rejected": -0.5272743701934814, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.219785699746573e-07, |
|
"logits/chosen": -1.436949610710144, |
|
"logits/rejected": -1.1259959936141968, |
|
"logps/chosen": -717.715576171875, |
|
"logps/rejected": -1378.193359375, |
|
"loss": 0.0757, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22643479704856873, |
|
"rewards/margins": 0.29645177721977234, |
|
"rewards/rejected": -0.5228865742683411, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.016504991533727e-07, |
|
"logits/chosen": -1.5969784259796143, |
|
"logits/rejected": -0.9250253438949585, |
|
"logps/chosen": -714.7103881835938, |
|
"logps/rejected": -1392.815185546875, |
|
"loss": 0.0606, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20075106620788574, |
|
"rewards/margins": 0.321119487285614, |
|
"rewards/rejected": -0.5218705534934998, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.815658960673782e-07, |
|
"logits/chosen": -1.3801023960113525, |
|
"logits/rejected": -0.8244778513908386, |
|
"logps/chosen": -762.7855834960938, |
|
"logps/rejected": -1422.857177734375, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22916767001152039, |
|
"rewards/margins": 0.2947383224964142, |
|
"rewards/rejected": -0.5239059925079346, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.617274798504286e-07, |
|
"logits/chosen": -1.501314640045166, |
|
"logits/rejected": -1.1214609146118164, |
|
"logps/chosen": -670.3170776367188, |
|
"logps/rejected": -1361.3216552734375, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1791990101337433, |
|
"rewards/margins": 0.3246581256389618, |
|
"rewards/rejected": -0.5038571357727051, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.421379363065142e-07, |
|
"logits/chosen": -1.3665945529937744, |
|
"logits/rejected": -0.8138443231582642, |
|
"logps/chosen": -703.3013916015625, |
|
"logps/rejected": -1420.8404541015625, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.20666304230690002, |
|
"rewards/margins": 0.3045525550842285, |
|
"rewards/rejected": -0.5112155675888062, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.227999175462521e-07, |
|
"logits/chosen": -1.4125187397003174, |
|
"logits/rejected": -1.0406558513641357, |
|
"logps/chosen": -726.2548828125, |
|
"logps/rejected": -1322.2655029296875, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1995895951986313, |
|
"rewards/margins": 0.27303341031074524, |
|
"rewards/rejected": -0.47262296080589294, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.037160416278279e-07, |
|
"logits/chosen": -1.5233482122421265, |
|
"logits/rejected": -0.8162555694580078, |
|
"logps/chosen": -642.1959228515625, |
|
"logps/rejected": -1301.4527587890625, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18815341591835022, |
|
"rewards/margins": 0.29811039566993713, |
|
"rewards/rejected": -0.48626384139060974, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.6081314086914062, |
|
"logits/rejected": -1.1109412908554077, |
|
"logps/chosen": -642.6141357421875, |
|
"logps/rejected": -1297.754150390625, |
|
"loss": 0.0596, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16165341436862946, |
|
"rewards/margins": 0.29734665155410767, |
|
"rewards/rejected": -0.45900002121925354, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.663210181650905e-07, |
|
"logits/chosen": -1.4959220886230469, |
|
"logits/rejected": -1.2034730911254883, |
|
"logps/chosen": -575.3321533203125, |
|
"logps/rejected": -1197.032470703125, |
|
"loss": 0.0979, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14985838532447815, |
|
"rewards/margins": 0.26243895292282104, |
|
"rewards/rejected": -0.4122973084449768, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.48014933308352e-07, |
|
"logits/chosen": -1.6470867395401, |
|
"logits/rejected": -1.3305919170379639, |
|
"logps/chosen": -586.1883544921875, |
|
"logps/rejected": -1270.7503662109375, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17088322341442108, |
|
"rewards/margins": 0.29427218437194824, |
|
"rewards/rejected": -0.4651554226875305, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.299731159831953e-07, |
|
"logits/chosen": -1.416419506072998, |
|
"logits/rejected": -0.9456356167793274, |
|
"logps/chosen": -537.1207275390625, |
|
"logps/rejected": -1264.95556640625, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15687143802642822, |
|
"rewards/margins": 0.3016758859157562, |
|
"rewards/rejected": -0.45854735374450684, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.121980087628802e-07, |
|
"logits/chosen": -1.742026686668396, |
|
"logits/rejected": -1.1850230693817139, |
|
"logps/chosen": -548.540283203125, |
|
"logps/rejected": -1200.302734375, |
|
"loss": 0.0542, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15248605608940125, |
|
"rewards/margins": 0.29664915800094604, |
|
"rewards/rejected": -0.4491352438926697, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.946920181123904e-07, |
|
"logits/chosen": -1.5911678075790405, |
|
"logits/rejected": -0.8707629442214966, |
|
"logps/chosen": -719.4567260742188, |
|
"logps/rejected": -1296.5850830078125, |
|
"loss": 0.062, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19431941211223602, |
|
"rewards/margins": 0.2867654860019684, |
|
"rewards/rejected": -0.4810849130153656, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.774575140626317e-07, |
|
"logits/chosen": -1.5443083047866821, |
|
"logits/rejected": -1.0547590255737305, |
|
"logps/chosen": -627.6547241210938, |
|
"logps/rejected": -1386.137451171875, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.19703541696071625, |
|
"rewards/margins": 0.36265698075294495, |
|
"rewards/rejected": -0.5596923828125, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6049682988957037e-07, |
|
"logits/chosen": -1.7288634777069092, |
|
"logits/rejected": -0.8852437138557434, |
|
"logps/chosen": -762.8133544921875, |
|
"logps/rejected": -1455.493896484375, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1930183470249176, |
|
"rewards/margins": 0.3427968919277191, |
|
"rewards/rejected": -0.5358152389526367, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.438122617983442e-07, |
|
"logits/chosen": -1.6805801391601562, |
|
"logits/rejected": -1.0256656408309937, |
|
"logps/chosen": -660.5001220703125, |
|
"logps/rejected": -1275.15625, |
|
"loss": 0.0772, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.208724707365036, |
|
"rewards/margins": 0.28110271692276, |
|
"rewards/rejected": -0.48982739448547363, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.27406068612396e-07, |
|
"logits/chosen": -1.6279792785644531, |
|
"logits/rejected": -0.9308198094367981, |
|
"logps/chosen": -610.5615844726562, |
|
"logps/rejected": -1186.8466796875, |
|
"loss": 0.0817, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15405488014221191, |
|
"rewards/margins": 0.27832460403442383, |
|
"rewards/rejected": -0.43237948417663574, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1128047146765936e-07, |
|
"logits/chosen": -1.5780996084213257, |
|
"logits/rejected": -0.9356195330619812, |
|
"logps/chosen": -742.1123046875, |
|
"logps/rejected": -1309.2537841796875, |
|
"loss": 0.0887, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21093162894248962, |
|
"rewards/margins": 0.29827579855918884, |
|
"rewards/rejected": -0.5092074275016785, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.954376535118562e-07, |
|
"logits/chosen": -1.451724648475647, |
|
"logits/rejected": -1.0602259635925293, |
|
"logps/chosen": -743.4791870117188, |
|
"logps/rejected": -1402.3712158203125, |
|
"loss": 0.058, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21480798721313477, |
|
"rewards/margins": 0.3146688640117645, |
|
"rewards/rejected": -0.5294768214225769, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -1.3941175937652588, |
|
"logits/rejected": -0.991183876991272, |
|
"logps/chosen": -676.042236328125, |
|
"logps/rejected": -1340.735595703125, |
|
"loss": 0.0826, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.23510794341564178, |
|
"rewards/margins": 0.2676921486854553, |
|
"rewards/rejected": -0.5028000473976135, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.646088960486863e-07, |
|
"logits/chosen": -1.6253753900527954, |
|
"logits/rejected": -1.1532971858978271, |
|
"logps/chosen": -519.7340087890625, |
|
"logps/rejected": -1152.824462890625, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14441636204719543, |
|
"rewards/margins": 0.2724858820438385, |
|
"rewards/rejected": -0.41690224409103394, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4962713026158697e-07, |
|
"logits/chosen": -1.607985258102417, |
|
"logits/rejected": -1.2400459051132202, |
|
"logps/chosen": -500.02020263671875, |
|
"logps/rejected": -1161.1961669921875, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1489984691143036, |
|
"rewards/margins": 0.2574361562728882, |
|
"rewards/rejected": -0.4064346253871918, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3493649053890325e-07, |
|
"logits/chosen": -1.5089218616485596, |
|
"logits/rejected": -0.8405025601387024, |
|
"logps/chosen": -708.8694458007812, |
|
"logps/rejected": -1470.578857421875, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.21505184471607208, |
|
"rewards/margins": 0.34505897760391235, |
|
"rewards/rejected": -0.5601108074188232, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2053896575809426e-07, |
|
"logits/chosen": -1.639923334121704, |
|
"logits/rejected": -1.043080449104309, |
|
"logps/chosen": -783.112060546875, |
|
"logps/rejected": -1343.9681396484375, |
|
"loss": 0.0747, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.24453020095825195, |
|
"rewards/margins": 0.2654086947441101, |
|
"rewards/rejected": -0.5099388957023621, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0643650511354483e-07, |
|
"logits/chosen": -1.6855356693267822, |
|
"logits/rejected": -0.8734441995620728, |
|
"logps/chosen": -713.609130859375, |
|
"logps/rejected": -1338.822998046875, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2088811695575714, |
|
"rewards/margins": 0.30321091413497925, |
|
"rewards/rejected": -0.512092113494873, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9263101785268253e-07, |
|
"logits/chosen": -1.40034818649292, |
|
"logits/rejected": -1.074161171913147, |
|
"logps/chosen": -614.1613159179688, |
|
"logps/rejected": -1244.793212890625, |
|
"loss": 0.0945, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20661716163158417, |
|
"rewards/margins": 0.28118330240249634, |
|
"rewards/rejected": -0.4878004193305969, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.791243730174903e-07, |
|
"logits/chosen": -1.6715885400772095, |
|
"logits/rejected": -1.1529837846755981, |
|
"logps/chosen": -547.1351928710938, |
|
"logps/rejected": -1222.099365234375, |
|
"loss": 0.0664, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12953749299049377, |
|
"rewards/margins": 0.31725236773490906, |
|
"rewards/rejected": -0.44678983092308044, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -1.6793832778930664, |
|
"logits/rejected": -0.8579391241073608, |
|
"logps/chosen": -668.2606201171875, |
|
"logps/rejected": -1342.045166015625, |
|
"loss": 0.0454, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.16217480599880219, |
|
"rewards/margins": 0.35208019614219666, |
|
"rewards/rejected": -0.5142549276351929, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.53014884252083e-07, |
|
"logits/chosen": -1.469300627708435, |
|
"logits/rejected": -0.767549455165863, |
|
"logps/chosen": -720.5267333984375, |
|
"logps/rejected": -1461.4560546875, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19582341611385345, |
|
"rewards/margins": 0.36342793703079224, |
|
"rewards/rejected": -0.5592513680458069, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.404155751286988e-07, |
|
"logits/chosen": -1.249018907546997, |
|
"logits/rejected": -0.6379393339157104, |
|
"logps/chosen": -634.1907958984375, |
|
"logps/rejected": -1247.8612060546875, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18129587173461914, |
|
"rewards/margins": 0.29834333062171936, |
|
"rewards/rejected": -0.4796391427516937, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.281221775660894e-07, |
|
"logits/chosen": -1.5495988130569458, |
|
"logits/rejected": -0.8539125323295593, |
|
"logps/chosen": -634.5662231445312, |
|
"logps/rejected": -1174.760498046875, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1546512097120285, |
|
"rewards/margins": 0.26017943024635315, |
|
"rewards/rejected": -0.41483062505722046, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -1.432281494140625, |
|
"logits/rejected": -0.9715501070022583, |
|
"logps/chosen": -674.9788818359375, |
|
"logps/rejected": -1332.5562744140625, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2190193384885788, |
|
"rewards/margins": 0.2877424359321594, |
|
"rewards/rejected": -0.5067617893218994, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.044597327993153e-07, |
|
"logits/chosen": -1.5616180896759033, |
|
"logits/rejected": -1.1563438177108765, |
|
"logps/chosen": -606.47509765625, |
|
"logps/rejected": -1302.349365234375, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17486010491847992, |
|
"rewards/margins": 0.30272260308265686, |
|
"rewards/rejected": -0.4775826930999756, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9309388911139427e-07, |
|
"logits/chosen": -1.6554429531097412, |
|
"logits/rejected": -0.9854539036750793, |
|
"logps/chosen": -705.8251342773438, |
|
"logps/rejected": -1167.0521240234375, |
|
"loss": 0.0627, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1600712388753891, |
|
"rewards/margins": 0.2684277892112732, |
|
"rewards/rejected": -0.4284990429878235, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8204036358303173e-07, |
|
"logits/chosen": -1.5156413316726685, |
|
"logits/rejected": -0.9938446879386902, |
|
"logps/chosen": -598.7017822265625, |
|
"logps/rejected": -1183.290771484375, |
|
"loss": 0.0615, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16524121165275574, |
|
"rewards/margins": 0.28775864839553833, |
|
"rewards/rejected": -0.45299988985061646, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.713006526846439e-07, |
|
"logits/chosen": -1.6532678604125977, |
|
"logits/rejected": -0.9341715574264526, |
|
"logps/chosen": -686.2755126953125, |
|
"logps/rejected": -1177.0118408203125, |
|
"loss": 0.1065, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18550768494606018, |
|
"rewards/margins": 0.24031631648540497, |
|
"rewards/rejected": -0.42582401633262634, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6087621040117157e-07, |
|
"logits/chosen": -1.7682663202285767, |
|
"logits/rejected": -0.9737985730171204, |
|
"logps/chosen": -617.552001953125, |
|
"logps/rejected": -1250.820068359375, |
|
"loss": 0.0657, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16673611104488373, |
|
"rewards/margins": 0.3138349950313568, |
|
"rewards/rejected": -0.48057112097740173, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.507684480352292e-07, |
|
"logits/chosen": -1.4090583324432373, |
|
"logits/rejected": -0.8811914324760437, |
|
"logps/chosen": -614.2474975585938, |
|
"logps/rejected": -1185.4610595703125, |
|
"loss": 0.0782, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1610531508922577, |
|
"rewards/margins": 0.2807305157184601, |
|
"rewards/rejected": -0.4417836666107178, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4097873401604124e-07, |
|
"logits/chosen": -1.5380343198776245, |
|
"logits/rejected": -1.1294854879379272, |
|
"logps/chosen": -611.0988159179688, |
|
"logps/rejected": -1329.0465087890625, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18254908919334412, |
|
"rewards/margins": 0.304229736328125, |
|
"rewards/rejected": -0.4867788255214691, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.31508393714177e-07, |
|
"logits/chosen": -1.3804000616073608, |
|
"logits/rejected": -0.9093223810195923, |
|
"logps/chosen": -625.8529052734375, |
|
"logps/rejected": -1367.1817626953125, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1812657117843628, |
|
"rewards/margins": 0.3490126132965088, |
|
"rewards/rejected": -0.5302783250808716, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.223587092621162e-07, |
|
"logits/chosen": -1.5867640972137451, |
|
"logits/rejected": -0.9399985074996948, |
|
"logps/chosen": -676.7730102539062, |
|
"logps/rejected": -1228.8385009765625, |
|
"loss": 0.0731, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17374689877033234, |
|
"rewards/margins": 0.2911613881587982, |
|
"rewards/rejected": -0.46490830183029175, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1353091938067024e-07, |
|
"logits/chosen": -1.3089746236801147, |
|
"logits/rejected": -1.0505130290985107, |
|
"logps/chosen": -693.4176025390625, |
|
"logps/rejected": -1227.46142578125, |
|
"loss": 0.108, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20728778839111328, |
|
"rewards/margins": 0.26565903425216675, |
|
"rewards/rejected": -0.47294682264328003, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0502621921127776e-07, |
|
"logits/chosen": -1.6415075063705444, |
|
"logits/rejected": -1.284096360206604, |
|
"logps/chosen": -757.7032470703125, |
|
"logps/rejected": -1357.983154296875, |
|
"loss": 0.0815, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2325299233198166, |
|
"rewards/margins": 0.27662572264671326, |
|
"rewards/rejected": -0.5091556906700134, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -1.3691219091415405, |
|
"logits/rejected": -1.0339184999465942, |
|
"logps/chosen": -582.3773193359375, |
|
"logps/rejected": -1222.3829345703125, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17146027088165283, |
|
"rewards/margins": 0.299213707447052, |
|
"rewards/rejected": -0.47067388892173767, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.899064971265275e-08, |
|
"logits/chosen": -1.581775188446045, |
|
"logits/rejected": -1.0051343441009521, |
|
"logps/chosen": -718.9093017578125, |
|
"logps/rejected": -1241.8062744140625, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1594964563846588, |
|
"rewards/margins": 0.28466928005218506, |
|
"rewards/rejected": -0.44416576623916626, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.146195134284052e-08, |
|
"logits/chosen": -1.4833767414093018, |
|
"logits/rejected": -1.0147238969802856, |
|
"logps/chosen": -611.6956176757812, |
|
"logps/rejected": -1192.66015625, |
|
"loss": 0.0914, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20624539256095886, |
|
"rewards/margins": 0.2417052984237671, |
|
"rewards/rejected": -0.4479507505893707, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.426068431000883e-08, |
|
"logits/chosen": -1.4513158798217773, |
|
"logits/rejected": -0.7021422982215881, |
|
"logps/chosen": -749.6524658203125, |
|
"logps/rejected": -1255.1658935546875, |
|
"loss": 0.0931, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2141745388507843, |
|
"rewards/margins": 0.2607952654361725, |
|
"rewards/rejected": -0.4749697744846344, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -1.3975991010665894, |
|
"logits/rejected": -0.9725745320320129, |
|
"logps/chosen": -653.4349365234375, |
|
"logps/rejected": -1304.32666015625, |
|
"loss": 0.0911, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2119159698486328, |
|
"rewards/margins": 0.2688130736351013, |
|
"rewards/rejected": -0.48072901368141174, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.084429953946452e-08, |
|
"logits/chosen": -1.5564731359481812, |
|
"logits/rejected": -1.0867516994476318, |
|
"logps/chosen": -614.5533447265625, |
|
"logps/rejected": -1332.2275390625, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.18066832423210144, |
|
"rewards/margins": 0.32241129875183105, |
|
"rewards/rejected": -0.5030795335769653, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.463099816548578e-08, |
|
"logits/chosen": -1.6008579730987549, |
|
"logits/rejected": -1.274701476097107, |
|
"logps/chosen": -693.5843505859375, |
|
"logps/rejected": -1352.4996337890625, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21343615651130676, |
|
"rewards/margins": 0.2871994972229004, |
|
"rewards/rejected": -0.5006356835365295, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.8748760610051735e-08, |
|
"logits/chosen": -1.5801304578781128, |
|
"logits/rejected": -1.0879913568496704, |
|
"logps/chosen": -632.2335205078125, |
|
"logps/rejected": -1163.8685302734375, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.16370609402656555, |
|
"rewards/margins": 0.23538489639759064, |
|
"rewards/rejected": -0.3990909457206726, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.319838323396691e-08, |
|
"logits/chosen": -1.6419999599456787, |
|
"logits/rejected": -1.148740530014038, |
|
"logps/chosen": -622.5277099609375, |
|
"logps/rejected": -1213.3511962890625, |
|
"loss": 0.0825, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18520578742027283, |
|
"rewards/margins": 0.2649041712284088, |
|
"rewards/rejected": -0.45010989904403687, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -1.5798577070236206, |
|
"logits/rejected": -1.0320630073547363, |
|
"logps/chosen": -704.6697998046875, |
|
"logps/rejected": -1352.01806640625, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21166665852069855, |
|
"rewards/margins": 0.2847224175930023, |
|
"rewards/rejected": -0.49638909101486206, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.309616971855195e-08, |
|
"logits/chosen": -1.5923948287963867, |
|
"logits/rejected": -0.976751446723938, |
|
"logps/chosen": -629.5615844726562, |
|
"logps/rejected": -1377.3729248046875, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16095896065235138, |
|
"rewards/margins": 0.3290019631385803, |
|
"rewards/rejected": -0.4899609684944153, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.8545701257221003e-08, |
|
"logits/chosen": -1.5409938097000122, |
|
"logits/rejected": -1.0359934568405151, |
|
"logps/chosen": -694.2177124023438, |
|
"logps/rejected": -1343.0057373046875, |
|
"loss": 0.0705, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20338015258312225, |
|
"rewards/margins": 0.2844991981983185, |
|
"rewards/rejected": -0.48787933588027954, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -1.6312742233276367, |
|
"logits/rejected": -1.199237585067749, |
|
"logps/chosen": -547.9122924804688, |
|
"logps/rejected": -1200.981689453125, |
|
"loss": 0.0883, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14529578387737274, |
|
"rewards/margins": 0.2816771864891052, |
|
"rewards/rejected": -0.42697301506996155, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0449121146845773e-08, |
|
"logits/chosen": -1.4686208963394165, |
|
"logits/rejected": -0.851632297039032, |
|
"logps/chosen": -645.3411254882812, |
|
"logps/rejected": -1267.0413818359375, |
|
"loss": 0.0672, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20314595103263855, |
|
"rewards/margins": 0.28285151720046997, |
|
"rewards/rejected": -0.4859974980354309, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6904105645142443e-08, |
|
"logits/chosen": -1.631974220275879, |
|
"logits/rejected": -1.05061674118042, |
|
"logps/chosen": -530.013671875, |
|
"logps/rejected": -1199.45068359375, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15192578732967377, |
|
"rewards/margins": 0.31625282764434814, |
|
"rewards/rejected": -0.4681786596775055, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3695261579316776e-08, |
|
"logits/chosen": -1.4659382104873657, |
|
"logits/rejected": -0.9017642736434937, |
|
"logps/chosen": -520.8211669921875, |
|
"logps/rejected": -1325.6385498046875, |
|
"loss": 0.056, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.15825147926807404, |
|
"rewards/margins": 0.35485899448394775, |
|
"rewards/rejected": -0.5131104588508606, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0823023375489128e-08, |
|
"logits/chosen": -1.5723521709442139, |
|
"logits/rejected": -1.1977678537368774, |
|
"logps/chosen": -600.8292236328125, |
|
"logps/rejected": -1365.1005859375, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16827178001403809, |
|
"rewards/margins": 0.3262236714363098, |
|
"rewards/rejected": -0.4944954812526703, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.28777988873486e-09, |
|
"logits/chosen": -1.6231420040130615, |
|
"logits/rejected": -1.0731710195541382, |
|
"logps/chosen": -671.5623779296875, |
|
"logps/rejected": -1358.5367431640625, |
|
"loss": 0.0688, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21294662356376648, |
|
"rewards/margins": 0.3046295642852783, |
|
"rewards/rejected": -0.5175761580467224, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.089874350439507e-09, |
|
"logits/chosen": -1.4259278774261475, |
|
"logits/rejected": -0.8798543810844421, |
|
"logps/chosen": -694.62548828125, |
|
"logps/rejected": -1346.7584228515625, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21204762160778046, |
|
"rewards/margins": 0.27931392192840576, |
|
"rewards/rejected": -0.4913616180419922, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.229604321829561e-09, |
|
"logits/chosen": -1.596842885017395, |
|
"logits/rejected": -1.0910255908966064, |
|
"logps/chosen": -653.8787231445312, |
|
"logps/rejected": -1310.4539794921875, |
|
"loss": 0.0672, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17626503109931946, |
|
"rewards/margins": 0.3032272756099701, |
|
"rewards/rejected": -0.47949227690696716, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7072216536885855e-09, |
|
"logits/chosen": -1.3221776485443115, |
|
"logits/rejected": -0.8105962872505188, |
|
"logps/chosen": -586.7859497070312, |
|
"logps/rejected": -1137.4781494140625, |
|
"loss": 0.099, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.19391945004463196, |
|
"rewards/margins": 0.2519974112510681, |
|
"rewards/rejected": -0.4459168314933777, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5229324522605949e-09, |
|
"logits/chosen": -1.553269624710083, |
|
"logits/rejected": -1.0229682922363281, |
|
"logps/chosen": -545.6754760742188, |
|
"logps/rejected": -1225.473388671875, |
|
"loss": 0.0631, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16567066311836243, |
|
"rewards/margins": 0.2939736247062683, |
|
"rewards/rejected": -0.45964425802230835, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.768970513457151e-10, |
|
"logits/chosen": -1.6384735107421875, |
|
"logits/rejected": -0.9449461102485657, |
|
"logps/chosen": -580.2606811523438, |
|
"logps/rejected": -1307.0478515625, |
|
"loss": 0.0692, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16747145354747772, |
|
"rewards/margins": 0.31812483072280884, |
|
"rewards/rejected": -0.48559626936912537, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.692299905944883e-10, |
|
"logits/chosen": -1.3800950050354004, |
|
"logits/rejected": -0.8507388234138489, |
|
"logps/chosen": -683.5490112304688, |
|
"logps/rejected": -1347.037841796875, |
|
"loss": 0.052, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19447694718837738, |
|
"rewards/margins": 0.3061479330062866, |
|
"rewards/rejected": -0.5006248950958252, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.6093953847885132, |
|
"logits/rejected": -0.8852132558822632, |
|
"logps/chosen": -607.3538208007812, |
|
"logps/rejected": -1118.9757080078125, |
|
"loss": 0.1084, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13724899291992188, |
|
"rewards/margins": 0.2455356866121292, |
|
"rewards/rejected": -0.3827846646308899, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3000, |
|
"total_flos": 0.0, |
|
"train_loss": 0.08193727386991183, |
|
"train_runtime": 13050.0697, |
|
"train_samples_per_second": 0.92, |
|
"train_steps_per_second": 0.23 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|