|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333334e-08, |
|
"logits/chosen": -1.4607182741165161, |
|
"logits/rejected": -1.0577633380889893, |
|
"logps/chosen": -377.1839599609375, |
|
"logps/rejected": -1292.140625, |
|
"loss": 0.3828, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333336e-07, |
|
"logits/chosen": -1.422508716583252, |
|
"logits/rejected": -1.0050468444824219, |
|
"logps/chosen": -603.676025390625, |
|
"logps/rejected": -1303.1278076171875, |
|
"loss": 0.3726, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": -0.00046346496674232185, |
|
"rewards/margins": -0.0005589782958850265, |
|
"rewards/rejected": 9.551318362355232e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.666666666666667e-07, |
|
"logits/chosen": -1.469663381576538, |
|
"logits/rejected": -1.0887094736099243, |
|
"logps/chosen": -544.6007080078125, |
|
"logps/rejected": -1043.440673828125, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0005788623820990324, |
|
"rewards/margins": 0.001082524424418807, |
|
"rewards/rejected": -0.0005036621587350965, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -1.6168861389160156, |
|
"logits/rejected": -1.055768609046936, |
|
"logps/chosen": -653.9005737304688, |
|
"logps/rejected": -1314.4986572265625, |
|
"loss": 0.3835, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -8.300555782625452e-05, |
|
"rewards/margins": 0.0015174217987805605, |
|
"rewards/rejected": -0.001600427320227027, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.333333333333335e-07, |
|
"logits/chosen": -1.3431918621063232, |
|
"logits/rejected": -0.6968151926994324, |
|
"logps/chosen": -665.5634765625, |
|
"logps/rejected": -1349.5455322265625, |
|
"loss": 0.3624, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0007652758504264057, |
|
"rewards/margins": 0.004389578010886908, |
|
"rewards/rejected": -0.005154854152351618, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.666666666666667e-07, |
|
"logits/chosen": -1.36802077293396, |
|
"logits/rejected": -0.7910489439964294, |
|
"logps/chosen": -665.3347778320312, |
|
"logps/rejected": -1172.180419921875, |
|
"loss": 0.3291, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0013909535482525826, |
|
"rewards/margins": 0.00601952476426959, |
|
"rewards/rejected": -0.0074104792438447475, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -1.6208984851837158, |
|
"logits/rejected": -0.9291224479675293, |
|
"logps/chosen": -660.3453369140625, |
|
"logps/rejected": -1401.126220703125, |
|
"loss": 0.3343, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.00016163568943738937, |
|
"rewards/margins": 0.020959021523594856, |
|
"rewards/rejected": -0.02079738676548004, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.333333333333334e-07, |
|
"logits/chosen": -1.5982778072357178, |
|
"logits/rejected": -0.6397973299026489, |
|
"logps/chosen": -600.2814331054688, |
|
"logps/rejected": -1338.4075927734375, |
|
"loss": 0.3381, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0026822402141988277, |
|
"rewards/margins": 0.034768685698509216, |
|
"rewards/rejected": -0.032086439430713654, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.066666666666667e-06, |
|
"logits/chosen": -1.6199333667755127, |
|
"logits/rejected": -0.8920964002609253, |
|
"logps/chosen": -589.6707763671875, |
|
"logps/rejected": -1347.08837890625, |
|
"loss": 0.3062, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.005661836825311184, |
|
"rewards/margins": 0.04629923403263092, |
|
"rewards/rejected": -0.05196107178926468, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -1.1941367387771606, |
|
"logits/rejected": -0.7822949290275574, |
|
"logps/chosen": -618.1773681640625, |
|
"logps/rejected": -1214.458251953125, |
|
"loss": 0.3073, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.02980031445622444, |
|
"rewards/margins": 0.08387573063373566, |
|
"rewards/rejected": -0.1136760488152504, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"logits/chosen": -1.1643040180206299, |
|
"logits/rejected": -0.3697466552257538, |
|
"logps/chosen": -628.5645141601562, |
|
"logps/rejected": -1501.861572265625, |
|
"loss": 0.2341, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11866208165884018, |
|
"rewards/margins": 0.1647382378578186, |
|
"rewards/rejected": -0.283400297164917, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4666666666666669e-06, |
|
"logits/chosen": -1.1232091188430786, |
|
"logits/rejected": -0.17076462507247925, |
|
"logps/chosen": -800.4198608398438, |
|
"logps/rejected": -1697.21875, |
|
"loss": 0.1819, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.23575441539287567, |
|
"rewards/margins": 0.2496374547481537, |
|
"rewards/rejected": -0.48539191484451294, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -1.1497989892959595, |
|
"logits/rejected": -0.4379865527153015, |
|
"logps/chosen": -914.0667114257812, |
|
"logps/rejected": -1844.274169921875, |
|
"loss": 0.2595, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.29556211829185486, |
|
"rewards/margins": 0.2584637701511383, |
|
"rewards/rejected": -0.5540258288383484, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7333333333333336e-06, |
|
"logits/chosen": -1.2714927196502686, |
|
"logits/rejected": -0.7818469405174255, |
|
"logps/chosen": -809.2772216796875, |
|
"logps/rejected": -1702.701171875, |
|
"loss": 0.252, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20852358639240265, |
|
"rewards/margins": 0.2036806344985962, |
|
"rewards/rejected": -0.4122042655944824, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"logits/chosen": -1.4557206630706787, |
|
"logits/rejected": -0.5710101127624512, |
|
"logps/chosen": -727.69189453125, |
|
"logps/rejected": -1672.314208984375, |
|
"loss": 0.1792, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2208588570356369, |
|
"rewards/margins": 0.21561995148658752, |
|
"rewards/rejected": -0.436478853225708, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -1.2810709476470947, |
|
"logits/rejected": -0.3112773001194, |
|
"logps/chosen": -960.1036376953125, |
|
"logps/rejected": -1976.63671875, |
|
"loss": 0.2275, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3756103217601776, |
|
"rewards/margins": 0.28143054246902466, |
|
"rewards/rejected": -0.6570408344268799, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.133333333333334e-06, |
|
"logits/chosen": -1.1071354150772095, |
|
"logits/rejected": -0.2948054373264313, |
|
"logps/chosen": -707.2420654296875, |
|
"logps/rejected": -1617.572509765625, |
|
"loss": 0.3027, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2601652145385742, |
|
"rewards/margins": 0.25246208906173706, |
|
"rewards/rejected": -0.5126273036003113, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.266666666666667e-06, |
|
"logits/chosen": -1.1747493743896484, |
|
"logits/rejected": -0.516975998878479, |
|
"logps/chosen": -671.3464965820312, |
|
"logps/rejected": -1495.285400390625, |
|
"loss": 0.2483, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1535535305738449, |
|
"rewards/margins": 0.13311822712421417, |
|
"rewards/rejected": -0.2866717278957367, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -1.5080617666244507, |
|
"logits/rejected": -0.1595776528120041, |
|
"logps/chosen": -766.1541137695312, |
|
"logps/rejected": -1505.566162109375, |
|
"loss": 0.1896, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1698867678642273, |
|
"rewards/margins": 0.1528700590133667, |
|
"rewards/rejected": -0.322756826877594, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5333333333333338e-06, |
|
"logits/chosen": -1.4777127504348755, |
|
"logits/rejected": -0.1578340083360672, |
|
"logps/chosen": -883.9136962890625, |
|
"logps/rejected": -1729.061279296875, |
|
"loss": 0.2441, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2274136245250702, |
|
"rewards/margins": 0.1767813265323639, |
|
"rewards/rejected": -0.4041949212551117, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.666666666666667e-06, |
|
"logits/chosen": -1.5583598613739014, |
|
"logits/rejected": -0.15428844094276428, |
|
"logps/chosen": -954.4393310546875, |
|
"logps/rejected": -1927.611328125, |
|
"loss": 0.1691, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2696084976196289, |
|
"rewards/margins": 0.28209298849105835, |
|
"rewards/rejected": -0.5517014861106873, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -1.148453950881958, |
|
"logits/rejected": 0.03259178251028061, |
|
"logps/chosen": -963.2941284179688, |
|
"logps/rejected": -1878.1683349609375, |
|
"loss": 0.2653, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.29248175024986267, |
|
"rewards/margins": 0.2701197564601898, |
|
"rewards/rejected": -0.5626015067100525, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"logits/chosen": -1.4708116054534912, |
|
"logits/rejected": -0.4165799617767334, |
|
"logps/chosen": -663.6636352539062, |
|
"logps/rejected": -1678.5081787109375, |
|
"loss": 0.1756, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1571148931980133, |
|
"rewards/margins": 0.24838630855083466, |
|
"rewards/rejected": -0.40550118684768677, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.066666666666667e-06, |
|
"logits/chosen": -1.17020583152771, |
|
"logits/rejected": -0.47575148940086365, |
|
"logps/chosen": -920.1783447265625, |
|
"logps/rejected": -1948.2427978515625, |
|
"loss": 0.1913, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2500799894332886, |
|
"rewards/margins": 0.24250411987304688, |
|
"rewards/rejected": -0.49258413910865784, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -1.615384817123413, |
|
"logits/rejected": -0.5452179312705994, |
|
"logps/chosen": -855.1349487304688, |
|
"logps/rejected": -1652.406494140625, |
|
"loss": 0.1915, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.20125408470630646, |
|
"rewards/margins": 0.23299658298492432, |
|
"rewards/rejected": -0.4342506527900696, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -1.231403112411499, |
|
"logits/rejected": 0.04080945998430252, |
|
"logps/chosen": -830.4852294921875, |
|
"logps/rejected": -1568.180908203125, |
|
"loss": 0.2531, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16112910211086273, |
|
"rewards/margins": 0.1832246333360672, |
|
"rewards/rejected": -0.34435373544692993, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"logits/chosen": -1.4119511842727661, |
|
"logits/rejected": 0.3003128170967102, |
|
"logps/chosen": -697.4551391601562, |
|
"logps/rejected": -1790.427978515625, |
|
"loss": 0.1495, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12422380596399307, |
|
"rewards/margins": 0.2803717255592346, |
|
"rewards/rejected": -0.4045955538749695, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -1.4090235233306885, |
|
"logits/rejected": 0.20056810975074768, |
|
"logps/chosen": -781.8931884765625, |
|
"logps/rejected": -1546.812744140625, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.18441830575466156, |
|
"rewards/margins": 0.21885094046592712, |
|
"rewards/rejected": -0.40326929092407227, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"logits/chosen": -1.3472883701324463, |
|
"logits/rejected": -0.16090384125709534, |
|
"logps/chosen": -674.9337158203125, |
|
"logps/rejected": -1524.169677734375, |
|
"loss": 0.2165, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.11200114339590073, |
|
"rewards/margins": 0.18906202912330627, |
|
"rewards/rejected": -0.3010631799697876, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.866666666666667e-06, |
|
"logits/chosen": -1.5704516172409058, |
|
"logits/rejected": -0.3292531967163086, |
|
"logps/chosen": -733.7753295898438, |
|
"logps/rejected": -1638.066162109375, |
|
"loss": 0.2036, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0726594403386116, |
|
"rewards/margins": 0.25331053137779236, |
|
"rewards/rejected": -0.32596996426582336, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.4241251945495605, |
|
"logits/rejected": -0.6696543097496033, |
|
"logps/chosen": -756.4315795898438, |
|
"logps/rejected": -1572.5772705078125, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16283096373081207, |
|
"rewards/margins": 0.22412030398845673, |
|
"rewards/rejected": -0.3869512677192688, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.133333333333333e-06, |
|
"logits/chosen": -1.368127465248108, |
|
"logits/rejected": -0.3331999182701111, |
|
"logps/chosen": -967.3336791992188, |
|
"logps/rejected": -1911.6470947265625, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2711753249168396, |
|
"rewards/margins": 0.3143623471260071, |
|
"rewards/rejected": -0.5855377316474915, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.266666666666668e-06, |
|
"logits/chosen": -1.71210515499115, |
|
"logits/rejected": 0.0729786604642868, |
|
"logps/chosen": -1014.9069213867188, |
|
"logps/rejected": -1880.586669921875, |
|
"loss": 0.1866, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.31470808386802673, |
|
"rewards/margins": 0.3131619393825531, |
|
"rewards/rejected": -0.6278700828552246, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -1.2539806365966797, |
|
"logits/rejected": -0.792563796043396, |
|
"logps/chosen": -697.0578002929688, |
|
"logps/rejected": -1576.9892578125, |
|
"loss": 0.1946, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15875259041786194, |
|
"rewards/margins": 0.22502513229846954, |
|
"rewards/rejected": -0.3837777078151703, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.533333333333334e-06, |
|
"logits/chosen": -1.3388795852661133, |
|
"logits/rejected": -0.407992422580719, |
|
"logps/chosen": -761.8697509765625, |
|
"logps/rejected": -1639.280517578125, |
|
"loss": 1.8252, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2346605360507965, |
|
"rewards/margins": 0.4083401560783386, |
|
"rewards/rejected": -0.6430006623268127, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.666666666666667e-06, |
|
"logits/chosen": -1.370415449142456, |
|
"logits/rejected": -0.36617863178253174, |
|
"logps/chosen": -647.6007080078125, |
|
"logps/rejected": -1585.802001953125, |
|
"loss": 0.2431, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1556210219860077, |
|
"rewards/margins": 0.14714348316192627, |
|
"rewards/rejected": -0.30276453495025635, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -1.4223321676254272, |
|
"logits/rejected": -0.7678893804550171, |
|
"logps/chosen": -862.3582763671875, |
|
"logps/rejected": -1554.5501708984375, |
|
"loss": 0.3499, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.15241160988807678, |
|
"rewards/margins": 0.07054910808801651, |
|
"rewards/rejected": -0.22296074032783508, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.933333333333334e-06, |
|
"logits/chosen": -1.6838983297348022, |
|
"logits/rejected": -0.8386624455451965, |
|
"logps/chosen": -854.6920776367188, |
|
"logps/rejected": -1489.2125244140625, |
|
"loss": 0.3149, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12944355607032776, |
|
"rewards/margins": 0.06882871687412262, |
|
"rewards/rejected": -0.19827227294445038, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999972922944898e-06, |
|
"logits/chosen": -1.6282720565795898, |
|
"logits/rejected": -1.0739606618881226, |
|
"logps/chosen": -714.4268188476562, |
|
"logps/rejected": -1637.6285400390625, |
|
"loss": 0.1918, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.07471625506877899, |
|
"rewards/margins": 0.17545118927955627, |
|
"rewards/rejected": -0.2501674294471741, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -1.5494694709777832, |
|
"logits/rejected": -0.6611472964286804, |
|
"logps/chosen": -666.4691162109375, |
|
"logps/rejected": -1401.627685546875, |
|
"loss": 0.2341, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.024204634130001068, |
|
"rewards/margins": 0.2136712521314621, |
|
"rewards/rejected": -0.23787586390972137, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999323102948655e-06, |
|
"logits/chosen": -1.4422892332077026, |
|
"logits/rejected": -0.2696318030357361, |
|
"logps/chosen": -627.1585083007812, |
|
"logps/rejected": -1432.5504150390625, |
|
"loss": 0.2485, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0546126663684845, |
|
"rewards/margins": 0.1983325034379959, |
|
"rewards/rejected": -0.2529451549053192, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998673339256785e-06, |
|
"logits/chosen": -1.4705862998962402, |
|
"logits/rejected": -0.8628055453300476, |
|
"logps/chosen": -736.0491333007812, |
|
"logps/rejected": -1735.3140869140625, |
|
"loss": 0.1994, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1055409163236618, |
|
"rewards/margins": 0.21688199043273926, |
|
"rewards/rejected": -0.32242292165756226, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -1.3044060468673706, |
|
"logits/rejected": -0.5804620981216431, |
|
"logps/chosen": -709.5520629882812, |
|
"logps/rejected": -1824.0869140625, |
|
"loss": 0.1758, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13376520574092865, |
|
"rewards/margins": 0.34297293424606323, |
|
"rewards/rejected": -0.4767381548881531, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.996724385978142e-06, |
|
"logits/chosen": -1.5715945959091187, |
|
"logits/rejected": -0.49768322706222534, |
|
"logps/chosen": -752.0654907226562, |
|
"logps/rejected": -1800.0816650390625, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12571558356285095, |
|
"rewards/margins": 0.3276565372943878, |
|
"rewards/rejected": -0.45337215065956116, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995425365260585e-06, |
|
"logits/chosen": -1.4646804332733154, |
|
"logits/rejected": -0.6573309898376465, |
|
"logps/chosen": -604.1141357421875, |
|
"logps/rejected": -1509.289306640625, |
|
"loss": 0.1685, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08397423475980759, |
|
"rewards/margins": 0.22961445152759552, |
|
"rewards/rejected": -0.3135886788368225, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -1.58323073387146, |
|
"logits/rejected": -0.9330165982246399, |
|
"logps/chosen": -575.3523559570312, |
|
"logps/rejected": -1430.528564453125, |
|
"loss": 0.1947, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0727187842130661, |
|
"rewards/margins": 0.2145281583070755, |
|
"rewards/rejected": -0.2872469425201416, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992178798434684e-06, |
|
"logits/chosen": -1.5033951997756958, |
|
"logits/rejected": -0.5900696516036987, |
|
"logps/chosen": -897.4099731445312, |
|
"logps/rejected": -1658.7633056640625, |
|
"loss": 0.2771, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17770914733409882, |
|
"rewards/margins": 0.19083838164806366, |
|
"rewards/rejected": -0.3685474991798401, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990231533628719e-06, |
|
"logits/chosen": -1.6505463123321533, |
|
"logits/rejected": -0.9378656148910522, |
|
"logps/chosen": -690.3605346679688, |
|
"logps/rejected": -1701.1597900390625, |
|
"loss": 0.2146, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13155733048915863, |
|
"rewards/margins": 0.2767486572265625, |
|
"rewards/rejected": -0.40830597281455994, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -1.555983543395996, |
|
"logits/rejected": -0.4679872393608093, |
|
"logps/chosen": -698.6531982421875, |
|
"logps/rejected": -1681.360595703125, |
|
"loss": 0.2585, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12316081672906876, |
|
"rewards/margins": 0.31332293152809143, |
|
"rewards/rejected": -0.436483770608902, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985689884830711e-06, |
|
"logits/chosen": -1.5021615028381348, |
|
"logits/rejected": -0.6322463750839233, |
|
"logps/chosen": -743.8492431640625, |
|
"logps/rejected": -1656.78515625, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09808465093374252, |
|
"rewards/margins": 0.18296462297439575, |
|
"rewards/rejected": -0.28104931116104126, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -1.537832260131836, |
|
"logits/rejected": -0.5591806173324585, |
|
"logps/chosen": -771.895751953125, |
|
"logps/rejected": -1593.089111328125, |
|
"loss": 0.2529, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11263638734817505, |
|
"rewards/margins": 0.1450023353099823, |
|
"rewards/rejected": -0.25763875246047974, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -1.4696406126022339, |
|
"logits/rejected": -0.6640992164611816, |
|
"logps/chosen": -686.0765380859375, |
|
"logps/rejected": -1460.5648193359375, |
|
"loss": 0.1911, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12830252945423126, |
|
"rewards/margins": 0.16823554039001465, |
|
"rewards/rejected": -0.2965380549430847, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97726270502586e-06, |
|
"logits/chosen": -1.4664279222488403, |
|
"logits/rejected": -0.7313605546951294, |
|
"logps/chosen": -818.5562744140625, |
|
"logps/rejected": -1775.9241943359375, |
|
"loss": 0.1947, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20023846626281738, |
|
"rewards/margins": 0.24244621396064758, |
|
"rewards/rejected": -0.44268471002578735, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974024011595864e-06, |
|
"logits/chosen": -1.5485690832138062, |
|
"logits/rejected": -0.6978067755699158, |
|
"logps/chosen": -942.9921875, |
|
"logps/rejected": -1794.7066650390625, |
|
"loss": 0.3032, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.21240389347076416, |
|
"rewards/margins": 0.2622910141944885, |
|
"rewards/rejected": -0.4746948778629303, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -1.4141438007354736, |
|
"logits/rejected": -0.4546588957309723, |
|
"logps/chosen": -668.2946166992188, |
|
"logps/rejected": -1650.0318603515625, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1157810240983963, |
|
"rewards/margins": 0.2432054728269577, |
|
"rewards/rejected": -0.3589865267276764, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966903830281449e-06, |
|
"logits/chosen": -1.8105099201202393, |
|
"logits/rejected": -0.36172086000442505, |
|
"logps/chosen": -677.1749267578125, |
|
"logps/rejected": -1432.0328369140625, |
|
"loss": 0.2364, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14466652274131775, |
|
"rewards/margins": 0.17044545710086823, |
|
"rewards/rejected": -0.3151119649410248, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9630229593330226e-06, |
|
"logits/chosen": -1.544588327407837, |
|
"logits/rejected": -0.8351410031318665, |
|
"logps/chosen": -834.5636596679688, |
|
"logps/rejected": -1683.013671875, |
|
"loss": 0.2298, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.18364231288433075, |
|
"rewards/margins": 0.259293794631958, |
|
"rewards/rejected": -0.44293609261512756, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -1.6264123916625977, |
|
"logits/rejected": -0.5471200942993164, |
|
"logps/chosen": -858.4713134765625, |
|
"logps/rejected": -1681.949951171875, |
|
"loss": 0.2358, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2060559242963791, |
|
"rewards/margins": 0.24328169226646423, |
|
"rewards/rejected": -0.4493376612663269, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.954621338136399e-06, |
|
"logits/chosen": -1.4879562854766846, |
|
"logits/rejected": -0.6239393949508667, |
|
"logps/chosen": -916.1363525390625, |
|
"logps/rejected": -1731.7279052734375, |
|
"loss": 0.2434, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1614031344652176, |
|
"rewards/margins": 0.23414401710033417, |
|
"rewards/rejected": -0.39554715156555176, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.95010131585597e-06, |
|
"logits/chosen": -1.220314383506775, |
|
"logits/rejected": -0.44166284799575806, |
|
"logps/chosen": -772.2953491210938, |
|
"logps/rejected": -1575.3052978515625, |
|
"loss": 0.2662, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08703169226646423, |
|
"rewards/margins": 0.2424904853105545, |
|
"rewards/rejected": -0.32952219247817993, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -1.4573420286178589, |
|
"logits/rejected": -0.5127624273300171, |
|
"logps/chosen": -504.53057861328125, |
|
"logps/rejected": -1400.8695068359375, |
|
"loss": 0.1911, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05916530638933182, |
|
"rewards/margins": 0.23245540261268616, |
|
"rewards/rejected": -0.291620671749115, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.940424806108619e-06, |
|
"logits/chosen": -1.6605002880096436, |
|
"logits/rejected": -0.6169177293777466, |
|
"logps/chosen": -778.3709716796875, |
|
"logps/rejected": -1566.796630859375, |
|
"loss": 0.1824, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.08246553689241409, |
|
"rewards/margins": 0.2251826822757721, |
|
"rewards/rejected": -0.307648241519928, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.935269157073597e-06, |
|
"logits/chosen": -1.6027615070343018, |
|
"logits/rejected": -1.1422998905181885, |
|
"logps/chosen": -592.0419921875, |
|
"logps/rejected": -1658.290283203125, |
|
"loss": 0.2057, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.048401519656181335, |
|
"rewards/margins": 0.334246426820755, |
|
"rewards/rejected": -0.38264790177345276, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -1.4095045328140259, |
|
"logits/rejected": -0.1883077323436737, |
|
"logps/chosen": -607.0613403320312, |
|
"logps/rejected": -1580.00341796875, |
|
"loss": 0.1792, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.07415771484375, |
|
"rewards/margins": 0.2879538834095001, |
|
"rewards/rejected": -0.3621116280555725, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924325304226745e-06, |
|
"logits/chosen": -1.6297693252563477, |
|
"logits/rejected": -0.49017101526260376, |
|
"logps/chosen": -792.6488037109375, |
|
"logps/rejected": -1665.556396484375, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09000878036022186, |
|
"rewards/margins": 0.2915512025356293, |
|
"rewards/rejected": -0.38155999779701233, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.91853804865716e-06, |
|
"logits/chosen": -1.1054975986480713, |
|
"logits/rejected": -0.42201828956604004, |
|
"logps/chosen": -713.8890991210938, |
|
"logps/rejected": -1576.742919921875, |
|
"loss": 0.1704, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11752257496118546, |
|
"rewards/margins": 0.2575463354587555, |
|
"rewards/rejected": -0.37506890296936035, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -1.3571802377700806, |
|
"logits/rejected": -0.6059755086898804, |
|
"logps/chosen": -884.0383911132812, |
|
"logps/rejected": -1731.5504150390625, |
|
"loss": 0.2647, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.24475233256816864, |
|
"rewards/margins": 0.20648574829101562, |
|
"rewards/rejected": -0.45123806595802307, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9063353863980565e-06, |
|
"logits/chosen": -1.3680832386016846, |
|
"logits/rejected": -0.6090233325958252, |
|
"logps/chosen": -807.2792358398438, |
|
"logps/rejected": -1725.8609619140625, |
|
"loss": 0.1917, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19080020487308502, |
|
"rewards/margins": 0.28758734464645386, |
|
"rewards/rejected": -0.4783875346183777, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.899921037021719e-06, |
|
"logits/chosen": -1.7092090845108032, |
|
"logits/rejected": -0.9691111445426941, |
|
"logps/chosen": -696.3843994140625, |
|
"logps/rejected": -1516.789794921875, |
|
"loss": 0.1585, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16210845112800598, |
|
"rewards/margins": 0.24107725918293, |
|
"rewards/rejected": -0.4031856954097748, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -1.375356674194336, |
|
"logits/rejected": -0.36455339193344116, |
|
"logps/chosen": -759.2677612304688, |
|
"logps/rejected": -1726.4449462890625, |
|
"loss": 0.1731, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.196892648935318, |
|
"rewards/margins": 0.2657999098300934, |
|
"rewards/rejected": -0.46269258856773376, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.88646908061933e-06, |
|
"logits/chosen": -1.4417951107025146, |
|
"logits/rejected": -0.3565208911895752, |
|
"logps/chosen": -741.1653442382812, |
|
"logps/rejected": -1683.4515380859375, |
|
"loss": 0.2447, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18826603889465332, |
|
"rewards/margins": 0.2816776633262634, |
|
"rewards/rejected": -0.46994370222091675, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.879432639152935e-06, |
|
"logits/chosen": -1.37065851688385, |
|
"logits/rejected": -0.2773549556732178, |
|
"logps/chosen": -838.9853515625, |
|
"logps/rejected": -1707.4296875, |
|
"loss": 0.2116, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12412204593420029, |
|
"rewards/margins": 0.23440977931022644, |
|
"rewards/rejected": -0.3585318624973297, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -1.580479621887207, |
|
"logits/rejected": -0.6059103608131409, |
|
"logps/chosen": -790.3890380859375, |
|
"logps/rejected": -1548.9473876953125, |
|
"loss": 0.2076, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11692629009485245, |
|
"rewards/margins": 0.20617632567882538, |
|
"rewards/rejected": -0.3231026232242584, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.864741878038218e-06, |
|
"logits/chosen": -1.4246976375579834, |
|
"logits/rejected": -0.6282674670219421, |
|
"logps/chosen": -739.7086791992188, |
|
"logps/rejected": -1689.615234375, |
|
"loss": 0.1781, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11477355659008026, |
|
"rewards/margins": 0.28999191522598267, |
|
"rewards/rejected": -0.40476545691490173, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.857088831287158e-06, |
|
"logits/chosen": -1.603228211402893, |
|
"logits/rejected": -0.13681410253047943, |
|
"logps/chosen": -734.5335693359375, |
|
"logps/rejected": -1756.3695068359375, |
|
"loss": 0.171, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16050152480602264, |
|
"rewards/margins": 0.3059554696083069, |
|
"rewards/rejected": -0.4664570391178131, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.3526674509048462, |
|
"logits/rejected": -0.1356133222579956, |
|
"logps/chosen": -775.5394897460938, |
|
"logps/rejected": -1612.2784423828125, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16132906079292297, |
|
"rewards/margins": 0.2226000726222992, |
|
"rewards/rejected": -0.38392913341522217, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.841170720873723e-06, |
|
"logits/chosen": -1.466218113899231, |
|
"logits/rejected": -0.2698180377483368, |
|
"logps/chosen": -699.693359375, |
|
"logps/rejected": -1555.100341796875, |
|
"loss": 0.2182, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13787570595741272, |
|
"rewards/margins": 0.24788126349449158, |
|
"rewards/rejected": -0.3857569694519043, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.832907036453647e-06, |
|
"logits/chosen": -1.4457144737243652, |
|
"logits/rejected": -0.35442107915878296, |
|
"logps/chosen": -820.4182739257812, |
|
"logps/rejected": -1727.771484375, |
|
"loss": 0.1765, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12979528307914734, |
|
"rewards/margins": 0.2619260847568512, |
|
"rewards/rejected": -0.39172136783599854, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -1.2430192232131958, |
|
"logits/rejected": -0.31560835242271423, |
|
"logps/chosen": -599.2308959960938, |
|
"logps/rejected": -1673.4691162109375, |
|
"loss": 0.1464, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11397616565227509, |
|
"rewards/margins": 0.341776579618454, |
|
"rewards/rejected": -0.45575276017189026, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.815773989205165e-06, |
|
"logits/chosen": -1.5526068210601807, |
|
"logits/rejected": -0.6952439546585083, |
|
"logps/chosen": -760.5252685546875, |
|
"logps/rejected": -1961.7568359375, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16641755402088165, |
|
"rewards/margins": 0.44562679529190063, |
|
"rewards/rejected": -0.6120442748069763, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.806906110888606e-06, |
|
"logits/chosen": -1.4445289373397827, |
|
"logits/rejected": -0.6352332234382629, |
|
"logps/chosen": -702.0392456054688, |
|
"logps/rejected": -1625.85302734375, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14837577939033508, |
|
"rewards/margins": 0.2570492923259735, |
|
"rewards/rejected": -0.4054250717163086, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -1.404865026473999, |
|
"logits/rejected": -0.4630287289619446, |
|
"logps/chosen": -645.7227172851562, |
|
"logps/rejected": -1415.0540771484375, |
|
"loss": 0.1912, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.13058145344257355, |
|
"rewards/margins": 0.18823085725307465, |
|
"rewards/rejected": -0.3188122808933258, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.788571486639948e-06, |
|
"logits/chosen": -1.32173752784729, |
|
"logits/rejected": -0.5882034301757812, |
|
"logps/chosen": -841.7330932617188, |
|
"logps/rejected": -1966.875732421875, |
|
"loss": 0.1613, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14226606488227844, |
|
"rewards/margins": 0.33964481949806213, |
|
"rewards/rejected": -0.4819108545780182, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.779106329331665e-06, |
|
"logits/chosen": -1.5085209608078003, |
|
"logits/rejected": -0.3688226044178009, |
|
"logps/chosen": -730.3353271484375, |
|
"logps/rejected": -1609.4810791015625, |
|
"loss": 0.1961, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14409688115119934, |
|
"rewards/margins": 0.268598735332489, |
|
"rewards/rejected": -0.41269564628601074, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -1.30078125, |
|
"logits/rejected": -0.17256946861743927, |
|
"logps/chosen": -978.0304565429688, |
|
"logps/rejected": -2078.25830078125, |
|
"loss": 0.1996, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.25719624757766724, |
|
"rewards/margins": 0.36804524064064026, |
|
"rewards/rejected": -0.6252414584159851, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.759584424871302e-06, |
|
"logits/chosen": -1.2128162384033203, |
|
"logits/rejected": -0.2545672059059143, |
|
"logps/chosen": -833.3894653320312, |
|
"logps/rejected": -1909.220703125, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.23837868869304657, |
|
"rewards/margins": 0.341843843460083, |
|
"rewards/rejected": -0.580222487449646, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.749529369216246e-06, |
|
"logits/chosen": -1.505275845527649, |
|
"logits/rejected": -0.8203606605529785, |
|
"logps/chosen": -819.6467895507812, |
|
"logps/rejected": -1751.1910400390625, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19994454085826874, |
|
"rewards/margins": 0.27609339356422424, |
|
"rewards/rejected": -0.47603797912597656, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -1.4275624752044678, |
|
"logits/rejected": -0.5869510769844055, |
|
"logps/chosen": -676.9829711914062, |
|
"logps/rejected": -1566.9033203125, |
|
"loss": 0.1924, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13645704090595245, |
|
"rewards/margins": 0.26502570509910583, |
|
"rewards/rejected": -0.4014827311038971, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7288354071380415e-06, |
|
"logits/chosen": -1.2436352968215942, |
|
"logits/rejected": 0.3669028580188751, |
|
"logps/chosen": -650.0794067382812, |
|
"logps/rejected": -1452.456298828125, |
|
"loss": 0.1965, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1296919882297516, |
|
"rewards/margins": 0.22590501606464386, |
|
"rewards/rejected": -0.35559698939323425, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7181982937661485e-06, |
|
"logits/chosen": -1.2913326025009155, |
|
"logits/rejected": -0.41281813383102417, |
|
"logps/chosen": -856.18896484375, |
|
"logps/rejected": -1820.381591796875, |
|
"loss": 0.1497, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.22433075308799744, |
|
"rewards/margins": 0.2952847480773926, |
|
"rewards/rejected": -0.5196155309677124, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -1.3868077993392944, |
|
"logits/rejected": -0.7847863435745239, |
|
"logps/chosen": -884.7540283203125, |
|
"logps/rejected": -1902.289306640625, |
|
"loss": 0.1453, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.24957780539989471, |
|
"rewards/margins": 0.3298245370388031, |
|
"rewards/rejected": -0.5794023275375366, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.696348410599244e-06, |
|
"logits/chosen": -1.3086035251617432, |
|
"logits/rejected": -0.8314957618713379, |
|
"logps/chosen": -689.6898803710938, |
|
"logps/rejected": -1640.506103515625, |
|
"loss": 0.1847, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20904815196990967, |
|
"rewards/margins": 0.30142712593078613, |
|
"rewards/rejected": -0.5104752779006958, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.685137534011549e-06, |
|
"logits/chosen": -1.565288782119751, |
|
"logits/rejected": -0.27172648906707764, |
|
"logps/chosen": -844.7750244140625, |
|
"logps/rejected": -1699.747314453125, |
|
"loss": 0.2055, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2513663172721863, |
|
"rewards/margins": 0.26970070600509644, |
|
"rewards/rejected": -0.5210670232772827, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -1.2228829860687256, |
|
"logits/rejected": -0.5248723030090332, |
|
"logps/chosen": -664.1156616210938, |
|
"logps/rejected": -1562.0504150390625, |
|
"loss": 0.1669, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.162824347615242, |
|
"rewards/margins": 0.25564128160476685, |
|
"rewards/rejected": -0.41846561431884766, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662148767637578e-06, |
|
"logits/chosen": -1.2507654428482056, |
|
"logits/rejected": -0.5844524502754211, |
|
"logps/chosen": -839.5402221679688, |
|
"logps/rejected": -1746.392333984375, |
|
"loss": 0.2229, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.18832561373710632, |
|
"rewards/margins": 0.2621099650859833, |
|
"rewards/rejected": -0.4504355788230896, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.650372869738415e-06, |
|
"logits/chosen": -1.55803644657135, |
|
"logits/rejected": -0.7119165658950806, |
|
"logps/chosen": -736.268310546875, |
|
"logps/rejected": -1563.0191650390625, |
|
"loss": 0.2164, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10577349364757538, |
|
"rewards/margins": 0.2629134953022003, |
|
"rewards/rejected": -0.3686870038509369, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -1.3386160135269165, |
|
"logits/rejected": -0.11453273147344589, |
|
"logps/chosen": -630.7725830078125, |
|
"logps/rejected": -1448.709228515625, |
|
"loss": 0.2449, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09674180299043655, |
|
"rewards/margins": 0.18708564341068268, |
|
"rewards/rejected": -0.28382742404937744, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626263146105875e-06, |
|
"logits/chosen": -1.6188023090362549, |
|
"logits/rejected": -0.530141294002533, |
|
"logps/chosen": -792.281005859375, |
|
"logps/rejected": -1616.726806640625, |
|
"loss": 0.2387, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16821546852588654, |
|
"rewards/margins": 0.20957235991954803, |
|
"rewards/rejected": -0.37778785824775696, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613931409386196e-06, |
|
"logits/chosen": -1.4189367294311523, |
|
"logits/rejected": -0.2773023247718811, |
|
"logps/chosen": -763.1712646484375, |
|
"logps/rejected": -1585.35791015625, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15055999159812927, |
|
"rewards/margins": 0.23061557114124298, |
|
"rewards/rejected": -0.38117554783821106, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -1.4606117010116577, |
|
"logits/rejected": -0.6397336721420288, |
|
"logps/chosen": -748.8417358398438, |
|
"logps/rejected": -1670.744140625, |
|
"loss": 0.1761, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14542202651500702, |
|
"rewards/margins": 0.2664222717285156, |
|
"rewards/rejected": -0.4118443429470062, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -1.324894666671753, |
|
"logits/rejected": -0.27193373441696167, |
|
"logps/chosen": -764.4343872070312, |
|
"logps/rejected": -1688.8870849609375, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13419455289840698, |
|
"rewards/margins": 0.2759809195995331, |
|
"rewards/rejected": -0.41017547249794006, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.575841568909494e-06, |
|
"logits/chosen": -1.6580641269683838, |
|
"logits/rejected": -0.3782033622264862, |
|
"logps/chosen": -732.3780517578125, |
|
"logps/rejected": -1696.1322021484375, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10972636938095093, |
|
"rewards/margins": 0.29507070779800415, |
|
"rewards/rejected": -0.4047970771789551, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -1.503248691558838, |
|
"logits/rejected": 0.1558968424797058, |
|
"logps/chosen": -716.7626953125, |
|
"logps/rejected": -1444.2918701171875, |
|
"loss": 0.2014, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09443630278110504, |
|
"rewards/margins": 0.21701212227344513, |
|
"rewards/rejected": -0.31144842505455017, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549547190300622e-06, |
|
"logits/chosen": -1.3067868947982788, |
|
"logits/rejected": -0.4560883641242981, |
|
"logps/chosen": -790.8909912109375, |
|
"logps/rejected": -1757.387451171875, |
|
"loss": 0.224, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1645580530166626, |
|
"rewards/margins": 0.325479656457901, |
|
"rewards/rejected": -0.490037739276886, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536133049620143e-06, |
|
"logits/chosen": -1.140446662902832, |
|
"logits/rejected": -0.34088680148124695, |
|
"logps/chosen": -722.1959228515625, |
|
"logps/rejected": -1831.537353515625, |
|
"loss": 0.1268, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.12402956187725067, |
|
"rewards/margins": 0.37322643399238586, |
|
"rewards/rejected": -0.49725598096847534, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -1.475990891456604, |
|
"logits/rejected": -0.5005327463150024, |
|
"logps/chosen": -783.8773193359375, |
|
"logps/rejected": -1766.515869140625, |
|
"loss": 0.176, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15318922698497772, |
|
"rewards/margins": 0.31895914673805237, |
|
"rewards/rejected": -0.4721483290195465, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508776676821739e-06, |
|
"logits/chosen": -1.4537649154663086, |
|
"logits/rejected": -0.6859838366508484, |
|
"logps/chosen": -839.9093627929688, |
|
"logps/rejected": -1558.96435546875, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18507587909698486, |
|
"rewards/margins": 0.21864044666290283, |
|
"rewards/rejected": -0.4037163257598877, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.494836815027022e-06, |
|
"logits/chosen": -1.3298923969268799, |
|
"logits/rejected": 0.0534161701798439, |
|
"logps/chosen": -754.2385864257812, |
|
"logps/rejected": -1705.55078125, |
|
"loss": 0.232, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1495281457901001, |
|
"rewards/margins": 0.2592945694923401, |
|
"rewards/rejected": -0.4088227152824402, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -1.3468838930130005, |
|
"logits/rejected": -0.2606663107872009, |
|
"logps/chosen": -726.0997314453125, |
|
"logps/rejected": -1622.6500244140625, |
|
"loss": 0.203, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13949835300445557, |
|
"rewards/margins": 0.3188321590423584, |
|
"rewards/rejected": -0.45833054184913635, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.466439779715696e-06, |
|
"logits/chosen": -1.3387072086334229, |
|
"logits/rejected": -0.5179058313369751, |
|
"logps/chosen": -702.0155029296875, |
|
"logps/rejected": -1574.4173583984375, |
|
"loss": 0.1881, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08557260781526566, |
|
"rewards/margins": 0.29005369544029236, |
|
"rewards/rejected": -0.3756263256072998, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.451985066691649e-06, |
|
"logits/chosen": -1.3348770141601562, |
|
"logits/rejected": -0.6792385578155518, |
|
"logps/chosen": -666.2745361328125, |
|
"logps/rejected": -1547.283203125, |
|
"loss": 0.1755, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09519227594137192, |
|
"rewards/margins": 0.2866414785385132, |
|
"rewards/rejected": -0.3818337321281433, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -1.5723166465759277, |
|
"logits/rejected": -0.519290566444397, |
|
"logps/chosen": -685.1051025390625, |
|
"logps/rejected": -1876.2593994140625, |
|
"loss": 0.1298, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.10364224761724472, |
|
"rewards/margins": 0.37660035490989685, |
|
"rewards/rejected": -0.48024263978004456, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.422569512021332e-06, |
|
"logits/chosen": -1.4255059957504272, |
|
"logits/rejected": 0.1319495439529419, |
|
"logps/chosen": -696.9476318359375, |
|
"logps/rejected": -1619.9892578125, |
|
"loss": 0.2105, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0946447104215622, |
|
"rewards/margins": 0.2710058093070984, |
|
"rewards/rejected": -0.3656505048274994, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.407611219118363e-06, |
|
"logits/chosen": -1.3705596923828125, |
|
"logits/rejected": -0.5301432609558105, |
|
"logps/chosen": -698.9526977539062, |
|
"logps/rejected": -1700.2493896484375, |
|
"loss": 0.164, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1371428668498993, |
|
"rewards/margins": 0.29292064905166626, |
|
"rewards/rejected": -0.43006348609924316, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -1.3947621583938599, |
|
"logits/rejected": -0.8983534574508667, |
|
"logps/chosen": -651.66162109375, |
|
"logps/rejected": -1657.3372802734375, |
|
"loss": 0.1776, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11623096466064453, |
|
"rewards/margins": 0.2917022407054901, |
|
"rewards/rejected": -0.40793323516845703, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.377200082453748e-06, |
|
"logits/chosen": -1.3964405059814453, |
|
"logits/rejected": -0.6084726452827454, |
|
"logps/chosen": -692.3182983398438, |
|
"logps/rejected": -1706.6009521484375, |
|
"loss": 0.4044, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12358059734106064, |
|
"rewards/margins": 0.3641052842140198, |
|
"rewards/rejected": -0.4876858592033386, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.361749873698707e-06, |
|
"logits/chosen": -1.4753860235214233, |
|
"logits/rejected": -0.7048450708389282, |
|
"logps/chosen": -698.3816528320312, |
|
"logps/rejected": -1705.034423828125, |
|
"loss": 0.124, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08291604369878769, |
|
"rewards/margins": 0.2811528742313385, |
|
"rewards/rejected": -0.3640689253807068, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -1.4870970249176025, |
|
"logits/rejected": -0.9103308916091919, |
|
"logps/chosen": -671.7784423828125, |
|
"logps/rejected": -1529.8997802734375, |
|
"loss": 0.2008, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06387507170438766, |
|
"rewards/margins": 0.2635475695133209, |
|
"rewards/rejected": -0.3274226784706116, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.330366868729376e-06, |
|
"logits/chosen": -1.421744465827942, |
|
"logits/rejected": -0.6302933692932129, |
|
"logps/chosen": -724.4271240234375, |
|
"logps/rejected": -1651.8883056640625, |
|
"loss": 0.1881, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1005069762468338, |
|
"rewards/margins": 0.2800332307815552, |
|
"rewards/rejected": -0.3805401921272278, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3144367917302964e-06, |
|
"logits/chosen": -1.408508062362671, |
|
"logits/rejected": -0.9906333088874817, |
|
"logps/chosen": -622.6488647460938, |
|
"logps/rejected": -1618.7296142578125, |
|
"loss": 0.1708, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08803695440292358, |
|
"rewards/margins": 0.27410316467285156, |
|
"rewards/rejected": -0.36214011907577515, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -1.4352304935455322, |
|
"logits/rejected": -0.9279989004135132, |
|
"logps/chosen": -665.1091918945312, |
|
"logps/rejected": -1825.142333984375, |
|
"loss": 0.2047, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10474482923746109, |
|
"rewards/margins": 0.35141628980636597, |
|
"rewards/rejected": -0.45616111159324646, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2821063899795015e-06, |
|
"logits/chosen": -1.563280463218689, |
|
"logits/rejected": -0.555804431438446, |
|
"logps/chosen": -646.5196533203125, |
|
"logps/rejected": -1623.5345458984375, |
|
"loss": 0.1522, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.055479682981967926, |
|
"rewards/margins": 0.325848251581192, |
|
"rewards/rejected": -0.38132789731025696, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.265708866531238e-06, |
|
"logits/chosen": -1.6181457042694092, |
|
"logits/rejected": -0.8855217695236206, |
|
"logps/chosen": -649.41455078125, |
|
"logps/rejected": -1402.5155029296875, |
|
"loss": 0.1827, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.051223646849393845, |
|
"rewards/margins": 0.2268896847963333, |
|
"rewards/rejected": -0.27811330556869507, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -1.4349555969238281, |
|
"logits/rejected": -0.6732171773910522, |
|
"logps/chosen": -621.8024291992188, |
|
"logps/rejected": -1621.55908203125, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06152881309390068, |
|
"rewards/margins": 0.23424020409584045, |
|
"rewards/rejected": -0.2957690358161926, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.232456278273743e-06, |
|
"logits/chosen": -1.477827548980713, |
|
"logits/rejected": -1.140575647354126, |
|
"logps/chosen": -703.6815185546875, |
|
"logps/rejected": -1337.854248046875, |
|
"loss": 0.2197, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.10893256962299347, |
|
"rewards/margins": 0.16516511142253876, |
|
"rewards/rejected": -0.2740976810455322, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -1.6464779376983643, |
|
"logits/rejected": -0.990412712097168, |
|
"logps/chosen": -644.1171264648438, |
|
"logps/rejected": -1499.906005859375, |
|
"loss": 0.2661, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10046522319316864, |
|
"rewards/margins": 0.22357575595378876, |
|
"rewards/rejected": -0.3240409791469574, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -1.5124647617340088, |
|
"logits/rejected": -0.45678481459617615, |
|
"logps/chosen": -716.1838989257812, |
|
"logps/rejected": -1518.812255859375, |
|
"loss": 0.165, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05440264195203781, |
|
"rewards/margins": 0.2616801857948303, |
|
"rewards/rejected": -0.3160828649997711, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.181455249275701e-06, |
|
"logits/chosen": -1.2498810291290283, |
|
"logits/rejected": -0.8803867101669312, |
|
"logps/chosen": -659.9884643554688, |
|
"logps/rejected": -1640.7095947265625, |
|
"loss": 0.2331, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08121126145124435, |
|
"rewards/margins": 0.27540498971939087, |
|
"rewards/rejected": -0.35661619901657104, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1641615463459926e-06, |
|
"logits/chosen": -1.5280659198760986, |
|
"logits/rejected": -0.723167359828949, |
|
"logps/chosen": -665.0728759765625, |
|
"logps/rejected": -1953.181884765625, |
|
"loss": 0.15, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.034947071224451065, |
|
"rewards/margins": 0.38297995924949646, |
|
"rewards/rejected": -0.417927086353302, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -1.5791549682617188, |
|
"logits/rejected": -0.7244309186935425, |
|
"logps/chosen": -609.816650390625, |
|
"logps/rejected": -1450.82080078125, |
|
"loss": 0.1426, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.05617643520236015, |
|
"rewards/margins": 0.2224070131778717, |
|
"rewards/rejected": -0.2785834074020386, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.129143072053639e-06, |
|
"logits/chosen": -1.7151685953140259, |
|
"logits/rejected": -0.39171552658081055, |
|
"logps/chosen": -769.5672607421875, |
|
"logps/rejected": -1687.483154296875, |
|
"loss": 0.1444, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.071159228682518, |
|
"rewards/margins": 0.29008185863494873, |
|
"rewards/rejected": -0.36124110221862793, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.111421334905468e-06, |
|
"logits/chosen": -1.49336838722229, |
|
"logits/rejected": -0.5438629984855652, |
|
"logps/chosen": -848.5822143554688, |
|
"logps/rejected": -1763.63671875, |
|
"loss": 0.2436, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10583722591400146, |
|
"rewards/margins": 0.27904340624809265, |
|
"rewards/rejected": -0.38488060235977173, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -1.359708547592163, |
|
"logits/rejected": -0.6944249272346497, |
|
"logps/chosen": -919.8367919921875, |
|
"logps/rejected": -1900.1605224609375, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1508992612361908, |
|
"rewards/margins": 0.33068108558654785, |
|
"rewards/rejected": -0.48158034682273865, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.075560538069767e-06, |
|
"logits/chosen": -1.4843828678131104, |
|
"logits/rejected": -0.6843305826187134, |
|
"logps/chosen": -726.9734497070312, |
|
"logps/rejected": -1652.866943359375, |
|
"loss": 0.1679, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.08053059130907059, |
|
"rewards/margins": 0.38363346457481384, |
|
"rewards/rejected": -0.4641640782356262, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.05742458558068e-06, |
|
"logits/chosen": -1.5596168041229248, |
|
"logits/rejected": -0.7530814409255981, |
|
"logps/chosen": -646.64697265625, |
|
"logps/rejected": -1448.167724609375, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07448308169841766, |
|
"rewards/margins": 0.23775526881217957, |
|
"rewards/rejected": -0.3122383654117584, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -1.5747339725494385, |
|
"logits/rejected": -0.38576704263687134, |
|
"logps/chosen": -659.3181762695312, |
|
"logps/rejected": -1664.3912353515625, |
|
"loss": 0.198, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.04063820466399193, |
|
"rewards/margins": 0.32130542397499084, |
|
"rewards/rejected": -0.36194363236427307, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020749429372286e-06, |
|
"logits/chosen": -1.5057035684585571, |
|
"logits/rejected": -0.5099014043807983, |
|
"logps/chosen": -759.4317626953125, |
|
"logps/rejected": -1881.2880859375, |
|
"loss": 0.1902, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08269429206848145, |
|
"rewards/margins": 0.3579484820365906, |
|
"rewards/rejected": -0.440642774105072, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.002213403412492e-06, |
|
"logits/chosen": -1.440815806388855, |
|
"logits/rejected": -0.31881892681121826, |
|
"logps/chosen": -595.3434448242188, |
|
"logps/rejected": -1587.8076171875, |
|
"loss": 0.2562, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.07667971402406693, |
|
"rewards/margins": 0.292102575302124, |
|
"rewards/rejected": -0.36878231167793274, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -1.7196152210235596, |
|
"logits/rejected": -0.5447965264320374, |
|
"logps/chosen": -607.3961181640625, |
|
"logps/rejected": -1578.577880859375, |
|
"loss": 0.1834, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.04431430250406265, |
|
"rewards/margins": 0.25382199883461, |
|
"rewards/rejected": -0.29813629388809204, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.964752486015001e-06, |
|
"logits/chosen": -1.5760372877120972, |
|
"logits/rejected": -0.7092293500900269, |
|
"logps/chosen": -662.7578125, |
|
"logps/rejected": -1628.713623046875, |
|
"loss": 0.1779, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06009943410754204, |
|
"rewards/margins": 0.26038116216659546, |
|
"rewards/rejected": -0.320480614900589, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.945830840419966e-06, |
|
"logits/chosen": -1.5873006582260132, |
|
"logits/rejected": -0.4512631893157959, |
|
"logps/chosen": -656.304931640625, |
|
"logps/rejected": -1755.419677734375, |
|
"loss": 0.1451, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.07909057289361954, |
|
"rewards/margins": 0.36252719163894653, |
|
"rewards/rejected": -0.44161778688430786, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -1.5479198694229126, |
|
"logits/rejected": -0.4876307547092438, |
|
"logps/chosen": -590.7452392578125, |
|
"logps/rejected": -1592.341064453125, |
|
"loss": 0.1647, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07574008405208588, |
|
"rewards/margins": 0.30887579917907715, |
|
"rewards/rejected": -0.38461586833000183, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.907613372729916e-06, |
|
"logits/chosen": -1.3529613018035889, |
|
"logits/rejected": -0.5033882260322571, |
|
"logps/chosen": -667.5601806640625, |
|
"logps/rejected": -1620.945556640625, |
|
"loss": 0.1869, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10878247022628784, |
|
"rewards/margins": 0.27404552698135376, |
|
"rewards/rejected": -0.3828279972076416, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.888320862029699e-06, |
|
"logits/chosen": -1.7743628025054932, |
|
"logits/rejected": -0.5587132573127747, |
|
"logps/chosen": -699.76318359375, |
|
"logps/rejected": -1693.8353271484375, |
|
"loss": 0.2028, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07925678789615631, |
|
"rewards/margins": 0.30291515588760376, |
|
"rewards/rejected": -0.38217195868492126, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -1.607465386390686, |
|
"logits/rejected": -0.8050976991653442, |
|
"logps/chosen": -772.9467163085938, |
|
"logps/rejected": -1667.0992431640625, |
|
"loss": 0.1693, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.05015742778778076, |
|
"rewards/margins": 0.27269458770751953, |
|
"rewards/rejected": -0.32285207509994507, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.849376644878783e-06, |
|
"logits/chosen": -1.4109113216400146, |
|
"logits/rejected": -0.8917434811592102, |
|
"logps/chosen": -581.6261596679688, |
|
"logps/rejected": -1721.5927734375, |
|
"loss": 0.1552, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.02441004104912281, |
|
"rewards/margins": 0.3415711224079132, |
|
"rewards/rejected": -0.36598116159439087, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.829728312792895e-06, |
|
"logits/chosen": -1.7239364385604858, |
|
"logits/rejected": -0.6429109573364258, |
|
"logps/chosen": -607.516357421875, |
|
"logps/rejected": -1509.8909912109375, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0776829943060875, |
|
"rewards/margins": 0.28966718912124634, |
|
"rewards/rejected": -0.36735019087791443, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -1.3290464878082275, |
|
"logits/rejected": -0.5565693974494934, |
|
"logps/chosen": -806.9801025390625, |
|
"logps/rejected": -1813.9703369140625, |
|
"loss": 0.1661, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13428995013237, |
|
"rewards/margins": 0.3366580903530121, |
|
"rewards/rejected": -0.4709479808807373, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.790087713710179e-06, |
|
"logits/chosen": -1.167667031288147, |
|
"logits/rejected": -0.08988530933856964, |
|
"logps/chosen": -961.8670654296875, |
|
"logps/rejected": -1891.443115234375, |
|
"loss": 0.2308, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2256636619567871, |
|
"rewards/margins": 0.27781441807746887, |
|
"rewards/rejected": -0.5034780502319336, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.770098881416945e-06, |
|
"logits/chosen": -1.445077896118164, |
|
"logits/rejected": -0.7122173309326172, |
|
"logps/chosen": -851.2200927734375, |
|
"logps/rejected": -1791.288818359375, |
|
"loss": 0.194, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18502262234687805, |
|
"rewards/margins": 0.28681907057762146, |
|
"rewards/rejected": -0.4718416631221771, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.6007522344589233, |
|
"logits/rejected": -0.3576156497001648, |
|
"logps/chosen": -716.0091552734375, |
|
"logps/rejected": -1709.4010009765625, |
|
"loss": 0.1703, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12502431869506836, |
|
"rewards/margins": 0.3000204563140869, |
|
"rewards/rejected": -0.4250447750091553, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7297928109491765e-06, |
|
"logits/chosen": -1.4100292921066284, |
|
"logits/rejected": -0.429446280002594, |
|
"logps/chosen": -616.4102172851562, |
|
"logps/rejected": -1615.930908203125, |
|
"loss": 0.1452, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10738323628902435, |
|
"rewards/margins": 0.33697018027305603, |
|
"rewards/rejected": -0.4443534016609192, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7094790651387414e-06, |
|
"logits/chosen": -1.6581932306289673, |
|
"logits/rejected": -0.812160849571228, |
|
"logps/chosen": -681.979736328125, |
|
"logps/rejected": -1556.7125244140625, |
|
"loss": 0.1798, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13339491188526154, |
|
"rewards/margins": 0.25554531812667847, |
|
"rewards/rejected": -0.3889401853084564, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -1.450721025466919, |
|
"logits/rejected": -0.5784316062927246, |
|
"logps/chosen": -761.6971435546875, |
|
"logps/rejected": -1722.798583984375, |
|
"loss": 0.2113, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1309405416250229, |
|
"rewards/margins": 0.2867937386035919, |
|
"rewards/rejected": -0.417734295129776, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668538952747236e-06, |
|
"logits/chosen": -1.659879446029663, |
|
"logits/rejected": -0.39928197860717773, |
|
"logps/chosen": -810.2103881835938, |
|
"logps/rejected": -1897.2626953125, |
|
"loss": 0.1285, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12557505071163177, |
|
"rewards/margins": 0.390308141708374, |
|
"rewards/rejected": -0.515883207321167, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6479161334675294e-06, |
|
"logits/chosen": -1.4575773477554321, |
|
"logits/rejected": -0.07413512468338013, |
|
"logps/chosen": -820.2180786132812, |
|
"logps/rejected": -1623.4227294921875, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13560107350349426, |
|
"rewards/margins": 0.23566505312919617, |
|
"rewards/rejected": -0.37126606702804565, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -1.3455625772476196, |
|
"logits/rejected": -0.9724159240722656, |
|
"logps/chosen": -803.569091796875, |
|
"logps/rejected": -1693.400390625, |
|
"loss": 0.2043, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13027095794677734, |
|
"rewards/margins": 0.29757434129714966, |
|
"rewards/rejected": -0.4278453290462494, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6063739030204226e-06, |
|
"logits/chosen": -1.5993826389312744, |
|
"logits/rejected": -0.9014598727226257, |
|
"logps/chosen": -670.8797607421875, |
|
"logps/rejected": -1565.011474609375, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13589544594287872, |
|
"rewards/margins": 0.2834901213645935, |
|
"rewards/rejected": -0.4193855822086334, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5854580913255706e-06, |
|
"logits/chosen": -1.425690770149231, |
|
"logits/rejected": -0.18355034291744232, |
|
"logps/chosen": -754.1428833007812, |
|
"logps/rejected": -1659.813720703125, |
|
"loss": 0.2144, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17976424098014832, |
|
"rewards/margins": 0.26915082335472107, |
|
"rewards/rejected": -0.4489150941371918, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -1.5854262113571167, |
|
"logits/rejected": -0.6857632398605347, |
|
"logps/chosen": -745.6060791015625, |
|
"logps/rejected": -1763.920166015625, |
|
"loss": 0.1558, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12223930656909943, |
|
"rewards/margins": 0.31619927287101746, |
|
"rewards/rejected": -0.4384385943412781, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543346136204545e-06, |
|
"logits/chosen": -1.2614418268203735, |
|
"logits/rejected": -0.3045172095298767, |
|
"logps/chosen": -896.9788818359375, |
|
"logps/rejected": -1856.7265625, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1813734471797943, |
|
"rewards/margins": 0.2908100187778473, |
|
"rewards/rejected": -0.4721834659576416, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.522153641615345e-06, |
|
"logits/chosen": -1.4344582557678223, |
|
"logits/rejected": -0.6459347009658813, |
|
"logps/chosen": -729.3267822265625, |
|
"logps/rejected": -1761.7333984375, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12001194804906845, |
|
"rewards/margins": 0.3599693179130554, |
|
"rewards/rejected": -0.47998133301734924, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -1.4531586170196533, |
|
"logits/rejected": -0.5490698218345642, |
|
"logps/chosen": -856.87255859375, |
|
"logps/rejected": -1625.8597412109375, |
|
"loss": 0.1619, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10974371433258057, |
|
"rewards/margins": 0.3060295283794403, |
|
"rewards/rejected": -0.4157732427120209, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4795047994562463e-06, |
|
"logits/chosen": -1.4780563116073608, |
|
"logits/rejected": -0.4708196222782135, |
|
"logps/chosen": -758.4917602539062, |
|
"logps/rejected": -1597.0938720703125, |
|
"loss": 0.2097, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12127669155597687, |
|
"rewards/margins": 0.24855844676494598, |
|
"rewards/rejected": -0.36983510851860046, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.458052147242494e-06, |
|
"logits/chosen": -1.7074508666992188, |
|
"logits/rejected": -0.8495294451713562, |
|
"logps/chosen": -809.2350463867188, |
|
"logps/rejected": -1472.79443359375, |
|
"loss": 0.186, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10575731843709946, |
|
"rewards/margins": 0.18694370985031128, |
|
"rewards/rejected": -0.29270103573799133, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -1.6757071018218994, |
|
"logits/rejected": -0.4471355378627777, |
|
"logps/chosen": -738.4520874023438, |
|
"logps/rejected": -1533.0238037109375, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09687335044145584, |
|
"rewards/margins": 0.2056708037853241, |
|
"rewards/rejected": -0.30254414677619934, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4148996743295305e-06, |
|
"logits/chosen": -1.704223394393921, |
|
"logits/rejected": -0.5917268991470337, |
|
"logps/chosen": -850.6807861328125, |
|
"logps/rejected": -1609.865478515625, |
|
"loss": 0.2066, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.05562291666865349, |
|
"rewards/margins": 0.2252390831708908, |
|
"rewards/rejected": -0.2808619737625122, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3932035926241103e-06, |
|
"logits/chosen": -1.5309293270111084, |
|
"logits/rejected": -1.2061899900436401, |
|
"logps/chosen": -625.7445068359375, |
|
"logps/rejected": -1734.443603515625, |
|
"loss": 0.1488, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.05109323933720589, |
|
"rewards/margins": 0.34637781977653503, |
|
"rewards/rejected": -0.3974711000919342, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -1.503655195236206, |
|
"logits/rejected": -0.6576262712478638, |
|
"logps/chosen": -691.9463500976562, |
|
"logps/rejected": -1632.8353271484375, |
|
"loss": 0.1711, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08215099573135376, |
|
"rewards/margins": 0.270992249250412, |
|
"rewards/rejected": -0.35314327478408813, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.349581137957604e-06, |
|
"logits/chosen": -1.7165721654891968, |
|
"logits/rejected": -0.6651977896690369, |
|
"logps/chosen": -709.5531005859375, |
|
"logps/rejected": -1576.0364990234375, |
|
"loss": 0.2304, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08810341358184814, |
|
"rewards/margins": 0.2739308476448059, |
|
"rewards/rejected": -0.36203423142433167, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3276585447123957e-06, |
|
"logits/chosen": -1.232627034187317, |
|
"logits/rejected": -0.1004166379570961, |
|
"logps/chosen": -676.5740966796875, |
|
"logps/rejected": -1472.2635498046875, |
|
"loss": 0.2152, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10900652408599854, |
|
"rewards/margins": 0.22487536072731018, |
|
"rewards/rejected": -0.3338818848133087, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -1.5267202854156494, |
|
"logits/rejected": -0.49788492918014526, |
|
"logps/chosen": -719.8569946289062, |
|
"logps/rejected": -1706.168212890625, |
|
"loss": 0.1489, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.04703357070684433, |
|
"rewards/margins": 0.3542724847793579, |
|
"rewards/rejected": -0.40130606293678284, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2836001237702993e-06, |
|
"logits/chosen": -1.6021124124526978, |
|
"logits/rejected": -0.44486141204833984, |
|
"logps/chosen": -737.7840576171875, |
|
"logps/rejected": -1778.464111328125, |
|
"loss": 0.1286, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.05765662342309952, |
|
"rewards/margins": 0.3132147192955017, |
|
"rewards/rejected": -0.370871365070343, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2614681135640696e-06, |
|
"logits/chosen": -1.5471503734588623, |
|
"logits/rejected": -0.8976337313652039, |
|
"logps/chosen": -721.9874267578125, |
|
"logps/rejected": -1760.84765625, |
|
"loss": 0.161, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0736970454454422, |
|
"rewards/margins": 0.24696488678455353, |
|
"rewards/rejected": -0.3206619620323181, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -1.4463790655136108, |
|
"logits/rejected": -0.8106206059455872, |
|
"logps/chosen": -668.1365356445312, |
|
"logps/rejected": -1573.209716796875, |
|
"loss": 0.141, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08170486986637115, |
|
"rewards/margins": 0.264334499835968, |
|
"rewards/rejected": -0.34603938460350037, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -1.5568472146987915, |
|
"logits/rejected": -0.6958287954330444, |
|
"logps/chosen": -861.89013671875, |
|
"logps/rejected": -1898.191162109375, |
|
"loss": 0.1875, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18643470108509064, |
|
"rewards/margins": 0.3724428713321686, |
|
"rewards/rejected": -0.5588775277137756, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1946839124862873e-06, |
|
"logits/chosen": -1.314772367477417, |
|
"logits/rejected": -0.18543431162834167, |
|
"logps/chosen": -708.21923828125, |
|
"logps/rejected": -1773.858642578125, |
|
"loss": 0.1311, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12136778980493546, |
|
"rewards/margins": 0.3536931574344635, |
|
"rewards/rejected": -0.4750608801841736, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -1.386833667755127, |
|
"logits/rejected": -0.44682741165161133, |
|
"logps/chosen": -727.7785034179688, |
|
"logps/rejected": -1677.187744140625, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10554149001836777, |
|
"rewards/margins": 0.328396201133728, |
|
"rewards/rejected": -0.4339376389980316, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.149856938451094e-06, |
|
"logits/chosen": -1.6613432168960571, |
|
"logits/rejected": 0.04711759090423584, |
|
"logps/chosen": -814.40380859375, |
|
"logps/rejected": -1579.5726318359375, |
|
"loss": 0.1541, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12790891528129578, |
|
"rewards/margins": 0.259776771068573, |
|
"rewards/rejected": -0.387685626745224, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.127358017790132e-06, |
|
"logits/chosen": -1.352160096168518, |
|
"logits/rejected": 0.36388832330703735, |
|
"logps/chosen": -850.2373046875, |
|
"logps/rejected": -1793.983154296875, |
|
"loss": 0.1351, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15040907263755798, |
|
"rewards/margins": 0.26560917496681213, |
|
"rewards/rejected": -0.4160182476043701, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -1.3986834287643433, |
|
"logits/rejected": -0.2482280433177948, |
|
"logps/chosen": -609.4581298828125, |
|
"logps/rejected": -1647.325439453125, |
|
"loss": 0.1148, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.05277906730771065, |
|
"rewards/margins": 0.2984154522418976, |
|
"rewards/rejected": -0.35119450092315674, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.082199056232015e-06, |
|
"logits/chosen": -1.453552007675171, |
|
"logits/rejected": -1.1569862365722656, |
|
"logps/chosen": -593.3402709960938, |
|
"logps/rejected": -1446.66943359375, |
|
"loss": 0.2248, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06173533946275711, |
|
"rewards/margins": 0.21113066375255585, |
|
"rewards/rejected": -0.27286598086357117, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.059542928183079e-06, |
|
"logits/chosen": -1.123428463935852, |
|
"logits/rejected": -0.1749972403049469, |
|
"logps/chosen": -752.8836669921875, |
|
"logps/rejected": -1819.65234375, |
|
"loss": 0.1707, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07862022519111633, |
|
"rewards/margins": 0.3714417815208435, |
|
"rewards/rejected": -0.4500620365142822, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -1.3128252029418945, |
|
"logits/rejected": -0.5668991208076477, |
|
"logps/chosen": -749.9387817382812, |
|
"logps/rejected": -1737.239501953125, |
|
"loss": 0.1744, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1024591326713562, |
|
"rewards/margins": 0.33585435152053833, |
|
"rewards/rejected": -0.43831348419189453, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0140871927018466e-06, |
|
"logits/chosen": -1.5394883155822754, |
|
"logits/rejected": -0.7085919380187988, |
|
"logps/chosen": -835.1018676757812, |
|
"logps/rejected": -1881.120361328125, |
|
"loss": 0.1541, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16260461509227753, |
|
"rewards/margins": 0.349585622549057, |
|
"rewards/rejected": -0.5121902227401733, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9912915238320755e-06, |
|
"logits/chosen": -1.323406457901001, |
|
"logits/rejected": -0.5690110921859741, |
|
"logps/chosen": -649.0794677734375, |
|
"logps/rejected": -1694.8863525390625, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10500358045101166, |
|
"rewards/margins": 0.30655449628829956, |
|
"rewards/rejected": -0.41155806183815, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -1.6879428625106812, |
|
"logits/rejected": -0.8558928370475769, |
|
"logps/chosen": -687.2265014648438, |
|
"logps/rejected": -1546.723876953125, |
|
"loss": 0.1733, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14953789114952087, |
|
"rewards/margins": 0.27107977867126465, |
|
"rewards/rejected": -0.4206176698207855, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.945574459442917e-06, |
|
"logits/chosen": -1.5647737979888916, |
|
"logits/rejected": -1.020084261894226, |
|
"logps/chosen": -731.6143798828125, |
|
"logps/rejected": -1668.8765869140625, |
|
"loss": 0.1614, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15018759667873383, |
|
"rewards/margins": 0.3018895983695984, |
|
"rewards/rejected": -0.4520772099494934, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.922657025129185e-06, |
|
"logits/chosen": -1.4925051927566528, |
|
"logits/rejected": -0.517613410949707, |
|
"logps/chosen": -759.2529296875, |
|
"logps/rejected": -1762.791748046875, |
|
"loss": 0.2552, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16193100810050964, |
|
"rewards/margins": 0.29394176602363586, |
|
"rewards/rejected": -0.4558727741241455, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -1.5928837060928345, |
|
"logits/rejected": -0.7866376042366028, |
|
"logps/chosen": -774.6558837890625, |
|
"logps/rejected": -1785.3062744140625, |
|
"loss": 0.1509, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1210324764251709, |
|
"rewards/margins": 0.31033051013946533, |
|
"rewards/rejected": -0.43136295676231384, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.876714280623708e-06, |
|
"logits/chosen": -1.347715139389038, |
|
"logits/rejected": -0.3572938144207001, |
|
"logps/chosen": -739.8132934570312, |
|
"logps/rejected": -1673.948486328125, |
|
"loss": 0.1395, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0940694585442543, |
|
"rewards/margins": 0.3482546806335449, |
|
"rewards/rejected": -0.44232410192489624, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8536929511919227e-06, |
|
"logits/chosen": -1.5348753929138184, |
|
"logits/rejected": -0.0369141586124897, |
|
"logps/chosen": -672.0413818359375, |
|
"logps/rejected": -1597.8165283203125, |
|
"loss": 0.1693, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09073442965745926, |
|
"rewards/margins": 0.2980082929134369, |
|
"rewards/rejected": -0.38874274492263794, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -1.3153806924819946, |
|
"logits/rejected": -0.5097673535346985, |
|
"logps/chosen": -708.4708251953125, |
|
"logps/rejected": -1756.6849365234375, |
|
"loss": 0.1513, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13321314752101898, |
|
"rewards/margins": 0.31301358342170715, |
|
"rewards/rejected": -0.44622668623924255, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.807560351340302e-06, |
|
"logits/chosen": -1.4458258152008057, |
|
"logits/rejected": -0.42417654395103455, |
|
"logps/chosen": -671.51611328125, |
|
"logps/rejected": -1660.485107421875, |
|
"loss": 0.1559, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11455672979354858, |
|
"rewards/margins": 0.3709821105003357, |
|
"rewards/rejected": -0.48553887009620667, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7844530781306544e-06, |
|
"logits/chosen": -1.3651716709136963, |
|
"logits/rejected": -0.4336569905281067, |
|
"logps/chosen": -840.4568481445312, |
|
"logps/rejected": -2070.00146484375, |
|
"loss": 0.1597, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19497820734977722, |
|
"rewards/margins": 0.396168977022171, |
|
"rewards/rejected": -0.5911471247673035, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -1.573979377746582, |
|
"logits/rejected": -0.7406612038612366, |
|
"logps/chosen": -825.8062744140625, |
|
"logps/rejected": -1895.0921630859375, |
|
"loss": 0.1527, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.156173974275589, |
|
"rewards/margins": 0.32018476724624634, |
|
"rewards/rejected": -0.4763587415218353, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.738166595746554e-06, |
|
"logits/chosen": -1.6485137939453125, |
|
"logits/rejected": -0.31914329528808594, |
|
"logps/chosen": -834.5794677734375, |
|
"logps/rejected": -1904.170654296875, |
|
"loss": 0.1606, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.15797320008277893, |
|
"rewards/margins": 0.32314831018447876, |
|
"rewards/rejected": -0.4811214506626129, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7149913971156105e-06, |
|
"logits/chosen": -1.5849123001098633, |
|
"logits/rejected": -0.718601644039154, |
|
"logps/chosen": -758.3035278320312, |
|
"logps/rejected": -1674.5123291015625, |
|
"loss": 0.1629, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15117041766643524, |
|
"rewards/margins": 0.3155497610569, |
|
"rewards/rejected": -0.4667201042175293, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -1.2325398921966553, |
|
"logits/rejected": -0.8877838850021362, |
|
"logps/chosen": -891.3978271484375, |
|
"logps/rejected": -1764.385498046875, |
|
"loss": 0.149, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2210262268781662, |
|
"rewards/margins": 0.3151123523712158, |
|
"rewards/rejected": -0.536138653755188, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.668587125005663e-06, |
|
"logits/chosen": -1.5326184034347534, |
|
"logits/rejected": -0.6966463923454285, |
|
"logps/chosen": -733.0716552734375, |
|
"logps/rejected": -1570.1944580078125, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14229288697242737, |
|
"rewards/margins": 0.30633196234703064, |
|
"rewards/rejected": -0.44862478971481323, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -1.5661652088165283, |
|
"logits/rejected": -0.7556155920028687, |
|
"logps/chosen": -820.5909423828125, |
|
"logps/rejected": -1868.76171875, |
|
"loss": 0.1276, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21517057716846466, |
|
"rewards/margins": 0.35130250453948975, |
|
"rewards/rejected": -0.566473126411438, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -1.37876558303833, |
|
"logits/rejected": -0.1346241682767868, |
|
"logps/chosen": -989.1995239257812, |
|
"logps/rejected": -1853.1685791015625, |
|
"loss": 0.1738, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2625274062156677, |
|
"rewards/margins": 0.27383849024772644, |
|
"rewards/rejected": -0.5363659858703613, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5988761950959133e-06, |
|
"logits/chosen": -1.4494779109954834, |
|
"logits/rejected": -0.5709508061408997, |
|
"logps/chosen": -801.075927734375, |
|
"logps/rejected": -1952.412841796875, |
|
"loss": 0.1589, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18583470582962036, |
|
"rewards/margins": 0.39585351943969727, |
|
"rewards/rejected": -0.5816881656646729, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.575619398465402e-06, |
|
"logits/chosen": -1.5121077299118042, |
|
"logits/rejected": -0.7577626705169678, |
|
"logps/chosen": -630.1747436523438, |
|
"logps/rejected": -1437.3203125, |
|
"loss": 0.2045, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12697990238666534, |
|
"rewards/margins": 0.22890718281269073, |
|
"rewards/rejected": -0.35588711500167847, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -1.165709137916565, |
|
"logits/rejected": -0.6048153638839722, |
|
"logps/chosen": -719.2890625, |
|
"logps/rejected": -1616.2135009765625, |
|
"loss": 0.1598, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11988051235675812, |
|
"rewards/margins": 0.26138466596603394, |
|
"rewards/rejected": -0.38126516342163086, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5290881645034932e-06, |
|
"logits/chosen": -1.6587855815887451, |
|
"logits/rejected": -0.6381639838218689, |
|
"logps/chosen": -765.1217041015625, |
|
"logps/rejected": -1829.5943603515625, |
|
"loss": 0.1613, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16699892282485962, |
|
"rewards/margins": 0.37708956003189087, |
|
"rewards/rejected": -0.5440884828567505, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5058177589223766e-06, |
|
"logits/chosen": -1.379034399986267, |
|
"logits/rejected": -0.6342862248420715, |
|
"logps/chosen": -677.946533203125, |
|
"logps/rejected": -1782.3040771484375, |
|
"loss": 0.1529, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12249922752380371, |
|
"rewards/margins": 0.374000608921051, |
|
"rewards/rejected": -0.49649983644485474, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -1.2877978086471558, |
|
"logits/rejected": -0.628576397895813, |
|
"logps/chosen": -799.4703369140625, |
|
"logps/rejected": -1640.319580078125, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15292036533355713, |
|
"rewards/margins": 0.2804059386253357, |
|
"rewards/rejected": -0.4333263039588928, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4592774518353858e-06, |
|
"logits/chosen": -1.4938738346099854, |
|
"logits/rejected": -0.6422006487846375, |
|
"logps/chosen": -752.1285400390625, |
|
"logps/rejected": -1773.2620849609375, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16756217181682587, |
|
"rewards/margins": 0.3254481554031372, |
|
"rewards/rejected": -0.49301037192344666, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.436011582865945e-06, |
|
"logits/chosen": -1.5365828275680542, |
|
"logits/rejected": -0.6787657141685486, |
|
"logps/chosen": -893.0496215820312, |
|
"logps/rejected": -1954.225341796875, |
|
"loss": 0.1604, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.2155316323041916, |
|
"rewards/margins": 0.3428398668766022, |
|
"rewards/rejected": -0.5583714842796326, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -1.1174455881118774, |
|
"logits/rejected": 0.26067572832107544, |
|
"logps/chosen": -777.9120483398438, |
|
"logps/rejected": -1707.302001953125, |
|
"loss": 0.1845, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.17711251974105835, |
|
"rewards/margins": 0.2716800570487976, |
|
"rewards/rejected": -0.44879254698753357, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3894984933853734e-06, |
|
"logits/chosen": -1.5128055810928345, |
|
"logits/rejected": -0.5743480920791626, |
|
"logps/chosen": -856.6569213867188, |
|
"logps/rejected": -1764.4976806640625, |
|
"loss": 0.1695, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1948443353176117, |
|
"rewards/margins": 0.25961554050445557, |
|
"rewards/rejected": -0.4544598460197449, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.366255303052377e-06, |
|
"logits/chosen": -1.5328229665756226, |
|
"logits/rejected": -0.41585612297058105, |
|
"logps/chosen": -865.1114501953125, |
|
"logps/rejected": -1857.373046875, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16477537155151367, |
|
"rewards/margins": 0.2972187399864197, |
|
"rewards/rejected": -0.46199408173561096, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -1.5353227853775024, |
|
"logits/rejected": 0.3234299123287201, |
|
"logps/chosen": -791.5120239257812, |
|
"logps/rejected": -2002.2496337890625, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15296730399131775, |
|
"rewards/margins": 0.41708940267562866, |
|
"rewards/rejected": -0.5700567960739136, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319805700686257e-06, |
|
"logits/chosen": -1.2240071296691895, |
|
"logits/rejected": -0.7209154963493347, |
|
"logps/chosen": -790.4075927734375, |
|
"logps/rejected": -1767.944580078125, |
|
"loss": 0.1377, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12179949134588242, |
|
"rewards/margins": 0.326397180557251, |
|
"rewards/rejected": -0.4481966495513916, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.296603313330355e-06, |
|
"logits/chosen": -1.6259534358978271, |
|
"logits/rejected": -0.29775649309158325, |
|
"logps/chosen": -746.3842163085938, |
|
"logps/rejected": -1520.571533203125, |
|
"loss": 0.1711, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13973134756088257, |
|
"rewards/margins": 0.26066604256629944, |
|
"rewards/rejected": -0.400397390127182, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -1.2232530117034912, |
|
"logits/rejected": -0.18962112069129944, |
|
"logps/chosen": -576.0113525390625, |
|
"logps/rejected": -1693.240966796875, |
|
"loss": 0.1384, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13496878743171692, |
|
"rewards/margins": 0.3316168785095215, |
|
"rewards/rejected": -0.466585636138916, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.250253418081373e-06, |
|
"logits/chosen": -1.4692437648773193, |
|
"logits/rejected": 0.04818035289645195, |
|
"logps/chosen": -899.2703857421875, |
|
"logps/rejected": -1808.1484375, |
|
"loss": 0.2009, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21512703597545624, |
|
"rewards/margins": 0.30080264806747437, |
|
"rewards/rejected": -0.5159296989440918, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.22710992622628e-06, |
|
"logits/chosen": -1.5507208108901978, |
|
"logits/rejected": 0.1027313843369484, |
|
"logps/chosen": -805.1966552734375, |
|
"logps/rejected": -1811.1793212890625, |
|
"loss": 0.1454, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13587817549705505, |
|
"rewards/margins": 0.39650699496269226, |
|
"rewards/rejected": -0.5323852300643921, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -1.395817756652832, |
|
"logits/rejected": -0.45912352204322815, |
|
"logps/chosen": -768.6142578125, |
|
"logps/rejected": -1828.7073974609375, |
|
"loss": 0.1529, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1312076300382614, |
|
"rewards/margins": 0.3623288571834564, |
|
"rewards/rejected": -0.4935365319252014, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1808958803485134e-06, |
|
"logits/chosen": -1.5901384353637695, |
|
"logits/rejected": -0.8347817659378052, |
|
"logps/chosen": -523.8443603515625, |
|
"logps/rejected": -1578.4923095703125, |
|
"loss": 0.1123, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07345361262559891, |
|
"rewards/margins": 0.34433066844940186, |
|
"rewards/rejected": -0.41778427362442017, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.157829330593008e-06, |
|
"logits/chosen": -1.5194613933563232, |
|
"logits/rejected": 0.044505536556243896, |
|
"logps/chosen": -846.5181884765625, |
|
"logps/rejected": -1842.278564453125, |
|
"loss": 0.1109, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.18009065091609955, |
|
"rewards/margins": 0.37484800815582275, |
|
"rewards/rejected": -0.5549386739730835, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -1.517464280128479, |
|
"logits/rejected": -0.3259011209011078, |
|
"logps/chosen": -696.5281982421875, |
|
"logps/rejected": -1474.2158203125, |
|
"loss": 0.1964, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13613803684711456, |
|
"rewards/margins": 0.25182804465293884, |
|
"rewards/rejected": -0.3879660964012146, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1117871704092818e-06, |
|
"logits/chosen": -1.5486299991607666, |
|
"logits/rejected": -0.5334731936454773, |
|
"logps/chosen": -767.1392822265625, |
|
"logps/rejected": -1842.3082275390625, |
|
"loss": 0.121, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1175505518913269, |
|
"rewards/margins": 0.3775568902492523, |
|
"rewards/rejected": -0.4951074719429016, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0888155493550027e-06, |
|
"logits/chosen": -1.5350594520568848, |
|
"logits/rejected": -0.2820148468017578, |
|
"logps/chosen": -626.4788208007812, |
|
"logps/rejected": -1641.033447265625, |
|
"loss": 0.1776, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.07961525768041611, |
|
"rewards/margins": 0.3609825670719147, |
|
"rewards/rejected": -0.4405978322029114, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.458655595779419, |
|
"logits/rejected": -0.2539186477661133, |
|
"logps/chosen": -764.8986206054688, |
|
"logps/rejected": -1802.0277099609375, |
|
"loss": 0.1842, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09169100970029831, |
|
"rewards/margins": 0.3511391580104828, |
|
"rewards/rejected": -0.4428301751613617, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0429811771568468e-06, |
|
"logits/chosen": -1.5172240734100342, |
|
"logits/rejected": -0.4895601272583008, |
|
"logps/chosen": -801.1700439453125, |
|
"logps/rejected": -1634.53955078125, |
|
"loss": 0.165, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1120598167181015, |
|
"rewards/margins": 0.27336305379867554, |
|
"rewards/rejected": -0.38542285561561584, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0201223973828917e-06, |
|
"logits/chosen": -1.627150535583496, |
|
"logits/rejected": -0.6142350435256958, |
|
"logps/chosen": -660.062744140625, |
|
"logps/rejected": -1752.922119140625, |
|
"loss": 0.1921, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08810608088970184, |
|
"rewards/margins": 0.36186718940734863, |
|
"rewards/rejected": -0.44997328519821167, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -1.5025413036346436, |
|
"logits/rejected": -0.59123295545578, |
|
"logps/chosen": -817.2398681640625, |
|
"logps/rejected": -1722.5006103515625, |
|
"loss": 0.1627, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10656937211751938, |
|
"rewards/margins": 0.2904852032661438, |
|
"rewards/rejected": -0.3970545828342438, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9745315534350157e-06, |
|
"logits/chosen": -1.5288734436035156, |
|
"logits/rejected": -0.7481527328491211, |
|
"logps/chosen": -627.6370849609375, |
|
"logps/rejected": -1676.7398681640625, |
|
"loss": 0.1255, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09700825065374374, |
|
"rewards/margins": 0.31658482551574707, |
|
"rewards/rejected": -0.4135931134223938, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9518034395302413e-06, |
|
"logits/chosen": -1.2082513570785522, |
|
"logits/rejected": -0.5326002836227417, |
|
"logps/chosen": -606.5856323242188, |
|
"logps/rejected": -1724.590087890625, |
|
"loss": 0.1711, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08526596426963806, |
|
"rewards/margins": 0.3748168349266052, |
|
"rewards/rejected": -0.4600828289985657, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -1.349005103111267, |
|
"logits/rejected": -0.7115123867988586, |
|
"logps/chosen": -850.7883911132812, |
|
"logps/rejected": -1918.0609130859375, |
|
"loss": 0.1049, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1696614772081375, |
|
"rewards/margins": 0.3336087763309479, |
|
"rewards/rejected": -0.5032702684402466, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9064916742013515e-06, |
|
"logits/chosen": -1.4064449071884155, |
|
"logits/rejected": -0.41218453645706177, |
|
"logps/chosen": -922.0281372070312, |
|
"logps/rejected": -1926.2939453125, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20819124579429626, |
|
"rewards/margins": 0.350273996591568, |
|
"rewards/rejected": -0.558465301990509, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.883911948865306e-06, |
|
"logits/chosen": -1.4412527084350586, |
|
"logits/rejected": -0.3628607392311096, |
|
"logps/chosen": -686.2496948242188, |
|
"logps/rejected": -1755.6363525390625, |
|
"loss": 0.1658, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1295745074748993, |
|
"rewards/margins": 0.372491717338562, |
|
"rewards/rejected": -0.5020662546157837, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -1.6775572299957275, |
|
"logits/rejected": -0.6549306511878967, |
|
"logps/chosen": -597.9432373046875, |
|
"logps/rejected": -1609.9539794921875, |
|
"loss": 0.137, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10794492810964584, |
|
"rewards/margins": 0.3688591420650482, |
|
"rewards/rejected": -0.47680407762527466, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8389145949069953e-06, |
|
"logits/chosen": -1.382621169090271, |
|
"logits/rejected": -0.4452172815799713, |
|
"logps/chosen": -683.6560668945312, |
|
"logps/rejected": -1507.298095703125, |
|
"loss": 0.2337, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.110257588326931, |
|
"rewards/margins": 0.25430378317832947, |
|
"rewards/rejected": -0.36456140875816345, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.816500865130279e-06, |
|
"logits/chosen": -1.467057228088379, |
|
"logits/rejected": -0.31321266293525696, |
|
"logps/chosen": -672.0931396484375, |
|
"logps/rejected": -1508.775634765625, |
|
"loss": 0.1942, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09393011033535004, |
|
"rewards/margins": 0.24138036370277405, |
|
"rewards/rejected": -0.33531051874160767, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -1.6495239734649658, |
|
"logits/rejected": -0.6112552285194397, |
|
"logps/chosen": -657.7040405273438, |
|
"logps/rejected": -1789.280029296875, |
|
"loss": 0.149, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.07512945681810379, |
|
"rewards/margins": 0.32172054052352905, |
|
"rewards/rejected": -0.39685001969337463, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7718530101256115e-06, |
|
"logits/chosen": -1.4681954383850098, |
|
"logits/rejected": -0.752936840057373, |
|
"logps/chosen": -775.773193359375, |
|
"logps/rejected": -1576.1943359375, |
|
"loss": 0.2381, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12784716486930847, |
|
"rewards/margins": 0.20212575793266296, |
|
"rewards/rejected": -0.32997292280197144, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7496227534604859e-06, |
|
"logits/chosen": -1.6714906692504883, |
|
"logits/rejected": -1.0251556634902954, |
|
"logps/chosen": -675.6273803710938, |
|
"logps/rejected": -1630.176025390625, |
|
"loss": 0.1692, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0969114676117897, |
|
"rewards/margins": 0.361731618642807, |
|
"rewards/rejected": -0.4586430490016937, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.5120388269424438, |
|
"logits/rejected": -0.5438052415847778, |
|
"logps/chosen": -692.0360717773438, |
|
"logps/rejected": -1721.4326171875, |
|
"loss": 0.1631, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11012852191925049, |
|
"rewards/margins": 0.3708108365535736, |
|
"rewards/rejected": -0.4809393882751465, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7053592124637557e-06, |
|
"logits/chosen": -1.6181414127349854, |
|
"logits/rejected": -0.6608393788337708, |
|
"logps/chosen": -747.2515869140625, |
|
"logps/rejected": -1643.6500244140625, |
|
"loss": 0.2218, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13988645374774933, |
|
"rewards/margins": 0.2650088965892792, |
|
"rewards/rejected": -0.4048953652381897, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6833297633956647e-06, |
|
"logits/chosen": -1.3472005128860474, |
|
"logits/rejected": 0.12191818654537201, |
|
"logps/chosen": -693.1132202148438, |
|
"logps/rejected": -1696.821533203125, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07493006438016891, |
|
"rewards/margins": 0.32017913460731506, |
|
"rewards/rejected": -0.3951091766357422, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -1.4592828750610352, |
|
"logits/rejected": -0.45236214995384216, |
|
"logps/chosen": -696.2838745117188, |
|
"logps/rejected": -1729.255126953125, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.061267025768756866, |
|
"rewards/margins": 0.35353952646255493, |
|
"rewards/rejected": -0.4148065447807312, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6394850517846621e-06, |
|
"logits/chosen": -1.6494789123535156, |
|
"logits/rejected": -0.9741545915603638, |
|
"logps/chosen": -759.4490966796875, |
|
"logps/rejected": -1714.441162109375, |
|
"loss": 0.1556, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10241154581308365, |
|
"rewards/margins": 0.3450758159160614, |
|
"rewards/rejected": -0.44748735427856445, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6176735882153284e-06, |
|
"logits/chosen": -1.468384027481079, |
|
"logits/rejected": -0.16317103803157806, |
|
"logps/chosen": -605.1905517578125, |
|
"logps/rejected": -1460.53955078125, |
|
"loss": 0.1345, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0676204189658165, |
|
"rewards/margins": 0.30662956833839417, |
|
"rewards/rejected": -0.37424999475479126, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -1.4594337940216064, |
|
"logits/rejected": -0.8179009556770325, |
|
"logps/chosen": -606.8065185546875, |
|
"logps/rejected": -1657.3125, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06961175799369812, |
|
"rewards/margins": 0.3652622103691101, |
|
"rewards/rejected": -0.4348739683628082, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5742818947772875e-06, |
|
"logits/chosen": -1.5698840618133545, |
|
"logits/rejected": 0.06299029290676117, |
|
"logps/chosen": -768.6961669921875, |
|
"logps/rejected": -1631.1295166015625, |
|
"loss": 0.137, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09274804592132568, |
|
"rewards/margins": 0.27431541681289673, |
|
"rewards/rejected": -0.3670634627342224, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.552705424629898e-06, |
|
"logits/chosen": -1.4378259181976318, |
|
"logits/rejected": -0.6328508257865906, |
|
"logps/chosen": -757.0548095703125, |
|
"logps/rejected": -1625.8509521484375, |
|
"loss": 0.1496, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09750431030988693, |
|
"rewards/margins": 0.25974932312965393, |
|
"rewards/rejected": -0.35725364089012146, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -1.429025411605835, |
|
"logits/rejected": -1.0502439737319946, |
|
"logps/chosen": -620.97998046875, |
|
"logps/rejected": -1421.07470703125, |
|
"loss": 0.1467, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0735950618982315, |
|
"rewards/margins": 0.2558991312980652, |
|
"rewards/rejected": -0.3294941782951355, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -1.2865421772003174, |
|
"logits/rejected": -0.21516099572181702, |
|
"logps/chosen": -712.9671630859375, |
|
"logps/rejected": -1577.719970703125, |
|
"loss": 0.1254, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08029041439294815, |
|
"rewards/margins": 0.2842417359352112, |
|
"rewards/rejected": -0.36453211307525635, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4884759328590476e-06, |
|
"logits/chosen": -1.7966272830963135, |
|
"logits/rejected": -0.5356184244155884, |
|
"logps/chosen": -774.8032836914062, |
|
"logps/rejected": -1728.052490234375, |
|
"loss": 0.1569, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.13011132180690765, |
|
"rewards/margins": 0.38214725255966187, |
|
"rewards/rejected": -0.5122585892677307, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -1.6186519861221313, |
|
"logits/rejected": 0.41130122542381287, |
|
"logps/chosen": -872.7478637695312, |
|
"logps/rejected": -1909.5640869140625, |
|
"loss": 0.148, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1565207540988922, |
|
"rewards/margins": 0.3737161159515381, |
|
"rewards/rejected": -0.5302368402481079, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.446091402744923e-06, |
|
"logits/chosen": -1.2679738998413086, |
|
"logits/rejected": -0.5303937196731567, |
|
"logps/chosen": -702.9267578125, |
|
"logps/rejected": -1642.1829833984375, |
|
"loss": 0.1461, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1064765453338623, |
|
"rewards/margins": 0.34039077162742615, |
|
"rewards/rejected": -0.44686728715896606, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4250351971283937e-06, |
|
"logits/chosen": -1.604107141494751, |
|
"logits/rejected": 0.4407750070095062, |
|
"logps/chosen": -770.2025146484375, |
|
"logps/rejected": -1616.8951416015625, |
|
"loss": 0.1425, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11423502117395401, |
|
"rewards/margins": 0.29226523637771606, |
|
"rewards/rejected": -0.4065002501010895, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -1.4690310955047607, |
|
"logits/rejected": 0.40654927492141724, |
|
"logps/chosen": -752.0418090820312, |
|
"logps/rejected": -1654.997802734375, |
|
"loss": 0.1918, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11529115587472916, |
|
"rewards/margins": 0.2776246964931488, |
|
"rewards/rejected": -0.392915815114975, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3832040268095589e-06, |
|
"logits/chosen": -1.2964470386505127, |
|
"logits/rejected": -0.35982269048690796, |
|
"logps/chosen": -707.73681640625, |
|
"logps/rejected": -1798.7490234375, |
|
"loss": 0.1272, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09786094725131989, |
|
"rewards/margins": 0.3779224455356598, |
|
"rewards/rejected": -0.4757833480834961, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.362432686615316e-06, |
|
"logits/chosen": -1.4789488315582275, |
|
"logits/rejected": -0.4336570203304291, |
|
"logps/chosen": -560.9461669921875, |
|
"logps/rejected": -1716.0234375, |
|
"loss": 0.164, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.058935634791851044, |
|
"rewards/margins": 0.3738183081150055, |
|
"rewards/rejected": -0.43275389075279236, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -1.5111268758773804, |
|
"logits/rejected": -0.40306025743484497, |
|
"logps/chosen": -659.0839233398438, |
|
"logps/rejected": -1699.853515625, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08692941069602966, |
|
"rewards/margins": 0.35781151056289673, |
|
"rewards/rejected": -0.4447408616542816, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3211874947800747e-06, |
|
"logits/chosen": -1.552757978439331, |
|
"logits/rejected": -0.733113169670105, |
|
"logps/chosen": -668.4918212890625, |
|
"logps/rejected": -1597.004150390625, |
|
"loss": 0.1547, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.09527282416820526, |
|
"rewards/margins": 0.2590247094631195, |
|
"rewards/rejected": -0.35429757833480835, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3007172168743854e-06, |
|
"logits/chosen": -1.4352308511734009, |
|
"logits/rejected": -0.04908560588955879, |
|
"logps/chosen": -665.5040893554688, |
|
"logps/rejected": -1593.777587890625, |
|
"loss": 0.1445, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08846768736839294, |
|
"rewards/margins": 0.32587721943855286, |
|
"rewards/rejected": -0.4143448770046234, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -1.3806612491607666, |
|
"logits/rejected": -0.20799100399017334, |
|
"logps/chosen": -762.9417724609375, |
|
"logps/rejected": -1701.4697265625, |
|
"loss": 0.148, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15475930273532867, |
|
"rewards/margins": 0.29663509130477905, |
|
"rewards/rejected": -0.45139437913894653, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.260090165282645e-06, |
|
"logits/chosen": -1.4364516735076904, |
|
"logits/rejected": 0.2683382034301758, |
|
"logps/chosen": -701.4534912109375, |
|
"logps/rejected": -1679.4847412109375, |
|
"loss": 0.1688, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19048824906349182, |
|
"rewards/margins": 0.27079007029533386, |
|
"rewards/rejected": -0.46127834916114807, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2399369117724582e-06, |
|
"logits/chosen": -1.4400079250335693, |
|
"logits/rejected": -0.4409395158290863, |
|
"logps/chosen": -797.1912841796875, |
|
"logps/rejected": -1693.439453125, |
|
"loss": 0.1558, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12387750297784805, |
|
"rewards/margins": 0.3022800087928772, |
|
"rewards/rejected": -0.42615753412246704, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -1.5424561500549316, |
|
"logits/rejected": 0.6223300695419312, |
|
"logps/chosen": -694.9400024414062, |
|
"logps/rejected": -1691.6669921875, |
|
"loss": 0.1657, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0836716815829277, |
|
"rewards/margins": 0.3352142870426178, |
|
"rewards/rejected": -0.4188859462738037, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1999596801769617e-06, |
|
"logits/chosen": -1.6569328308105469, |
|
"logits/rejected": -0.4357103407382965, |
|
"logps/chosen": -693.4281005859375, |
|
"logps/rejected": -1608.7647705078125, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.08369255810976028, |
|
"rewards/margins": 0.2921767234802246, |
|
"rewards/rejected": -0.3758693337440491, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1801391659631423e-06, |
|
"logits/chosen": -1.5653746128082275, |
|
"logits/rejected": 0.7312607169151306, |
|
"logps/chosen": -660.3027954101562, |
|
"logps/rejected": -1588.2919921875, |
|
"loss": 0.1214, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06633373349905014, |
|
"rewards/margins": 0.30183035135269165, |
|
"rewards/rejected": -0.3681640923023224, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -1.5090980529785156, |
|
"logits/rejected": -0.8780859112739563, |
|
"logps/chosen": -695.6319580078125, |
|
"logps/rejected": -1500.2325439453125, |
|
"loss": 0.1882, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07338380068540573, |
|
"rewards/margins": 0.2535490393638611, |
|
"rewards/rejected": -0.326932817697525, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1408429274065418e-06, |
|
"logits/chosen": -1.3488575220108032, |
|
"logits/rejected": -0.6483387351036072, |
|
"logps/chosen": -572.9019165039062, |
|
"logps/rejected": -1599.5225830078125, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07061342149972916, |
|
"rewards/margins": 0.2944713532924652, |
|
"rewards/rejected": -0.36508476734161377, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1213706079298566e-06, |
|
"logits/chosen": -1.4909251928329468, |
|
"logits/rejected": -0.27080297470092773, |
|
"logps/chosen": -564.2769165039062, |
|
"logps/rejected": -1579.8958740234375, |
|
"loss": 0.1631, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.053073953837156296, |
|
"rewards/margins": 0.32997792959213257, |
|
"rewards/rejected": -0.38305193185806274, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -1.5161458253860474, |
|
"logits/rejected": -0.2539205551147461, |
|
"logps/chosen": -698.4984741210938, |
|
"logps/rejected": -1621.921630859375, |
|
"loss": 0.1693, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09174446761608124, |
|
"rewards/margins": 0.29953649640083313, |
|
"rewards/rejected": -0.3912809491157532, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0827860044369226e-06, |
|
"logits/chosen": -1.5986204147338867, |
|
"logits/rejected": -0.2638501226902008, |
|
"logps/chosen": -766.909423828125, |
|
"logps/rejected": -1732.082275390625, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12383983284235, |
|
"rewards/margins": 0.32002753019332886, |
|
"rewards/rejected": -0.44386744499206543, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.06367706362636e-06, |
|
"logits/chosen": -1.4342091083526611, |
|
"logits/rejected": -0.8792727589607239, |
|
"logps/chosen": -737.6398315429688, |
|
"logps/rejected": -1701.187255859375, |
|
"loss": 0.1727, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1248563751578331, |
|
"rewards/margins": 0.2940226197242737, |
|
"rewards/rejected": -0.41887903213500977, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -1.3923488855361938, |
|
"logits/rejected": -0.20048478245735168, |
|
"logps/chosen": -803.1317138671875, |
|
"logps/rejected": -1872.0126953125, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1411004215478897, |
|
"rewards/margins": 0.36530831456184387, |
|
"rewards/rejected": -0.5064087510108948, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0258341823102418e-06, |
|
"logits/chosen": -1.5749719142913818, |
|
"logits/rejected": -0.40622204542160034, |
|
"logps/chosen": -757.8853759765625, |
|
"logps/rejected": -1855.6015625, |
|
"loss": 0.1441, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1515471190214157, |
|
"rewards/margins": 0.4066466689109802, |
|
"rewards/rejected": -0.5581938624382019, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -1.6754239797592163, |
|
"logits/rejected": -0.716029167175293, |
|
"logps/chosen": -787.7699584960938, |
|
"logps/rejected": -1843.915283203125, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1490515172481537, |
|
"rewards/margins": 0.3542669713497162, |
|
"rewards/rejected": -0.5033184885978699, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -1.3399386405944824, |
|
"logits/rejected": -0.6510659456253052, |
|
"logps/chosen": -645.7489013671875, |
|
"logps/rejected": -1645.0814208984375, |
|
"loss": 0.1528, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12475794553756714, |
|
"rewards/margins": 0.3248399794101715, |
|
"rewards/rejected": -0.44959789514541626, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.700318703442437e-07, |
|
"logits/chosen": -1.5086935758590698, |
|
"logits/rejected": -0.44206541776657104, |
|
"logps/chosen": -751.8470458984375, |
|
"logps/rejected": -1835.744873046875, |
|
"loss": 0.1461, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0970626249909401, |
|
"rewards/margins": 0.4153032898902893, |
|
"rewards/rejected": -0.5123659372329712, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.516940936268504e-07, |
|
"logits/chosen": -1.4146547317504883, |
|
"logits/rejected": -0.39967912435531616, |
|
"logps/chosen": -655.8258666992188, |
|
"logps/rejected": -1597.619873046875, |
|
"loss": 0.1199, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10331164300441742, |
|
"rewards/margins": 0.29826727509498596, |
|
"rewards/rejected": -0.4015789031982422, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -1.5796334743499756, |
|
"logits/rejected": -0.18904080986976624, |
|
"logps/chosen": -649.2426147460938, |
|
"logps/rejected": -1707.6396484375, |
|
"loss": 0.1682, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08159293234348297, |
|
"rewards/margins": 0.3529675602912903, |
|
"rewards/rejected": -0.43456047773361206, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.154225815032242e-07, |
|
"logits/chosen": -1.5274991989135742, |
|
"logits/rejected": -0.7836966514587402, |
|
"logps/chosen": -610.8500366210938, |
|
"logps/rejected": -1656.6763916015625, |
|
"loss": 0.1472, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06673729419708252, |
|
"rewards/margins": 0.36455440521240234, |
|
"rewards/rejected": -0.4312916696071625, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.974919888823164e-07, |
|
"logits/chosen": -1.4849748611450195, |
|
"logits/rejected": -0.4282095432281494, |
|
"logps/chosen": -743.9561767578125, |
|
"logps/rejected": -1814.4990234375, |
|
"loss": 0.1699, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.09201905876398087, |
|
"rewards/margins": 0.36516329646110535, |
|
"rewards/rejected": -0.4571823179721832, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -1.4566679000854492, |
|
"logits/rejected": -0.5152000188827515, |
|
"logps/chosen": -523.7813720703125, |
|
"logps/rejected": -1502.1715087890625, |
|
"loss": 0.1961, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.058007530868053436, |
|
"rewards/margins": 0.3089088201522827, |
|
"rewards/rejected": -0.36691635847091675, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.620488984679378e-07, |
|
"logits/chosen": -1.7398220300674438, |
|
"logits/rejected": -0.2614799737930298, |
|
"logps/chosen": -641.33447265625, |
|
"logps/rejected": -1628.4310302734375, |
|
"loss": 0.1416, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.05575539544224739, |
|
"rewards/margins": 0.34235674142837524, |
|
"rewards/rejected": -0.39811214804649353, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.445394716802754e-07, |
|
"logits/chosen": -1.5343586206436157, |
|
"logits/rejected": -0.6991978883743286, |
|
"logps/chosen": -695.736328125, |
|
"logps/rejected": -1697.3258056640625, |
|
"loss": 0.17, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0863880068063736, |
|
"rewards/margins": 0.3482551872730255, |
|
"rewards/rejected": -0.4346431791782379, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.3757431507110596, |
|
"logits/rejected": -0.4847453236579895, |
|
"logps/chosen": -624.9798583984375, |
|
"logps/rejected": -1600.3311767578125, |
|
"loss": 0.1704, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07361427694559097, |
|
"rewards/margins": 0.3176981210708618, |
|
"rewards/rejected": -0.3913124203681946, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.099524404308948e-07, |
|
"logits/chosen": -1.463273525238037, |
|
"logits/rejected": -0.44319063425064087, |
|
"logps/chosen": -733.7213745117188, |
|
"logps/rejected": -1909.9351806640625, |
|
"loss": 0.1014, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1007709726691246, |
|
"rewards/margins": 0.38669538497924805, |
|
"rewards/rejected": -0.487466424703598, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.928778328007918e-07, |
|
"logits/chosen": -1.7150824069976807, |
|
"logits/rejected": -0.10497765243053436, |
|
"logps/chosen": -676.3504028320312, |
|
"logps/rejected": -1732.474609375, |
|
"loss": 0.1293, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07949225604534149, |
|
"rewards/margins": 0.3401327431201935, |
|
"rewards/rejected": -0.4196249842643738, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -1.4279053211212158, |
|
"logits/rejected": -0.10710684955120087, |
|
"logps/chosen": -716.6738891601562, |
|
"logps/rejected": -1643.4683837890625, |
|
"loss": 0.1605, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09047757089138031, |
|
"rewards/margins": 0.2613658308982849, |
|
"rewards/rejected": -0.35184338688850403, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.591738306429769e-07, |
|
"logits/chosen": -1.4157854318618774, |
|
"logits/rejected": -0.362983763217926, |
|
"logps/chosen": -813.336181640625, |
|
"logps/rejected": -1738.5064697265625, |
|
"loss": 0.1146, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08977197110652924, |
|
"rewards/margins": 0.333584189414978, |
|
"rewards/rejected": -0.42335623502731323, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.425473564358457e-07, |
|
"logits/chosen": -1.2446939945220947, |
|
"logits/rejected": -0.24595150351524353, |
|
"logps/chosen": -870.3673706054688, |
|
"logps/rejected": -1808.233642578125, |
|
"loss": 0.2744, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14074157178401947, |
|
"rewards/margins": 0.33063870668411255, |
|
"rewards/rejected": -0.4713803231716156, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -1.6176745891571045, |
|
"logits/rejected": -0.5913767218589783, |
|
"logps/chosen": -506.3935546875, |
|
"logps/rejected": -1553.77099609375, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.04700837656855583, |
|
"rewards/margins": 0.33918124437332153, |
|
"rewards/rejected": -0.38618963956832886, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.097526647366379e-07, |
|
"logits/chosen": -1.7674744129180908, |
|
"logits/rejected": -0.6992005705833435, |
|
"logps/chosen": -651.4013061523438, |
|
"logps/rejected": -1570.8125, |
|
"loss": 0.1434, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.058572918176651, |
|
"rewards/margins": 0.3027827739715576, |
|
"rewards/rejected": -0.3613556921482086, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.935872887769299e-07, |
|
"logits/chosen": -1.6404222249984741, |
|
"logits/rejected": -0.85943204164505, |
|
"logps/chosen": -782.1417846679688, |
|
"logps/rejected": -1594.085205078125, |
|
"loss": 0.2041, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0960140973329544, |
|
"rewards/margins": 0.2894643545150757, |
|
"rewards/rejected": -0.3854784369468689, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -1.4033766984939575, |
|
"logits/rejected": -0.9920269846916199, |
|
"logps/chosen": -629.6160888671875, |
|
"logps/rejected": -1653.112548828125, |
|
"loss": 0.1683, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.07426479458808899, |
|
"rewards/margins": 0.29479536414146423, |
|
"rewards/rejected": -0.36906009912490845, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.617274798504286e-07, |
|
"logits/chosen": -1.6468874216079712, |
|
"logits/rejected": -0.37529969215393066, |
|
"logps/chosen": -776.8147583007812, |
|
"logps/rejected": -1830.4251708984375, |
|
"loss": 0.1382, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07489114999771118, |
|
"rewards/margins": 0.3607185184955597, |
|
"rewards/rejected": -0.4356096684932709, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.460358074120518e-07, |
|
"logits/chosen": -1.5551801919937134, |
|
"logits/rejected": -0.9461766481399536, |
|
"logps/chosen": -714.2229614257812, |
|
"logps/rejected": -1528.100341796875, |
|
"loss": 0.1998, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06713708490133286, |
|
"rewards/margins": 0.25901907682418823, |
|
"rewards/rejected": -0.3261561691761017, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -1.5757367610931396, |
|
"logits/rejected": -1.0922707319259644, |
|
"logps/chosen": -647.8770751953125, |
|
"logps/rejected": -1738.0703125, |
|
"loss": 0.1411, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06151123717427254, |
|
"rewards/margins": 0.37969422340393066, |
|
"rewards/rejected": -0.4412055015563965, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.151357245788917e-07, |
|
"logits/chosen": -1.52052903175354, |
|
"logits/rejected": -0.5594236850738525, |
|
"logps/chosen": -708.1317138671875, |
|
"logps/rejected": -1907.6968994140625, |
|
"loss": 0.1327, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.08830620348453522, |
|
"rewards/margins": 0.3933480679988861, |
|
"rewards/rejected": -0.4816543161869049, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.999299915559956e-07, |
|
"logits/chosen": -1.422783374786377, |
|
"logits/rejected": -0.7664706110954285, |
|
"logps/chosen": -648.51171875, |
|
"logps/rejected": -1761.1148681640625, |
|
"loss": 0.1385, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07798387855291367, |
|
"rewards/margins": 0.35578176379203796, |
|
"rewards/rejected": -0.4337656497955322, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.4832850694656372, |
|
"logits/rejected": -0.704194962978363, |
|
"logps/chosen": -635.0929565429688, |
|
"logps/rejected": -1439.96630859375, |
|
"loss": 0.2039, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.09270008653402328, |
|
"rewards/margins": 0.2593896985054016, |
|
"rewards/rejected": -0.3520897328853607, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.700137297712749e-07, |
|
"logits/chosen": -1.560736894607544, |
|
"logits/rejected": -0.25968214869499207, |
|
"logps/chosen": -725.5360107421875, |
|
"logps/rejected": -1755.325927734375, |
|
"loss": 0.1363, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08439795672893524, |
|
"rewards/margins": 0.36337295174598694, |
|
"rewards/rejected": -0.4477709233760834, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.553057931370729e-07, |
|
"logits/chosen": -1.666855812072754, |
|
"logits/rejected": -0.44792652130126953, |
|
"logps/chosen": -635.80419921875, |
|
"logps/rejected": -1666.586669921875, |
|
"loss": 0.1712, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07980332523584366, |
|
"rewards/margins": 0.3341625928878784, |
|
"rewards/rejected": -0.41396594047546387, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -1.5879957675933838, |
|
"logits/rejected": -0.9095015525817871, |
|
"logps/chosen": -787.1989135742188, |
|
"logps/rejected": -1740.4361572265625, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08927465230226517, |
|
"rewards/margins": 0.3183870315551758, |
|
"rewards/rejected": -0.4076617360115051, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.263966802018275e-07, |
|
"logits/chosen": -1.6660913228988647, |
|
"logits/rejected": -0.7353159785270691, |
|
"logps/chosen": -805.4395751953125, |
|
"logps/rejected": -1653.5638427734375, |
|
"loss": 0.1664, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08214981853961945, |
|
"rewards/margins": 0.31050822138786316, |
|
"rewards/rejected": -0.3926580250263214, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.121980087628802e-07, |
|
"logits/chosen": -1.63559091091156, |
|
"logits/rejected": -0.620925784111023, |
|
"logps/chosen": -642.1688232421875, |
|
"logps/rejected": -1510.009521484375, |
|
"loss": 0.186, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.04546310380101204, |
|
"rewards/margins": 0.30619844794273376, |
|
"rewards/rejected": -0.3516615927219391, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -1.423666000366211, |
|
"logits/rejected": -0.12835507094860077, |
|
"logps/chosen": -631.965576171875, |
|
"logps/rejected": -1562.4158935546875, |
|
"loss": 0.2021, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06688202172517776, |
|
"rewards/margins": 0.2849853038787842, |
|
"rewards/rejected": -0.35186734795570374, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.843185871337722e-07, |
|
"logits/chosen": -1.3471550941467285, |
|
"logits/rejected": -0.5594549179077148, |
|
"logps/chosen": -620.7034301757812, |
|
"logps/rejected": -1736.4898681640625, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.05083579942584038, |
|
"rewards/margins": 0.33074623346328735, |
|
"rewards/rejected": -0.3815820515155792, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.706402525869633e-07, |
|
"logits/chosen": -1.2812891006469727, |
|
"logits/rejected": -0.4547126293182373, |
|
"logps/chosen": -562.1158447265625, |
|
"logps/rejected": -1769.2896728515625, |
|
"loss": 0.1329, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.04002733901143074, |
|
"rewards/margins": 0.383873850107193, |
|
"rewards/rejected": -0.423901230096817, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -1.571263313293457, |
|
"logits/rejected": -0.40705451369285583, |
|
"logps/chosen": -605.5236206054688, |
|
"logps/rejected": -1484.4984130859375, |
|
"loss": 0.1846, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05240412801504135, |
|
"rewards/margins": 0.2970747649669647, |
|
"rewards/rejected": -0.34947890043258667, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.438122617983442e-07, |
|
"logits/chosen": -1.3903917074203491, |
|
"logits/rejected": -0.20577654242515564, |
|
"logps/chosen": -603.0784301757812, |
|
"logps/rejected": -1667.8463134765625, |
|
"loss": 0.1127, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0214361734688282, |
|
"rewards/margins": 0.37630894780158997, |
|
"rewards/rejected": -0.3977451026439667, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.3066493009749853e-07, |
|
"logits/chosen": -1.6927878856658936, |
|
"logits/rejected": -0.6562541723251343, |
|
"logps/chosen": -784.9993896484375, |
|
"logps/rejected": -1762.9515380859375, |
|
"loss": 0.19, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07724924385547638, |
|
"rewards/margins": 0.29403752088546753, |
|
"rewards/rejected": -0.3712867796421051, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -1.457176923751831, |
|
"logits/rejected": -0.6649435758590698, |
|
"logps/chosen": -731.3934326171875, |
|
"logps/rejected": -1780.1842041015625, |
|
"loss": 0.1694, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06437239050865173, |
|
"rewards/margins": 0.33346226811408997, |
|
"rewards/rejected": -0.3978345990180969, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.049092898095816e-07, |
|
"logits/chosen": -1.2814868688583374, |
|
"logits/rejected": -0.6186938285827637, |
|
"logps/chosen": -650.4574584960938, |
|
"logps/rejected": -1637.374755859375, |
|
"loss": 0.1541, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06206426024436951, |
|
"rewards/margins": 0.26436498761177063, |
|
"rewards/rejected": -0.32642924785614014, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9230321284847856e-07, |
|
"logits/chosen": -1.5192755460739136, |
|
"logits/rejected": -0.237101748585701, |
|
"logps/chosen": -517.3258666992188, |
|
"logps/rejected": -1354.155517578125, |
|
"loss": 0.1789, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.014683857560157776, |
|
"rewards/margins": 0.2717076539993286, |
|
"rewards/rejected": -0.2863914966583252, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -1.514344573020935, |
|
"logits/rejected": -1.0580737590789795, |
|
"logps/chosen": -651.7825927734375, |
|
"logps/rejected": -1709.284423828125, |
|
"loss": 0.1427, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06703333556652069, |
|
"rewards/margins": 0.3800427317619324, |
|
"rewards/rejected": -0.4470759928226471, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6764000653481263e-07, |
|
"logits/chosen": -1.5731451511383057, |
|
"logits/rejected": -0.5786491632461548, |
|
"logps/chosen": -680.1246337890625, |
|
"logps/rejected": -1564.331787109375, |
|
"loss": 0.1837, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08038660138845444, |
|
"rewards/margins": 0.2984033226966858, |
|
"rewards/rejected": -0.3787899315357208, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.555850141530659e-07, |
|
"logits/chosen": -1.620054006576538, |
|
"logits/rejected": -0.6548138856887817, |
|
"logps/chosen": -725.5187377929688, |
|
"logps/rejected": -1559.7091064453125, |
|
"loss": 0.1401, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.058062393218278885, |
|
"rewards/margins": 0.3103691041469574, |
|
"rewards/rejected": -0.3684315085411072, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -1.4922726154327393, |
|
"logits/rejected": -0.5721961259841919, |
|
"logps/chosen": -574.6033935546875, |
|
"logps/rejected": -1700.106689453125, |
|
"loss": 0.1561, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.055240534245967865, |
|
"rewards/margins": 0.36047983169555664, |
|
"rewards/rejected": -0.4157203733921051, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3203347344004737e-07, |
|
"logits/chosen": -1.260347604751587, |
|
"logits/rejected": -0.1747826784849167, |
|
"logps/chosen": -695.7683715820312, |
|
"logps/rejected": -1630.91064453125, |
|
"loss": 0.1731, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13098278641700745, |
|
"rewards/margins": 0.2832750380039215, |
|
"rewards/rejected": -0.41425782442092896, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2053896575809426e-07, |
|
"logits/chosen": -1.39348566532135, |
|
"logits/rejected": -0.686271607875824, |
|
"logps/chosen": -608.1319580078125, |
|
"logps/rejected": -1700.7486572265625, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0731215551495552, |
|
"rewards/margins": 0.36609500646591187, |
|
"rewards/rejected": -0.4392165243625641, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -1.3719862699508667, |
|
"logits/rejected": -0.6818580031394958, |
|
"logps/chosen": -606.0661010742188, |
|
"logps/rejected": -1553.185791015625, |
|
"loss": 0.1497, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06806117296218872, |
|
"rewards/margins": 0.30099108815193176, |
|
"rewards/rejected": -0.3690522313117981, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.981174554287239e-07, |
|
"logits/chosen": -1.3921059370040894, |
|
"logits/rejected": -0.7532171010971069, |
|
"logps/chosen": -703.4324951171875, |
|
"logps/rejected": -1927.8841552734375, |
|
"loss": 0.1379, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07026232779026031, |
|
"rewards/margins": 0.38420066237449646, |
|
"rewards/rejected": -0.4544629156589508, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.871923955178918e-07, |
|
"logits/chosen": -1.2359908819198608, |
|
"logits/rejected": -0.5525861978530884, |
|
"logps/chosen": -692.30419921875, |
|
"logps/rejected": -1788.237060546875, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08661970496177673, |
|
"rewards/margins": 0.37772631645202637, |
|
"rewards/rejected": -0.4643460810184479, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -1.4797961711883545, |
|
"logits/rejected": -0.3416746258735657, |
|
"logps/chosen": -617.2152099609375, |
|
"logps/rejected": -1625.399658203125, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0840892642736435, |
|
"rewards/margins": 0.2994067072868347, |
|
"rewards/rejected": -0.3834959864616394, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -1.503493070602417, |
|
"logits/rejected": 0.3321295380592346, |
|
"logps/chosen": -688.2984619140625, |
|
"logps/rejected": -1707.145263671875, |
|
"loss": 0.1299, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08260496705770493, |
|
"rewards/margins": 0.3498184084892273, |
|
"rewards/rejected": -0.43242329359054565, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.555713060848433e-07, |
|
"logits/chosen": -1.3645676374435425, |
|
"logits/rejected": -0.04500112682580948, |
|
"logps/chosen": -643.5718994140625, |
|
"logps/rejected": -1641.6656494140625, |
|
"loss": 0.1557, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0737595409154892, |
|
"rewards/margins": 0.3242550790309906, |
|
"rewards/rejected": -0.3980146050453186, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -1.6310592889785767, |
|
"logits/rejected": -0.6514387130737305, |
|
"logps/chosen": -804.0472412109375, |
|
"logps/rejected": -1859.6383056640625, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12043702602386475, |
|
"rewards/margins": 0.3875194489955902, |
|
"rewards/rejected": -0.5079564452171326, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3546141258376786e-07, |
|
"logits/chosen": -1.3590515851974487, |
|
"logits/rejected": -0.43198928236961365, |
|
"logps/chosen": -733.1363525390625, |
|
"logps/rejected": -1653.435791015625, |
|
"loss": 0.1397, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11448194831609726, |
|
"rewards/margins": 0.33477213978767395, |
|
"rewards/rejected": -0.4492540955543518, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.257003546333042e-07, |
|
"logits/chosen": -1.4484094381332397, |
|
"logits/rejected": -0.5377854108810425, |
|
"logps/chosen": -742.8893432617188, |
|
"logps/rejected": -1920.5609130859375, |
|
"loss": 0.1697, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11689500510692596, |
|
"rewards/margins": 0.35355696082115173, |
|
"rewards/rejected": -0.4704520106315613, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -1.6482737064361572, |
|
"logits/rejected": -0.8282219767570496, |
|
"logps/chosen": -565.1773681640625, |
|
"logps/rejected": -1599.753662109375, |
|
"loss": 0.1429, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.056347571313381195, |
|
"rewards/margins": 0.3239589333534241, |
|
"rewards/rejected": -0.38030651211738586, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0677024504760752e-07, |
|
"logits/chosen": -1.502986192703247, |
|
"logits/rejected": -0.47991952300071716, |
|
"logps/chosen": -559.9749755859375, |
|
"logps/rejected": -1734.3677978515625, |
|
"loss": 0.1338, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.041349343955516815, |
|
"rewards/margins": 0.3722584545612335, |
|
"rewards/rejected": -0.41360777616500854, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9760283363267684e-07, |
|
"logits/chosen": -1.6863027811050415, |
|
"logits/rejected": -0.2434253990650177, |
|
"logps/chosen": -727.5802612304688, |
|
"logps/rejected": -1704.158203125, |
|
"loss": 0.1468, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.07457654923200607, |
|
"rewards/margins": 0.3438524007797241, |
|
"rewards/rejected": -0.4184289872646332, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -1.3177156448364258, |
|
"logits/rejected": -0.1511184722185135, |
|
"logps/chosen": -603.4699096679688, |
|
"logps/rejected": -1553.7645263671875, |
|
"loss": 0.114, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07171906530857086, |
|
"rewards/margins": 0.3328208923339844, |
|
"rewards/rejected": -0.40453997254371643, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.798672690923828e-07, |
|
"logits/chosen": -1.5630302429199219, |
|
"logits/rejected": -0.8224193453788757, |
|
"logps/chosen": -620.0535888671875, |
|
"logps/rejected": -1780.6702880859375, |
|
"loss": 0.0976, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06969192624092102, |
|
"rewards/margins": 0.3709218502044678, |
|
"rewards/rejected": -0.4406138062477112, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.713006526846439e-07, |
|
"logits/chosen": -1.5444920063018799, |
|
"logits/rejected": -0.24943670630455017, |
|
"logps/chosen": -683.787841796875, |
|
"logps/rejected": -1977.3343505859375, |
|
"loss": 0.0802, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.06767871230840683, |
|
"rewards/margins": 0.4605562686920166, |
|
"rewards/rejected": -0.5282350778579712, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -1.5846331119537354, |
|
"logits/rejected": -0.536185085773468, |
|
"logps/chosen": -703.1005859375, |
|
"logps/rejected": -1691.5517578125, |
|
"loss": 0.1945, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07973551005125046, |
|
"rewards/margins": 0.3327817916870117, |
|
"rewards/rejected": -0.4125173091888428, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5477346284948292e-07, |
|
"logits/chosen": -1.572539210319519, |
|
"logits/rejected": -0.8882861137390137, |
|
"logps/chosen": -748.1613159179688, |
|
"logps/rejected": -1634.09521484375, |
|
"loss": 0.1691, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09525839984416962, |
|
"rewards/margins": 0.28481072187423706, |
|
"rewards/rejected": -0.38006913661956787, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4681432143872133e-07, |
|
"logits/chosen": -1.2806552648544312, |
|
"logits/rejected": -0.47933030128479004, |
|
"logps/chosen": -767.8941650390625, |
|
"logps/rejected": -1739.5386962890625, |
|
"loss": 0.1768, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09690088033676147, |
|
"rewards/margins": 0.33204126358032227, |
|
"rewards/rejected": -0.42894211411476135, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -1.619302749633789, |
|
"logits/rejected": -0.4543309211730957, |
|
"logps/chosen": -705.6532592773438, |
|
"logps/rejected": -1661.1187744140625, |
|
"loss": 0.2429, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09244809299707413, |
|
"rewards/margins": 0.27722662687301636, |
|
"rewards/rejected": -0.3696747124195099, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.31508393714177e-07, |
|
"logits/chosen": -1.576765537261963, |
|
"logits/rejected": -0.9673765301704407, |
|
"logps/chosen": -700.4979248046875, |
|
"logps/rejected": -1635.6195068359375, |
|
"loss": 0.1433, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08088205009698868, |
|
"rewards/margins": 0.33026668429374695, |
|
"rewards/rejected": -0.41114872694015503, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.241629335994471e-07, |
|
"logits/chosen": -1.3158491849899292, |
|
"logits/rejected": -0.2821682393550873, |
|
"logps/chosen": -567.6187133789062, |
|
"logps/rejected": -1448.58837890625, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.06683605909347534, |
|
"rewards/margins": 0.2942168712615967, |
|
"rewards/rejected": -0.361052930355072, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -1.630567193031311, |
|
"logits/rejected": -0.7469101548194885, |
|
"logps/chosen": -718.3992919921875, |
|
"logps/rejected": -1641.1156005859375, |
|
"loss": 0.1399, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10309512913227081, |
|
"rewards/margins": 0.3157398998737335, |
|
"rewards/rejected": -0.4188350737094879, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1009020308754587e-07, |
|
"logits/chosen": -1.4223135709762573, |
|
"logits/rejected": -0.4289831519126892, |
|
"logps/chosen": -924.49609375, |
|
"logps/rejected": -1811.7252197265625, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13550476729869843, |
|
"rewards/margins": 0.3179660737514496, |
|
"rewards/rejected": -0.4534708857536316, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0336415203768962e-07, |
|
"logits/chosen": -1.4638688564300537, |
|
"logits/rejected": 0.3213498592376709, |
|
"logps/chosen": -778.1163330078125, |
|
"logps/rejected": -1671.373779296875, |
|
"loss": 0.1553, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07180814445018768, |
|
"rewards/margins": 0.3324059545993805, |
|
"rewards/rejected": -0.404214084148407, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -1.3937320709228516, |
|
"logits/rejected": -0.29455748200416565, |
|
"logps/chosen": -558.4961547851562, |
|
"logps/rejected": -1537.4754638671875, |
|
"loss": 0.1932, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07989932596683502, |
|
"rewards/margins": 0.2945837080478668, |
|
"rewards/rejected": -0.37448304891586304, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.053559223036746e-08, |
|
"logits/chosen": -1.3823366165161133, |
|
"logits/rejected": -0.34489625692367554, |
|
"logps/chosen": -750.284423828125, |
|
"logps/rejected": -1547.081298828125, |
|
"loss": 0.1481, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07611394673585892, |
|
"rewards/margins": 0.2931041121482849, |
|
"rewards/rejected": -0.36921799182891846, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.44341950176683e-08, |
|
"logits/chosen": -1.58815598487854, |
|
"logits/rejected": -0.0034358978737145662, |
|
"logps/chosen": -567.5117797851562, |
|
"logps/rejected": -1468.9776611328125, |
|
"loss": 0.1411, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.03928015008568764, |
|
"rewards/margins": 0.3018725514411926, |
|
"rewards/rejected": -0.34115269780158997, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -1.5472205877304077, |
|
"logits/rejected": -0.9048103094100952, |
|
"logps/chosen": -627.4727783203125, |
|
"logps/rejected": -1713.844970703125, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.04558110982179642, |
|
"rewards/margins": 0.3975328207015991, |
|
"rewards/rejected": -0.44311389327049255, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.285980923996989e-08, |
|
"logits/chosen": -1.4914085865020752, |
|
"logits/rejected": -0.47641056776046753, |
|
"logps/chosen": -650.9934692382812, |
|
"logps/rejected": -1637.280029296875, |
|
"loss": 0.1166, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.054985035210847855, |
|
"rewards/margins": 0.3437821865081787, |
|
"rewards/rejected": -0.3987672030925751, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -1.564391016960144, |
|
"logits/rejected": -0.30673253536224365, |
|
"logps/chosen": -597.55322265625, |
|
"logps/rejected": -1578.419677734375, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.04587271437048912, |
|
"rewards/margins": 0.32063284516334534, |
|
"rewards/rejected": -0.36650553345680237, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -1.298776626586914, |
|
"logits/rejected": -0.1061747819185257, |
|
"logps/chosen": -640.8598022460938, |
|
"logps/rejected": -1533.592529296875, |
|
"loss": 0.1815, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.05050724744796753, |
|
"rewards/margins": 0.27905362844467163, |
|
"rewards/rejected": -0.32956087589263916, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.707663716023021e-08, |
|
"logits/chosen": -1.5522761344909668, |
|
"logits/rejected": -0.26008373498916626, |
|
"logps/chosen": -703.5093994140625, |
|
"logps/rejected": -1482.5135498046875, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.06401093304157257, |
|
"rewards/margins": 0.2837941348552704, |
|
"rewards/rejected": -0.34780508279800415, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.22383298837098e-08, |
|
"logits/chosen": -1.5810019969940186, |
|
"logits/rejected": 0.17494335770606995, |
|
"logps/chosen": -811.2449340820312, |
|
"logps/rejected": -1590.089599609375, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09410594403743744, |
|
"rewards/margins": 0.29233235120773315, |
|
"rewards/rejected": -0.3864383101463318, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -1.484390139579773, |
|
"logits/rejected": -0.5827163457870483, |
|
"logps/chosen": -721.3407592773438, |
|
"logps/rejected": -1430.334716796875, |
|
"loss": 0.1909, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10846199095249176, |
|
"rewards/margins": 0.22209401428699493, |
|
"rewards/rejected": -0.3305560052394867, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.319838323396691e-08, |
|
"logits/chosen": -1.5092474222183228, |
|
"logits/rejected": -0.8641079068183899, |
|
"logps/chosen": -654.3946533203125, |
|
"logps/rejected": -1624.559326171875, |
|
"loss": 0.1749, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07206736505031586, |
|
"rewards/margins": 0.3177274167537689, |
|
"rewards/rejected": -0.3897947669029236, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8997527136930004e-08, |
|
"logits/chosen": -1.3939855098724365, |
|
"logits/rejected": -0.9460384249687195, |
|
"logps/chosen": -721.7215576171875, |
|
"logps/rejected": -1758.743408203125, |
|
"loss": 0.1378, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1156727522611618, |
|
"rewards/margins": 0.3109985291957855, |
|
"rewards/rejected": -0.4266713261604309, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -1.390904188156128, |
|
"logits/rejected": -0.4405292570590973, |
|
"logps/chosen": -571.7150268554688, |
|
"logps/rejected": -1526.440673828125, |
|
"loss": 0.172, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.04342179745435715, |
|
"rewards/margins": 0.2945147156715393, |
|
"rewards/rejected": -0.33793652057647705, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.1235869306123766e-08, |
|
"logits/chosen": -1.743300199508667, |
|
"logits/rejected": -0.7482790350914001, |
|
"logps/chosen": -623.4041748046875, |
|
"logps/rejected": -1765.6488037109375, |
|
"loss": 0.1102, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.057156067341566086, |
|
"rewards/margins": 0.3713191747665405, |
|
"rewards/rejected": -0.4284752309322357, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.767574008979007e-08, |
|
"logits/chosen": -1.62771475315094, |
|
"logits/rejected": -0.4970121383666992, |
|
"logps/chosen": -712.7774047851562, |
|
"logps/rejected": -1704.322021484375, |
|
"loss": 0.1109, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07325717061758041, |
|
"rewards/margins": 0.3189954161643982, |
|
"rewards/rejected": -0.3922525942325592, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -1.5333154201507568, |
|
"logits/rejected": -0.363052636384964, |
|
"logps/chosen": -755.8592529296875, |
|
"logps/rejected": -1792.1357421875, |
|
"loss": 0.1142, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.06991975009441376, |
|
"rewards/margins": 0.3443168103694916, |
|
"rewards/rejected": -0.41423654556274414, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1198423385220822e-08, |
|
"logits/chosen": -1.5201735496520996, |
|
"logits/rejected": -0.7737599611282349, |
|
"logps/chosen": -648.6341552734375, |
|
"logps/rejected": -1637.8931884765625, |
|
"loss": 0.1757, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.046808890998363495, |
|
"rewards/margins": 0.346151202917099, |
|
"rewards/rejected": -0.3929601311683655, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.82817971312621e-08, |
|
"logits/chosen": -1.6378930807113647, |
|
"logits/rejected": -0.5780726671218872, |
|
"logps/chosen": -671.5693969726562, |
|
"logps/rejected": -1713.7747802734375, |
|
"loss": 0.1688, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07191529124975204, |
|
"rewards/margins": 0.33269262313842773, |
|
"rewards/rejected": -0.4046078622341156, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -1.6032174825668335, |
|
"logits/rejected": -0.24984005093574524, |
|
"logps/chosen": -638.01123046875, |
|
"logps/rejected": -1515.741943359375, |
|
"loss": 0.1446, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06576866656541824, |
|
"rewards/margins": 0.31248709559440613, |
|
"rewards/rejected": -0.37825578451156616, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3093872369654148e-08, |
|
"logits/chosen": -1.462215781211853, |
|
"logits/rejected": 0.025119613856077194, |
|
"logps/chosen": -680.1953125, |
|
"logps/rejected": -1622.0228271484375, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08275710791349411, |
|
"rewards/margins": 0.2952966094017029, |
|
"rewards/rejected": -0.3780537247657776, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0823023375489128e-08, |
|
"logits/chosen": -1.6868362426757812, |
|
"logits/rejected": -0.5641859173774719, |
|
"logps/chosen": -618.9349975585938, |
|
"logps/rejected": -1718.207275390625, |
|
"loss": 0.107, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.05680794641375542, |
|
"rewards/margins": 0.3729218542575836, |
|
"rewards/rejected": -0.42972975969314575, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -1.2520034313201904, |
|
"logits/rejected": -0.46264973282814026, |
|
"logps/chosen": -756.2525024414062, |
|
"logps/rejected": -1568.090576171875, |
|
"loss": 0.1772, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10075948387384415, |
|
"rewards/margins": 0.28606051206588745, |
|
"rewards/rejected": -0.3868200182914734, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.9285359445145366e-09, |
|
"logits/chosen": -1.6095876693725586, |
|
"logits/rejected": -0.49465712904930115, |
|
"logps/chosen": -636.206298828125, |
|
"logps/rejected": -1638.2972412109375, |
|
"loss": 0.1288, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03626566380262375, |
|
"rewards/margins": 0.34111684560775757, |
|
"rewards/rejected": -0.3773825466632843, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.305234949880001e-09, |
|
"logits/chosen": -1.4576680660247803, |
|
"logits/rejected": -0.8192172050476074, |
|
"logps/chosen": -602.4050903320312, |
|
"logps/rejected": -1541.21142578125, |
|
"loss": 0.2052, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.056397415697574615, |
|
"rewards/margins": 0.27618226408958435, |
|
"rewards/rejected": -0.33257967233657837, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -1.4624910354614258, |
|
"logits/rejected": -0.06623925268650055, |
|
"logps/chosen": -615.46142578125, |
|
"logps/rejected": -1512.7928466796875, |
|
"loss": 0.1562, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.050837211310863495, |
|
"rewards/margins": 0.3221223056316376, |
|
"rewards/rejected": -0.37295952439308167, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7072216536885855e-09, |
|
"logits/chosen": -1.576015591621399, |
|
"logits/rejected": -0.6331445574760437, |
|
"logps/chosen": -652.0988159179688, |
|
"logps/rejected": -1652.5101318359375, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0720575600862503, |
|
"rewards/margins": 0.32736578583717346, |
|
"rewards/rejected": -0.3994233012199402, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.7327344598702667e-09, |
|
"logits/chosen": -1.4119715690612793, |
|
"logits/rejected": -0.8666298985481262, |
|
"logps/chosen": -634.987548828125, |
|
"logps/rejected": -1896.078125, |
|
"loss": 0.0889, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.053931184113025665, |
|
"rewards/margins": 0.42121997475624084, |
|
"rewards/rejected": -0.4751511216163635, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -1.3923088312149048, |
|
"logits/rejected": -0.4560534358024597, |
|
"logps/chosen": -703.5958862304688, |
|
"logps/rejected": -1692.0693359375, |
|
"loss": 0.1027, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07977847754955292, |
|
"rewards/margins": 0.34622496366500854, |
|
"rewards/rejected": -0.42600345611572266, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.332211510807427e-10, |
|
"logits/chosen": -1.3781083822250366, |
|
"logits/rejected": -0.20015454292297363, |
|
"logps/chosen": -547.2339477539062, |
|
"logps/rejected": -1573.963623046875, |
|
"loss": 0.1506, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0455051027238369, |
|
"rewards/margins": 0.3225332200527191, |
|
"rewards/rejected": -0.3680383563041687, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0830763387897902e-10, |
|
"logits/chosen": -1.4161818027496338, |
|
"logits/rejected": 0.6542869806289673, |
|
"logps/chosen": -718.9110107421875, |
|
"logps/rejected": -1792.678955078125, |
|
"loss": 0.0937, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0775211825966835, |
|
"rewards/margins": 0.3633776307106018, |
|
"rewards/rejected": -0.4408988058567047, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.728247046470642, |
|
"logits/rejected": -1.0278023481369019, |
|
"logps/chosen": -706.55908203125, |
|
"logps/rejected": -1668.5732421875, |
|
"loss": 0.1603, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07834981381893158, |
|
"rewards/margins": 0.3416889011859894, |
|
"rewards/rejected": -0.42003870010375977, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3750, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1829929338614146, |
|
"train_runtime": 17952.2095, |
|
"train_samples_per_second": 0.836, |
|
"train_steps_per_second": 0.209 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|