|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 3750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333334e-08, |
|
"logits/chosen": -1.9468104839324951, |
|
"logits/rejected": -1.3551281690597534, |
|
"logps/chosen": -418.5311279296875, |
|
"logps/rejected": -228.03335571289062, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3333333333333336e-07, |
|
"logits/chosen": -2.1891419887542725, |
|
"logits/rejected": -1.312096118927002, |
|
"logps/chosen": -321.9786682128906, |
|
"logps/rejected": -262.5549011230469, |
|
"loss": 0.3989, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": -0.00040232870378531516, |
|
"rewards/margins": 0.00023253644758369774, |
|
"rewards/rejected": -0.0006348651950247586, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.666666666666667e-07, |
|
"logits/chosen": -1.7885940074920654, |
|
"logits/rejected": -1.4744794368743896, |
|
"logps/chosen": -270.1197204589844, |
|
"logps/rejected": -280.1589050292969, |
|
"loss": 0.3157, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0014780608471482992, |
|
"rewards/margins": 0.002728077583014965, |
|
"rewards/rejected": -0.0012500169686973095, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -1.9892597198486328, |
|
"logits/rejected": -1.2171941995620728, |
|
"logps/chosen": -319.71331787109375, |
|
"logps/rejected": -257.2882995605469, |
|
"loss": 0.3278, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.00036752174491994083, |
|
"rewards/margins": 0.0073011466301977634, |
|
"rewards/rejected": -0.006933624390512705, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.333333333333335e-07, |
|
"logits/chosen": -1.955718994140625, |
|
"logits/rejected": -1.1262027025222778, |
|
"logps/chosen": -356.73663330078125, |
|
"logps/rejected": -266.71392822265625, |
|
"loss": 0.2937, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.003951634746044874, |
|
"rewards/margins": 0.028395619243383408, |
|
"rewards/rejected": -0.024443982169032097, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.666666666666667e-07, |
|
"logits/chosen": -1.9514992237091064, |
|
"logits/rejected": -1.382391333580017, |
|
"logps/chosen": -284.41790771484375, |
|
"logps/rejected": -230.9414825439453, |
|
"loss": 0.2014, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0021220450289547443, |
|
"rewards/margins": 0.07282118499279022, |
|
"rewards/rejected": -0.07069914042949677, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -1.9106193780899048, |
|
"logits/rejected": -1.3348861932754517, |
|
"logps/chosen": -272.3318786621094, |
|
"logps/rejected": -241.1548309326172, |
|
"loss": 0.2992, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0524127297103405, |
|
"rewards/margins": 0.1212296113371849, |
|
"rewards/rejected": -0.1736423522233963, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.333333333333334e-07, |
|
"logits/chosen": -1.9291963577270508, |
|
"logits/rejected": -1.4450151920318604, |
|
"logps/chosen": -287.9643249511719, |
|
"logps/rejected": -281.98760986328125, |
|
"loss": 0.2153, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.07112433016300201, |
|
"rewards/margins": 0.18519330024719238, |
|
"rewards/rejected": -0.256317675113678, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.066666666666667e-06, |
|
"logits/chosen": -1.8260694742202759, |
|
"logits/rejected": -1.2290842533111572, |
|
"logps/chosen": -311.75787353515625, |
|
"logps/rejected": -303.3421325683594, |
|
"loss": 0.1991, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.050730206072330475, |
|
"rewards/margins": 0.30313217639923096, |
|
"rewards/rejected": -0.35386237502098083, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -2.1392667293548584, |
|
"logits/rejected": -1.5895378589630127, |
|
"logps/chosen": -264.8464050292969, |
|
"logps/rejected": -284.0433349609375, |
|
"loss": 0.1789, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06132856756448746, |
|
"rewards/margins": 0.34737664461135864, |
|
"rewards/rejected": -0.4087051749229431, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"logits/chosen": -2.010281801223755, |
|
"logits/rejected": -1.4457181692123413, |
|
"logps/chosen": -280.8337707519531, |
|
"logps/rejected": -301.07421875, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.003755016950890422, |
|
"rewards/margins": 0.3854002356529236, |
|
"rewards/rejected": -0.38164520263671875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4666666666666669e-06, |
|
"logits/chosen": -1.8176605701446533, |
|
"logits/rejected": -1.4668805599212646, |
|
"logps/chosen": -213.2742919921875, |
|
"logps/rejected": -196.9753875732422, |
|
"loss": 0.2645, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.08552353084087372, |
|
"rewards/margins": 0.1824944019317627, |
|
"rewards/rejected": -0.2680179476737976, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -1.8461990356445312, |
|
"logits/rejected": -1.6458969116210938, |
|
"logps/chosen": -206.00204467773438, |
|
"logps/rejected": -255.66989135742188, |
|
"loss": 0.2099, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.07237216085195541, |
|
"rewards/margins": 0.22069768607616425, |
|
"rewards/rejected": -0.29306983947753906, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7333333333333336e-06, |
|
"logits/chosen": -2.1012439727783203, |
|
"logits/rejected": -1.28009831905365, |
|
"logps/chosen": -324.0368957519531, |
|
"logps/rejected": -254.3141632080078, |
|
"loss": 0.0919, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.01445714384317398, |
|
"rewards/margins": 0.3454706370830536, |
|
"rewards/rejected": -0.3310135304927826, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"logits/chosen": -1.9437087774276733, |
|
"logits/rejected": -1.5214288234710693, |
|
"logps/chosen": -294.4078063964844, |
|
"logps/rejected": -301.8307800292969, |
|
"loss": 0.1193, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.07305511832237244, |
|
"rewards/margins": 0.38559332489967346, |
|
"rewards/rejected": -0.4586483836174011, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -1.9032337665557861, |
|
"logits/rejected": -1.50299870967865, |
|
"logps/chosen": -223.5890655517578, |
|
"logps/rejected": -276.28955078125, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.035106100142002106, |
|
"rewards/margins": 0.4305610656738281, |
|
"rewards/rejected": -0.46566715836524963, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.133333333333334e-06, |
|
"logits/chosen": -1.9716815948486328, |
|
"logits/rejected": -1.577401876449585, |
|
"logps/chosen": -264.44256591796875, |
|
"logps/rejected": -326.4450988769531, |
|
"loss": 0.1267, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.05759359151124954, |
|
"rewards/margins": 0.33429816365242004, |
|
"rewards/rejected": -0.3918917775154114, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.266666666666667e-06, |
|
"logits/chosen": -1.8987305164337158, |
|
"logits/rejected": -1.3819479942321777, |
|
"logps/chosen": -205.10104370117188, |
|
"logps/rejected": -223.96218872070312, |
|
"loss": 0.1927, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09939566999673843, |
|
"rewards/margins": 0.24186280369758606, |
|
"rewards/rejected": -0.3412584662437439, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -2.106255292892456, |
|
"logits/rejected": -1.655613660812378, |
|
"logps/chosen": -264.42547607421875, |
|
"logps/rejected": -308.27130126953125, |
|
"loss": 0.2318, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09074126183986664, |
|
"rewards/margins": 0.29490557312965393, |
|
"rewards/rejected": -0.385646790266037, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5333333333333338e-06, |
|
"logits/chosen": -2.1222376823425293, |
|
"logits/rejected": -1.7176287174224854, |
|
"logps/chosen": -266.733154296875, |
|
"logps/rejected": -274.0609436035156, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11357426643371582, |
|
"rewards/margins": 0.35620003938674927, |
|
"rewards/rejected": -0.4697743356227875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.666666666666667e-06, |
|
"logits/chosen": -1.969417929649353, |
|
"logits/rejected": -1.3014360666275024, |
|
"logps/chosen": -277.68536376953125, |
|
"logps/rejected": -318.40496826171875, |
|
"loss": 0.1694, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08816908299922943, |
|
"rewards/margins": 0.3808407485485077, |
|
"rewards/rejected": -0.4690098166465759, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -2.0031707286834717, |
|
"logits/rejected": -1.5582258701324463, |
|
"logps/chosen": -235.62319946289062, |
|
"logps/rejected": -280.3308410644531, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.06687523424625397, |
|
"rewards/margins": 0.30328303575515747, |
|
"rewards/rejected": -0.37015828490257263, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"logits/chosen": -2.026477336883545, |
|
"logits/rejected": -1.461743950843811, |
|
"logps/chosen": -259.7619934082031, |
|
"logps/rejected": -246.75588989257812, |
|
"loss": 0.1748, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11929011344909668, |
|
"rewards/margins": 0.2787408232688904, |
|
"rewards/rejected": -0.39803093671798706, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.066666666666667e-06, |
|
"logits/chosen": -2.0350136756896973, |
|
"logits/rejected": -1.6047760248184204, |
|
"logps/chosen": -291.62371826171875, |
|
"logps/rejected": -326.29083251953125, |
|
"loss": 0.1776, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.035884954035282135, |
|
"rewards/margins": 0.35993653535842896, |
|
"rewards/rejected": -0.3958215117454529, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -1.63616144657135, |
|
"logits/rejected": -1.2675909996032715, |
|
"logps/chosen": -295.3503723144531, |
|
"logps/rejected": -272.33544921875, |
|
"loss": 0.1512, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.04819333180785179, |
|
"rewards/margins": 0.3286735713481903, |
|
"rewards/rejected": -0.3768669664859772, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -2.153151035308838, |
|
"logits/rejected": -1.5773793458938599, |
|
"logps/chosen": -258.7474670410156, |
|
"logps/rejected": -249.0330047607422, |
|
"loss": 0.2077, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.07630246877670288, |
|
"rewards/margins": 0.2940545678138733, |
|
"rewards/rejected": -0.37035703659057617, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"logits/chosen": -1.8532987833023071, |
|
"logits/rejected": -1.365553855895996, |
|
"logps/chosen": -308.9462585449219, |
|
"logps/rejected": -293.080810546875, |
|
"loss": 0.0988, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.003472552401944995, |
|
"rewards/margins": 0.37654823064804077, |
|
"rewards/rejected": -0.38002076745033264, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -1.8927972316741943, |
|
"logits/rejected": -1.3772236108779907, |
|
"logps/chosen": -268.60845947265625, |
|
"logps/rejected": -279.3491516113281, |
|
"loss": 0.1617, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.05976073071360588, |
|
"rewards/margins": 0.3105442523956299, |
|
"rewards/rejected": -0.37030500173568726, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"logits/chosen": -2.0276782512664795, |
|
"logits/rejected": -1.21445894241333, |
|
"logps/chosen": -353.6800842285156, |
|
"logps/rejected": -307.1755676269531, |
|
"loss": 0.1506, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0015403844881802797, |
|
"rewards/margins": 0.5047177672386169, |
|
"rewards/rejected": -0.5062581300735474, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.866666666666667e-06, |
|
"logits/chosen": -2.00919771194458, |
|
"logits/rejected": -1.3203434944152832, |
|
"logps/chosen": -269.7672119140625, |
|
"logps/rejected": -210.7847900390625, |
|
"loss": 0.1969, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.009751735255122185, |
|
"rewards/margins": 0.2548676133155823, |
|
"rewards/rejected": -0.24511587619781494, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -1.9286314249038696, |
|
"logits/rejected": -1.4645249843597412, |
|
"logps/chosen": -261.30047607421875, |
|
"logps/rejected": -284.4623718261719, |
|
"loss": 0.2013, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03132876753807068, |
|
"rewards/margins": 0.20891091227531433, |
|
"rewards/rejected": -0.24023966491222382, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.133333333333333e-06, |
|
"logits/chosen": -1.958105444908142, |
|
"logits/rejected": -1.4698688983917236, |
|
"logps/chosen": -237.3049774169922, |
|
"logps/rejected": -291.87469482421875, |
|
"loss": 0.1677, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07907866686582565, |
|
"rewards/margins": 0.34895187616348267, |
|
"rewards/rejected": -0.4280305802822113, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.266666666666668e-06, |
|
"logits/chosen": -1.717402696609497, |
|
"logits/rejected": -1.14840567111969, |
|
"logps/chosen": -294.26641845703125, |
|
"logps/rejected": -323.1462707519531, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0834791511297226, |
|
"rewards/margins": 0.3541187047958374, |
|
"rewards/rejected": -0.4375979006290436, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -1.8336519002914429, |
|
"logits/rejected": -1.480302333831787, |
|
"logps/chosen": -237.1054229736328, |
|
"logps/rejected": -276.50689697265625, |
|
"loss": 0.0847, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.024688202887773514, |
|
"rewards/margins": 0.40132126212120056, |
|
"rewards/rejected": -0.3766331076622009, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.533333333333334e-06, |
|
"logits/chosen": -1.9516515731811523, |
|
"logits/rejected": -1.399285912513733, |
|
"logps/chosen": -326.23236083984375, |
|
"logps/rejected": -318.9416198730469, |
|
"loss": 0.1551, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.04323751479387283, |
|
"rewards/margins": 0.38205739855766296, |
|
"rewards/rejected": -0.3388199210166931, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.666666666666667e-06, |
|
"logits/chosen": -1.6592636108398438, |
|
"logits/rejected": -1.313191533088684, |
|
"logps/chosen": -246.4650421142578, |
|
"logps/rejected": -316.64849853515625, |
|
"loss": 0.226, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.002355779055505991, |
|
"rewards/margins": 0.34276098012924194, |
|
"rewards/rejected": -0.3404052257537842, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -1.8236520290374756, |
|
"logits/rejected": -1.3795961141586304, |
|
"logps/chosen": -311.75341796875, |
|
"logps/rejected": -295.3544921875, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.029393743723630905, |
|
"rewards/margins": 0.3503008484840393, |
|
"rewards/rejected": -0.3209070861339569, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.933333333333334e-06, |
|
"logits/chosen": -1.7931430339813232, |
|
"logits/rejected": -1.2451846599578857, |
|
"logps/chosen": -292.35650634765625, |
|
"logps/rejected": -278.6131286621094, |
|
"loss": 0.1793, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10223956406116486, |
|
"rewards/margins": 0.31418344378471375, |
|
"rewards/rejected": -0.41642293334007263, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999972922944898e-06, |
|
"logits/chosen": -1.858270287513733, |
|
"logits/rejected": -1.498663306236267, |
|
"logps/chosen": -246.0679168701172, |
|
"logps/rejected": -283.5242614746094, |
|
"loss": 0.1688, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.23864665627479553, |
|
"rewards/margins": 0.2593352198600769, |
|
"rewards/rejected": -0.49798187613487244, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999756310023261e-06, |
|
"logits/chosen": -2.070420265197754, |
|
"logits/rejected": -1.164734959602356, |
|
"logps/chosen": -382.75543212890625, |
|
"logps/rejected": -294.0628662109375, |
|
"loss": 0.112, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1453527957201004, |
|
"rewards/margins": 0.3971993327140808, |
|
"rewards/rejected": -0.54255211353302, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999323102948655e-06, |
|
"logits/chosen": -1.7827831506729126, |
|
"logits/rejected": -1.5060867071151733, |
|
"logps/chosen": -227.31982421875, |
|
"logps/rejected": -290.09149169921875, |
|
"loss": 0.1737, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17244981229305267, |
|
"rewards/margins": 0.3016803562641144, |
|
"rewards/rejected": -0.47413015365600586, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.998673339256785e-06, |
|
"logits/chosen": -1.8005807399749756, |
|
"logits/rejected": -1.5056277513504028, |
|
"logps/chosen": -285.5323791503906, |
|
"logps/rejected": -313.2432556152344, |
|
"loss": 0.1204, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14575102925300598, |
|
"rewards/margins": 0.3333708941936493, |
|
"rewards/rejected": -0.4791219234466553, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997807075247147e-06, |
|
"logits/chosen": -1.7242355346679688, |
|
"logits/rejected": -1.4590495824813843, |
|
"logps/chosen": -228.1505126953125, |
|
"logps/rejected": -295.41241455078125, |
|
"loss": 0.1442, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12705926597118378, |
|
"rewards/margins": 0.3108959197998047, |
|
"rewards/rejected": -0.43795520067214966, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.996724385978142e-06, |
|
"logits/chosen": -1.8692153692245483, |
|
"logits/rejected": -1.2414253950119019, |
|
"logps/chosen": -298.9205322265625, |
|
"logps/rejected": -313.76934814453125, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0782383382320404, |
|
"rewards/margins": 0.44491782784461975, |
|
"rewards/rejected": -0.5231561064720154, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995425365260585e-06, |
|
"logits/chosen": -2.0321779251098633, |
|
"logits/rejected": -1.462066411972046, |
|
"logps/chosen": -301.1550598144531, |
|
"logps/rejected": -312.3240966796875, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.031835995614528656, |
|
"rewards/margins": 0.3314369320869446, |
|
"rewards/rejected": -0.3632729649543762, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993910125649561e-06, |
|
"logits/chosen": -1.8992176055908203, |
|
"logits/rejected": -1.2387077808380127, |
|
"logps/chosen": -250.6095428466797, |
|
"logps/rejected": -202.1986083984375, |
|
"loss": 0.174, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.06963483989238739, |
|
"rewards/margins": 0.25463372468948364, |
|
"rewards/rejected": -0.32426854968070984, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992178798434684e-06, |
|
"logits/chosen": -1.8955596685409546, |
|
"logits/rejected": -1.1744760274887085, |
|
"logps/chosen": -389.3876647949219, |
|
"logps/rejected": -292.69659423828125, |
|
"loss": 0.1316, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.06589002162218094, |
|
"rewards/margins": 0.41696634888648987, |
|
"rewards/rejected": -0.4828563630580902, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990231533628719e-06, |
|
"logits/chosen": -1.8016172647476196, |
|
"logits/rejected": -1.3248649835586548, |
|
"logps/chosen": -238.6211700439453, |
|
"logps/rejected": -275.3337707519531, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.05240337923169136, |
|
"rewards/margins": 0.3930678069591522, |
|
"rewards/rejected": -0.4454711973667145, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.988068499954578e-06, |
|
"logits/chosen": -1.6587965488433838, |
|
"logits/rejected": -1.3655837774276733, |
|
"logps/chosen": -190.276611328125, |
|
"logps/rejected": -268.18341064453125, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.02386285737156868, |
|
"rewards/margins": 0.3009086549282074, |
|
"rewards/rejected": -0.3247714936733246, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.985689884830711e-06, |
|
"logits/chosen": -2.0086019039154053, |
|
"logits/rejected": -1.4022401571273804, |
|
"logps/chosen": -269.7921447753906, |
|
"logps/rejected": -315.36761474609375, |
|
"loss": 0.1211, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0037451249081641436, |
|
"rewards/margins": 0.34425827860832214, |
|
"rewards/rejected": -0.3480033874511719, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -2.0869174003601074, |
|
"logits/rejected": -1.3568693399429321, |
|
"logps/chosen": -354.6680603027344, |
|
"logps/rejected": -353.8328552246094, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07166734337806702, |
|
"rewards/margins": 0.38118380308151245, |
|
"rewards/rejected": -0.45285120606422424, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980286753286196e-06, |
|
"logits/chosen": -1.717799186706543, |
|
"logits/rejected": -1.27217698097229, |
|
"logps/chosen": -256.9685974121094, |
|
"logps/rejected": -279.153076171875, |
|
"loss": 0.1799, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14614014327526093, |
|
"rewards/margins": 0.37342625856399536, |
|
"rewards/rejected": -0.5195664167404175, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97726270502586e-06, |
|
"logits/chosen": -1.9094120264053345, |
|
"logits/rejected": -1.4152483940124512, |
|
"logps/chosen": -214.5406951904297, |
|
"logps/rejected": -217.8648223876953, |
|
"loss": 0.1649, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2031155824661255, |
|
"rewards/margins": 0.3454675078392029, |
|
"rewards/rejected": -0.5485831499099731, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.974024011595864e-06, |
|
"logits/chosen": -1.9784595966339111, |
|
"logits/rejected": -1.203086495399475, |
|
"logps/chosen": -336.6568908691406, |
|
"logps/rejected": -286.4167785644531, |
|
"loss": 0.1357, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.10868784040212631, |
|
"rewards/margins": 0.566789984703064, |
|
"rewards/rejected": -0.6754778623580933, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970570953616383e-06, |
|
"logits/chosen": -1.8285566568374634, |
|
"logits/rejected": -1.075903296470642, |
|
"logps/chosen": -302.0743103027344, |
|
"logps/rejected": -253.8324432373047, |
|
"loss": 0.124, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14791302382946014, |
|
"rewards/margins": 0.370217502117157, |
|
"rewards/rejected": -0.5181306004524231, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.966903830281449e-06, |
|
"logits/chosen": -1.774610161781311, |
|
"logits/rejected": -1.4740030765533447, |
|
"logps/chosen": -263.2217712402344, |
|
"logps/rejected": -301.24090576171875, |
|
"loss": 0.1898, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15221451222896576, |
|
"rewards/margins": 0.2885417342185974, |
|
"rewards/rejected": -0.440756231546402, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9630229593330226e-06, |
|
"logits/chosen": -1.808828592300415, |
|
"logits/rejected": -1.260229468345642, |
|
"logps/chosen": -251.52474975585938, |
|
"logps/rejected": -294.1851806640625, |
|
"loss": 0.1529, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11935459077358246, |
|
"rewards/margins": 0.34634923934936523, |
|
"rewards/rejected": -0.4657038748264313, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.958928677033465e-06, |
|
"logits/chosen": -1.7420295476913452, |
|
"logits/rejected": -1.058304786682129, |
|
"logps/chosen": -264.7672424316406, |
|
"logps/rejected": -247.16189575195312, |
|
"loss": 0.164, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.18925343453884125, |
|
"rewards/margins": 0.3564419448375702, |
|
"rewards/rejected": -0.545695424079895, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.954621338136399e-06, |
|
"logits/chosen": -1.4567300081253052, |
|
"logits/rejected": -0.9836466908454895, |
|
"logps/chosen": -324.0819396972656, |
|
"logps/rejected": -318.97796630859375, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11459589004516602, |
|
"rewards/margins": 0.3253883421421051, |
|
"rewards/rejected": -0.4399842321872711, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.95010131585597e-06, |
|
"logits/chosen": -1.8161170482635498, |
|
"logits/rejected": -1.4203059673309326, |
|
"logps/chosen": -230.9877166748047, |
|
"logps/rejected": -267.8335266113281, |
|
"loss": 0.1543, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07126758992671967, |
|
"rewards/margins": 0.3637334704399109, |
|
"rewards/rejected": -0.43500104546546936, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -1.8937923908233643, |
|
"logits/rejected": -1.3107613325119019, |
|
"logps/chosen": -273.46002197265625, |
|
"logps/rejected": -197.23764038085938, |
|
"loss": 0.175, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.032990988343954086, |
|
"rewards/margins": 0.2917155623435974, |
|
"rewards/rejected": -0.324706494808197, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.940424806108619e-06, |
|
"logits/chosen": -1.6622874736785889, |
|
"logits/rejected": -1.2222545146942139, |
|
"logps/chosen": -258.8514404296875, |
|
"logps/rejected": -311.0167236328125, |
|
"loss": 0.1296, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07337610423564911, |
|
"rewards/margins": 0.39577198028564453, |
|
"rewards/rejected": -0.46914809942245483, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.935269157073597e-06, |
|
"logits/chosen": -1.6483392715454102, |
|
"logits/rejected": -1.3003222942352295, |
|
"logps/chosen": -242.5626983642578, |
|
"logps/rejected": -345.08624267578125, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.01702979765832424, |
|
"rewards/margins": 0.4016871452331543, |
|
"rewards/rejected": -0.4187169671058655, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9299025014463665e-06, |
|
"logits/chosen": -1.6613142490386963, |
|
"logits/rejected": -1.1542552709579468, |
|
"logps/chosen": -288.81915283203125, |
|
"logps/rejected": -262.18365478515625, |
|
"loss": 0.123, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.008388234302401543, |
|
"rewards/margins": 0.43170255422592163, |
|
"rewards/rejected": -0.4400908350944519, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924325304226745e-06, |
|
"logits/chosen": -1.544345736503601, |
|
"logits/rejected": -1.0032846927642822, |
|
"logps/chosen": -304.28668212890625, |
|
"logps/rejected": -205.69210815429688, |
|
"loss": 0.1941, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.11076553165912628, |
|
"rewards/margins": 0.2719104588031769, |
|
"rewards/rejected": -0.38267600536346436, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.91853804865716e-06, |
|
"logits/chosen": -1.6050885915756226, |
|
"logits/rejected": -1.3113044500350952, |
|
"logps/chosen": -191.19522094726562, |
|
"logps/rejected": -226.1024932861328, |
|
"loss": 0.1883, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.07253624498844147, |
|
"rewards/margins": 0.2440672218799591, |
|
"rewards/rejected": -0.31660348176956177, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912541236180779e-06, |
|
"logits/chosen": -1.586315393447876, |
|
"logits/rejected": -1.435160756111145, |
|
"logps/chosen": -202.68026733398438, |
|
"logps/rejected": -291.9640197753906, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.04099782556295395, |
|
"rewards/margins": 0.39952850341796875, |
|
"rewards/rejected": -0.3585307002067566, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9063353863980565e-06, |
|
"logits/chosen": -1.5614166259765625, |
|
"logits/rejected": -1.4164257049560547, |
|
"logps/chosen": -227.6790008544922, |
|
"logps/rejected": -290.1402893066406, |
|
"loss": 0.1119, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13384926319122314, |
|
"rewards/margins": 0.3537839353084564, |
|
"rewards/rejected": -0.48763322830200195, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.899921037021719e-06, |
|
"logits/chosen": -1.6380398273468018, |
|
"logits/rejected": -1.1617525815963745, |
|
"logps/chosen": -242.23193359375, |
|
"logps/rejected": -266.6896057128906, |
|
"loss": 0.1565, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10104341804981232, |
|
"rewards/margins": 0.41885095834732056, |
|
"rewards/rejected": -0.5198943614959717, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.893298743830168e-06, |
|
"logits/chosen": -1.5904242992401123, |
|
"logits/rejected": -1.138726830482483, |
|
"logps/chosen": -317.62701416015625, |
|
"logps/rejected": -297.424072265625, |
|
"loss": 0.163, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16763003170490265, |
|
"rewards/margins": 0.35251811146736145, |
|
"rewards/rejected": -0.5201481580734253, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.88646908061933e-06, |
|
"logits/chosen": -1.7436052560806274, |
|
"logits/rejected": -1.1463674306869507, |
|
"logps/chosen": -322.777587890625, |
|
"logps/rejected": -294.9422912597656, |
|
"loss": 0.1452, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.07538704574108124, |
|
"rewards/margins": 0.40292349457740784, |
|
"rewards/rejected": -0.4783105254173279, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.879432639152935e-06, |
|
"logits/chosen": -1.7265838384628296, |
|
"logits/rejected": -1.3548028469085693, |
|
"logps/chosen": -256.05902099609375, |
|
"logps/rejected": -303.35430908203125, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.07280706614255905, |
|
"rewards/margins": 0.35696297883987427, |
|
"rewards/rejected": -0.4297700524330139, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8721900291112415e-06, |
|
"logits/chosen": -1.704933524131775, |
|
"logits/rejected": -1.3638499975204468, |
|
"logps/chosen": -240.88784790039062, |
|
"logps/rejected": -307.12860107421875, |
|
"loss": 0.1792, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.04838447645306587, |
|
"rewards/margins": 0.36189574003219604, |
|
"rewards/rejected": -0.41028016805648804, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.864741878038218e-06, |
|
"logits/chosen": -1.490235686302185, |
|
"logits/rejected": -1.2437798976898193, |
|
"logps/chosen": -210.82626342773438, |
|
"logps/rejected": -235.6994171142578, |
|
"loss": 0.1351, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.02063031867146492, |
|
"rewards/margins": 0.4025086462497711, |
|
"rewards/rejected": -0.42313894629478455, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.857088831287158e-06, |
|
"logits/chosen": -1.521240234375, |
|
"logits/rejected": -1.1565624475479126, |
|
"logps/chosen": -254.61605834960938, |
|
"logps/rejected": -277.0546569824219, |
|
"loss": 0.167, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.011497171595692635, |
|
"rewards/margins": 0.3846563696861267, |
|
"rewards/rejected": -0.3731592297554016, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -1.518618106842041, |
|
"logits/rejected": -0.9731992483139038, |
|
"logps/chosen": -308.2921142578125, |
|
"logps/rejected": -279.6849365234375, |
|
"loss": 0.1304, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.006476040929555893, |
|
"rewards/margins": 0.32605165243148804, |
|
"rewards/rejected": -0.31957560777664185, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.841170720873723e-06, |
|
"logits/chosen": -1.4644962549209595, |
|
"logits/rejected": -0.6850159764289856, |
|
"logps/chosen": -292.0091247558594, |
|
"logps/rejected": -256.04095458984375, |
|
"loss": 0.171, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.06350520253181458, |
|
"rewards/margins": 0.3185378611087799, |
|
"rewards/rejected": -0.3820430338382721, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.832907036453647e-06, |
|
"logits/chosen": -1.5311912298202515, |
|
"logits/rejected": -1.4509427547454834, |
|
"logps/chosen": -149.7740478515625, |
|
"logps/rejected": -253.37808227539062, |
|
"loss": 0.1809, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.014500129036605358, |
|
"rewards/margins": 0.3270939886569977, |
|
"rewards/rejected": -0.3415941298007965, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits/chosen": -1.3183726072311401, |
|
"logits/rejected": -1.2068135738372803, |
|
"logps/chosen": -227.69140625, |
|
"logps/rejected": -288.60577392578125, |
|
"loss": 0.1621, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.009285451844334602, |
|
"rewards/margins": 0.39103344082832336, |
|
"rewards/rejected": -0.3817480206489563, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.815773989205165e-06, |
|
"logits/chosen": -1.6014219522476196, |
|
"logits/rejected": -1.1616547107696533, |
|
"logps/chosen": -275.4478454589844, |
|
"logps/rejected": -282.85992431640625, |
|
"loss": 0.1306, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.057247720658779144, |
|
"rewards/margins": 0.3137260377407074, |
|
"rewards/rejected": -0.3709737956523895, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.806906110888606e-06, |
|
"logits/chosen": -1.7149804830551147, |
|
"logits/rejected": -1.0609508752822876, |
|
"logps/chosen": -247.8963165283203, |
|
"logps/rejected": -251.2998504638672, |
|
"loss": 0.1311, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.028542649000883102, |
|
"rewards/margins": 0.3868168890476227, |
|
"rewards/rejected": -0.4153594970703125, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7978383481380865e-06, |
|
"logits/chosen": -1.5295007228851318, |
|
"logits/rejected": -1.3165438175201416, |
|
"logps/chosen": -269.34539794921875, |
|
"logps/rejected": -316.3207702636719, |
|
"loss": 0.1328, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.05052419751882553, |
|
"rewards/margins": 0.3775237798690796, |
|
"rewards/rejected": -0.4280479848384857, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.788571486639948e-06, |
|
"logits/chosen": -1.4131073951721191, |
|
"logits/rejected": -1.1108410358428955, |
|
"logps/chosen": -291.181640625, |
|
"logps/rejected": -362.20416259765625, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.13415458798408508, |
|
"rewards/margins": 0.4179055094718933, |
|
"rewards/rejected": -0.5520601272583008, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.779106329331665e-06, |
|
"logits/chosen": -1.445796251296997, |
|
"logits/rejected": -1.2411746978759766, |
|
"logps/chosen": -247.7615203857422, |
|
"logps/rejected": -265.7200927734375, |
|
"loss": 0.1505, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.047227971255779266, |
|
"rewards/margins": 0.3476831614971161, |
|
"rewards/rejected": -0.39491117000579834, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.769443696332272e-06, |
|
"logits/chosen": -1.595717430114746, |
|
"logits/rejected": -1.014550805091858, |
|
"logps/chosen": -277.58428955078125, |
|
"logps/rejected": -247.3020477294922, |
|
"loss": 0.1934, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.07608959078788757, |
|
"rewards/margins": 0.350595623254776, |
|
"rewards/rejected": -0.4266851842403412, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.759584424871302e-06, |
|
"logits/chosen": -1.7956939935684204, |
|
"logits/rejected": -1.1622785329818726, |
|
"logps/chosen": -268.0885009765625, |
|
"logps/rejected": -260.65069580078125, |
|
"loss": 0.1546, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11695736646652222, |
|
"rewards/margins": 0.3590616285800934, |
|
"rewards/rejected": -0.4760190546512604, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.749529369216246e-06, |
|
"logits/chosen": -1.682879090309143, |
|
"logits/rejected": -1.2164032459259033, |
|
"logps/chosen": -253.8575439453125, |
|
"logps/rejected": -316.49078369140625, |
|
"loss": 0.1558, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.07984492182731628, |
|
"rewards/margins": 0.36469632387161255, |
|
"rewards/rejected": -0.44454121589660645, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7392794005985324e-06, |
|
"logits/chosen": -1.8441412448883057, |
|
"logits/rejected": -1.0908098220825195, |
|
"logps/chosen": -320.3082580566406, |
|
"logps/rejected": -322.8711853027344, |
|
"loss": 0.1675, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.027216767892241478, |
|
"rewards/margins": 0.3997232913970947, |
|
"rewards/rejected": -0.42694005370140076, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7288354071380415e-06, |
|
"logits/chosen": -1.5839688777923584, |
|
"logits/rejected": -1.1818835735321045, |
|
"logps/chosen": -305.91607666015625, |
|
"logps/rejected": -304.3662109375, |
|
"loss": 0.165, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08447151631116867, |
|
"rewards/margins": 0.3237994313240051, |
|
"rewards/rejected": -0.4082708954811096, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7181982937661485e-06, |
|
"logits/chosen": -1.5419700145721436, |
|
"logits/rejected": -1.2130048274993896, |
|
"logps/chosen": -266.9795837402344, |
|
"logps/rejected": -272.76708984375, |
|
"loss": 0.1834, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.0005973346414975822, |
|
"rewards/margins": 0.3700031042098999, |
|
"rewards/rejected": -0.3694057762622833, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.707368982147318e-06, |
|
"logits/chosen": -1.478826880455017, |
|
"logits/rejected": -1.1886264085769653, |
|
"logps/chosen": -249.00161743164062, |
|
"logps/rejected": -311.38360595703125, |
|
"loss": 0.1527, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.010296178050339222, |
|
"rewards/margins": 0.27028435468673706, |
|
"rewards/rejected": -0.2599882185459137, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.696348410599244e-06, |
|
"logits/chosen": -1.6099306344985962, |
|
"logits/rejected": -1.0822367668151855, |
|
"logps/chosen": -259.72216796875, |
|
"logps/rejected": -275.6427001953125, |
|
"loss": 0.1892, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.02974315918982029, |
|
"rewards/margins": 0.31014934182167053, |
|
"rewards/rejected": -0.2804061770439148, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.685137534011549e-06, |
|
"logits/chosen": -1.7383455038070679, |
|
"logits/rejected": -1.079681158065796, |
|
"logps/chosen": -249.6421661376953, |
|
"logps/rejected": -230.7484893798828, |
|
"loss": 0.1554, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0027087554335594177, |
|
"rewards/margins": 0.32712188363075256, |
|
"rewards/rejected": -0.32441315054893494, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.673737323763048e-06, |
|
"logits/chosen": -1.6007206439971924, |
|
"logits/rejected": -1.1181820631027222, |
|
"logps/chosen": -323.9920349121094, |
|
"logps/rejected": -341.07568359375, |
|
"loss": 0.137, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.006621385924518108, |
|
"rewards/margins": 0.36024874448776245, |
|
"rewards/rejected": -0.36687013506889343, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662148767637578e-06, |
|
"logits/chosen": -1.8025529384613037, |
|
"logits/rejected": -1.0332825183868408, |
|
"logps/chosen": -384.01666259765625, |
|
"logps/rejected": -335.40374755859375, |
|
"loss": 0.1161, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.06002165004611015, |
|
"rewards/margins": 0.43708348274230957, |
|
"rewards/rejected": -0.37706178426742554, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.650372869738415e-06, |
|
"logits/chosen": -1.8271507024765015, |
|
"logits/rejected": -1.211963415145874, |
|
"logps/chosen": -328.3116760253906, |
|
"logps/rejected": -304.9596252441406, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.017533788457512856, |
|
"rewards/margins": 0.3359457552433014, |
|
"rewards/rejected": -0.3534795641899109, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638410650401267e-06, |
|
"logits/chosen": -1.4427958726882935, |
|
"logits/rejected": -1.1035863161087036, |
|
"logps/chosen": -261.233154296875, |
|
"logps/rejected": -296.86358642578125, |
|
"loss": 0.1704, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.11386547982692719, |
|
"rewards/margins": 0.3565741181373596, |
|
"rewards/rejected": -0.470439612865448, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626263146105875e-06, |
|
"logits/chosen": -1.6715164184570312, |
|
"logits/rejected": -1.2049211263656616, |
|
"logps/chosen": -267.9721374511719, |
|
"logps/rejected": -272.67218017578125, |
|
"loss": 0.1604, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14121413230895996, |
|
"rewards/margins": 0.35565823316574097, |
|
"rewards/rejected": -0.49687233567237854, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613931409386196e-06, |
|
"logits/chosen": -1.614467978477478, |
|
"logits/rejected": -1.3309428691864014, |
|
"logps/chosen": -282.2423400878906, |
|
"logps/rejected": -315.1558532714844, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09741922467947006, |
|
"rewards/margins": 0.3464723527431488, |
|
"rewards/rejected": -0.44389158487319946, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.601416508739211e-06, |
|
"logits/chosen": -1.6913681030273438, |
|
"logits/rejected": -1.0562645196914673, |
|
"logps/chosen": -260.50457763671875, |
|
"logps/rejected": -236.1143798828125, |
|
"loss": 0.252, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.14730839431285858, |
|
"rewards/margins": 0.2597261965274811, |
|
"rewards/rejected": -0.40703457593917847, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -1.7020518779754639, |
|
"logits/rejected": -1.1313974857330322, |
|
"logps/chosen": -307.2850646972656, |
|
"logps/rejected": -269.5376892089844, |
|
"loss": 0.1474, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.04724062234163284, |
|
"rewards/margins": 0.38787880539894104, |
|
"rewards/rejected": -0.4351194500923157, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.575841568909494e-06, |
|
"logits/chosen": -1.3848850727081299, |
|
"logits/rejected": -1.0428290367126465, |
|
"logps/chosen": -240.163330078125, |
|
"logps/rejected": -317.8233337402344, |
|
"loss": 0.2369, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.028999319300055504, |
|
"rewards/margins": 0.30743494629859924, |
|
"rewards/rejected": -0.3364342451095581, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.562783745695738e-06, |
|
"logits/chosen": -1.3958890438079834, |
|
"logits/rejected": -1.2130458354949951, |
|
"logps/chosen": -203.06283569335938, |
|
"logps/rejected": -265.59600830078125, |
|
"loss": 0.1827, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.020870503038167953, |
|
"rewards/margins": 0.3209065794944763, |
|
"rewards/rejected": -0.34177708625793457, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549547190300622e-06, |
|
"logits/chosen": -1.5980967283248901, |
|
"logits/rejected": -1.1070952415466309, |
|
"logps/chosen": -262.9022216796875, |
|
"logps/rejected": -253.2421112060547, |
|
"loss": 0.1491, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.010922988876700401, |
|
"rewards/margins": 0.3387434184551239, |
|
"rewards/rejected": -0.34966641664505005, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536133049620143e-06, |
|
"logits/chosen": -1.7377541065216064, |
|
"logits/rejected": -1.0692071914672852, |
|
"logps/chosen": -338.3387145996094, |
|
"logps/rejected": -277.5804443359375, |
|
"loss": 0.1656, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.01836409978568554, |
|
"rewards/margins": 0.3910614550113678, |
|
"rewards/rejected": -0.4094255566596985, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.522542485937369e-06, |
|
"logits/chosen": -1.4216773509979248, |
|
"logits/rejected": -0.8998391032218933, |
|
"logps/chosen": -255.6497039794922, |
|
"logps/rejected": -270.85980224609375, |
|
"loss": 0.1628, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.049015216529369354, |
|
"rewards/margins": 0.348868191242218, |
|
"rewards/rejected": -0.3978833854198456, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.508776676821739e-06, |
|
"logits/chosen": -1.501990556716919, |
|
"logits/rejected": -1.2285462617874146, |
|
"logps/chosen": -260.5689392089844, |
|
"logps/rejected": -342.2594299316406, |
|
"loss": 0.1413, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14666099846363068, |
|
"rewards/margins": 0.32123270630836487, |
|
"rewards/rejected": -0.46789368987083435, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.494836815027022e-06, |
|
"logits/chosen": -1.86409592628479, |
|
"logits/rejected": -1.2688671350479126, |
|
"logps/chosen": -265.83343505859375, |
|
"logps/rejected": -272.33636474609375, |
|
"loss": 0.1657, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.08181764930486679, |
|
"rewards/margins": 0.3258339464664459, |
|
"rewards/rejected": -0.4076516032218933, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4807241083879774e-06, |
|
"logits/chosen": -1.7254140377044678, |
|
"logits/rejected": -1.1994249820709229, |
|
"logps/chosen": -274.38104248046875, |
|
"logps/rejected": -289.0826416015625, |
|
"loss": 0.1598, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.08081559091806412, |
|
"rewards/margins": 0.4157637655735016, |
|
"rewards/rejected": -0.4965793192386627, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.466439779715696e-06, |
|
"logits/chosen": -1.5560534000396729, |
|
"logits/rejected": -1.0609302520751953, |
|
"logps/chosen": -320.48931884765625, |
|
"logps/rejected": -237.6038055419922, |
|
"loss": 0.1151, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13456735014915466, |
|
"rewards/margins": 0.3497200012207031, |
|
"rewards/rejected": -0.48428741097450256, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.451985066691649e-06, |
|
"logits/chosen": -1.5969184637069702, |
|
"logits/rejected": -1.100097417831421, |
|
"logps/chosen": -258.00811767578125, |
|
"logps/rejected": -307.08203125, |
|
"loss": 0.1526, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07336065918207169, |
|
"rewards/margins": 0.3625486493110657, |
|
"rewards/rejected": -0.43590933084487915, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.437361221760449e-06, |
|
"logits/chosen": -1.6261990070343018, |
|
"logits/rejected": -0.9406700134277344, |
|
"logps/chosen": -304.49530029296875, |
|
"logps/rejected": -281.0793151855469, |
|
"loss": 0.1026, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03398927301168442, |
|
"rewards/margins": 0.4203387200832367, |
|
"rewards/rejected": -0.4543279707431793, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.422569512021332e-06, |
|
"logits/chosen": -1.5429813861846924, |
|
"logits/rejected": -0.9988912343978882, |
|
"logps/chosen": -248.56948852539062, |
|
"logps/rejected": -265.1681823730469, |
|
"loss": 0.0936, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.09713175892829895, |
|
"rewards/margins": 0.3802811801433563, |
|
"rewards/rejected": -0.4774129390716553, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.407611219118363e-06, |
|
"logits/chosen": -1.5700933933258057, |
|
"logits/rejected": -1.0617953538894653, |
|
"logps/chosen": -233.2037353515625, |
|
"logps/rejected": -200.68634033203125, |
|
"loss": 0.144, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11756577342748642, |
|
"rewards/margins": 0.31560632586479187, |
|
"rewards/rejected": -0.4331720769405365, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3924876391293915e-06, |
|
"logits/chosen": -1.8913711309432983, |
|
"logits/rejected": -1.039268970489502, |
|
"logps/chosen": -353.1129455566406, |
|
"logps/rejected": -297.39617919921875, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08230503648519516, |
|
"rewards/margins": 0.47759518027305603, |
|
"rewards/rejected": -0.5599002838134766, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.377200082453748e-06, |
|
"logits/chosen": -1.538246512413025, |
|
"logits/rejected": -1.0102214813232422, |
|
"logps/chosen": -364.6609802246094, |
|
"logps/rejected": -325.3028869628906, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1402122676372528, |
|
"rewards/margins": 0.35043373703956604, |
|
"rewards/rejected": -0.49064597487449646, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.361749873698707e-06, |
|
"logits/chosen": -1.58868408203125, |
|
"logits/rejected": -1.048269271850586, |
|
"logps/chosen": -210.7605743408203, |
|
"logps/rejected": -217.05050659179688, |
|
"loss": 0.1285, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09829956293106079, |
|
"rewards/margins": 0.3576180338859558, |
|
"rewards/rejected": -0.4559175372123718, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.346138351564711e-06, |
|
"logits/chosen": -1.5400859117507935, |
|
"logits/rejected": -0.9626834988594055, |
|
"logps/chosen": -281.67779541015625, |
|
"logps/rejected": -280.89837646484375, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11693022400140762, |
|
"rewards/margins": 0.3672144412994385, |
|
"rewards/rejected": -0.4841446876525879, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.330366868729376e-06, |
|
"logits/chosen": -1.5415042638778687, |
|
"logits/rejected": -1.026490569114685, |
|
"logps/chosen": -233.54293823242188, |
|
"logps/rejected": -307.4654846191406, |
|
"loss": 0.1482, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.061702560633420944, |
|
"rewards/margins": 0.3937299847602844, |
|
"rewards/rejected": -0.45543256402015686, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3144367917302964e-06, |
|
"logits/chosen": -1.8853957653045654, |
|
"logits/rejected": -1.1902307271957397, |
|
"logps/chosen": -313.8409118652344, |
|
"logps/rejected": -296.0729064941406, |
|
"loss": 0.1522, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.017522500827908516, |
|
"rewards/margins": 0.3799929618835449, |
|
"rewards/rejected": -0.39751550555229187, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2983495008466285e-06, |
|
"logits/chosen": -1.3914777040481567, |
|
"logits/rejected": -0.9230860471725464, |
|
"logps/chosen": -266.98321533203125, |
|
"logps/rejected": -292.6563415527344, |
|
"loss": 0.1513, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.04734548181295395, |
|
"rewards/margins": 0.3820473849773407, |
|
"rewards/rejected": -0.42939287424087524, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2821063899795015e-06, |
|
"logits/chosen": -1.335268259048462, |
|
"logits/rejected": -1.4020699262619019, |
|
"logps/chosen": -224.03970336914062, |
|
"logps/rejected": -348.4554138183594, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.0435425229370594, |
|
"rewards/margins": 0.4308520257472992, |
|
"rewards/rejected": -0.4743945598602295, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.265708866531238e-06, |
|
"logits/chosen": -1.5381969213485718, |
|
"logits/rejected": -1.2920863628387451, |
|
"logps/chosen": -273.07086181640625, |
|
"logps/rejected": -312.44000244140625, |
|
"loss": 0.1688, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03625233843922615, |
|
"rewards/margins": 0.26211121678352356, |
|
"rewards/rejected": -0.29836350679397583, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.249158351283414e-06, |
|
"logits/chosen": -1.5409828424453735, |
|
"logits/rejected": -1.1099916696548462, |
|
"logps/chosen": -269.79833984375, |
|
"logps/rejected": -264.7692565917969, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.07565125823020935, |
|
"rewards/margins": 0.32862424850463867, |
|
"rewards/rejected": -0.404275506734848, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.232456278273743e-06, |
|
"logits/chosen": -1.8242772817611694, |
|
"logits/rejected": -1.0602861642837524, |
|
"logps/chosen": -371.48248291015625, |
|
"logps/rejected": -312.5940856933594, |
|
"loss": 0.1571, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10061591863632202, |
|
"rewards/margins": 0.3603518605232239, |
|
"rewards/rejected": -0.4609677791595459, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -1.5043697357177734, |
|
"logits/rejected": -1.3470711708068848, |
|
"logps/chosen": -348.822021484375, |
|
"logps/rejected": -378.1570739746094, |
|
"loss": 0.1618, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17215891182422638, |
|
"rewards/margins": 0.3960806727409363, |
|
"rewards/rejected": -0.5682395696640015, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.198603260653792e-06, |
|
"logits/chosen": -1.1872601509094238, |
|
"logits/rejected": -0.9705106616020203, |
|
"logps/chosen": -272.1139221191406, |
|
"logps/rejected": -295.6619873046875, |
|
"loss": 0.154, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19514642655849457, |
|
"rewards/margins": 0.3168686032295227, |
|
"rewards/rejected": -0.5120150446891785, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.181455249275701e-06, |
|
"logits/chosen": -1.668910264968872, |
|
"logits/rejected": -1.1042711734771729, |
|
"logps/chosen": -373.8921813964844, |
|
"logps/rejected": -264.4703369140625, |
|
"loss": 0.2755, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1737692654132843, |
|
"rewards/margins": 0.26853370666503906, |
|
"rewards/rejected": -0.44230300188064575, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1641615463459926e-06, |
|
"logits/chosen": -1.7166597843170166, |
|
"logits/rejected": -0.9484345316886902, |
|
"logps/chosen": -329.9338684082031, |
|
"logps/rejected": -262.08343505859375, |
|
"loss": 0.138, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.15403738617897034, |
|
"rewards/margins": 0.3067986071109772, |
|
"rewards/rejected": -0.4608360230922699, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.146723650296701e-06, |
|
"logits/chosen": -1.5266129970550537, |
|
"logits/rejected": -1.1601712703704834, |
|
"logps/chosen": -353.5336608886719, |
|
"logps/rejected": -334.5635681152344, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.12546080350875854, |
|
"rewards/margins": 0.4080546796321869, |
|
"rewards/rejected": -0.5335155129432678, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.129143072053639e-06, |
|
"logits/chosen": -1.7788454294204712, |
|
"logits/rejected": -1.1070853471755981, |
|
"logps/chosen": -301.1758728027344, |
|
"logps/rejected": -272.003662109375, |
|
"loss": 0.1172, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1457098424434662, |
|
"rewards/margins": 0.41548848152160645, |
|
"rewards/rejected": -0.5611982345581055, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.111421334905468e-06, |
|
"logits/chosen": -1.445004940032959, |
|
"logits/rejected": -1.095983862876892, |
|
"logps/chosen": -219.327392578125, |
|
"logps/rejected": -277.88983154296875, |
|
"loss": 0.2043, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19351212680339813, |
|
"rewards/margins": 0.2906314730644226, |
|
"rewards/rejected": -0.48414358496665955, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.093559974371725e-06, |
|
"logits/chosen": -1.4865139722824097, |
|
"logits/rejected": -1.354561686515808, |
|
"logps/chosen": -248.8349151611328, |
|
"logps/rejected": -252.2455291748047, |
|
"loss": 0.1729, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1259056180715561, |
|
"rewards/margins": 0.31513845920562744, |
|
"rewards/rejected": -0.4410440921783447, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.075560538069767e-06, |
|
"logits/chosen": -1.8343995809555054, |
|
"logits/rejected": -1.280969500541687, |
|
"logps/chosen": -254.92056274414062, |
|
"logps/rejected": -305.6126708984375, |
|
"loss": 0.1306, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1363029032945633, |
|
"rewards/margins": 0.37759169936180115, |
|
"rewards/rejected": -0.5138946175575256, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.05742458558068e-06, |
|
"logits/chosen": -1.6347767114639282, |
|
"logits/rejected": -1.1042792797088623, |
|
"logps/chosen": -292.10552978515625, |
|
"logps/rejected": -341.2361145019531, |
|
"loss": 0.1214, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.08132176101207733, |
|
"rewards/margins": 0.5133122801780701, |
|
"rewards/rejected": -0.5946341156959534, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.039153688314146e-06, |
|
"logits/chosen": -1.6722145080566406, |
|
"logits/rejected": -1.1385935544967651, |
|
"logps/chosen": -247.4423370361328, |
|
"logps/rejected": -231.4825439453125, |
|
"loss": 0.1096, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11472739279270172, |
|
"rewards/margins": 0.34514716267585754, |
|
"rewards/rejected": -0.45987454056739807, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020749429372286e-06, |
|
"logits/chosen": -1.7172428369522095, |
|
"logits/rejected": -1.2585347890853882, |
|
"logps/chosen": -310.28179931640625, |
|
"logps/rejected": -296.54071044921875, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11566674709320068, |
|
"rewards/margins": 0.3406696021556854, |
|
"rewards/rejected": -0.4563364088535309, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.002213403412492e-06, |
|
"logits/chosen": -1.479115605354309, |
|
"logits/rejected": -1.075224757194519, |
|
"logps/chosen": -250.99026489257812, |
|
"logps/rejected": -262.92755126953125, |
|
"loss": 0.1859, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.14899684488773346, |
|
"rewards/margins": 0.3160502314567566, |
|
"rewards/rejected": -0.46504706144332886, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.983547216509254e-06, |
|
"logits/chosen": -1.6718261241912842, |
|
"logits/rejected": -1.1382570266723633, |
|
"logps/chosen": -298.2779235839844, |
|
"logps/rejected": -322.4492492675781, |
|
"loss": 0.1162, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.099492147564888, |
|
"rewards/margins": 0.4109002947807312, |
|
"rewards/rejected": -0.510392427444458, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.964752486015001e-06, |
|
"logits/chosen": -1.741803765296936, |
|
"logits/rejected": -1.154526948928833, |
|
"logps/chosen": -322.1985778808594, |
|
"logps/rejected": -258.2569885253906, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.0690280944108963, |
|
"rewards/margins": 0.4529304504394531, |
|
"rewards/rejected": -0.5219585299491882, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.945830840419966e-06, |
|
"logits/chosen": -1.4246766567230225, |
|
"logits/rejected": -1.1284135580062866, |
|
"logps/chosen": -253.0252227783203, |
|
"logps/rejected": -245.19723510742188, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10711170732975006, |
|
"rewards/margins": 0.3032890558242798, |
|
"rewards/rejected": -0.41040077805519104, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92678391921108e-06, |
|
"logits/chosen": -1.580693006515503, |
|
"logits/rejected": -1.0569651126861572, |
|
"logps/chosen": -289.4805603027344, |
|
"logps/rejected": -289.3725280761719, |
|
"loss": 0.1539, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08762092143297195, |
|
"rewards/margins": 0.33070939779281616, |
|
"rewards/rejected": -0.4183303415775299, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.907613372729916e-06, |
|
"logits/chosen": -1.6741529703140259, |
|
"logits/rejected": -0.962120532989502, |
|
"logps/chosen": -332.26080322265625, |
|
"logps/rejected": -344.08209228515625, |
|
"loss": 0.1367, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15168026089668274, |
|
"rewards/margins": 0.4351193308830261, |
|
"rewards/rejected": -0.5867995619773865, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.888320862029699e-06, |
|
"logits/chosen": -1.477236032485962, |
|
"logits/rejected": -1.1961562633514404, |
|
"logps/chosen": -251.10671997070312, |
|
"logps/rejected": -272.6755676269531, |
|
"loss": 0.1711, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10620205104351044, |
|
"rewards/margins": 0.34451884031295776, |
|
"rewards/rejected": -0.450720876455307, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.868908058731376e-06, |
|
"logits/chosen": -1.5320178270339966, |
|
"logits/rejected": -1.2863932847976685, |
|
"logps/chosen": -252.96298217773438, |
|
"logps/rejected": -267.1416320800781, |
|
"loss": 0.1757, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.13874351978302002, |
|
"rewards/margins": 0.3073849678039551, |
|
"rewards/rejected": -0.4461284577846527, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.849376644878783e-06, |
|
"logits/chosen": -1.695948839187622, |
|
"logits/rejected": -1.3465334177017212, |
|
"logps/chosen": -256.7879333496094, |
|
"logps/rejected": -291.6668701171875, |
|
"loss": 0.1414, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08056484907865524, |
|
"rewards/margins": 0.3060542345046997, |
|
"rewards/rejected": -0.38661906123161316, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.829728312792895e-06, |
|
"logits/chosen": -1.4604734182357788, |
|
"logits/rejected": -1.1766102313995361, |
|
"logps/chosen": -228.4718780517578, |
|
"logps/rejected": -230.71182250976562, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10572078078985214, |
|
"rewards/margins": 0.21217215061187744, |
|
"rewards/rejected": -0.3178929388523102, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8099647649251984e-06, |
|
"logits/chosen": -1.9331858158111572, |
|
"logits/rejected": -1.301509976387024, |
|
"logps/chosen": -338.3063049316406, |
|
"logps/rejected": -301.18365478515625, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.11515772342681885, |
|
"rewards/margins": 0.33505210280418396, |
|
"rewards/rejected": -0.4502098560333252, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.790087713710179e-06, |
|
"logits/chosen": -1.7642886638641357, |
|
"logits/rejected": -1.1480679512023926, |
|
"logps/chosen": -296.9232177734375, |
|
"logps/rejected": -282.36151123046875, |
|
"loss": 0.1711, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.017070520669221878, |
|
"rewards/margins": 0.4179867208003998, |
|
"rewards/rejected": -0.43505725264549255, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.770098881416945e-06, |
|
"logits/chosen": -1.6907027959823608, |
|
"logits/rejected": -1.1980526447296143, |
|
"logps/chosen": -303.41778564453125, |
|
"logps/rejected": -332.55352783203125, |
|
"loss": 0.2434, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10351963341236115, |
|
"rewards/margins": 0.2676793932914734, |
|
"rewards/rejected": -0.37119898200035095, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -1.7876999378204346, |
|
"logits/rejected": -1.3319778442382812, |
|
"logps/chosen": -264.2928466796875, |
|
"logps/rejected": -257.09503173828125, |
|
"loss": 0.1408, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0641285628080368, |
|
"rewards/margins": 0.3521239161491394, |
|
"rewards/rejected": -0.4162525236606598, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7297928109491765e-06, |
|
"logits/chosen": -1.5949997901916504, |
|
"logits/rejected": -1.1277220249176025, |
|
"logps/chosen": -234.57373046875, |
|
"logps/rejected": -195.37899780273438, |
|
"loss": 0.2328, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13893568515777588, |
|
"rewards/margins": 0.26205331087112427, |
|
"rewards/rejected": -0.40098896622657776, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.7094790651387414e-06, |
|
"logits/chosen": -1.6848033666610718, |
|
"logits/rejected": -1.2798172235488892, |
|
"logps/chosen": -236.25436401367188, |
|
"logps/rejected": -269.56182861328125, |
|
"loss": 0.2051, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09305624663829803, |
|
"rewards/margins": 0.26189345121383667, |
|
"rewards/rejected": -0.3549497723579407, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.689060522675689e-06, |
|
"logits/chosen": -1.6376606225967407, |
|
"logits/rejected": -1.0957934856414795, |
|
"logps/chosen": -259.7792663574219, |
|
"logps/rejected": -254.48489379882812, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.09541145712137222, |
|
"rewards/margins": 0.31637534499168396, |
|
"rewards/rejected": -0.4117867946624756, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.668538952747236e-06, |
|
"logits/chosen": -1.534891128540039, |
|
"logits/rejected": -1.1751198768615723, |
|
"logps/chosen": -208.9001007080078, |
|
"logps/rejected": -289.44873046875, |
|
"loss": 0.1984, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.031213950365781784, |
|
"rewards/margins": 0.39292722940444946, |
|
"rewards/rejected": -0.42414116859436035, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6479161334675294e-06, |
|
"logits/chosen": -1.7336101531982422, |
|
"logits/rejected": -1.0565919876098633, |
|
"logps/chosen": -326.5076599121094, |
|
"logps/rejected": -337.8768005371094, |
|
"loss": 0.1143, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.03437976911664009, |
|
"rewards/margins": 0.49583953619003296, |
|
"rewards/rejected": -0.46145981550216675, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.627193851723577e-06, |
|
"logits/chosen": -1.7617276906967163, |
|
"logits/rejected": -0.987285315990448, |
|
"logps/chosen": -256.71514892578125, |
|
"logps/rejected": -230.3134002685547, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.02552700974047184, |
|
"rewards/margins": 0.3413180708885193, |
|
"rewards/rejected": -0.3668450117111206, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6063739030204226e-06, |
|
"logits/chosen": -1.606143593788147, |
|
"logits/rejected": -1.3145328760147095, |
|
"logps/chosen": -222.5089874267578, |
|
"logps/rejected": -284.4768981933594, |
|
"loss": 0.1263, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.04628473147749901, |
|
"rewards/margins": 0.3009468913078308, |
|
"rewards/rejected": -0.3472316563129425, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5854580913255706e-06, |
|
"logits/chosen": -1.6054519414901733, |
|
"logits/rejected": -1.0964720249176025, |
|
"logps/chosen": -284.88446044921875, |
|
"logps/rejected": -234.81417846679688, |
|
"loss": 0.1716, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.049481119960546494, |
|
"rewards/margins": 0.32506829500198364, |
|
"rewards/rejected": -0.37454938888549805, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.564448228912682e-06, |
|
"logits/chosen": -1.707132339477539, |
|
"logits/rejected": -1.1808102130889893, |
|
"logps/chosen": -215.00680541992188, |
|
"logps/rejected": -242.06338500976562, |
|
"loss": 0.1289, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.016758020967245102, |
|
"rewards/margins": 0.34254926443099976, |
|
"rewards/rejected": -0.35930731892585754, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543346136204545e-06, |
|
"logits/chosen": -1.423949122428894, |
|
"logits/rejected": -0.9146944284439087, |
|
"logps/chosen": -251.60986328125, |
|
"logps/rejected": -246.3702850341797, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.025834297761321068, |
|
"rewards/margins": 0.4158453941345215, |
|
"rewards/rejected": -0.39001110196113586, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.522153641615345e-06, |
|
"logits/chosen": -1.6994764804840088, |
|
"logits/rejected": -1.346040964126587, |
|
"logps/chosen": -256.00592041015625, |
|
"logps/rejected": -256.01300048828125, |
|
"loss": 0.1082, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.021267935633659363, |
|
"rewards/margins": 0.4258691370487213, |
|
"rewards/rejected": -0.40460118651390076, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5008725813922383e-06, |
|
"logits/chosen": -1.6546297073364258, |
|
"logits/rejected": -1.3536561727523804, |
|
"logps/chosen": -195.32785034179688, |
|
"logps/rejected": -239.7685546875, |
|
"loss": 0.1337, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.07065526396036148, |
|
"rewards/margins": 0.36889463663101196, |
|
"rewards/rejected": -0.2982393801212311, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.4795047994562463e-06, |
|
"logits/chosen": -1.8300390243530273, |
|
"logits/rejected": -1.308199167251587, |
|
"logps/chosen": -241.9082489013672, |
|
"logps/rejected": -253.09469604492188, |
|
"loss": 0.1305, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.07730422914028168, |
|
"rewards/margins": 0.380765438079834, |
|
"rewards/rejected": -0.3034612536430359, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.458052147242494e-06, |
|
"logits/chosen": -1.8392903804779053, |
|
"logits/rejected": -1.4302622079849243, |
|
"logps/chosen": -275.21636962890625, |
|
"logps/rejected": -269.1678161621094, |
|
"loss": 0.1645, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.058570653200149536, |
|
"rewards/margins": 0.3049880862236023, |
|
"rewards/rejected": -0.24641743302345276, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.436516483539781e-06, |
|
"logits/chosen": -1.8582258224487305, |
|
"logits/rejected": -1.2484705448150635, |
|
"logps/chosen": -289.6882019042969, |
|
"logps/rejected": -296.31927490234375, |
|
"loss": 0.1642, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.04015364870429039, |
|
"rewards/margins": 0.35936877131462097, |
|
"rewards/rejected": -0.3192150890827179, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4148996743295305e-06, |
|
"logits/chosen": -1.3779845237731934, |
|
"logits/rejected": -0.940921425819397, |
|
"logps/chosen": -277.53009033203125, |
|
"logps/rejected": -309.68157958984375, |
|
"loss": 0.1488, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.04079504683613777, |
|
"rewards/margins": 0.37808963656425476, |
|
"rewards/rejected": -0.3372945785522461, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3932035926241103e-06, |
|
"logits/chosen": -1.6403146982192993, |
|
"logits/rejected": -1.2490508556365967, |
|
"logps/chosen": -270.91436767578125, |
|
"logps/rejected": -305.6242370605469, |
|
"loss": 0.121, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.047386664897203445, |
|
"rewards/margins": 0.4330657422542572, |
|
"rewards/rejected": -0.38567906618118286, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3714301183045382e-06, |
|
"logits/chosen": -1.4912313222885132, |
|
"logits/rejected": -1.1307358741760254, |
|
"logps/chosen": -237.8841552734375, |
|
"logps/rejected": -263.740966796875, |
|
"loss": 0.15, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.04139203205704689, |
|
"rewards/margins": 0.30756354331970215, |
|
"rewards/rejected": -0.26617151498794556, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.349581137957604e-06, |
|
"logits/chosen": -1.5840933322906494, |
|
"logits/rejected": -1.2103346586227417, |
|
"logps/chosen": -267.5755920410156, |
|
"logps/rejected": -250.2428741455078, |
|
"loss": 0.1769, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.055591464042663574, |
|
"rewards/margins": 0.32637280225753784, |
|
"rewards/rejected": -0.27078136801719666, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3276585447123957e-06, |
|
"logits/chosen": -1.5938819646835327, |
|
"logits/rejected": -1.0726745128631592, |
|
"logps/chosen": -224.1567840576172, |
|
"logps/rejected": -237.98696899414062, |
|
"loss": 0.1735, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.08412063121795654, |
|
"rewards/margins": 0.37582629919052124, |
|
"rewards/rejected": -0.2917056381702423, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.3056642380762783e-06, |
|
"logits/chosen": -1.77345871925354, |
|
"logits/rejected": -1.088060736656189, |
|
"logps/chosen": -291.2659606933594, |
|
"logps/rejected": -249.79153442382812, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.06221754476428032, |
|
"rewards/margins": 0.37834566831588745, |
|
"rewards/rejected": -0.31612807512283325, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2836001237702993e-06, |
|
"logits/chosen": -1.7594726085662842, |
|
"logits/rejected": -1.1787726879119873, |
|
"logps/chosen": -252.16256713867188, |
|
"logps/rejected": -276.36224365234375, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.03996184095740318, |
|
"rewards/margins": 0.4077722430229187, |
|
"rewards/rejected": -0.36781036853790283, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2614681135640696e-06, |
|
"logits/chosen": -1.6873159408569336, |
|
"logits/rejected": -1.11794114112854, |
|
"logps/chosen": -331.9217529296875, |
|
"logps/rejected": -301.14129638671875, |
|
"loss": 0.1049, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.008476448245346546, |
|
"rewards/margins": 0.42262354493141174, |
|
"rewards/rejected": -0.4310999810695648, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2392701251101172e-06, |
|
"logits/chosen": -1.977207899093628, |
|
"logits/rejected": -1.3080086708068848, |
|
"logps/chosen": -304.4135437011719, |
|
"logps/rejected": -268.8409729003906, |
|
"loss": 0.1299, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.01740439608693123, |
|
"rewards/margins": 0.3875492513179779, |
|
"rewards/rejected": -0.3701448440551758, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -1.6914336681365967, |
|
"logits/rejected": -1.2114366292953491, |
|
"logps/chosen": -291.5057067871094, |
|
"logps/rejected": -306.0625, |
|
"loss": 0.1202, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.02021079882979393, |
|
"rewards/margins": 0.4694185256958008, |
|
"rewards/rejected": -0.44920778274536133, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1946839124862873e-06, |
|
"logits/chosen": -1.695512056350708, |
|
"logits/rejected": -1.1958303451538086, |
|
"logps/chosen": -275.1326599121094, |
|
"logps/rejected": -306.881591796875, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.05711999535560608, |
|
"rewards/margins": 0.4282289147377014, |
|
"rewards/rejected": -0.4853488802909851, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1722995515381644e-06, |
|
"logits/chosen": -1.6805217266082764, |
|
"logits/rejected": -1.306983232498169, |
|
"logps/chosen": -227.75039672851562, |
|
"logps/rejected": -306.4040222167969, |
|
"loss": 0.139, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.028795797377824783, |
|
"rewards/margins": 0.3543476462364197, |
|
"rewards/rejected": -0.38314345479011536, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.149856938451094e-06, |
|
"logits/chosen": -1.7246391773223877, |
|
"logits/rejected": -1.169914960861206, |
|
"logps/chosen": -301.69305419921875, |
|
"logps/rejected": -275.41375732421875, |
|
"loss": 0.1121, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.026128137484192848, |
|
"rewards/margins": 0.3503456711769104, |
|
"rewards/rejected": -0.3242174983024597, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.127358017790132e-06, |
|
"logits/chosen": -1.6676828861236572, |
|
"logits/rejected": -1.0200117826461792, |
|
"logps/chosen": -245.4638214111328, |
|
"logps/rejected": -210.855224609375, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.012035062536597252, |
|
"rewards/margins": 0.33619141578674316, |
|
"rewards/rejected": -0.32415634393692017, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1048047389991693e-06, |
|
"logits/chosen": -1.7602665424346924, |
|
"logits/rejected": -1.1496754884719849, |
|
"logps/chosen": -310.8785095214844, |
|
"logps/rejected": -265.19781494140625, |
|
"loss": 0.1373, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.04427367448806763, |
|
"rewards/margins": 0.34181416034698486, |
|
"rewards/rejected": -0.29754048585891724, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.082199056232015e-06, |
|
"logits/chosen": -1.7544816732406616, |
|
"logits/rejected": -1.1616899967193604, |
|
"logps/chosen": -374.69097900390625, |
|
"logps/rejected": -291.6187744140625, |
|
"loss": 0.1528, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.029999136924743652, |
|
"rewards/margins": 0.37635478377342224, |
|
"rewards/rejected": -0.346355676651001, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.059542928183079e-06, |
|
"logits/chosen": -1.6221444606781006, |
|
"logits/rejected": -1.0722931623458862, |
|
"logps/chosen": -284.0110168457031, |
|
"logps/rejected": -225.7612762451172, |
|
"loss": 0.1678, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0024113513063639402, |
|
"rewards/margins": 0.3399294316768646, |
|
"rewards/rejected": -0.34234076738357544, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0368383179176584e-06, |
|
"logits/chosen": -1.583720088005066, |
|
"logits/rejected": -0.8212550282478333, |
|
"logps/chosen": -322.15496826171875, |
|
"logps/rejected": -311.7652282714844, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.02223331294953823, |
|
"rewards/margins": 0.37121888995170593, |
|
"rewards/rejected": -0.34898558259010315, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0140871927018466e-06, |
|
"logits/chosen": -1.6727044582366943, |
|
"logits/rejected": -1.3792378902435303, |
|
"logps/chosen": -273.42938232421875, |
|
"logps/rejected": -241.69912719726562, |
|
"loss": 0.1723, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.006121881306171417, |
|
"rewards/margins": 0.2609279155731201, |
|
"rewards/rejected": -0.2548060119152069, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9912915238320755e-06, |
|
"logits/chosen": -1.4808580875396729, |
|
"logits/rejected": -1.312792181968689, |
|
"logps/chosen": -254.2381134033203, |
|
"logps/rejected": -339.28192138671875, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.029569197446107864, |
|
"rewards/margins": 0.403090238571167, |
|
"rewards/rejected": -0.43265944719314575, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"logits/chosen": -1.459582805633545, |
|
"logits/rejected": -1.37647545337677, |
|
"logps/chosen": -223.28524780273438, |
|
"logps/rejected": -268.646484375, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08847669512033463, |
|
"rewards/margins": 0.2632191777229309, |
|
"rewards/rejected": -0.3516958951950073, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.945574459442917e-06, |
|
"logits/chosen": -1.753136396408081, |
|
"logits/rejected": -1.0855618715286255, |
|
"logps/chosen": -294.66461181640625, |
|
"logps/rejected": -320.9414978027344, |
|
"loss": 0.1462, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.02983088418841362, |
|
"rewards/margins": 0.34618809819221497, |
|
"rewards/rejected": -0.3760189414024353, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.922657025129185e-06, |
|
"logits/chosen": -1.909767508506775, |
|
"logits/rejected": -1.1421029567718506, |
|
"logps/chosen": -255.0342559814453, |
|
"logps/rejected": -262.11151123046875, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0237131267786026, |
|
"rewards/margins": 0.4205241799354553, |
|
"rewards/rejected": -0.3968110680580139, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8997029692295875e-06, |
|
"logits/chosen": -1.6980245113372803, |
|
"logits/rejected": -1.4830033779144287, |
|
"logps/chosen": -216.7667694091797, |
|
"logps/rejected": -314.1409912109375, |
|
"loss": 0.1811, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03470195457339287, |
|
"rewards/margins": 0.33328303694725037, |
|
"rewards/rejected": -0.3679850101470947, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.876714280623708e-06, |
|
"logits/chosen": -1.5797358751296997, |
|
"logits/rejected": -1.1642903089523315, |
|
"logps/chosen": -249.17001342773438, |
|
"logps/rejected": -283.16326904296875, |
|
"loss": 0.1434, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.01058603823184967, |
|
"rewards/margins": 0.3307330906391144, |
|
"rewards/rejected": -0.32014700770378113, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8536929511919227e-06, |
|
"logits/chosen": -1.848733901977539, |
|
"logits/rejected": -1.0242502689361572, |
|
"logps/chosen": -324.2806091308594, |
|
"logps/rejected": -243.5651092529297, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.039272844791412354, |
|
"rewards/margins": 0.3580833077430725, |
|
"rewards/rejected": -0.31881046295166016, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8306409756428067e-06, |
|
"logits/chosen": -1.5901503562927246, |
|
"logits/rejected": -1.3664997816085815, |
|
"logps/chosen": -298.6604309082031, |
|
"logps/rejected": -299.3940734863281, |
|
"loss": 0.122, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.025997215881943703, |
|
"rewards/margins": 0.39433664083480835, |
|
"rewards/rejected": -0.368339478969574, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.807560351340302e-06, |
|
"logits/chosen": -1.6624196767807007, |
|
"logits/rejected": -1.1935114860534668, |
|
"logps/chosen": -271.36151123046875, |
|
"logps/rejected": -300.11187744140625, |
|
"loss": 0.134, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.02754376269876957, |
|
"rewards/margins": 0.36226534843444824, |
|
"rewards/rejected": -0.33472156524658203, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7844530781306544e-06, |
|
"logits/chosen": -1.7045570611953735, |
|
"logits/rejected": -0.918795108795166, |
|
"logps/chosen": -334.0121154785156, |
|
"logps/rejected": -271.56024169921875, |
|
"loss": 0.134, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.007435244508087635, |
|
"rewards/margins": 0.3523639440536499, |
|
"rewards/rejected": -0.3597991466522217, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.761321158169134e-06, |
|
"logits/chosen": -1.7817418575286865, |
|
"logits/rejected": -1.3035436868667603, |
|
"logps/chosen": -290.12249755859375, |
|
"logps/rejected": -248.0795135498047, |
|
"loss": 0.1305, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0643903911113739, |
|
"rewards/margins": 0.3860814571380615, |
|
"rewards/rejected": -0.32169100642204285, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.738166595746554e-06, |
|
"logits/chosen": -1.5566781759262085, |
|
"logits/rejected": -1.3110841512680054, |
|
"logps/chosen": -178.944091796875, |
|
"logps/rejected": -215.97299194335938, |
|
"loss": 0.1811, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.07068820297718048, |
|
"rewards/margins": 0.3479788601398468, |
|
"rewards/rejected": -0.2772907018661499, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7149913971156105e-06, |
|
"logits/chosen": -1.5236642360687256, |
|
"logits/rejected": -1.2001490592956543, |
|
"logps/chosen": -231.1402130126953, |
|
"logps/rejected": -246.59591674804688, |
|
"loss": 0.1994, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.03314649313688278, |
|
"rewards/margins": 0.3443582057952881, |
|
"rewards/rejected": -0.3112117052078247, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6917975703170466e-06, |
|
"logits/chosen": -1.6045347452163696, |
|
"logits/rejected": -1.246459722518921, |
|
"logps/chosen": -213.5222625732422, |
|
"logps/rejected": -257.655029296875, |
|
"loss": 0.1179, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.06492890417575836, |
|
"rewards/margins": 0.34639525413513184, |
|
"rewards/rejected": -0.2814663350582123, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.668587125005663e-06, |
|
"logits/chosen": -1.5607236623764038, |
|
"logits/rejected": -1.0532560348510742, |
|
"logps/chosen": -249.9466094970703, |
|
"logps/rejected": -285.72845458984375, |
|
"loss": 0.1563, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.05708497762680054, |
|
"rewards/margins": 0.33061715960502625, |
|
"rewards/rejected": -0.2735321819782257, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -1.512621521949768, |
|
"logits/rejected": -1.4036608934402466, |
|
"logps/chosen": -217.87484741210938, |
|
"logps/rejected": -261.9187316894531, |
|
"loss": 0.176, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.025976067408919334, |
|
"rewards/margins": 0.3155084252357483, |
|
"rewards/rejected": -0.3414844870567322, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6221244244890336e-06, |
|
"logits/chosen": -1.547397494316101, |
|
"logits/rejected": -1.2165327072143555, |
|
"logps/chosen": -319.05810546875, |
|
"logps/rejected": -346.9829406738281, |
|
"loss": 0.1403, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.00498613715171814, |
|
"rewards/margins": 0.37501031160354614, |
|
"rewards/rejected": -0.370024174451828, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5988761950959133e-06, |
|
"logits/chosen": -1.7019367218017578, |
|
"logits/rejected": -1.3284026384353638, |
|
"logps/chosen": -230.9080047607422, |
|
"logps/rejected": -242.2611846923828, |
|
"loss": 0.2393, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.054250530898571014, |
|
"rewards/margins": 0.22886402904987335, |
|
"rewards/rejected": -0.28311458230018616, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.575619398465402e-06, |
|
"logits/chosen": -1.6088831424713135, |
|
"logits/rejected": -1.3118395805358887, |
|
"logps/chosen": -209.9443817138672, |
|
"logps/rejected": -267.1958312988281, |
|
"loss": 0.2087, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.013560935854911804, |
|
"rewards/margins": 0.3352377414703369, |
|
"rewards/rejected": -0.3216767907142639, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5523560497083927e-06, |
|
"logits/chosen": -1.6162147521972656, |
|
"logits/rejected": -1.2510854005813599, |
|
"logps/chosen": -231.2008819580078, |
|
"logps/rejected": -321.4208068847656, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.03159638121724129, |
|
"rewards/margins": 0.33668285608291626, |
|
"rewards/rejected": -0.30508649349212646, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5290881645034932e-06, |
|
"logits/chosen": -1.6034374237060547, |
|
"logits/rejected": -1.2775676250457764, |
|
"logps/chosen": -293.4057312011719, |
|
"logps/rejected": -363.25360107421875, |
|
"loss": 0.1661, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.00653398921713233, |
|
"rewards/margins": 0.32143646478652954, |
|
"rewards/rejected": -0.3279704451560974, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5058177589223766e-06, |
|
"logits/chosen": -1.5212050676345825, |
|
"logits/rejected": -1.210669755935669, |
|
"logps/chosen": -240.904296875, |
|
"logps/rejected": -274.26605224609375, |
|
"loss": 0.1595, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.03204118087887764, |
|
"rewards/margins": 0.3350418508052826, |
|
"rewards/rejected": -0.3030007481575012, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.482546849255096e-06, |
|
"logits/chosen": -1.7197357416152954, |
|
"logits/rejected": -1.0405242443084717, |
|
"logps/chosen": -314.89984130859375, |
|
"logps/rejected": -287.49224853515625, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.03998409956693649, |
|
"rewards/margins": 0.43929505348205566, |
|
"rewards/rejected": -0.39931100606918335, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4592774518353858e-06, |
|
"logits/chosen": -1.688256025314331, |
|
"logits/rejected": -1.1770254373550415, |
|
"logps/chosen": -238.84921264648438, |
|
"logps/rejected": -256.46063232421875, |
|
"loss": 0.142, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.046583134680986404, |
|
"rewards/margins": 0.42613476514816284, |
|
"rewards/rejected": -0.37955164909362793, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.436011582865945e-06, |
|
"logits/chosen": -1.4531993865966797, |
|
"logits/rejected": -0.9476866722106934, |
|
"logps/chosen": -272.5257568359375, |
|
"logps/rejected": -301.6541442871094, |
|
"loss": 0.107, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.08881069719791412, |
|
"rewards/margins": 0.4413018226623535, |
|
"rewards/rejected": -0.352491170167923, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4127512582437486e-06, |
|
"logits/chosen": -1.719260811805725, |
|
"logits/rejected": -1.2589495182037354, |
|
"logps/chosen": -293.53753662109375, |
|
"logps/rejected": -286.5969543457031, |
|
"loss": 0.1363, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.01633891463279724, |
|
"rewards/margins": 0.3913685381412506, |
|
"rewards/rejected": -0.37502965331077576, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3894984933853734e-06, |
|
"logits/chosen": -1.6526820659637451, |
|
"logits/rejected": -1.3534865379333496, |
|
"logps/chosen": -206.388671875, |
|
"logps/rejected": -241.95187377929688, |
|
"loss": 0.1525, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.007084892597049475, |
|
"rewards/margins": 0.36700910329818726, |
|
"rewards/rejected": -0.35992416739463806, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.366255303052377e-06, |
|
"logits/chosen": -1.6136153936386108, |
|
"logits/rejected": -1.188907504081726, |
|
"logps/chosen": -286.76373291015625, |
|
"logps/rejected": -252.4059600830078, |
|
"loss": 0.1859, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.005851163994520903, |
|
"rewards/margins": 0.316637247800827, |
|
"rewards/rejected": -0.31078606843948364, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits/chosen": -1.654313325881958, |
|
"logits/rejected": -1.2479978799819946, |
|
"logps/chosen": -226.00390625, |
|
"logps/rejected": -232.9287109375, |
|
"loss": 0.1541, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.08312289416790009, |
|
"rewards/margins": 0.3975786864757538, |
|
"rewards/rejected": -0.3144558072090149, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.319805700686257e-06, |
|
"logits/chosen": -1.6342413425445557, |
|
"logits/rejected": -1.109515905380249, |
|
"logps/chosen": -234.4524383544922, |
|
"logps/rejected": -233.2206573486328, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08855343610048294, |
|
"rewards/margins": 0.38437074422836304, |
|
"rewards/rejected": -0.2958173155784607, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.296603313330355e-06, |
|
"logits/chosen": -1.8358131647109985, |
|
"logits/rejected": -1.035390019416809, |
|
"logps/chosen": -394.54071044921875, |
|
"logps/rejected": -347.41680908203125, |
|
"loss": 0.2742, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.042184554040431976, |
|
"rewards/margins": 0.3534103333950043, |
|
"rewards/rejected": -0.3112257719039917, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2734185495055503e-06, |
|
"logits/chosen": -1.5863606929779053, |
|
"logits/rejected": -1.3380165100097656, |
|
"logps/chosen": -271.6787414550781, |
|
"logps/rejected": -278.33502197265625, |
|
"loss": 0.1506, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.020747099071741104, |
|
"rewards/margins": 0.34211981296539307, |
|
"rewards/rejected": -0.32137271761894226, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.250253418081373e-06, |
|
"logits/chosen": -1.695081353187561, |
|
"logits/rejected": -1.1732103824615479, |
|
"logps/chosen": -280.98675537109375, |
|
"logps/rejected": -266.2809143066406, |
|
"loss": 0.1702, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.008993232622742653, |
|
"rewards/margins": 0.3143147826194763, |
|
"rewards/rejected": -0.3053215444087982, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.22710992622628e-06, |
|
"logits/chosen": -1.6708568334579468, |
|
"logits/rejected": -1.061140775680542, |
|
"logps/chosen": -281.99456787109375, |
|
"logps/rejected": -249.1426544189453, |
|
"loss": 0.1246, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.04111826419830322, |
|
"rewards/margins": 0.35372304916381836, |
|
"rewards/rejected": -0.31260478496551514, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2039900792337477e-06, |
|
"logits/chosen": -1.6424753665924072, |
|
"logits/rejected": -1.2045528888702393, |
|
"logps/chosen": -228.9128875732422, |
|
"logps/rejected": -266.5892028808594, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.0656760185956955, |
|
"rewards/margins": 0.4023992121219635, |
|
"rewards/rejected": -0.3367232382297516, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1808958803485134e-06, |
|
"logits/chosen": -1.7162901163101196, |
|
"logits/rejected": -1.1374019384384155, |
|
"logps/chosen": -264.79522705078125, |
|
"logps/rejected": -279.41595458984375, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.055478811264038086, |
|
"rewards/margins": 0.4204636216163635, |
|
"rewards/rejected": -0.36498481035232544, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.157829330593008e-06, |
|
"logits/chosen": -1.537788987159729, |
|
"logits/rejected": -1.129476547241211, |
|
"logps/chosen": -278.3310241699219, |
|
"logps/rejected": -257.82232666015625, |
|
"loss": 0.1514, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.022755134850740433, |
|
"rewards/margins": 0.32495397329330444, |
|
"rewards/rejected": -0.34770917892456055, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.134792428593971e-06, |
|
"logits/chosen": -1.6505171060562134, |
|
"logits/rejected": -1.1392511129379272, |
|
"logps/chosen": -315.64190673828125, |
|
"logps/rejected": -261.23101806640625, |
|
"loss": 0.1454, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.04185132309794426, |
|
"rewards/margins": 0.35440436005592346, |
|
"rewards/rejected": -0.3125530779361725, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1117871704092818e-06, |
|
"logits/chosen": -1.822789192199707, |
|
"logits/rejected": -1.2018978595733643, |
|
"logps/chosen": -249.8803253173828, |
|
"logps/rejected": -278.4112243652344, |
|
"loss": 0.1295, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.07924628257751465, |
|
"rewards/margins": 0.4235721528530121, |
|
"rewards/rejected": -0.3443259298801422, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0888155493550027e-06, |
|
"logits/chosen": -1.7266429662704468, |
|
"logits/rejected": -1.0663232803344727, |
|
"logps/chosen": -269.5586853027344, |
|
"logps/rejected": -230.1761474609375, |
|
"loss": 0.1131, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.0925656110048294, |
|
"rewards/margins": 0.38963964581489563, |
|
"rewards/rejected": -0.2970740795135498, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -1.8484570980072021, |
|
"logits/rejected": -1.2969257831573486, |
|
"logps/chosen": -248.79464721679688, |
|
"logps/rejected": -239.7518768310547, |
|
"loss": 0.1618, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.05543617531657219, |
|
"rewards/margins": 0.3949527144432068, |
|
"rewards/rejected": -0.3395165801048279, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0429811771568468e-06, |
|
"logits/chosen": -1.5278081893920898, |
|
"logits/rejected": -1.0012762546539307, |
|
"logps/chosen": -280.8786315917969, |
|
"logps/rejected": -276.75689697265625, |
|
"loss": 0.0882, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.07547347992658615, |
|
"rewards/margins": 0.4585336148738861, |
|
"rewards/rejected": -0.38306012749671936, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0201223973828917e-06, |
|
"logits/chosen": -1.6746526956558228, |
|
"logits/rejected": -1.205840826034546, |
|
"logps/chosen": -274.74298095703125, |
|
"logps/rejected": -298.1522216796875, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.032690681517124176, |
|
"rewards/margins": 0.4341716170310974, |
|
"rewards/rejected": -0.4014809727668762, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997305197135089e-06, |
|
"logits/chosen": -1.420925259590149, |
|
"logits/rejected": -1.1142023801803589, |
|
"logps/chosen": -235.92758178710938, |
|
"logps/rejected": -251.2263641357422, |
|
"loss": 0.1505, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.04943353682756424, |
|
"rewards/margins": 0.3853815197944641, |
|
"rewards/rejected": -0.33594799041748047, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9745315534350157e-06, |
|
"logits/chosen": -1.5054067373275757, |
|
"logits/rejected": -1.0819201469421387, |
|
"logps/chosen": -313.4949951171875, |
|
"logps/rejected": -295.75164794921875, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.1620264053344727e-05, |
|
"rewards/margins": 0.33664727210998535, |
|
"rewards/rejected": -0.3366788923740387, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9518034395302413e-06, |
|
"logits/chosen": -1.6651229858398438, |
|
"logits/rejected": -1.2321889400482178, |
|
"logps/chosen": -296.817138671875, |
|
"logps/rejected": -304.33294677734375, |
|
"loss": 0.1157, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.07290974259376526, |
|
"rewards/margins": 0.38127079606056213, |
|
"rewards/rejected": -0.3083610236644745, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9291228247233607e-06, |
|
"logits/chosen": -1.7819957733154297, |
|
"logits/rejected": -1.2075005769729614, |
|
"logps/chosen": -259.632080078125, |
|
"logps/rejected": -265.5983581542969, |
|
"loss": 0.1464, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.04883255437016487, |
|
"rewards/margins": 0.39770543575286865, |
|
"rewards/rejected": -0.3488728702068329, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9064916742013515e-06, |
|
"logits/chosen": -1.7982776165008545, |
|
"logits/rejected": -1.0426654815673828, |
|
"logps/chosen": -284.5262756347656, |
|
"logps/rejected": -271.22930908203125, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08063653856515884, |
|
"rewards/margins": 0.4655955731868744, |
|
"rewards/rejected": -0.38495901226997375, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.883911948865306e-06, |
|
"logits/chosen": -1.424214243888855, |
|
"logits/rejected": -1.0795605182647705, |
|
"logps/chosen": -344.02264404296875, |
|
"logps/rejected": -331.96990966796875, |
|
"loss": 0.3091, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.012640709057450294, |
|
"rewards/margins": 0.2547581195831299, |
|
"rewards/rejected": -0.24211737513542175, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8613856051605242e-06, |
|
"logits/chosen": -1.6110725402832031, |
|
"logits/rejected": -1.2309287786483765, |
|
"logps/chosen": -276.41448974609375, |
|
"logps/rejected": -281.2205505371094, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.046147290617227554, |
|
"rewards/margins": 0.3609221577644348, |
|
"rewards/rejected": -0.31477484107017517, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8389145949069953e-06, |
|
"logits/chosen": -1.9351005554199219, |
|
"logits/rejected": -1.308622121810913, |
|
"logps/chosen": -283.0077209472656, |
|
"logps/rejected": -286.6006164550781, |
|
"loss": 0.1261, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.1078963652253151, |
|
"rewards/margins": 0.4045773446559906, |
|
"rewards/rejected": -0.2966809570789337, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.816500865130279e-06, |
|
"logits/chosen": -1.6536674499511719, |
|
"logits/rejected": -1.3896677494049072, |
|
"logps/chosen": -203.09043884277344, |
|
"logps/rejected": -169.11422729492188, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.039239563047885895, |
|
"rewards/margins": 0.27253058552742004, |
|
"rewards/rejected": -0.23329100012779236, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7941463578928088e-06, |
|
"logits/chosen": -1.620586633682251, |
|
"logits/rejected": -1.1168583631515503, |
|
"logps/chosen": -280.3619079589844, |
|
"logps/rejected": -252.0579071044922, |
|
"loss": 0.1894, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.06918063014745712, |
|
"rewards/margins": 0.3512100577354431, |
|
"rewards/rejected": -0.2820294499397278, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7718530101256115e-06, |
|
"logits/chosen": -1.887487769126892, |
|
"logits/rejected": -1.241081714630127, |
|
"logps/chosen": -284.033935546875, |
|
"logps/rejected": -275.2795104980469, |
|
"loss": 0.1358, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.09624558687210083, |
|
"rewards/margins": 0.3296979069709778, |
|
"rewards/rejected": -0.23345234990119934, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7496227534604859e-06, |
|
"logits/chosen": -1.785048246383667, |
|
"logits/rejected": -1.2693895101547241, |
|
"logps/chosen": -263.9631042480469, |
|
"logps/rejected": -324.4400634765625, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08735568076372147, |
|
"rewards/margins": 0.380867063999176, |
|
"rewards/rejected": -0.29351136088371277, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.5414637327194214, |
|
"logits/rejected": -0.9286526441574097, |
|
"logps/chosen": -300.5523986816406, |
|
"logps/rejected": -227.65713500976562, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.10164747387170792, |
|
"rewards/margins": 0.3701394498348236, |
|
"rewards/rejected": -0.2684919834136963, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7053592124637557e-06, |
|
"logits/chosen": -1.7873646020889282, |
|
"logits/rejected": -1.1277539730072021, |
|
"logps/chosen": -322.7608337402344, |
|
"logps/rejected": -267.81787109375, |
|
"loss": 0.148, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.05880016088485718, |
|
"rewards/margins": 0.3506908118724823, |
|
"rewards/rejected": -0.2918906509876251, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6833297633956647e-06, |
|
"logits/chosen": -1.532529592514038, |
|
"logits/rejected": -1.4235907793045044, |
|
"logps/chosen": -165.92489624023438, |
|
"logps/rejected": -237.23782348632812, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.004235130734741688, |
|
"rewards/margins": 0.31066879630088806, |
|
"rewards/rejected": -0.30643370747566223, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.661371075624363e-06, |
|
"logits/chosen": -1.508122444152832, |
|
"logits/rejected": -1.1396775245666504, |
|
"logps/chosen": -234.5397186279297, |
|
"logps/rejected": -293.2003479003906, |
|
"loss": 0.1418, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.06259538978338242, |
|
"rewards/margins": 0.40436553955078125, |
|
"rewards/rejected": -0.3417701721191406, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6394850517846621e-06, |
|
"logits/chosen": -1.6401771306991577, |
|
"logits/rejected": -0.8912142515182495, |
|
"logps/chosen": -283.13238525390625, |
|
"logps/rejected": -221.9040985107422, |
|
"loss": 0.1623, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.08341696858406067, |
|
"rewards/margins": 0.4074879288673401, |
|
"rewards/rejected": -0.32407090067863464, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6176735882153284e-06, |
|
"logits/chosen": -1.9544626474380493, |
|
"logits/rejected": -1.3714215755462646, |
|
"logps/chosen": -302.2862548828125, |
|
"logps/rejected": -252.0078582763672, |
|
"loss": 0.1017, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09628540277481079, |
|
"rewards/margins": 0.3641790747642517, |
|
"rewards/rejected": -0.2678936719894409, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5959385747947697e-06, |
|
"logits/chosen": -1.4549901485443115, |
|
"logits/rejected": -1.019054651260376, |
|
"logps/chosen": -258.96661376953125, |
|
"logps/rejected": -268.4969177246094, |
|
"loss": 0.1197, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.13393627107143402, |
|
"rewards/margins": 0.4325905740261078, |
|
"rewards/rejected": -0.29865431785583496, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5742818947772875e-06, |
|
"logits/chosen": -1.6369373798370361, |
|
"logits/rejected": -1.1893460750579834, |
|
"logps/chosen": -270.46185302734375, |
|
"logps/rejected": -253.77633666992188, |
|
"loss": 0.1302, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.04096044600009918, |
|
"rewards/margins": 0.3416442275047302, |
|
"rewards/rejected": -0.30068379640579224, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.552705424629898e-06, |
|
"logits/chosen": -1.5808465480804443, |
|
"logits/rejected": -1.141884446144104, |
|
"logps/chosen": -285.2702941894531, |
|
"logps/rejected": -277.57745361328125, |
|
"loss": 0.1158, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.059014301747083664, |
|
"rewards/margins": 0.3862994611263275, |
|
"rewards/rejected": -0.32728514075279236, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5312110338697427e-06, |
|
"logits/chosen": -1.755192518234253, |
|
"logits/rejected": -1.2506240606307983, |
|
"logps/chosen": -246.61972045898438, |
|
"logps/rejected": -237.236328125, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.06155257299542427, |
|
"rewards/margins": 0.430799663066864, |
|
"rewards/rejected": -0.3692471385002136, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -1.532773733139038, |
|
"logits/rejected": -1.2026466131210327, |
|
"logps/chosen": -243.93820190429688, |
|
"logps/rejected": -262.2892150878906, |
|
"loss": 0.1839, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.07341442257165909, |
|
"rewards/margins": 0.35419246554374695, |
|
"rewards/rejected": -0.28077805042266846, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4884759328590476e-06, |
|
"logits/chosen": -1.8502527475357056, |
|
"logits/rejected": -1.1260955333709717, |
|
"logps/chosen": -276.49945068359375, |
|
"logps/rejected": -261.7967529296875, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.07880761474370956, |
|
"rewards/margins": 0.402778685092926, |
|
"rewards/rejected": -0.32397109270095825, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.467238925438646e-06, |
|
"logits/chosen": -1.6124885082244873, |
|
"logits/rejected": -1.1764719486236572, |
|
"logps/chosen": -242.35806274414062, |
|
"logps/rejected": -281.0080261230469, |
|
"loss": 0.1401, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.05401073768734932, |
|
"rewards/margins": 0.40248528122901917, |
|
"rewards/rejected": -0.34847456216812134, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.446091402744923e-06, |
|
"logits/chosen": -1.5193445682525635, |
|
"logits/rejected": -0.925061821937561, |
|
"logps/chosen": -275.9288024902344, |
|
"logps/rejected": -256.40155029296875, |
|
"loss": 0.097, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.02620106376707554, |
|
"rewards/margins": 0.3423638343811035, |
|
"rewards/rejected": -0.31616276502609253, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4250351971283937e-06, |
|
"logits/chosen": -1.7999191284179688, |
|
"logits/rejected": -1.2691766023635864, |
|
"logps/chosen": -227.58023071289062, |
|
"logps/rejected": -259.0445556640625, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.06122167035937309, |
|
"rewards/margins": 0.41422295570373535, |
|
"rewards/rejected": -0.35300129652023315, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4040721330273063e-06, |
|
"logits/chosen": -1.9836969375610352, |
|
"logits/rejected": -1.2529933452606201, |
|
"logps/chosen": -243.0897216796875, |
|
"logps/rejected": -234.3355712890625, |
|
"loss": 0.1477, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.0859227403998375, |
|
"rewards/margins": 0.4090031683444977, |
|
"rewards/rejected": -0.323080450296402, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3832040268095589e-06, |
|
"logits/chosen": -1.5172902345657349, |
|
"logits/rejected": -1.0578333139419556, |
|
"logps/chosen": -260.90411376953125, |
|
"logps/rejected": -258.8797302246094, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.05479772016406059, |
|
"rewards/margins": 0.37679368257522583, |
|
"rewards/rejected": -0.32199597358703613, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.362432686615316e-06, |
|
"logits/chosen": -1.5748417377471924, |
|
"logits/rejected": -1.0296505689620972, |
|
"logps/chosen": -261.697021484375, |
|
"logps/rejected": -210.9502410888672, |
|
"loss": 0.1437, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.08591978251934052, |
|
"rewards/margins": 0.34443560242652893, |
|
"rewards/rejected": -0.2585158050060272, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3417599122003464e-06, |
|
"logits/chosen": -1.7466052770614624, |
|
"logits/rejected": -1.1247678995132446, |
|
"logps/chosen": -253.7733917236328, |
|
"logps/rejected": -230.14089965820312, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.10326331853866577, |
|
"rewards/margins": 0.384945809841156, |
|
"rewards/rejected": -0.28168249130249023, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3211874947800747e-06, |
|
"logits/chosen": -1.6235640048980713, |
|
"logits/rejected": -1.2449982166290283, |
|
"logps/chosen": -286.81756591796875, |
|
"logps/rejected": -286.7332763671875, |
|
"loss": 0.2038, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.03231514245271683, |
|
"rewards/margins": 0.31858521699905396, |
|
"rewards/rejected": -0.2862700819969177, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3007172168743854e-06, |
|
"logits/chosen": -1.9692258834838867, |
|
"logits/rejected": -1.3329085111618042, |
|
"logps/chosen": -256.65008544921875, |
|
"logps/rejected": -220.8751678466797, |
|
"loss": 0.1944, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.06459324806928635, |
|
"rewards/margins": 0.3410438299179077, |
|
"rewards/rejected": -0.27645057439804077, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.280350852153168e-06, |
|
"logits/chosen": -1.6254962682724, |
|
"logits/rejected": -1.4488201141357422, |
|
"logps/chosen": -212.2681427001953, |
|
"logps/rejected": -273.9403076171875, |
|
"loss": 0.1321, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.09227583557367325, |
|
"rewards/margins": 0.3278459906578064, |
|
"rewards/rejected": -0.23557014763355255, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.260090165282645e-06, |
|
"logits/chosen": -1.6674009561538696, |
|
"logits/rejected": -1.145491361618042, |
|
"logps/chosen": -273.3908996582031, |
|
"logps/rejected": -231.9703369140625, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08204850554466248, |
|
"rewards/margins": 0.3708895742893219, |
|
"rewards/rejected": -0.2888410687446594, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2399369117724582e-06, |
|
"logits/chosen": -1.8269517421722412, |
|
"logits/rejected": -1.1365658044815063, |
|
"logps/chosen": -305.4709167480469, |
|
"logps/rejected": -273.973388671875, |
|
"loss": 0.1584, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.08552964776754379, |
|
"rewards/margins": 0.3883412480354309, |
|
"rewards/rejected": -0.30281156301498413, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2198928378235717e-06, |
|
"logits/chosen": -1.6987979412078857, |
|
"logits/rejected": -1.3719052076339722, |
|
"logps/chosen": -225.90505981445312, |
|
"logps/rejected": -248.9863739013672, |
|
"loss": 0.1964, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.09808424860239029, |
|
"rewards/margins": 0.2932734787464142, |
|
"rewards/rejected": -0.1951892375946045, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1999596801769617e-06, |
|
"logits/chosen": -1.6622822284698486, |
|
"logits/rejected": -1.46449875831604, |
|
"logps/chosen": -194.4739227294922, |
|
"logps/rejected": -270.63922119140625, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.08343388140201569, |
|
"rewards/margins": 0.3558526933193207, |
|
"rewards/rejected": -0.2724188268184662, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1801391659631423e-06, |
|
"logits/chosen": -1.4197065830230713, |
|
"logits/rejected": -0.8729090690612793, |
|
"logps/chosen": -261.99517822265625, |
|
"logps/rejected": -240.2467041015625, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09511663019657135, |
|
"rewards/margins": 0.3631947338581085, |
|
"rewards/rejected": -0.2680780589580536, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.160433012552508e-06, |
|
"logits/chosen": -1.6888864040374756, |
|
"logits/rejected": -1.1725207567214966, |
|
"logps/chosen": -265.87957763671875, |
|
"logps/rejected": -228.31982421875, |
|
"loss": 0.115, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.11969444900751114, |
|
"rewards/margins": 0.4062970280647278, |
|
"rewards/rejected": -0.28660255670547485, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1408429274065418e-06, |
|
"logits/chosen": -1.6261924505233765, |
|
"logits/rejected": -1.3828227519989014, |
|
"logps/chosen": -223.134521484375, |
|
"logps/rejected": -277.44903564453125, |
|
"loss": 0.1311, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.11289075762033463, |
|
"rewards/margins": 0.34422147274017334, |
|
"rewards/rejected": -0.23133070766925812, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1213706079298566e-06, |
|
"logits/chosen": -1.8407529592514038, |
|
"logits/rejected": -1.3841874599456787, |
|
"logps/chosen": -282.0005187988281, |
|
"logps/rejected": -308.8989562988281, |
|
"loss": 0.1637, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.08910055458545685, |
|
"rewards/margins": 0.3223797082901001, |
|
"rewards/rejected": -0.23327915370464325, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1020177413231334e-06, |
|
"logits/chosen": -1.6514075994491577, |
|
"logits/rejected": -1.1626676321029663, |
|
"logps/chosen": -206.6435546875, |
|
"logps/rejected": -248.57595825195312, |
|
"loss": 0.0849, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.0976477712392807, |
|
"rewards/margins": 0.3547229468822479, |
|
"rewards/rejected": -0.25707516074180603, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0827860044369226e-06, |
|
"logits/chosen": -1.8002774715423584, |
|
"logits/rejected": -1.2231605052947998, |
|
"logps/chosen": -263.72991943359375, |
|
"logps/rejected": -265.7281799316406, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.09039691835641861, |
|
"rewards/margins": 0.3749600052833557, |
|
"rewards/rejected": -0.2845631241798401, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.06367706362636e-06, |
|
"logits/chosen": -1.7300952672958374, |
|
"logits/rejected": -1.0147895812988281, |
|
"logps/chosen": -234.8489532470703, |
|
"logps/rejected": -245.09164428710938, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.09726179391145706, |
|
"rewards/margins": 0.37331247329711914, |
|
"rewards/rejected": -0.27605074644088745, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0446925746067768e-06, |
|
"logits/chosen": -1.8114553689956665, |
|
"logits/rejected": -1.3846733570098877, |
|
"logps/chosen": -270.171142578125, |
|
"logps/rejected": -253.19253540039062, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.08086392283439636, |
|
"rewards/margins": 0.3226754665374756, |
|
"rewards/rejected": -0.24181151390075684, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0258341823102418e-06, |
|
"logits/chosen": -1.5032013654708862, |
|
"logits/rejected": -1.2384767532348633, |
|
"logps/chosen": -202.64346313476562, |
|
"logps/rejected": -241.52383422851562, |
|
"loss": 0.1571, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.04403448849916458, |
|
"rewards/margins": 0.30034139752388, |
|
"rewards/rejected": -0.25630688667297363, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -1.6960475444793701, |
|
"logits/rejected": -1.202266812324524, |
|
"logps/chosen": -228.84579467773438, |
|
"logps/rejected": -282.767333984375, |
|
"loss": 0.1184, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.08044974505901337, |
|
"rewards/margins": 0.4057890474796295, |
|
"rewards/rejected": -0.32533928751945496, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.88502212844063e-07, |
|
"logits/chosen": -1.7069820165634155, |
|
"logits/rejected": -1.3219921588897705, |
|
"logps/chosen": -232.90579223632812, |
|
"logps/rejected": -257.5270690917969, |
|
"loss": 0.1422, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.050495147705078125, |
|
"rewards/margins": 0.3361809551715851, |
|
"rewards/rejected": -0.2856857478618622, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.700318703442437e-07, |
|
"logits/chosen": -1.813307762145996, |
|
"logits/rejected": -1.2688970565795898, |
|
"logps/chosen": -252.4823760986328, |
|
"logps/rejected": -221.725830078125, |
|
"loss": 0.1892, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.03987887501716614, |
|
"rewards/margins": 0.2988082468509674, |
|
"rewards/rejected": -0.2589293420314789, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.516940936268504e-07, |
|
"logits/chosen": -1.8856050968170166, |
|
"logits/rejected": -1.4962949752807617, |
|
"logps/chosen": -211.00314331054688, |
|
"logps/rejected": -229.956298828125, |
|
"loss": 0.2038, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.026142999529838562, |
|
"rewards/margins": 0.2894168496131897, |
|
"rewards/rejected": -0.2632738947868347, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.334904715888496e-07, |
|
"logits/chosen": -1.7890255451202393, |
|
"logits/rejected": -1.1871464252471924, |
|
"logps/chosen": -290.72113037109375, |
|
"logps/rejected": -280.96856689453125, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.048764027655124664, |
|
"rewards/margins": 0.323917955160141, |
|
"rewards/rejected": -0.2751539349555969, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.154225815032242e-07, |
|
"logits/chosen": -1.4765609502792358, |
|
"logits/rejected": -1.052433729171753, |
|
"logps/chosen": -270.6768493652344, |
|
"logps/rejected": -251.9602508544922, |
|
"loss": 0.1566, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.08185350149869919, |
|
"rewards/margins": 0.3346666097640991, |
|
"rewards/rejected": -0.2528131604194641, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.974919888823164e-07, |
|
"logits/chosen": -1.5708179473876953, |
|
"logits/rejected": -1.1366405487060547, |
|
"logps/chosen": -260.322265625, |
|
"logps/rejected": -328.4196472167969, |
|
"loss": 0.1115, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.075380340218544, |
|
"rewards/margins": 0.4507225453853607, |
|
"rewards/rejected": -0.3753421902656555, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.797002473421729e-07, |
|
"logits/chosen": -1.6871554851531982, |
|
"logits/rejected": -1.015560269355774, |
|
"logps/chosen": -312.00396728515625, |
|
"logps/rejected": -289.4263000488281, |
|
"loss": 0.0967, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.08846127986907959, |
|
"rewards/margins": 0.4753798544406891, |
|
"rewards/rejected": -0.3869186043739319, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.620488984679378e-07, |
|
"logits/chosen": -1.5905934572219849, |
|
"logits/rejected": -1.0190634727478027, |
|
"logps/chosen": -270.3796081542969, |
|
"logps/rejected": -255.5897216796875, |
|
"loss": 0.1175, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.06361018866300583, |
|
"rewards/margins": 0.41740432381629944, |
|
"rewards/rejected": -0.3537940979003906, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.445394716802754e-07, |
|
"logits/chosen": -1.716925859451294, |
|
"logits/rejected": -1.2719004154205322, |
|
"logps/chosen": -282.26434326171875, |
|
"logps/rejected": -315.83294677734375, |
|
"loss": 0.1543, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.10373795032501221, |
|
"rewards/margins": 0.4591018557548523, |
|
"rewards/rejected": -0.3553639352321625, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.6560224294662476, |
|
"logits/rejected": -1.2010629177093506, |
|
"logps/chosen": -273.56549072265625, |
|
"logps/rejected": -295.66180419921875, |
|
"loss": 0.1, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.09685816615819931, |
|
"rewards/margins": 0.42575687170028687, |
|
"rewards/rejected": -0.32889872789382935, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.099524404308948e-07, |
|
"logits/chosen": -1.5945422649383545, |
|
"logits/rejected": -1.19219172000885, |
|
"logps/chosen": -182.81103515625, |
|
"logps/rejected": -238.70919799804688, |
|
"loss": 0.161, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.06724058091640472, |
|
"rewards/margins": 0.31565195322036743, |
|
"rewards/rejected": -0.24841134250164032, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.928778328007918e-07, |
|
"logits/chosen": -1.9054105281829834, |
|
"logits/rejected": -1.2900282144546509, |
|
"logps/chosen": -260.6278991699219, |
|
"logps/rejected": -260.5713806152344, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.06147562339901924, |
|
"rewards/margins": 0.29727303981781006, |
|
"rewards/rejected": -0.23579740524291992, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.759511406608255e-07, |
|
"logits/chosen": -1.8338701725006104, |
|
"logits/rejected": -1.3442243337631226, |
|
"logps/chosen": -289.9970703125, |
|
"logps/rejected": -252.2522735595703, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.06250108033418655, |
|
"rewards/margins": 0.26397019624710083, |
|
"rewards/rejected": -0.20146910846233368, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.591738306429769e-07, |
|
"logits/chosen": -1.7106062173843384, |
|
"logits/rejected": -0.9574893712997437, |
|
"logps/chosen": -306.48663330078125, |
|
"logps/rejected": -267.07965087890625, |
|
"loss": 0.1403, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.03719034045934677, |
|
"rewards/margins": 0.3732340633869171, |
|
"rewards/rejected": -0.33604371547698975, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.425473564358457e-07, |
|
"logits/chosen": -1.8127809762954712, |
|
"logits/rejected": -1.1028550863265991, |
|
"logps/chosen": -264.7284240722656, |
|
"logps/rejected": -247.4577178955078, |
|
"loss": 0.1306, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.077170729637146, |
|
"rewards/margins": 0.40461188554763794, |
|
"rewards/rejected": -0.32744115591049194, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.260731586586983e-07, |
|
"logits/chosen": -1.756270408630371, |
|
"logits/rejected": -1.4134676456451416, |
|
"logps/chosen": -213.25234985351562, |
|
"logps/rejected": -228.63436889648438, |
|
"loss": 0.1606, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0521668866276741, |
|
"rewards/margins": 0.32329946756362915, |
|
"rewards/rejected": -0.27113252878189087, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.097526647366379e-07, |
|
"logits/chosen": -1.8526099920272827, |
|
"logits/rejected": -1.2808058261871338, |
|
"logps/chosen": -266.97918701171875, |
|
"logps/rejected": -249.385986328125, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08820641040802002, |
|
"rewards/margins": 0.35536572337150574, |
|
"rewards/rejected": -0.2671593129634857, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.935872887769299e-07, |
|
"logits/chosen": -1.659076452255249, |
|
"logits/rejected": -1.0968009233474731, |
|
"logps/chosen": -307.5328063964844, |
|
"logps/rejected": -245.99307250976562, |
|
"loss": 0.1618, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.07833143323659897, |
|
"rewards/margins": 0.386299729347229, |
|
"rewards/rejected": -0.3079683184623718, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.775784314464717e-07, |
|
"logits/chosen": -1.5469852685928345, |
|
"logits/rejected": -1.411586046218872, |
|
"logps/chosen": -225.2750701904297, |
|
"logps/rejected": -242.4783935546875, |
|
"loss": 0.1542, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04528594762086868, |
|
"rewards/margins": 0.3220139145851135, |
|
"rewards/rejected": -0.27672794461250305, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.617274798504286e-07, |
|
"logits/chosen": -1.6576216220855713, |
|
"logits/rejected": -1.1561254262924194, |
|
"logps/chosen": -238.072021484375, |
|
"logps/rejected": -259.99688720703125, |
|
"loss": 0.086, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.1014413982629776, |
|
"rewards/margins": 0.43401598930358887, |
|
"rewards/rejected": -0.33257460594177246, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.460358074120518e-07, |
|
"logits/chosen": -1.9168809652328491, |
|
"logits/rejected": -1.2557293176651, |
|
"logps/chosen": -242.58401489257812, |
|
"logps/rejected": -269.3041687011719, |
|
"loss": 0.152, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.09284786880016327, |
|
"rewards/margins": 0.39133232831954956, |
|
"rewards/rejected": -0.2984844744205475, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.305047737536707e-07, |
|
"logits/chosen": -1.9695804119110107, |
|
"logits/rejected": -1.1151535511016846, |
|
"logps/chosen": -290.43756103515625, |
|
"logps/rejected": -263.0965881347656, |
|
"loss": 0.1432, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.08852732926607132, |
|
"rewards/margins": 0.39591920375823975, |
|
"rewards/rejected": -0.307391881942749, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.151357245788917e-07, |
|
"logits/chosen": -1.6923097372055054, |
|
"logits/rejected": -0.981489360332489, |
|
"logps/chosen": -299.1329650878906, |
|
"logps/rejected": -261.29949951171875, |
|
"loss": 0.1661, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.09116804599761963, |
|
"rewards/margins": 0.45771628618240356, |
|
"rewards/rejected": -0.36654824018478394, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.999299915559956e-07, |
|
"logits/chosen": -1.7366340160369873, |
|
"logits/rejected": -1.307422399520874, |
|
"logps/chosen": -270.86468505859375, |
|
"logps/rejected": -257.4667663574219, |
|
"loss": 0.1394, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.14943814277648926, |
|
"rewards/margins": 0.4425061345100403, |
|
"rewards/rejected": -0.293067991733551, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -1.6904795169830322, |
|
"logits/rejected": -1.514775037765503, |
|
"logps/chosen": -245.5784454345703, |
|
"logps/rejected": -294.32110595703125, |
|
"loss": 0.1271, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.1196504607796669, |
|
"rewards/margins": 0.3875979781150818, |
|
"rewards/rejected": -0.2679474949836731, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.700137297712749e-07, |
|
"logits/chosen": -1.6396605968475342, |
|
"logits/rejected": -1.020140290260315, |
|
"logps/chosen": -293.1066589355469, |
|
"logps/rejected": -233.6679229736328, |
|
"loss": 0.1135, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.13376173377037048, |
|
"rewards/margins": 0.3968765139579773, |
|
"rewards/rejected": -0.26311472058296204, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.553057931370729e-07, |
|
"logits/chosen": -1.6284799575805664, |
|
"logits/rejected": -1.0204269886016846, |
|
"logps/chosen": -281.06866455078125, |
|
"logps/rejected": -226.4606170654297, |
|
"loss": 0.1341, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.11780212819576263, |
|
"rewards/margins": 0.3741871416568756, |
|
"rewards/rejected": -0.25638502836227417, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.407663566854008e-07, |
|
"logits/chosen": -1.796983003616333, |
|
"logits/rejected": -1.2661386728286743, |
|
"logps/chosen": -281.26593017578125, |
|
"logps/rejected": -282.1707458496094, |
|
"loss": 0.1166, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.0918949544429779, |
|
"rewards/margins": 0.3467631936073303, |
|
"rewards/rejected": -0.25486817955970764, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.263966802018275e-07, |
|
"logits/chosen": -1.7147595882415771, |
|
"logits/rejected": -1.1799060106277466, |
|
"logps/chosen": -264.71673583984375, |
|
"logps/rejected": -275.89239501953125, |
|
"loss": 0.1189, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.08099232614040375, |
|
"rewards/margins": 0.3739253282546997, |
|
"rewards/rejected": -0.29293301701545715, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.121980087628802e-07, |
|
"logits/chosen": -1.8341114521026611, |
|
"logits/rejected": -1.1735069751739502, |
|
"logps/chosen": -296.3938903808594, |
|
"logps/rejected": -258.4015808105469, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.06104854494333267, |
|
"rewards/margins": 0.4131019711494446, |
|
"rewards/rejected": -0.35205337405204773, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.981715726281666e-07, |
|
"logits/chosen": -1.6473156213760376, |
|
"logits/rejected": -1.303798794746399, |
|
"logps/chosen": -253.82205200195312, |
|
"logps/rejected": -311.9048767089844, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.08462455123662949, |
|
"rewards/margins": 0.483224093914032, |
|
"rewards/rejected": -0.3985995352268219, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.843185871337722e-07, |
|
"logits/chosen": -1.7317914962768555, |
|
"logits/rejected": -1.2134945392608643, |
|
"logps/chosen": -224.9326629638672, |
|
"logps/rejected": -257.9466857910156, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.10292349010705948, |
|
"rewards/margins": 0.4141048789024353, |
|
"rewards/rejected": -0.3111814260482788, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.706402525869633e-07, |
|
"logits/chosen": -1.887621521949768, |
|
"logits/rejected": -1.0996310710906982, |
|
"logps/chosen": -326.8731384277344, |
|
"logps/rejected": -258.768798828125, |
|
"loss": 0.1689, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0996357649564743, |
|
"rewards/margins": 0.36071866750717163, |
|
"rewards/rejected": -0.2610829174518585, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5713775416217884e-07, |
|
"logits/chosen": -1.3531527519226074, |
|
"logits/rejected": -1.1717339754104614, |
|
"logps/chosen": -224.5545654296875, |
|
"logps/rejected": -308.23358154296875, |
|
"loss": 0.109, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.10629155486822128, |
|
"rewards/margins": 0.40465980768203735, |
|
"rewards/rejected": -0.29836827516555786, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.438122617983442e-07, |
|
"logits/chosen": -1.9190078973770142, |
|
"logits/rejected": -1.3009909391403198, |
|
"logps/chosen": -285.29534912109375, |
|
"logps/rejected": -245.940673828125, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.12030161917209625, |
|
"rewards/margins": 0.36642009019851685, |
|
"rewards/rejected": -0.246118426322937, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.3066493009749853e-07, |
|
"logits/chosen": -1.6608394384384155, |
|
"logits/rejected": -1.2748819589614868, |
|
"logps/chosen": -249.3994140625, |
|
"logps/rejected": -303.565673828125, |
|
"loss": 0.0973, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.09208109974861145, |
|
"rewards/margins": 0.3958059251308441, |
|
"rewards/rejected": -0.3037247955799103, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1769689822475147e-07, |
|
"logits/chosen": -1.7159957885742188, |
|
"logits/rejected": -1.2019530534744263, |
|
"logps/chosen": -243.9926300048828, |
|
"logps/rejected": -216.5254364013672, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.0733565241098404, |
|
"rewards/margins": 0.32575708627700806, |
|
"rewards/rejected": -0.25240057706832886, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.049092898095816e-07, |
|
"logits/chosen": -1.695738434791565, |
|
"logits/rejected": -1.370802402496338, |
|
"logps/chosen": -242.7845458984375, |
|
"logps/rejected": -251.65859985351562, |
|
"loss": 0.1549, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.05762239173054695, |
|
"rewards/margins": 0.2988077998161316, |
|
"rewards/rejected": -0.24118542671203613, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9230321284847856e-07, |
|
"logits/chosen": -1.5597546100616455, |
|
"logits/rejected": -1.1920053958892822, |
|
"logps/chosen": -202.78463745117188, |
|
"logps/rejected": -235.42611694335938, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08725061267614365, |
|
"rewards/margins": 0.3124812841415405, |
|
"rewards/rejected": -0.22523066401481628, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.798797596089351e-07, |
|
"logits/chosen": -1.710565209388733, |
|
"logits/rejected": -1.3830753564834595, |
|
"logps/chosen": -191.14996337890625, |
|
"logps/rejected": -228.13143920898438, |
|
"loss": 0.1745, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.07240664213895798, |
|
"rewards/margins": 0.2955719232559204, |
|
"rewards/rejected": -0.22316527366638184, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6764000653481263e-07, |
|
"logits/chosen": -1.6754013299942017, |
|
"logits/rejected": -1.4069788455963135, |
|
"logps/chosen": -212.1529541015625, |
|
"logps/rejected": -288.8616027832031, |
|
"loss": 0.1174, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.01046234555542469, |
|
"rewards/margins": 0.29962268471717834, |
|
"rewards/rejected": -0.2891603410243988, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.555850141530659e-07, |
|
"logits/chosen": -1.9133336544036865, |
|
"logits/rejected": -1.308585286140442, |
|
"logps/chosen": -272.24310302734375, |
|
"logps/rejected": -284.73089599609375, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.06770848482847214, |
|
"rewards/margins": 0.39665132761001587, |
|
"rewards/rejected": -0.3289428651332855, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4371582698185636e-07, |
|
"logits/chosen": -1.67145574092865, |
|
"logits/rejected": -1.438444972038269, |
|
"logps/chosen": -208.4847412109375, |
|
"logps/rejected": -290.7266540527344, |
|
"loss": 0.1622, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.05329760164022446, |
|
"rewards/margins": 0.38010022044181824, |
|
"rewards/rejected": -0.3268026113510132, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.3203347344004737e-07, |
|
"logits/chosen": -1.6708052158355713, |
|
"logits/rejected": -1.0412744283676147, |
|
"logps/chosen": -257.42547607421875, |
|
"logps/rejected": -255.15597534179688, |
|
"loss": 0.099, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09596081078052521, |
|
"rewards/margins": 0.3767240345478058, |
|
"rewards/rejected": -0.2807632088661194, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2053896575809426e-07, |
|
"logits/chosen": -1.5217217206954956, |
|
"logits/rejected": -1.2141330242156982, |
|
"logps/chosen": -183.3319091796875, |
|
"logps/rejected": -227.89114379882812, |
|
"loss": 0.1609, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.0313902273774147, |
|
"rewards/margins": 0.31350037455558777, |
|
"rewards/rejected": -0.28211015462875366, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.092332998903416e-07, |
|
"logits/chosen": -1.786128282546997, |
|
"logits/rejected": -1.0563210248947144, |
|
"logps/chosen": -275.27703857421875, |
|
"logps/rejected": -250.85952758789062, |
|
"loss": 0.1375, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.09343347698450089, |
|
"rewards/margins": 0.4091528058052063, |
|
"rewards/rejected": -0.3157193064689636, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.981174554287239e-07, |
|
"logits/chosen": -1.4629366397857666, |
|
"logits/rejected": -1.2211610078811646, |
|
"logps/chosen": -213.59805297851562, |
|
"logps/rejected": -242.63827514648438, |
|
"loss": 0.1274, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.09229601919651031, |
|
"rewards/margins": 0.42800164222717285, |
|
"rewards/rejected": -0.33570563793182373, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.871923955178918e-07, |
|
"logits/chosen": -1.6027485132217407, |
|
"logits/rejected": -1.358605146408081, |
|
"logps/chosen": -266.48590087890625, |
|
"logps/rejected": -279.26641845703125, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.019939150661230087, |
|
"rewards/margins": 0.34129849076271057, |
|
"rewards/rejected": -0.3213593065738678, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.764590667717562e-07, |
|
"logits/chosen": -1.699567437171936, |
|
"logits/rejected": -1.2492908239364624, |
|
"logps/chosen": -185.8858184814453, |
|
"logps/rejected": -198.7812042236328, |
|
"loss": 0.1461, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0718788132071495, |
|
"rewards/margins": 0.35736599564552307, |
|
"rewards/rejected": -0.28548720479011536, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -1.7908653020858765, |
|
"logits/rejected": -1.3732439279556274, |
|
"logps/chosen": -259.0335998535156, |
|
"logps/rejected": -245.4866485595703, |
|
"loss": 0.1154, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.03901948407292366, |
|
"rewards/margins": 0.35004353523254395, |
|
"rewards/rejected": -0.3110240697860718, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.555713060848433e-07, |
|
"logits/chosen": -1.8778269290924072, |
|
"logits/rejected": -1.3474853038787842, |
|
"logps/chosen": -365.189208984375, |
|
"logps/rejected": -387.7873229980469, |
|
"loss": 0.105, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.05286934971809387, |
|
"rewards/margins": 0.4107195734977722, |
|
"rewards/rejected": -0.35785022377967834, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.454186839872158e-07, |
|
"logits/chosen": -1.6174417734146118, |
|
"logits/rejected": -1.245863914489746, |
|
"logps/chosen": -199.4110107421875, |
|
"logps/rejected": -256.9593200683594, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.06935293972492218, |
|
"rewards/margins": 0.3890882432460785, |
|
"rewards/rejected": -0.3197353482246399, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3546141258376786e-07, |
|
"logits/chosen": -1.9288465976715088, |
|
"logits/rejected": -1.4281173944473267, |
|
"logps/chosen": -290.6533508300781, |
|
"logps/rejected": -269.7490539550781, |
|
"loss": 0.0973, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1082816869020462, |
|
"rewards/margins": 0.44768819212913513, |
|
"rewards/rejected": -0.3394065499305725, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.257003546333042e-07, |
|
"logits/chosen": -2.0008137226104736, |
|
"logits/rejected": -1.320913314819336, |
|
"logps/chosen": -320.672119140625, |
|
"logps/rejected": -271.7630310058594, |
|
"loss": 0.1208, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.07430247217416763, |
|
"rewards/margins": 0.415428102016449, |
|
"rewards/rejected": -0.3411256670951843, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"logits/chosen": -1.7060868740081787, |
|
"logits/rejected": -1.4465411901474, |
|
"logps/chosen": -220.87380981445312, |
|
"logps/rejected": -298.72625732421875, |
|
"loss": 0.1216, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09051541984081268, |
|
"rewards/margins": 0.4572044014930725, |
|
"rewards/rejected": -0.366688996553421, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0677024504760752e-07, |
|
"logits/chosen": -1.638347864151001, |
|
"logits/rejected": -1.0680015087127686, |
|
"logps/chosen": -267.92608642578125, |
|
"logps/rejected": -270.8176574707031, |
|
"loss": 0.121, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08300314098596573, |
|
"rewards/margins": 0.42494750022888184, |
|
"rewards/rejected": -0.3419443368911743, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9760283363267684e-07, |
|
"logits/chosen": -1.5746757984161377, |
|
"logits/rejected": -1.1135761737823486, |
|
"logps/chosen": -221.92343139648438, |
|
"logps/rejected": -250.6765899658203, |
|
"loss": 0.1311, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.07789792120456696, |
|
"rewards/margins": 0.39919474720954895, |
|
"rewards/rejected": -0.3212968707084656, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.8863491596921745e-07, |
|
"logits/chosen": -1.5667272806167603, |
|
"logits/rejected": -0.9896243810653687, |
|
"logps/chosen": -296.76202392578125, |
|
"logps/rejected": -303.38665771484375, |
|
"loss": 0.1393, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0856994241476059, |
|
"rewards/margins": 0.44408607482910156, |
|
"rewards/rejected": -0.35838669538497925, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.798672690923828e-07, |
|
"logits/chosen": -1.6267836093902588, |
|
"logits/rejected": -1.1321130990982056, |
|
"logps/chosen": -253.453369140625, |
|
"logps/rejected": -270.5718078613281, |
|
"loss": 0.1355, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.03761683404445648, |
|
"rewards/margins": 0.36388128995895386, |
|
"rewards/rejected": -0.3262644410133362, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.713006526846439e-07, |
|
"logits/chosen": -1.678847312927246, |
|
"logits/rejected": -1.140943169593811, |
|
"logps/chosen": -262.73638916015625, |
|
"logps/rejected": -221.0591583251953, |
|
"loss": 0.12, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.05530963093042374, |
|
"rewards/margins": 0.353360652923584, |
|
"rewards/rejected": -0.29805102944374084, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.629358090099639e-07, |
|
"logits/chosen": -1.7336199283599854, |
|
"logits/rejected": -1.1567232608795166, |
|
"logps/chosen": -307.455078125, |
|
"logps/rejected": -310.3108825683594, |
|
"loss": 0.1219, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.049434565007686615, |
|
"rewards/margins": 0.42315396666526794, |
|
"rewards/rejected": -0.37371936440467834, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5477346284948292e-07, |
|
"logits/chosen": -1.746050238609314, |
|
"logits/rejected": -1.4284813404083252, |
|
"logps/chosen": -237.4105682373047, |
|
"logps/rejected": -288.8939514160156, |
|
"loss": 0.1522, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.028758401051163673, |
|
"rewards/margins": 0.33460500836372375, |
|
"rewards/rejected": -0.30584657192230225, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4681432143872133e-07, |
|
"logits/chosen": -1.6106250286102295, |
|
"logits/rejected": -1.148560881614685, |
|
"logps/chosen": -258.1077575683594, |
|
"logps/rejected": -318.8696594238281, |
|
"loss": 0.1528, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.04034858196973801, |
|
"rewards/margins": 0.350983202457428, |
|
"rewards/rejected": -0.3106346130371094, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3905907440629752e-07, |
|
"logits/chosen": -1.8278043270111084, |
|
"logits/rejected": -1.0977909564971924, |
|
"logps/chosen": -309.5404357910156, |
|
"logps/rejected": -250.6240692138672, |
|
"loss": 0.1186, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.07560743391513824, |
|
"rewards/margins": 0.4015159606933594, |
|
"rewards/rejected": -0.3259085416793823, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.31508393714177e-07, |
|
"logits/chosen": -1.6950404644012451, |
|
"logits/rejected": -1.275434136390686, |
|
"logps/chosen": -280.67218017578125, |
|
"logps/rejected": -243.7939910888672, |
|
"loss": 0.1406, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.06180018186569214, |
|
"rewards/margins": 0.3164129853248596, |
|
"rewards/rejected": -0.25461283326148987, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.241629335994471e-07, |
|
"logits/chosen": -1.6643226146697998, |
|
"logits/rejected": -0.9809878468513489, |
|
"logps/chosen": -246.31515502929688, |
|
"logps/rejected": -255.1239013671875, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.06152065843343735, |
|
"rewards/margins": 0.4374101161956787, |
|
"rewards/rejected": -0.37588945031166077, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1702333051763271e-07, |
|
"logits/chosen": -1.6431770324707031, |
|
"logits/rejected": -1.1887956857681274, |
|
"logps/chosen": -247.9853515625, |
|
"logps/rejected": -250.0074920654297, |
|
"loss": 0.1573, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.052826426923274994, |
|
"rewards/margins": 0.3623102903366089, |
|
"rewards/rejected": -0.3094838261604309, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1009020308754587e-07, |
|
"logits/chosen": -1.7229608297348022, |
|
"logits/rejected": -1.225477933883667, |
|
"logps/chosen": -235.5467529296875, |
|
"logps/rejected": -204.58261108398438, |
|
"loss": 0.1929, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.04221180081367493, |
|
"rewards/margins": 0.2941130995750427, |
|
"rewards/rejected": -0.2519012987613678, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0336415203768962e-07, |
|
"logits/chosen": -1.4921534061431885, |
|
"logits/rejected": -1.1619257926940918, |
|
"logps/chosen": -285.34259033203125, |
|
"logps/rejected": -295.04864501953125, |
|
"loss": 0.1181, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08926016837358475, |
|
"rewards/margins": 0.4191388189792633, |
|
"rewards/rejected": -0.32987862825393677, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.684576015420277e-08, |
|
"logits/chosen": -1.4940868616104126, |
|
"logits/rejected": -0.863502025604248, |
|
"logps/chosen": -264.92169189453125, |
|
"logps/rejected": -247.3212890625, |
|
"loss": 0.1366, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.027351949363946915, |
|
"rewards/margins": 0.341216504573822, |
|
"rewards/rejected": -0.3138645589351654, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.053559223036746e-08, |
|
"logits/chosen": -1.7528457641601562, |
|
"logits/rejected": -1.0926567316055298, |
|
"logps/chosen": -313.1197509765625, |
|
"logps/rejected": -255.8910675048828, |
|
"loss": 0.1122, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09485974907875061, |
|
"rewards/margins": 0.4161204397678375, |
|
"rewards/rejected": -0.3212606906890869, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.44341950176683e-08, |
|
"logits/chosen": -1.5928373336791992, |
|
"logits/rejected": -1.244089126586914, |
|
"logps/chosen": -265.8317565917969, |
|
"logps/rejected": -325.6632080078125, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.02779344841837883, |
|
"rewards/margins": 0.3878537714481354, |
|
"rewards/rejected": -0.36006033420562744, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits/chosen": -1.4396581649780273, |
|
"logits/rejected": -1.2138065099716187, |
|
"logps/chosen": -240.5614776611328, |
|
"logps/rejected": -349.2239685058594, |
|
"loss": 0.1016, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.06546439230442047, |
|
"rewards/margins": 0.43350672721862793, |
|
"rewards/rejected": -0.36804237961769104, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.285980923996989e-08, |
|
"logits/chosen": -1.752174735069275, |
|
"logits/rejected": -1.3645591735839844, |
|
"logps/chosen": -234.2113800048828, |
|
"logps/rejected": -227.34848022460938, |
|
"loss": 0.1145, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.06558214873075485, |
|
"rewards/margins": 0.37207797169685364, |
|
"rewards/rejected": -0.30649590492248535, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -1.3806583881378174, |
|
"logits/rejected": -1.1515109539031982, |
|
"logps/chosen": -230.22073364257812, |
|
"logps/rejected": -265.16314697265625, |
|
"loss": 0.1637, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.03167334571480751, |
|
"rewards/margins": 0.4240112900733948, |
|
"rewards/rejected": -0.39233797788619995, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.212661423609184e-08, |
|
"logits/chosen": -1.608327865600586, |
|
"logits/rejected": -0.9549871683120728, |
|
"logps/chosen": -225.1513214111328, |
|
"logps/rejected": -209.7755584716797, |
|
"loss": 0.134, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.05912930518388748, |
|
"rewards/margins": 0.37384232878685, |
|
"rewards/rejected": -0.3147130608558655, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.707663716023021e-08, |
|
"logits/chosen": -1.853945016860962, |
|
"logits/rejected": -1.118769884109497, |
|
"logps/chosen": -280.1803283691406, |
|
"logps/rejected": -295.4366760253906, |
|
"loss": 0.1538, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.05890347808599472, |
|
"rewards/margins": 0.4038105010986328, |
|
"rewards/rejected": -0.3449070453643799, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.22383298837098e-08, |
|
"logits/chosen": -1.6122610569000244, |
|
"logits/rejected": -1.0327246189117432, |
|
"logps/chosen": -242.87600708007812, |
|
"logps/rejected": -208.82144165039062, |
|
"loss": 0.0935, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.06353352963924408, |
|
"rewards/margins": 0.38507527112960815, |
|
"rewards/rejected": -0.3215416669845581, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.761211162702117e-08, |
|
"logits/chosen": -1.680824875831604, |
|
"logits/rejected": -1.130185604095459, |
|
"logps/chosen": -244.988037109375, |
|
"logps/rejected": -263.0980529785156, |
|
"loss": 0.1587, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.06494718790054321, |
|
"rewards/margins": 0.3642052412033081, |
|
"rewards/rejected": -0.2992580533027649, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.319838323396691e-08, |
|
"logits/chosen": -1.6353555917739868, |
|
"logits/rejected": -1.2781399488449097, |
|
"logps/chosen": -228.3214569091797, |
|
"logps/rejected": -280.24066162109375, |
|
"loss": 0.1688, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.013865587301552296, |
|
"rewards/margins": 0.3643207550048828, |
|
"rewards/rejected": -0.3781863749027252, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8997527136930004e-08, |
|
"logits/chosen": -1.7263469696044922, |
|
"logits/rejected": -1.1389892101287842, |
|
"logps/chosen": -293.1500244140625, |
|
"logps/rejected": -265.1076354980469, |
|
"loss": 0.0911, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.08173110336065292, |
|
"rewards/margins": 0.42599812150001526, |
|
"rewards/rejected": -0.3442670404911041, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.5009907323737826e-08, |
|
"logits/chosen": -1.561417579650879, |
|
"logits/rejected": -1.165183424949646, |
|
"logps/chosen": -252.6567840576172, |
|
"logps/rejected": -276.4109802246094, |
|
"loss": 0.0987, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.07267307490110397, |
|
"rewards/margins": 0.37683752179145813, |
|
"rewards/rejected": -0.30416446924209595, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.1235869306123766e-08, |
|
"logits/chosen": -1.6449648141860962, |
|
"logits/rejected": -1.2687057256698608, |
|
"logps/chosen": -221.9775848388672, |
|
"logps/rejected": -275.42449951171875, |
|
"loss": 0.1551, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.01698826439678669, |
|
"rewards/margins": 0.35954660177230835, |
|
"rewards/rejected": -0.3425583243370056, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.767574008979007e-08, |
|
"logits/chosen": -1.640044927597046, |
|
"logits/rejected": -1.0068800449371338, |
|
"logps/chosen": -286.2217712402344, |
|
"logps/rejected": -278.906494140625, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.073039710521698, |
|
"rewards/margins": 0.4568893015384674, |
|
"rewards/rejected": -0.3838495910167694, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4329828146074096e-08, |
|
"logits/chosen": -1.9251073598861694, |
|
"logits/rejected": -1.0933765172958374, |
|
"logps/chosen": -352.7594299316406, |
|
"logps/rejected": -274.07269287109375, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.07194405049085617, |
|
"rewards/margins": 0.3892292380332947, |
|
"rewards/rejected": -0.3172852396965027, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.1198423385220822e-08, |
|
"logits/chosen": -1.6453202962875366, |
|
"logits/rejected": -1.2082674503326416, |
|
"logps/chosen": -257.38250732421875, |
|
"logps/rejected": -320.1260986328125, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.10315804183483124, |
|
"rewards/margins": 0.4763513505458832, |
|
"rewards/rejected": -0.37319326400756836, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.82817971312621e-08, |
|
"logits/chosen": -1.790305733680725, |
|
"logits/rejected": -1.1114693880081177, |
|
"logps/chosen": -260.489990234375, |
|
"logps/rejected": -238.48779296875, |
|
"loss": 0.1119, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09379683434963226, |
|
"rewards/margins": 0.4416617453098297, |
|
"rewards/rejected": -0.34786492586135864, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5580202098509078e-08, |
|
"logits/chosen": -1.6579450368881226, |
|
"logits/rejected": -1.3401272296905518, |
|
"logps/chosen": -250.71633911132812, |
|
"logps/rejected": -241.178466796875, |
|
"loss": 0.211, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.027279715985059738, |
|
"rewards/margins": 0.31590917706489563, |
|
"rewards/rejected": -0.2886294722557068, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3093872369654148e-08, |
|
"logits/chosen": -1.72607421875, |
|
"logits/rejected": -1.3506278991699219, |
|
"logps/chosen": -237.07730102539062, |
|
"logps/rejected": -328.91607666015625, |
|
"loss": 0.1135, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0632893294095993, |
|
"rewards/margins": 0.4014991819858551, |
|
"rewards/rejected": -0.3382098376750946, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0823023375489128e-08, |
|
"logits/chosen": -1.8103011846542358, |
|
"logits/rejected": -1.331343412399292, |
|
"logps/chosen": -218.52322387695312, |
|
"logps/rejected": -215.2735595703125, |
|
"loss": 0.1715, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.05841076374053955, |
|
"rewards/margins": 0.369015634059906, |
|
"rewards/rejected": -0.31060490012168884, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.767851876239075e-09, |
|
"logits/chosen": -1.3743187189102173, |
|
"logits/rejected": -1.1308249235153198, |
|
"logps/chosen": -215.2615966796875, |
|
"logps/rejected": -300.07080078125, |
|
"loss": 0.0964, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0790005549788475, |
|
"rewards/margins": 0.41065654158592224, |
|
"rewards/rejected": -0.33165597915649414, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.9285359445145366e-09, |
|
"logits/chosen": -1.418792486190796, |
|
"logits/rejected": -1.1899298429489136, |
|
"logps/chosen": -232.254638671875, |
|
"logps/rejected": -266.04376220703125, |
|
"loss": 0.153, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1082618460059166, |
|
"rewards/margins": 0.42941126227378845, |
|
"rewards/rejected": -0.3211493492126465, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.305234949880001e-09, |
|
"logits/chosen": -1.5959584712982178, |
|
"logits/rejected": -1.2097722291946411, |
|
"logps/chosen": -276.74713134765625, |
|
"logps/rejected": -315.0145568847656, |
|
"loss": 0.1814, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.07109376043081284, |
|
"rewards/margins": 0.38560524582862854, |
|
"rewards/rejected": -0.3145114779472351, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.8980895450474455e-09, |
|
"logits/chosen": -1.7472782135009766, |
|
"logits/rejected": -1.2426128387451172, |
|
"logps/chosen": -275.06793212890625, |
|
"logps/rejected": -278.3852233886719, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.09598256647586823, |
|
"rewards/margins": 0.42074212431907654, |
|
"rewards/rejected": -0.3247596323490143, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7072216536885855e-09, |
|
"logits/chosen": -1.481673002243042, |
|
"logits/rejected": -0.9478602409362793, |
|
"logps/chosen": -281.0965576171875, |
|
"logps/rejected": -267.8004150390625, |
|
"loss": 0.1391, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.02585453912615776, |
|
"rewards/margins": 0.3552091717720032, |
|
"rewards/rejected": -0.32935458421707153, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.7327344598702667e-09, |
|
"logits/chosen": -1.533521056175232, |
|
"logits/rejected": -1.2224196195602417, |
|
"logps/chosen": -232.5213623046875, |
|
"logps/rejected": -339.6204528808594, |
|
"loss": 0.1444, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.004062544088810682, |
|
"rewards/margins": 0.3145105242729187, |
|
"rewards/rejected": -0.3185730576515198, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.747123991141193e-10, |
|
"logits/chosen": -1.80594801902771, |
|
"logits/rejected": -1.2266263961791992, |
|
"logps/chosen": -253.4310760498047, |
|
"logps/rejected": -281.5027160644531, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.06998688727617264, |
|
"rewards/margins": 0.3998931050300598, |
|
"rewards/rejected": -0.3299062252044678, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.332211510807427e-10, |
|
"logits/chosen": -1.7258926630020142, |
|
"logits/rejected": -1.4480842351913452, |
|
"logps/chosen": -233.64529418945312, |
|
"logps/rejected": -240.29598999023438, |
|
"loss": 0.1289, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.031451232731342316, |
|
"rewards/margins": 0.3203813433647156, |
|
"rewards/rejected": -0.28893011808395386, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0830763387897902e-10, |
|
"logits/chosen": -1.8174076080322266, |
|
"logits/rejected": -1.2164661884307861, |
|
"logps/chosen": -286.5456237792969, |
|
"logps/rejected": -223.0185546875, |
|
"loss": 0.1329, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.08067300915718079, |
|
"rewards/margins": 0.36825209856033325, |
|
"rewards/rejected": -0.2875790596008301, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.574351191520691, |
|
"logits/rejected": -1.151698112487793, |
|
"logps/chosen": -267.740234375, |
|
"logps/rejected": -287.2723693847656, |
|
"loss": 0.1084, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0795169472694397, |
|
"rewards/margins": 0.420736163854599, |
|
"rewards/rejected": -0.3412191867828369, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3750, |
|
"total_flos": 0.0, |
|
"train_loss": 0.15019961676597596, |
|
"train_runtime": 15622.399, |
|
"train_samples_per_second": 0.96, |
|
"train_steps_per_second": 0.24 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|