{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.998691442030882,
  "eval_steps": 100,
  "global_step": 477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -2.856400966644287,
      "logits/rejected": -2.6539194583892822,
      "logps/chosen": -302.289794921875,
      "logps/rejected": -253.04373168945312,
      "loss": 2500.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.0416666666666667e-06,
      "logits/chosen": -2.5851330757141113,
      "logits/rejected": -2.6188478469848633,
      "logps/chosen": -265.6952209472656,
      "logps/rejected": -261.4213562011719,
      "loss": 2495.385,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.005977082531899214,
      "rewards/margins": 0.0005994850071147084,
      "rewards/rejected": 0.005377596709877253,
      "step": 10
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.0833333333333334e-06,
      "logits/chosen": -2.6101512908935547,
      "logits/rejected": -2.5939109325408936,
      "logps/chosen": -255.68185424804688,
      "logps/rejected": -248.1254119873047,
      "loss": 2457.86,
      "rewards/accuracies": 0.628125011920929,
      "rewards/chosen": 0.013690793886780739,
      "rewards/margins": 0.00916606467217207,
      "rewards/rejected": 0.004524729214608669,
      "step": 20
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-06,
      "logits/chosen": -2.604323148727417,
      "logits/rejected": -2.598053455352783,
      "logps/chosen": -254.423095703125,
      "logps/rejected": -226.73153686523438,
      "loss": 2402.3988,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": 0.01266755722463131,
      "rewards/margins": 0.024019470438361168,
      "rewards/rejected": -0.01135191135108471,
      "step": 30
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.166666666666667e-06,
      "logits/chosen": -2.6043972969055176,
      "logits/rejected": -2.582412004470825,
      "logps/chosen": -279.12042236328125,
      "logps/rejected": -241.2065887451172,
      "loss": 2290.4264,
      "rewards/accuracies": 0.6968749761581421,
      "rewards/chosen": 0.024520257487893105,
      "rewards/margins": 0.0557018406689167,
      "rewards/rejected": -0.031181585043668747,
      "step": 40
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.999731868769027e-06,
      "logits/chosen": -2.531161308288574,
      "logits/rejected": -2.5264387130737305,
      "logps/chosen": -252.51846313476562,
      "logps/rejected": -247.7227325439453,
      "loss": 2291.9322,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.029673133045434952,
      "rewards/margins": 0.08245684206485748,
      "rewards/rejected": -0.05278371647000313,
      "step": 50
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.9903533134293035e-06,
      "logits/chosen": -2.545037031173706,
      "logits/rejected": -2.5416412353515625,
      "logps/chosen": -260.83905029296875,
      "logps/rejected": -239.8417205810547,
      "loss": 2269.9371,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.03231300041079521,
      "rewards/margins": 0.09112317860126495,
      "rewards/rejected": -0.05881017446517944,
      "step": 60
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.967625656594782e-06,
      "logits/chosen": -2.5832419395446777,
      "logits/rejected": -2.564356565475464,
      "logps/chosen": -275.95452880859375,
      "logps/rejected": -264.7611083984375,
      "loss": 2236.1113,
      "rewards/accuracies": 0.6968749761581421,
      "rewards/chosen": 0.036882974207401276,
      "rewards/margins": 0.08578891307115555,
      "rewards/rejected": -0.048905935138463974,
      "step": 70
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.93167072587771e-06,
      "logits/chosen": -2.552919864654541,
      "logits/rejected": -2.524970293045044,
      "logps/chosen": -257.78448486328125,
      "logps/rejected": -262.3812561035156,
      "loss": 2220.0893,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.037375591695308685,
      "rewards/margins": 0.11339374631643295,
      "rewards/rejected": -0.07601816952228546,
      "step": 80
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.882681251368549e-06,
      "logits/chosen": -2.56257963180542,
      "logits/rejected": -2.5289363861083984,
      "logps/chosen": -239.4860382080078,
      "logps/rejected": -252.36196899414062,
      "loss": 2167.3848,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.04182355850934982,
      "rewards/margins": 0.10886694490909576,
      "rewards/rejected": -0.06704337894916534,
      "step": 90
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.8209198325401815e-06,
      "logits/chosen": -2.5551962852478027,
      "logits/rejected": -2.562063455581665,
      "logps/chosen": -266.8739013671875,
      "logps/rejected": -269.649169921875,
      "loss": 2149.4746,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.04759662598371506,
      "rewards/margins": 0.1307816356420517,
      "rewards/rejected": -0.08318501710891724,
      "step": 100
    },
    {
      "epoch": 0.21,
      "eval_logits/chosen": -2.222931385040283,
      "eval_logits/rejected": -2.1770126819610596,
      "eval_logps/chosen": -260.57818603515625,
      "eval_logps/rejected": -253.25228881835938,
      "eval_loss": 2190.7666015625,
      "eval_rewards/accuracies": 0.7460317611694336,
      "eval_rewards/chosen": 0.044464047998189926,
      "eval_rewards/margins": 0.12927772104740143,
      "eval_rewards/rejected": -0.0848136618733406,
      "eval_runtime": 549.355,
      "eval_samples_per_second": 3.641,
      "eval_steps_per_second": 0.115,
      "step": 100
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.746717530629565e-06,
      "logits/chosen": -2.5229454040527344,
      "logits/rejected": -2.5105621814727783,
      "logps/chosen": -261.46649169921875,
      "logps/rejected": -256.37835693359375,
      "loss": 2174.1184,
      "rewards/accuracies": 0.746874988079071,
      "rewards/chosen": 0.03517655283212662,
      "rewards/margins": 0.11897265911102295,
      "rewards/rejected": -0.08379611372947693,
      "step": 110
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.660472094042121e-06,
      "logits/chosen": -2.5114097595214844,
      "logits/rejected": -2.481840133666992,
      "logps/chosen": -246.70370483398438,
      "logps/rejected": -238.27621459960938,
      "loss": 2181.3053,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.044524095952510834,
      "rewards/margins": 0.10293309390544891,
      "rewards/rejected": -0.05840899422764778,
      "step": 120
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.5626458262912745e-06,
      "logits/chosen": -2.4726600646972656,
      "logits/rejected": -2.46514630317688,
      "logps/chosen": -271.7862548828125,
      "logps/rejected": -260.61676025390625,
      "loss": 2175.3252,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.06200919300317764,
      "rewards/margins": 0.12613125145435333,
      "rewards/rejected": -0.06412206590175629,
      "step": 130
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.453763107901676e-06,
      "logits/chosen": -2.506436586380005,
      "logits/rejected": -2.5005128383636475,
      "logps/chosen": -237.8655242919922,
      "logps/rejected": -249.9298553466797,
      "loss": 2167.2516,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": 0.024008702486753464,
      "rewards/margins": 0.1495535969734192,
      "rewards/rejected": -0.12554487586021423,
      "step": 140
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.33440758555951e-06,
      "logits/chosen": -2.5227842330932617,
      "logits/rejected": -2.536785364151001,
      "logps/chosen": -260.7518005371094,
      "logps/rejected": -235.9630889892578,
      "loss": 2119.4062,
      "rewards/accuracies": 0.715624988079071,
      "rewards/chosen": 0.04733316972851753,
      "rewards/margins": 0.12345732748508453,
      "rewards/rejected": -0.0761241465806961,
      "step": 150
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.205219043576955e-06,
      "logits/chosen": -2.5534234046936035,
      "logits/rejected": -2.4914207458496094,
      "logps/chosen": -254.14065551757812,
      "logps/rejected": -250.95700073242188,
      "loss": 2114.7645,
      "rewards/accuracies": 0.778124988079071,
      "rewards/chosen": 0.06031092256307602,
      "rewards/margins": 0.15202957391738892,
      "rewards/rejected": -0.09171866625547409,
      "step": 160
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.066889974440757e-06,
      "logits/chosen": -2.5092320442199707,
      "logits/rejected": -2.4965577125549316,
      "logps/chosen": -254.91439819335938,
      "logps/rejected": -242.8040008544922,
      "loss": 2229.8135,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.034448813647031784,
      "rewards/margins": 0.12951095402240753,
      "rewards/rejected": -0.09506212174892426,
      "step": 170
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.92016186682789e-06,
      "logits/chosen": -2.521221399307251,
      "logits/rejected": -2.533686399459839,
      "logps/chosen": -251.4235382080078,
      "logps/rejected": -259.76220703125,
      "loss": 2175.5213,
      "rewards/accuracies": 0.721875011920929,
      "rewards/chosen": 0.04062749817967415,
      "rewards/margins": 0.120635487139225,
      "rewards/rejected": -0.08000798523426056,
      "step": 180
    },
    {
      "epoch": 0.4,
      "learning_rate": 3.7658212309857576e-06,
      "logits/chosen": -2.5192363262176514,
      "logits/rejected": -2.4917151927948,
      "logps/chosen": -255.2060089111328,
      "logps/rejected": -250.82022094726562,
      "loss": 2099.443,
      "rewards/accuracies": 0.715624988079071,
      "rewards/chosen": 0.0492943711578846,
      "rewards/margins": 0.14053165912628174,
      "rewards/rejected": -0.09123729914426804,
      "step": 190
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.604695382782159e-06,
      "logits/chosen": -2.5251801013946533,
      "logits/rejected": -2.5034642219543457,
      "logps/chosen": -269.3675537109375,
      "logps/rejected": -262.86376953125,
      "loss": 2105.1256,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 0.0575677752494812,
      "rewards/margins": 0.14340198040008545,
      "rewards/rejected": -0.08583419024944305,
      "step": 200
    },
    {
      "epoch": 0.42,
      "eval_logits/chosen": -2.260270833969116,
      "eval_logits/rejected": -2.2073864936828613,
      "eval_logps/chosen": -259.5941467285156,
      "eval_logps/rejected": -254.3839874267578,
      "eval_loss": 2151.155517578125,
      "eval_rewards/accuracies": 0.7599206566810608,
      "eval_rewards/chosen": 0.05430443957448006,
      "eval_rewards/margins": 0.15043501555919647,
      "eval_rewards/rejected": -0.09613056480884552,
      "eval_runtime": 548.195,
      "eval_samples_per_second": 3.648,
      "eval_steps_per_second": 0.115,
      "step": 200
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.437648009023905e-06,
      "logits/chosen": -2.533383369445801,
      "logits/rejected": -2.4935860633850098,
      "logps/chosen": -243.6236114501953,
      "logps/rejected": -238.85140991210938,
      "loss": 2145.5416,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.06410142779350281,
      "rewards/margins": 0.14374245703220367,
      "rewards/rejected": -0.07964102178812027,
      "step": 210
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.265574537815398e-06,
      "logits/chosen": -2.554565906524658,
      "logits/rejected": -2.56289005279541,
      "logps/chosen": -277.4061584472656,
      "logps/rejected": -253.40048217773438,
      "loss": 2196.8484,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.052330613136291504,
      "rewards/margins": 0.11339585483074188,
      "rewards/rejected": -0.06106524541974068,
      "step": 220
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.089397338773569e-06,
      "logits/chosen": -2.4857611656188965,
      "logits/rejected": -2.473193407058716,
      "logps/chosen": -247.3427276611328,
      "logps/rejected": -241.8627471923828,
      "loss": 2160.1729,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.03845102712512016,
      "rewards/margins": 0.11976752430200577,
      "rewards/rejected": -0.0813164934515953,
      "step": 230
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.9100607788275547e-06,
      "logits/chosen": -2.5121560096740723,
      "logits/rejected": -2.516338586807251,
      "logps/chosen": -257.1769714355469,
      "logps/rejected": -247.3695068359375,
      "loss": 2185.7641,
      "rewards/accuracies": 0.684374988079071,
      "rewards/chosen": 0.0379050187766552,
      "rewards/margins": 0.11140499264001846,
      "rewards/rejected": -0.07349997013807297,
      "step": 240
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.72852616010567e-06,
      "logits/chosen": -2.5092978477478027,
      "logits/rejected": -2.487090826034546,
      "logps/chosen": -264.5955505371094,
      "logps/rejected": -246.3382110595703,
      "loss": 2136.6197,
      "rewards/accuracies": 0.7406250238418579,
      "rewards/chosen": 0.039962492883205414,
      "rewards/margins": 0.1403963267803192,
      "rewards/rejected": -0.1004338413476944,
      "step": 250
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.5457665670441937e-06,
      "logits/chosen": -2.5069711208343506,
      "logits/rejected": -2.5030505657196045,
      "logps/chosen": -257.4859619140625,
      "logps/rejected": -231.91958618164062,
      "loss": 2085.2795,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.05723271518945694,
      "rewards/margins": 0.15024301409721375,
      "rewards/rejected": -0.0930103212594986,
      "step": 260
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.3627616503391813e-06,
      "logits/chosen": -2.525665760040283,
      "logits/rejected": -2.5043163299560547,
      "logps/chosen": -280.7471618652344,
      "logps/rejected": -267.36712646484375,
      "loss": 2089.859,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.05569761246442795,
      "rewards/margins": 0.179846853017807,
      "rewards/rejected": -0.12414924055337906,
      "step": 270
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.1804923757009885e-06,
      "logits/chosen": -2.500837564468384,
      "logits/rejected": -2.501950740814209,
      "logps/chosen": -270.04193115234375,
      "logps/rejected": -248.61978149414062,
      "loss": 2111.6906,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.05320361256599426,
      "rewards/margins": 0.1410333216190338,
      "rewards/rejected": -0.08782971650362015,
      "step": 280
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.9999357655598894e-06,
      "logits/chosen": -2.5122292041778564,
      "logits/rejected": -2.50368070602417,
      "logps/chosen": -258.72686767578125,
      "logps/rejected": -256.91387939453125,
      "loss": 2137.0592,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.053160279989242554,
      "rewards/margins": 0.15454119443893433,
      "rewards/rejected": -0.10138092190027237,
      "step": 290
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089576e-06,
      "logits/chosen": -2.471623659133911,
      "logits/rejected": -2.4690403938293457,
      "logps/chosen": -246.51766967773438,
      "logps/rejected": -251.79257202148438,
      "loss": 2135.4973,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.0453377440571785,
      "rewards/margins": 0.12641170620918274,
      "rewards/rejected": -0.08107397705316544,
      "step": 300
    },
    {
      "epoch": 0.63,
      "eval_logits/chosen": -2.2764506340026855,
      "eval_logits/rejected": -2.2231767177581787,
      "eval_logps/chosen": -258.7624206542969,
      "eval_logps/rejected": -252.75852966308594,
      "eval_loss": 2129.089599609375,
      "eval_rewards/accuracies": 0.7559523582458496,
      "eval_rewards/chosen": 0.06262180209159851,
      "eval_rewards/margins": 0.14249789714813232,
      "eval_rewards/rejected": -0.07987607270479202,
      "eval_runtime": 547.9938,
      "eval_samples_per_second": 3.65,
      "eval_steps_per_second": 0.115,
      "step": 300
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.647817538357072e-06,
      "logits/chosen": -2.5041086673736572,
      "logits/rejected": -2.495436191558838,
      "logps/chosen": -264.5109558105469,
      "logps/rejected": -248.3275604248047,
      "loss": 2107.123,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.05480458214879036,
      "rewards/margins": 0.13964474201202393,
      "rewards/rejected": -0.08484016358852386,
      "step": 310
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4781433892011132e-06,
      "logits/chosen": -2.53191876411438,
      "logits/rejected": -2.4989166259765625,
      "logps/chosen": -242.36599731445312,
      "logps/rejected": -243.78067016601562,
      "loss": 2076.0621,
      "rewards/accuracies": 0.7718750238418579,
      "rewards/chosen": 0.05456935614347458,
      "rewards/margins": 0.14978976547718048,
      "rewards/rejected": -0.0952204093337059,
      "step": 320
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135999e-06,
      "logits/chosen": -2.4768006801605225,
      "logits/rejected": -2.4569873809814453,
      "logps/chosen": -263.0523681640625,
      "logps/rejected": -250.5469207763672,
      "loss": 2112.1141,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": 0.044828929007053375,
      "rewards/margins": 0.13050048053264618,
      "rewards/rejected": -0.0856715738773346,
      "step": 330
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.1561076868822756e-06,
      "logits/chosen": -2.5158028602600098,
      "logits/rejected": -2.5096983909606934,
      "logps/chosen": -275.6848449707031,
      "logps/rejected": -246.7259979248047,
      "loss": 2151.2445,
      "rewards/accuracies": 0.746874988079071,
      "rewards/chosen": 0.052164845168590546,
      "rewards/margins": 0.15314052999019623,
      "rewards/rejected": -0.10097566992044449,
      "step": 340
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0054723495346484e-06,
      "logits/chosen": -2.518799304962158,
      "logits/rejected": -2.4620516300201416,
      "logps/chosen": -249.27401733398438,
      "logps/rejected": -218.7183074951172,
      "loss": 2093.9803,
      "rewards/accuracies": 0.7593749761581421,
      "rewards/chosen": 0.0662151575088501,
      "rewards/margins": 0.14556364715099335,
      "rewards/rejected": -0.07934850454330444,
      "step": 350
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367876e-07,
      "logits/chosen": -2.5340943336486816,
      "logits/rejected": -2.5006654262542725,
      "logps/chosen": -260.32464599609375,
      "logps/rejected": -237.3218536376953,
      "loss": 2094.1246,
      "rewards/accuracies": 0.765625,
      "rewards/chosen": 0.05396001785993576,
      "rewards/margins": 0.15317106246948242,
      "rewards/rejected": -0.09921105206012726,
      "step": 360
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.289996455765749e-07,
      "logits/chosen": -2.529265880584717,
      "logits/rejected": -2.515712261199951,
      "logps/chosen": -266.943115234375,
      "logps/rejected": -246.0579376220703,
      "loss": 2115.357,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.052078358829021454,
      "rewards/margins": 0.1462351232767105,
      "rewards/rejected": -0.09415675699710846,
      "step": 370
    },
    {
      "epoch": 0.8,
      "learning_rate": 6.046442623320145e-07,
      "logits/chosen": -2.4891440868377686,
      "logits/rejected": -2.499753952026367,
      "logps/chosen": -253.51632690429688,
      "logps/rejected": -245.4505615234375,
      "loss": 2082.182,
      "rewards/accuracies": 0.7406250238418579,
      "rewards/chosen": 0.051686953753232956,
      "rewards/margins": 0.1390691101551056,
      "rewards/rejected": -0.08738215267658234,
      "step": 380
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-07,
      "logits/chosen": -2.532160997390747,
      "logits/rejected": -2.5001654624938965,
      "logps/chosen": -280.9754333496094,
      "logps/rejected": -279.0588684082031,
      "loss": 2114.3043,
      "rewards/accuracies": 0.7593749761581421,
      "rewards/chosen": 0.0547635443508625,
      "rewards/margins": 0.14076778292655945,
      "rewards/rejected": -0.08600424975156784,
      "step": 390
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.8702478614051353e-07,
      "logits/chosen": -2.4791765213012695,
      "logits/rejected": -2.4799935817718506,
      "logps/chosen": -246.14102172851562,
      "logps/rejected": -251.533447265625,
      "loss": 2099.8018,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": 0.0392024889588356,
      "rewards/margins": 0.13221651315689087,
      "rewards/rejected": -0.09301402419805527,
      "step": 400
    },
    {
      "epoch": 0.84,
      "eval_logits/chosen": -2.254145860671997,
      "eval_logits/rejected": -2.2016360759735107,
      "eval_logps/chosen": -259.64398193359375,
      "eval_logps/rejected": -254.3590850830078,
      "eval_loss": 2121.667236328125,
      "eval_rewards/accuracies": 0.7539682388305664,
      "eval_rewards/chosen": 0.05380600318312645,
      "eval_rewards/margins": 0.14968746900558472,
      "eval_rewards/rejected": -0.09588146954774857,
      "eval_runtime": 547.9727,
      "eval_samples_per_second": 3.65,
      "eval_steps_per_second": 0.115,
      "step": 400
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.9492720416985004e-07,
      "logits/chosen": -2.4832329750061035,
      "logits/rejected": -2.463463306427002,
      "logps/chosen": -284.7741394042969,
      "logps/rejected": -252.4269561767578,
      "loss": 2145.448,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.05263269692659378,
      "rewards/margins": 0.15021036565303802,
      "rewards/rejected": -0.09757767617702484,
      "step": 410
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020857e-07,
      "logits/chosen": -2.4804348945617676,
      "logits/rejected": -2.457764148712158,
      "logps/chosen": -254.78604125976562,
      "logps/rejected": -278.61346435546875,
      "loss": 2123.6629,
      "rewards/accuracies": 0.6968749761581421,
      "rewards/chosen": 0.033899884670972824,
      "rewards/margins": 0.11116783320903778,
      "rewards/rejected": -0.07726795971393585,
      "step": 420
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.4662207078575685e-07,
      "logits/chosen": -2.4848549365997314,
      "logits/rejected": -2.485640048980713,
      "logps/chosen": -268.3457336425781,
      "logps/rejected": -268.5885925292969,
      "loss": 2144.4309,
      "rewards/accuracies": 0.721875011920929,
      "rewards/chosen": 0.03841588646173477,
      "rewards/margins": 0.13024446368217468,
      "rewards/rejected": -0.09182857722043991,
      "step": 430
    },
    {
      "epoch": 0.92,
      "learning_rate": 9.120948298936422e-08,
      "logits/chosen": -2.457054615020752,
      "logits/rejected": -2.4329726696014404,
      "logps/chosen": -231.9584197998047,
      "logps/rejected": -234.6277313232422,
      "loss": 2118.3984,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.038600482046604156,
      "rewards/margins": 0.13669805228710175,
      "rewards/rejected": -0.09809757024049759,
      "step": 440
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.870879364444109e-08,
      "logits/chosen": -2.5156655311584473,
      "logits/rejected": -2.563300848007202,
      "logps/chosen": -263.9936218261719,
      "logps/rejected": -265.6227722167969,
      "loss": 2123.5402,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.04902677983045578,
      "rewards/margins": 0.1260160207748413,
      "rewards/rejected": -0.07698923349380493,
      "step": 450
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.93478202307823e-08,
      "logits/chosen": -2.470996379852295,
      "logits/rejected": -2.4720451831817627,
      "logps/chosen": -258.21734619140625,
      "logps/rejected": -262.04925537109375,
      "loss": 2078.5094,
      "rewards/accuracies": 0.7281249761581421,
      "rewards/chosen": 0.04391016811132431,
      "rewards/margins": 0.14817874133586884,
      "rewards/rejected": -0.10426857322454453,
      "step": 460
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.283947088983663e-09,
      "logits/chosen": -2.513140916824341,
      "logits/rejected": -2.535651206970215,
      "logps/chosen": -249.6727752685547,
      "logps/rejected": -248.2782745361328,
      "loss": 2093.2779,
      "rewards/accuracies": 0.75,
      "rewards/chosen": 0.04741714522242546,
      "rewards/margins": 0.143958181142807,
      "rewards/rejected": -0.09654103964567184,
      "step": 470
    },
    {
      "epoch": 1.0,
      "step": 477,
      "total_flos": 0.0,
      "train_loss": 2164.5614415454666,
      "train_runtime": 32346.8016,
      "train_samples_per_second": 1.89,
      "train_steps_per_second": 0.015
    }
  ],
  "logging_steps": 10,
  "max_steps": 477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}