|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994666666666666, |
|
"eval_steps": 500, |
|
"global_step": 937, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.319148936170213e-08, |
|
"logits/chosen": -0.1885092854499817, |
|
"logits/rejected": -0.3158565163612366, |
|
"logps/chosen": -579.0175170898438, |
|
"logps/rejected": -485.2366638183594, |
|
"loss": 0.2285, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": -0.14487330615520477, |
|
"logits/rejected": -0.07772153615951538, |
|
"logps/chosen": -490.6266174316406, |
|
"logps/rejected": -480.7708740234375, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.3263888955116272, |
|
"rewards/chosen": -0.00022973844897933304, |
|
"rewards/margins": -7.616530638188124e-05, |
|
"rewards/rejected": -0.0001535731425974518, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": -0.17436349391937256, |
|
"logits/rejected": -0.20474335551261902, |
|
"logps/chosen": -491.7076721191406, |
|
"logps/rejected": -509.813232421875, |
|
"loss": 0.2025, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -9.073010733118281e-05, |
|
"rewards/margins": 9.670343570178375e-05, |
|
"rewards/rejected": -0.00018743352848105133, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": -0.13612958788871765, |
|
"logits/rejected": -0.13220253586769104, |
|
"logps/chosen": -529.9112548828125, |
|
"logps/rejected": -542.8663940429688, |
|
"loss": 0.2153, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0004884627414867282, |
|
"rewards/margins": 0.00024191811098717153, |
|
"rewards/rejected": -0.0007303808815777302, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": -0.10267192125320435, |
|
"logits/rejected": -0.09803201258182526, |
|
"logps/chosen": -578.48291015625, |
|
"logps/rejected": -594.6197509765625, |
|
"loss": 0.2087, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.0006708616274408996, |
|
"rewards/margins": 0.0006775633082725108, |
|
"rewards/rejected": -0.0013484249357134104, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": -0.10303981602191925, |
|
"logits/rejected": -0.133346289396286, |
|
"logps/chosen": -511.7806091308594, |
|
"logps/rejected": -534.8740234375, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.0018150504911318421, |
|
"rewards/margins": 0.0009007491171360016, |
|
"rewards/rejected": -0.0027157997246831656, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": -0.22180834412574768, |
|
"logits/rejected": -0.08587469905614853, |
|
"logps/chosen": -550.4360961914062, |
|
"logps/rejected": -563.314697265625, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.005344606935977936, |
|
"rewards/margins": 0.0011552043724805117, |
|
"rewards/rejected": -0.006499811075627804, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": -0.1192946806550026, |
|
"logits/rejected": -0.10370689630508423, |
|
"logps/chosen": -537.7994384765625, |
|
"logps/rejected": -544.24951171875, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.010287364944815636, |
|
"rewards/margins": 0.003959923516958952, |
|
"rewards/rejected": -0.0142472879961133, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": -0.12416684627532959, |
|
"logits/rejected": -0.1819939911365509, |
|
"logps/chosen": -515.0903930664062, |
|
"logps/rejected": -531.8945922851562, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.015505559742450714, |
|
"rewards/margins": 0.003083501709625125, |
|
"rewards/rejected": -0.018589060753583908, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": -0.2256217896938324, |
|
"logits/rejected": -0.21886661648750305, |
|
"logps/chosen": -503.46875, |
|
"logps/rejected": -517.8480224609375, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.02677309513092041, |
|
"rewards/margins": 0.004604019224643707, |
|
"rewards/rejected": -0.03137711435556412, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999375059004058e-06, |
|
"logits/chosen": -0.14829424023628235, |
|
"logits/rejected": -0.23846416175365448, |
|
"logps/chosen": -551.840087890625, |
|
"logps/rejected": -553.8361206054688, |
|
"loss": 0.1984, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.04528447985649109, |
|
"rewards/margins": 0.004657699726521969, |
|
"rewards/rejected": -0.04994218051433563, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9955571065548795e-06, |
|
"logits/chosen": -0.25657883286476135, |
|
"logits/rejected": -0.28871434926986694, |
|
"logps/chosen": -600.0863647460938, |
|
"logps/rejected": -634.4995727539062, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0750977173447609, |
|
"rewards/margins": 0.020321359857916832, |
|
"rewards/rejected": -0.09541907161474228, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882736864879e-06, |
|
"logits/chosen": -0.2847597897052765, |
|
"logits/rejected": -0.2475430965423584, |
|
"logps/chosen": -621.210693359375, |
|
"logps/rejected": -635.7638549804688, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.08808945119380951, |
|
"rewards/margins": 0.016084905713796616, |
|
"rewards/rejected": -0.10417436063289642, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977534912960124e-06, |
|
"logits/chosen": -0.19638505578041077, |
|
"logits/rejected": -0.2249457836151123, |
|
"logps/chosen": -586.533203125, |
|
"logps/rejected": -623.6920166015625, |
|
"loss": 0.1987, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.09554356336593628, |
|
"rewards/margins": 0.026452088728547096, |
|
"rewards/rejected": -0.12199564278125763, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963355698422092e-06, |
|
"logits/chosen": -0.2038092166185379, |
|
"logits/rejected": -0.2346273958683014, |
|
"logps/chosen": -569.3733520507812, |
|
"logps/rejected": -591.7291259765625, |
|
"loss": 0.1968, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.08128508180379868, |
|
"rewards/margins": 0.024774957448244095, |
|
"rewards/rejected": -0.10606005042791367, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945755732909625e-06, |
|
"logits/chosen": -0.26792481541633606, |
|
"logits/rejected": -0.16466854512691498, |
|
"logps/chosen": -599.0369262695312, |
|
"logps/rejected": -654.7034912109375, |
|
"loss": 0.2017, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.08535520732402802, |
|
"rewards/margins": 0.025586510077118874, |
|
"rewards/rejected": -0.11094172298908234, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924759456701167e-06, |
|
"logits/chosen": -0.1864621341228485, |
|
"logits/rejected": -0.2081078737974167, |
|
"logps/chosen": -529.7960205078125, |
|
"logps/rejected": -589.953857421875, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.06579065322875977, |
|
"rewards/margins": 0.027334023267030716, |
|
"rewards/rejected": -0.09312467277050018, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900396026378671e-06, |
|
"logits/chosen": -0.26899057626724243, |
|
"logits/rejected": -0.22697623074054718, |
|
"logps/chosen": -629.7769165039062, |
|
"logps/rejected": -715.6519775390625, |
|
"loss": 0.1984, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.08880215883255005, |
|
"rewards/margins": 0.038344450294971466, |
|
"rewards/rejected": -0.1271466165781021, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872699274339169e-06, |
|
"logits/chosen": -0.2153758555650711, |
|
"logits/rejected": -0.09847669303417206, |
|
"logps/chosen": -593.626953125, |
|
"logps/rejected": -643.131591796875, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.08825898915529251, |
|
"rewards/margins": 0.031147807836532593, |
|
"rewards/rejected": -0.1194067969918251, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8417076618132434e-06, |
|
"logits/chosen": -0.12460646778345108, |
|
"logits/rejected": -0.21595044434070587, |
|
"logps/chosen": -633.2433471679688, |
|
"logps/rejected": -660.8668212890625, |
|
"loss": 0.2002, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.09575756639242172, |
|
"rewards/margins": 0.027992555871605873, |
|
"rewards/rejected": -0.12375012785196304, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807464225455655e-06, |
|
"logits/chosen": -0.14323315024375916, |
|
"logits/rejected": -0.2139279544353485, |
|
"logps/chosen": -578.2418212890625, |
|
"logps/rejected": -605.6033325195312, |
|
"loss": 0.2023, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.08724905550479889, |
|
"rewards/margins": 0.01588294841349125, |
|
"rewards/rejected": -0.1031319871544838, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770016517582283e-06, |
|
"logits/chosen": -0.15528282523155212, |
|
"logits/rejected": -0.15554766356945038, |
|
"logps/chosen": -628.6737060546875, |
|
"logps/rejected": -688.3468017578125, |
|
"loss": 0.1956, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.08062631636857986, |
|
"rewards/margins": 0.026851017028093338, |
|
"rewards/rejected": -0.1074773296713829, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7294165401363616e-06, |
|
"logits/chosen": -0.18030421435832977, |
|
"logits/rejected": -0.06279157847166061, |
|
"logps/chosen": -651.3761596679688, |
|
"logps/rejected": -689.3553466796875, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.09219054132699966, |
|
"rewards/margins": 0.028040152043104172, |
|
"rewards/rejected": -0.12023069709539413, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68572067247573e-06, |
|
"logits/chosen": -0.16874362528324127, |
|
"logits/rejected": -0.1639997959136963, |
|
"logps/chosen": -555.4114990234375, |
|
"logps/rejected": -628.8604736328125, |
|
"loss": 0.1989, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.1045011430978775, |
|
"rewards/margins": 0.02383086457848549, |
|
"rewards/rejected": -0.1283320039510727, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638989593081364e-06, |
|
"logits/chosen": -0.12631872296333313, |
|
"logits/rejected": -0.13256661593914032, |
|
"logps/chosen": -553.3485717773438, |
|
"logps/rejected": -603.0968627929688, |
|
"loss": 0.1915, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.08940593898296356, |
|
"rewards/margins": 0.04212746024131775, |
|
"rewards/rejected": -0.1315334141254425, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"logits/chosen": -0.1083054393529892, |
|
"logits/rejected": -0.0742143839597702, |
|
"logps/chosen": -599.7116088867188, |
|
"logps/rejected": -652.4061279296875, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09931150823831558, |
|
"rewards/margins": 0.03657541796565056, |
|
"rewards/rejected": -0.13588692247867584, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536685497209182e-06, |
|
"logits/chosen": -0.11171998083591461, |
|
"logits/rejected": -0.16903842985630035, |
|
"logps/chosen": -593.5734252929688, |
|
"logps/rejected": -672.16552734375, |
|
"loss": 0.1929, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.09612639248371124, |
|
"rewards/margins": 0.05510964244604111, |
|
"rewards/rejected": -0.15123602747917175, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481254545815943e-06, |
|
"logits/chosen": -0.09803778678178787, |
|
"logits/rejected": -0.08885441720485687, |
|
"logps/chosen": -665.0200805664062, |
|
"logps/rejected": -690.63818359375, |
|
"loss": 0.2074, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.08989432454109192, |
|
"rewards/margins": 0.02165227010846138, |
|
"rewards/rejected": -0.111546590924263, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.42307231557875e-06, |
|
"logits/chosen": -0.005570247769355774, |
|
"logits/rejected": -0.13455268740653992, |
|
"logps/chosen": -609.916748046875, |
|
"logps/rejected": -643.1370849609375, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.06449146568775177, |
|
"rewards/margins": 0.022723758593201637, |
|
"rewards/rejected": -0.08721521496772766, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3622196015370305e-06, |
|
"logits/chosen": -0.027301525697112083, |
|
"logits/rejected": -0.0810176208615303, |
|
"logps/chosen": -541.7484130859375, |
|
"logps/rejected": -617.4515380859375, |
|
"loss": 0.1958, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.062103599309921265, |
|
"rewards/margins": 0.0201987586915493, |
|
"rewards/rejected": -0.08230235427618027, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.298780907110648e-06, |
|
"logits/chosen": 0.005954580847173929, |
|
"logits/rejected": -0.03331022337079048, |
|
"logps/chosen": -575.2536010742188, |
|
"logps/rejected": -629.2679443359375, |
|
"loss": 0.1965, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.07110811769962311, |
|
"rewards/margins": 0.021345119923353195, |
|
"rewards/rejected": -0.0924532413482666, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.23284432675381e-06, |
|
"logits/chosen": -0.05120337754487991, |
|
"logits/rejected": -0.07742851972579956, |
|
"logps/chosen": -555.53662109375, |
|
"logps/rejected": -598.7825927734375, |
|
"loss": 0.1928, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.07844728976488113, |
|
"rewards/margins": 0.03324751555919647, |
|
"rewards/rejected": -0.11169479787349701, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.164501423622277e-06, |
|
"logits/chosen": -0.10243277251720428, |
|
"logits/rejected": -0.03584013134241104, |
|
"logps/chosen": -573.73046875, |
|
"logps/rejected": -552.7764892578125, |
|
"loss": 0.206, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.06825848668813705, |
|
"rewards/margins": 0.009243585169315338, |
|
"rewards/rejected": -0.0775020569562912, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0938471024237355e-06, |
|
"logits/chosen": -0.028592532500624657, |
|
"logits/rejected": 0.022961582988500595, |
|
"logps/chosen": -559.4745483398438, |
|
"logps/rejected": -626.19482421875, |
|
"loss": 0.1891, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.05221911519765854, |
|
"rewards/margins": 0.037141233682632446, |
|
"rewards/rejected": -0.08936034888029099, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020979477627907e-06, |
|
"logits/chosen": 0.02807508036494255, |
|
"logits/rejected": -0.025245526805520058, |
|
"logps/chosen": -576.9495239257812, |
|
"logps/rejected": -612.4227905273438, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.04981788620352745, |
|
"rewards/margins": 0.029762808233499527, |
|
"rewards/rejected": -0.07958068698644638, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9459997372194105e-06, |
|
"logits/chosen": 0.014679011888802052, |
|
"logits/rejected": 0.05258216708898544, |
|
"logps/chosen": -537.677978515625, |
|
"logps/rejected": -593.5046997070312, |
|
"loss": 0.1982, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.063970185816288, |
|
"rewards/margins": 0.030909085646271706, |
|
"rewards/rejected": -0.09487926214933395, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869012002182573e-06, |
|
"logits/chosen": -0.10759621858596802, |
|
"logits/rejected": -0.07801146060228348, |
|
"logps/chosen": -577.882568359375, |
|
"logps/rejected": -652.2096557617188, |
|
"loss": 0.1919, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.07402776181697845, |
|
"rewards/margins": 0.038211267441511154, |
|
"rewards/rejected": -0.1122390404343605, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"logits/chosen": -0.02245343290269375, |
|
"logits/rejected": 0.03868962079286575, |
|
"logps/chosen": -616.5308837890625, |
|
"logps/rejected": -642.6898193359375, |
|
"loss": 0.1978, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.08398912847042084, |
|
"rewards/margins": 0.02132398448884487, |
|
"rewards/rejected": -0.10531310737133026, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.709442825758875e-06, |
|
"logits/chosen": -0.07354136556386948, |
|
"logits/rejected": -0.04209035634994507, |
|
"logps/chosen": -616.562744140625, |
|
"logps/rejected": -685.1181030273438, |
|
"loss": 0.2001, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0954984650015831, |
|
"rewards/margins": 0.04605044052004814, |
|
"rewards/rejected": -0.14154890179634094, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6270829708916113e-06, |
|
"logits/chosen": -0.11906121671199799, |
|
"logits/rejected": -0.056663453578948975, |
|
"logps/chosen": -596.8269653320312, |
|
"logps/rejected": -644.7449340820312, |
|
"loss": 0.1951, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.08853301405906677, |
|
"rewards/margins": 0.0294196717441082, |
|
"rewards/rejected": -0.11795268207788467, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543157986727991e-06, |
|
"logits/chosen": 0.013636293821036816, |
|
"logits/rejected": -0.04586212337017059, |
|
"logps/chosen": -599.9013671875, |
|
"logps/rejected": -657.8106079101562, |
|
"loss": 0.2035, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.09106438606977463, |
|
"rewards/margins": 0.04611728712916374, |
|
"rewards/rejected": -0.13718166947364807, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4577844161089614e-06, |
|
"logits/chosen": 0.08837394416332245, |
|
"logits/rejected": -0.07880507409572601, |
|
"logps/chosen": -598.9387817382812, |
|
"logps/rejected": -617.7493286132812, |
|
"loss": 0.2047, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.07388485968112946, |
|
"rewards/margins": 0.019855108112096786, |
|
"rewards/rejected": -0.09373997151851654, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3710808134621577e-06, |
|
"logits/chosen": -0.005928731057792902, |
|
"logits/rejected": -0.030420657247304916, |
|
"logps/chosen": -558.4488525390625, |
|
"logps/rejected": -602.2744750976562, |
|
"loss": 0.2045, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.06963202357292175, |
|
"rewards/margins": 0.03080623783171177, |
|
"rewards/rejected": -0.10043825954198837, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2831675801707126e-06, |
|
"logits/chosen": -0.06051339581608772, |
|
"logits/rejected": 0.0010267883772030473, |
|
"logps/chosen": -594.7051391601562, |
|
"logps/rejected": -662.65625, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.07640456408262253, |
|
"rewards/margins": 0.04043372720479965, |
|
"rewards/rejected": -0.11683829128742218, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.194166797377289e-06, |
|
"logits/chosen": -0.07280877977609634, |
|
"logits/rejected": -0.03391597419977188, |
|
"logps/chosen": -590.2994384765625, |
|
"logps/rejected": -659.5791015625, |
|
"loss": 0.1965, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.068049356341362, |
|
"rewards/margins": 0.03281210735440254, |
|
"rewards/rejected": -0.10086147487163544, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.104202056455501e-06, |
|
"logits/chosen": -0.00242437282577157, |
|
"logits/rejected": -0.059494733810424805, |
|
"logps/chosen": -576.4191284179688, |
|
"logps/rejected": -637.6677856445312, |
|
"loss": 0.189, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.06889624148607254, |
|
"rewards/margins": 0.041952721774578094, |
|
"rewards/rejected": -0.11084897816181183, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.013398287384144e-06, |
|
"logits/chosen": 0.023568499833345413, |
|
"logits/rejected": -0.002035862300544977, |
|
"logps/chosen": -549.110107421875, |
|
"logps/rejected": -576.5599365234375, |
|
"loss": 0.1978, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.06544467061758041, |
|
"rewards/margins": 0.02002917230129242, |
|
"rewards/rejected": -0.08547384291887283, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9218815852625717e-06, |
|
"logits/chosen": -0.07891889661550522, |
|
"logits/rejected": 0.012334518134593964, |
|
"logps/chosen": -603.6246337890625, |
|
"logps/rejected": -670.80615234375, |
|
"loss": 0.1957, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.07147827744483948, |
|
"rewards/margins": 0.032703179866075516, |
|
"rewards/rejected": -0.10418146848678589, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.829779035208113e-06, |
|
"logits/chosen": 0.04133855551481247, |
|
"logits/rejected": -0.06480925530195236, |
|
"logps/chosen": -609.1380004882812, |
|
"logps/rejected": -640.5394287109375, |
|
"loss": 0.1896, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.07929681241512299, |
|
"rewards/margins": 0.025448182597756386, |
|
"rewards/rejected": -0.10474499315023422, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.737218535878705e-06, |
|
"logits/chosen": 0.053513627499341965, |
|
"logits/rejected": -0.0014798849588260055, |
|
"logps/chosen": -560.655517578125, |
|
"logps/rejected": -680.7060546875, |
|
"loss": 0.1833, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.06273628771305084, |
|
"rewards/margins": 0.05317532271146774, |
|
"rewards/rejected": -0.11591160297393799, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.64432862186579e-06, |
|
"logits/chosen": -0.02448629029095173, |
|
"logits/rejected": 0.0629236251115799, |
|
"logps/chosen": -592.8604125976562, |
|
"logps/rejected": -623.6454467773438, |
|
"loss": 0.188, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07425443828105927, |
|
"rewards/margins": 0.044731441885232925, |
|
"rewards/rejected": -0.1189858689904213, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.551238285204126e-06, |
|
"logits/chosen": -0.08944495767354965, |
|
"logits/rejected": 0.10957720130681992, |
|
"logps/chosen": -582.0028076171875, |
|
"logps/rejected": -623.3414306640625, |
|
"loss": 0.1852, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.08551887422800064, |
|
"rewards/margins": 0.04520031064748764, |
|
"rewards/rejected": -0.1307191699743271, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4580767962463688e-06, |
|
"logits/chosen": -0.06701700389385223, |
|
"logits/rejected": -0.07697690278291702, |
|
"logps/chosen": -553.2176513671875, |
|
"logps/rejected": -593.4279174804688, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.09521619975566864, |
|
"rewards/margins": 0.033401116728782654, |
|
"rewards/rejected": -0.1286173164844513, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3649735241511546e-06, |
|
"logits/chosen": -0.0331454835832119, |
|
"logits/rejected": -0.10423725843429565, |
|
"logps/chosen": -543.7550048828125, |
|
"logps/rejected": -637.5558471679688, |
|
"loss": 0.1833, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.07857955992221832, |
|
"rewards/margins": 0.043745510280132294, |
|
"rewards/rejected": -0.12232507765293121, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2720577572339914e-06, |
|
"logits/chosen": -0.04820919781923294, |
|
"logits/rejected": -0.08856736123561859, |
|
"logps/chosen": -550.280517578125, |
|
"logps/rejected": -695.6846313476562, |
|
"loss": 0.182, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0971294417977333, |
|
"rewards/margins": 0.06545992940664291, |
|
"rewards/rejected": -0.16258935630321503, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1794585234303995e-06, |
|
"logits/chosen": -0.06888549029827118, |
|
"logits/rejected": -0.019281355664134026, |
|
"logps/chosen": -631.0794067382812, |
|
"logps/rejected": -681.6578369140625, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0913894921541214, |
|
"rewards/margins": 0.034925952553749084, |
|
"rewards/rejected": -0.12631544470787048, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0873044111206407e-06, |
|
"logits/chosen": 0.05780111625790596, |
|
"logits/rejected": -0.08857734501361847, |
|
"logps/chosen": -554.0203857421875, |
|
"logps/rejected": -672.1288452148438, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08766164630651474, |
|
"rewards/margins": 0.05516546964645386, |
|
"rewards/rejected": -0.1428271234035492, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9957233905648293e-06, |
|
"logits/chosen": -0.07425542920827866, |
|
"logits/rejected": -0.14232522249221802, |
|
"logps/chosen": -590.5303955078125, |
|
"logps/rejected": -634.2200317382812, |
|
"loss": 0.1922, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.08620771020650864, |
|
"rewards/margins": 0.02542172744870186, |
|
"rewards/rejected": -0.1116294413805008, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.904842636196402e-06, |
|
"logits/chosen": -0.04322206228971481, |
|
"logits/rejected": 0.028172429651021957, |
|
"logps/chosen": -600.7364501953125, |
|
"logps/rejected": -676.2041625976562, |
|
"loss": 0.1917, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.07297880947589874, |
|
"rewards/margins": 0.03739452734589577, |
|
"rewards/rejected": -0.11037333309650421, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.814788350020726e-06, |
|
"logits/chosen": 0.03321009874343872, |
|
"logits/rejected": -0.012934012338519096, |
|
"logps/chosen": -562.2665405273438, |
|
"logps/rejected": -677.3361206054688, |
|
"loss": 0.1969, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.06490649282932281, |
|
"rewards/margins": 0.03305616229772568, |
|
"rewards/rejected": -0.09796266257762909, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.725685586364051e-06, |
|
"logits/chosen": -0.0036629363894462585, |
|
"logits/rejected": 0.04252059385180473, |
|
"logps/chosen": -574.91064453125, |
|
"logps/rejected": -599.8869018554688, |
|
"loss": 0.1941, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.06440480798482895, |
|
"rewards/margins": 0.02049572765827179, |
|
"rewards/rejected": -0.08490053564310074, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6376580782162172e-06, |
|
"logits/chosen": -0.028811585158109665, |
|
"logits/rejected": -0.0507902130484581, |
|
"logps/chosen": -614.2153930664062, |
|
"logps/rejected": -697.7538452148438, |
|
"loss": 0.1922, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.07627642154693604, |
|
"rewards/margins": 0.028858337551355362, |
|
"rewards/rejected": -0.1051347628235817, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.550828065408227e-06, |
|
"logits/chosen": -0.06321687996387482, |
|
"logits/rejected": 0.04666703939437866, |
|
"logps/chosen": -562.6372680664062, |
|
"logps/rejected": -623.3756713867188, |
|
"loss": 0.1964, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.07641318440437317, |
|
"rewards/margins": 0.02390345185995102, |
|
"rewards/rejected": -0.10031662881374359, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4653161248633053e-06, |
|
"logits/chosen": -0.03132300823926926, |
|
"logits/rejected": 0.05715359374880791, |
|
"logps/chosen": -556.3076171875, |
|
"logps/rejected": -603.981689453125, |
|
"loss": 0.188, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0778612419962883, |
|
"rewards/margins": 0.032999925315380096, |
|
"rewards/rejected": -0.1108611598610878, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.381241003157162e-06, |
|
"logits/chosen": -0.0160093754529953, |
|
"logits/rejected": -0.033678505569696426, |
|
"logps/chosen": -561.7026977539062, |
|
"logps/rejected": -632.2767333984375, |
|
"loss": 0.1998, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08461053669452667, |
|
"rewards/margins": 0.027624454349279404, |
|
"rewards/rejected": -0.11223499476909637, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.298719451619979e-06, |
|
"logits/chosen": 0.033716317266225815, |
|
"logits/rejected": -0.05850861221551895, |
|
"logps/chosen": -576.638916015625, |
|
"logps/rejected": -658.7437133789062, |
|
"loss": 0.1877, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.07561091333627701, |
|
"rewards/margins": 0.035250525921583176, |
|
"rewards/rejected": -0.11086144298315048, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2178660642091036e-06, |
|
"logits/chosen": -0.0035046476405113935, |
|
"logits/rejected": -0.01969769224524498, |
|
"logps/chosen": -594.3090209960938, |
|
"logps/rejected": -659.2559814453125, |
|
"loss": 0.1831, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.07316828519105911, |
|
"rewards/margins": 0.05110269784927368, |
|
"rewards/rejected": -0.12427099049091339, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1387931183775821e-06, |
|
"logits/chosen": 0.015228897333145142, |
|
"logits/rejected": -0.07918987423181534, |
|
"logps/chosen": -573.0462646484375, |
|
"logps/rejected": -665.521728515625, |
|
"loss": 0.1846, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.060265183448791504, |
|
"rewards/margins": 0.053003955632448196, |
|
"rewards/rejected": -0.1132691353559494, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.061610419159532e-06, |
|
"logits/chosen": 0.04909176006913185, |
|
"logits/rejected": 0.0019339825958013535, |
|
"logps/chosen": -591.9546508789062, |
|
"logps/rejected": -646.3989868164062, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.07109365612268448, |
|
"rewards/margins": 0.050017982721328735, |
|
"rewards/rejected": -0.12111164629459381, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.864251466888364e-07, |
|
"logits/chosen": -0.06201664358377457, |
|
"logits/rejected": 0.021130381152033806, |
|
"logps/chosen": -599.9959716796875, |
|
"logps/rejected": -649.0542602539062, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.07730476558208466, |
|
"rewards/margins": 0.035370949655771255, |
|
"rewards/rejected": -0.11267571151256561, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.133417073629288e-07, |
|
"logits/chosen": -0.060487646609544754, |
|
"logits/rejected": -0.031666141003370285, |
|
"logps/chosen": -546.1062622070312, |
|
"logps/rejected": -622.996337890625, |
|
"loss": 0.1898, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.07208921015262604, |
|
"rewards/margins": 0.048208087682724, |
|
"rewards/rejected": -0.12029729783535004, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.424615888583332e-07, |
|
"logits/chosen": -0.043236102908849716, |
|
"logits/rejected": 0.010083493776619434, |
|
"logps/chosen": -565.9916381835938, |
|
"logps/rejected": -666.7619018554688, |
|
"loss": 0.1877, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.07324135303497314, |
|
"rewards/margins": 0.0542600154876709, |
|
"rewards/rejected": -0.12750136852264404, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.738832191993092e-07, |
|
"logits/chosen": -0.03003951907157898, |
|
"logits/rejected": -0.023655174300074577, |
|
"logps/chosen": -593.333740234375, |
|
"logps/rejected": -679.3707275390625, |
|
"loss": 0.1821, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.07741405069828033, |
|
"rewards/margins": 0.040637485682964325, |
|
"rewards/rejected": -0.11805154383182526, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.077018300752917e-07, |
|
"logits/chosen": -0.10231657326221466, |
|
"logits/rejected": -0.00636244285851717, |
|
"logps/chosen": -564.58154296875, |
|
"logps/rejected": -615.8031005859375, |
|
"loss": 0.1959, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.07730068266391754, |
|
"rewards/margins": 0.038891423493623734, |
|
"rewards/rejected": -0.11619208753108978, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.440093245969342e-07, |
|
"logits/chosen": -0.12688763439655304, |
|
"logits/rejected": 0.07261992990970612, |
|
"logps/chosen": -589.0942993164062, |
|
"logps/rejected": -604.7650756835938, |
|
"loss": 0.193, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09481467306613922, |
|
"rewards/margins": 0.03096696175634861, |
|
"rewards/rejected": -0.12578162550926208, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.828941496744075e-07, |
|
"logits/chosen": -0.0817611813545227, |
|
"logits/rejected": -0.010524662211537361, |
|
"logps/chosen": -537.4359130859375, |
|
"logps/rejected": -617.6580810546875, |
|
"loss": 0.1898, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.08204906433820724, |
|
"rewards/margins": 0.05059989541769028, |
|
"rewards/rejected": -0.13264895975589752, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.244411731951671e-07, |
|
"logits/chosen": -0.08540595322847366, |
|
"logits/rejected": -0.019884133711457253, |
|
"logps/chosen": -569.3452758789062, |
|
"logps/rejected": -623.3587646484375, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.07342539727687836, |
|
"rewards/margins": 0.03685514256358147, |
|
"rewards/rejected": -0.11028053611516953, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6873156617173594e-07, |
|
"logits/chosen": -0.0825069323182106, |
|
"logits/rejected": 0.03379444032907486, |
|
"logps/chosen": -560.5318603515625, |
|
"logps/rejected": -629.8617553710938, |
|
"loss": 0.1859, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.07376088947057724, |
|
"rewards/margins": 0.036222193390131, |
|
"rewards/rejected": -0.10998308658599854, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1584269002318653e-07, |
|
"logits/chosen": 0.015585740096867085, |
|
"logits/rejected": -0.09954878687858582, |
|
"logps/chosen": -536.1882934570312, |
|
"logps/rejected": -646.0011596679688, |
|
"loss": 0.1864, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08902934193611145, |
|
"rewards/margins": 0.054770153015851974, |
|
"rewards/rejected": -0.14379946887493134, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.658479891468258e-07, |
|
"logits/chosen": -0.0846419557929039, |
|
"logits/rejected": -0.027601266279816628, |
|
"logps/chosen": -524.1680297851562, |
|
"logps/rejected": -655.0399169921875, |
|
"loss": 0.1905, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.06788254529237747, |
|
"rewards/margins": 0.07369416952133179, |
|
"rewards/rejected": -0.14157672226428986, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.18816888929272e-07, |
|
"logits/chosen": -0.04564080387353897, |
|
"logits/rejected": -0.056423623114824295, |
|
"logps/chosen": -556.4688720703125, |
|
"logps/rejected": -622.6414794921875, |
|
"loss": 0.1852, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0697588101029396, |
|
"rewards/margins": 0.047328755259513855, |
|
"rewards/rejected": -0.11708758026361465, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.748146993385484e-07, |
|
"logits/chosen": -0.00408951798453927, |
|
"logits/rejected": -0.05295505374670029, |
|
"logps/chosen": -588.3592529296875, |
|
"logps/rejected": -639.141845703125, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08244818449020386, |
|
"rewards/margins": 0.04398036748170853, |
|
"rewards/rejected": -0.1264285445213318, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3390252423108077e-07, |
|
"logits/chosen": -0.023698529228568077, |
|
"logits/rejected": -0.033614348620176315, |
|
"logps/chosen": -550.3116455078125, |
|
"logps/rejected": -649.8366088867188, |
|
"loss": 0.1813, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.06834039092063904, |
|
"rewards/margins": 0.056421488523483276, |
|
"rewards/rejected": -0.12476189434528351, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.961371764995243e-07, |
|
"logits/chosen": -0.020034242421388626, |
|
"logits/rejected": -0.053077150136232376, |
|
"logps/chosen": -530.62744140625, |
|
"logps/rejected": -573.3870239257812, |
|
"loss": 0.1829, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.07050155103206635, |
|
"rewards/margins": 0.03381948173046112, |
|
"rewards/rejected": -0.10432104766368866, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.61571099179261e-07, |
|
"logits/chosen": -0.03530939295887947, |
|
"logits/rejected": -0.05062105506658554, |
|
"logps/chosen": -612.7421875, |
|
"logps/rejected": -640.69580078125, |
|
"loss": 0.1912, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.08575752377510071, |
|
"rewards/margins": 0.034512124955654144, |
|
"rewards/rejected": -0.12026965618133545, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3025229262312367e-07, |
|
"logits/chosen": -0.02685430273413658, |
|
"logits/rejected": -0.050860174000263214, |
|
"logps/chosen": -561.8297119140625, |
|
"logps/rejected": -601.8411865234375, |
|
"loss": 0.1938, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.07358418405056, |
|
"rewards/margins": 0.03662911430001259, |
|
"rewards/rejected": -0.11021329462528229, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0222424784546853e-07, |
|
"logits/chosen": -0.0493302047252655, |
|
"logits/rejected": 0.006892223842442036, |
|
"logps/chosen": -599.8060913085938, |
|
"logps/rejected": -692.0012817382812, |
|
"loss": 0.1973, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.07736798375844955, |
|
"rewards/margins": 0.050996191799640656, |
|
"rewards/rejected": -0.1283641755580902, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.752588612816553e-08, |
|
"logits/chosen": -0.02599761262536049, |
|
"logits/rejected": -0.013713346794247627, |
|
"logps/chosen": -580.1739501953125, |
|
"logps/rejected": -671.7718505859375, |
|
"loss": 0.1925, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.07879254966974258, |
|
"rewards/margins": 0.05689109489321709, |
|
"rewards/rejected": -0.13568365573883057, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.619150497236991e-08, |
|
"logits/chosen": 0.02115291729569435, |
|
"logits/rejected": -0.052051056176424026, |
|
"logps/chosen": -584.3345336914062, |
|
"logps/rejected": -611.1611328125, |
|
"loss": 0.1831, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0652504563331604, |
|
"rewards/margins": 0.03656047582626343, |
|
"rewards/rejected": -0.10181091725826263, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.825073047112743e-08, |
|
"logits/chosen": 0.001644585281610489, |
|
"logits/rejected": -0.010257053188979626, |
|
"logps/chosen": -632.9583740234375, |
|
"logps/rejected": -683.1160888671875, |
|
"loss": 0.204, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.08045943081378937, |
|
"rewards/margins": 0.047462526708841324, |
|
"rewards/rejected": -0.1279219686985016, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.372847616895685e-08, |
|
"logits/chosen": -0.13961699604988098, |
|
"logits/rejected": -0.039934299886226654, |
|
"logps/chosen": -542.2841186523438, |
|
"logps/rejected": -602.2158813476562, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.08329685032367706, |
|
"rewards/margins": 0.034699175506830215, |
|
"rewards/rejected": -0.11799603700637817, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.264490846553279e-08, |
|
"logits/chosen": -0.06002987176179886, |
|
"logits/rejected": -0.00015243441157508641, |
|
"logps/chosen": -576.7723388671875, |
|
"logps/rejected": -622.7515869140625, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0770721584558487, |
|
"rewards/margins": 0.03859367594122887, |
|
"rewards/rejected": -0.11566583812236786, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.015418611516165e-09, |
|
"logits/chosen": 0.030324691906571388, |
|
"logits/rejected": -0.10050855576992035, |
|
"logps/chosen": -561.7693481445312, |
|
"logps/rejected": -647.3151245117188, |
|
"loss": 0.1875, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.07844243943691254, |
|
"rewards/margins": 0.04898718744516373, |
|
"rewards/rejected": -0.12742963433265686, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.506013354186993e-10, |
|
"logits/chosen": 0.004024127032607794, |
|
"logits/rejected": -0.07468675822019577, |
|
"logps/chosen": -574.8939208984375, |
|
"logps/rejected": -618.0397338867188, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.08176033198833466, |
|
"rewards/margins": 0.037909943610429764, |
|
"rewards/rejected": -0.11967027187347412, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 937, |
|
"total_flos": 0.0, |
|
"train_loss": 0.19473119514220044, |
|
"train_runtime": 7963.8235, |
|
"train_samples_per_second": 3.767, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 937, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|