|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.1666858196258545, |
|
"logits/rejected": -2.182244300842285, |
|
"logps/chosen": -12.368609428405762, |
|
"logps/rejected": -24.687644958496094, |
|
"loss": 0.6931, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1, |
|
"use_label": 10.0 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": -2.2281553745269775, |
|
"logits/rejected": -2.276446580886841, |
|
"logps/chosen": -57.036190032958984, |
|
"logps/rejected": -66.88007354736328, |
|
"loss": 0.6927, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.24013157188892365, |
|
"rewards/chosen": 0.003924594726413488, |
|
"rewards/margins": 0.0009102027979679406, |
|
"rewards/rejected": 0.0030143915209919214, |
|
"step": 20, |
|
"use_label": 170.0 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6328125, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": -2.2738099098205566, |
|
"logits/rejected": -2.2623789310455322, |
|
"logps/chosen": -54.78137969970703, |
|
"logps/rejected": -67.2437515258789, |
|
"loss": 0.6914, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.24687500298023224, |
|
"rewards/chosen": 0.01747792772948742, |
|
"rewards/margins": 0.001674558618105948, |
|
"rewards/rejected": 0.015803368762135506, |
|
"step": 40, |
|
"use_label": 482.0 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.3237431049346924, |
|
"logits/rejected": -2.321906089782715, |
|
"logps/chosen": -75.5770034790039, |
|
"logps/rejected": -87.68544006347656, |
|
"loss": 0.6885, |
|
"pred_label": 0.0, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.031676117330789566, |
|
"rewards/margins": 0.009719676338136196, |
|
"rewards/rejected": 0.021956440061330795, |
|
"step": 60, |
|
"use_label": 802.0 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -2.2948005199432373, |
|
"logits/rejected": -2.2623462677001953, |
|
"logps/chosen": -79.29240417480469, |
|
"logps/rejected": -83.04844665527344, |
|
"loss": 0.6876, |
|
"pred_label": 5.800000190734863, |
|
"rewards/accuracies": 0.3343749940395355, |
|
"rewards/chosen": 0.016009245067834854, |
|
"rewards/margins": 0.018887853249907494, |
|
"rewards/rejected": -0.0028786074835807085, |
|
"step": 80, |
|
"use_label": 1116.199951171875 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 4.9997324926814375e-06, |
|
"logits/chosen": -2.2056884765625, |
|
"logits/rejected": -2.210036039352417, |
|
"logps/chosen": -68.87937927246094, |
|
"logps/rejected": -77.87590026855469, |
|
"loss": 0.6876, |
|
"pred_label": 27.537500381469727, |
|
"rewards/accuracies": 0.34062498807907104, |
|
"rewards/chosen": -0.010471501387655735, |
|
"rewards/margins": 0.03584115579724312, |
|
"rewards/rejected": -0.04631265625357628, |
|
"step": 100, |
|
"use_label": 1414.4625244140625 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -2.1076083183288574, |
|
"eval_logits/rejected": -2.0761499404907227, |
|
"eval_logps/chosen": -74.44951629638672, |
|
"eval_logps/rejected": -85.2883071899414, |
|
"eval_loss": 0.6895647048950195, |
|
"eval_pred_label": 89.14286041259766, |
|
"eval_rewards/accuracies": 0.335317462682724, |
|
"eval_rewards/chosen": -0.05548960343003273, |
|
"eval_rewards/margins": 0.04341282695531845, |
|
"eval_rewards/rejected": -0.09890241920948029, |
|
"eval_runtime": 247.5952, |
|
"eval_samples_per_second": 8.078, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 1766.857177734375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 4.9903757462135984e-06, |
|
"logits/chosen": -2.2542896270751953, |
|
"logits/rejected": -2.1902401447296143, |
|
"logps/chosen": -70.2941665649414, |
|
"logps/rejected": -84.7874755859375, |
|
"loss": 0.6884, |
|
"pred_label": 155.6374969482422, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.023759985342621803, |
|
"rewards/margins": 0.051492441445589066, |
|
"rewards/rejected": -0.07525241374969482, |
|
"step": 120, |
|
"use_label": 2110.362548828125 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 4.967700826904229e-06, |
|
"logits/chosen": -2.1823272705078125, |
|
"logits/rejected": -2.210157632827759, |
|
"logps/chosen": -61.80498504638672, |
|
"logps/rejected": -76.43424224853516, |
|
"loss": 0.6907, |
|
"pred_label": 204.22500610351562, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -0.029314354062080383, |
|
"rewards/margins": 0.036702848970890045, |
|
"rewards/rejected": -0.06601719558238983, |
|
"step": 140, |
|
"use_label": 2381.77490234375 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 4.931828996974498e-06, |
|
"logits/chosen": -2.251568555831909, |
|
"logits/rejected": -2.220432996749878, |
|
"logps/chosen": -66.60148620605469, |
|
"logps/rejected": -71.53702545166016, |
|
"loss": 0.69, |
|
"pred_label": 257.2124938964844, |
|
"rewards/accuracies": 0.3343749940395355, |
|
"rewards/chosen": -0.020524730905890465, |
|
"rewards/margins": 0.05932433158159256, |
|
"rewards/rejected": -0.07984906435012817, |
|
"step": 160, |
|
"use_label": 2648.78759765625 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 4.882952093833628e-06, |
|
"logits/chosen": -2.114015817642212, |
|
"logits/rejected": -2.126950740814209, |
|
"logps/chosen": -66.40071868896484, |
|
"logps/rejected": -78.54503631591797, |
|
"loss": 0.6901, |
|
"pred_label": 319.9624938964844, |
|
"rewards/accuracies": 0.328125, |
|
"rewards/chosen": -0.03171534463763237, |
|
"rewards/margins": 0.0544399619102478, |
|
"rewards/rejected": -0.08615531027317047, |
|
"step": 180, |
|
"use_label": 2906.03759765625 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 4.821331504159906e-06, |
|
"logits/chosen": -2.138213872909546, |
|
"logits/rejected": -2.108750343322754, |
|
"logps/chosen": -77.92289733886719, |
|
"logps/rejected": -78.32075500488281, |
|
"loss": 0.6892, |
|
"pred_label": 383.5249938964844, |
|
"rewards/accuracies": 0.37812501192092896, |
|
"rewards/chosen": -0.009543296881020069, |
|
"rewards/margins": 0.06037301942706108, |
|
"rewards/rejected": -0.06991632282733917, |
|
"step": 200, |
|
"use_label": 3162.47509765625 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.051973581314087, |
|
"eval_logits/rejected": -2.028658390045166, |
|
"eval_logps/chosen": -69.3875503540039, |
|
"eval_logps/rejected": -80.99542999267578, |
|
"eval_loss": 0.6893584132194519, |
|
"eval_pred_label": 459.1111145019531, |
|
"eval_rewards/accuracies": 0.3492063581943512, |
|
"eval_rewards/chosen": -0.0048699695616960526, |
|
"eval_rewards/margins": 0.05110359564423561, |
|
"eval_rewards/rejected": -0.05597356706857681, |
|
"eval_runtime": 247.8689, |
|
"eval_samples_per_second": 8.069, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 3500.888916015625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 4.747296766042161e-06, |
|
"logits/chosen": -2.172316074371338, |
|
"logits/rejected": -2.1599390506744385, |
|
"logps/chosen": -73.75865173339844, |
|
"logps/rejected": -76.45826721191406, |
|
"loss": 0.6906, |
|
"pred_label": 537.4000244140625, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.017265746369957924, |
|
"rewards/margins": 0.061459798365831375, |
|
"rewards/rejected": -0.07872554659843445, |
|
"step": 220, |
|
"use_label": 3832.60009765625 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 4.661243806657256e-06, |
|
"logits/chosen": -2.1377243995666504, |
|
"logits/rejected": -2.114131450653076, |
|
"logps/chosen": -78.08522033691406, |
|
"logps/rejected": -88.16291809082031, |
|
"loss": 0.6906, |
|
"pred_label": 610.8624877929688, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.06858871877193451, |
|
"rewards/margins": 0.07855252921581268, |
|
"rewards/rejected": -0.1471412628889084, |
|
"step": 240, |
|
"use_label": 4079.137451171875 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 4.563632824908252e-06, |
|
"logits/chosen": -2.1762757301330566, |
|
"logits/rejected": -2.173243999481201, |
|
"logps/chosen": -69.33678436279297, |
|
"logps/rejected": -82.98787689208984, |
|
"loss": 0.6907, |
|
"pred_label": 682.2750244140625, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.06302420794963837, |
|
"rewards/margins": 0.0732887014746666, |
|
"rewards/rejected": -0.13631291687488556, |
|
"step": 260, |
|
"use_label": 4327.72509765625 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.625, |
|
"learning_rate": 4.454985830346574e-06, |
|
"logits/chosen": -2.16465425491333, |
|
"logits/rejected": -2.1788923740386963, |
|
"logps/chosen": -74.41441345214844, |
|
"logps/rejected": -78.55416870117188, |
|
"loss": 0.6892, |
|
"pred_label": 749.125, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.06083650514483452, |
|
"rewards/margins": 0.04520425945520401, |
|
"rewards/rejected": -0.10604077577590942, |
|
"step": 280, |
|
"use_label": 4580.875 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 4.335883851539693e-06, |
|
"logits/chosen": -2.0553781986236572, |
|
"logits/rejected": -2.0573229789733887, |
|
"logps/chosen": -69.96788024902344, |
|
"logps/rejected": -80.52223205566406, |
|
"loss": 0.6904, |
|
"pred_label": 824.5499877929688, |
|
"rewards/accuracies": 0.359375, |
|
"rewards/chosen": -0.04866168648004532, |
|
"rewards/margins": 0.09801270812749863, |
|
"rewards/rejected": -0.14667439460754395, |
|
"step": 300, |
|
"use_label": 4825.4501953125 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -2.0163989067077637, |
|
"eval_logits/rejected": -1.9942671060562134, |
|
"eval_logps/chosen": -75.15243530273438, |
|
"eval_logps/rejected": -89.50163269042969, |
|
"eval_loss": 0.6908969879150391, |
|
"eval_pred_label": 923.3174438476562, |
|
"eval_rewards/accuracies": 0.3531745970249176, |
|
"eval_rewards/chosen": -0.06251893937587738, |
|
"eval_rewards/margins": 0.07851671427488327, |
|
"eval_rewards/rejected": -0.14103564620018005, |
|
"eval_runtime": 247.8241, |
|
"eval_samples_per_second": 8.07, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 5140.6826171875, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 4.206963828813555e-06, |
|
"logits/chosen": -2.065279483795166, |
|
"logits/rejected": -2.0684821605682373, |
|
"logps/chosen": -72.58639526367188, |
|
"logps/rejected": -89.45655822753906, |
|
"loss": 0.6899, |
|
"pred_label": 1033.7874755859375, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.11120834201574326, |
|
"rewards/margins": 0.0645986869931221, |
|
"rewards/rejected": -0.17580702900886536, |
|
"step": 320, |
|
"use_label": 5440.21240234375 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 4.068915207986931e-06, |
|
"logits/chosen": -2.033398151397705, |
|
"logits/rejected": -1.991502046585083, |
|
"logps/chosen": -71.1894760131836, |
|
"logps/rejected": -84.0774154663086, |
|
"loss": 0.6917, |
|
"pred_label": 1122.112548828125, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.07950185984373093, |
|
"rewards/margins": 0.08617939054965973, |
|
"rewards/rejected": -0.16568127274513245, |
|
"step": 340, |
|
"use_label": 5671.8876953125 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.84765625, |
|
"learning_rate": 3.922476253313921e-06, |
|
"logits/chosen": -2.0358688831329346, |
|
"logits/rejected": -2.0224781036376953, |
|
"logps/chosen": -76.57051849365234, |
|
"logps/rejected": -84.2589340209961, |
|
"loss": 0.6914, |
|
"pred_label": 1204.4124755859375, |
|
"rewards/accuracies": 0.31562501192092896, |
|
"rewards/chosen": -0.11715561151504517, |
|
"rewards/margins": 0.07723374664783478, |
|
"rewards/rejected": -0.19438934326171875, |
|
"step": 360, |
|
"use_label": 5909.58740234375 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 3.768430099352445e-06, |
|
"logits/chosen": -2.12782621383667, |
|
"logits/rejected": -2.086026430130005, |
|
"logps/chosen": -74.41622161865234, |
|
"logps/rejected": -85.17180633544922, |
|
"loss": 0.6918, |
|
"pred_label": 1289.9375, |
|
"rewards/accuracies": 0.3656249940395355, |
|
"rewards/chosen": -0.07592298835515976, |
|
"rewards/margins": 0.08457346260547638, |
|
"rewards/rejected": -0.16049645841121674, |
|
"step": 380, |
|
"use_label": 6144.0625 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 3.607600562872785e-06, |
|
"logits/chosen": -2.126784086227417, |
|
"logits/rejected": -2.1261298656463623, |
|
"logps/chosen": -83.82131958007812, |
|
"logps/rejected": -86.00455474853516, |
|
"loss": 0.6906, |
|
"pred_label": 1373.137451171875, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.05874443054199219, |
|
"rewards/margins": 0.06775099784135818, |
|
"rewards/rejected": -0.12649545073509216, |
|
"step": 400, |
|
"use_label": 6380.8623046875 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.0480618476867676, |
|
"eval_logits/rejected": -2.0248324871063232, |
|
"eval_logps/chosen": -75.26866149902344, |
|
"eval_logps/rejected": -90.80635070800781, |
|
"eval_loss": 0.6920759081840515, |
|
"eval_pred_label": 1472.5714111328125, |
|
"eval_rewards/accuracies": 0.3511904776096344, |
|
"eval_rewards/chosen": -0.06368114054203033, |
|
"eval_rewards/margins": 0.09040173143148422, |
|
"eval_rewards/rejected": -0.15408287942409515, |
|
"eval_runtime": 248.0088, |
|
"eval_samples_per_second": 8.064, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 6695.4287109375, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 3.4408477372034743e-06, |
|
"logits/chosen": -2.055358409881592, |
|
"logits/rejected": -2.068175792694092, |
|
"logps/chosen": -70.47552490234375, |
|
"logps/rejected": -79.02010345458984, |
|
"loss": 0.6903, |
|
"pred_label": 1589.0374755859375, |
|
"rewards/accuracies": 0.3656249940395355, |
|
"rewards/chosen": -0.06399895995855331, |
|
"rewards/margins": 0.0963120311498642, |
|
"rewards/rejected": -0.16031098365783691, |
|
"step": 420, |
|
"use_label": 6988.96240234375 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.95703125, |
|
"learning_rate": 3.269063392575352e-06, |
|
"logits/chosen": -2.0893940925598145, |
|
"logits/rejected": -2.09212589263916, |
|
"logps/chosen": -85.68560028076172, |
|
"logps/rejected": -87.41291809082031, |
|
"loss": 0.6912, |
|
"pred_label": 1667.6875, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.13728377223014832, |
|
"rewards/margins": 0.07875251770019531, |
|
"rewards/rejected": -0.21603628993034363, |
|
"step": 440, |
|
"use_label": 7230.3125 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 3.09316620706208e-06, |
|
"logits/chosen": -2.079465389251709, |
|
"logits/rejected": -2.091001033782959, |
|
"logps/chosen": -73.67254638671875, |
|
"logps/rejected": -81.05415344238281, |
|
"loss": 0.6916, |
|
"pred_label": 1751.75, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.0876312330365181, |
|
"rewards/margins": 0.08376732468605042, |
|
"rewards/rejected": -0.17139855027198792, |
|
"step": 460, |
|
"use_label": 7466.25 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.69921875, |
|
"learning_rate": 2.91409685362137e-06, |
|
"logits/chosen": -2.0379364490509033, |
|
"logits/rejected": -2.0492634773254395, |
|
"logps/chosen": -77.06828308105469, |
|
"logps/rejected": -89.38865661621094, |
|
"loss": 0.6912, |
|
"pred_label": 1832.6500244140625, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.06041146069765091, |
|
"rewards/margins": 0.10216375440359116, |
|
"rewards/rejected": -0.16257521510124207, |
|
"step": 480, |
|
"use_label": 7705.35009765625 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.86328125, |
|
"learning_rate": 2.7328129695107205e-06, |
|
"logits/chosen": -2.031346082687378, |
|
"logits/rejected": -2.0272762775421143, |
|
"logps/chosen": -79.55888366699219, |
|
"logps/rejected": -84.47586822509766, |
|
"loss": 0.6903, |
|
"pred_label": 1919.5374755859375, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.08177755773067474, |
|
"rewards/margins": 0.08017835766077042, |
|
"rewards/rejected": -0.16195592284202576, |
|
"step": 500, |
|
"use_label": 7938.46240234375 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.0070507526397705, |
|
"eval_logits/rejected": -1.9800992012023926, |
|
"eval_logps/chosen": -76.36968231201172, |
|
"eval_logps/rejected": -92.65614318847656, |
|
"eval_loss": 0.6914148926734924, |
|
"eval_pred_label": 2025.793701171875, |
|
"eval_rewards/accuracies": 0.3492063581943512, |
|
"eval_rewards/chosen": -0.07469133287668228, |
|
"eval_rewards/margins": 0.09788943827152252, |
|
"eval_rewards/rejected": -0.1725807636976242, |
|
"eval_runtime": 247.8554, |
|
"eval_samples_per_second": 8.069, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 8246.2060546875, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.78125, |
|
"learning_rate": 2.5502840349805074e-06, |
|
"logits/chosen": -2.026449203491211, |
|
"logits/rejected": -2.0701510906219482, |
|
"logps/chosen": -75.1209487915039, |
|
"logps/rejected": -88.01356506347656, |
|
"loss": 0.6913, |
|
"pred_label": 2148.887451171875, |
|
"rewards/accuracies": 0.3531250059604645, |
|
"rewards/chosen": -0.06801941990852356, |
|
"rewards/margins": 0.09691040217876434, |
|
"rewards/rejected": -0.1649298369884491, |
|
"step": 520, |
|
"use_label": 8533.1123046875 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 2.367486188632446e-06, |
|
"logits/chosen": -2.0245327949523926, |
|
"logits/rejected": -2.0479135513305664, |
|
"logps/chosen": -84.60169219970703, |
|
"logps/rejected": -90.6330795288086, |
|
"loss": 0.692, |
|
"pred_label": 2235.550048828125, |
|
"rewards/accuracies": 0.359375, |
|
"rewards/chosen": -0.09091995656490326, |
|
"rewards/margins": 0.11123095452785492, |
|
"rewards/rejected": -0.20215091109275818, |
|
"step": 540, |
|
"use_label": 8766.4501953125 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.75390625, |
|
"learning_rate": 2.1853970071701415e-06, |
|
"logits/chosen": -2.0177600383758545, |
|
"logits/rejected": -2.016798257827759, |
|
"logps/chosen": -78.94650268554688, |
|
"logps/rejected": -80.36412811279297, |
|
"loss": 0.6917, |
|
"pred_label": 2319.53759765625, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.10138510167598724, |
|
"rewards/margins": 0.06911652535200119, |
|
"rewards/rejected": -0.17050163447856903, |
|
"step": 560, |
|
"use_label": 9002.462890625 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.71875, |
|
"learning_rate": 2.00499027745888e-06, |
|
"logits/chosen": -2.054065704345703, |
|
"logits/rejected": -2.0555384159088135, |
|
"logps/chosen": -80.3529281616211, |
|
"logps/rejected": -95.12947082519531, |
|
"loss": 0.6919, |
|
"pred_label": 2401.675048828125, |
|
"rewards/accuracies": 0.359375, |
|
"rewards/chosen": -0.09597108513116837, |
|
"rewards/margins": 0.09131233394145966, |
|
"rewards/rejected": -0.18728342652320862, |
|
"step": 580, |
|
"use_label": 9240.3251953125 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 1.8272307888529276e-06, |
|
"logits/chosen": -2.059126377105713, |
|
"logits/rejected": -2.099806547164917, |
|
"logps/chosen": -89.58797454833984, |
|
"logps/rejected": -108.6166000366211, |
|
"loss": 0.6903, |
|
"pred_label": 2492.9375, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.12580521404743195, |
|
"rewards/margins": 0.10241512209177017, |
|
"rewards/rejected": -0.22822031378746033, |
|
"step": 600, |
|
"use_label": 9469.0625 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -1.9870026111602783, |
|
"eval_logits/rejected": -1.960112452507019, |
|
"eval_logps/chosen": -78.95431518554688, |
|
"eval_logps/rejected": -95.86695861816406, |
|
"eval_loss": 0.6917396187782288, |
|
"eval_pred_label": 2603.9365234375, |
|
"eval_rewards/accuracies": 0.3551587164402008, |
|
"eval_rewards/chosen": -0.1005377396941185, |
|
"eval_rewards/margins": 0.104151152074337, |
|
"eval_rewards/rejected": -0.2046888917684555, |
|
"eval_runtime": 247.9642, |
|
"eval_samples_per_second": 8.066, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 9772.0634765625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 1.6530691736402317e-06, |
|
"logits/chosen": -1.9752880334854126, |
|
"logits/rejected": -2.011981964111328, |
|
"logps/chosen": -69.71615600585938, |
|
"logps/rejected": -95.88337707519531, |
|
"loss": 0.6918, |
|
"pred_label": 2726.324951171875, |
|
"rewards/accuracies": 0.34687501192092896, |
|
"rewards/chosen": -0.09408678859472275, |
|
"rewards/margins": 0.09362435340881348, |
|
"rewards/rejected": -0.18771114945411682, |
|
"step": 620, |
|
"use_label": 10059.6748046875 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 1.4834368231970922e-06, |
|
"logits/chosen": -2.0288071632385254, |
|
"logits/rejected": -2.0409998893737793, |
|
"logps/chosen": -82.56907653808594, |
|
"logps/rejected": -90.75765228271484, |
|
"loss": 0.6894, |
|
"pred_label": 2805.512451171875, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.10210500657558441, |
|
"rewards/margins": 0.10695278644561768, |
|
"rewards/rejected": -0.2090577781200409, |
|
"step": 640, |
|
"use_label": 10300.4873046875 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 1.3192409070404582e-06, |
|
"logits/chosen": -2.055405855178833, |
|
"logits/rejected": -2.0071816444396973, |
|
"logps/chosen": -77.25361633300781, |
|
"logps/rejected": -88.34065246582031, |
|
"loss": 0.6915, |
|
"pred_label": 2899.9375, |
|
"rewards/accuracies": 0.34687501192092896, |
|
"rewards/chosen": -0.11595650017261505, |
|
"rewards/margins": 0.0952102541923523, |
|
"rewards/rejected": -0.21116676926612854, |
|
"step": 660, |
|
"use_label": 10526.0625 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 1.1613595214152713e-06, |
|
"logits/chosen": -2.056795597076416, |
|
"logits/rejected": -2.071035861968994, |
|
"logps/chosen": -88.15283203125, |
|
"logps/rejected": -96.39839172363281, |
|
"loss": 0.6918, |
|
"pred_label": 2978.0625, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.12273094803094864, |
|
"rewards/margins": 0.09404005855321884, |
|
"rewards/rejected": -0.2167709767818451, |
|
"step": 680, |
|
"use_label": 10767.9375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 1.0106369933615043e-06, |
|
"logits/chosen": -2.0782313346862793, |
|
"logits/rejected": -2.0467371940612793, |
|
"logps/chosen": -97.93621826171875, |
|
"logps/rejected": -106.91497802734375, |
|
"loss": 0.6917, |
|
"pred_label": 3075.71240234375, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.1391007900238037, |
|
"rewards/margins": 0.10766571760177612, |
|
"rewards/rejected": -0.24676652252674103, |
|
"step": 700, |
|
"use_label": 10990.287109375 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -1.9658821821212769, |
|
"eval_logits/rejected": -1.9401167631149292, |
|
"eval_logps/chosen": -80.06806182861328, |
|
"eval_logps/rejected": -97.64107513427734, |
|
"eval_loss": 0.6917343735694885, |
|
"eval_pred_label": 3195.22216796875, |
|
"eval_rewards/accuracies": 0.3511904776096344, |
|
"eval_rewards/chosen": -0.11167524009943008, |
|
"eval_rewards/margins": 0.1107548326253891, |
|
"eval_rewards/rejected": -0.2224300652742386, |
|
"eval_runtime": 247.943, |
|
"eval_samples_per_second": 8.066, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 11284.77734375, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 8.678793653740633e-07, |
|
"logits/chosen": -2.015249729156494, |
|
"logits/rejected": -2.0358498096466064, |
|
"logps/chosen": -70.9017562866211, |
|
"logps/rejected": -86.4397201538086, |
|
"loss": 0.6908, |
|
"pred_label": 3306.39990234375, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.10931293666362762, |
|
"rewards/margins": 0.0925455391407013, |
|
"rewards/rejected": -0.20185847580432892, |
|
"step": 720, |
|
"use_label": 11583.599609375 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.83203125, |
|
"learning_rate": 7.338500848029603e-07, |
|
"logits/chosen": -2.01334810256958, |
|
"logits/rejected": -2.0296788215637207, |
|
"logps/chosen": -74.19635772705078, |
|
"logps/rejected": -83.99024200439453, |
|
"loss": 0.6911, |
|
"pred_label": 3386.16259765625, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.08706559240818024, |
|
"rewards/margins": 0.11473299562931061, |
|
"rewards/rejected": -0.20179858803749084, |
|
"step": 740, |
|
"use_label": 11823.837890625 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 6.092659210462232e-07, |
|
"logits/chosen": -2.052433967590332, |
|
"logits/rejected": -2.060997724533081, |
|
"logps/chosen": -76.93110656738281, |
|
"logps/rejected": -97.30107879638672, |
|
"loss": 0.6904, |
|
"pred_label": 3466.5, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.11182014644145966, |
|
"rewards/margins": 0.07981495559215546, |
|
"rewards/rejected": -0.1916351020336151, |
|
"step": 760, |
|
"use_label": 12063.5 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.859375, |
|
"learning_rate": 4.947931323697983e-07, |
|
"logits/chosen": -2.032320737838745, |
|
"logits/rejected": -2.047227144241333, |
|
"logps/chosen": -89.46810913085938, |
|
"logps/rejected": -95.58660125732422, |
|
"loss": 0.6913, |
|
"pred_label": 3558.875, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.11294672638177872, |
|
"rewards/margins": 0.11753211170434952, |
|
"rewards/rejected": -0.23047883808612823, |
|
"step": 780, |
|
"use_label": 12291.125 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.74609375, |
|
"learning_rate": 3.910439028537638e-07, |
|
"logits/chosen": -2.010045289993286, |
|
"logits/rejected": -1.989505410194397, |
|
"logps/chosen": -70.47514343261719, |
|
"logps/rejected": -75.11082458496094, |
|
"loss": 0.6912, |
|
"pred_label": 3649.22509765625, |
|
"rewards/accuracies": 0.3656249940395355, |
|
"rewards/chosen": -0.08034199476242065, |
|
"rewards/margins": 0.0995674580335617, |
|
"rewards/rejected": -0.17990948259830475, |
|
"step": 800, |
|
"use_label": 12520.775390625 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -1.9421576261520386, |
|
"eval_logits/rejected": -1.9144233465194702, |
|
"eval_logps/chosen": -77.5874252319336, |
|
"eval_logps/rejected": -95.20885467529297, |
|
"eval_loss": 0.6917100548744202, |
|
"eval_pred_label": 3757.174560546875, |
|
"eval_rewards/accuracies": 0.363095223903656, |
|
"eval_rewards/chosen": -0.08686873316764832, |
|
"eval_rewards/margins": 0.11123905330896378, |
|
"eval_rewards/rejected": -0.19810780882835388, |
|
"eval_runtime": 247.8932, |
|
"eval_samples_per_second": 8.068, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 12826.8251953125, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.828125, |
|
"learning_rate": 2.98573068519539e-07, |
|
"logits/chosen": -2.035728931427002, |
|
"logits/rejected": -2.029679775238037, |
|
"logps/chosen": -74.97032165527344, |
|
"logps/rejected": -84.2763900756836, |
|
"loss": 0.6908, |
|
"pred_label": 3872.199951171875, |
|
"rewards/accuracies": 0.3343749940395355, |
|
"rewards/chosen": -0.1004786491394043, |
|
"rewards/margins": 0.08142165094614029, |
|
"rewards/rejected": -0.181900292634964, |
|
"step": 820, |
|
"use_label": 13121.7998046875 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 2.178751501463036e-07, |
|
"logits/chosen": -2.0276803970336914, |
|
"logits/rejected": -2.0149848461151123, |
|
"logps/chosen": -66.70552062988281, |
|
"logps/rejected": -70.63726806640625, |
|
"loss": 0.6915, |
|
"pred_label": 3954.60009765625, |
|
"rewards/accuracies": 0.28437501192092896, |
|
"rewards/chosen": -0.08035041391849518, |
|
"rewards/margins": 0.07462439686059952, |
|
"rewards/rejected": -0.1549748182296753, |
|
"step": 840, |
|
"use_label": 13359.400390625 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 1.4938170864468636e-07, |
|
"logits/chosen": -2.048083543777466, |
|
"logits/rejected": -2.0321922302246094, |
|
"logps/chosen": -90.8042221069336, |
|
"logps/rejected": -100.8233413696289, |
|
"loss": 0.69, |
|
"pred_label": 4041.72509765625, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.0809466689825058, |
|
"rewards/margins": 0.1332779824733734, |
|
"rewards/rejected": -0.2142246663570404, |
|
"step": 860, |
|
"use_label": 13592.275390625 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.5546875, |
|
"learning_rate": 9.345903713082305e-08, |
|
"logits/chosen": -2.047487735748291, |
|
"logits/rejected": -2.034466505050659, |
|
"logps/chosen": -81.69231414794922, |
|
"logps/rejected": -101.5263442993164, |
|
"loss": 0.6915, |
|
"pred_label": 4142.625, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.09660721570253372, |
|
"rewards/margins": 0.13364934921264648, |
|
"rewards/rejected": -0.23025652766227722, |
|
"step": 880, |
|
"use_label": 13811.375 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 5.0406202043228604e-08, |
|
"logits/chosen": -1.9304163455963135, |
|
"logits/rejected": -1.9657026529312134, |
|
"logps/chosen": -75.30284118652344, |
|
"logps/rejected": -99.71704864501953, |
|
"loss": 0.6914, |
|
"pred_label": 4235.9248046875, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.08683101832866669, |
|
"rewards/margins": 0.10066400468349457, |
|
"rewards/rejected": -0.18749502301216125, |
|
"step": 900, |
|
"use_label": 14038.0751953125 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -1.939072847366333, |
|
"eval_logits/rejected": -1.9112603664398193, |
|
"eval_logps/chosen": -77.5274658203125, |
|
"eval_logps/rejected": -95.22908020019531, |
|
"eval_loss": 0.6917905211448669, |
|
"eval_pred_label": 4352.28564453125, |
|
"eval_rewards/accuracies": 0.3571428656578064, |
|
"eval_rewards/chosen": -0.08626923710107803, |
|
"eval_rewards/margins": 0.1120409369468689, |
|
"eval_rewards/rejected": -0.19831016659736633, |
|
"eval_runtime": 247.7794, |
|
"eval_samples_per_second": 8.072, |
|
"eval_steps_per_second": 0.254, |
|
"eval_use_label": 14335.7138671875, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 2.0453443778310766e-08, |
|
"logits/chosen": -1.9801095724105835, |
|
"logits/rejected": -1.9714418649673462, |
|
"logps/chosen": -63.8930778503418, |
|
"logps/rejected": -85.15528869628906, |
|
"loss": 0.6906, |
|
"pred_label": 4473.8125, |
|
"rewards/accuracies": 0.31562501192092896, |
|
"rewards/chosen": -0.06585933268070221, |
|
"rewards/margins": 0.11039040982723236, |
|
"rewards/rejected": -0.17624975740909576, |
|
"step": 920, |
|
"use_label": 14624.1875 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.8359375, |
|
"learning_rate": 3.760945397705828e-09, |
|
"logits/chosen": -1.9589160680770874, |
|
"logits/rejected": -1.9971154928207397, |
|
"logps/chosen": -74.0462646484375, |
|
"logps/rejected": -91.64708709716797, |
|
"loss": 0.6913, |
|
"pred_label": 4558.71240234375, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.0799408107995987, |
|
"rewards/margins": 0.10116855055093765, |
|
"rewards/rejected": -0.18110935389995575, |
|
"step": 940, |
|
"use_label": 14859.287109375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6906769273168754, |
|
"train_runtime": 20027.4031, |
|
"train_samples_per_second": 3.053, |
|
"train_steps_per_second": 0.048 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|