|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 422, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004739336492890996, |
|
"grad_norm": 4.468888282775879, |
|
"learning_rate": 2.3255813953488374e-07, |
|
"logits/chosen": 0.17294234037399292, |
|
"logits/rejected": 0.25382864475250244, |
|
"logps/chosen": -432.88006591796875, |
|
"logps/rejected": -432.22393798828125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009478672985781991, |
|
"grad_norm": 4.5246357917785645, |
|
"learning_rate": 4.651162790697675e-07, |
|
"logits/chosen": -0.02501661702990532, |
|
"logits/rejected": -0.052543528378009796, |
|
"logps/chosen": -435.35772705078125, |
|
"logps/rejected": -450.81500244140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.014218009478672985, |
|
"grad_norm": 4.940310001373291, |
|
"learning_rate": 6.976744186046513e-07, |
|
"logits/chosen": -0.32847464084625244, |
|
"logits/rejected": -0.23367789387702942, |
|
"logps/chosen": -383.3726806640625, |
|
"logps/rejected": -430.9427185058594, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.022322356700897217, |
|
"rewards/margins": 0.011036252602934837, |
|
"rewards/rejected": 0.011286105029284954, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.018957345971563982, |
|
"grad_norm": 4.610801696777344, |
|
"learning_rate": 9.30232558139535e-07, |
|
"logits/chosen": -0.053772248327732086, |
|
"logits/rejected": 0.035066261887550354, |
|
"logps/chosen": -511.0668640136719, |
|
"logps/rejected": -549.9104614257812, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.03445637226104736, |
|
"rewards/margins": 0.005329892970621586, |
|
"rewards/rejected": 0.029126476496458054, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.023696682464454975, |
|
"grad_norm": 5.364634990692139, |
|
"learning_rate": 1.1627906976744188e-06, |
|
"logits/chosen": -0.0910184308886528, |
|
"logits/rejected": -0.18476980924606323, |
|
"logps/chosen": -398.50714111328125, |
|
"logps/rejected": -442.03125, |
|
"loss": 0.7036, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.030631696805357933, |
|
"rewards/margins": -0.017725910991430283, |
|
"rewards/rejected": -0.012905789539217949, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02843601895734597, |
|
"grad_norm": 4.0496826171875, |
|
"learning_rate": 1.3953488372093025e-06, |
|
"logits/chosen": -0.08958485722541809, |
|
"logits/rejected": -0.16801398992538452, |
|
"logps/chosen": -450.0301818847656, |
|
"logps/rejected": -480.5152282714844, |
|
"loss": 0.7015, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.02475794591009617, |
|
"rewards/margins": -0.015588914975523949, |
|
"rewards/rejected": -0.009169031865894794, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03317535545023697, |
|
"grad_norm": 4.628786563873291, |
|
"learning_rate": 1.6279069767441862e-06, |
|
"logits/chosen": 0.3181225061416626, |
|
"logits/rejected": 0.4123896062374115, |
|
"logps/chosen": -390.36358642578125, |
|
"logps/rejected": -464.38818359375, |
|
"loss": 0.6947, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.021739648655056953, |
|
"rewards/margins": -0.00039539369754493237, |
|
"rewards/rejected": 0.022135045379400253, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.037914691943127965, |
|
"grad_norm": 9.313066482543945, |
|
"learning_rate": 1.86046511627907e-06, |
|
"logits/chosen": 0.05047750473022461, |
|
"logits/rejected": 0.2502543330192566, |
|
"logps/chosen": -357.781982421875, |
|
"logps/rejected": -437.6925964355469, |
|
"loss": 0.6953, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.04442901909351349, |
|
"rewards/margins": -0.0012866007164120674, |
|
"rewards/rejected": 0.04571562260389328, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04265402843601896, |
|
"grad_norm": 5.039402961730957, |
|
"learning_rate": 2.0930232558139536e-06, |
|
"logits/chosen": -0.023771263659000397, |
|
"logits/rejected": -0.10481404513120651, |
|
"logps/chosen": -551.095458984375, |
|
"logps/rejected": -534.9522705078125, |
|
"loss": 0.7109, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.014494383707642555, |
|
"rewards/margins": -0.032524872571229935, |
|
"rewards/rejected": 0.01803048700094223, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04739336492890995, |
|
"grad_norm": 5.4466633796691895, |
|
"learning_rate": 2.3255813953488376e-06, |
|
"logits/chosen": 0.3669472336769104, |
|
"logits/rejected": 0.3625236749649048, |
|
"logps/chosen": -443.6736755371094, |
|
"logps/rejected": -452.95770263671875, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 1.2372620403766632e-05, |
|
"rewards/margins": 0.013320828787982464, |
|
"rewards/rejected": -0.013308453373610973, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.052132701421800945, |
|
"grad_norm": 4.648890495300293, |
|
"learning_rate": 2.558139534883721e-06, |
|
"logits/chosen": -0.03190188109874725, |
|
"logits/rejected": -0.17422696948051453, |
|
"logps/chosen": -453.6244812011719, |
|
"logps/rejected": -391.9805603027344, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.011450385674834251, |
|
"rewards/margins": 0.0010257705580443144, |
|
"rewards/rejected": 0.010424615815281868, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05687203791469194, |
|
"grad_norm": 4.735479354858398, |
|
"learning_rate": 2.790697674418605e-06, |
|
"logits/chosen": 0.19764292240142822, |
|
"logits/rejected": 0.31575778126716614, |
|
"logps/chosen": -536.1707763671875, |
|
"logps/rejected": -618.8275146484375, |
|
"loss": 0.7078, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.008671605959534645, |
|
"rewards/margins": -0.026794325560331345, |
|
"rewards/rejected": 0.01812272146344185, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.061611374407582936, |
|
"grad_norm": 4.4631147384643555, |
|
"learning_rate": 3.0232558139534885e-06, |
|
"logits/chosen": 0.054879866540431976, |
|
"logits/rejected": 0.17054584622383118, |
|
"logps/chosen": -529.5116577148438, |
|
"logps/rejected": -521.1355590820312, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.0014204876497387886, |
|
"rewards/margins": 0.014372622594237328, |
|
"rewards/rejected": -0.015793107450008392, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06635071090047394, |
|
"grad_norm": 5.378948211669922, |
|
"learning_rate": 3.2558139534883724e-06, |
|
"logits/chosen": 0.06459379941225052, |
|
"logits/rejected": 0.09842963516712189, |
|
"logps/chosen": -496.6351013183594, |
|
"logps/rejected": -505.83819580078125, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.006574093829840422, |
|
"rewards/margins": 0.0018039373680949211, |
|
"rewards/rejected": -0.008378027006983757, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.07109004739336493, |
|
"grad_norm": 3.6396803855895996, |
|
"learning_rate": 3.4883720930232564e-06, |
|
"logits/chosen": 0.30802494287490845, |
|
"logits/rejected": 0.6311283111572266, |
|
"logps/chosen": -366.74993896484375, |
|
"logps/rejected": -442.3916015625, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.011980474926531315, |
|
"rewards/margins": 0.023720718920230865, |
|
"rewards/rejected": -0.011740244925022125, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07582938388625593, |
|
"grad_norm": 4.317215919494629, |
|
"learning_rate": 3.72093023255814e-06, |
|
"logits/chosen": 0.19539549946784973, |
|
"logits/rejected": 0.1992233693599701, |
|
"logps/chosen": -458.35693359375, |
|
"logps/rejected": -509.75177001953125, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0014730808325111866, |
|
"rewards/margins": 0.048042502254247665, |
|
"rewards/rejected": -0.049515582621097565, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.08056872037914692, |
|
"grad_norm": 4.411427974700928, |
|
"learning_rate": 3.953488372093024e-06, |
|
"logits/chosen": 0.2250944823026657, |
|
"logits/rejected": 0.23612278699874878, |
|
"logps/chosen": -529.0587158203125, |
|
"logps/rejected": -525.65380859375, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.037429191172122955, |
|
"rewards/margins": 0.009371446445584297, |
|
"rewards/rejected": -0.0468006357550621, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08530805687203792, |
|
"grad_norm": 5.560946941375732, |
|
"learning_rate": 4.186046511627907e-06, |
|
"logits/chosen": 0.3321930170059204, |
|
"logits/rejected": 0.4437577724456787, |
|
"logps/chosen": -366.59173583984375, |
|
"logps/rejected": -427.55413818359375, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0035821665078401566, |
|
"rewards/margins": 0.0638398677110672, |
|
"rewards/rejected": -0.060257695615291595, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.09004739336492891, |
|
"grad_norm": 5.310560703277588, |
|
"learning_rate": 4.418604651162791e-06, |
|
"logits/chosen": 0.4229128360748291, |
|
"logits/rejected": 0.4650568962097168, |
|
"logps/chosen": -397.71112060546875, |
|
"logps/rejected": -421.1392822265625, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0423818975687027, |
|
"rewards/margins": 0.025886641815304756, |
|
"rewards/rejected": -0.0682685375213623, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0947867298578199, |
|
"grad_norm": 4.838053226470947, |
|
"learning_rate": 4.651162790697675e-06, |
|
"logits/chosen": 0.2030269056558609, |
|
"logits/rejected": -0.20078778266906738, |
|
"logps/chosen": -507.3893737792969, |
|
"logps/rejected": -482.47882080078125, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.08994510769844055, |
|
"rewards/margins": 0.015940451994538307, |
|
"rewards/rejected": -0.10588555783033371, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0995260663507109, |
|
"grad_norm": 3.838012456893921, |
|
"learning_rate": 4.883720930232559e-06, |
|
"logits/chosen": 0.024930402636528015, |
|
"logits/rejected": 0.07886520028114319, |
|
"logps/chosen": -404.5774230957031, |
|
"logps/rejected": -484.91766357421875, |
|
"loss": 0.6642, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.043739788234233856, |
|
"rewards/margins": 0.0630723387002945, |
|
"rewards/rejected": -0.10681212693452835, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.10426540284360189, |
|
"grad_norm": 4.912895202636719, |
|
"learning_rate": 5.116279069767442e-06, |
|
"logits/chosen": -0.024710316210985184, |
|
"logits/rejected": 0.08874442428350449, |
|
"logps/chosen": -380.9420166015625, |
|
"logps/rejected": -390.6964111328125, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.06719113886356354, |
|
"rewards/margins": 0.0431598462164402, |
|
"rewards/rejected": -0.11035098135471344, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.10900473933649289, |
|
"grad_norm": 4.68002462387085, |
|
"learning_rate": 5.348837209302326e-06, |
|
"logits/chosen": 0.09284278750419617, |
|
"logits/rejected": 0.36523595452308655, |
|
"logps/chosen": -291.48138427734375, |
|
"logps/rejected": -371.36328125, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.08336787670850754, |
|
"rewards/margins": 0.06293603777885437, |
|
"rewards/rejected": -0.1463039219379425, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.11374407582938388, |
|
"grad_norm": 4.751553058624268, |
|
"learning_rate": 5.58139534883721e-06, |
|
"logits/chosen": 0.1513872593641281, |
|
"logits/rejected": 0.3874552845954895, |
|
"logps/chosen": -542.8883056640625, |
|
"logps/rejected": -584.90869140625, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.14591416716575623, |
|
"rewards/margins": 0.09063325077295303, |
|
"rewards/rejected": -0.23654744029045105, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.11848341232227488, |
|
"grad_norm": 5.642929553985596, |
|
"learning_rate": 5.8139534883720935e-06, |
|
"logits/chosen": 0.34986090660095215, |
|
"logits/rejected": 0.1881510615348816, |
|
"logps/chosen": -462.16815185546875, |
|
"logps/rejected": -504.80389404296875, |
|
"loss": 0.7215, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.19018429517745972, |
|
"rewards/margins": -0.04456442594528198, |
|
"rewards/rejected": -0.14561986923217773, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.12322274881516587, |
|
"grad_norm": 4.584853172302246, |
|
"learning_rate": 6.046511627906977e-06, |
|
"logits/chosen": 0.5207738876342773, |
|
"logits/rejected": 0.3684951066970825, |
|
"logps/chosen": -432.26153564453125, |
|
"logps/rejected": -423.8664855957031, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.17351892590522766, |
|
"rewards/margins": 0.06544799357652664, |
|
"rewards/rejected": -0.2389669120311737, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.12796208530805686, |
|
"grad_norm": 4.37154483795166, |
|
"learning_rate": 6.279069767441861e-06, |
|
"logits/chosen": 0.17182780802249908, |
|
"logits/rejected": 0.043800633400678635, |
|
"logps/chosen": -356.12115478515625, |
|
"logps/rejected": -354.2303466796875, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.14720980823040009, |
|
"rewards/margins": 0.05135134607553482, |
|
"rewards/rejected": -0.1985611617565155, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.13270142180094788, |
|
"grad_norm": 4.520308017730713, |
|
"learning_rate": 6.511627906976745e-06, |
|
"logits/chosen": 0.09184260666370392, |
|
"logits/rejected": 0.19092530012130737, |
|
"logps/chosen": -400.747314453125, |
|
"logps/rejected": -500.5384521484375, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1759171187877655, |
|
"rewards/margins": 0.05190613120794296, |
|
"rewards/rejected": -0.22782325744628906, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.13744075829383887, |
|
"grad_norm": 4.618706226348877, |
|
"learning_rate": 6.744186046511628e-06, |
|
"logits/chosen": -0.016063084825873375, |
|
"logits/rejected": -0.07409578561782837, |
|
"logps/chosen": -472.5158996582031, |
|
"logps/rejected": -468.94647216796875, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.18945825099945068, |
|
"rewards/margins": 0.16357678174972534, |
|
"rewards/rejected": -0.353035032749176, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.14218009478672985, |
|
"grad_norm": 4.5725998878479, |
|
"learning_rate": 6.976744186046513e-06, |
|
"logits/chosen": 0.35617199540138245, |
|
"logits/rejected": 0.371913880109787, |
|
"logps/chosen": -473.22821044921875, |
|
"logps/rejected": -441.5536193847656, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.2541047930717468, |
|
"rewards/margins": 0.09789780527353287, |
|
"rewards/rejected": -0.3520025908946991, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14691943127962084, |
|
"grad_norm": 4.441779613494873, |
|
"learning_rate": 7.209302325581395e-06, |
|
"logits/chosen": -0.3232620358467102, |
|
"logits/rejected": -0.3168506920337677, |
|
"logps/chosen": -440.8768005371094, |
|
"logps/rejected": -491.36517333984375, |
|
"loss": 0.6467, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1957191824913025, |
|
"rewards/margins": 0.12279976159334183, |
|
"rewards/rejected": -0.3185189366340637, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.15165876777251186, |
|
"grad_norm": 4.8312201499938965, |
|
"learning_rate": 7.44186046511628e-06, |
|
"logits/chosen": 0.20703125, |
|
"logits/rejected": 0.158937007188797, |
|
"logps/chosen": -474.72247314453125, |
|
"logps/rejected": -430.9871520996094, |
|
"loss": 0.6253, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2362278252840042, |
|
"rewards/margins": 0.1699885129928589, |
|
"rewards/rejected": -0.4062163233757019, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.15639810426540285, |
|
"grad_norm": 4.127583026885986, |
|
"learning_rate": 7.674418604651164e-06, |
|
"logits/chosen": 0.012601375579833984, |
|
"logits/rejected": 0.16285735368728638, |
|
"logps/chosen": -512.3396606445312, |
|
"logps/rejected": -529.702392578125, |
|
"loss": 0.6475, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.2504887282848358, |
|
"rewards/margins": 0.11251004040241241, |
|
"rewards/rejected": -0.3629987835884094, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.16113744075829384, |
|
"grad_norm": 4.508236885070801, |
|
"learning_rate": 7.906976744186048e-06, |
|
"logits/chosen": -0.10328027606010437, |
|
"logits/rejected": -0.07670910656452179, |
|
"logps/chosen": -533.043701171875, |
|
"logps/rejected": -608.9775390625, |
|
"loss": 0.633, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.265808641910553, |
|
"rewards/margins": 0.15493102371692657, |
|
"rewards/rejected": -0.42073971033096313, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.16587677725118483, |
|
"grad_norm": 3.833841323852539, |
|
"learning_rate": 8.139534883720931e-06, |
|
"logits/chosen": -0.00557418167591095, |
|
"logits/rejected": 0.06319032609462738, |
|
"logps/chosen": -440.6729431152344, |
|
"logps/rejected": -457.2427978515625, |
|
"loss": 0.6497, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.27264466881752014, |
|
"rewards/margins": 0.13150610029697418, |
|
"rewards/rejected": -0.40415075421333313, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17061611374407584, |
|
"grad_norm": 4.1322197914123535, |
|
"learning_rate": 8.372093023255815e-06, |
|
"logits/chosen": 0.3982032537460327, |
|
"logits/rejected": 0.033034421503543854, |
|
"logps/chosen": -511.9226989746094, |
|
"logps/rejected": -457.24462890625, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.4206628203392029, |
|
"rewards/margins": 0.09507569670677185, |
|
"rewards/rejected": -0.5157385468482971, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.17535545023696683, |
|
"grad_norm": 4.012040138244629, |
|
"learning_rate": 8.604651162790698e-06, |
|
"logits/chosen": -0.22452551126480103, |
|
"logits/rejected": 0.13141538202762604, |
|
"logps/chosen": -365.06890869140625, |
|
"logps/rejected": -484.9878845214844, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3754124045372009, |
|
"rewards/margins": 0.17148733139038086, |
|
"rewards/rejected": -0.5468997359275818, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.18009478672985782, |
|
"grad_norm": 5.076535701751709, |
|
"learning_rate": 8.837209302325582e-06, |
|
"logits/chosen": 0.38980191946029663, |
|
"logits/rejected": 0.29863467812538147, |
|
"logps/chosen": -453.6428527832031, |
|
"logps/rejected": -450.8316650390625, |
|
"loss": 0.6328, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.4603796899318695, |
|
"rewards/margins": 0.19886130094528198, |
|
"rewards/rejected": -0.6592409610748291, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1848341232227488, |
|
"grad_norm": 4.119342803955078, |
|
"learning_rate": 9.069767441860465e-06, |
|
"logits/chosen": 0.32120779156684875, |
|
"logits/rejected": 0.2722249925136566, |
|
"logps/chosen": -403.55084228515625, |
|
"logps/rejected": -382.61187744140625, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5036495923995972, |
|
"rewards/margins": 0.18385529518127441, |
|
"rewards/rejected": -0.6875049471855164, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 3.443678379058838, |
|
"learning_rate": 9.30232558139535e-06, |
|
"logits/chosen": -0.06186838448047638, |
|
"logits/rejected": 0.4023992419242859, |
|
"logps/chosen": -342.36395263671875, |
|
"logps/rejected": -402.0476379394531, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3065139651298523, |
|
"rewards/margins": 0.5137544274330139, |
|
"rewards/rejected": -0.820268452167511, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1943127962085308, |
|
"grad_norm": 4.469974994659424, |
|
"learning_rate": 9.534883720930234e-06, |
|
"logits/chosen": -0.039235230535268784, |
|
"logits/rejected": -0.04238155111670494, |
|
"logps/chosen": -412.5050354003906, |
|
"logps/rejected": -429.1283264160156, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3574320673942566, |
|
"rewards/margins": 0.24463370442390442, |
|
"rewards/rejected": -0.6020657420158386, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1990521327014218, |
|
"grad_norm": 4.837284564971924, |
|
"learning_rate": 9.767441860465117e-06, |
|
"logits/chosen": -0.07826237380504608, |
|
"logits/rejected": 0.1768716275691986, |
|
"logps/chosen": -434.6158752441406, |
|
"logps/rejected": -486.9299011230469, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5381902456283569, |
|
"rewards/margins": 0.2264844924211502, |
|
"rewards/rejected": -0.7646747827529907, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2037914691943128, |
|
"grad_norm": 5.0225067138671875, |
|
"learning_rate": 1e-05, |
|
"logits/chosen": 0.0798925831913948, |
|
"logits/rejected": -0.17932507395744324, |
|
"logps/chosen": -346.35858154296875, |
|
"logps/rejected": -313.78546142578125, |
|
"loss": 0.7154, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5803533792495728, |
|
"rewards/margins": 0.07696369290351868, |
|
"rewards/rejected": -0.657317042350769, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.20853080568720378, |
|
"grad_norm": 4.39346981048584, |
|
"learning_rate": 9.999828225438954e-06, |
|
"logits/chosen": -0.09884198009967804, |
|
"logits/rejected": -0.10861906409263611, |
|
"logps/chosen": -421.8805847167969, |
|
"logps/rejected": -475.72491455078125, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5504233837127686, |
|
"rewards/margins": 0.2702760398387909, |
|
"rewards/rejected": -0.8206994533538818, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2132701421800948, |
|
"grad_norm": 4.667287349700928, |
|
"learning_rate": 9.999312913558414e-06, |
|
"logits/chosen": 0.11334254592657089, |
|
"logits/rejected": 0.03371644765138626, |
|
"logps/chosen": -425.48028564453125, |
|
"logps/rejected": -387.76605224609375, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6281574964523315, |
|
"rewards/margins": 0.3120865821838379, |
|
"rewards/rejected": -0.9402441382408142, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.21800947867298578, |
|
"grad_norm": 4.5937418937683105, |
|
"learning_rate": 9.998454099765368e-06, |
|
"logits/chosen": 0.04098134487867355, |
|
"logits/rejected": 0.05865456163883209, |
|
"logps/chosen": -364.27978515625, |
|
"logps/rejected": -396.471923828125, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5031190514564514, |
|
"rewards/margins": 0.2547236979007721, |
|
"rewards/rejected": -0.7578427791595459, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.22274881516587677, |
|
"grad_norm": 4.146316051483154, |
|
"learning_rate": 9.997251843068763e-06, |
|
"logits/chosen": 0.25591805577278137, |
|
"logits/rejected": -0.025539308786392212, |
|
"logps/chosen": -524.7024536132812, |
|
"logps/rejected": -446.9095458984375, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7542736530303955, |
|
"rewards/margins": 0.2909476161003113, |
|
"rewards/rejected": -1.045221209526062, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.22748815165876776, |
|
"grad_norm": 3.7932281494140625, |
|
"learning_rate": 9.995706226075445e-06, |
|
"logits/chosen": 0.14000652730464935, |
|
"logits/rejected": 0.14868205785751343, |
|
"logps/chosen": -457.1429138183594, |
|
"logps/rejected": -432.461181640625, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6045001745223999, |
|
"rewards/margins": 0.5118697881698608, |
|
"rewards/rejected": -1.1163699626922607, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.23222748815165878, |
|
"grad_norm": 8.021098136901855, |
|
"learning_rate": 9.993817354984485e-06, |
|
"logits/chosen": 0.5763025283813477, |
|
"logits/rejected": 0.5917288661003113, |
|
"logps/chosen": -491.5912170410156, |
|
"logps/rejected": -451.4367370605469, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5605940818786621, |
|
"rewards/margins": 0.4851032793521881, |
|
"rewards/rejected": -1.0456973314285278, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.23696682464454977, |
|
"grad_norm": 3.2265753746032715, |
|
"learning_rate": 9.991585359579886e-06, |
|
"logits/chosen": 0.3617466390132904, |
|
"logits/rejected": 0.3173240125179291, |
|
"logps/chosen": -372.00970458984375, |
|
"logps/rejected": -400.85772705078125, |
|
"loss": 0.5642, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5665906667709351, |
|
"rewards/margins": 0.4627717435359955, |
|
"rewards/rejected": -1.029362440109253, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24170616113744076, |
|
"grad_norm": 5.218678951263428, |
|
"learning_rate": 9.989010393221657e-06, |
|
"logits/chosen": 0.4278137981891632, |
|
"logits/rejected": 0.45443713665008545, |
|
"logps/chosen": -380.2688293457031, |
|
"logps/rejected": -458.2431640625, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6341795921325684, |
|
"rewards/margins": 0.33292362093925476, |
|
"rewards/rejected": -0.9671032428741455, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.24644549763033174, |
|
"grad_norm": 3.540945053100586, |
|
"learning_rate": 9.986092632835287e-06, |
|
"logits/chosen": 0.16431818902492523, |
|
"logits/rejected": 0.29724952578544617, |
|
"logps/chosen": -462.51519775390625, |
|
"logps/rejected": -506.53662109375, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.5587910413742065, |
|
"rewards/margins": 0.4125136137008667, |
|
"rewards/rejected": -0.9713046550750732, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.25118483412322273, |
|
"grad_norm": 3.1407723426818848, |
|
"learning_rate": 9.982832278899582e-06, |
|
"logits/chosen": -0.12014055997133255, |
|
"logits/rejected": -0.07937571406364441, |
|
"logps/chosen": -293.3476867675781, |
|
"logps/rejected": -329.0513916015625, |
|
"loss": 0.4514, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.43457847833633423, |
|
"rewards/margins": 0.7354463338851929, |
|
"rewards/rejected": -1.1700247526168823, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2559241706161137, |
|
"grad_norm": 4.521864891052246, |
|
"learning_rate": 9.979229555432884e-06, |
|
"logits/chosen": 0.311644583940506, |
|
"logits/rejected": 0.13228081166744232, |
|
"logps/chosen": -477.2836608886719, |
|
"logps/rejected": -484.87908935546875, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6064110994338989, |
|
"rewards/margins": 0.5102828145027161, |
|
"rewards/rejected": -1.1166939735412598, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.26066350710900477, |
|
"grad_norm": 4.260916233062744, |
|
"learning_rate": 9.97528470997769e-06, |
|
"logits/chosen": -0.25045445561408997, |
|
"logits/rejected": -0.012767120264470577, |
|
"logps/chosen": -327.21075439453125, |
|
"logps/rejected": -342.33819580078125, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7582281231880188, |
|
"rewards/margins": 0.687684178352356, |
|
"rewards/rejected": -1.44591224193573, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.26540284360189575, |
|
"grad_norm": 4.670787811279297, |
|
"learning_rate": 9.970998013583643e-06, |
|
"logits/chosen": -0.09858112782239914, |
|
"logits/rejected": 0.059129491448402405, |
|
"logps/chosen": -434.63409423828125, |
|
"logps/rejected": -482.1939392089844, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.7279840111732483, |
|
"rewards/margins": 0.4684455394744873, |
|
"rewards/rejected": -1.1964294910430908, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.27014218009478674, |
|
"grad_norm": 4.042267799377441, |
|
"learning_rate": 9.966369760788895e-06, |
|
"logits/chosen": 0.23529131710529327, |
|
"logits/rejected": 0.20713797211647034, |
|
"logps/chosen": -387.02764892578125, |
|
"logps/rejected": -347.3026428222656, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6105923652648926, |
|
"rewards/margins": 0.441728413105011, |
|
"rewards/rejected": -1.0523207187652588, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.27488151658767773, |
|
"grad_norm": 4.473219394683838, |
|
"learning_rate": 9.961400269599885e-06, |
|
"logits/chosen": 0.11737806349992752, |
|
"logits/rejected": 0.036520302295684814, |
|
"logps/chosen": -529.5386962890625, |
|
"logps/rejected": -520.997314453125, |
|
"loss": 0.6037, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.1079742908477783, |
|
"rewards/margins": 0.503280758857727, |
|
"rewards/rejected": -1.6112549304962158, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.2796208530805687, |
|
"grad_norm": 4.352895736694336, |
|
"learning_rate": 9.956089881469481e-06, |
|
"logits/chosen": 0.04756692051887512, |
|
"logits/rejected": 0.06536822766065598, |
|
"logps/chosen": -573.615478515625, |
|
"logps/rejected": -529.3450317382812, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.8045705556869507, |
|
"rewards/margins": 0.36565208435058594, |
|
"rewards/rejected": -1.1702226400375366, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2843601895734597, |
|
"grad_norm": 5.498625755310059, |
|
"learning_rate": 9.950438961273517e-06, |
|
"logits/chosen": 0.075594462454319, |
|
"logits/rejected": -0.22279205918312073, |
|
"logps/chosen": -511.9619140625, |
|
"logps/rejected": -507.88232421875, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.8025746941566467, |
|
"rewards/margins": 0.23016151785850525, |
|
"rewards/rejected": -1.0327363014221191, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2890995260663507, |
|
"grad_norm": 5.152298450469971, |
|
"learning_rate": 9.94444789728573e-06, |
|
"logits/chosen": -0.11181878298521042, |
|
"logits/rejected": -0.005520373582839966, |
|
"logps/chosen": -480.442138671875, |
|
"logps/rejected": -491.8740539550781, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5731409788131714, |
|
"rewards/margins": 0.5521252155303955, |
|
"rewards/rejected": -1.125266194343567, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.2938388625592417, |
|
"grad_norm": 3.373650550842285, |
|
"learning_rate": 9.93811710115107e-06, |
|
"logits/chosen": 0.43766140937805176, |
|
"logits/rejected": 0.4306361675262451, |
|
"logps/chosen": -407.8697509765625, |
|
"logps/rejected": -409.0340576171875, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.5725124478340149, |
|
"rewards/margins": 0.3360479176044464, |
|
"rewards/rejected": -0.9085603952407837, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.2985781990521327, |
|
"grad_norm": 7.151122093200684, |
|
"learning_rate": 9.931447007857433e-06, |
|
"logits/chosen": 0.1370643973350525, |
|
"logits/rejected": -0.04412531852722168, |
|
"logps/chosen": -422.349853515625, |
|
"logps/rejected": -445.34442138671875, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6347966194152832, |
|
"rewards/margins": 0.35178446769714355, |
|
"rewards/rejected": -0.9865810871124268, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3033175355450237, |
|
"grad_norm": 4.557085990905762, |
|
"learning_rate": 9.924438075705755e-06, |
|
"logits/chosen": 0.3025519847869873, |
|
"logits/rejected": 0.3690996766090393, |
|
"logps/chosen": -429.2871398925781, |
|
"logps/rejected": -478.50494384765625, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5606861114501953, |
|
"rewards/margins": 0.6541247963905334, |
|
"rewards/rejected": -1.2148109674453735, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3080568720379147, |
|
"grad_norm": 4.507608890533447, |
|
"learning_rate": 9.917090786278533e-06, |
|
"logits/chosen": 0.14490962028503418, |
|
"logits/rejected": 0.20545634627342224, |
|
"logps/chosen": -404.56536865234375, |
|
"logps/rejected": -478.4001159667969, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4651917815208435, |
|
"rewards/margins": 0.5367893576622009, |
|
"rewards/rejected": -1.0019811391830444, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3127962085308057, |
|
"grad_norm": 3.593722105026245, |
|
"learning_rate": 9.909405644406738e-06, |
|
"logits/chosen": -0.40012237429618835, |
|
"logits/rejected": -0.3070281147956848, |
|
"logps/chosen": -450.7489929199219, |
|
"logps/rejected": -488.6943054199219, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6263750791549683, |
|
"rewards/margins": 0.5151941776275635, |
|
"rewards/rejected": -1.1415691375732422, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3175355450236967, |
|
"grad_norm": 3.789606809616089, |
|
"learning_rate": 9.901383178135113e-06, |
|
"logits/chosen": 0.3894610106945038, |
|
"logits/rejected": 0.41446149349212646, |
|
"logps/chosen": -353.05316162109375, |
|
"logps/rejected": -367.498046875, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4287835657596588, |
|
"rewards/margins": 0.5223977565765381, |
|
"rewards/rejected": -0.9511812925338745, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.3222748815165877, |
|
"grad_norm": 3.219656467437744, |
|
"learning_rate": 9.893023938685911e-06, |
|
"logits/chosen": -0.24614711105823517, |
|
"logits/rejected": -0.00788339227437973, |
|
"logps/chosen": -466.84930419921875, |
|
"logps/rejected": -469.96142578125, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5285323858261108, |
|
"rewards/margins": 0.5399156808853149, |
|
"rewards/rejected": -1.0684479475021362, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.32701421800947866, |
|
"grad_norm": 3.697284698486328, |
|
"learning_rate": 9.884328500421005e-06, |
|
"logits/chosen": 0.18675777316093445, |
|
"logits/rejected": 0.45374056696891785, |
|
"logps/chosen": -363.5547790527344, |
|
"logps/rejected": -370.5771484375, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.3627626299858093, |
|
"rewards/margins": 0.4581056833267212, |
|
"rewards/rejected": -0.8208683133125305, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.33175355450236965, |
|
"grad_norm": 4.949876308441162, |
|
"learning_rate": 9.87529746080243e-06, |
|
"logits/chosen": 0.0644780769944191, |
|
"logits/rejected": 0.23735983669757843, |
|
"logps/chosen": -365.99298095703125, |
|
"logps/rejected": -462.49224853515625, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.4640830159187317, |
|
"rewards/margins": 0.4212184548377991, |
|
"rewards/rejected": -0.8853014707565308, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.33649289099526064, |
|
"grad_norm": 3.680051565170288, |
|
"learning_rate": 9.865931440351338e-06, |
|
"logits/chosen": 0.41640418767929077, |
|
"logits/rejected": 0.5758565664291382, |
|
"logps/chosen": -414.5774230957031, |
|
"logps/rejected": -456.61993408203125, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7768192887306213, |
|
"rewards/margins": 0.48619693517684937, |
|
"rewards/rejected": -1.2630162239074707, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3412322274881517, |
|
"grad_norm": 4.522833824157715, |
|
"learning_rate": 9.856231082605344e-06, |
|
"logits/chosen": 0.10547494888305664, |
|
"logits/rejected": 0.06724017858505249, |
|
"logps/chosen": -442.7686462402344, |
|
"logps/rejected": -453.6602783203125, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5551941990852356, |
|
"rewards/margins": 0.4050966501235962, |
|
"rewards/rejected": -0.9602909088134766, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.3459715639810427, |
|
"grad_norm": 4.556945323944092, |
|
"learning_rate": 9.846197054074325e-06, |
|
"logits/chosen": 0.17173755168914795, |
|
"logits/rejected": 0.1122516393661499, |
|
"logps/chosen": -542.6156005859375, |
|
"logps/rejected": -547.9420776367188, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.42728012800216675, |
|
"rewards/margins": 0.4563668966293335, |
|
"rewards/rejected": -0.8836470246315002, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.35071090047393366, |
|
"grad_norm": 4.758415699005127, |
|
"learning_rate": 9.835830044194625e-06, |
|
"logits/chosen": 0.2476077526807785, |
|
"logits/rejected": 0.2559727430343628, |
|
"logps/chosen": -467.78192138671875, |
|
"logps/rejected": -531.5208129882812, |
|
"loss": 0.638, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.7260033488273621, |
|
"rewards/margins": 0.5160312056541443, |
|
"rewards/rejected": -1.2420345544815063, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.35545023696682465, |
|
"grad_norm": 7.977559566497803, |
|
"learning_rate": 9.825130765281668e-06, |
|
"logits/chosen": 0.034104928374290466, |
|
"logits/rejected": 0.12088382244110107, |
|
"logps/chosen": -359.2759704589844, |
|
"logps/rejected": -388.21124267578125, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.19367767870426178, |
|
"rewards/margins": 0.8202424049377441, |
|
"rewards/rejected": -1.0139200687408447, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.36018957345971564, |
|
"grad_norm": 5.986490726470947, |
|
"learning_rate": 9.814099952481031e-06, |
|
"logits/chosen": -0.1111285462975502, |
|
"logits/rejected": 0.2162112295627594, |
|
"logps/chosen": -320.3255920410156, |
|
"logps/rejected": -356.7198181152344, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.21107947826385498, |
|
"rewards/margins": 0.7759298086166382, |
|
"rewards/rejected": -0.9870092868804932, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.36492890995260663, |
|
"grad_norm": 4.860304355621338, |
|
"learning_rate": 9.802738363717927e-06, |
|
"logits/chosen": 0.24542132019996643, |
|
"logits/rejected": 0.26582974195480347, |
|
"logps/chosen": -474.4150085449219, |
|
"logps/rejected": -491.0259704589844, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.4190457761287689, |
|
"rewards/margins": 0.2866881489753723, |
|
"rewards/rejected": -0.7057338953018188, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3696682464454976, |
|
"grad_norm": 3.6817736625671387, |
|
"learning_rate": 9.791046779645121e-06, |
|
"logits/chosen": 0.18554499745368958, |
|
"logits/rejected": 0.08219505846500397, |
|
"logps/chosen": -367.29046630859375, |
|
"logps/rejected": -377.1193542480469, |
|
"loss": 0.5124, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5033748149871826, |
|
"rewards/margins": 0.595119833946228, |
|
"rewards/rejected": -1.0984946489334106, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3744075829383886, |
|
"grad_norm": 4.450536251068115, |
|
"learning_rate": 9.779026003589304e-06, |
|
"logits/chosen": 0.15589359402656555, |
|
"logits/rejected": -0.018097057938575745, |
|
"logps/chosen": -354.592041015625, |
|
"logps/rejected": -387.45611572265625, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6710065603256226, |
|
"rewards/margins": 0.43026500940322876, |
|
"rewards/rejected": -1.101271629333496, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 4.346735954284668, |
|
"learning_rate": 9.766676861495888e-06, |
|
"logits/chosen": 0.4525200128555298, |
|
"logits/rejected": 0.45285752415657043, |
|
"logps/chosen": -460.7939147949219, |
|
"logps/rejected": -497.489501953125, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.3665574789047241, |
|
"rewards/margins": 0.5861377716064453, |
|
"rewards/rejected": -0.9526952505111694, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.38388625592417064, |
|
"grad_norm": 4.672888278961182, |
|
"learning_rate": 9.754000201872258e-06, |
|
"logits/chosen": 0.2648187279701233, |
|
"logits/rejected": 0.12692871689796448, |
|
"logps/chosen": -462.68035888671875, |
|
"logps/rejected": -446.92498779296875, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.3308072090148926, |
|
"rewards/margins": 0.4868035316467285, |
|
"rewards/rejected": -0.8176107406616211, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.3886255924170616, |
|
"grad_norm": 4.8653130531311035, |
|
"learning_rate": 9.74099689572947e-06, |
|
"logits/chosen": 0.6338875889778137, |
|
"logits/rejected": 0.5265121459960938, |
|
"logps/chosen": -543.588623046875, |
|
"logps/rejected": -460.85968017578125, |
|
"loss": 0.4477, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7309427857398987, |
|
"rewards/margins": 0.8499249219894409, |
|
"rewards/rejected": -1.5808677673339844, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.3933649289099526, |
|
"grad_norm": 3.210146188735962, |
|
"learning_rate": 9.727667836522408e-06, |
|
"logits/chosen": 0.1440345197916031, |
|
"logits/rejected": 0.17331324517726898, |
|
"logps/chosen": -403.3349609375, |
|
"logps/rejected": -424.3764953613281, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.47448769211769104, |
|
"rewards/margins": 0.8300908803939819, |
|
"rewards/rejected": -1.3045785427093506, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.3981042654028436, |
|
"grad_norm": 3.6252570152282715, |
|
"learning_rate": 9.714013940088388e-06, |
|
"logits/chosen": 0.35549432039260864, |
|
"logits/rejected": 0.06034126877784729, |
|
"logps/chosen": -417.7995300292969, |
|
"logps/rejected": -471.5025634765625, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.4528375267982483, |
|
"rewards/margins": 0.6505048274993896, |
|
"rewards/rejected": -1.1033422946929932, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.4028436018957346, |
|
"grad_norm": 3.3232247829437256, |
|
"learning_rate": 9.700036144584237e-06, |
|
"logits/chosen": 0.2271973192691803, |
|
"logits/rejected": 0.47027313709259033, |
|
"logps/chosen": -413.29510498046875, |
|
"logps/rejected": -434.47119140625, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6590039134025574, |
|
"rewards/margins": 0.5970410108566284, |
|
"rewards/rejected": -1.256044864654541, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.4075829383886256, |
|
"grad_norm": 5.408909320831299, |
|
"learning_rate": 9.68573541042183e-06, |
|
"logits/chosen": 0.13957802951335907, |
|
"logits/rejected": -0.01698121801018715, |
|
"logps/chosen": -397.6392517089844, |
|
"logps/rejected": -405.05560302734375, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.647673487663269, |
|
"rewards/margins": 0.43150627613067627, |
|
"rewards/rejected": -1.0791796445846558, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.41232227488151657, |
|
"grad_norm": 3.922076940536499, |
|
"learning_rate": 9.6711127202021e-06, |
|
"logits/chosen": 0.09419327974319458, |
|
"logits/rejected": -0.07248637080192566, |
|
"logps/chosen": -391.400390625, |
|
"logps/rejected": -396.4415283203125, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.48413023352622986, |
|
"rewards/margins": 0.5888237953186035, |
|
"rewards/rejected": -1.0729540586471558, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.41706161137440756, |
|
"grad_norm": 4.851089000701904, |
|
"learning_rate": 9.656169078647525e-06, |
|
"logits/chosen": 0.2503357231616974, |
|
"logits/rejected": 0.17676730453968048, |
|
"logps/chosen": -513.9971923828125, |
|
"logps/rejected": -513.199951171875, |
|
"loss": 0.6425, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.8412836194038391, |
|
"rewards/margins": 0.38562363386154175, |
|
"rewards/rejected": -1.2269072532653809, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.4218009478672986, |
|
"grad_norm": 3.372457504272461, |
|
"learning_rate": 9.640905512533092e-06, |
|
"logits/chosen": -0.17032505571842194, |
|
"logits/rejected": -0.10938653349876404, |
|
"logps/chosen": -418.69781494140625, |
|
"logps/rejected": -375.2335205078125, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6325968503952026, |
|
"rewards/margins": 0.7536253929138184, |
|
"rewards/rejected": -1.3862221240997314, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.4265402843601896, |
|
"grad_norm": 3.9925975799560547, |
|
"learning_rate": 9.625323070615751e-06, |
|
"logits/chosen": 0.13802289962768555, |
|
"logits/rejected": 0.20489519834518433, |
|
"logps/chosen": -473.04534912109375, |
|
"logps/rejected": -501.421630859375, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.5786250829696655, |
|
"rewards/margins": 0.5097320079803467, |
|
"rewards/rejected": -1.0883569717407227, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4312796208530806, |
|
"grad_norm": 4.2329936027526855, |
|
"learning_rate": 9.609422823562345e-06, |
|
"logits/chosen": 0.15958569943904877, |
|
"logits/rejected": 0.17509707808494568, |
|
"logps/chosen": -400.16668701171875, |
|
"logps/rejected": -358.54327392578125, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.5397723317146301, |
|
"rewards/margins": 0.3339753746986389, |
|
"rewards/rejected": -0.873747706413269, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.43601895734597157, |
|
"grad_norm": 3.645078182220459, |
|
"learning_rate": 9.593205863876062e-06, |
|
"logits/chosen": 0.5184372663497925, |
|
"logits/rejected": 0.299396812915802, |
|
"logps/chosen": -413.80816650390625, |
|
"logps/rejected": -438.11663818359375, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.656336784362793, |
|
"rewards/margins": 0.5980467796325684, |
|
"rewards/rejected": -1.2543835639953613, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.44075829383886256, |
|
"grad_norm": 5.40891695022583, |
|
"learning_rate": 9.576673305821353e-06, |
|
"logits/chosen": 0.4078008830547333, |
|
"logits/rejected": 0.4366263449192047, |
|
"logps/chosen": -335.3667297363281, |
|
"logps/rejected": -352.97711181640625, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.5545145273208618, |
|
"rewards/margins": 0.23330146074295044, |
|
"rewards/rejected": -0.7878159284591675, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.44549763033175355, |
|
"grad_norm": 3.772813558578491, |
|
"learning_rate": 9.55982628534738e-06, |
|
"logits/chosen": 0.04203655570745468, |
|
"logits/rejected": 0.07998668402433395, |
|
"logps/chosen": -416.1581115722656, |
|
"logps/rejected": -376.9602355957031, |
|
"loss": 0.4464, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.26440906524658203, |
|
"rewards/margins": 0.8176214098930359, |
|
"rewards/rejected": -1.0820305347442627, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.45023696682464454, |
|
"grad_norm": 4.10103702545166, |
|
"learning_rate": 9.54266596000996e-06, |
|
"logits/chosen": 0.3479623794555664, |
|
"logits/rejected": 0.24994038045406342, |
|
"logps/chosen": -473.52728271484375, |
|
"logps/rejected": -449.1643981933594, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.8664776086807251, |
|
"rewards/margins": 0.31524401903152466, |
|
"rewards/rejected": -1.1817216873168945, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4549763033175355, |
|
"grad_norm": 5.940450668334961, |
|
"learning_rate": 9.525193508892034e-06, |
|
"logits/chosen": 0.12402056902647018, |
|
"logits/rejected": 0.3821167051792145, |
|
"logps/chosen": -390.4453430175781, |
|
"logps/rejected": -498.83343505859375, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.408942312002182, |
|
"rewards/margins": 0.7732877135276794, |
|
"rewards/rejected": -1.182229995727539, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.4597156398104265, |
|
"grad_norm": 4.347564220428467, |
|
"learning_rate": 9.507410132522652e-06, |
|
"logits/chosen": 0.3013518452644348, |
|
"logits/rejected": 0.24353615939617157, |
|
"logps/chosen": -519.27490234375, |
|
"logps/rejected": -445.6244812011719, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5490106344223022, |
|
"rewards/margins": 0.48622170090675354, |
|
"rewards/rejected": -1.035232424736023, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.46445497630331756, |
|
"grad_norm": 5.856849670410156, |
|
"learning_rate": 9.489317052794482e-06, |
|
"logits/chosen": 0.18041574954986572, |
|
"logits/rejected": 0.12899483740329742, |
|
"logps/chosen": -535.3862915039062, |
|
"logps/rejected": -527.10986328125, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6298852562904358, |
|
"rewards/margins": 0.45580825209617615, |
|
"rewards/rejected": -1.0856934785842896, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.46919431279620855, |
|
"grad_norm": 6.874017715454102, |
|
"learning_rate": 9.470915512879853e-06, |
|
"logits/chosen": 0.17756867408752441, |
|
"logits/rejected": 0.1246391087770462, |
|
"logps/chosen": -489.64434814453125, |
|
"logps/rejected": -523.1498413085938, |
|
"loss": 0.6378, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.9944641590118408, |
|
"rewards/margins": 0.40292513370513916, |
|
"rewards/rejected": -1.39738929271698, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.47393364928909953, |
|
"grad_norm": 4.015621185302734, |
|
"learning_rate": 9.452206777145343e-06, |
|
"logits/chosen": -0.0221431702375412, |
|
"logits/rejected": 0.039132505655288696, |
|
"logps/chosen": -505.1900634765625, |
|
"logps/rejected": -460.162353515625, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8876866102218628, |
|
"rewards/margins": 0.621982991695404, |
|
"rewards/rejected": -1.509669542312622, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4786729857819905, |
|
"grad_norm": 3.8857150077819824, |
|
"learning_rate": 9.4331921310649e-06, |
|
"logits/chosen": 0.14884430170059204, |
|
"logits/rejected": 0.24638631939888, |
|
"logps/chosen": -409.65289306640625, |
|
"logps/rejected": -438.5373229980469, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9228386878967285, |
|
"rewards/margins": 0.7441995739936829, |
|
"rewards/rejected": -1.6670382022857666, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.4834123222748815, |
|
"grad_norm": 3.3094451427459717, |
|
"learning_rate": 9.413872881131519e-06, |
|
"logits/chosen": 0.12251371890306473, |
|
"logits/rejected": 0.2717350125312805, |
|
"logps/chosen": -373.90777587890625, |
|
"logps/rejected": -377.60546875, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9558136463165283, |
|
"rewards/margins": 0.9420158863067627, |
|
"rewards/rejected": -1.8978296518325806, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.4881516587677725, |
|
"grad_norm": 2.9940288066864014, |
|
"learning_rate": 9.394250354767467e-06, |
|
"logits/chosen": 0.00997384637594223, |
|
"logits/rejected": -0.056838229298591614, |
|
"logps/chosen": -479.9316711425781, |
|
"logps/rejected": -432.07196044921875, |
|
"loss": 0.3949, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5534849166870117, |
|
"rewards/margins": 1.0289510488510132, |
|
"rewards/rejected": -1.5824360847473145, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.4928909952606635, |
|
"grad_norm": 3.61464524269104, |
|
"learning_rate": 9.374325900233087e-06, |
|
"logits/chosen": 0.5703778266906738, |
|
"logits/rejected": 0.3040878474712372, |
|
"logps/chosen": -341.7536926269531, |
|
"logps/rejected": -341.8878173828125, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.46594947576522827, |
|
"rewards/margins": 0.9190239906311035, |
|
"rewards/rejected": -1.3849735260009766, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4976303317535545, |
|
"grad_norm": 4.028224945068359, |
|
"learning_rate": 9.354100886534152e-06, |
|
"logits/chosen": 0.11834748834371567, |
|
"logits/rejected": 0.14537352323532104, |
|
"logps/chosen": -376.5423583984375, |
|
"logps/rejected": -432.7532958984375, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4749223589897156, |
|
"rewards/margins": 0.890184223651886, |
|
"rewards/rejected": -1.365106463432312, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.5023696682464455, |
|
"grad_norm": 3.813199281692505, |
|
"learning_rate": 9.333576703327803e-06, |
|
"logits/chosen": 0.23954571783542633, |
|
"logits/rejected": 0.18846619129180908, |
|
"logps/chosen": -429.6502685546875, |
|
"logps/rejected": -413.8288269042969, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8689689636230469, |
|
"rewards/margins": 0.7078925371170044, |
|
"rewards/rejected": -1.5768615007400513, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.5071090047393365, |
|
"grad_norm": 3.947744131088257, |
|
"learning_rate": 9.312754760827061e-06, |
|
"logits/chosen": 0.2891814112663269, |
|
"logits/rejected": 0.10350443422794342, |
|
"logps/chosen": -481.8363342285156, |
|
"logps/rejected": -498.7795104980469, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8580895662307739, |
|
"rewards/margins": 0.4509773850440979, |
|
"rewards/rejected": -1.309066891670227, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.5118483412322274, |
|
"grad_norm": 9.869499206542969, |
|
"learning_rate": 9.291636489703943e-06, |
|
"logits/chosen": 0.04037293419241905, |
|
"logits/rejected": 0.007937178015708923, |
|
"logps/chosen": -313.94744873046875, |
|
"logps/rejected": -337.9429931640625, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6449707746505737, |
|
"rewards/margins": 0.8841655254364014, |
|
"rewards/rejected": -1.529136300086975, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.5165876777251185, |
|
"grad_norm": 2.261894464492798, |
|
"learning_rate": 9.270223340991147e-06, |
|
"logits/chosen": 0.45323365926742554, |
|
"logits/rejected": 0.31425660848617554, |
|
"logps/chosen": -518.0477905273438, |
|
"logps/rejected": -518.2217407226562, |
|
"loss": 0.4742, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.0188068151474, |
|
"rewards/margins": 0.7373921871185303, |
|
"rewards/rejected": -1.7561988830566406, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.5213270142180095, |
|
"grad_norm": 5.321746826171875, |
|
"learning_rate": 9.248516785982365e-06, |
|
"logits/chosen": 0.05109311267733574, |
|
"logits/rejected": -0.22819462418556213, |
|
"logps/chosen": -456.70703125, |
|
"logps/rejected": -432.2109680175781, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9423930644989014, |
|
"rewards/margins": 0.6178849935531616, |
|
"rewards/rejected": -1.560278058052063, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5260663507109005, |
|
"grad_norm": 4.055011749267578, |
|
"learning_rate": 9.226518316131176e-06, |
|
"logits/chosen": 0.47002896666526794, |
|
"logits/rejected": 0.16273677349090576, |
|
"logps/chosen": -509.3221740722656, |
|
"logps/rejected": -459.65460205078125, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0381011962890625, |
|
"rewards/margins": 0.38067197799682617, |
|
"rewards/rejected": -1.4187732934951782, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.5308056872037915, |
|
"grad_norm": 3.9691321849823, |
|
"learning_rate": 9.204229442948585e-06, |
|
"logits/chosen": -0.06951727718114853, |
|
"logits/rejected": 0.031339049339294434, |
|
"logps/chosen": -489.317138671875, |
|
"logps/rejected": -497.0957336425781, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6243175864219666, |
|
"rewards/margins": 0.8498878479003906, |
|
"rewards/rejected": -1.4742053747177124, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.5355450236966824, |
|
"grad_norm": 3.802368640899658, |
|
"learning_rate": 9.181651697899153e-06, |
|
"logits/chosen": 0.04360884055495262, |
|
"logits/rejected": 0.08763141930103302, |
|
"logps/chosen": -426.64227294921875, |
|
"logps/rejected": -385.061767578125, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.930860698223114, |
|
"rewards/margins": 0.9945526719093323, |
|
"rewards/rejected": -1.9254133701324463, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.5402843601895735, |
|
"grad_norm": 4.00538969039917, |
|
"learning_rate": 9.158786632295776e-06, |
|
"logits/chosen": -0.1742032915353775, |
|
"logits/rejected": -0.051782816648483276, |
|
"logps/chosen": -377.5306091308594, |
|
"logps/rejected": -421.15216064453125, |
|
"loss": 0.4133, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.657263457775116, |
|
"rewards/margins": 0.9514760375022888, |
|
"rewards/rejected": -1.6087393760681152, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.5450236966824644, |
|
"grad_norm": 2.9875290393829346, |
|
"learning_rate": 9.1356358171931e-06, |
|
"logits/chosen": -0.22956418991088867, |
|
"logits/rejected": -0.03947896510362625, |
|
"logps/chosen": -410.5157470703125, |
|
"logps/rejected": -540.3042602539062, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0504915714263916, |
|
"rewards/margins": 1.2017786502838135, |
|
"rewards/rejected": -2.252270221710205, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.5497630331753555, |
|
"grad_norm": 4.0371994972229, |
|
"learning_rate": 9.112200843279565e-06, |
|
"logits/chosen": 0.2015472799539566, |
|
"logits/rejected": 0.17825239896774292, |
|
"logps/chosen": -471.33587646484375, |
|
"logps/rejected": -473.25396728515625, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7962719202041626, |
|
"rewards/margins": 0.7986223697662354, |
|
"rewards/rejected": -1.5948941707611084, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.5545023696682464, |
|
"grad_norm": 3.3941633701324463, |
|
"learning_rate": 9.088483320768114e-06, |
|
"logits/chosen": -0.3230453431606293, |
|
"logits/rejected": 0.14231722056865692, |
|
"logps/chosen": -340.79547119140625, |
|
"logps/rejected": -417.64764404296875, |
|
"loss": 0.3766, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7756860256195068, |
|
"rewards/margins": 1.1981326341629028, |
|
"rewards/rejected": -1.9738185405731201, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.5592417061611374, |
|
"grad_norm": 3.545902729034424, |
|
"learning_rate": 9.064484879285555e-06, |
|
"logits/chosen": 0.16888631880283356, |
|
"logits/rejected": 0.36799800395965576, |
|
"logps/chosen": -433.0151062011719, |
|
"logps/rejected": -458.9261474609375, |
|
"loss": 0.4182, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6435146331787109, |
|
"rewards/margins": 1.1051644086837769, |
|
"rewards/rejected": -1.7486791610717773, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5639810426540285, |
|
"grad_norm": 4.029533386230469, |
|
"learning_rate": 9.040207167760587e-06, |
|
"logits/chosen": 0.1876976191997528, |
|
"logits/rejected": 0.3228274881839752, |
|
"logps/chosen": -434.99456787109375, |
|
"logps/rejected": -496.84783935546875, |
|
"loss": 0.5507, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.1236001253128052, |
|
"rewards/margins": 0.6825075149536133, |
|
"rewards/rejected": -1.8061076402664185, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 6.328832149505615, |
|
"learning_rate": 9.015651854310505e-06, |
|
"logits/chosen": 0.5520612001419067, |
|
"logits/rejected": 0.27829062938690186, |
|
"logps/chosen": -524.2474975585938, |
|
"logps/rejected": -500.3922424316406, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6885144710540771, |
|
"rewards/margins": 0.6914829015731812, |
|
"rewards/rejected": -1.3799974918365479, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5734597156398105, |
|
"grad_norm": 4.104017734527588, |
|
"learning_rate": 8.99082062612659e-06, |
|
"logits/chosen": -0.059406764805316925, |
|
"logits/rejected": 0.06676661223173141, |
|
"logps/chosen": -397.0128173828125, |
|
"logps/rejected": -465.90789794921875, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9093624353408813, |
|
"rewards/margins": 0.9140886068344116, |
|
"rewards/rejected": -1.823451042175293, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5781990521327014, |
|
"grad_norm": 6.223032474517822, |
|
"learning_rate": 8.965715189358165e-06, |
|
"logits/chosen": 0.4450455605983734, |
|
"logits/rejected": 0.40869832038879395, |
|
"logps/chosen": -459.8601989746094, |
|
"logps/rejected": -437.349365234375, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1986238956451416, |
|
"rewards/margins": 0.5860114097595215, |
|
"rewards/rejected": -1.784635305404663, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5829383886255924, |
|
"grad_norm": 3.415480613708496, |
|
"learning_rate": 8.940337268995385e-06, |
|
"logits/chosen": 0.1984516978263855, |
|
"logits/rejected": 0.016264721751213074, |
|
"logps/chosen": -404.98858642578125, |
|
"logps/rejected": -388.9734191894531, |
|
"loss": 0.4402, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8752626776695251, |
|
"rewards/margins": 1.1214944124221802, |
|
"rewards/rejected": -1.9967570304870605, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5876777251184834, |
|
"grad_norm": 5.029397010803223, |
|
"learning_rate": 8.9146886087507e-06, |
|
"logits/chosen": 0.2871186137199402, |
|
"logits/rejected": 0.08225645869970322, |
|
"logps/chosen": -431.2898254394531, |
|
"logps/rejected": -411.8453063964844, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2498584985733032, |
|
"rewards/margins": 0.6237522959709167, |
|
"rewards/rejected": -1.8736108541488647, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5924170616113744, |
|
"grad_norm": 4.441456317901611, |
|
"learning_rate": 8.888770970939058e-06, |
|
"logits/chosen": 0.1208953708410263, |
|
"logits/rejected": 0.29052025079727173, |
|
"logps/chosen": -439.34161376953125, |
|
"logps/rejected": -485.6873474121094, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.4302982091903687, |
|
"rewards/margins": 0.5271378755569458, |
|
"rewards/rejected": -1.9574360847473145, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5971563981042654, |
|
"grad_norm": 4.362582206726074, |
|
"learning_rate": 8.862586136356794e-06, |
|
"logits/chosen": 0.03537209331989288, |
|
"logits/rejected": 0.15606579184532166, |
|
"logps/chosen": -459.3066711425781, |
|
"logps/rejected": -456.8779296875, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2706105709075928, |
|
"rewards/margins": 0.9128097891807556, |
|
"rewards/rejected": -2.183420419692993, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.6018957345971564, |
|
"grad_norm": 4.520688056945801, |
|
"learning_rate": 8.836135904159301e-06, |
|
"logits/chosen": 0.5592459440231323, |
|
"logits/rejected": 0.4826202690601349, |
|
"logps/chosen": -376.41552734375, |
|
"logps/rejected": -411.979248046875, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8318498134613037, |
|
"rewards/margins": 0.8411949276924133, |
|
"rewards/rejected": -1.6730448007583618, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.6066350710900474, |
|
"grad_norm": 3.8013787269592285, |
|
"learning_rate": 8.809422091737388e-06, |
|
"logits/chosen": 0.3936067521572113, |
|
"logits/rejected": 0.41032177209854126, |
|
"logps/chosen": -373.95660400390625, |
|
"logps/rejected": -375.69482421875, |
|
"loss": 0.4864, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8840177655220032, |
|
"rewards/margins": 1.040097951889038, |
|
"rewards/rejected": -1.9241158962249756, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.6113744075829384, |
|
"grad_norm": 3.8055856227874756, |
|
"learning_rate": 8.782446534592412e-06, |
|
"logits/chosen": 0.2286073863506317, |
|
"logits/rejected": 0.13278520107269287, |
|
"logps/chosen": -407.1571350097656, |
|
"logps/rejected": -483.6879577636719, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2148370742797852, |
|
"rewards/margins": 0.8403700590133667, |
|
"rewards/rejected": -2.0552072525024414, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.6161137440758294, |
|
"grad_norm": 5.259015083312988, |
|
"learning_rate": 8.755211086210172e-06, |
|
"logits/chosen": 0.014815211296081543, |
|
"logits/rejected": -0.05246584862470627, |
|
"logps/chosen": -524.09765625, |
|
"logps/rejected": -512.9163208007812, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9052078723907471, |
|
"rewards/margins": 0.7172214984893799, |
|
"rewards/rejected": -1.6224294900894165, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6208530805687204, |
|
"grad_norm": 3.168894052505493, |
|
"learning_rate": 8.727717617933545e-06, |
|
"logits/chosen": 0.14947810769081116, |
|
"logits/rejected": 0.01599368453025818, |
|
"logps/chosen": -430.6431579589844, |
|
"logps/rejected": -431.24945068359375, |
|
"loss": 0.3968, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7660319805145264, |
|
"rewards/margins": 1.2241827249526978, |
|
"rewards/rejected": -1.9902147054672241, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.6255924170616114, |
|
"grad_norm": 2.955793857574463, |
|
"learning_rate": 8.699968018833903e-06, |
|
"logits/chosen": 0.14900101721286774, |
|
"logits/rejected": 0.20070943236351013, |
|
"logps/chosen": -505.23480224609375, |
|
"logps/rejected": -445.3937683105469, |
|
"loss": 0.4073, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6804631352424622, |
|
"rewards/margins": 1.0907790660858154, |
|
"rewards/rejected": -1.7712422609329224, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.6303317535545023, |
|
"grad_norm": 3.240260601043701, |
|
"learning_rate": 8.671964195581336e-06, |
|
"logits/chosen": 0.03239504247903824, |
|
"logits/rejected": 0.16187995672225952, |
|
"logps/chosen": -396.4449157714844, |
|
"logps/rejected": -417.7769775390625, |
|
"loss": 0.413, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2402898073196411, |
|
"rewards/margins": 1.028062105178833, |
|
"rewards/rejected": -2.2683520317077637, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.6350710900473934, |
|
"grad_norm": 3.1110308170318604, |
|
"learning_rate": 8.643708072313619e-06, |
|
"logits/chosen": 0.13824498653411865, |
|
"logits/rejected": 0.1213301569223404, |
|
"logps/chosen": -312.6982421875, |
|
"logps/rejected": -343.7561340332031, |
|
"loss": 0.403, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.7657801508903503, |
|
"rewards/margins": 1.0914647579193115, |
|
"rewards/rejected": -1.8572447299957275, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.6398104265402843, |
|
"grad_norm": 4.998546600341797, |
|
"learning_rate": 8.615201590504016e-06, |
|
"logits/chosen": 0.2914201617240906, |
|
"logits/rejected": 0.4888171851634979, |
|
"logps/chosen": -459.3593444824219, |
|
"logps/rejected": -461.03265380859375, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.0463979244232178, |
|
"rewards/margins": 0.7969912886619568, |
|
"rewards/rejected": -1.8433892726898193, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.6445497630331753, |
|
"grad_norm": 4.108184337615967, |
|
"learning_rate": 8.586446708827896e-06, |
|
"logits/chosen": -0.007564418017864227, |
|
"logits/rejected": -0.18753255903720856, |
|
"logps/chosen": -378.04345703125, |
|
"logps/rejected": -370.3771667480469, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8995454907417297, |
|
"rewards/margins": 0.9091691374778748, |
|
"rewards/rejected": -1.808714747428894, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.6492890995260664, |
|
"grad_norm": 5.3410868644714355, |
|
"learning_rate": 8.557445403028122e-06, |
|
"logits/chosen": 0.23545342683792114, |
|
"logits/rejected": 0.17830106616020203, |
|
"logps/chosen": -533.0670776367188, |
|
"logps/rejected": -545.6083984375, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9214141368865967, |
|
"rewards/margins": 0.982886016368866, |
|
"rewards/rejected": -1.9043000936508179, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.6540284360189573, |
|
"grad_norm": 4.749429702758789, |
|
"learning_rate": 8.52819966577933e-06, |
|
"logits/chosen": -0.03985929116606712, |
|
"logits/rejected": 0.04942977800965309, |
|
"logps/chosen": -502.4162902832031, |
|
"logps/rejected": -525.4476318359375, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.466578483581543, |
|
"rewards/margins": 0.7347130179405212, |
|
"rewards/rejected": -2.201291561126709, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.6587677725118484, |
|
"grad_norm": 4.427668571472168, |
|
"learning_rate": 8.498711506550984e-06, |
|
"logits/chosen": 0.10724569857120514, |
|
"logits/rejected": -0.042100317776203156, |
|
"logps/chosen": -424.52679443359375, |
|
"logps/rejected": -472.065673828125, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4550511837005615, |
|
"rewards/margins": 1.1595511436462402, |
|
"rewards/rejected": -2.6146023273468018, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.6635071090047393, |
|
"grad_norm": 3.2623369693756104, |
|
"learning_rate": 8.468982951469334e-06, |
|
"logits/chosen": 0.20815303921699524, |
|
"logits/rejected": -0.07932104170322418, |
|
"logps/chosen": -477.3085021972656, |
|
"logps/rejected": -457.29376220703125, |
|
"loss": 0.4059, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5089505910873413, |
|
"rewards/margins": 1.101161241531372, |
|
"rewards/rejected": -2.610111713409424, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6682464454976303, |
|
"grad_norm": 3.9398345947265625, |
|
"learning_rate": 8.439016043178176e-06, |
|
"logits/chosen": -0.0940229594707489, |
|
"logits/rejected": -0.044844143092632294, |
|
"logps/chosen": -409.72442626953125, |
|
"logps/rejected": -431.5579833984375, |
|
"loss": 0.5502, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1987154483795166, |
|
"rewards/margins": 1.0584969520568848, |
|
"rewards/rejected": -2.2572121620178223, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.6729857819905213, |
|
"grad_norm": 3.487725019454956, |
|
"learning_rate": 8.408812840698517e-06, |
|
"logits/chosen": -0.07787841558456421, |
|
"logits/rejected": 0.11167508363723755, |
|
"logps/chosen": -436.72723388671875, |
|
"logps/rejected": -441.3120422363281, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9513049125671387, |
|
"rewards/margins": 0.7828549742698669, |
|
"rewards/rejected": -1.7341598272323608, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6777251184834123, |
|
"grad_norm": 4.247493743896484, |
|
"learning_rate": 8.378375419287098e-06, |
|
"logits/chosen": 0.11863535642623901, |
|
"logits/rejected": 0.010739833116531372, |
|
"logps/chosen": -420.63818359375, |
|
"logps/rejected": -442.8919372558594, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8487941026687622, |
|
"rewards/margins": 0.7895632386207581, |
|
"rewards/rejected": -1.6383572816848755, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6824644549763034, |
|
"grad_norm": 3.8593363761901855, |
|
"learning_rate": 8.3477058702938e-06, |
|
"logits/chosen": 0.14162854850292206, |
|
"logits/rejected": 0.2802544832229614, |
|
"logps/chosen": -383.4293518066406, |
|
"logps/rejected": -507.2069091796875, |
|
"loss": 0.495, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2597415447235107, |
|
"rewards/margins": 0.9453345537185669, |
|
"rewards/rejected": -2.205075979232788, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.6872037914691943, |
|
"grad_norm": 5.035678386688232, |
|
"learning_rate": 8.31680630101795e-06, |
|
"logits/chosen": 0.09797637909650803, |
|
"logits/rejected": -0.0621592253446579, |
|
"logps/chosen": -503.2251281738281, |
|
"logps/rejected": -537.2774047851562, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.707611083984375, |
|
"rewards/margins": 0.8160245418548584, |
|
"rewards/rejected": -2.5236356258392334, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6919431279620853, |
|
"grad_norm": 4.702147006988525, |
|
"learning_rate": 8.285678834563524e-06, |
|
"logits/chosen": 0.025235813111066818, |
|
"logits/rejected": 0.17205679416656494, |
|
"logps/chosen": -457.6421203613281, |
|
"logps/rejected": -525.4061279296875, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.136285424232483, |
|
"rewards/margins": 1.050948143005371, |
|
"rewards/rejected": -2.1872334480285645, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6966824644549763, |
|
"grad_norm": 4.684539794921875, |
|
"learning_rate": 8.25432560969328e-06, |
|
"logits/chosen": 0.06013096123933792, |
|
"logits/rejected": -0.04088731110095978, |
|
"logps/chosen": -354.15350341796875, |
|
"logps/rejected": -332.03009033203125, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3446829319000244, |
|
"rewards/margins": 0.8111345767974854, |
|
"rewards/rejected": -2.1558172702789307, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.7014218009478673, |
|
"grad_norm": 4.876381874084473, |
|
"learning_rate": 8.22274878068179e-06, |
|
"logits/chosen": 0.2597839832305908, |
|
"logits/rejected": 0.029643792659044266, |
|
"logps/chosen": -407.7253112792969, |
|
"logps/rejected": -370.39093017578125, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9906654953956604, |
|
"rewards/margins": 0.9019131660461426, |
|
"rewards/rejected": -1.8925787210464478, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.7061611374407583, |
|
"grad_norm": 4.655327796936035, |
|
"learning_rate": 8.190950517167438e-06, |
|
"logits/chosen": -0.03012210875749588, |
|
"logits/rejected": 0.011793004348874092, |
|
"logps/chosen": -451.87530517578125, |
|
"logps/rejected": -471.72772216796875, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.4353324174880981, |
|
"rewards/margins": 0.618436872959137, |
|
"rewards/rejected": -2.05376935005188, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.7109004739336493, |
|
"grad_norm": 3.944150924682617, |
|
"learning_rate": 8.15893300400332e-06, |
|
"logits/chosen": 0.07667016983032227, |
|
"logits/rejected": -0.02443986013531685, |
|
"logps/chosen": -449.56829833984375, |
|
"logps/rejected": -527.9904174804688, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1610910892486572, |
|
"rewards/margins": 0.8665155172348022, |
|
"rewards/rejected": -2.02760648727417, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7156398104265402, |
|
"grad_norm": 3.960772752761841, |
|
"learning_rate": 8.126698441107146e-06, |
|
"logits/chosen": 0.08127880841493607, |
|
"logits/rejected": 0.21806366741657257, |
|
"logps/chosen": -358.4165954589844, |
|
"logps/rejected": -442.06890869140625, |
|
"loss": 0.4735, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9019060134887695, |
|
"rewards/margins": 1.199015498161316, |
|
"rewards/rejected": -2.100921630859375, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.7203791469194313, |
|
"grad_norm": 3.778970718383789, |
|
"learning_rate": 8.094249043310074e-06, |
|
"logits/chosen": 0.38509824872016907, |
|
"logits/rejected": 0.17644795775413513, |
|
"logps/chosen": -482.83935546875, |
|
"logps/rejected": -469.6008605957031, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3014028072357178, |
|
"rewards/margins": 0.7829511165618896, |
|
"rewards/rejected": -2.0843539237976074, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.7251184834123223, |
|
"grad_norm": 4.452014446258545, |
|
"learning_rate": 8.061587040204528e-06, |
|
"logits/chosen": 0.08200319856405258, |
|
"logits/rejected": 0.11494339257478714, |
|
"logps/chosen": -437.13232421875, |
|
"logps/rejected": -410.8484191894531, |
|
"loss": 0.5028, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0245773792266846, |
|
"rewards/margins": 1.0526708364486694, |
|
"rewards/rejected": -2.0772480964660645, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.7298578199052133, |
|
"grad_norm": 5.873034954071045, |
|
"learning_rate": 8.028714675991008e-06, |
|
"logits/chosen": 0.11512620002031326, |
|
"logits/rejected": 0.27921169996261597, |
|
"logps/chosen": -414.84832763671875, |
|
"logps/rejected": -395.1297607421875, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3350560665130615, |
|
"rewards/margins": 0.6761168241500854, |
|
"rewards/rejected": -2.0111730098724365, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.7345971563981043, |
|
"grad_norm": 3.5979249477386475, |
|
"learning_rate": 7.995634209323886e-06, |
|
"logits/chosen": 0.3013925850391388, |
|
"logits/rejected": 0.07984738051891327, |
|
"logps/chosen": -405.5412292480469, |
|
"logps/rejected": -362.7952575683594, |
|
"loss": 0.3824, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.0144352912902832, |
|
"rewards/margins": 1.340194821357727, |
|
"rewards/rejected": -2.3546299934387207, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.7393364928909952, |
|
"grad_norm": 4.739889144897461, |
|
"learning_rate": 7.962347913156217e-06, |
|
"logits/chosen": 0.3595688045024872, |
|
"logits/rejected": 0.21113444864749908, |
|
"logps/chosen": -439.633544921875, |
|
"logps/rejected": -453.5863952636719, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0545451641082764, |
|
"rewards/margins": 0.9417620301246643, |
|
"rewards/rejected": -1.996307134628296, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.7440758293838863, |
|
"grad_norm": 3.8085782527923584, |
|
"learning_rate": 7.92885807458357e-06, |
|
"logits/chosen": 0.11922314763069153, |
|
"logits/rejected": -0.1748376190662384, |
|
"logps/chosen": -459.07763671875, |
|
"logps/rejected": -382.17578125, |
|
"loss": 0.4493, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6546452045440674, |
|
"rewards/margins": 1.0991568565368652, |
|
"rewards/rejected": -1.753801941871643, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.7488151658767772, |
|
"grad_norm": 3.6355323791503906, |
|
"learning_rate": 7.895166994686869e-06, |
|
"logits/chosen": 0.022161336615681648, |
|
"logits/rejected": 0.10442040860652924, |
|
"logps/chosen": -426.6912841796875, |
|
"logps/rejected": -411.6988525390625, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2272857427597046, |
|
"rewards/margins": 0.8166685104370117, |
|
"rewards/rejected": -2.0439541339874268, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.7535545023696683, |
|
"grad_norm": 4.030194282531738, |
|
"learning_rate": 7.861276988374303e-06, |
|
"logits/chosen": 0.031902965158224106, |
|
"logits/rejected": -0.08190877735614777, |
|
"logps/chosen": -392.4819030761719, |
|
"logps/rejected": -432.68878173828125, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4378745555877686, |
|
"rewards/margins": 1.0156277418136597, |
|
"rewards/rejected": -2.4535024166107178, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 3.8472750186920166, |
|
"learning_rate": 7.827190384222249e-06, |
|
"logits/chosen": 0.0007600486278533936, |
|
"logits/rejected": 0.19025975465774536, |
|
"logps/chosen": -417.6579895019531, |
|
"logps/rejected": -449.7379150390625, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.9642939567565918, |
|
"rewards/margins": 1.006424903869629, |
|
"rewards/rejected": -1.9707188606262207, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7630331753554502, |
|
"grad_norm": 3.9590466022491455, |
|
"learning_rate": 7.7929095243153e-06, |
|
"logits/chosen": 0.13090184330940247, |
|
"logits/rejected": 0.11003851890563965, |
|
"logps/chosen": -411.51605224609375, |
|
"logps/rejected": -465.15020751953125, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2225273847579956, |
|
"rewards/margins": 0.8529220819473267, |
|
"rewards/rejected": -2.0754494667053223, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.7677725118483413, |
|
"grad_norm": 4.315500259399414, |
|
"learning_rate": 7.758436764085315e-06, |
|
"logits/chosen": 0.024673327803611755, |
|
"logits/rejected": 0.28246718645095825, |
|
"logps/chosen": -484.12347412109375, |
|
"logps/rejected": -489.55560302734375, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.279213547706604, |
|
"rewards/margins": 0.6366978883743286, |
|
"rewards/rejected": -1.9159114360809326, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.7725118483412322, |
|
"grad_norm": 5.20791482925415, |
|
"learning_rate": 7.723774472149601e-06, |
|
"logits/chosen": -0.07025258243083954, |
|
"logits/rejected": 0.05357924848794937, |
|
"logps/chosen": -453.21856689453125, |
|
"logps/rejected": -479.0498962402344, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.7992047071456909, |
|
"rewards/margins": 0.9488848447799683, |
|
"rewards/rejected": -1.7480895519256592, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.7772511848341233, |
|
"grad_norm": 3.974989414215088, |
|
"learning_rate": 7.68892503014815e-06, |
|
"logits/chosen": -0.2237871140241623, |
|
"logits/rejected": -0.042462363839149475, |
|
"logps/chosen": -474.47930908203125, |
|
"logps/rejected": -524.5669555664062, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8265652656555176, |
|
"rewards/margins": 0.8583210110664368, |
|
"rewards/rejected": -1.6848862171173096, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.7819905213270142, |
|
"grad_norm": 4.090900421142578, |
|
"learning_rate": 7.653890832580003e-06, |
|
"logits/chosen": 0.3308762013912201, |
|
"logits/rejected": 0.18862006068229675, |
|
"logps/chosen": -356.6459655761719, |
|
"logps/rejected": -375.0365295410156, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9785811901092529, |
|
"rewards/margins": 1.0347230434417725, |
|
"rewards/rejected": -2.0133042335510254, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.7867298578199052, |
|
"grad_norm": 4.364016532897949, |
|
"learning_rate": 7.61867428663872e-06, |
|
"logits/chosen": -0.40676772594451904, |
|
"logits/rejected": -0.011762067675590515, |
|
"logps/chosen": -437.35137939453125, |
|
"logps/rejected": -484.0832214355469, |
|
"loss": 0.4485, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8578428030014038, |
|
"rewards/margins": 1.132391095161438, |
|
"rewards/rejected": -1.9902338981628418, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7914691943127962, |
|
"grad_norm": 3.145108222961426, |
|
"learning_rate": 7.583277812046993e-06, |
|
"logits/chosen": 0.23439201712608337, |
|
"logits/rejected": 0.25310468673706055, |
|
"logps/chosen": -469.490478515625, |
|
"logps/rejected": -526.10693359375, |
|
"loss": 0.3757, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6878955960273743, |
|
"rewards/margins": 1.2261947393417358, |
|
"rewards/rejected": -1.9140903949737549, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7962085308056872, |
|
"grad_norm": 5.070702075958252, |
|
"learning_rate": 7.547703840890377e-06, |
|
"logits/chosen": -0.17462563514709473, |
|
"logits/rejected": 0.02190116047859192, |
|
"logps/chosen": -328.0517272949219, |
|
"logps/rejected": -350.8951721191406, |
|
"loss": 0.461, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6660545468330383, |
|
"rewards/margins": 1.430792212486267, |
|
"rewards/rejected": -2.09684681892395, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.8009478672985783, |
|
"grad_norm": 3.767350196838379, |
|
"learning_rate": 7.511954817450181e-06, |
|
"logits/chosen": -0.22375909984111786, |
|
"logits/rejected": -0.385820597410202, |
|
"logps/chosen": -455.30841064453125, |
|
"logps/rejected": -472.73822021484375, |
|
"loss": 0.4375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4691078662872314, |
|
"rewards/margins": 1.1987533569335938, |
|
"rewards/rejected": -2.667861223220825, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.8056872037914692, |
|
"grad_norm": 3.563715696334839, |
|
"learning_rate": 7.476033198035532e-06, |
|
"logits/chosen": -0.21226292848587036, |
|
"logits/rejected": -0.0044862329959869385, |
|
"logps/chosen": -328.20428466796875, |
|
"logps/rejected": -420.17022705078125, |
|
"loss": 0.4346, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0093038082122803, |
|
"rewards/margins": 1.093737244606018, |
|
"rewards/rejected": -2.103041172027588, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8104265402843602, |
|
"grad_norm": 3.6341946125030518, |
|
"learning_rate": 7.439941450814591e-06, |
|
"logits/chosen": -0.03815871477127075, |
|
"logits/rejected": 0.10924215614795685, |
|
"logps/chosen": -331.86993408203125, |
|
"logps/rejected": -382.3277282714844, |
|
"loss": 0.3771, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.846264123916626, |
|
"rewards/margins": 1.4025492668151855, |
|
"rewards/rejected": -2.2488136291503906, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.8151658767772512, |
|
"grad_norm": 4.456596851348877, |
|
"learning_rate": 7.4036820556449704e-06, |
|
"logits/chosen": -0.03349509835243225, |
|
"logits/rejected": -0.18871161341667175, |
|
"logps/chosen": -516.8641967773438, |
|
"logps/rejected": -498.6783447265625, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3133604526519775, |
|
"rewards/margins": 0.7690563797950745, |
|
"rewards/rejected": -2.0824170112609863, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.8199052132701422, |
|
"grad_norm": 3.4334120750427246, |
|
"learning_rate": 7.367257503903349e-06, |
|
"logits/chosen": -0.13274812698364258, |
|
"logits/rejected": -0.00023769401013851166, |
|
"logps/chosen": -447.26861572265625, |
|
"logps/rejected": -463.07232666015625, |
|
"loss": 0.3816, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.9694781303405762, |
|
"rewards/margins": 1.1989856958389282, |
|
"rewards/rejected": -2.168463945388794, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.8246445497630331, |
|
"grad_norm": 2.757255792617798, |
|
"learning_rate": 7.330670298314282e-06, |
|
"logits/chosen": 0.38800767064094543, |
|
"logits/rejected": 0.5887751579284668, |
|
"logps/chosen": -386.99114990234375, |
|
"logps/rejected": -446.8887023925781, |
|
"loss": 0.383, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8738099932670593, |
|
"rewards/margins": 1.4890499114990234, |
|
"rewards/rejected": -2.3628599643707275, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.8293838862559242, |
|
"grad_norm": 3.8897387981414795, |
|
"learning_rate": 7.293922952778239e-06, |
|
"logits/chosen": 0.17443230748176575, |
|
"logits/rejected": -0.09194624423980713, |
|
"logps/chosen": -379.7416076660156, |
|
"logps/rejected": -319.03955078125, |
|
"loss": 0.399, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.069253921508789, |
|
"rewards/margins": 1.2073869705200195, |
|
"rewards/rejected": -2.2766408920288086, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.8341232227488151, |
|
"grad_norm": 4.58038854598999, |
|
"learning_rate": 7.257017992198879e-06, |
|
"logits/chosen": -0.27831190824508667, |
|
"logits/rejected": -0.11275164037942886, |
|
"logps/chosen": -335.1506652832031, |
|
"logps/rejected": -393.7295227050781, |
|
"loss": 0.4676, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8761013150215149, |
|
"rewards/margins": 0.9996771812438965, |
|
"rewards/rejected": -1.8757784366607666, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.8388625592417062, |
|
"grad_norm": 4.329710960388184, |
|
"learning_rate": 7.219957952309568e-06, |
|
"logits/chosen": 0.41356244683265686, |
|
"logits/rejected": 0.5604581236839294, |
|
"logps/chosen": -379.12786865234375, |
|
"logps/rejected": -415.4062805175781, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2384291887283325, |
|
"rewards/margins": 1.2731423377990723, |
|
"rewards/rejected": -2.5115714073181152, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.8436018957345972, |
|
"grad_norm": 2.7617781162261963, |
|
"learning_rate": 7.182745379499134e-06, |
|
"logits/chosen": 0.45363107323646545, |
|
"logits/rejected": 0.35966387391090393, |
|
"logps/chosen": -350.72308349609375, |
|
"logps/rejected": -379.54150390625, |
|
"loss": 0.2672, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.8143477439880371, |
|
"rewards/margins": 2.0239524841308594, |
|
"rewards/rejected": -2.8383002281188965, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.8483412322274881, |
|
"grad_norm": 3.4556071758270264, |
|
"learning_rate": 7.145382830636925e-06, |
|
"logits/chosen": 0.22734937071800232, |
|
"logits/rejected": -0.007221966981887817, |
|
"logps/chosen": -451.68951416015625, |
|
"logps/rejected": -407.8339538574219, |
|
"loss": 0.4269, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2716048955917358, |
|
"rewards/margins": 1.0846253633499146, |
|
"rewards/rejected": -2.3562304973602295, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.8530805687203792, |
|
"grad_norm": 3.9663004875183105, |
|
"learning_rate": 7.107872872897112e-06, |
|
"logits/chosen": 0.43388858437538147, |
|
"logits/rejected": 0.3206801414489746, |
|
"logps/chosen": -492.4407958984375, |
|
"logps/rejected": -506.46466064453125, |
|
"loss": 0.508, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.438705325126648, |
|
"rewards/margins": 0.8778525590896606, |
|
"rewards/rejected": -2.3165578842163086, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8578199052132701, |
|
"grad_norm": 3.9396262168884277, |
|
"learning_rate": 7.070218083582306e-06, |
|
"logits/chosen": -0.01552341878414154, |
|
"logits/rejected": -0.040410738438367844, |
|
"logps/chosen": -434.32440185546875, |
|
"logps/rejected": -409.5101013183594, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0567607879638672, |
|
"rewards/margins": 1.020298719406128, |
|
"rewards/rejected": -2.077059268951416, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.8625592417061612, |
|
"grad_norm": 3.724102258682251, |
|
"learning_rate": 7.032421049946467e-06, |
|
"logits/chosen": -0.15967610478401184, |
|
"logits/rejected": -0.3239976465702057, |
|
"logps/chosen": -533.2361450195312, |
|
"logps/rejected": -586.9674682617188, |
|
"loss": 0.4023, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5228052139282227, |
|
"rewards/margins": 1.5015686750411987, |
|
"rewards/rejected": -3.024374008178711, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.8672985781990521, |
|
"grad_norm": 3.7462353706359863, |
|
"learning_rate": 6.994484369017142e-06, |
|
"logits/chosen": 0.2605791985988617, |
|
"logits/rejected": 0.2061198353767395, |
|
"logps/chosen": -387.6251220703125, |
|
"logps/rejected": -398.81976318359375, |
|
"loss": 0.3708, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1234267950057983, |
|
"rewards/margins": 1.770387887954712, |
|
"rewards/rejected": -2.8938148021698, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.8720379146919431, |
|
"grad_norm": 4.484975337982178, |
|
"learning_rate": 6.9564106474170165e-06, |
|
"logits/chosen": 0.14537742733955383, |
|
"logits/rejected": 0.3496534824371338, |
|
"logps/chosen": -483.0646667480469, |
|
"logps/rejected": -531.8125610351562, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5899852514266968, |
|
"rewards/margins": 0.8374561071395874, |
|
"rewards/rejected": -2.427441358566284, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.8767772511848341, |
|
"grad_norm": 5.1885552406311035, |
|
"learning_rate": 6.9182025011848156e-06, |
|
"logits/chosen": 0.16828392446041107, |
|
"logits/rejected": 0.11987558007240295, |
|
"logps/chosen": -531.3955688476562, |
|
"logps/rejected": -551.9052124023438, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5659953355789185, |
|
"rewards/margins": 0.9932804107666016, |
|
"rewards/rejected": -2.5592756271362305, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.8815165876777251, |
|
"grad_norm": 4.2418293952941895, |
|
"learning_rate": 6.879862555595559e-06, |
|
"logits/chosen": 0.2174081802368164, |
|
"logits/rejected": 0.09607304632663727, |
|
"logps/chosen": -375.9927062988281, |
|
"logps/rejected": -363.47442626953125, |
|
"loss": 0.4827, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5877923965454102, |
|
"rewards/margins": 1.0314569473266602, |
|
"rewards/rejected": -2.6192493438720703, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.8862559241706162, |
|
"grad_norm": 4.966315269470215, |
|
"learning_rate": 6.841393444980177e-06, |
|
"logits/chosen": 0.13937367498874664, |
|
"logits/rejected": 0.11896172165870667, |
|
"logps/chosen": -465.7598876953125, |
|
"logps/rejected": -472.2224426269531, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.710962176322937, |
|
"rewards/margins": 0.8563452959060669, |
|
"rewards/rejected": -2.567307472229004, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.8909952606635071, |
|
"grad_norm": 4.166539192199707, |
|
"learning_rate": 6.802797812544503e-06, |
|
"logits/chosen": -0.12522928416728973, |
|
"logits/rejected": 0.08225654065608978, |
|
"logps/chosen": -495.0504150390625, |
|
"logps/rejected": -538.3297729492188, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.693655252456665, |
|
"rewards/margins": 1.2182517051696777, |
|
"rewards/rejected": -2.9119069576263428, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.8957345971563981, |
|
"grad_norm": 4.550651550292969, |
|
"learning_rate": 6.764078310187667e-06, |
|
"logits/chosen": 0.05809233337640762, |
|
"logits/rejected": -0.03961513191461563, |
|
"logps/chosen": -395.87408447265625, |
|
"logps/rejected": -448.75543212890625, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.5405490398406982, |
|
"rewards/margins": 1.279184341430664, |
|
"rewards/rejected": -2.819733142852783, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.9004739336492891, |
|
"grad_norm": 3.5397918224334717, |
|
"learning_rate": 6.725237598319877e-06, |
|
"logits/chosen": 0.2512727975845337, |
|
"logits/rejected": 0.4075152277946472, |
|
"logps/chosen": -414.4294128417969, |
|
"logps/rejected": -459.6047058105469, |
|
"loss": 0.3978, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8530254364013672, |
|
"rewards/margins": 1.6902107000350952, |
|
"rewards/rejected": -3.543236255645752, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.9052132701421801, |
|
"grad_norm": 5.755289554595947, |
|
"learning_rate": 6.686278345679626e-06, |
|
"logits/chosen": 0.19829702377319336, |
|
"logits/rejected": 0.030918434262275696, |
|
"logps/chosen": -425.8815002441406, |
|
"logps/rejected": -406.3869323730469, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4468215703964233, |
|
"rewards/margins": 1.3219070434570312, |
|
"rewards/rejected": -2.768728494644165, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.909952606635071, |
|
"grad_norm": 4.378885746002197, |
|
"learning_rate": 6.647203229150323e-06, |
|
"logits/chosen": 0.011551991105079651, |
|
"logits/rejected": 0.19708117842674255, |
|
"logps/chosen": -427.0396728515625, |
|
"logps/rejected": -457.7227783203125, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.6884351968765259, |
|
"rewards/margins": 1.0523674488067627, |
|
"rewards/rejected": -2.740802526473999, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.9146919431279621, |
|
"grad_norm": 3.92692232131958, |
|
"learning_rate": 6.608014933576362e-06, |
|
"logits/chosen": 0.24341322481632233, |
|
"logits/rejected": 0.24133481085300446, |
|
"logps/chosen": -467.0690002441406, |
|
"logps/rejected": -515.750244140625, |
|
"loss": 0.4556, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0007805824279785, |
|
"rewards/margins": 1.1552082300186157, |
|
"rewards/rejected": -3.1559886932373047, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.919431279620853, |
|
"grad_norm": 5.567759037017822, |
|
"learning_rate": 6.568716151578653e-06, |
|
"logits/chosen": 0.18795177340507507, |
|
"logits/rejected": 0.128329798579216, |
|
"logps/chosen": -408.3992919921875, |
|
"logps/rejected": -449.4272155761719, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.641984224319458, |
|
"rewards/margins": 1.3132719993591309, |
|
"rewards/rejected": -2.955256462097168, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.9241706161137441, |
|
"grad_norm": 2.768008232116699, |
|
"learning_rate": 6.529309583369606e-06, |
|
"logits/chosen": 0.4808191657066345, |
|
"logits/rejected": 0.31546342372894287, |
|
"logps/chosen": -395.28741455078125, |
|
"logps/rejected": -452.6786193847656, |
|
"loss": 0.3497, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0516895055770874, |
|
"rewards/margins": 1.699798822402954, |
|
"rewards/rejected": -2.751488208770752, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.9289099526066351, |
|
"grad_norm": 4.628823757171631, |
|
"learning_rate": 6.489797936567604e-06, |
|
"logits/chosen": -0.099003866314888, |
|
"logits/rejected": 0.046685896813869476, |
|
"logps/chosen": -515.1678466796875, |
|
"logps/rejected": -565.892578125, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.4594172239303589, |
|
"rewards/margins": 1.0597906112670898, |
|
"rewards/rejected": -2.5192079544067383, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.933649289099526, |
|
"grad_norm": 3.6848068237304688, |
|
"learning_rate": 6.45018392601096e-06, |
|
"logits/chosen": 0.2267099916934967, |
|
"logits/rejected": 0.1157350018620491, |
|
"logps/chosen": -493.6409606933594, |
|
"logps/rejected": -533.650390625, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -1.6284621953964233, |
|
"rewards/margins": 1.1400401592254639, |
|
"rewards/rejected": -2.7685022354125977, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.9383886255924171, |
|
"grad_norm": 4.227545261383057, |
|
"learning_rate": 6.410470273571386e-06, |
|
"logits/chosen": -0.06747046858072281, |
|
"logits/rejected": 0.018513869494199753, |
|
"logps/chosen": -481.58990478515625, |
|
"logps/rejected": -538.8641967773438, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0931174755096436, |
|
"rewards/margins": 0.9540358781814575, |
|
"rewards/rejected": -3.0471532344818115, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.943127962085308, |
|
"grad_norm": 5.139426231384277, |
|
"learning_rate": 6.370659707966967e-06, |
|
"logits/chosen": 0.326305627822876, |
|
"logits/rejected": 0.3613607883453369, |
|
"logps/chosen": -530.1773681640625, |
|
"logps/rejected": -555.13720703125, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.7542710304260254, |
|
"rewards/margins": 0.993602454662323, |
|
"rewards/rejected": -2.7478737831115723, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 4.221323490142822, |
|
"learning_rate": 6.3307549645746756e-06, |
|
"logits/chosen": 0.173638254404068, |
|
"logits/rejected": 0.30959224700927734, |
|
"logps/chosen": -390.31353759765625, |
|
"logps/rejected": -444.4742431640625, |
|
"loss": 0.4113, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6390478610992432, |
|
"rewards/margins": 1.65326988697052, |
|
"rewards/rejected": -3.2923176288604736, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.95260663507109, |
|
"grad_norm": 4.31206750869751, |
|
"learning_rate": 6.290758785242425e-06, |
|
"logits/chosen": 0.06258545815944672, |
|
"logits/rejected": 0.35492444038391113, |
|
"logps/chosen": -358.9757080078125, |
|
"logps/rejected": -418.6335754394531, |
|
"loss": 0.4458, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3634282350540161, |
|
"rewards/margins": 1.3426121473312378, |
|
"rewards/rejected": -2.706040143966675, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.957345971563981, |
|
"grad_norm": 5.077805995941162, |
|
"learning_rate": 6.250673918100671e-06, |
|
"logits/chosen": 0.0679941326379776, |
|
"logits/rejected": 0.21719148755073547, |
|
"logps/chosen": -397.947265625, |
|
"logps/rejected": -410.0476379394531, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.099858283996582, |
|
"rewards/margins": 0.8603665828704834, |
|
"rewards/rejected": -2.9602246284484863, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.9620853080568721, |
|
"grad_norm": 3.2156455516815186, |
|
"learning_rate": 6.210503117373601e-06, |
|
"logits/chosen": -0.03979983180761337, |
|
"logits/rejected": -0.14512544870376587, |
|
"logps/chosen": -471.88946533203125, |
|
"logps/rejected": -456.22967529296875, |
|
"loss": 0.3515, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.0477066040039062, |
|
"rewards/margins": 1.6663435697555542, |
|
"rewards/rejected": -3.714050054550171, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.966824644549763, |
|
"grad_norm": 4.370273113250732, |
|
"learning_rate": 6.170249143189878e-06, |
|
"logits/chosen": 0.10432646423578262, |
|
"logits/rejected": -0.09444320946931839, |
|
"logps/chosen": -398.3092041015625, |
|
"logps/rejected": -394.1639709472656, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.064572334289551, |
|
"rewards/margins": 1.2048075199127197, |
|
"rewards/rejected": -3.2693798542022705, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.9715639810426541, |
|
"grad_norm": 4.564243793487549, |
|
"learning_rate": 6.129914761393001e-06, |
|
"logits/chosen": -0.1701563596725464, |
|
"logits/rejected": -0.0811741054058075, |
|
"logps/chosen": -342.63153076171875, |
|
"logps/rejected": -331.88641357421875, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.414586067199707, |
|
"rewards/margins": 1.498974323272705, |
|
"rewards/rejected": -2.913560390472412, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.976303317535545, |
|
"grad_norm": 4.680627822875977, |
|
"learning_rate": 6.089502743351259e-06, |
|
"logits/chosen": 0.41326117515563965, |
|
"logits/rejected": 0.31307393312454224, |
|
"logps/chosen": -523.71484375, |
|
"logps/rejected": -527.9585571289062, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9313371181488037, |
|
"rewards/margins": 1.1199026107788086, |
|
"rewards/rejected": -3.0512397289276123, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.981042654028436, |
|
"grad_norm": 7.876077175140381, |
|
"learning_rate": 6.049015865767318e-06, |
|
"logits/chosen": -0.13413012027740479, |
|
"logits/rejected": 0.10655626654624939, |
|
"logps/chosen": -447.656005859375, |
|
"logps/rejected": -466.0491943359375, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.5786304473876953, |
|
"rewards/margins": 0.6814507246017456, |
|
"rewards/rejected": -2.2600812911987305, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.985781990521327, |
|
"grad_norm": 4.203638076782227, |
|
"learning_rate": 6.0084569104874276e-06, |
|
"logits/chosen": -0.014623567461967468, |
|
"logits/rejected": 0.012134553864598274, |
|
"logps/chosen": -424.0162353515625, |
|
"logps/rejected": -511.604248046875, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.323808431625366, |
|
"rewards/margins": 0.872652530670166, |
|
"rewards/rejected": -3.1964609622955322, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.990521327014218, |
|
"grad_norm": 3.263500213623047, |
|
"learning_rate": 5.967828664310283e-06, |
|
"logits/chosen": 0.13446447253227234, |
|
"logits/rejected": 0.23876450955867767, |
|
"logps/chosen": -381.72882080078125, |
|
"logps/rejected": -437.77508544921875, |
|
"loss": 0.3952, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.151979923248291, |
|
"rewards/margins": 1.6185417175292969, |
|
"rewards/rejected": -2.770521640777588, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.995260663507109, |
|
"grad_norm": 4.874969959259033, |
|
"learning_rate": 5.927133918795548e-06, |
|
"logits/chosen": -0.3298684358596802, |
|
"logits/rejected": -0.1773088425397873, |
|
"logps/chosen": -498.17230224609375, |
|
"logps/rejected": -499.8948669433594, |
|
"loss": 0.4685, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.082857608795166, |
|
"rewards/margins": 1.1343200206756592, |
|
"rewards/rejected": -3.217177629470825, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.354842185974121, |
|
"learning_rate": 5.8863754700720396e-06, |
|
"logits/chosen": 0.06099194288253784, |
|
"logits/rejected": 0.0313149094581604, |
|
"logps/chosen": -570.9532470703125, |
|
"logps/rejected": -558.2776489257812, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.5208333134651184, |
|
"rewards/chosen": -2.8509020805358887, |
|
"rewards/margins": 0.9109289646148682, |
|
"rewards/rejected": -3.7618308067321777, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.004739336492891, |
|
"grad_norm": 3.0235018730163574, |
|
"learning_rate": 5.845556118645613e-06, |
|
"logits/chosen": 0.2777591347694397, |
|
"logits/rejected": 0.28722092509269714, |
|
"logps/chosen": -455.98736572265625, |
|
"logps/rejected": -512.40087890625, |
|
"loss": 0.4001, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6508451700210571, |
|
"rewards/margins": 1.2029709815979004, |
|
"rewards/rejected": -2.853816032409668, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.009478672985782, |
|
"grad_norm": 4.504083156585693, |
|
"learning_rate": 5.804678669206738e-06, |
|
"logits/chosen": 0.2876705229282379, |
|
"logits/rejected": 0.2990463376045227, |
|
"logps/chosen": -469.29486083984375, |
|
"logps/rejected": -462.4917297363281, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.7522639036178589, |
|
"rewards/margins": 1.0054848194122314, |
|
"rewards/rejected": -2.757749080657959, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.014218009478673, |
|
"grad_norm": 3.2557713985443115, |
|
"learning_rate": 5.763745930437787e-06, |
|
"logits/chosen": 0.2619165778160095, |
|
"logits/rejected": 0.18662908673286438, |
|
"logps/chosen": -522.1643676757812, |
|
"logps/rejected": -524.9882202148438, |
|
"loss": 0.2847, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.959651231765747, |
|
"rewards/margins": 1.7540154457092285, |
|
"rewards/rejected": -3.7136669158935547, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.018957345971564, |
|
"grad_norm": 3.222653388977051, |
|
"learning_rate": 5.722760714820057e-06, |
|
"logits/chosen": -0.08670102059841156, |
|
"logits/rejected": -0.035274673253297806, |
|
"logps/chosen": -492.94903564453125, |
|
"logps/rejected": -469.5622863769531, |
|
"loss": 0.4155, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.162580966949463, |
|
"rewards/margins": 1.281327724456787, |
|
"rewards/rejected": -3.44390869140625, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.0236966824644549, |
|
"grad_norm": 3.483283758163452, |
|
"learning_rate": 5.681725838440515e-06, |
|
"logits/chosen": -0.15460936725139618, |
|
"logits/rejected": -0.12697185575962067, |
|
"logps/chosen": -485.9268493652344, |
|
"logps/rejected": -487.96917724609375, |
|
"loss": 0.364, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5690550804138184, |
|
"rewards/margins": 1.441988468170166, |
|
"rewards/rejected": -3.0110433101654053, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.028436018957346, |
|
"grad_norm": 3.2059872150421143, |
|
"learning_rate": 5.640644120798312e-06, |
|
"logits/chosen": 0.17371203005313873, |
|
"logits/rejected": 0.09106084704399109, |
|
"logps/chosen": -375.935546875, |
|
"logps/rejected": -388.53076171875, |
|
"loss": 0.332, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4462567567825317, |
|
"rewards/margins": 1.620455026626587, |
|
"rewards/rejected": -3.066711902618408, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.033175355450237, |
|
"grad_norm": 3.8562071323394775, |
|
"learning_rate": 5.599518384611052e-06, |
|
"logits/chosen": 0.2562037706375122, |
|
"logits/rejected": 0.18646736443042755, |
|
"logps/chosen": -418.3273620605469, |
|
"logps/rejected": -407.41162109375, |
|
"loss": 0.2776, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1467158794403076, |
|
"rewards/margins": 2.1470839977264404, |
|
"rewards/rejected": -3.293799877166748, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.037914691943128, |
|
"grad_norm": 2.9552791118621826, |
|
"learning_rate": 5.558351455620852e-06, |
|
"logits/chosen": 0.2555815577507019, |
|
"logits/rejected": 0.17480355501174927, |
|
"logps/chosen": -402.4105529785156, |
|
"logps/rejected": -408.10552978515625, |
|
"loss": 0.282, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.7671201825141907, |
|
"rewards/margins": 1.6860198974609375, |
|
"rewards/rejected": -2.4531400203704834, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.042654028436019, |
|
"grad_norm": 2.3003854751586914, |
|
"learning_rate": 5.517146162400171e-06, |
|
"logits/chosen": 0.08371573686599731, |
|
"logits/rejected": 0.0009252801537513733, |
|
"logps/chosen": -368.94744873046875, |
|
"logps/rejected": -376.25372314453125, |
|
"loss": 0.3108, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2716785669326782, |
|
"rewards/margins": 1.600062608718872, |
|
"rewards/rejected": -2.8717408180236816, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.04739336492891, |
|
"grad_norm": 3.363819122314453, |
|
"learning_rate": 5.475905336157473e-06, |
|
"logits/chosen": 0.395662784576416, |
|
"logits/rejected": 0.32149434089660645, |
|
"logps/chosen": -459.91204833984375, |
|
"logps/rejected": -506.9620361328125, |
|
"loss": 0.3905, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9222174882888794, |
|
"rewards/margins": 1.5406379699707031, |
|
"rewards/rejected": -3.462855815887451, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.052132701421801, |
|
"grad_norm": 3.3270413875579834, |
|
"learning_rate": 5.434631810542688e-06, |
|
"logits/chosen": 0.1943908929824829, |
|
"logits/rejected": 0.13462212681770325, |
|
"logps/chosen": -437.5592041015625, |
|
"logps/rejected": -512.220947265625, |
|
"loss": 0.3339, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5035971403121948, |
|
"rewards/margins": 1.7122483253479004, |
|
"rewards/rejected": -3.2158455848693848, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.0568720379146919, |
|
"grad_norm": 4.051716327667236, |
|
"learning_rate": 5.393328421452514e-06, |
|
"logits/chosen": 0.09970849752426147, |
|
"logits/rejected": 0.34582486748695374, |
|
"logps/chosen": -404.18646240234375, |
|
"logps/rejected": -461.6804504394531, |
|
"loss": 0.4114, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4395058155059814, |
|
"rewards/margins": 1.6363253593444824, |
|
"rewards/rejected": -3.075831413269043, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.061611374407583, |
|
"grad_norm": 3.6509335041046143, |
|
"learning_rate": 5.351998006835563e-06, |
|
"logits/chosen": 0.016243021935224533, |
|
"logits/rejected": 0.12777027487754822, |
|
"logps/chosen": -506.6263427734375, |
|
"logps/rejected": -551.2019653320312, |
|
"loss": 0.4227, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7827990055084229, |
|
"rewards/margins": 1.2781894207000732, |
|
"rewards/rejected": -3.060988187789917, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.066350710900474, |
|
"grad_norm": 3.744976043701172, |
|
"learning_rate": 5.3106434064973665e-06, |
|
"logits/chosen": 0.16931653022766113, |
|
"logits/rejected": 0.07942713797092438, |
|
"logps/chosen": -528.5944213867188, |
|
"logps/rejected": -571.782958984375, |
|
"loss": 0.3732, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5323777198791504, |
|
"rewards/margins": 1.6922569274902344, |
|
"rewards/rejected": -3.2246346473693848, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0710900473933649, |
|
"grad_norm": 2.9027132987976074, |
|
"learning_rate": 5.269267461905253e-06, |
|
"logits/chosen": -0.03642325848340988, |
|
"logits/rejected": 0.27906686067581177, |
|
"logps/chosen": -315.7832336425781, |
|
"logps/rejected": -381.1293029785156, |
|
"loss": 0.3682, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1626783609390259, |
|
"rewards/margins": 1.8461182117462158, |
|
"rewards/rejected": -3.0087969303131104, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.0758293838862558, |
|
"grad_norm": 4.7471208572387695, |
|
"learning_rate": 5.227873015993108e-06, |
|
"logits/chosen": -0.5098191499710083, |
|
"logits/rejected": -0.4582579731941223, |
|
"logps/chosen": -518.41455078125, |
|
"logps/rejected": -516.23193359375, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9056390523910522, |
|
"rewards/margins": 1.5796176195144653, |
|
"rewards/rejected": -3.4852566719055176, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.080568720379147, |
|
"grad_norm": 3.349379062652588, |
|
"learning_rate": 5.186462912966047e-06, |
|
"logits/chosen": 0.2078540027141571, |
|
"logits/rejected": 0.22412914037704468, |
|
"logps/chosen": -354.3533935546875, |
|
"logps/rejected": -354.4374084472656, |
|
"loss": 0.3274, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2849071025848389, |
|
"rewards/margins": 1.9719514846801758, |
|
"rewards/rejected": -3.2568585872650146, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.085308056872038, |
|
"grad_norm": 2.522663116455078, |
|
"learning_rate": 5.145039998104975e-06, |
|
"logits/chosen": -0.10203185677528381, |
|
"logits/rejected": 0.12238292396068573, |
|
"logps/chosen": -420.65643310546875, |
|
"logps/rejected": -487.3912353515625, |
|
"loss": 0.3761, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6576614379882812, |
|
"rewards/margins": 1.5825456380844116, |
|
"rewards/rejected": -3.2402069568634033, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.0900473933649288, |
|
"grad_norm": 3.699955701828003, |
|
"learning_rate": 5.1036071175710984e-06, |
|
"logits/chosen": 0.3874337077140808, |
|
"logits/rejected": 0.3852435052394867, |
|
"logps/chosen": -425.8739013671875, |
|
"logps/rejected": -397.9554748535156, |
|
"loss": 0.4133, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5295315980911255, |
|
"rewards/margins": 1.3557639122009277, |
|
"rewards/rejected": -2.885295867919922, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.09478672985782, |
|
"grad_norm": 2.6708872318267822, |
|
"learning_rate": 5.062167118210367e-06, |
|
"logits/chosen": 0.000867149792611599, |
|
"logits/rejected": -0.034792158752679825, |
|
"logps/chosen": -434.9617919921875, |
|
"logps/rejected": -451.12725830078125, |
|
"loss": 0.3438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3274954557418823, |
|
"rewards/margins": 1.2974351644515991, |
|
"rewards/rejected": -2.6249306201934814, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.099526066350711, |
|
"grad_norm": 4.831852436065674, |
|
"learning_rate": 5.020722847357858e-06, |
|
"logits/chosen": -0.03341293707489967, |
|
"logits/rejected": -0.06871610134840012, |
|
"logps/chosen": -563.6790161132812, |
|
"logps/rejected": -461.8121337890625, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9696635007858276, |
|
"rewards/margins": 1.1347928047180176, |
|
"rewards/rejected": -3.1044564247131348, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.1042654028436019, |
|
"grad_norm": 3.7005701065063477, |
|
"learning_rate": 4.979277152642145e-06, |
|
"logits/chosen": 0.03712880611419678, |
|
"logits/rejected": 0.08507107198238373, |
|
"logps/chosen": -336.78387451171875, |
|
"logps/rejected": -477.3684387207031, |
|
"loss": 0.3123, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.632716178894043, |
|
"rewards/margins": 2.3301315307617188, |
|
"rewards/rejected": -3.9628477096557617, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.1090047393364928, |
|
"grad_norm": 3.4187428951263428, |
|
"learning_rate": 4.937832881789635e-06, |
|
"logits/chosen": -0.0916728526353836, |
|
"logits/rejected": -0.02996966987848282, |
|
"logps/chosen": -460.55352783203125, |
|
"logps/rejected": -454.41326904296875, |
|
"loss": 0.3771, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8706817626953125, |
|
"rewards/margins": 1.590428352355957, |
|
"rewards/rejected": -3.4611101150512695, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.113744075829384, |
|
"grad_norm": 2.6118838787078857, |
|
"learning_rate": 4.8963928824289015e-06, |
|
"logits/chosen": 0.1105157807469368, |
|
"logits/rejected": 0.5052557587623596, |
|
"logps/chosen": -362.1018981933594, |
|
"logps/rejected": -456.45074462890625, |
|
"loss": 0.3053, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.652755856513977, |
|
"rewards/margins": 1.88970148563385, |
|
"rewards/rejected": -3.5424575805664062, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.1184834123222749, |
|
"grad_norm": 3.3146190643310547, |
|
"learning_rate": 4.854960001895027e-06, |
|
"logits/chosen": 0.19854995608329773, |
|
"logits/rejected": 0.2642470598220825, |
|
"logps/chosen": -455.2829895019531, |
|
"logps/rejected": -508.4591064453125, |
|
"loss": 0.3811, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.6405863761901855, |
|
"rewards/margins": 1.4157403707504272, |
|
"rewards/rejected": -3.0563266277313232, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.1232227488151658, |
|
"grad_norm": 2.650205612182617, |
|
"learning_rate": 4.813537087033954e-06, |
|
"logits/chosen": -0.1753956526517868, |
|
"logits/rejected": -0.15638449788093567, |
|
"logps/chosen": -425.37603759765625, |
|
"logps/rejected": -428.813232421875, |
|
"loss": 0.2871, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.447309970855713, |
|
"rewards/margins": 1.9025884866714478, |
|
"rewards/rejected": -3.349898338317871, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.1279620853080567, |
|
"grad_norm": 5.000858783721924, |
|
"learning_rate": 4.772126984006892e-06, |
|
"logits/chosen": 0.25283899903297424, |
|
"logits/rejected": 0.16875059902668, |
|
"logps/chosen": -455.42425537109375, |
|
"logps/rejected": -470.0882873535156, |
|
"loss": 0.4152, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5532653331756592, |
|
"rewards/margins": 1.7187979221343994, |
|
"rewards/rejected": -3.2720632553100586, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.132701421800948, |
|
"grad_norm": 2.7232418060302734, |
|
"learning_rate": 4.7307325380947495e-06, |
|
"logits/chosen": 0.0400099977850914, |
|
"logits/rejected": 0.13005331158638, |
|
"logps/chosen": -358.5569152832031, |
|
"logps/rejected": -500.84222412109375, |
|
"loss": 0.3338, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.523422122001648, |
|
"rewards/margins": 1.6523953676223755, |
|
"rewards/rejected": -3.1758174896240234, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.1374407582938388, |
|
"grad_norm": 3.443223714828491, |
|
"learning_rate": 4.689356593502635e-06, |
|
"logits/chosen": 0.3939206600189209, |
|
"logits/rejected": 0.2064402997493744, |
|
"logps/chosen": -429.4366760253906, |
|
"logps/rejected": -439.154296875, |
|
"loss": 0.3795, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.792637825012207, |
|
"rewards/margins": 1.6197113990783691, |
|
"rewards/rejected": -3.412349224090576, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.1421800947867298, |
|
"grad_norm": 2.805168390274048, |
|
"learning_rate": 4.648001993164438e-06, |
|
"logits/chosen": 0.06738889217376709, |
|
"logits/rejected": 0.238703191280365, |
|
"logps/chosen": -490.4261169433594, |
|
"logps/rejected": -511.69317626953125, |
|
"loss": 0.2848, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8254410028457642, |
|
"rewards/margins": 2.290480375289917, |
|
"rewards/rejected": -4.115921497344971, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.146919431279621, |
|
"grad_norm": 4.80793571472168, |
|
"learning_rate": 4.606671578547488e-06, |
|
"logits/chosen": 0.3581339418888092, |
|
"logits/rejected": -0.04958381503820419, |
|
"logps/chosen": -646.1534423828125, |
|
"logps/rejected": -614.028564453125, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.5761334896087646, |
|
"rewards/margins": 1.4988954067230225, |
|
"rewards/rejected": -4.075028896331787, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.1516587677725119, |
|
"grad_norm": 3.446601152420044, |
|
"learning_rate": 4.565368189457313e-06, |
|
"logits/chosen": -0.129550501704216, |
|
"logits/rejected": 0.18526773154735565, |
|
"logps/chosen": -371.20709228515625, |
|
"logps/rejected": -493.4662780761719, |
|
"loss": 0.3148, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.3364325761795044, |
|
"rewards/margins": 1.9403244256973267, |
|
"rewards/rejected": -3.27675724029541, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.1563981042654028, |
|
"grad_norm": 3.778529167175293, |
|
"learning_rate": 4.524094663842528e-06, |
|
"logits/chosen": -0.27983617782592773, |
|
"logits/rejected": -0.11589246988296509, |
|
"logps/chosen": -510.7512512207031, |
|
"logps/rejected": -511.7879333496094, |
|
"loss": 0.4457, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7793432474136353, |
|
"rewards/margins": 1.390049695968628, |
|
"rewards/rejected": -3.1693930625915527, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.161137440758294, |
|
"grad_norm": 3.1189873218536377, |
|
"learning_rate": 4.482853837599831e-06, |
|
"logits/chosen": -0.13306924700737, |
|
"logits/rejected": -0.3103134334087372, |
|
"logps/chosen": -494.39837646484375, |
|
"logps/rejected": -465.83734130859375, |
|
"loss": 0.3125, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1813969612121582, |
|
"rewards/margins": 1.8126945495605469, |
|
"rewards/rejected": -2.994091510772705, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.1658767772511849, |
|
"grad_norm": 3.904042959213257, |
|
"learning_rate": 4.441648544379149e-06, |
|
"logits/chosen": 0.007268290966749191, |
|
"logits/rejected": 0.07453174144029617, |
|
"logps/chosen": -432.7102966308594, |
|
"logps/rejected": -487.7734069824219, |
|
"loss": 0.4032, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6436212062835693, |
|
"rewards/margins": 1.462231159210205, |
|
"rewards/rejected": -3.1058521270751953, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.1706161137440758, |
|
"grad_norm": 3.9296388626098633, |
|
"learning_rate": 4.400481615388948e-06, |
|
"logits/chosen": 0.17288221418857574, |
|
"logits/rejected": 0.1198100745677948, |
|
"logps/chosen": -484.23211669921875, |
|
"logps/rejected": -434.79248046875, |
|
"loss": 0.468, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9902846813201904, |
|
"rewards/margins": 1.1091970205307007, |
|
"rewards/rejected": -3.0994820594787598, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.1753554502369667, |
|
"grad_norm": 3.4638829231262207, |
|
"learning_rate": 4.359355879201691e-06, |
|
"logits/chosen": 0.0034363269805908203, |
|
"logits/rejected": 0.07013356685638428, |
|
"logps/chosen": -434.6395263671875, |
|
"logps/rejected": -472.5997619628906, |
|
"loss": 0.2568, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.535360336303711, |
|
"rewards/margins": 2.1144678592681885, |
|
"rewards/rejected": -3.6498279571533203, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.180094786729858, |
|
"grad_norm": 4.689034938812256, |
|
"learning_rate": 4.318274161559487e-06, |
|
"logits/chosen": -0.19193895161151886, |
|
"logits/rejected": -0.24672627449035645, |
|
"logps/chosen": -499.9013671875, |
|
"logps/rejected": -477.8822021484375, |
|
"loss": 0.5502, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.9828531742095947, |
|
"rewards/margins": 0.8065874576568604, |
|
"rewards/rejected": -2.789440631866455, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.1848341232227488, |
|
"grad_norm": 2.314577341079712, |
|
"learning_rate": 4.277239285179944e-06, |
|
"logits/chosen": 0.2183598279953003, |
|
"logits/rejected": 0.3239366412162781, |
|
"logps/chosen": -481.9766540527344, |
|
"logps/rejected": -555.5982666015625, |
|
"loss": 0.3188, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.300562858581543, |
|
"rewards/margins": 1.5612691640853882, |
|
"rewards/rejected": -2.8618321418762207, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1895734597156398, |
|
"grad_norm": 3.666198968887329, |
|
"learning_rate": 4.236254069562214e-06, |
|
"logits/chosen": -0.1494772732257843, |
|
"logits/rejected": -0.3153489828109741, |
|
"logps/chosen": -548.8350219726562, |
|
"logps/rejected": -581.28857421875, |
|
"loss": 0.3582, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.901492953300476, |
|
"rewards/margins": 1.6575918197631836, |
|
"rewards/rejected": -3.559084892272949, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.1943127962085307, |
|
"grad_norm": 3.301483631134033, |
|
"learning_rate": 4.195321330793264e-06, |
|
"logits/chosen": 0.3761504590511322, |
|
"logits/rejected": 0.12152983993291855, |
|
"logps/chosen": -474.353759765625, |
|
"logps/rejected": -493.3826904296875, |
|
"loss": 0.3582, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.6552190780639648, |
|
"rewards/margins": 1.3365132808685303, |
|
"rewards/rejected": -2.991732358932495, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.1990521327014219, |
|
"grad_norm": 2.335012197494507, |
|
"learning_rate": 4.154443881354388e-06, |
|
"logits/chosen": -0.2192879021167755, |
|
"logits/rejected": -0.32967904210090637, |
|
"logps/chosen": -446.9404602050781, |
|
"logps/rejected": -512.4267578125, |
|
"loss": 0.3133, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2855887413024902, |
|
"rewards/margins": 1.6910157203674316, |
|
"rewards/rejected": -2.976604461669922, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.2037914691943128, |
|
"grad_norm": 5.097269058227539, |
|
"learning_rate": 4.113624529927963e-06, |
|
"logits/chosen": 0.1676633656024933, |
|
"logits/rejected": 0.22459819912910461, |
|
"logps/chosen": -380.5593566894531, |
|
"logps/rejected": -356.56219482421875, |
|
"loss": 0.336, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4843287467956543, |
|
"rewards/margins": 1.580984354019165, |
|
"rewards/rejected": -3.0653133392333984, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.2085308056872037, |
|
"grad_norm": 2.614943265914917, |
|
"learning_rate": 4.072866081204453e-06, |
|
"logits/chosen": 0.11476369202136993, |
|
"logits/rejected": 0.1240728348493576, |
|
"logps/chosen": -502.7626953125, |
|
"logps/rejected": -518.6180419921875, |
|
"loss": 0.2659, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6948649883270264, |
|
"rewards/margins": 1.8275578022003174, |
|
"rewards/rejected": -3.5224225521087646, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.2132701421800949, |
|
"grad_norm": 3.184537649154663, |
|
"learning_rate": 4.032171335689718e-06, |
|
"logits/chosen": 0.15463709831237793, |
|
"logits/rejected": -0.19407552480697632, |
|
"logps/chosen": -424.0015869140625, |
|
"logps/rejected": -435.4846496582031, |
|
"loss": 0.3429, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.1620564460754395, |
|
"rewards/margins": 1.5143887996673584, |
|
"rewards/rejected": -3.6764450073242188, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.2180094786729858, |
|
"grad_norm": 5.3068437576293945, |
|
"learning_rate": 3.991543089512574e-06, |
|
"logits/chosen": -0.03403574973344803, |
|
"logits/rejected": 0.20418822765350342, |
|
"logps/chosen": -546.001220703125, |
|
"logps/rejected": -661.1378784179688, |
|
"loss": 0.4368, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.00406813621521, |
|
"rewards/margins": 1.6435837745666504, |
|
"rewards/rejected": -3.6476516723632812, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.2227488151658767, |
|
"grad_norm": 2.3214268684387207, |
|
"learning_rate": 3.9509841342326835e-06, |
|
"logits/chosen": 0.18236321210861206, |
|
"logits/rejected": 0.012637853622436523, |
|
"logps/chosen": -340.8662109375, |
|
"logps/rejected": -393.6156005859375, |
|
"loss": 0.242, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4383803606033325, |
|
"rewards/margins": 2.1315698623657227, |
|
"rewards/rejected": -3.5699501037597656, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.2274881516587677, |
|
"grad_norm": 2.3699498176574707, |
|
"learning_rate": 3.910497256648742e-06, |
|
"logits/chosen": -0.17738784849643707, |
|
"logits/rejected": 0.058183833956718445, |
|
"logps/chosen": -337.60540771484375, |
|
"logps/rejected": -408.6331481933594, |
|
"loss": 0.2667, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7594650983810425, |
|
"rewards/margins": 2.0310773849487305, |
|
"rewards/rejected": -3.7905426025390625, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.2322274881516588, |
|
"grad_norm": 4.384218692779541, |
|
"learning_rate": 3.870085238607002e-06, |
|
"logits/chosen": 0.002813771367073059, |
|
"logits/rejected": -0.24638204276561737, |
|
"logps/chosen": -433.4595031738281, |
|
"logps/rejected": -462.3036193847656, |
|
"loss": 0.3907, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.449141263961792, |
|
"rewards/margins": 1.7780585289001465, |
|
"rewards/rejected": -3.2271997928619385, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.2369668246445498, |
|
"grad_norm": 3.777799606323242, |
|
"learning_rate": 3.829750856810123e-06, |
|
"logits/chosen": 0.040886975824832916, |
|
"logits/rejected": 0.15636533498764038, |
|
"logps/chosen": -398.0843200683594, |
|
"logps/rejected": -536.947998046875, |
|
"loss": 0.3703, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.132894515991211, |
|
"rewards/margins": 1.518797755241394, |
|
"rewards/rejected": -3.6516923904418945, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.2417061611374407, |
|
"grad_norm": 3.2709872722625732, |
|
"learning_rate": 3.7894968826263993e-06, |
|
"logits/chosen": 0.35484781861305237, |
|
"logits/rejected": -0.043566398322582245, |
|
"logps/chosen": -551.0684814453125, |
|
"logps/rejected": -534.43701171875, |
|
"loss": 0.2318, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -1.4083945751190186, |
|
"rewards/margins": 2.284555673599243, |
|
"rewards/rejected": -3.6929502487182617, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.2464454976303316, |
|
"grad_norm": 3.492917060852051, |
|
"learning_rate": 3.7493260818993295e-06, |
|
"logits/chosen": -0.24769189953804016, |
|
"logits/rejected": -0.033029571175575256, |
|
"logps/chosen": -437.48638916015625, |
|
"logps/rejected": -473.00543212890625, |
|
"loss": 0.3564, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5751187801361084, |
|
"rewards/margins": 1.6503984928131104, |
|
"rewards/rejected": -3.2255170345306396, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.2511848341232228, |
|
"grad_norm": 2.8335609436035156, |
|
"learning_rate": 3.7092412147575763e-06, |
|
"logits/chosen": 0.08972194790840149, |
|
"logits/rejected": 0.0525023378431797, |
|
"logps/chosen": -399.0003662109375, |
|
"logps/rejected": -462.21539306640625, |
|
"loss": 0.3082, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.152060031890869, |
|
"rewards/margins": 2.0616841316223145, |
|
"rewards/rejected": -4.213744163513184, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.2559241706161137, |
|
"grad_norm": 3.3234729766845703, |
|
"learning_rate": 3.6692450354253244e-06, |
|
"logits/chosen": -0.11747504025697708, |
|
"logits/rejected": -0.09165113419294357, |
|
"logps/chosen": -487.0151062011719, |
|
"logps/rejected": -492.8780212402344, |
|
"loss": 0.3621, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7996101379394531, |
|
"rewards/margins": 1.4663692712783813, |
|
"rewards/rejected": -3.265979528427124, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.2606635071090047, |
|
"grad_norm": 2.617558240890503, |
|
"learning_rate": 3.6293402920330345e-06, |
|
"logits/chosen": 0.3675645589828491, |
|
"logits/rejected": 0.2720622420310974, |
|
"logps/chosen": -421.515869140625, |
|
"logps/rejected": -430.33367919921875, |
|
"loss": 0.369, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.132265567779541, |
|
"rewards/margins": 1.4423668384552002, |
|
"rewards/rejected": -3.5746326446533203, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.2654028436018958, |
|
"grad_norm": 2.5565578937530518, |
|
"learning_rate": 3.589529726428615e-06, |
|
"logits/chosen": 0.07273396104574203, |
|
"logits/rejected": 0.060611553490161896, |
|
"logps/chosen": -418.9151306152344, |
|
"logps/rejected": -446.34698486328125, |
|
"loss": 0.3304, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6537911891937256, |
|
"rewards/margins": 1.5491554737091064, |
|
"rewards/rejected": -3.202946662902832, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.2701421800947867, |
|
"grad_norm": 3.8108534812927246, |
|
"learning_rate": 3.54981607398904e-06, |
|
"logits/chosen": 0.035040952265262604, |
|
"logits/rejected": 0.022193975746631622, |
|
"logps/chosen": -488.48675537109375, |
|
"logps/rejected": -434.921875, |
|
"loss": 0.3831, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.611822485923767, |
|
"rewards/margins": 1.6759777069091797, |
|
"rewards/rejected": -3.2878000736236572, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.2748815165876777, |
|
"grad_norm": 2.969774007797241, |
|
"learning_rate": 3.5102020634323974e-06, |
|
"logits/chosen": 0.2675975561141968, |
|
"logits/rejected": 0.2470274567604065, |
|
"logps/chosen": -481.4004821777344, |
|
"logps/rejected": -485.03863525390625, |
|
"loss": 0.432, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.6142653226852417, |
|
"rewards/margins": 1.4393080472946167, |
|
"rewards/rejected": -3.0535733699798584, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.2796208530805688, |
|
"grad_norm": 2.932152271270752, |
|
"learning_rate": 3.470690416630395e-06, |
|
"logits/chosen": -0.06690172851085663, |
|
"logits/rejected": 0.149437814950943, |
|
"logps/chosen": -383.09259033203125, |
|
"logps/rejected": -452.948974609375, |
|
"loss": 0.3637, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.385080099105835, |
|
"rewards/margins": 1.3777412176132202, |
|
"rewards/rejected": -2.7628211975097656, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.2843601895734598, |
|
"grad_norm": 3.806013584136963, |
|
"learning_rate": 3.431283848421347e-06, |
|
"logits/chosen": 0.1305023580789566, |
|
"logits/rejected": -0.12427529692649841, |
|
"logps/chosen": -432.52520751953125, |
|
"logps/rejected": -456.1821594238281, |
|
"loss": 0.3401, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.410179853439331, |
|
"rewards/margins": 1.6679764986038208, |
|
"rewards/rejected": -3.078155994415283, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.2890995260663507, |
|
"grad_norm": 3.743184804916382, |
|
"learning_rate": 3.39198506642364e-06, |
|
"logits/chosen": 0.18078383803367615, |
|
"logits/rejected": 0.16558128595352173, |
|
"logps/chosen": -404.4733581542969, |
|
"logps/rejected": -463.1748046875, |
|
"loss": 0.4068, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8519794940948486, |
|
"rewards/margins": 1.207563042640686, |
|
"rewards/rejected": -3.059542655944824, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.2938388625592416, |
|
"grad_norm": 3.420830726623535, |
|
"learning_rate": 3.352796770849679e-06, |
|
"logits/chosen": -0.22084331512451172, |
|
"logits/rejected": -0.03927930071949959, |
|
"logps/chosen": -379.3165588378906, |
|
"logps/rejected": -375.982666015625, |
|
"loss": 0.2515, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0934913158416748, |
|
"rewards/margins": 2.5695981979370117, |
|
"rewards/rejected": -3.6630892753601074, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.2985781990521326, |
|
"grad_norm": 2.730637788772583, |
|
"learning_rate": 3.3137216543203747e-06, |
|
"logits/chosen": 0.3634192943572998, |
|
"logits/rejected": 0.02868695929646492, |
|
"logps/chosen": -395.314697265625, |
|
"logps/rejected": -386.64801025390625, |
|
"loss": 0.3487, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4456108808517456, |
|
"rewards/margins": 1.609388828277588, |
|
"rewards/rejected": -3.054999828338623, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.3033175355450237, |
|
"grad_norm": 5.403792858123779, |
|
"learning_rate": 3.2747624016801245e-06, |
|
"logits/chosen": -0.20156198740005493, |
|
"logits/rejected": 0.0912720337510109, |
|
"logps/chosen": -481.54888916015625, |
|
"logps/rejected": -518.7413940429688, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.1031124591827393, |
|
"rewards/margins": 1.3092026710510254, |
|
"rewards/rejected": -3.4123151302337646, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.3080568720379147, |
|
"grad_norm": 3.5099925994873047, |
|
"learning_rate": 3.2359216898123343e-06, |
|
"logits/chosen": -0.05037354677915573, |
|
"logits/rejected": -0.1334826946258545, |
|
"logps/chosen": -556.6934814453125, |
|
"logps/rejected": -531.0350341796875, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9162890911102295, |
|
"rewards/margins": 1.1660747528076172, |
|
"rewards/rejected": -3.0823636054992676, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.3127962085308056, |
|
"grad_norm": 3.462001323699951, |
|
"learning_rate": 3.197202187455498e-06, |
|
"logits/chosen": -0.10171887278556824, |
|
"logits/rejected": -0.15261708199977875, |
|
"logps/chosen": -555.4314575195312, |
|
"logps/rejected": -588.11767578125, |
|
"loss": 0.3584, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8054559230804443, |
|
"rewards/margins": 1.69133722782135, |
|
"rewards/rejected": -3.496793270111084, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.3175355450236967, |
|
"grad_norm": 2.394829511642456, |
|
"learning_rate": 3.158606555019826e-06, |
|
"logits/chosen": -0.19711580872535706, |
|
"logits/rejected": -0.17292340099811554, |
|
"logps/chosen": -590.0509033203125, |
|
"logps/rejected": -577.7540893554688, |
|
"loss": 0.2994, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7370967864990234, |
|
"rewards/margins": 1.9557228088378906, |
|
"rewards/rejected": -3.692819833755493, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.3222748815165877, |
|
"grad_norm": 3.8144333362579346, |
|
"learning_rate": 3.120137444404442e-06, |
|
"logits/chosen": 0.1638478934764862, |
|
"logits/rejected": 0.1452624797821045, |
|
"logps/chosen": -334.2840576171875, |
|
"logps/rejected": -392.561279296875, |
|
"loss": 0.373, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.239801049232483, |
|
"rewards/margins": 1.3264257907867432, |
|
"rewards/rejected": -2.5662269592285156, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.3270142180094786, |
|
"grad_norm": 3.646484613418579, |
|
"learning_rate": 3.081797498815185e-06, |
|
"logits/chosen": -0.1268320083618164, |
|
"logits/rejected": -0.17580845952033997, |
|
"logps/chosen": -460.2949523925781, |
|
"logps/rejected": -452.21783447265625, |
|
"loss": 0.3658, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.822378158569336, |
|
"rewards/margins": 1.5197012424468994, |
|
"rewards/rejected": -3.3420791625976562, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3317535545023698, |
|
"grad_norm": 3.2970638275146484, |
|
"learning_rate": 3.0435893525829847e-06, |
|
"logits/chosen": 0.13139204680919647, |
|
"logits/rejected": 0.24725201725959778, |
|
"logps/chosen": -373.4732971191406, |
|
"logps/rejected": -456.06658935546875, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.9837716221809387, |
|
"rewards/margins": 2.4389843940734863, |
|
"rewards/rejected": -3.4227561950683594, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.3364928909952607, |
|
"grad_norm": 2.8067758083343506, |
|
"learning_rate": 3.005515630982858e-06, |
|
"logits/chosen": 0.13529516756534576, |
|
"logits/rejected": 0.13115084171295166, |
|
"logps/chosen": -541.8878784179688, |
|
"logps/rejected": -540.979736328125, |
|
"loss": 0.3042, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8211042881011963, |
|
"rewards/margins": 1.9124205112457275, |
|
"rewards/rejected": -3.733524799346924, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.3412322274881516, |
|
"grad_norm": 4.505223751068115, |
|
"learning_rate": 2.9675789500535328e-06, |
|
"logits/chosen": 0.28925737738609314, |
|
"logits/rejected": -0.0881756916642189, |
|
"logps/chosen": -447.83154296875, |
|
"logps/rejected": -411.06842041015625, |
|
"loss": 0.3877, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7665940523147583, |
|
"rewards/margins": 1.4863183498382568, |
|
"rewards/rejected": -3.2529125213623047, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.3459715639810428, |
|
"grad_norm": 3.6730592250823975, |
|
"learning_rate": 2.9297819164176964e-06, |
|
"logits/chosen": -0.024140790104866028, |
|
"logits/rejected": -0.05335076153278351, |
|
"logps/chosen": -509.5386047363281, |
|
"logps/rejected": -589.5108642578125, |
|
"loss": 0.4042, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.5151710510253906, |
|
"rewards/margins": 1.5130798816680908, |
|
"rewards/rejected": -4.0282511711120605, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.3507109004739337, |
|
"grad_norm": 2.930845260620117, |
|
"learning_rate": 2.8921271271028896e-06, |
|
"logits/chosen": 0.4924960732460022, |
|
"logits/rejected": 0.44390055537223816, |
|
"logps/chosen": -551.4659423828125, |
|
"logps/rejected": -540.36181640625, |
|
"loss": 0.3399, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.004122495651245, |
|
"rewards/margins": 1.6733444929122925, |
|
"rewards/rejected": -3.677466869354248, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.3554502369668247, |
|
"grad_norm": 3.2506113052368164, |
|
"learning_rate": 2.854617169363075e-06, |
|
"logits/chosen": -0.24163638055324554, |
|
"logits/rejected": 0.028947792947292328, |
|
"logps/chosen": -525.6647338867188, |
|
"logps/rejected": -594.4169311523438, |
|
"loss": 0.2765, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7295421361923218, |
|
"rewards/margins": 2.143195390701294, |
|
"rewards/rejected": -3.8727376461029053, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.3601895734597156, |
|
"grad_norm": 4.907602787017822, |
|
"learning_rate": 2.817254620500868e-06, |
|
"logits/chosen": -0.05144549906253815, |
|
"logits/rejected": -0.03435233607888222, |
|
"logps/chosen": -322.93670654296875, |
|
"logps/rejected": -336.6121826171875, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.0222997665405273, |
|
"rewards/margins": 1.0926249027252197, |
|
"rewards/rejected": -3.114924669265747, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.3649289099526065, |
|
"grad_norm": 2.1702044010162354, |
|
"learning_rate": 2.7800420476904337e-06, |
|
"logits/chosen": 0.17638254165649414, |
|
"logits/rejected": 0.03706669062376022, |
|
"logps/chosen": -482.89410400390625, |
|
"logps/rejected": -495.45635986328125, |
|
"loss": 0.2756, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5554332733154297, |
|
"rewards/margins": 1.9448996782302856, |
|
"rewards/rejected": -3.500332832336426, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.3696682464454977, |
|
"grad_norm": 3.919252872467041, |
|
"learning_rate": 2.7429820078011215e-06, |
|
"logits/chosen": 0.1973332315683365, |
|
"logits/rejected": 0.26703959703445435, |
|
"logps/chosen": -465.7378845214844, |
|
"logps/rejected": -464.8862609863281, |
|
"loss": 0.4067, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.0374157428741455, |
|
"rewards/margins": 1.585756540298462, |
|
"rewards/rejected": -3.6231720447540283, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.3744075829383886, |
|
"grad_norm": 2.8202483654022217, |
|
"learning_rate": 2.7060770472217635e-06, |
|
"logits/chosen": 0.22294802963733673, |
|
"logits/rejected": 0.4262656569480896, |
|
"logps/chosen": -464.995849609375, |
|
"logps/rejected": -498.488037109375, |
|
"loss": 0.254, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.8536605834960938, |
|
"rewards/margins": 2.032717704772949, |
|
"rewards/rejected": -3.886378288269043, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.3791469194312795, |
|
"grad_norm": 3.604506015777588, |
|
"learning_rate": 2.669329701685719e-06, |
|
"logits/chosen": 0.27663156390190125, |
|
"logits/rejected": 0.06393498927354813, |
|
"logps/chosen": -639.620849609375, |
|
"logps/rejected": -598.626220703125, |
|
"loss": 0.3477, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.7442690134048462, |
|
"rewards/margins": 1.770451307296753, |
|
"rewards/rejected": -3.5147204399108887, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.3838862559241707, |
|
"grad_norm": 4.539163112640381, |
|
"learning_rate": 2.632742496096651e-06, |
|
"logits/chosen": -0.019270386546850204, |
|
"logits/rejected": -0.1904076784849167, |
|
"logps/chosen": -312.65716552734375, |
|
"logps/rejected": -371.73785400390625, |
|
"loss": 0.3459, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4120981693267822, |
|
"rewards/margins": 1.6321998834609985, |
|
"rewards/rejected": -3.0442981719970703, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.3886255924170616, |
|
"grad_norm": 3.6314282417297363, |
|
"learning_rate": 2.59631794435503e-06, |
|
"logits/chosen": 0.3927982747554779, |
|
"logits/rejected": 0.5379042625427246, |
|
"logps/chosen": -445.4381103515625, |
|
"logps/rejected": -451.68243408203125, |
|
"loss": 0.4375, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.476792573928833, |
|
"rewards/margins": 1.4225971698760986, |
|
"rewards/rejected": -2.8993897438049316, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.3933649289099526, |
|
"grad_norm": 2.562574625015259, |
|
"learning_rate": 2.560058549185412e-06, |
|
"logits/chosen": -0.25260213017463684, |
|
"logits/rejected": -0.2509850561618805, |
|
"logps/chosen": -364.51385498046875, |
|
"logps/rejected": -367.390869140625, |
|
"loss": 0.2561, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4405786991119385, |
|
"rewards/margins": 2.285914182662964, |
|
"rewards/rejected": -3.7264928817749023, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.3981042654028437, |
|
"grad_norm": 3.246412754058838, |
|
"learning_rate": 2.523966801964468e-06, |
|
"logits/chosen": 0.0612456277012825, |
|
"logits/rejected": 0.02570994198322296, |
|
"logps/chosen": -399.10467529296875, |
|
"logps/rejected": -454.84991455078125, |
|
"loss": 0.3405, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6198558807373047, |
|
"rewards/margins": 1.5225186347961426, |
|
"rewards/rejected": -3.1423745155334473, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.4028436018957346, |
|
"grad_norm": 3.30670166015625, |
|
"learning_rate": 2.488045182549819e-06, |
|
"logits/chosen": 0.0521719716489315, |
|
"logits/rejected": 0.15891367197036743, |
|
"logps/chosen": -341.91900634765625, |
|
"logps/rejected": -461.2604064941406, |
|
"loss": 0.2899, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.4679073095321655, |
|
"rewards/margins": 2.0705184936523438, |
|
"rewards/rejected": -3.5384254455566406, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.4075829383886256, |
|
"grad_norm": 4.309845924377441, |
|
"learning_rate": 2.4522961591096246e-06, |
|
"logits/chosen": -0.23573561012744904, |
|
"logits/rejected": -0.42021211981773376, |
|
"logps/chosen": -423.3658752441406, |
|
"logps/rejected": -412.6981201171875, |
|
"loss": 0.2852, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.66180419921875, |
|
"rewards/margins": 1.9530575275421143, |
|
"rewards/rejected": -3.614861488342285, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.4123222748815165, |
|
"grad_norm": 2.6515750885009766, |
|
"learning_rate": 2.4167221879530063e-06, |
|
"logits/chosen": 0.01921391859650612, |
|
"logits/rejected": -0.13795889914035797, |
|
"logps/chosen": -400.0218505859375, |
|
"logps/rejected": -504.5172424316406, |
|
"loss": 0.3005, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8166048526763916, |
|
"rewards/margins": 2.0179452896118164, |
|
"rewards/rejected": -3.834550380706787, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.4170616113744074, |
|
"grad_norm": 3.4206173419952393, |
|
"learning_rate": 2.381325713361283e-06, |
|
"logits/chosen": -0.13432744145393372, |
|
"logits/rejected": 0.1736375093460083, |
|
"logps/chosen": -385.46893310546875, |
|
"logps/rejected": -490.6543273925781, |
|
"loss": 0.318, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.8471072912216187, |
|
"rewards/margins": 2.0031168460845947, |
|
"rewards/rejected": -3.850224018096924, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.4218009478672986, |
|
"grad_norm": 4.789295673370361, |
|
"learning_rate": 2.3461091674199998e-06, |
|
"logits/chosen": 0.19233857095241547, |
|
"logits/rejected": 0.11930015683174133, |
|
"logps/chosen": -492.05926513671875, |
|
"logps/rejected": -572.2630615234375, |
|
"loss": 0.3674, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9044451713562012, |
|
"rewards/margins": 2.156116008758545, |
|
"rewards/rejected": -4.060561180114746, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.4265402843601895, |
|
"grad_norm": 3.0746889114379883, |
|
"learning_rate": 2.311074969851852e-06, |
|
"logits/chosen": 0.2876991927623749, |
|
"logits/rejected": 0.5138101577758789, |
|
"logps/chosen": -518.71630859375, |
|
"logps/rejected": -563.4703979492188, |
|
"loss": 0.385, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7743141651153564, |
|
"rewards/margins": 1.297276258468628, |
|
"rewards/rejected": -3.0715904235839844, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.4312796208530805, |
|
"grad_norm": 4.4295654296875, |
|
"learning_rate": 2.276225527850401e-06, |
|
"logits/chosen": 0.11437669396400452, |
|
"logits/rejected": 0.037944670766592026, |
|
"logps/chosen": -390.86199951171875, |
|
"logps/rejected": -374.7806701660156, |
|
"loss": 0.4342, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7658305168151855, |
|
"rewards/margins": 1.4489760398864746, |
|
"rewards/rejected": -3.21480655670166, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.4360189573459716, |
|
"grad_norm": 3.8123362064361572, |
|
"learning_rate": 2.2415632359146855e-06, |
|
"logits/chosen": 0.03791449964046478, |
|
"logits/rejected": 0.27941614389419556, |
|
"logps/chosen": -424.6317138671875, |
|
"logps/rejected": -474.95452880859375, |
|
"loss": 0.4074, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4859094619750977, |
|
"rewards/margins": 1.23709237575531, |
|
"rewards/rejected": -3.723001480102539, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.4407582938388626, |
|
"grad_norm": 3.9473981857299805, |
|
"learning_rate": 2.2070904756847023e-06, |
|
"logits/chosen": -0.010878009721636772, |
|
"logits/rejected": 0.011582762002944946, |
|
"logps/chosen": -429.3747863769531, |
|
"logps/rejected": -443.65106201171875, |
|
"loss": 0.2943, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.9164514541625977, |
|
"rewards/margins": 2.2701520919799805, |
|
"rewards/rejected": -4.186603546142578, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.4454976303317535, |
|
"grad_norm": 3.664656400680542, |
|
"learning_rate": 2.1728096157777516e-06, |
|
"logits/chosen": 0.11286801099777222, |
|
"logits/rejected": 0.1344190537929535, |
|
"logps/chosen": -492.8208923339844, |
|
"logps/rejected": -498.2126159667969, |
|
"loss": 0.3742, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.920929193496704, |
|
"rewards/margins": 1.7695159912109375, |
|
"rewards/rejected": -3.6904454231262207, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.4502369668246446, |
|
"grad_norm": 3.3644583225250244, |
|
"learning_rate": 2.1387230116257004e-06, |
|
"logits/chosen": 0.29534584283828735, |
|
"logits/rejected": 0.16407078504562378, |
|
"logps/chosen": -429.521728515625, |
|
"logps/rejected": -413.7642822265625, |
|
"loss": 0.4601, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.3842999935150146, |
|
"rewards/margins": 1.3130927085876465, |
|
"rewards/rejected": -3.697392702102661, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.4549763033175356, |
|
"grad_norm": 3.42510986328125, |
|
"learning_rate": 2.104833005313131e-06, |
|
"logits/chosen": 0.04616807401180267, |
|
"logits/rejected": 0.15600745379924774, |
|
"logps/chosen": -403.85400390625, |
|
"logps/rejected": -450.656982421875, |
|
"loss": 0.3924, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6164143085479736, |
|
"rewards/margins": 2.050086498260498, |
|
"rewards/rejected": -4.666500568389893, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.4597156398104265, |
|
"grad_norm": 4.1046271324157715, |
|
"learning_rate": 2.071141925416431e-06, |
|
"logits/chosen": -0.1839747130870819, |
|
"logits/rejected": -0.08555589616298676, |
|
"logps/chosen": -362.833740234375, |
|
"logps/rejected": -407.0841064453125, |
|
"loss": 0.3899, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9180059432983398, |
|
"rewards/margins": 1.3669946193695068, |
|
"rewards/rejected": -3.2850005626678467, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.4644549763033177, |
|
"grad_norm": 4.176200866699219, |
|
"learning_rate": 2.0376520868437838e-06, |
|
"logits/chosen": 0.3360610902309418, |
|
"logits/rejected": 0.32926660776138306, |
|
"logps/chosen": -462.2861328125, |
|
"logps/rejected": -485.8418884277344, |
|
"loss": 0.4298, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.327528715133667, |
|
"rewards/margins": 1.478387475013733, |
|
"rewards/rejected": -3.8059163093566895, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.4691943127962086, |
|
"grad_norm": 3.149693727493286, |
|
"learning_rate": 2.0043657906761144e-06, |
|
"logits/chosen": 0.013693787157535553, |
|
"logits/rejected": 0.25990521907806396, |
|
"logps/chosen": -371.7657165527344, |
|
"logps/rejected": -410.69744873046875, |
|
"loss": 0.3518, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1581993103027344, |
|
"rewards/margins": 2.091628313064575, |
|
"rewards/rejected": -4.2498273849487305, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.4739336492890995, |
|
"grad_norm": 2.863271951675415, |
|
"learning_rate": 1.971285324008994e-06, |
|
"logits/chosen": 0.43576109409332275, |
|
"logits/rejected": 0.3071168065071106, |
|
"logps/chosen": -366.2781677246094, |
|
"logps/rejected": -386.5714111328125, |
|
"loss": 0.2796, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.3572580814361572, |
|
"rewards/margins": 1.936234951019287, |
|
"rewards/rejected": -3.2934930324554443, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.4786729857819905, |
|
"grad_norm": 3.9290127754211426, |
|
"learning_rate": 1.938412959795472e-06, |
|
"logits/chosen": 0.11994842439889908, |
|
"logits/rejected": 0.14935952425003052, |
|
"logps/chosen": -545.4857177734375, |
|
"logps/rejected": -637.3936767578125, |
|
"loss": 0.3632, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.8452956676483154, |
|
"rewards/margins": 1.575514554977417, |
|
"rewards/rejected": -4.420809745788574, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.4834123222748814, |
|
"grad_norm": 2.9018099308013916, |
|
"learning_rate": 1.9057509566899268e-06, |
|
"logits/chosen": 0.07368522882461548, |
|
"logits/rejected": 0.16368550062179565, |
|
"logps/chosen": -349.2361145019531, |
|
"logps/rejected": -417.56201171875, |
|
"loss": 0.2909, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.6519646644592285, |
|
"rewards/margins": 1.7734508514404297, |
|
"rewards/rejected": -3.425415515899658, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.4881516587677726, |
|
"grad_norm": 3.14565110206604, |
|
"learning_rate": 1.873301558892855e-06, |
|
"logits/chosen": 0.06656618416309357, |
|
"logits/rejected": 0.12518161535263062, |
|
"logps/chosen": -396.86651611328125, |
|
"logps/rejected": -444.3167724609375, |
|
"loss": 0.364, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1594903469085693, |
|
"rewards/margins": 1.6573652029037476, |
|
"rewards/rejected": -3.8168554306030273, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.4928909952606635, |
|
"grad_norm": 3.205932855606079, |
|
"learning_rate": 1.8410669959966804e-06, |
|
"logits/chosen": 0.07552038878202438, |
|
"logits/rejected": 0.12510335445404053, |
|
"logps/chosen": -420.3112487792969, |
|
"logps/rejected": -473.9259033203125, |
|
"loss": 0.3091, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.9273097515106201, |
|
"rewards/margins": 1.7265255451202393, |
|
"rewards/rejected": -3.6538352966308594, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.4976303317535544, |
|
"grad_norm": 3.193234443664551, |
|
"learning_rate": 1.8090494828325634e-06, |
|
"logits/chosen": -0.24580290913581848, |
|
"logits/rejected": -0.2829708456993103, |
|
"logps/chosen": -416.750732421875, |
|
"logps/rejected": -461.3362731933594, |
|
"loss": 0.2596, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.465597152709961, |
|
"rewards/margins": 2.6725990772247314, |
|
"rewards/rejected": -4.1381964683532715, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.5023696682464456, |
|
"grad_norm": 2.840527057647705, |
|
"learning_rate": 1.7772512193182096e-06, |
|
"logits/chosen": 0.32139959931373596, |
|
"logits/rejected": 0.3541162610054016, |
|
"logps/chosen": -432.45733642578125, |
|
"logps/rejected": -466.65545654296875, |
|
"loss": 0.3187, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.2416555881500244, |
|
"rewards/margins": 1.9411081075668335, |
|
"rewards/rejected": -4.182764053344727, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.5071090047393365, |
|
"grad_norm": 1.9531362056732178, |
|
"learning_rate": 1.745674390306722e-06, |
|
"logits/chosen": 0.16892415285110474, |
|
"logits/rejected": 0.2631104588508606, |
|
"logps/chosen": -413.987548828125, |
|
"logps/rejected": -437.920166015625, |
|
"loss": 0.2867, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9473943710327148, |
|
"rewards/margins": 2.0647332668304443, |
|
"rewards/rejected": -4.012127876281738, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.5118483412322274, |
|
"grad_norm": 2.907794237136841, |
|
"learning_rate": 1.7143211654364761e-06, |
|
"logits/chosen": 0.061596665531396866, |
|
"logits/rejected": 0.03803150728344917, |
|
"logps/chosen": -405.4927062988281, |
|
"logps/rejected": -378.6966247558594, |
|
"loss": 0.2711, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5833370685577393, |
|
"rewards/margins": 2.4727444648742676, |
|
"rewards/rejected": -4.056081771850586, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.5165876777251186, |
|
"grad_norm": 5.282046794891357, |
|
"learning_rate": 1.6831936989820507e-06, |
|
"logits/chosen": 0.13025835156440735, |
|
"logits/rejected": -0.02494555525481701, |
|
"logps/chosen": -419.9814758300781, |
|
"logps/rejected": -380.5059814453125, |
|
"loss": 0.381, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.9292106628417969, |
|
"rewards/margins": 1.795340895652771, |
|
"rewards/rejected": -3.7245516777038574, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.5213270142180095, |
|
"grad_norm": 3.417613983154297, |
|
"learning_rate": 1.6522941297061996e-06, |
|
"logits/chosen": 0.20090161263942719, |
|
"logits/rejected": 0.2483268678188324, |
|
"logps/chosen": -479.95306396484375, |
|
"logps/rejected": -536.9041137695312, |
|
"loss": 0.3483, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9097940921783447, |
|
"rewards/margins": 1.6619257926940918, |
|
"rewards/rejected": -3.5717201232910156, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.5260663507109005, |
|
"grad_norm": 4.454870700836182, |
|
"learning_rate": 1.6216245807129005e-06, |
|
"logits/chosen": 0.013529401272535324, |
|
"logits/rejected": 0.07809659838676453, |
|
"logps/chosen": -399.9565124511719, |
|
"logps/rejected": -465.4268798828125, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.1004106998443604, |
|
"rewards/margins": 2.0842761993408203, |
|
"rewards/rejected": -4.18468713760376, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.5308056872037916, |
|
"grad_norm": 3.5815956592559814, |
|
"learning_rate": 1.5911871593014838e-06, |
|
"logits/chosen": 0.4060709774494171, |
|
"logits/rejected": 0.6513478755950928, |
|
"logps/chosen": -345.8062438964844, |
|
"logps/rejected": -354.0713806152344, |
|
"loss": 0.4097, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.48841392993927, |
|
"rewards/margins": 1.9469943046569824, |
|
"rewards/rejected": -3.435408115386963, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.5355450236966823, |
|
"grad_norm": 2.94301176071167, |
|
"learning_rate": 1.5609839568218248e-06, |
|
"logits/chosen": 0.16032955050468445, |
|
"logits/rejected": 0.19198182225227356, |
|
"logps/chosen": -486.0198669433594, |
|
"logps/rejected": -518.156494140625, |
|
"loss": 0.2529, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.108952760696411, |
|
"rewards/margins": 2.007476329803467, |
|
"rewards/rejected": -4.116428852081299, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.5402843601895735, |
|
"grad_norm": 3.2696046829223633, |
|
"learning_rate": 1.5310170485306675e-06, |
|
"logits/chosen": 0.5102041959762573, |
|
"logits/rejected": 0.533448338508606, |
|
"logps/chosen": -525.1609497070312, |
|
"logps/rejected": -560.2240600585938, |
|
"loss": 0.2644, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.106445074081421, |
|
"rewards/margins": 2.0785865783691406, |
|
"rewards/rejected": -4.185031890869141, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.5450236966824644, |
|
"grad_norm": 2.56247878074646, |
|
"learning_rate": 1.5012884934490168e-06, |
|
"logits/chosen": 0.10596761107444763, |
|
"logits/rejected": -0.22906875610351562, |
|
"logps/chosen": -428.1797180175781, |
|
"logps/rejected": -428.2669372558594, |
|
"loss": 0.3297, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.578533411026001, |
|
"rewards/margins": 1.8542442321777344, |
|
"rewards/rejected": -3.4327776432037354, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.5497630331753554, |
|
"grad_norm": 3.698256731033325, |
|
"learning_rate": 1.4718003342206722e-06, |
|
"logits/chosen": -0.03314054012298584, |
|
"logits/rejected": 0.09218861162662506, |
|
"logps/chosen": -460.627685546875, |
|
"logps/rejected": -545.28125, |
|
"loss": 0.4134, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.307816743850708, |
|
"rewards/margins": 1.7128374576568604, |
|
"rewards/rejected": -4.020654201507568, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.5545023696682465, |
|
"grad_norm": 3.7442150115966797, |
|
"learning_rate": 1.44255459697188e-06, |
|
"logits/chosen": 0.434778094291687, |
|
"logits/rejected": 0.3917992115020752, |
|
"logps/chosen": -400.646484375, |
|
"logps/rejected": -504.3604736328125, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.231884002685547, |
|
"rewards/margins": 1.6030129194259644, |
|
"rewards/rejected": -3.8348968029022217, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.5592417061611374, |
|
"grad_norm": 2.8240530490875244, |
|
"learning_rate": 1.413553291172106e-06, |
|
"logits/chosen": 0.16396990418434143, |
|
"logits/rejected": -0.013539992272853851, |
|
"logps/chosen": -416.2026062011719, |
|
"logps/rejected": -393.49652099609375, |
|
"loss": 0.2875, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.2988057136535645, |
|
"rewards/margins": 1.8778635263442993, |
|
"rewards/rejected": -3.176669120788574, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.5639810426540284, |
|
"grad_norm": 3.9064056873321533, |
|
"learning_rate": 1.3847984094959843e-06, |
|
"logits/chosen": -0.3242209553718567, |
|
"logits/rejected": -0.22871044278144836, |
|
"logps/chosen": -350.4272155761719, |
|
"logps/rejected": -461.5364990234375, |
|
"loss": 0.3061, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.0482442378997803, |
|
"rewards/margins": 2.088869571685791, |
|
"rewards/rejected": -4.137113571166992, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.5687203791469195, |
|
"grad_norm": 2.855518341064453, |
|
"learning_rate": 1.3562919276863846e-06, |
|
"logits/chosen": -0.12499962747097015, |
|
"logits/rejected": 0.03998437523841858, |
|
"logps/chosen": -443.1326904296875, |
|
"logps/rejected": -450.186767578125, |
|
"loss": 0.3047, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8554315567016602, |
|
"rewards/margins": 1.7155771255493164, |
|
"rewards/rejected": -3.5710082054138184, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.5734597156398105, |
|
"grad_norm": 2.2054951190948486, |
|
"learning_rate": 1.3280358044186648e-06, |
|
"logits/chosen": 0.0808248519897461, |
|
"logits/rejected": 0.0358705073595047, |
|
"logps/chosen": -460.5960693359375, |
|
"logps/rejected": -517.7080078125, |
|
"loss": 0.2432, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.286323070526123, |
|
"rewards/margins": 2.1876108646392822, |
|
"rewards/rejected": -4.473934173583984, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.5781990521327014, |
|
"grad_norm": 4.370570182800293, |
|
"learning_rate": 1.3000319811660967e-06, |
|
"logits/chosen": -0.05356225371360779, |
|
"logits/rejected": -0.1574479043483734, |
|
"logps/chosen": -510.5133361816406, |
|
"logps/rejected": -494.0157470703125, |
|
"loss": 0.4098, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.589010238647461, |
|
"rewards/margins": 1.8569483757019043, |
|
"rewards/rejected": -3.4459586143493652, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.5829383886255926, |
|
"grad_norm": 3.086522340774536, |
|
"learning_rate": 1.2722823820664577e-06, |
|
"logits/chosen": 0.25446993112564087, |
|
"logits/rejected": 0.10102240741252899, |
|
"logps/chosen": -556.471923828125, |
|
"logps/rejected": -558.4532470703125, |
|
"loss": 0.3263, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7523441314697266, |
|
"rewards/margins": 1.519374966621399, |
|
"rewards/rejected": -4.271718978881836, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.5876777251184833, |
|
"grad_norm": 6.325353145599365, |
|
"learning_rate": 1.2447889137898295e-06, |
|
"logits/chosen": 0.0681871697306633, |
|
"logits/rejected": 0.07994731515645981, |
|
"logps/chosen": -447.1323547363281, |
|
"logps/rejected": -446.35675048828125, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.193911075592041, |
|
"rewards/margins": 1.1045620441436768, |
|
"rewards/rejected": -3.2984731197357178, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.5924170616113744, |
|
"grad_norm": 4.334328651428223, |
|
"learning_rate": 1.2175534654075887e-06, |
|
"logits/chosen": -0.12054158747196198, |
|
"logits/rejected": -0.25014710426330566, |
|
"logps/chosen": -368.5024108886719, |
|
"logps/rejected": -418.28466796875, |
|
"loss": 0.3067, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.830749273300171, |
|
"rewards/margins": 2.00579571723938, |
|
"rewards/rejected": -3.836544990539551, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.5971563981042654, |
|
"grad_norm": 3.325434923171997, |
|
"learning_rate": 1.190577908262614e-06, |
|
"logits/chosen": 0.1261437088251114, |
|
"logits/rejected": 0.15501365065574646, |
|
"logps/chosen": -501.2939453125, |
|
"logps/rejected": -559.4841918945312, |
|
"loss": 0.3501, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.977611780166626, |
|
"rewards/margins": 1.6551153659820557, |
|
"rewards/rejected": -3.6327271461486816, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.6018957345971563, |
|
"grad_norm": 5.469727993011475, |
|
"learning_rate": 1.1638640958407e-06, |
|
"logits/chosen": 0.11842088401317596, |
|
"logits/rejected": 0.2777405381202698, |
|
"logps/chosen": -422.850341796875, |
|
"logps/rejected": -439.694580078125, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9282774925231934, |
|
"rewards/margins": 1.2889878749847412, |
|
"rewards/rejected": -3.2172651290893555, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.6066350710900474, |
|
"grad_norm": 4.092334747314453, |
|
"learning_rate": 1.1374138636432054e-06, |
|
"logits/chosen": -0.13197553157806396, |
|
"logits/rejected": -0.16363224387168884, |
|
"logps/chosen": -526.3876953125, |
|
"logps/rejected": -595.45947265625, |
|
"loss": 0.4096, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6760362386703491, |
|
"rewards/margins": 1.171120524406433, |
|
"rewards/rejected": -2.8471570014953613, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.6113744075829384, |
|
"grad_norm": 3.614046812057495, |
|
"learning_rate": 1.1112290290609444e-06, |
|
"logits/chosen": -0.2544490098953247, |
|
"logits/rejected": -0.02984345145523548, |
|
"logps/chosen": -474.2083740234375, |
|
"logps/rejected": -504.4893493652344, |
|
"loss": 0.3167, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.2328014373779297, |
|
"rewards/margins": 1.6542941331863403, |
|
"rewards/rejected": -3.8870954513549805, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.6161137440758293, |
|
"grad_norm": 3.6634645462036133, |
|
"learning_rate": 1.085311391249299e-06, |
|
"logits/chosen": 0.023314230144023895, |
|
"logits/rejected": 0.059964969754219055, |
|
"logps/chosen": -469.96905517578125, |
|
"logps/rejected": -568.8093872070312, |
|
"loss": 0.3357, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.085527181625366, |
|
"rewards/margins": 1.6656991243362427, |
|
"rewards/rejected": -3.7512261867523193, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.6208530805687205, |
|
"grad_norm": 3.1424379348754883, |
|
"learning_rate": 1.0596627310046165e-06, |
|
"logits/chosen": 0.31347164511680603, |
|
"logits/rejected": 0.1853667050600052, |
|
"logps/chosen": -552.0761108398438, |
|
"logps/rejected": -505.68707275390625, |
|
"loss": 0.3661, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.9287023544311523, |
|
"rewards/margins": 1.3750154972076416, |
|
"rewards/rejected": -3.303718090057373, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.6255924170616114, |
|
"grad_norm": 2.257039785385132, |
|
"learning_rate": 1.0342848106418368e-06, |
|
"logits/chosen": -0.1234850063920021, |
|
"logits/rejected": 0.1165139451622963, |
|
"logps/chosen": -371.6340637207031, |
|
"logps/rejected": -417.32159423828125, |
|
"loss": 0.2868, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.7357150316238403, |
|
"rewards/margins": 1.8563973903656006, |
|
"rewards/rejected": -3.5921125411987305, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.6303317535545023, |
|
"grad_norm": 3.4644458293914795, |
|
"learning_rate": 1.0091793738734113e-06, |
|
"logits/chosen": 0.21419385075569153, |
|
"logits/rejected": -0.015899434685707092, |
|
"logps/chosen": -417.99237060546875, |
|
"logps/rejected": -396.4125061035156, |
|
"loss": 0.336, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.6989412307739258, |
|
"rewards/margins": 1.975459098815918, |
|
"rewards/rejected": -3.6744003295898438, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.6350710900473935, |
|
"grad_norm": 3.3371543884277344, |
|
"learning_rate": 9.843481456894949e-07, |
|
"logits/chosen": 0.14167696237564087, |
|
"logits/rejected": 0.13582485914230347, |
|
"logps/chosen": -405.79290771484375, |
|
"logps/rejected": -467.8961486816406, |
|
"loss": 0.3739, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.369532823562622, |
|
"rewards/margins": 1.6152913570404053, |
|
"rewards/rejected": -3.9848241806030273, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.6398104265402842, |
|
"grad_norm": 3.4584085941314697, |
|
"learning_rate": 9.59792832239415e-07, |
|
"logits/chosen": 0.1842474341392517, |
|
"logits/rejected": 0.36914995312690735, |
|
"logps/chosen": -430.4465026855469, |
|
"logps/rejected": -491.39923095703125, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.37587308883667, |
|
"rewards/margins": 1.4323196411132812, |
|
"rewards/rejected": -3.808192729949951, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.6445497630331753, |
|
"grad_norm": 3.6146063804626465, |
|
"learning_rate": 9.35515120714447e-07, |
|
"logits/chosen": 0.21485333144664764, |
|
"logits/rejected": 0.3836483955383301, |
|
"logps/chosen": -385.3580017089844, |
|
"logps/rejected": -413.75958251953125, |
|
"loss": 0.3057, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.34371280670166, |
|
"rewards/margins": 1.8871243000030518, |
|
"rewards/rejected": -4.230837345123291, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.6492890995260665, |
|
"grad_norm": 2.6541919708251953, |
|
"learning_rate": 9.115166792318858e-07, |
|
"logits/chosen": -0.2523440718650818, |
|
"logits/rejected": -0.3873641788959503, |
|
"logps/chosen": -405.66363525390625, |
|
"logps/rejected": -387.85675048828125, |
|
"loss": 0.2713, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.7661632299423218, |
|
"rewards/margins": 2.105201244354248, |
|
"rewards/rejected": -3.8713645935058594, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.6540284360189572, |
|
"grad_norm": 2.7787058353424072, |
|
"learning_rate": 8.877991567204353e-07, |
|
"logits/chosen": 0.04583475738763809, |
|
"logits/rejected": 0.22186128795146942, |
|
"logps/chosen": -469.5939025878906, |
|
"logps/rejected": -490.9723815917969, |
|
"loss": 0.3186, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.797175407409668, |
|
"rewards/margins": 1.9968000650405884, |
|
"rewards/rejected": -3.793975830078125, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.6587677725118484, |
|
"grad_norm": 3.4025254249572754, |
|
"learning_rate": 8.643641828069004e-07, |
|
"logits/chosen": 0.1861867904663086, |
|
"logits/rejected": 0.26269257068634033, |
|
"logps/chosen": -388.3940734863281, |
|
"logps/rejected": -477.13372802734375, |
|
"loss": 0.3042, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.0583114624023438, |
|
"rewards/margins": 2.277298927307129, |
|
"rewards/rejected": -4.335610389709473, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.6635071090047393, |
|
"grad_norm": 2.7238893508911133, |
|
"learning_rate": 8.412133677042239e-07, |
|
"logits/chosen": 0.2202744483947754, |
|
"logits/rejected": 0.14431580901145935, |
|
"logps/chosen": -494.9244689941406, |
|
"logps/rejected": -511.6275939941406, |
|
"loss": 0.2694, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.7077330350875854, |
|
"rewards/margins": 2.26680850982666, |
|
"rewards/rejected": -3.974541425704956, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.6682464454976302, |
|
"grad_norm": 2.596195697784424, |
|
"learning_rate": 8.183483021008498e-07, |
|
"logits/chosen": 0.059778667986392975, |
|
"logits/rejected": 0.16687698662281036, |
|
"logps/chosen": -459.7418518066406, |
|
"logps/rejected": -528.6278076171875, |
|
"loss": 0.3188, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.31968092918396, |
|
"rewards/margins": 1.6766258478164673, |
|
"rewards/rejected": -3.996306896209717, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.6729857819905214, |
|
"grad_norm": 3.991448402404785, |
|
"learning_rate": 7.957705570514163e-07, |
|
"logits/chosen": 0.13453525304794312, |
|
"logits/rejected": 0.25876981019973755, |
|
"logps/chosen": -451.383056640625, |
|
"logps/rejected": -449.3511657714844, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.6044600009918213, |
|
"rewards/margins": 1.0652509927749634, |
|
"rewards/rejected": -2.669711112976074, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.6777251184834123, |
|
"grad_norm": 3.8690874576568604, |
|
"learning_rate": 7.734816838688247e-07, |
|
"logits/chosen": 0.13873904943466187, |
|
"logits/rejected": -0.005100712180137634, |
|
"logps/chosen": -468.6593017578125, |
|
"logps/rejected": -474.06884765625, |
|
"loss": 0.283, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.1768066883087158, |
|
"rewards/margins": 2.2671823501586914, |
|
"rewards/rejected": -3.4439892768859863, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.6824644549763033, |
|
"grad_norm": 3.6120941638946533, |
|
"learning_rate": 7.51483214017637e-07, |
|
"logits/chosen": 0.019669074565172195, |
|
"logits/rejected": 0.13328783214092255, |
|
"logps/chosen": -476.302001953125, |
|
"logps/rejected": -512.1363525390625, |
|
"loss": 0.3317, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.948580265045166, |
|
"rewards/margins": 1.781925916671753, |
|
"rewards/rejected": -3.73050594329834, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.6872037914691944, |
|
"grad_norm": 3.360703229904175, |
|
"learning_rate": 7.297766590088529e-07, |
|
"logits/chosen": 0.19215039908885956, |
|
"logits/rejected": 0.12218157947063446, |
|
"logps/chosen": -403.3554382324219, |
|
"logps/rejected": -421.522705078125, |
|
"loss": 0.3112, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5849626064300537, |
|
"rewards/margins": 1.5150251388549805, |
|
"rewards/rejected": -3.0999879837036133, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.6919431279620853, |
|
"grad_norm": 4.554477691650391, |
|
"learning_rate": 7.083635102960584e-07, |
|
"logits/chosen": -0.1721559464931488, |
|
"logits/rejected": 0.14503641426563263, |
|
"logps/chosen": -373.804443359375, |
|
"logps/rejected": -457.3600769042969, |
|
"loss": 0.3669, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9074959754943848, |
|
"rewards/margins": 1.5698236227035522, |
|
"rewards/rejected": -3.4773197174072266, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.6966824644549763, |
|
"grad_norm": 5.0574140548706055, |
|
"learning_rate": 6.8724523917294e-07, |
|
"logits/chosen": 0.19052550196647644, |
|
"logits/rejected": 0.1522517204284668, |
|
"logps/chosen": -394.22149658203125, |
|
"logps/rejected": -469.25360107421875, |
|
"loss": 0.3164, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4775445461273193, |
|
"rewards/margins": 1.945703148841858, |
|
"rewards/rejected": -3.423247814178467, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.7014218009478674, |
|
"grad_norm": 3.6440603733062744, |
|
"learning_rate": 6.664232966721995e-07, |
|
"logits/chosen": -0.30218714475631714, |
|
"logits/rejected": -0.21477612853050232, |
|
"logps/chosen": -362.7777099609375, |
|
"logps/rejected": -383.90869140625, |
|
"loss": 0.2301, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3972433805465698, |
|
"rewards/margins": 2.2594199180603027, |
|
"rewards/rejected": -3.656663179397583, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.7061611374407581, |
|
"grad_norm": 3.930510997772217, |
|
"learning_rate": 6.458991134658487e-07, |
|
"logits/chosen": -0.056913457810878754, |
|
"logits/rejected": -0.14112192392349243, |
|
"logps/chosen": -438.1789855957031, |
|
"logps/rejected": -482.021240234375, |
|
"loss": 0.2415, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.4954298734664917, |
|
"rewards/margins": 2.081784248352051, |
|
"rewards/rejected": -3.577214002609253, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.7109004739336493, |
|
"grad_norm": 3.474404811859131, |
|
"learning_rate": 6.256740997669142e-07, |
|
"logits/chosen": 0.305749773979187, |
|
"logits/rejected": 0.372690349817276, |
|
"logps/chosen": -465.25396728515625, |
|
"logps/rejected": -520.6639404296875, |
|
"loss": 0.3113, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.961679220199585, |
|
"rewards/margins": 1.8247203826904297, |
|
"rewards/rejected": -3.7863998413085938, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.7156398104265402, |
|
"grad_norm": 4.286850929260254, |
|
"learning_rate": 6.057496452325345e-07, |
|
"logits/chosen": 0.06337082386016846, |
|
"logits/rejected": 0.03823350369930267, |
|
"logps/chosen": -510.4189453125, |
|
"logps/rejected": -534.214111328125, |
|
"loss": 0.3706, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.2660622596740723, |
|
"rewards/margins": 1.5975626707077026, |
|
"rewards/rejected": -3.8636245727539062, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.7203791469194312, |
|
"grad_norm": 7.2974042892456055, |
|
"learning_rate": 5.861271188684819e-07, |
|
"logits/chosen": -0.47425615787506104, |
|
"logits/rejected": -0.38017037510871887, |
|
"logps/chosen": -471.4033508300781, |
|
"logps/rejected": -448.49322509765625, |
|
"loss": 0.4664, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.868884801864624, |
|
"rewards/margins": 1.9620721340179443, |
|
"rewards/rejected": -3.8309569358825684, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.7251184834123223, |
|
"grad_norm": 2.614375352859497, |
|
"learning_rate": 5.668078689351008e-07, |
|
"logits/chosen": 0.08667676150798798, |
|
"logits/rejected": 0.026178985834121704, |
|
"logps/chosen": -527.921142578125, |
|
"logps/rejected": -473.9797058105469, |
|
"loss": 0.3323, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2779064178466797, |
|
"rewards/margins": 1.8057652711868286, |
|
"rewards/rejected": -3.0836715698242188, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.7298578199052133, |
|
"grad_norm": 4.204647064208984, |
|
"learning_rate": 5.477932228546574e-07, |
|
"logits/chosen": -0.001731783151626587, |
|
"logits/rejected": -0.11266829073429108, |
|
"logps/chosen": -507.29205322265625, |
|
"logps/rejected": -497.1200256347656, |
|
"loss": 0.3806, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.4385986328125, |
|
"rewards/margins": 1.3973621129989624, |
|
"rewards/rejected": -3.835960626602173, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.7345971563981042, |
|
"grad_norm": 4.324245452880859, |
|
"learning_rate": 5.290844871201483e-07, |
|
"logits/chosen": 0.09860187768936157, |
|
"logits/rejected": 0.0050681233406066895, |
|
"logps/chosen": -489.361083984375, |
|
"logps/rejected": -526.6915893554688, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8216359615325928, |
|
"rewards/margins": 1.4107340574264526, |
|
"rewards/rejected": -3.232370138168335, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.7393364928909953, |
|
"grad_norm": 4.808085918426514, |
|
"learning_rate": 5.106829472055203e-07, |
|
"logits/chosen": 0.05839775502681732, |
|
"logits/rejected": -0.02708090841770172, |
|
"logps/chosen": -545.265869140625, |
|
"logps/rejected": -521.0439453125, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1644363403320312, |
|
"rewards/margins": 1.105941653251648, |
|
"rewards/rejected": -3.2703781127929688, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.7440758293838863, |
|
"grad_norm": 4.010900020599365, |
|
"learning_rate": 4.925898674773488e-07, |
|
"logits/chosen": 0.7153588533401489, |
|
"logits/rejected": 0.9175931215286255, |
|
"logps/chosen": -495.9670104980469, |
|
"logps/rejected": -522.7073364257812, |
|
"loss": 0.4523, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1978981494903564, |
|
"rewards/margins": 1.3793647289276123, |
|
"rewards/rejected": -3.5772628784179688, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.7488151658767772, |
|
"grad_norm": 4.280209064483643, |
|
"learning_rate": 4.7480649110796693e-07, |
|
"logits/chosen": -0.2100297212600708, |
|
"logits/rejected": -0.035903140902519226, |
|
"logps/chosen": -503.14190673828125, |
|
"logps/rejected": -545.4959716796875, |
|
"loss": 0.352, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.9035875797271729, |
|
"rewards/margins": 2.3268721103668213, |
|
"rewards/rejected": -4.230460166931152, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.7535545023696684, |
|
"grad_norm": 3.580965518951416, |
|
"learning_rate": 4.5733403999004167e-07, |
|
"logits/chosen": 0.03610946238040924, |
|
"logits/rejected": 0.12372472882270813, |
|
"logps/chosen": -351.2373352050781, |
|
"logps/rejected": -424.42236328125, |
|
"loss": 0.3629, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7467509508132935, |
|
"rewards/margins": 2.5998826026916504, |
|
"rewards/rejected": -4.346633434295654, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.758293838862559, |
|
"grad_norm": 5.5822529792785645, |
|
"learning_rate": 4.401737146526219e-07, |
|
"logits/chosen": 0.11202114820480347, |
|
"logits/rejected": 0.2623312473297119, |
|
"logps/chosen": -404.4681701660156, |
|
"logps/rejected": -468.0955810546875, |
|
"loss": 0.4676, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.091884136199951, |
|
"rewards/margins": 1.5518180131912231, |
|
"rewards/rejected": -3.6437020301818848, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.7630331753554502, |
|
"grad_norm": 3.6275458335876465, |
|
"learning_rate": 4.2332669417864734e-07, |
|
"logits/chosen": 0.2248685210943222, |
|
"logits/rejected": 0.5064141750335693, |
|
"logps/chosen": -422.96673583984375, |
|
"logps/rejected": -538.043701171875, |
|
"loss": 0.343, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6720224618911743, |
|
"rewards/margins": 1.9135386943817139, |
|
"rewards/rejected": -3.5855612754821777, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.7677725118483414, |
|
"grad_norm": 2.365963935852051, |
|
"learning_rate": 4.067941361239386e-07, |
|
"logits/chosen": 0.22563423216342926, |
|
"logits/rejected": 0.27167263627052307, |
|
"logps/chosen": -459.28155517578125, |
|
"logps/rejected": -489.7861633300781, |
|
"loss": 0.2706, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.141444206237793, |
|
"rewards/margins": 2.016284465789795, |
|
"rewards/rejected": -4.157728672027588, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.772511848341232, |
|
"grad_norm": 3.179948329925537, |
|
"learning_rate": 3.905771764376554e-07, |
|
"logits/chosen": -0.04605560749769211, |
|
"logits/rejected": -0.13275614380836487, |
|
"logps/chosen": -500.1363220214844, |
|
"logps/rejected": -484.475341796875, |
|
"loss": 0.3462, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7449672222137451, |
|
"rewards/margins": 1.7927236557006836, |
|
"rewards/rejected": -3.5376906394958496, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.7772511848341233, |
|
"grad_norm": 3.8642663955688477, |
|
"learning_rate": 3.746769293842506e-07, |
|
"logits/chosen": 0.32506126165390015, |
|
"logits/rejected": 0.25315555930137634, |
|
"logps/chosen": -408.6797790527344, |
|
"logps/rejected": -459.6020202636719, |
|
"loss": 0.3154, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.6381193399429321, |
|
"rewards/margins": 2.112222671508789, |
|
"rewards/rejected": -3.7503418922424316, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.7819905213270142, |
|
"grad_norm": 1.845032811164856, |
|
"learning_rate": 3.590944874669089e-07, |
|
"logits/chosen": 0.16336679458618164, |
|
"logits/rejected": -0.06345739960670471, |
|
"logps/chosen": -456.1724853515625, |
|
"logps/rejected": -412.36376953125, |
|
"loss": 0.2595, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.056734561920166, |
|
"rewards/margins": 2.2098302841186523, |
|
"rewards/rejected": -4.26656436920166, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.7867298578199051, |
|
"grad_norm": 3.6954963207244873, |
|
"learning_rate": 3.4383092135247543e-07, |
|
"logits/chosen": -0.042060717940330505, |
|
"logits/rejected": -0.28105732798576355, |
|
"logps/chosen": -455.3412170410156, |
|
"logps/rejected": -469.8854064941406, |
|
"loss": 0.3753, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.4552671909332275, |
|
"rewards/margins": 1.3413841724395752, |
|
"rewards/rejected": -3.7966513633728027, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.7914691943127963, |
|
"grad_norm": 3.0183236598968506, |
|
"learning_rate": 3.288872797979009e-07, |
|
"logits/chosen": -0.005820997059345245, |
|
"logits/rejected": 0.14580360054969788, |
|
"logps/chosen": -479.91644287109375, |
|
"logps/rejected": -515.1129760742188, |
|
"loss": 0.285, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.6726536750793457, |
|
"rewards/margins": 2.280189275741577, |
|
"rewards/rejected": -3.952843189239502, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.7962085308056872, |
|
"grad_norm": 3.68314528465271, |
|
"learning_rate": 3.1426458957817154e-07, |
|
"logits/chosen": 0.29249462485313416, |
|
"logits/rejected": 0.28890830278396606, |
|
"logps/chosen": -384.9244384765625, |
|
"logps/rejected": -381.51416015625, |
|
"loss": 0.3259, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.2649922370910645, |
|
"rewards/margins": 1.6742699146270752, |
|
"rewards/rejected": -3.9392619132995605, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.8009478672985781, |
|
"grad_norm": 4.089728832244873, |
|
"learning_rate": 2.999638554157636e-07, |
|
"logits/chosen": 0.16874819993972778, |
|
"logits/rejected": 0.33503326773643494, |
|
"logps/chosen": -405.65863037109375, |
|
"logps/rejected": -436.0166320800781, |
|
"loss": 0.331, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2233431339263916, |
|
"rewards/margins": 1.7847846746444702, |
|
"rewards/rejected": -3.0081279277801514, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.8056872037914693, |
|
"grad_norm": 3.578709363937378, |
|
"learning_rate": 2.859860599116127e-07, |
|
"logits/chosen": 0.0348060317337513, |
|
"logits/rejected": -0.028133656829595566, |
|
"logps/chosen": -424.2684020996094, |
|
"logps/rejected": -405.78546142578125, |
|
"loss": 0.2824, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.338637113571167, |
|
"rewards/margins": 2.2470810413360596, |
|
"rewards/rejected": -3.5857181549072266, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.8104265402843602, |
|
"grad_norm": 3.0040953159332275, |
|
"learning_rate": 2.723321634775927e-07, |
|
"logits/chosen": 0.14009594917297363, |
|
"logits/rejected": 0.09075860679149628, |
|
"logps/chosen": -466.3572692871094, |
|
"logps/rejected": -451.00390625, |
|
"loss": 0.34, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8811650276184082, |
|
"rewards/margins": 1.9055883884429932, |
|
"rewards/rejected": -3.7867536544799805, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.8151658767772512, |
|
"grad_norm": 3.576838731765747, |
|
"learning_rate": 2.590031042705304e-07, |
|
"logits/chosen": 0.00659569725394249, |
|
"logits/rejected": -0.039983510971069336, |
|
"logps/chosen": -386.4693908691406, |
|
"logps/rejected": -437.7843933105469, |
|
"loss": 0.3241, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.694931983947754, |
|
"rewards/margins": 2.0972633361816406, |
|
"rewards/rejected": -3.7921950817108154, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.8199052132701423, |
|
"grad_norm": 3.0422523021698, |
|
"learning_rate": 2.459997981277423e-07, |
|
"logits/chosen": 0.3022235333919525, |
|
"logits/rejected": 0.3308006227016449, |
|
"logps/chosen": -515.8807373046875, |
|
"logps/rejected": -566.62646484375, |
|
"loss": 0.2557, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9875129461288452, |
|
"rewards/margins": 2.3573031425476074, |
|
"rewards/rejected": -4.344816207885742, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.824644549763033, |
|
"grad_norm": 3.56526780128479, |
|
"learning_rate": 2.3332313850411238e-07, |
|
"logits/chosen": 0.15668299794197083, |
|
"logits/rejected": 0.3108135461807251, |
|
"logps/chosen": -376.9952697753906, |
|
"logps/rejected": -416.5478515625, |
|
"loss": 0.4554, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9508757591247559, |
|
"rewards/margins": 1.343916893005371, |
|
"rewards/rejected": -3.294792652130127, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.8293838862559242, |
|
"grad_norm": 4.2836503982543945, |
|
"learning_rate": 2.2097399641069662e-07, |
|
"logits/chosen": -0.1609744131565094, |
|
"logits/rejected": -0.16733995079994202, |
|
"logps/chosen": -461.4750061035156, |
|
"logps/rejected": -506.1349792480469, |
|
"loss": 0.4442, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8771613836288452, |
|
"rewards/margins": 1.3958663940429688, |
|
"rewards/rejected": -3.2730276584625244, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.8341232227488151, |
|
"grad_norm": 5.192793369293213, |
|
"learning_rate": 2.0895322035487942e-07, |
|
"logits/chosen": -0.011422211304306984, |
|
"logits/rejected": -0.3574138581752777, |
|
"logps/chosen": -494.67852783203125, |
|
"logps/rejected": -470.2755126953125, |
|
"loss": 0.4215, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6957664489746094, |
|
"rewards/margins": 1.5437688827514648, |
|
"rewards/rejected": -3.2395355701446533, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.838862559241706, |
|
"grad_norm": 4.824533462524414, |
|
"learning_rate": 1.972616362820745e-07, |
|
"logits/chosen": 0.04909311980009079, |
|
"logits/rejected": -0.057477012276649475, |
|
"logps/chosen": -415.3355712890625, |
|
"logps/rejected": -469.35955810546875, |
|
"loss": 0.3899, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.789413332939148, |
|
"rewards/margins": 1.886869192123413, |
|
"rewards/rejected": -3.6762826442718506, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.8436018957345972, |
|
"grad_norm": 3.7289130687713623, |
|
"learning_rate": 1.8590004751896872e-07, |
|
"logits/chosen": 0.011208392679691315, |
|
"logits/rejected": 0.10715402662754059, |
|
"logps/chosen": -448.327880859375, |
|
"logps/rejected": -458.47607421875, |
|
"loss": 0.332, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5432627201080322, |
|
"rewards/margins": 1.664372444152832, |
|
"rewards/rejected": -3.207634925842285, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.8483412322274881, |
|
"grad_norm": 2.420408010482788, |
|
"learning_rate": 1.7486923471833283e-07, |
|
"logits/chosen": 1.776963472366333e-05, |
|
"logits/rejected": 0.13077270984649658, |
|
"logps/chosen": -420.689697265625, |
|
"logps/rejected": -484.0677490234375, |
|
"loss": 0.2435, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5387448072433472, |
|
"rewards/margins": 2.5836400985717773, |
|
"rewards/rejected": -4.122385025024414, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.853080568720379, |
|
"grad_norm": 4.320888042449951, |
|
"learning_rate": 1.6416995580537665e-07, |
|
"logits/chosen": -0.17437168955802917, |
|
"logits/rejected": 0.09179955720901489, |
|
"logps/chosen": -436.16131591796875, |
|
"logps/rejected": -444.27105712890625, |
|
"loss": 0.4105, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3086187839508057, |
|
"rewards/margins": 1.6180086135864258, |
|
"rewards/rejected": -2.9266276359558105, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.8578199052132702, |
|
"grad_norm": 4.555395603179932, |
|
"learning_rate": 1.5380294592567514e-07, |
|
"logits/chosen": 0.3249743580818176, |
|
"logits/rejected": -0.06773354858160019, |
|
"logps/chosen": -475.42095947265625, |
|
"logps/rejected": -426.3134765625, |
|
"loss": 0.3782, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.2762033939361572, |
|
"rewards/margins": 1.5152695178985596, |
|
"rewards/rejected": -3.791472911834717, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.8625592417061612, |
|
"grad_norm": 4.829329967498779, |
|
"learning_rate": 1.4376891739465826e-07, |
|
"logits/chosen": 0.21649743616580963, |
|
"logits/rejected": 0.0726737305521965, |
|
"logps/chosen": -464.2098388671875, |
|
"logps/rejected": -444.3758544921875, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -2.082812786102295, |
|
"rewards/margins": 1.1739708185195923, |
|
"rewards/rejected": -3.2567837238311768, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.867298578199052, |
|
"grad_norm": 4.030221939086914, |
|
"learning_rate": 1.3406855964866405e-07, |
|
"logits/chosen": 0.029259920120239258, |
|
"logits/rejected": 0.09799515455961227, |
|
"logps/chosen": -440.05010986328125, |
|
"logps/rejected": -463.4384765625, |
|
"loss": 0.3544, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6618742942810059, |
|
"rewards/margins": 1.9635189771652222, |
|
"rewards/rejected": -3.6253933906555176, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.8720379146919433, |
|
"grad_norm": 2.8961992263793945, |
|
"learning_rate": 1.247025391975698e-07, |
|
"logits/chosen": -0.30902552604675293, |
|
"logits/rejected": -0.13309930264949799, |
|
"logps/chosen": -462.56378173828125, |
|
"logps/rejected": -469.8658447265625, |
|
"loss": 0.3764, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.1713790893554688, |
|
"rewards/margins": 1.4321413040161133, |
|
"rewards/rejected": -3.603520631790161, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.876777251184834, |
|
"grad_norm": 3.45127010345459, |
|
"learning_rate": 1.1567149957899592e-07, |
|
"logits/chosen": 0.017819374799728394, |
|
"logits/rejected": -0.011249087750911713, |
|
"logps/chosen": -431.1867370605469, |
|
"logps/rejected": -441.91229248046875, |
|
"loss": 0.324, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7995635271072388, |
|
"rewards/margins": 1.8794021606445312, |
|
"rewards/rejected": -3.6789655685424805, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.8815165876777251, |
|
"grad_norm": 3.5824666023254395, |
|
"learning_rate": 1.0697606131408966e-07, |
|
"logits/chosen": 0.0838717445731163, |
|
"logits/rejected": 0.017135512083768845, |
|
"logps/chosen": -521.8138427734375, |
|
"logps/rejected": -532.1849365234375, |
|
"loss": 0.3537, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.107038974761963, |
|
"rewards/margins": 1.835722804069519, |
|
"rewards/rejected": -3.9427616596221924, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.8862559241706163, |
|
"grad_norm": 3.6466410160064697, |
|
"learning_rate": 9.861682186488697e-08, |
|
"logits/chosen": 0.21266713738441467, |
|
"logits/rejected": 0.2840477526187897, |
|
"logps/chosen": -412.107666015625, |
|
"logps/rejected": -420.9535827636719, |
|
"loss": 0.3189, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7778931856155396, |
|
"rewards/margins": 1.6844923496246338, |
|
"rewards/rejected": -3.462385416030884, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.890995260663507, |
|
"grad_norm": 4.38985013961792, |
|
"learning_rate": 9.059435559326258e-08, |
|
"logits/chosen": 0.14922155439853668, |
|
"logits/rejected": 0.4349655210971832, |
|
"logps/chosen": -356.972412109375, |
|
"logps/rejected": -440.1178283691406, |
|
"loss": 0.4094, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0182883739471436, |
|
"rewards/margins": 1.7074682712554932, |
|
"rewards/rejected": -3.7257564067840576, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 3.211653232574463, |
|
"learning_rate": 8.290921372146654e-08, |
|
"logits/chosen": -0.14845699071884155, |
|
"logits/rejected": -0.0703466609120369, |
|
"logps/chosen": -477.7309875488281, |
|
"logps/rejected": -578.8424682617188, |
|
"loss": 0.3564, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.0129945278167725, |
|
"rewards/margins": 1.810671091079712, |
|
"rewards/rejected": -3.8236656188964844, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.900473933649289, |
|
"grad_norm": 3.0175628662109375, |
|
"learning_rate": 7.55619242942457e-08, |
|
"logits/chosen": 0.2208501398563385, |
|
"logits/rejected": 0.3406308889389038, |
|
"logps/chosen": -396.18011474609375, |
|
"logps/rejected": -454.838134765625, |
|
"loss": 0.3354, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.469225525856018, |
|
"rewards/margins": 1.550800085067749, |
|
"rewards/rejected": -3.0200257301330566, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.90521327014218, |
|
"grad_norm": 3.1398770809173584, |
|
"learning_rate": 6.855299214256817e-08, |
|
"logits/chosen": -0.11644039303064346, |
|
"logits/rejected": -0.09911998361349106, |
|
"logps/chosen": -411.9039001464844, |
|
"logps/rejected": -435.63165283203125, |
|
"loss": 0.3259, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.821744680404663, |
|
"rewards/margins": 1.5382413864135742, |
|
"rewards/rejected": -3.3599863052368164, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.9099526066350712, |
|
"grad_norm": 3.548295497894287, |
|
"learning_rate": 6.188289884893062e-08, |
|
"logits/chosen": 0.13989268243312836, |
|
"logits/rejected": 0.13753288984298706, |
|
"logps/chosen": -420.6592712402344, |
|
"logps/rejected": -420.6832275390625, |
|
"loss": 0.3468, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8524384498596191, |
|
"rewards/margins": 2.089343309402466, |
|
"rewards/rejected": -3.941781997680664, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.914691943127962, |
|
"grad_norm": 2.794528007507324, |
|
"learning_rate": 5.5552102714271914e-08, |
|
"logits/chosen": 0.07798265665769577, |
|
"logits/rejected": 0.2044173628091812, |
|
"logps/chosen": -465.3638916015625, |
|
"logps/rejected": -485.63140869140625, |
|
"loss": 0.3254, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.057124137878418, |
|
"rewards/margins": 1.8226839303970337, |
|
"rewards/rejected": -3.879808187484741, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.919431279620853, |
|
"grad_norm": 4.69295072555542, |
|
"learning_rate": 4.9561038726483326e-08, |
|
"logits/chosen": -0.28620800375938416, |
|
"logits/rejected": -0.35853612422943115, |
|
"logps/chosen": -459.1697692871094, |
|
"logps/rejected": -483.31903076171875, |
|
"loss": 0.3833, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9887439012527466, |
|
"rewards/margins": 1.8978276252746582, |
|
"rewards/rejected": -3.8865716457366943, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.9241706161137442, |
|
"grad_norm": 4.612025260925293, |
|
"learning_rate": 4.391011853051963e-08, |
|
"logits/chosen": 0.013378657400608063, |
|
"logits/rejected": 0.2598281502723694, |
|
"logps/chosen": -445.204345703125, |
|
"logps/rejected": -443.0850830078125, |
|
"loss": 0.414, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9544767141342163, |
|
"rewards/margins": 1.2461696863174438, |
|
"rewards/rejected": -3.20064640045166, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.9289099526066351, |
|
"grad_norm": 3.3378067016601562, |
|
"learning_rate": 3.859973040011511e-08, |
|
"logits/chosen": 0.12009446322917938, |
|
"logits/rejected": -0.013997566886246204, |
|
"logps/chosen": -415.43780517578125, |
|
"logps/rejected": -425.35455322265625, |
|
"loss": 0.283, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.4632946252822876, |
|
"rewards/margins": 2.7979512214660645, |
|
"rewards/rejected": -4.2612457275390625, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.933649289099526, |
|
"grad_norm": 2.524122953414917, |
|
"learning_rate": 3.363023921110542e-08, |
|
"logits/chosen": -0.16053307056427002, |
|
"logits/rejected": -0.2901630699634552, |
|
"logps/chosen": -484.54705810546875, |
|
"logps/rejected": -468.14947509765625, |
|
"loss": 0.3086, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6832842826843262, |
|
"rewards/margins": 1.9632474184036255, |
|
"rewards/rejected": -3.646531581878662, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.9383886255924172, |
|
"grad_norm": 5.851565361022949, |
|
"learning_rate": 2.9001986416358763e-08, |
|
"logits/chosen": -0.3602317273616791, |
|
"logits/rejected": -0.4473731815814972, |
|
"logps/chosen": -341.96038818359375, |
|
"logps/rejected": -375.1501159667969, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.745245337486267, |
|
"rewards/margins": 1.315721035003662, |
|
"rewards/rejected": -3.0609664916992188, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.943127962085308, |
|
"grad_norm": 4.054327011108398, |
|
"learning_rate": 2.4715290022310212e-08, |
|
"logits/chosen": 0.09333401173353195, |
|
"logits/rejected": 0.12337815761566162, |
|
"logps/chosen": -407.29718017578125, |
|
"logps/rejected": -444.52783203125, |
|
"loss": 0.3384, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8622604608535767, |
|
"rewards/margins": 1.8048306703567505, |
|
"rewards/rejected": -3.667090892791748, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.947867298578199, |
|
"grad_norm": 5.784478187561035, |
|
"learning_rate": 2.0770444567118075e-08, |
|
"logits/chosen": 0.12872330844402313, |
|
"logits/rejected": -0.020948439836502075, |
|
"logps/chosen": -689.5375366210938, |
|
"logps/rejected": -653.1371459960938, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.1100049018859863, |
|
"rewards/margins": 1.1901695728302002, |
|
"rewards/rejected": -3.3001744747161865, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.95260663507109, |
|
"grad_norm": 3.3096017837524414, |
|
"learning_rate": 1.7167721100420086e-08, |
|
"logits/chosen": -0.1594451665878296, |
|
"logits/rejected": -0.20826411247253418, |
|
"logps/chosen": -327.7048034667969, |
|
"logps/rejected": -397.76702880859375, |
|
"loss": 0.2559, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.474926471710205, |
|
"rewards/margins": 2.178825855255127, |
|
"rewards/rejected": -3.653752326965332, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.957345971563981, |
|
"grad_norm": 3.7895545959472656, |
|
"learning_rate": 1.3907367164713304e-08, |
|
"logits/chosen": -0.2774580717086792, |
|
"logits/rejected": -0.025296274572610855, |
|
"logps/chosen": -454.33074951171875, |
|
"logps/rejected": -562.2850341796875, |
|
"loss": 0.3308, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.998180866241455, |
|
"rewards/margins": 1.9229161739349365, |
|
"rewards/rejected": -3.9210968017578125, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.962085308056872, |
|
"grad_norm": 5.27892541885376, |
|
"learning_rate": 1.0989606778344375e-08, |
|
"logits/chosen": 0.028818532824516296, |
|
"logits/rejected": -0.06880553811788559, |
|
"logps/chosen": -479.7122802734375, |
|
"logps/rejected": -466.845947265625, |
|
"loss": 0.3081, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.92867112159729, |
|
"rewards/margins": 2.0744426250457764, |
|
"rewards/rejected": -4.003113746643066, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.966824644549763, |
|
"grad_norm": 4.537078857421875, |
|
"learning_rate": 8.414640420116305e-09, |
|
"logits/chosen": 0.08707749843597412, |
|
"logits/rejected": -0.05305865406990051, |
|
"logps/chosen": -447.78302001953125, |
|
"logps/rejected": -430.87030029296875, |
|
"loss": 0.367, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6580579280853271, |
|
"rewards/margins": 1.6174925565719604, |
|
"rewards/rejected": -3.275550365447998, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.971563981042654, |
|
"grad_norm": 2.382775068283081, |
|
"learning_rate": 6.182645015516131e-09, |
|
"logits/chosen": 0.23202013969421387, |
|
"logits/rejected": 0.24202974140644073, |
|
"logps/chosen": -426.9042663574219, |
|
"logps/rejected": -456.701904296875, |
|
"loss": 0.2973, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9331663846969604, |
|
"rewards/margins": 1.9195029735565186, |
|
"rewards/rejected": -3.8526692390441895, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.9763033175355451, |
|
"grad_norm": 4.986690521240234, |
|
"learning_rate": 4.2937739245563216e-09, |
|
"logits/chosen": 0.08618678152561188, |
|
"logits/rejected": 0.05446825921535492, |
|
"logps/chosen": -410.23797607421875, |
|
"logps/rejected": -479.15032958984375, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.676472783088684, |
|
"rewards/margins": 1.6006869077682495, |
|
"rewards/rejected": -3.2771594524383545, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.981042654028436, |
|
"grad_norm": 4.937625408172607, |
|
"learning_rate": 2.7481569312381995e-09, |
|
"logits/chosen": 0.2503683865070343, |
|
"logits/rejected": 0.25900202989578247, |
|
"logps/chosen": -392.8948974609375, |
|
"logps/rejected": -445.6235046386719, |
|
"loss": 0.2971, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.1311068534851074, |
|
"rewards/margins": 2.716494083404541, |
|
"rewards/rejected": -4.847600936889648, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.985781990521327, |
|
"grad_norm": 3.498244524002075, |
|
"learning_rate": 1.5459002346324137e-09, |
|
"logits/chosen": -0.018450409173965454, |
|
"logits/rejected": -0.062519371509552, |
|
"logps/chosen": -406.3209228515625, |
|
"logps/rejected": -451.2742919921875, |
|
"loss": 0.3417, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9635508060455322, |
|
"rewards/margins": 1.8704030513763428, |
|
"rewards/rejected": -3.833953857421875, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.9905213270142181, |
|
"grad_norm": 2.944807767868042, |
|
"learning_rate": 6.870864415875478e-10, |
|
"logits/chosen": -0.22453802824020386, |
|
"logits/rejected": 0.1273200809955597, |
|
"logps/chosen": -398.6549987792969, |
|
"logps/rejected": -451.1443176269531, |
|
"loss": 0.3175, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.9729655981063843, |
|
"rewards/margins": 1.8536114692687988, |
|
"rewards/rejected": -3.8265771865844727, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.9952606635071088, |
|
"grad_norm": 2.9744060039520264, |
|
"learning_rate": 1.7177456104688905e-10, |
|
"logits/chosen": -0.29849839210510254, |
|
"logits/rejected": -0.11612643301486969, |
|
"logps/chosen": -422.93353271484375, |
|
"logps/rejected": -584.3493041992188, |
|
"loss": 0.2149, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9120306968688965, |
|
"rewards/margins": 2.681218385696411, |
|
"rewards/rejected": -4.593249320983887, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.863999843597412, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.13755494356155396, |
|
"logits/rejected": 0.18007084727287292, |
|
"logps/chosen": -402.926513671875, |
|
"logps/rejected": -388.6621398925781, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.6458333134651184, |
|
"rewards/chosen": -2.5218539237976074, |
|
"rewards/margins": 0.7689557671546936, |
|
"rewards/rejected": -3.2908096313476562, |
|
"step": 422 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 422, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|