llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-he_scale-rerun/checkpoint-467/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9982631930527722,
  "eval_steps": 400,
  "global_step": 467,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01068804275217101,
      "grad_norm": 57.237549195872155,
      "learning_rate": 1.0638297872340425e-07,
      "logits/chosen": -1.0180665254592896,
      "logits/rejected": -0.9884552955627441,
      "logps/chosen": -0.27425095438957214,
      "logps/rejected": -0.2716319262981415,
      "loss": 3.1091,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": -2.7425098419189453,
      "rewards/margins": -0.02619057334959507,
      "rewards/rejected": -2.7163190841674805,
      "step": 5
    },
    {
      "epoch": 0.02137608550434202,
      "grad_norm": 36.2177280707271,
      "learning_rate": 2.127659574468085e-07,
      "logits/chosen": -1.047877311706543,
      "logits/rejected": -0.9804394841194153,
      "logps/chosen": -0.2944500744342804,
      "logps/rejected": -0.29980722069740295,
      "loss": 3.1522,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -2.944500684738159,
      "rewards/margins": 0.05357087776064873,
      "rewards/rejected": -2.9980719089508057,
      "step": 10
    },
    {
      "epoch": 0.03206412825651302,
      "grad_norm": 51.02954591523818,
      "learning_rate": 3.1914893617021275e-07,
      "logits/chosen": -0.9653420448303223,
      "logits/rejected": -0.9844053983688354,
      "logps/chosen": -0.26417964696884155,
      "logps/rejected": -0.30082693696022034,
      "loss": 3.2048,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -2.641796588897705,
      "rewards/margins": 0.3664725720882416,
      "rewards/rejected": -3.0082690715789795,
      "step": 15
    },
    {
      "epoch": 0.04275217100868404,
      "grad_norm": 56.89476138009963,
      "learning_rate": 4.25531914893617e-07,
      "logits/chosen": -0.9597972631454468,
      "logits/rejected": -0.9341325759887695,
      "logps/chosen": -0.27756327390670776,
      "logps/rejected": -0.2916925251483917,
      "loss": 3.1321,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -2.7756330966949463,
      "rewards/margins": 0.14129219949245453,
      "rewards/rejected": -2.9169249534606934,
      "step": 20
    },
    {
      "epoch": 0.053440213760855046,
      "grad_norm": 56.48955746474513,
      "learning_rate": 5.319148936170212e-07,
      "logits/chosen": -1.001181960105896,
      "logits/rejected": -0.9730860590934753,
      "logps/chosen": -0.2715573310852051,
      "logps/rejected": -0.27819815278053284,
      "loss": 3.3596,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -2.7155730724334717,
      "rewards/margins": 0.06640852242708206,
      "rewards/rejected": -2.7819817066192627,
      "step": 25
    },
    {
      "epoch": 0.06412825651302605,
      "grad_norm": 47.66267593497189,
      "learning_rate": 6.382978723404255e-07,
      "logits/chosen": -1.0001966953277588,
      "logits/rejected": -0.9549218416213989,
      "logps/chosen": -0.2734990119934082,
      "logps/rejected": -0.2796509861946106,
      "loss": 2.9655,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": -2.734990358352661,
      "rewards/margins": 0.06151958554983139,
      "rewards/rejected": -2.7965099811553955,
      "step": 30
    },
    {
      "epoch": 0.07481629926519706,
      "grad_norm": 57.03302592987705,
      "learning_rate": 7.446808510638297e-07,
      "logits/chosen": -1.0495048761367798,
      "logits/rejected": -0.9743221998214722,
      "logps/chosen": -0.2940281331539154,
      "logps/rejected": -0.31984126567840576,
      "loss": 3.0572,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -2.940281391143799,
      "rewards/margins": 0.25813135504722595,
      "rewards/rejected": -3.1984126567840576,
      "step": 35
    },
    {
      "epoch": 0.08550434201736808,
      "grad_norm": 64.29646368113443,
      "learning_rate": 8.51063829787234e-07,
      "logits/chosen": -1.0000861883163452,
      "logits/rejected": -0.9559175372123718,
      "logps/chosen": -0.28027427196502686,
      "logps/rejected": -0.3249492049217224,
      "loss": 3.0201,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -2.8027429580688477,
      "rewards/margins": 0.4467490315437317,
      "rewards/rejected": -3.2494919300079346,
      "step": 40
    },
    {
      "epoch": 0.09619238476953908,
      "grad_norm": 34.0521027952876,
      "learning_rate": 9.574468085106384e-07,
      "logits/chosen": -1.049403429031372,
      "logits/rejected": -1.0066633224487305,
      "logps/chosen": -0.3022717535495758,
      "logps/rejected": -0.355845183134079,
      "loss": 3.1061,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -3.0227174758911133,
      "rewards/margins": 0.5357345342636108,
      "rewards/rejected": -3.5584518909454346,
      "step": 45
    },
    {
      "epoch": 0.10688042752171009,
      "grad_norm": 50.184137131794785,
      "learning_rate": 9.998741174712533e-07,
      "logits/chosen": -1.0293877124786377,
      "logits/rejected": -0.9806405901908875,
      "logps/chosen": -0.3117847442626953,
      "logps/rejected": -0.3513973653316498,
      "loss": 3.1525,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -3.1178476810455322,
      "rewards/margins": 0.39612606167793274,
      "rewards/rejected": -3.5139732360839844,
      "step": 50
    },
    {
      "epoch": 0.11756847027388109,
      "grad_norm": 139.4899548956689,
      "learning_rate": 9.991050648838675e-07,
      "logits/chosen": -1.0580527782440186,
      "logits/rejected": -1.0236852169036865,
      "logps/chosen": -0.29338452219963074,
      "logps/rejected": -0.36238163709640503,
      "loss": 2.8456,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -2.933845043182373,
      "rewards/margins": 0.6899713277816772,
      "rewards/rejected": -3.6238162517547607,
      "step": 55
    },
    {
      "epoch": 0.1282565130260521,
      "grad_norm": 58.45122397836986,
      "learning_rate": 9.97637968732563e-07,
      "logits/chosen": -1.0895339250564575,
      "logits/rejected": -1.0574713945388794,
      "logps/chosen": -0.33461707830429077,
      "logps/rejected": -0.35189467668533325,
      "loss": 2.9738,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -3.3461709022521973,
      "rewards/margins": 0.17277587950229645,
      "rewards/rejected": -3.518946886062622,
      "step": 60
    },
    {
      "epoch": 0.13894455577822312,
      "grad_norm": 100.62827839328082,
      "learning_rate": 9.954748808839674e-07,
      "logits/chosen": -1.011530876159668,
      "logits/rejected": -0.9821838140487671,
      "logps/chosen": -0.4006083011627197,
      "logps/rejected": -0.464979887008667,
      "loss": 2.9379,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -4.0060834884643555,
      "rewards/margins": 0.6437152624130249,
      "rewards/rejected": -4.649798393249512,
      "step": 65
    },
    {
      "epoch": 0.14963259853039412,
      "grad_norm": 39.36526232625554,
      "learning_rate": 9.926188266120295e-07,
      "logits/chosen": -1.0184242725372314,
      "logits/rejected": -0.9939621686935425,
      "logps/chosen": -0.3619542419910431,
      "logps/rejected": -0.4431312084197998,
      "loss": 2.9573,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -3.619542360305786,
      "rewards/margins": 0.8117697834968567,
      "rewards/rejected": -4.43131160736084,
      "step": 70
    },
    {
      "epoch": 0.16032064128256512,
      "grad_norm": 83.11076732917083,
      "learning_rate": 9.890738003669027e-07,
      "logits/chosen": -0.9596433639526367,
      "logits/rejected": -0.8910166621208191,
      "logps/chosen": -0.3588549494743347,
      "logps/rejected": -0.4166484773159027,
      "loss": 2.9742,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -3.588549852371216,
      "rewards/margins": 0.5779348015785217,
      "rewards/rejected": -4.1664838790893555,
      "step": 75
    },
    {
      "epoch": 0.17100868403473615,
      "grad_norm": 52.281331982276065,
      "learning_rate": 9.848447601883433e-07,
      "logits/chosen": -0.9426174163818359,
      "logits/rejected": -0.9289323687553406,
      "logps/chosen": -0.35129761695861816,
      "logps/rejected": -0.4580927789211273,
      "loss": 2.9737,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -3.5129764080047607,
      "rewards/margins": 1.067950963973999,
      "rewards/rejected": -4.580927848815918,
      "step": 80
    },
    {
      "epoch": 0.18169672678690715,
      "grad_norm": 61.53493979772547,
      "learning_rate": 9.799376207714444e-07,
      "logits/chosen": -0.9526857137680054,
      "logits/rejected": -0.9304324388504028,
      "logps/chosen": -0.34235039353370667,
      "logps/rejected": -0.40353184938430786,
      "loss": 2.7213,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -3.4235033988952637,
      "rewards/margins": 0.6118148565292358,
      "rewards/rejected": -4.035318851470947,
      "step": 85
    },
    {
      "epoch": 0.19238476953907815,
      "grad_norm": 75.22407650978651,
      "learning_rate": 9.743592451943998e-07,
      "logits/chosen": -0.9911141395568848,
      "logits/rejected": -0.9571215510368347,
      "logps/chosen": -0.4391642212867737,
      "logps/rejected": -0.5185960531234741,
      "loss": 3.0403,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -4.391642093658447,
      "rewards/margins": 0.7943190336227417,
      "rewards/rejected": -5.1859612464904785,
      "step": 90
    },
    {
      "epoch": 0.20307281229124916,
      "grad_norm": 46.673632090780266,
      "learning_rate": 9.681174353198686e-07,
      "logits/chosen": -1.079331636428833,
      "logits/rejected": -0.996097207069397,
      "logps/chosen": -0.4490174353122711,
      "logps/rejected": -0.49736976623535156,
      "loss": 2.8747,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -4.490174293518066,
      "rewards/margins": 0.4835231900215149,
      "rewards/rejected": -4.973697662353516,
      "step": 95
    },
    {
      "epoch": 0.21376085504342018,
      "grad_norm": 78.85306309497338,
      "learning_rate": 9.612209208833646e-07,
      "logits/chosen": -0.9557577967643738,
      "logits/rejected": -0.9308866262435913,
      "logps/chosen": -0.4265132546424866,
      "logps/rejected": -0.4960516393184662,
      "loss": 2.9809,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -4.265132427215576,
      "rewards/margins": 0.6953836679458618,
      "rewards/rejected": -4.960515975952148,
      "step": 100
    },
    {
      "epoch": 0.22444889779559118,
      "grad_norm": 80.40817210917017,
      "learning_rate": 9.536793472839324e-07,
      "logits/chosen": -0.9734071493148804,
      "logits/rejected": -0.9203007817268372,
      "logps/chosen": -0.4045742154121399,
      "logps/rejected": -0.5108767747879028,
      "loss": 2.9566,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -4.045742034912109,
      "rewards/margins": 1.063025712966919,
      "rewards/rejected": -5.108767509460449,
      "step": 105
    },
    {
      "epoch": 0.23513694054776219,
      "grad_norm": 55.451042957143265,
      "learning_rate": 9.455032620941839e-07,
      "logits/chosen": -0.9206374883651733,
      "logits/rejected": -0.8604587316513062,
      "logps/chosen": -0.45949387550354004,
      "logps/rejected": -0.6004349589347839,
      "loss": 2.8412,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -4.594939231872559,
      "rewards/margins": 1.4094107151031494,
      "rewards/rejected": -6.004349708557129,
      "step": 110
    },
    {
      "epoch": 0.2458249832999332,
      "grad_norm": 51.58223883398887,
      "learning_rate": 9.367041003085648e-07,
      "logits/chosen": -0.9696682691574097,
      "logits/rejected": -0.9112384915351868,
      "logps/chosen": -0.4893345832824707,
      "logps/rejected": -0.5542086362838745,
      "loss": 2.7495,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -4.893345355987549,
      "rewards/margins": 0.6487414240837097,
      "rewards/rejected": -5.542087078094482,
      "step": 115
    },
    {
      "epoch": 0.2565130260521042,
      "grad_norm": 75.59919212642018,
      "learning_rate": 9.272941683504808e-07,
      "logits/chosen": -0.9438816905021667,
      "logits/rejected": -0.8547528386116028,
      "logps/chosen": -0.5028254985809326,
      "logps/rejected": -0.7035338878631592,
      "loss": 2.5628,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -5.028255462646484,
      "rewards/margins": 2.0070836544036865,
      "rewards/rejected": -7.03533935546875,
      "step": 120
    },
    {
      "epoch": 0.26720106880427524,
      "grad_norm": 66.32992513821185,
      "learning_rate": 9.172866268606513e-07,
      "logits/chosen": -1.016081690788269,
      "logits/rejected": -0.9737744331359863,
      "logps/chosen": -0.5569332838058472,
      "logps/rejected": -0.6537975072860718,
      "loss": 2.4448,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -5.569332599639893,
      "rewards/margins": 0.9686424136161804,
      "rewards/rejected": -6.537975311279297,
      "step": 125
    },
    {
      "epoch": 0.27788911155644624,
      "grad_norm": 139.11732623143496,
      "learning_rate": 9.066954722907638e-07,
      "logits/chosen": -1.032061219215393,
      "logits/rejected": -1.0252352952957153,
      "logps/chosen": -0.5443070530891418,
      "logps/rejected": -0.8193408250808716,
      "loss": 2.4333,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -5.443070411682129,
      "rewards/margins": 2.750338077545166,
      "rewards/rejected": -8.193408012390137,
      "step": 130
    },
    {
      "epoch": 0.28857715430861725,
      "grad_norm": 107.42202232758989,
      "learning_rate": 8.955355173281707e-07,
      "logits/chosen": -1.0170912742614746,
      "logits/rejected": -0.9671396017074585,
      "logps/chosen": -0.6021947264671326,
      "logps/rejected": -0.7191929221153259,
      "loss": 2.517,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -6.021947383880615,
      "rewards/margins": 1.169981837272644,
      "rewards/rejected": -7.191929817199707,
      "step": 135
    },
    {
      "epoch": 0.29926519706078825,
      "grad_norm": 77.05576180382866,
      "learning_rate": 8.838223701790055e-07,
      "logits/chosen": -1.0649584531784058,
      "logits/rejected": -1.0430896282196045,
      "logps/chosen": -0.6696725487709045,
      "logps/rejected": -0.8106359243392944,
      "loss": 2.3997,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -6.696726322174072,
      "rewards/margins": 1.4096347093582153,
      "rewards/rejected": -8.106359481811523,
      "step": 140
    },
    {
      "epoch": 0.30995323981295925,
      "grad_norm": 68.63585118244188,
      "learning_rate": 8.71572412738697e-07,
      "logits/chosen": -0.9915879964828491,
      "logits/rejected": -0.9645885229110718,
      "logps/chosen": -0.6888564825057983,
      "logps/rejected": -0.9088963270187378,
      "loss": 2.0828,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -6.8885650634765625,
      "rewards/margins": 2.2003989219665527,
      "rewards/rejected": -9.088963508605957,
      "step": 145
    },
    {
      "epoch": 0.32064128256513025,
      "grad_norm": 77.90508875376052,
      "learning_rate": 8.588027776804058e-07,
      "logits/chosen": -1.0322893857955933,
      "logits/rejected": -1.0123205184936523,
      "logps/chosen": -0.7648183107376099,
      "logps/rejected": -0.9603475332260132,
      "loss": 2.2673,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -7.6481828689575195,
      "rewards/margins": 1.9552921056747437,
      "rewards/rejected": -9.603475570678711,
      "step": 150
    },
    {
      "epoch": 0.33132932531730125,
      "grad_norm": 75.78147375517075,
      "learning_rate": 8.455313244934324e-07,
      "logits/chosen": -1.0493463277816772,
      "logits/rejected": -1.0279868841171265,
      "logps/chosen": -0.8422037363052368,
      "logps/rejected": -1.0871771574020386,
      "loss": 2.2922,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -8.422037124633789,
      "rewards/margins": 2.4497344493865967,
      "rewards/rejected": -10.871770858764648,
      "step": 155
    },
    {
      "epoch": 0.3420173680694723,
      "grad_norm": 86.49849369728787,
      "learning_rate": 8.317766145051057e-07,
      "logits/chosen": -1.0481699705123901,
      "logits/rejected": -1.030601143836975,
      "logps/chosen": -0.9466081857681274,
      "logps/rejected": -1.3202154636383057,
      "loss": 2.3755,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -9.466081619262695,
      "rewards/margins": 3.736072540283203,
      "rewards/rejected": -13.202154159545898,
      "step": 160
    },
    {
      "epoch": 0.3527054108216433,
      "grad_norm": 66.82340849667754,
      "learning_rate": 8.175578849210894e-07,
      "logits/chosen": -1.0408477783203125,
      "logits/rejected": -1.0174505710601807,
      "logps/chosen": -0.9799006581306458,
      "logps/rejected": -1.3342236280441284,
      "loss": 2.1125,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -9.799007415771484,
      "rewards/margins": 3.5432305335998535,
      "rewards/rejected": -13.34223747253418,
      "step": 165
    },
    {
      "epoch": 0.3633934535738143,
      "grad_norm": 97.43908089438905,
      "learning_rate": 8.028950219204099e-07,
      "logits/chosen": -1.0224933624267578,
      "logits/rejected": -1.001030683517456,
      "logps/chosen": -0.9700697064399719,
      "logps/rejected": -1.358564853668213,
      "loss": 1.9793,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -9.70069694519043,
      "rewards/margins": 3.884951114654541,
      "rewards/rejected": -13.585647583007812,
      "step": 170
    },
    {
      "epoch": 0.3740814963259853,
      "grad_norm": 98.62704227490674,
      "learning_rate": 7.878085328428368e-07,
      "logits/chosen": -1.04830002784729,
      "logits/rejected": -1.0014127492904663,
      "logps/chosen": -1.0846463441848755,
      "logps/rejected": -1.3184218406677246,
      "loss": 1.8174,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -10.846463203430176,
      "rewards/margins": 2.3377552032470703,
      "rewards/rejected": -13.18421745300293,
      "step": 175
    },
    {
      "epoch": 0.3847695390781563,
      "grad_norm": 74.26153272998572,
      "learning_rate": 7.723195175075135e-07,
      "logits/chosen": -0.9784607887268066,
      "logits/rejected": -0.9590786099433899,
      "logps/chosen": -1.03909432888031,
      "logps/rejected": -1.3960068225860596,
      "loss": 1.8592,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -10.39094352722168,
      "rewards/margins": 3.5691237449645996,
      "rewards/rejected": -13.960065841674805,
      "step": 180
    },
    {
      "epoch": 0.3954575818303273,
      "grad_norm": 88.60468047988923,
      "learning_rate": 7.564496387029531e-07,
      "logits/chosen": -1.0223743915557861,
      "logits/rejected": -0.9691470861434937,
      "logps/chosen": -1.0873353481292725,
      "logps/rejected": -1.4810540676116943,
      "loss": 1.8506,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -10.873353958129883,
      "rewards/margins": 3.937185764312744,
      "rewards/rejected": -14.810541152954102,
      "step": 185
    },
    {
      "epoch": 0.4061456245824983,
      "grad_norm": 79.84179637831463,
      "learning_rate": 7.402210918896689e-07,
      "logits/chosen": -0.9927349090576172,
      "logits/rejected": -1.0011526346206665,
      "logps/chosen": -1.2325414419174194,
      "logps/rejected": -1.739311933517456,
      "loss": 1.6742,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -12.325414657592773,
      "rewards/margins": 5.067704200744629,
      "rewards/rejected": -17.393117904663086,
      "step": 190
    },
    {
      "epoch": 0.4168336673346693,
      "grad_norm": 75.55606036176057,
      "learning_rate": 7.236565741578162e-07,
      "logits/chosen": -0.9720694422721863,
      "logits/rejected": -0.9535917043685913,
      "logps/chosen": -1.2131645679473877,
      "logps/rejected": -1.5727177858352661,
      "loss": 1.7343,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -12.131647109985352,
      "rewards/margins": 3.5955300331115723,
      "rewards/rejected": -15.727177619934082,
      "step": 195
    },
    {
      "epoch": 0.42752171008684037,
      "grad_norm": 95.29175169321584,
      "learning_rate": 7.067792524832603e-07,
      "logits/chosen": -0.9580856561660767,
      "logits/rejected": -0.9478925466537476,
      "logps/chosen": -1.2784286737442017,
      "logps/rejected": -1.7080621719360352,
      "loss": 1.7675,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -12.784285545349121,
      "rewards/margins": 4.296335220336914,
      "rewards/rejected": -17.08062171936035,
      "step": 200
    },
    {
      "epoch": 0.43820975283901137,
      "grad_norm": 97.56349101288879,
      "learning_rate": 6.896127313264642e-07,
      "logits/chosen": -1.003482460975647,
      "logits/rejected": -0.9547850489616394,
      "logps/chosen": -1.410736322402954,
      "logps/rejected": -1.8478959798812866,
      "loss": 1.8853,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -14.107362747192383,
      "rewards/margins": 4.371596336364746,
      "rewards/rejected": -18.478958129882812,
      "step": 205
    },
    {
      "epoch": 0.44889779559118237,
      "grad_norm": 124.17700452204937,
      "learning_rate": 6.721810196195174e-07,
      "logits/chosen": -1.0298535823822021,
      "logits/rejected": -1.020567774772644,
      "logps/chosen": -1.4878171682357788,
      "logps/rejected": -1.9283632040023804,
      "loss": 1.7977,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -14.878171920776367,
      "rewards/margins": 4.405461311340332,
      "rewards/rejected": -19.283634185791016,
      "step": 210
    },
    {
      "epoch": 0.45958583834335337,
      "grad_norm": 122.41736903454225,
      "learning_rate": 6.545084971874736e-07,
      "logits/chosen": -0.9621469378471375,
      "logits/rejected": -0.9473578333854675,
      "logps/chosen": -1.558885097503662,
      "logps/rejected": -2.0420405864715576,
      "loss": 1.712,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -15.588850021362305,
      "rewards/margins": 4.8315558433532715,
      "rewards/rejected": -20.420406341552734,
      "step": 215
    },
    {
      "epoch": 0.47027388109552437,
      "grad_norm": 118.87466737296252,
      "learning_rate": 6.3661988065096e-07,
      "logits/chosen": -1.0177868604660034,
      "logits/rejected": -1.0038330554962158,
      "logps/chosen": -1.6249806880950928,
      "logps/rejected": -2.1466097831726074,
      "loss": 1.6798,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -16.249807357788086,
      "rewards/margins": 5.216291904449463,
      "rewards/rejected": -21.46609878540039,
      "step": 220
    },
    {
      "epoch": 0.48096192384769537,
      "grad_norm": 71.45686372104745,
      "learning_rate": 6.185401888577487e-07,
      "logits/chosen": -1.0141699314117432,
      "logits/rejected": -0.9860795736312866,
      "logps/chosen": -1.6077144145965576,
      "logps/rejected": -2.097548723220825,
      "loss": 1.5264,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -16.077144622802734,
      "rewards/margins": 4.898342132568359,
      "rewards/rejected": -20.975486755371094,
      "step": 225
    },
    {
      "epoch": 0.4916499665998664,
      "grad_norm": 96.03329426013343,
      "learning_rate": 6.002947078916364e-07,
      "logits/chosen": -1.1012922525405884,
      "logits/rejected": -1.0541749000549316,
      "logps/chosen": -1.536604881286621,
      "logps/rejected": -1.9562132358551025,
      "loss": 1.5597,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -15.366048812866211,
      "rewards/margins": 4.196080207824707,
      "rewards/rejected": -19.562129974365234,
      "step": 230
    },
    {
      "epoch": 0.5023380093520374,
      "grad_norm": 92.4386577422302,
      "learning_rate": 5.819089557075688e-07,
      "logits/chosen": -1.1283349990844727,
      "logits/rejected": -1.1022907495498657,
      "logps/chosen": -1.527305245399475,
      "logps/rejected": -2.0704562664031982,
      "loss": 1.4964,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -15.273053169250488,
      "rewards/margins": 5.431510925292969,
      "rewards/rejected": -20.70456314086914,
      "step": 235
    },
    {
      "epoch": 0.5130260521042084,
      "grad_norm": 99.65700789182705,
      "learning_rate": 5.634086464424742e-07,
      "logits/chosen": -1.098283290863037,
      "logits/rejected": -1.1012353897094727,
      "logps/chosen": -1.430646538734436,
      "logps/rejected": -1.9294793605804443,
      "loss": 1.5595,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -14.306467056274414,
      "rewards/margins": 4.98832893371582,
      "rewards/rejected": -19.294795989990234,
      "step": 240
    },
    {
      "epoch": 0.5237140948563794,
      "grad_norm": 129.78574414242638,
      "learning_rate": 5.448196544517167e-07,
      "logits/chosen": -1.1896294355392456,
      "logits/rejected": -1.1353044509887695,
      "logps/chosen": -1.4528030157089233,
      "logps/rejected": -2.033853054046631,
      "loss": 1.475,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -14.52802848815918,
      "rewards/margins": 5.810500144958496,
      "rewards/rejected": -20.33852767944336,
      "step": 245
    },
    {
      "epoch": 0.5344021376085505,
      "grad_norm": 128.20283155514042,
      "learning_rate": 5.26167978121472e-07,
      "logits/chosen": -1.1316919326782227,
      "logits/rejected": -1.1171941757202148,
      "logps/chosen": -1.538417100906372,
      "logps/rejected": -2.1081037521362305,
      "loss": 1.4196,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -15.384170532226562,
      "rewards/margins": 5.696866512298584,
      "rewards/rejected": -21.081039428710938,
      "step": 250
    },
    {
      "epoch": 0.5450901803607214,
      "grad_norm": 297.4344168039998,
      "learning_rate": 5.074797035076318e-07,
      "logits/chosen": -1.1720324754714966,
      "logits/rejected": -1.1475986242294312,
      "logps/chosen": -1.6590303182601929,
      "logps/rejected": -2.150458335876465,
      "loss": 1.6319,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -16.590303421020508,
      "rewards/margins": 4.914281845092773,
      "rewards/rejected": -21.50458335876465,
      "step": 255
    },
    {
      "epoch": 0.5557782231128925,
      "grad_norm": 110.4855476557986,
      "learning_rate": 4.887809678520975e-07,
      "logits/chosen": -1.145662546157837,
      "logits/rejected": -1.1171993017196655,
      "logps/chosen": -1.5531560182571411,
      "logps/rejected": -2.0303704738616943,
      "loss": 1.4324,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -15.531560897827148,
      "rewards/margins": 4.7721452713012695,
      "rewards/rejected": -20.303707122802734,
      "step": 260
    },
    {
      "epoch": 0.5664662658650634,
      "grad_norm": 83.14465015789618,
      "learning_rate": 4.700979230274829e-07,
      "logits/chosen": -1.1167972087860107,
      "logits/rejected": -1.1012585163116455,
      "logps/chosen": -1.6575731039047241,
      "logps/rejected": -2.175945997238159,
      "loss": 1.5468,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -16.575729370117188,
      "rewards/margins": 5.183730125427246,
      "rewards/rejected": -21.75946044921875,
      "step": 265
    },
    {
      "epoch": 0.5771543086172345,
      "grad_norm": 116.06958067335016,
      "learning_rate": 4.514566989613559e-07,
      "logits/chosen": -1.1125719547271729,
      "logits/rejected": -1.0850471258163452,
      "logps/chosen": -1.4920045137405396,
      "logps/rejected": -2.0412135124206543,
      "loss": 1.411,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -14.920045852661133,
      "rewards/margins": 5.492091655731201,
      "rewards/rejected": -20.41213607788086,
      "step": 270
    },
    {
      "epoch": 0.5878423513694054,
      "grad_norm": 93.41636467602738,
      "learning_rate": 4.328833670911724e-07,
      "logits/chosen": -1.0854105949401855,
      "logits/rejected": -1.0501768589019775,
      "logps/chosen": -1.501579999923706,
      "logps/rejected": -1.966059684753418,
      "loss": 1.6514,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -15.015800476074219,
      "rewards/margins": 4.644796848297119,
      "rewards/rejected": -19.66059684753418,
      "step": 275
    },
    {
      "epoch": 0.5985303941215765,
      "grad_norm": 76.77398013161601,
      "learning_rate": 4.144039039010124e-07,
      "logits/chosen": -1.1719205379486084,
      "logits/rejected": -1.148206114768982,
      "logps/chosen": -1.5624816417694092,
      "logps/rejected": -2.106921672821045,
      "loss": 1.5084,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -15.6248140335083,
      "rewards/margins": 5.44440221786499,
      "rewards/rejected": -21.069217681884766,
      "step": 280
    },
    {
      "epoch": 0.6092184368737475,
      "grad_norm": 111.58387969589569,
      "learning_rate": 3.960441545911204e-07,
      "logits/chosen": -1.1303155422210693,
      "logits/rejected": -1.0974434614181519,
      "logps/chosen": -1.6112607717514038,
      "logps/rejected": -2.212517261505127,
      "loss": 1.3065,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": -16.112607955932617,
      "rewards/margins": 6.012566089630127,
      "rewards/rejected": -22.125173568725586,
      "step": 285
    },
    {
      "epoch": 0.6199064796259185,
      "grad_norm": 101.64126956685786,
      "learning_rate": 3.778297969310529e-07,
      "logits/chosen": -1.1603832244873047,
      "logits/rejected": -1.117941975593567,
      "logps/chosen": -1.6240203380584717,
      "logps/rejected": -2.1277661323547363,
      "loss": 1.5293,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -16.240203857421875,
      "rewards/margins": 5.037457466125488,
      "rewards/rejected": -21.277660369873047,
      "step": 290
    },
    {
      "epoch": 0.6305945223780896,
      "grad_norm": 107.06654753863799,
      "learning_rate": 3.5978630534699865e-07,
      "logits/chosen": -1.0859363079071045,
      "logits/rejected": -1.0710818767547607,
      "logps/chosen": -1.6715633869171143,
      "logps/rejected": -2.20039701461792,
      "loss": 1.4051,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -16.715633392333984,
      "rewards/margins": 5.288336753845215,
      "rewards/rejected": -22.003969192504883,
      "step": 295
    },
    {
      "epoch": 0.6412825651302605,
      "grad_norm": 85.09256280069626,
      "learning_rate": 3.4193891529348795e-07,
      "logits/chosen": -1.015700340270996,
      "logits/rejected": -0.9886563420295715,
      "logps/chosen": -1.7295385599136353,
      "logps/rejected": -2.197303056716919,
      "loss": 1.7183,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -17.295387268066406,
      "rewards/margins": 4.677645206451416,
      "rewards/rejected": -21.973031997680664,
      "step": 300
    },
    {
      "epoch": 0.6519706078824316,
      "grad_norm": 98.45045929238997,
      "learning_rate": 3.243125879593286e-07,
      "logits/chosen": -1.1253305673599243,
      "logits/rejected": -1.078313946723938,
      "logps/chosen": -1.716840386390686,
      "logps/rejected": -2.1928813457489014,
      "loss": 1.4901,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -17.16840171813965,
      "rewards/margins": 4.760410785675049,
      "rewards/rejected": -21.928813934326172,
      "step": 305
    },
    {
      "epoch": 0.6626586506346025,
      "grad_norm": 141.52088188995467,
      "learning_rate": 3.069319753571269e-07,
      "logits/chosen": -1.1675808429718018,
      "logits/rejected": -1.1469465494155884,
      "logps/chosen": -1.7660911083221436,
      "logps/rejected": -2.279519557952881,
      "loss": 1.6871,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -17.660913467407227,
      "rewards/margins": 5.134285926818848,
      "rewards/rejected": -22.795196533203125,
      "step": 310
    },
    {
      "epoch": 0.6733466933867736,
      "grad_norm": 101.86538699204806,
      "learning_rate": 2.898213858452173e-07,
      "logits/chosen": -1.153141736984253,
      "logits/rejected": -1.097063660621643,
      "logps/chosen": -1.705733299255371,
      "logps/rejected": -2.247840166091919,
      "loss": 1.5097,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -17.05733299255371,
      "rewards/margins": 5.421066761016846,
      "rewards/rejected": -22.47840118408203,
      "step": 315
    },
    {
      "epoch": 0.6840347361389446,
      "grad_norm": 122.36169835495791,
      "learning_rate": 2.730047501302266e-07,
      "logits/chosen": -1.136850357055664,
      "logits/rejected": -1.1315498352050781,
      "logps/chosen": -1.7248958349227905,
      "logps/rejected": -2.382091760635376,
      "loss": 1.446,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -17.248958587646484,
      "rewards/margins": 6.571959018707275,
      "rewards/rejected": -23.8209171295166,
      "step": 320
    },
    {
      "epoch": 0.6947227788911156,
      "grad_norm": 100.7599593802445,
      "learning_rate": 2.5650558779781635e-07,
      "logits/chosen": -1.158361792564392,
      "logits/rejected": -1.1068694591522217,
      "logps/chosen": -1.8045142889022827,
      "logps/rejected": -2.512817859649658,
      "loss": 1.4403,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -18.04514503479004,
      "rewards/margins": 7.083035469055176,
      "rewards/rejected": -25.128177642822266,
      "step": 325
    },
    {
      "epoch": 0.7054108216432866,
      "grad_norm": 82.7307588668697,
      "learning_rate": 2.403469744184154e-07,
      "logits/chosen": -1.0738600492477417,
      "logits/rejected": -1.030057430267334,
      "logps/chosen": -1.734301209449768,
      "logps/rejected": -2.244229793548584,
      "loss": 1.4411,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -17.3430118560791,
      "rewards/margins": 5.099286079406738,
      "rewards/rejected": -22.442298889160156,
      "step": 330
    },
    {
      "epoch": 0.7160988643954576,
      "grad_norm": 111.19834108528815,
      "learning_rate": 2.2455150927394878e-07,
      "logits/chosen": -1.1105704307556152,
      "logits/rejected": -1.092313289642334,
      "logps/chosen": -1.7023674249649048,
      "logps/rejected": -2.2848927974700928,
      "loss": 1.3002,
      "rewards/accuracies": 0.824999988079071,
"rewards/chosen": -17.023672103881836, | |
"rewards/margins": 5.82525634765625, | |
"rewards/rejected": -22.84893226623535, | |
"step": 335 | |
}, | |
{ | |
"epoch": 0.7267869071476286, | |
"grad_norm": 124.14973601872444, | |
"learning_rate": 2.0914128375069722e-07, | |
"logits/chosen": -1.1307401657104492, | |
"logits/rejected": -1.0960733890533447, | |
"logps/chosen": -1.639500617980957, | |
"logps/rejected": -2.2025198936462402, | |
"loss": 1.4763, | |
"rewards/accuracies": 0.8187500238418579, | |
"rewards/chosen": -16.395008087158203, | |
"rewards/margins": 5.630189895629883, | |
"rewards/rejected": -22.025196075439453, | |
"step": 340 | |
}, | |
{ | |
"epoch": 0.7374749498997996, | |
"grad_norm": 89.15665757381706, | |
"learning_rate": 1.9413785044249676e-07, | |
"logits/chosen": -1.1599509716033936, | |
"logits/rejected": -1.135851502418518, | |
"logps/chosen": -1.72158682346344, | |
"logps/rejected": -2.364271402359009, | |
"loss": 1.5031, | |
"rewards/accuracies": 0.875, | |
"rewards/chosen": -17.215869903564453, | |
"rewards/margins": 6.426844596862793, | |
"rewards/rejected": -23.642711639404297, | |
"step": 345 | |
}, | |
{ | |
"epoch": 0.7481629926519706, | |
"grad_norm": 110.91770853185307, | |
"learning_rate": 1.7956219300748792e-07, | |
"logits/chosen": -1.1471744775772095, | |
"logits/rejected": -1.1494718790054321, | |
"logps/chosen": -1.5995361804962158, | |
"logps/rejected": -2.1568686962127686, | |
"loss": 1.4483, | |
"rewards/accuracies": 0.8500000238418579, | |
"rewards/chosen": -15.995361328125, | |
"rewards/margins": 5.57332706451416, | |
"rewards/rejected": -21.568689346313477, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.7588510354041417, | |
"grad_norm": 92.35558706588404, | |
"learning_rate": 1.6543469682057104e-07, | |
"logits/chosen": -1.0737619400024414, | |
"logits/rejected": -1.0871598720550537, | |
"logps/chosen": -1.583603858947754, | |
"logps/rejected": -2.1480062007904053, | |
"loss": 1.2137, | |
"rewards/accuracies": 0.8687499761581421, | |
"rewards/chosen": -15.836038589477539, | |
"rewards/margins": 5.6440229415893555, | |
"rewards/rejected": -21.480064392089844, | |
"step": 355 | |
}, | |
{ | |
"epoch": 0.7695390781563126, | |
"grad_norm": 97.35140511945166, | |
"learning_rate": 1.5177512046261666e-07, | |
"logits/chosen": -1.1281821727752686, | |
"logits/rejected": -1.1263208389282227, | |
"logps/chosen": -1.5962927341461182, | |
"logps/rejected": -2.2565903663635254, | |
"loss": 1.4883, | |
"rewards/accuracies": 0.8187500238418579, | |
"rewards/chosen": -15.962926864624023, | |
"rewards/margins": 6.602975368499756, | |
"rewards/rejected": -22.565902709960938, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.7802271209084837, | |
"grad_norm": 97.26488316442018, | |
"learning_rate": 1.3860256808630427e-07, | |
"logits/chosen": -1.1745531558990479, | |
"logits/rejected": -1.1077674627304077, | |
"logps/chosen": -1.6627562046051025, | |
"logps/rejected": -2.321105480194092, | |
"loss": 1.4521, | |
"rewards/accuracies": 0.8125, | |
"rewards/chosen": -16.627561569213867, | |
"rewards/margins": 6.583495140075684, | |
"rewards/rejected": -23.211057662963867, | |
"step": 365 | |
}, | |
{ | |
"epoch": 0.7909151636606546, | |
"grad_norm": 112.70224926269489, | |
"learning_rate": 1.2593546269723647e-07, | |
"logits/chosen": -1.0878835916519165, | |
"logits/rejected": -1.075674295425415, | |
"logps/chosen": -1.6165920495986938, | |
"logps/rejected": -2.117642402648926, | |
"loss": 1.4406, | |
"rewards/accuracies": 0.856249988079071, | |
"rewards/chosen": -16.165922164916992, | |
"rewards/margins": 5.010504245758057, | |
"rewards/rejected": -21.17642593383789, | |
"step": 370 | |
}, | |
{ | |
"epoch": 0.8016032064128257, | |
"grad_norm": 116.18618106964092, | |
"learning_rate": 1.1379152038770029e-07, | |
"logits/chosen": -1.1332778930664062, | |
"logits/rejected": -1.1369507312774658, | |
"logps/chosen": -1.7694313526153564, | |
"logps/rejected": -2.3713538646698, | |
"loss": 1.4837, | |
"rewards/accuracies": 0.8500000238418579, | |
"rewards/chosen": -17.694313049316406, | |
"rewards/margins": 6.019228935241699, | |
"rewards/rejected": -23.71354103088379, | |
"step": 375 | |
}, | |
{ | |
"epoch": 0.8122912491649966, | |
"grad_norm": 133.07010667525282, | |
"learning_rate": 1.0218772555910954e-07, | |
"logits/chosen": -1.1531364917755127, | |
"logits/rejected": -1.1320288181304932, | |
"logps/chosen": -1.6155163049697876, | |
"logps/rejected": -2.160113573074341, | |
"loss": 1.5728, | |
"rewards/accuracies": 0.8500000238418579, | |
"rewards/chosen": -16.155162811279297, | |
"rewards/margins": 5.4459710121154785, | |
"rewards/rejected": -21.60113525390625, | |
"step": 380 | |
}, | |
{ | |
"epoch": 0.8229792919171677, | |
"grad_norm": 92.80555931457836, | |
"learning_rate": 9.114030716778432e-08, | |
"logits/chosen": -1.14119553565979, | |
"logits/rejected": -1.1190695762634277, | |
"logps/chosen": -1.6841446161270142, | |
"logps/rejected": -2.4013619422912598, | |
"loss": 1.2863, | |
"rewards/accuracies": 0.8687499761581421, | |
"rewards/chosen": -16.841445922851562, | |
"rewards/margins": 7.172172546386719, | |
"rewards/rejected": -24.013620376586914, | |
"step": 385 | |
}, | |
{ | |
"epoch": 0.8336673346693386, | |
"grad_norm": 106.02842713910356, | |
"learning_rate": 8.066471602728803e-08, | |
"logits/chosen": -1.164880633354187, | |
"logits/rejected": -1.1471444368362427, | |
"logps/chosen": -1.7512538433074951, | |
"logps/rejected": -2.3633463382720947, | |
"loss": 1.4838, | |
"rewards/accuracies": 0.8374999761581421, | |
"rewards/chosen": -17.512537002563477, | |
"rewards/margins": 6.120924949645996, | |
"rewards/rejected": -23.633460998535156, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.8443553774215097, | |
"grad_norm": 91.21482574420814, | |
"learning_rate": 7.077560319906694e-08, | |
"logits/chosen": -1.158891201019287, | |
"logits/rejected": -1.1362249851226807, | |
"logps/chosen": -1.6644847393035889, | |
"logps/rejected": -2.2324166297912598, | |
"loss": 1.3638, | |
"rewards/accuracies": 0.800000011920929, | |
"rewards/chosen": -16.644847869873047, | |
"rewards/margins": 5.679316997528076, | |
"rewards/rejected": -22.32416534423828, | |
"step": 395 | |
}, | |
{ | |
"epoch": 0.8550434201736807, | |
"grad_norm": 67.57151749023619, | |
"learning_rate": 6.148679950161672e-08, | |
"logits/chosen": -1.1628615856170654, | |
"logits/rejected": -1.1462781429290771, | |
"logps/chosen": -1.6841766834259033, | |
"logps/rejected": -2.238058567047119, | |
"loss": 1.2555, | |
"rewards/accuracies": 0.8187500238418579, | |
"rewards/chosen": -16.841764450073242, | |
"rewards/margins": 5.538818359375, | |
"rewards/rejected": -22.380582809448242, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.8550434201736807, | |
"eval_logits/chosen": -1.3520146608352661, | |
"eval_logits/rejected": -1.3613466024398804, | |
"eval_logps/chosen": -1.698158621788025, | |
"eval_logps/rejected": -2.272404670715332, | |
"eval_loss": 1.3605413436889648, | |
"eval_rewards/accuracies": 0.8455284833908081, | |
"eval_rewards/chosen": -16.981586456298828, | |
"eval_rewards/margins": 5.742460250854492, | |
"eval_rewards/rejected": -22.724044799804688, | |
"eval_runtime": 96.7859, | |
"eval_samples_per_second": 20.261, | |
"eval_steps_per_second": 1.271, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.8657314629258517, | |
"grad_norm": 113.07658741149837, | |
"learning_rate": 5.2811296166831666e-08, | |
"logits/chosen": -1.1250704526901245, | |
"logits/rejected": -1.142858624458313, | |
"logps/chosen": -1.7644565105438232, | |
"logps/rejected": -2.3112316131591797, | |
"loss": 1.3959, | |
"rewards/accuracies": 0.856249988079071, | |
"rewards/chosen": -17.64456558227539, | |
"rewards/margins": 5.467750549316406, | |
"rewards/rejected": -23.112316131591797, | |
"step": 405 | |
}, | |
{ | |
"epoch": 0.8764195056780227, | |
"grad_norm": 135.58367842014283, | |
"learning_rate": 4.4761226670592066e-08, | |
"logits/chosen": -1.148863434791565, | |
"logits/rejected": -1.1359000205993652, | |
"logps/chosen": -1.7182337045669556, | |
"logps/rejected": -2.268091917037964, | |
"loss": 1.5344, | |
"rewards/accuracies": 0.824999988079071, | |
"rewards/chosen": -17.182336807250977, | |
"rewards/margins": 5.4985833168029785, | |
"rewards/rejected": -22.680919647216797, | |
"step": 410 | |
}, | |
{ | |
"epoch": 0.8871075484301937, | |
"grad_norm": 114.17576942626454, | |
"learning_rate": 3.734784976300165e-08, | |
"logits/chosen": -1.1382702589035034, | |
"logits/rejected": -1.0858891010284424, | |
"logps/chosen": -1.606128454208374, | |
"logps/rejected": -2.25667142868042, | |
"loss": 1.6467, | |
"rewards/accuracies": 0.8500000238418579, | |
"rewards/chosen": -16.061288833618164, | |
"rewards/margins": 6.505424499511719, | |
"rewards/rejected": -22.56671142578125, | |
"step": 415 | |
}, | |
{ | |
"epoch": 0.8977955911823647, | |
"grad_norm": 101.01008878090249, | |
"learning_rate": 3.058153372200695e-08, | |
"logits/chosen": -1.1730471849441528, | |
"logits/rejected": -1.1232213973999023, | |
"logps/chosen": -1.5842628479003906, | |
"logps/rejected": -2.213921308517456, | |
"loss": 1.3771, | |
"rewards/accuracies": 0.862500011920929, | |
"rewards/chosen": -15.842630386352539, | |
"rewards/margins": 6.296584129333496, | |
"rewards/rejected": -22.13921356201172, | |
"step": 420 | |
}, | |
{ | |
"epoch": 0.9084836339345357, | |
"grad_norm": 111.86270544120462, | |
"learning_rate": 2.4471741852423233e-08, | |
"logits/chosen": -1.1721917390823364, | |
"logits/rejected": -1.1620614528656006, | |
"logps/chosen": -1.77499258518219, | |
"logps/rejected": -2.305689573287964, | |
"loss": 1.493, | |
"rewards/accuracies": 0.831250011920929, | |
"rewards/chosen": -17.74992561340332, | |
"rewards/margins": 5.306972980499268, | |
"rewards/rejected": -23.05689811706543, | |
"step": 425 | |
}, | |
{ | |
"epoch": 0.9191716766867067, | |
"grad_norm": 86.63448539967071, | |
"learning_rate": 1.9027019250647036e-08, | |
"logits/chosen": -1.1510651111602783, | |
"logits/rejected": -1.1352717876434326, | |
"logps/chosen": -1.7862894535064697, | |
"logps/rejected": -2.381641149520874, | |
"loss": 1.411, | |
"rewards/accuracies": 0.856249988079071, | |
"rewards/chosen": -17.862895965576172, | |
"rewards/margins": 5.953517436981201, | |
"rewards/rejected": -23.816410064697266, | |
"step": 430 | |
}, | |
{ | |
"epoch": 0.9298597194388778, | |
"grad_norm": 102.66949123740247, | |
"learning_rate": 1.4254980853566246e-08, | |
"logits/chosen": -1.1117022037506104, | |
"logits/rejected": -1.0704118013381958, | |
"logps/chosen": -1.6134551763534546, | |
"logps/rejected": -2.211256742477417, | |
"loss": 1.3969, | |
"rewards/accuracies": 0.8687499761581421, | |
"rewards/chosen": -16.134552001953125, | |
"rewards/margins": 5.9780168533325195, | |
"rewards/rejected": -22.112567901611328, | |
"step": 435 | |
}, | |
{ | |
"epoch": 0.9405477621910487, | |
"grad_norm": 112.7340895477117, | |
"learning_rate": 1.016230078838226e-08, | |
"logits/chosen": -1.1366580724716187, | |
"logits/rejected": -1.0769071578979492, | |
"logps/chosen": -1.7496669292449951, | |
"logps/rejected": -2.3226873874664307, | |
"loss": 1.3153, | |
"rewards/accuracies": 0.824999988079071, | |
"rewards/chosen": -17.49666976928711, | |
"rewards/margins": 5.7302045822143555, | |
"rewards/rejected": -23.226871490478516, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.9512358049432198, | |
"grad_norm": 92.65086353146947, | |
"learning_rate": 6.754703038239329e-09, | |
"logits/chosen": -1.0847865343093872, | |
"logits/rejected": -1.0684945583343506, | |
"logps/chosen": -1.726458191871643, | |
"logps/rejected": -2.406322956085205, | |
"loss": 1.2646, | |
"rewards/accuracies": 0.862500011920929, | |
"rewards/chosen": -17.26458168029785, | |
"rewards/margins": 6.798647403717041, | |
"rewards/rejected": -24.063228607177734, | |
"step": 445 | |
}, | |
{ | |
"epoch": 0.9619238476953907, | |
"grad_norm": 81.82402808973579, | |
"learning_rate": 4.036953436716895e-09, | |
"logits/chosen": -1.1987271308898926, | |
"logits/rejected": -1.1772375106811523, | |
"logps/chosen": -1.6759332418441772, | |
"logps/rejected": -2.2297987937927246, | |
"loss": 1.4218, | |
"rewards/accuracies": 0.8187500238418579, | |
"rewards/chosen": -16.759334564208984, | |
"rewards/margins": 5.5386552810668945, | |
"rewards/rejected": -22.297988891601562, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.9726118904475618, | |
"grad_norm": 114.73913428926, | |
"learning_rate": 2.0128530023804656e-09, | |
"logits/chosen": -1.1624139547348022, | |
"logits/rejected": -1.1274266242980957, | |
"logps/chosen": -1.695810317993164, | |
"logps/rejected": -2.3724701404571533, | |
"loss": 1.1616, | |
"rewards/accuracies": 0.918749988079071, | |
"rewards/chosen": -16.958105087280273, | |
"rewards/margins": 6.766595363616943, | |
"rewards/rejected": -23.724700927734375, | |
"step": 455 | |
}, | |
{ | |
"epoch": 0.9832999331997327, | |
"grad_norm": 105.12442076336163, | |
"learning_rate": 6.852326227130833e-10, | |
"logits/chosen": -1.1637624502182007, | |
"logits/rejected": -1.1525938510894775, | |
"logps/chosen": -1.7872707843780518, | |
"logps/rejected": -2.419015407562256, | |
"loss": 1.334, | |
"rewards/accuracies": 0.831250011920929, | |
"rewards/chosen": -17.87270736694336, | |
"rewards/margins": 6.317442893981934, | |
"rewards/rejected": -24.19015121459961, | |
"step": 460 | |
}, | |
{ | |
"epoch": 0.9939879759519038, | |
"grad_norm": 103.69467200755768, | |
"learning_rate": 5.594909486328348e-11, | |
"logits/chosen": -1.1328219175338745, | |
"logits/rejected": -1.139512300491333, | |
"logps/chosen": -1.773741364479065, | |
"logps/rejected": -2.4201297760009766, | |
"loss": 1.4736, | |
"rewards/accuracies": 0.8374999761581421, | |
"rewards/chosen": -17.737415313720703, | |
"rewards/margins": 6.4638848304748535, | |
"rewards/rejected": -24.201297760009766, | |
"step": 465 | |
} | |
], | |
"logging_steps": 5, | |
"max_steps": 467, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 1, | |
"save_steps": 1000000, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": true | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 0.0, | |
"train_batch_size": 2, | |
"trial_name": null, | |
"trial_params": null | |
} | |