{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9982631930527722,
  "eval_steps": 400,
  "global_step": 467,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01068804275217101,
      "grad_norm": 9.442932838948966,
      "learning_rate": 2.127659574468085e-07,
      "logits/chosen": -1.0071109533309937,
      "logits/rejected": -0.9781900644302368,
      "logps/chosen": -0.2738580107688904,
      "logps/rejected": -0.27158379554748535,
      "loss": 1.0523,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.5477160215377808,
      "rewards/margins": -0.004548341501504183,
      "rewards/rejected": -0.5431675910949707,
      "step": 5
    },
    {
      "epoch": 0.02137608550434202,
      "grad_norm": 6.34423728622988,
      "learning_rate": 4.25531914893617e-07,
      "logits/chosen": -1.0404982566833496,
      "logits/rejected": -0.9738548398017883,
      "logps/chosen": -0.2942856252193451,
      "logps/rejected": -0.2995370030403137,
      "loss": 1.0442,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -0.5885712504386902,
      "rewards/margins": 0.010502670891582966,
      "rewards/rejected": -0.5990740060806274,
      "step": 10
    },
    {
      "epoch": 0.03206412825651302,
      "grad_norm": 6.854457761517512,
      "learning_rate": 6.382978723404255e-07,
      "logits/chosen": -0.9717105031013489,
      "logits/rejected": -0.9914683103561401,
      "logps/chosen": -0.2636018991470337,
      "logps/rejected": -0.3009588122367859,
      "loss": 1.0229,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.5272037982940674,
      "rewards/margins": 0.07471387088298798,
      "rewards/rejected": -0.6019176244735718,
      "step": 15
    },
    {
      "epoch": 0.04275217100868404,
      "grad_norm": 16.17238672181369,
      "learning_rate": 8.51063829787234e-07,
      "logits/chosen": -0.9552351236343384,
      "logits/rejected": -0.9299653768539429,
      "logps/chosen": -0.27658405900001526,
      "logps/rejected": -0.2946491837501526,
      "loss": 1.0348,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.5531681180000305,
      "rewards/margins": 0.03613026812672615,
      "rewards/rejected": -0.5892983675003052,
      "step": 20
    },
    {
      "epoch": 0.053440213760855046,
      "grad_norm": 7.914459513231275,
      "learning_rate": 1.0638297872340424e-06,
      "logits/chosen": -1.0123283863067627,
      "logits/rejected": -0.9839458465576172,
      "logps/chosen": -0.2764621078968048,
      "logps/rejected": -0.29262328147888184,
      "loss": 1.0216,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.5529242157936096,
      "rewards/margins": 0.03232245892286301,
      "rewards/rejected": -0.5852465629577637,
      "step": 25
    },
    {
      "epoch": 0.06412825651302605,
      "grad_norm": 13.510536818444182,
      "learning_rate": 1.276595744680851e-06,
      "logits/chosen": -0.9960908889770508,
      "logits/rejected": -0.9520798921585083,
      "logps/chosen": -0.3060453534126282,
      "logps/rejected": -0.3202216625213623,
      "loss": 1.0213,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -0.6120907068252563,
      "rewards/margins": 0.028352651745080948,
      "rewards/rejected": -0.6404433250427246,
      "step": 30
    },
    {
      "epoch": 0.07481629926519706,
      "grad_norm": 10.603480288342643,
      "learning_rate": 1.4893617021276594e-06,
      "logits/chosen": -1.0775905847549438,
      "logits/rejected": -1.0043548345565796,
      "logps/chosen": -0.33030545711517334,
      "logps/rejected": -0.3744826912879944,
      "loss": 1.0195,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -0.6606109142303467,
      "rewards/margins": 0.08835448324680328,
      "rewards/rejected": -0.7489653825759888,
      "step": 35
    },
    {
      "epoch": 0.08550434201736808,
      "grad_norm": 14.893194407448227,
      "learning_rate": 1.702127659574468e-06,
      "logits/chosen": -1.0553807020187378,
      "logits/rejected": -1.0140490531921387,
      "logps/chosen": -0.3645663559436798,
      "logps/rejected": -0.461661159992218,
      "loss": 1.0284,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.7291327118873596,
      "rewards/margins": 0.19418945908546448,
      "rewards/rejected": -0.923322319984436,
      "step": 40
    },
    {
      "epoch": 0.09619238476953908,
      "grad_norm": 7.362675910290458,
      "learning_rate": 1.9148936170212767e-06,
      "logits/chosen": -1.1070150136947632,
      "logits/rejected": -1.0679465532302856,
      "logps/chosen": -0.4404965341091156,
      "logps/rejected": -0.5644907355308533,
      "loss": 1.0179,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.8809930682182312,
      "rewards/margins": 0.24798834323883057,
      "rewards/rejected": -1.1289814710617065,
      "step": 45
    },
    {
      "epoch": 0.10688042752171009,
      "grad_norm": 16.23414874505975,
      "learning_rate": 1.9997482349425066e-06,
      "logits/chosen": -1.0770556926727295,
      "logits/rejected": -1.0299774408340454,
      "logps/chosen": -0.3946690261363983,
      "logps/rejected": -0.47187358140945435,
      "loss": 1.0123,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -0.7893380522727966,
      "rewards/margins": 0.15440911054611206,
      "rewards/rejected": -0.9437471628189087,
      "step": 50
    },
    {
      "epoch": 0.11756847027388109,
      "grad_norm": 11.88283791262975,
      "learning_rate": 1.998210129767735e-06,
      "logits/chosen": -1.0645383596420288,
      "logits/rejected": -1.035369873046875,
      "logps/chosen": -0.3778243362903595,
      "logps/rejected": -0.48207464814186096,
      "loss": 0.9951,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.755648672580719,
      "rewards/margins": 0.2085006982088089,
      "rewards/rejected": -0.9641492962837219,
      "step": 55
    },
    {
      "epoch": 0.1282565130260521,
      "grad_norm": 10.444389026599103,
      "learning_rate": 1.995275937465126e-06,
      "logits/chosen": -1.082425594329834,
      "logits/rejected": -1.0538678169250488,
      "logps/chosen": -0.4237767159938812,
      "logps/rejected": -0.4713103175163269,
      "loss": 0.9836,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.8475534319877625,
      "rewards/margins": 0.0950673446059227,
      "rewards/rejected": -0.9426206350326538,
      "step": 60
    },
    {
      "epoch": 0.13894455577822312,
      "grad_norm": 12.992830889875604,
      "learning_rate": 1.9909497617679347e-06,
      "logits/chosen": -0.9931782484054565,
      "logits/rejected": -0.9680334329605103,
      "logps/chosen": -0.5701107382774353,
      "logps/rejected": -0.7114989757537842,
      "loss": 0.9774,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -1.1402214765548706,
      "rewards/margins": 0.2827766239643097,
      "rewards/rejected": -1.4229979515075684,
      "step": 65
    },
    {
      "epoch": 0.14963259853039412,
      "grad_norm": 8.908123494624329,
      "learning_rate": 1.985237653224059e-06,
      "logits/chosen": -0.9891507029533386,
      "logits/rejected": -0.9734717607498169,
      "logps/chosen": -0.5873534679412842,
      "logps/rejected": -0.7440844774246216,
      "loss": 0.9571,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -1.1747069358825684,
      "rewards/margins": 0.3134620785713196,
      "rewards/rejected": -1.4881689548492432,
      "step": 70
    },
    {
      "epoch": 0.16032064128256512,
      "grad_norm": 15.06224561163384,
      "learning_rate": 1.9781476007338054e-06,
      "logits/chosen": -0.9478601217269897,
      "logits/rejected": -0.8844977617263794,
      "logps/chosen": -0.6380752921104431,
      "logps/rejected": -0.7878230810165405,
      "loss": 0.9386,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -1.2761505842208862,
      "rewards/margins": 0.29949551820755005,
      "rewards/rejected": -1.575646162033081,
      "step": 75
    },
    {
      "epoch": 0.17100868403473615,
      "grad_norm": 10.129109213694903,
      "learning_rate": 1.9696895203766866e-06,
      "logits/chosen": -0.9139761924743652,
      "logits/rejected": -0.9103153944015503,
      "logps/chosen": -0.7025324702262878,
      "logps/rejected": -0.9276626706123352,
      "loss": 0.8866,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -1.4050649404525757,
      "rewards/margins": 0.45026034116744995,
      "rewards/rejected": -1.8553253412246704,
      "step": 80
    },
    {
      "epoch": 0.18169672678690715,
      "grad_norm": 16.035849628874075,
      "learning_rate": 1.9598752415428888e-06,
      "logits/chosen": -0.9445829391479492,
      "logits/rejected": -0.9311642646789551,
      "logps/chosen": -0.8271282315254211,
      "logps/rejected": -1.0663609504699707,
      "loss": 0.8879,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -1.6542564630508423,
      "rewards/margins": 0.4784657061100006,
      "rewards/rejected": -2.1327219009399414,
      "step": 85
    },
    {
      "epoch": 0.19238476953907815,
      "grad_norm": 15.552471664159093,
      "learning_rate": 1.9487184903887996e-06,
      "logits/chosen": -0.9677060842514038,
      "logits/rejected": -0.9533635377883911,
      "logps/chosen": -1.1237901449203491,
      "logps/rejected": -1.4190008640289307,
      "loss": 0.9043,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -2.2475802898406982,
      "rewards/margins": 0.5904213786125183,
      "rewards/rejected": -2.8380017280578613,
      "step": 90
    },
    {
      "epoch": 0.20307281229124916,
      "grad_norm": 21.38276928877544,
      "learning_rate": 1.936234870639737e-06,
      "logits/chosen": -1.0183446407318115,
      "logits/rejected": -0.9617747068405151,
      "logps/chosen": -1.5094763040542603,
      "logps/rejected": -1.7956956624984741,
      "loss": 0.8115,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -3.0189526081085205,
      "rewards/margins": 0.5724390745162964,
      "rewards/rejected": -3.5913913249969482,
      "step": 95
    },
    {
      "epoch": 0.21376085504342018,
      "grad_norm": 31.546005742023485,
      "learning_rate": 1.922441841766729e-06,
      "logits/chosen": -0.8167861104011536,
      "logits/rejected": -0.8134365081787109,
      "logps/chosen": -1.9628349542617798,
      "logps/rejected": -2.347581148147583,
      "loss": 0.841,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -3.9256699085235596,
      "rewards/margins": 0.7694929838180542,
      "rewards/rejected": -4.695162296295166,
      "step": 100
    },
    {
      "epoch": 0.22444889779559118,
      "grad_norm": 31.175237667862007,
      "learning_rate": 1.907358694567865e-06,
      "logits/chosen": -0.7257764935493469,
      "logits/rejected": -0.682075560092926,
      "logps/chosen": -2.4148917198181152,
      "logps/rejected": -2.919673204421997,
      "loss": 0.8144,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -4.8297834396362305,
      "rewards/margins": 1.0095628499984741,
      "rewards/rejected": -5.839346408843994,
      "step": 105
    },
    {
      "epoch": 0.23513694054776219,
      "grad_norm": 42.60812515694024,
      "learning_rate": 1.8910065241883678e-06,
      "logits/chosen": -0.5907033681869507,
      "logits/rejected": -0.5452768206596375,
      "logps/chosen": -2.7082858085632324,
      "logps/rejected": -3.285773515701294,
      "loss": 0.7803,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -5.416571617126465,
      "rewards/margins": 1.1549749374389648,
      "rewards/rejected": -6.571547031402588,
      "step": 110
    },
    {
      "epoch": 0.2458249832999332,
      "grad_norm": 33.770352812549774,
      "learning_rate": 1.8734082006171296e-06,
      "logits/chosen": -0.6769031286239624,
      "logits/rejected": -0.6223554611206055,
      "logps/chosen": -2.841639995574951,
      "logps/rejected": -3.499586820602417,
      "loss": 0.7724,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -5.683279991149902,
      "rewards/margins": 1.3158934116363525,
      "rewards/rejected": -6.999173641204834,
      "step": 115
    },
    {
      "epoch": 0.2565130260521042,
      "grad_norm": 25.195574765320742,
      "learning_rate": 1.8545883367009615e-06,
      "logits/chosen": -0.7494109272956848,
      "logits/rejected": -0.6586568355560303,
      "logps/chosen": -2.6896004676818848,
      "logps/rejected": -3.3795294761657715,
      "loss": 0.7034,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -5.3792009353637695,
      "rewards/margins": 1.3798582553863525,
      "rewards/rejected": -6.759058952331543,
      "step": 120
    },
    {
      "epoch": 0.26720106880427524,
      "grad_norm": 20.698310297934206,
      "learning_rate": 1.8345732537213026e-06,
      "logits/chosen": -0.8739752769470215,
      "logits/rejected": -0.8345277905464172,
      "logps/chosen": -2.600498676300049,
      "logps/rejected": -3.1906166076660156,
      "loss": 0.6515,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -5.200997352600098,
      "rewards/margins": 1.1802361011505127,
      "rewards/rejected": -6.381233215332031,
      "step": 125
    },
    {
      "epoch": 0.27788911155644624,
      "grad_norm": 31.900476449074073,
      "learning_rate": 1.8133909445815276e-06,
      "logits/chosen": -0.876822829246521,
      "logits/rejected": -0.8683232069015503,
      "logps/chosen": -2.75192928314209,
      "logps/rejected": -3.620870590209961,
      "loss": 0.6498,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -5.50385856628418,
      "rewards/margins": 1.7378835678100586,
      "rewards/rejected": -7.241741180419922,
      "step": 130
    },
    {
      "epoch": 0.28857715430861725,
      "grad_norm": 30.23141141236411,
      "learning_rate": 1.7910710346563413e-06,
      "logits/chosen": -0.7084225416183472,
      "logits/rejected": -0.650471568107605,
      "logps/chosen": -3.4160752296447754,
      "logps/rejected": -4.176965713500977,
      "loss": 0.6394,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -6.832150459289551,
      "rewards/margins": 1.5217812061309814,
      "rewards/rejected": -8.353931427001953,
      "step": 135
    },
    {
      "epoch": 0.29926519706078825,
      "grad_norm": 29.441980968776832,
      "learning_rate": 1.767644740358011e-06,
      "logits/chosen": -0.76490318775177,
      "logits/rejected": -0.7356737852096558,
      "logps/chosen": -3.500870943069458,
      "logps/rejected": -4.334284782409668,
      "loss": 0.5747,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -7.001741886138916,
      "rewards/margins": 1.6668283939361572,
      "rewards/rejected": -8.668569564819336,
      "step": 140
    },
    {
      "epoch": 0.30995323981295925,
      "grad_norm": 28.87020107784321,
      "learning_rate": 1.743144825477394e-06,
      "logits/chosen": -0.6797415614128113,
      "logits/rejected": -0.650688648223877,
      "logps/chosen": -3.6205127239227295,
      "logps/rejected": -4.511746406555176,
      "loss": 0.6507,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -7.241025447845459,
      "rewards/margins": 1.7824666500091553,
      "rewards/rejected": -9.023492813110352,
      "step": 145
    },
    {
      "epoch": 0.32064128256513025,
      "grad_norm": 29.827503183327266,
      "learning_rate": 1.7176055553608117e-06,
      "logits/chosen": -0.7169264554977417,
      "logits/rejected": -0.6832514405250549,
      "logps/chosen": -3.934389114379883,
      "logps/rejected": -4.9375319480896,
      "loss": 0.6128,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -7.868778228759766,
      "rewards/margins": 2.0062854290008545,
      "rewards/rejected": -9.8750638961792,
      "step": 150
    },
    {
      "epoch": 0.33132932531730125,
      "grad_norm": 27.09179333048581,
      "learning_rate": 1.6910626489868648e-06,
      "logits/chosen": -0.8100920915603638,
      "logits/rejected": -0.7742663621902466,
      "logps/chosen": -3.824146270751953,
      "logps/rejected": -5.090175628662109,
      "loss": 0.6399,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -7.648292541503906,
      "rewards/margins": 2.5320582389831543,
      "rewards/rejected": -10.180351257324219,
      "step": 155
    },
    {
      "epoch": 0.3420173680694723,
      "grad_norm": 36.65170099175081,
      "learning_rate": 1.6635532290102113e-06,
      "logits/chosen": -0.8540701866149902,
      "logits/rejected": -0.8212080001831055,
      "logps/chosen": -4.092007637023926,
      "logps/rejected": -5.184715270996094,
      "loss": 0.5601,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -8.184015274047852,
      "rewards/margins": 2.185415744781494,
      "rewards/rejected": -10.369430541992188,
      "step": 160
    },
    {
      "epoch": 0.3527054108216433,
      "grad_norm": 44.09007725935235,
      "learning_rate": 1.6351157698421788e-06,
      "logits/chosen": -0.9053822755813599,
      "logits/rejected": -0.8696815371513367,
      "logps/chosen": -4.188479423522949,
      "logps/rejected": -5.3639140129089355,
      "loss": 0.5898,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -8.376958847045898,
      "rewards/margins": 2.3508691787719727,
      "rewards/rejected": -10.727828025817871,
      "step": 165
    },
    {
      "epoch": 0.3633934535738143,
      "grad_norm": 34.800340553634506,
      "learning_rate": 1.6057900438408199e-06,
      "logits/chosen": -0.8616800308227539,
      "logits/rejected": -0.8292746543884277,
      "logps/chosen": -4.644923686981201,
      "logps/rejected": -6.038055896759033,
      "loss": 0.5397,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -9.289847373962402,
      "rewards/margins": 2.7862656116485596,
      "rewards/rejected": -12.076111793518066,
      "step": 170
    },
    {
      "epoch": 0.3740814963259853,
      "grad_norm": 34.593547384833734,
      "learning_rate": 1.5756170656856736e-06,
      "logits/chosen": -0.9542654752731323,
      "logits/rejected": -0.889543354511261,
      "logps/chosen": -4.545766830444336,
      "logps/rejected": -5.687682628631592,
      "loss": 0.5562,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -9.091533660888672,
      "rewards/margins": 2.28383207321167,
      "rewards/rejected": -11.375365257263184,
      "step": 175
    },
    {
      "epoch": 0.3847695390781563,
      "grad_norm": 22.61281693291947,
      "learning_rate": 1.544639035015027e-06,
      "logits/chosen": -0.9639078378677368,
      "logits/rejected": -0.9341806173324585,
      "logps/chosen": -4.075970649719238,
      "logps/rejected": -5.5132246017456055,
      "loss": 0.513,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -8.151941299438477,
      "rewards/margins": 2.8745083808898926,
      "rewards/rejected": -11.026449203491211,
      "step": 180
    },
    {
      "epoch": 0.3954575818303273,
      "grad_norm": 21.446599953079577,
      "learning_rate": 1.5128992774059062e-06,
      "logits/chosen": -1.0559054613113403,
      "logits/rejected": -0.9924653172492981,
      "logps/chosen": -3.7231125831604004,
      "logps/rejected": -5.130820274353027,
      "loss": 0.4996,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -7.446225166320801,
      "rewards/margins": 2.815417766571045,
      "rewards/rejected": -10.261640548706055,
      "step": 185
    },
    {
      "epoch": 0.4061456245824983,
      "grad_norm": 24.863835996393608,
      "learning_rate": 1.4804421837793377e-06,
      "logits/chosen": -0.9934264421463013,
      "logits/rejected": -0.9997881054878235,
      "logps/chosen": -4.336796760559082,
      "logps/rejected": -5.937041282653809,
      "loss": 0.4682,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -8.673593521118164,
      "rewards/margins": 3.200488328933716,
      "rewards/rejected": -11.874082565307617,
      "step": 190
    },
    {
      "epoch": 0.4168336673346693,
      "grad_norm": 34.56272131407248,
      "learning_rate": 1.4473131483156324e-06,
      "logits/chosen": -0.8811644315719604,
      "logits/rejected": -0.8515303730964661,
      "logps/chosen": -5.209665298461914,
      "logps/rejected": -6.913350582122803,
      "loss": 0.515,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -10.419330596923828,
      "rewards/margins": 3.4073710441589355,
      "rewards/rejected": -13.826701164245605,
      "step": 195
    },
    {
      "epoch": 0.42752171008684037,
      "grad_norm": 26.404593181307447,
      "learning_rate": 1.4135585049665206e-06,
      "logits/chosen": -0.8241022825241089,
      "logits/rejected": -0.7840823531150818,
      "logps/chosen": -5.047942161560059,
      "logps/rejected": -6.955193996429443,
      "loss": 0.4519,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -10.095884323120117,
      "rewards/margins": 3.8145041465759277,
      "rewards/rejected": -13.910387992858887,
      "step": 200
    },
    {
      "epoch": 0.43820975283901137,
      "grad_norm": 35.5838299296831,
      "learning_rate": 1.3792254626529285e-06,
      "logits/chosen": -0.8618327975273132,
      "logits/rejected": -0.7756074666976929,
      "logps/chosen": -5.758598327636719,
      "logps/rejected": -7.596462249755859,
      "loss": 0.5778,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -11.517196655273438,
      "rewards/margins": 3.6757278442382812,
      "rewards/rejected": -15.192924499511719,
      "step": 205
    },
    {
      "epoch": 0.44889779559118237,
      "grad_norm": 42.32040382898782,
      "learning_rate": 1.3443620392390349e-06,
      "logits/chosen": -0.9941180944442749,
      "logits/rejected": -0.9657033085823059,
      "logps/chosen": -4.432991981506348,
      "logps/rejected": -6.000949859619141,
      "loss": 0.495,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -8.865983963012695,
      "rewards/margins": 3.135915756225586,
      "rewards/rejected": -12.001899719238281,
      "step": 210
    },
    {
      "epoch": 0.45958583834335337,
      "grad_norm": 28.859222169675768,
      "learning_rate": 1.3090169943749473e-06,
      "logits/chosen": -0.948104739189148,
      "logits/rejected": -0.9129034280776978,
      "logps/chosen": -3.579448699951172,
      "logps/rejected": -5.187192440032959,
      "loss": 0.4532,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -7.158897399902344,
      "rewards/margins": 3.2154877185821533,
      "rewards/rejected": -10.374384880065918,
      "step": 215
    },
    {
      "epoch": 0.47027388109552437,
      "grad_norm": 33.6510053739595,
      "learning_rate": 1.27323976130192e-06,
      "logits/chosen": -0.9587677121162415,
      "logits/rejected": -0.9107363820075989,
      "logps/chosen": -4.461714744567871,
      "logps/rejected": -6.2298054695129395,
      "loss": 0.3885,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -8.923429489135742,
      "rewards/margins": 3.536180019378662,
      "rewards/rejected": -12.459610939025879,
      "step": 220
    },
    {
      "epoch": 0.48096192384769537,
      "grad_norm": 39.16622543078335,
      "learning_rate": 1.2370803777154975e-06,
      "logits/chosen": -0.7982478141784668,
      "logits/rejected": -0.7258783578872681,
      "logps/chosen": -7.227081298828125,
      "logps/rejected": -9.01085090637207,
      "loss": 0.5453,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -14.45416259765625,
      "rewards/margins": 3.5675411224365234,
      "rewards/rejected": -18.02170181274414,
      "step": 225
    },
    {
      "epoch": 0.4916499665998664,
      "grad_norm": 28.294600400326075,
      "learning_rate": 1.2005894157832728e-06,
      "logits/chosen": -0.9068690538406372,
      "logits/rejected": -0.8007113337516785,
      "logps/chosen": -5.985177516937256,
      "logps/rejected": -8.007855415344238,
      "loss": 0.4459,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -11.970355033874512,
      "rewards/margins": 4.045356750488281,
      "rewards/rejected": -16.015710830688477,
      "step": 230
    },
    {
      "epoch": 0.5023380093520374,
      "grad_norm": 26.428195821183824,
      "learning_rate": 1.1638179114151377e-06,
      "logits/chosen": -1.0134648084640503,
      "logits/rejected": -0.9478827714920044,
      "logps/chosen": -4.030945301055908,
      "logps/rejected": -5.84409761428833,
      "loss": 0.4607,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -8.061890602111816,
      "rewards/margins": 3.6263041496276855,
      "rewards/rejected": -11.68819522857666,
      "step": 235
    },
    {
      "epoch": 0.5130260521042084,
      "grad_norm": 28.506424636352925,
      "learning_rate": 1.1268172928849485e-06,
      "logits/chosen": -1.0107872486114502,
      "logits/rejected": -0.9833100438117981,
      "logps/chosen": -3.623994827270508,
      "logps/rejected": -5.339346885681152,
      "loss": 0.4664,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -7.247989654541016,
      "rewards/margins": 3.4307048320770264,
      "rewards/rejected": -10.678693771362305,
      "step": 240
    },
    {
      "epoch": 0.5237140948563794,
      "grad_norm": 37.9874271990268,
      "learning_rate": 1.0896393089034335e-06,
      "logits/chosen": -1.0698987245559692,
      "logits/rejected": -0.9614090919494629,
      "logps/chosen": -4.2720537185668945,
      "logps/rejected": -6.518821716308594,
      "loss": 0.3759,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -8.544107437133789,
      "rewards/margins": 4.493536472320557,
      "rewards/rejected": -13.037643432617188,
      "step": 245
    },
    {
      "epoch": 0.5344021376085505,
      "grad_norm": 37.6233219867946,
      "learning_rate": 1.052335956242944e-06,
      "logits/chosen": -0.9640167355537415,
      "logits/rejected": -0.9025171399116516,
      "logps/chosen": -5.073387622833252,
      "logps/rejected": -7.112657070159912,
      "loss": 0.3989,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -10.146775245666504,
      "rewards/margins": 4.078539848327637,
      "rewards/rejected": -14.225314140319824,
      "step": 250
    },
    {
      "epoch": 0.5450901803607214,
      "grad_norm": 35.09471619941238,
      "learning_rate": 1.0149594070152636e-06,
      "logits/chosen": -0.9901530146598816,
      "logits/rejected": -0.9247368574142456,
      "logps/chosen": -6.148016452789307,
      "logps/rejected": -8.221637725830078,
      "loss": 0.4697,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -12.296032905578613,
      "rewards/margins": 4.147244930267334,
      "rewards/rejected": -16.443275451660156,
      "step": 255
    },
    {
      "epoch": 0.5557782231128925,
      "grad_norm": 39.356165818725984,
      "learning_rate": 9.77561935704195e-07,
      "logits/chosen": -0.9357139468193054,
      "logits/rejected": -0.858476459980011,
      "logps/chosen": -6.003566741943359,
      "logps/rejected": -8.099205017089844,
      "loss": 0.4241,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -12.007133483886719,
      "rewards/margins": 4.191277027130127,
      "rewards/rejected": -16.198410034179688,
      "step": 260
    },
    {
      "epoch": 0.5664662658650634,
      "grad_norm": 20.857149706425567,
      "learning_rate": 9.401958460549657e-07,
      "logits/chosen": -0.8877873420715332,
      "logits/rejected": -0.8332953453063965,
      "logps/chosen": -5.713176250457764,
      "logps/rejected": -7.9226484298706055,
      "loss": 0.4085,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -11.426352500915527,
      "rewards/margins": 4.418946266174316,
      "rewards/rejected": -15.845296859741211,
      "step": 265
    },
    {
      "epoch": 0.5771543086172345,
      "grad_norm": 37.11096746877866,
      "learning_rate": 9.029133979227118e-07,
      "logits/chosen": -0.9584988355636597,
      "logits/rejected": -0.9051562547683716,
      "logps/chosen": -4.586709976196289,
      "logps/rejected": -6.5038323402404785,
      "loss": 0.4022,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -9.173419952392578,
      "rewards/margins": 3.8342444896698,
      "rewards/rejected": -13.007664680480957,
      "step": 270
    },
    {
      "epoch": 0.5878423513694054,
      "grad_norm": 25.04133162285963,
      "learning_rate": 8.657667341823448e-07,
      "logits/chosen": -0.9564048051834106,
      "logits/rejected": -0.8701663017272949,
      "logps/chosen": -4.893515586853027,
      "logps/rejected": -6.940362453460693,
      "loss": 0.4312,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -9.787031173706055,
      "rewards/margins": 4.093693256378174,
      "rewards/rejected": -13.880724906921387,
      "step": 275
    },
    {
      "epoch": 0.5985303941215765,
      "grad_norm": 31.64139590058085,
      "learning_rate": 8.288078078020249e-07,
      "logits/chosen": -1.0176098346710205,
      "logits/rejected": -0.9464299082756042,
      "logps/chosen": -5.894881248474121,
      "logps/rejected": -8.109701156616211,
      "loss": 0.4212,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -11.789762496948242,
      "rewards/margins": 4.429640769958496,
      "rewards/rejected": -16.219402313232422,
      "step": 280
    },
    {
      "epoch": 0.6092184368737475,
      "grad_norm": 34.98704174006504,
      "learning_rate": 7.920883091822408e-07,
      "logits/chosen": -1.0222933292388916,
      "logits/rejected": -0.9283574223518372,
      "logps/chosen": -5.977299213409424,
      "logps/rejected": -8.55643081665039,
      "loss": 0.3473,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -11.954598426818848,
      "rewards/margins": 5.158264636993408,
      "rewards/rejected": -17.11286163330078,
      "step": 285
    },
    {
      "epoch": 0.6199064796259185,
      "grad_norm": 43.7429550932754,
      "learning_rate": 7.556595938621058e-07,
      "logits/chosen": -1.0368258953094482,
      "logits/rejected": -0.9450758099555969,
      "logps/chosen": -6.416205406188965,
      "logps/rejected": -8.702176094055176,
      "loss": 0.4135,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -12.83241081237793,
      "rewards/margins": 4.571939945220947,
      "rewards/rejected": -17.40435218811035,
      "step": 290
    },
    {
      "epoch": 0.6305945223780896,
      "grad_norm": 38.571708947108014,
      "learning_rate": 7.195726106939973e-07,
      "logits/chosen": -1.0127325057983398,
      "logits/rejected": -0.9613968729972839,
      "logps/chosen": -6.0891900062561035,
      "logps/rejected": -8.455511093139648,
      "loss": 0.3415,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -12.178380012512207,
      "rewards/margins": 4.73264217376709,
      "rewards/rejected": -16.911022186279297,
      "step": 295
    },
    {
      "epoch": 0.6412825651302605,
      "grad_norm": 37.02547097442152,
      "learning_rate": 6.838778305869759e-07,
      "logits/chosen": -0.9378641247749329,
      "logits/rejected": -0.8806314468383789,
      "logps/chosen": -6.423588752746582,
      "logps/rejected": -8.611102104187012,
      "loss": 0.4404,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -12.847177505493164,
      "rewards/margins": 4.375027656555176,
      "rewards/rejected": -17.222204208374023,
      "step": 300
    },
    {
      "epoch": 0.6519706078824316,
      "grad_norm": 35.55070245031894,
      "learning_rate": 6.486251759186572e-07,
      "logits/chosen": -1.0858322381973267,
      "logits/rejected": -0.9954659342765808,
      "logps/chosen": -5.805714130401611,
      "logps/rejected": -7.78420877456665,
      "loss": 0.4396,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -11.611428260803223,
      "rewards/margins": 3.956988573074341,
      "rewards/rejected": -15.5684175491333,
      "step": 305
    },
    {
      "epoch": 0.6626586506346025,
      "grad_norm": 51.53731628000405,
      "learning_rate": 6.138639507142538e-07,
      "logits/chosen": -1.175060749053955,
      "logits/rejected": -1.1142823696136475,
      "logps/chosen": -5.7005181312561035,
      "logps/rejected": -7.967810153961182,
      "loss": 0.4227,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -11.401036262512207,
      "rewards/margins": 4.534584999084473,
      "rewards/rejected": -15.935620307922363,
      "step": 310
    },
    {
      "epoch": 0.6733466933867736,
      "grad_norm": 29.428644028564324,
      "learning_rate": 5.796427716904346e-07,
      "logits/chosen": -1.1236612796783447,
      "logits/rejected": -1.0238118171691895,
      "logps/chosen": -6.259681701660156,
      "logps/rejected": -8.45996379852295,
      "loss": 0.3742,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -12.519363403320312,
      "rewards/margins": 4.400565147399902,
      "rewards/rejected": -16.9199275970459,
      "step": 315
    },
    {
      "epoch": 0.6840347361389446,
      "grad_norm": 38.71321431370745,
      "learning_rate": 5.460095002604532e-07,
      "logits/chosen": -1.11953866481781,
      "logits/rejected": -1.0796916484832764,
      "logps/chosen": -6.55707311630249,
      "logps/rejected": -9.187610626220703,
      "loss": 0.3626,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -13.11414623260498,
      "rewards/margins": 5.2610764503479,
      "rewards/rejected": -18.375221252441406,
      "step": 320
    },
    {
      "epoch": 0.6947227788911156,
      "grad_norm": 38.929986299465604,
      "learning_rate": 5.130111755956327e-07,
      "logits/chosen": -1.1838449239730835,
      "logits/rejected": -1.0870417356491089,
      "logps/chosen": -6.676375389099121,
      "logps/rejected": -9.317723274230957,
      "loss": 0.4211,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -13.352750778198242,
      "rewards/margins": 5.282693862915039,
      "rewards/rejected": -18.635446548461914,
      "step": 325
    },
    {
      "epoch": 0.7054108216432866,
      "grad_norm": 26.360971338492213,
      "learning_rate": 4.806939488368308e-07,
      "logits/chosen": -1.0527994632720947,
      "logits/rejected": -0.9714158177375793,
      "logps/chosen": -6.790243625640869,
      "logps/rejected": -8.82271671295166,
      "loss": 0.3754,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -13.580487251281738,
      "rewards/margins": 4.06494665145874,
      "rewards/rejected": -17.64543342590332,
      "step": 330
    },
    {
      "epoch": 0.7160988643954576,
      "grad_norm": 37.25228754273986,
      "learning_rate": 4.4910301854789755e-07,
      "logits/chosen": -1.092002511024475,
      "logits/rejected": -1.0370265245437622,
      "logps/chosen": -6.746194362640381,
      "logps/rejected": -8.957503318786621,
      "loss": 0.379,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -13.492388725280762,
      "rewards/margins": 4.4226179122924805,
      "rewards/rejected": -17.915006637573242,
      "step": 335
    },
    {
      "epoch": 0.7267869071476286,
      "grad_norm": 41.78732477890408,
      "learning_rate": 4.1828256750139443e-07,
      "logits/chosen": -1.15060555934906,
      "logits/rejected": -1.0927339792251587,
      "logps/chosen": -6.618721008300781,
      "logps/rejected": -8.740182876586914,
      "loss": 0.4272,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -13.237442016601562,
      "rewards/margins": 4.242924213409424,
      "rewards/rejected": -17.480365753173828,
      "step": 340
    },
    {
      "epoch": 0.7374749498997996,
      "grad_norm": 31.334898714386284,
      "learning_rate": 3.882757008849935e-07,
      "logits/chosen": -1.1759268045425415,
      "logits/rejected": -1.125778317451477,
      "logps/chosen": -7.186532020568848,
      "logps/rejected": -9.452940940856934,
      "loss": 0.3551,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -14.373064041137695,
      "rewards/margins": 4.532819747924805,
      "rewards/rejected": -18.905881881713867,
      "step": 345
    },
    {
      "epoch": 0.7481629926519706,
      "grad_norm": 29.63352796318247,
      "learning_rate": 3.5912438601497584e-07,
      "logits/chosen": -1.186089038848877,
      "logits/rejected": -1.1533267498016357,
      "logps/chosen": -6.283223628997803,
      "logps/rejected": -8.40349006652832,
      "loss": 0.3724,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -12.566447257995605,
      "rewards/margins": 4.2405314445495605,
      "rewards/rejected": -16.80698013305664,
      "step": 350
    },
    {
      "epoch": 0.7588510354041417,
      "grad_norm": 37.40129439705042,
      "learning_rate": 3.308693936411421e-07,
      "logits/chosen": -1.0497562885284424,
      "logits/rejected": -1.0346195697784424,
      "logps/chosen": -6.789434909820557,
      "logps/rejected": -9.07376766204834,
      "loss": 0.3605,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -13.578869819641113,
      "rewards/margins": 4.568666458129883,
      "rewards/rejected": -18.14753532409668,
      "step": 355
    },
    {
      "epoch": 0.7695390781563126,
      "grad_norm": 42.85252213793353,
      "learning_rate": 3.035502409252333e-07,
      "logits/chosen": -1.11203134059906,
      "logits/rejected": -1.0642902851104736,
      "logps/chosen": -6.502237796783447,
      "logps/rejected": -9.193612098693848,
      "loss": 0.4275,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -13.004475593566895,
      "rewards/margins": 5.382746696472168,
      "rewards/rejected": -18.387224197387695,
      "step": 360
    },
    {
      "epoch": 0.7802271209084837,
      "grad_norm": 45.248127741114246,
      "learning_rate": 2.7720513617260855e-07,
      "logits/chosen": -1.1741015911102295,
      "logits/rejected": -1.0450173616409302,
      "logps/chosen": -6.776492118835449,
      "logps/rejected": -9.342794418334961,
      "loss": 0.3758,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -13.552984237670898,
      "rewards/margins": 5.132604598999023,
      "rewards/rejected": -18.685588836669922,
      "step": 365
    },
    {
      "epoch": 0.7909151636606546,
      "grad_norm": 70.88406643518205,
      "learning_rate": 2.5187092539447294e-07,
      "logits/chosen": -1.1018563508987427,
      "logits/rejected": -1.0579187870025635,
      "logps/chosen": -6.298445701599121,
      "logps/rejected": -8.556467056274414,
      "loss": 0.3808,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -12.596891403198242,
      "rewards/margins": 4.516043663024902,
      "rewards/rejected": -17.112934112548828,
      "step": 370
    },
    {
      "epoch": 0.8016032064128257,
      "grad_norm": 41.80456248679069,
      "learning_rate": 2.2758304077540058e-07,
      "logits/chosen": -1.1480379104614258,
      "logits/rejected": -1.1150692701339722,
      "logps/chosen": -6.318451881408691,
      "logps/rejected": -8.656303405761719,
      "loss": 0.3586,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -12.636903762817383,
      "rewards/margins": 4.675703525543213,
      "rewards/rejected": -17.312606811523438,
      "step": 375
    },
    {
      "epoch": 0.8122912491649966,
      "grad_norm": 31.017236490830967,
      "learning_rate": 2.043754511182191e-07,
      "logits/chosen": -1.1511554718017578,
      "logits/rejected": -1.0976629257202148,
      "logps/chosen": -6.138351917266846,
      "logps/rejected": -8.664915084838867,
      "loss": 0.4,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -12.276703834533691,
      "rewards/margins": 5.053128242492676,
      "rewards/rejected": -17.329830169677734,
      "step": 380
    },
    {
      "epoch": 0.8229792919171677,
      "grad_norm": 26.351372088988093,
      "learning_rate": 1.8228061433556864e-07,
      "logits/chosen": -1.1164242029190063,
      "logits/rejected": -1.0599582195281982,
      "logps/chosen": -6.1393351554870605,
      "logps/rejected": -8.908954620361328,
      "loss": 0.3271,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -12.278670310974121,
      "rewards/margins": 5.539238929748535,
      "rewards/rejected": -17.817909240722656,
      "step": 385
    },
    {
      "epoch": 0.8336673346693386,
      "grad_norm": 40.004488570738765,
      "learning_rate": 1.6132943205457606e-07,
      "logits/chosen": -1.1820439100265503,
      "logits/rejected": -1.1261646747589111,
      "logps/chosen": -6.401742458343506,
      "logps/rejected": -8.99330997467041,
      "loss": 0.4273,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -12.803484916687012,
      "rewards/margins": 5.183135032653809,
      "rewards/rejected": -17.98661994934082,
      "step": 390
    },
    {
      "epoch": 0.8443553774215097,
      "grad_norm": 41.3303995282676,
      "learning_rate": 1.415512063981339e-07,
      "logits/chosen": -1.1933691501617432,
      "logits/rejected": -1.143477201461792,
      "logps/chosen": -6.095961093902588,
      "logps/rejected": -8.315205574035645,
      "loss": 0.3615,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -12.191922187805176,
      "rewards/margins": 4.438488960266113,
      "rewards/rejected": -16.63041114807129,
      "step": 395
    },
    {
      "epoch": 0.8550434201736807,
      "grad_norm": 30.146673376540157,
      "learning_rate": 1.2297359900323344e-07,
      "logits/chosen": -1.185856819152832,
      "logits/rejected": -1.149908423423767,
      "logps/chosen": -6.064610958099365,
      "logps/rejected": -8.274811744689941,
      "loss": 0.3805,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -12.12922191619873,
      "rewards/margins": 4.420398712158203,
      "rewards/rejected": -16.549623489379883,
      "step": 400
    },
    {
      "epoch": 0.8550434201736807,
      "eval_logits/chosen": -1.3878380060195923,
      "eval_logits/rejected": -1.3844929933547974,
      "eval_logps/chosen": -5.970302104949951,
      "eval_logps/rejected": -8.178492546081543,
      "eval_loss": 0.34991469979286194,
      "eval_rewards/accuracies": 0.9004064798355103,
      "eval_rewards/chosen": -11.940604209899902,
      "eval_rewards/margins": 4.416379928588867,
      "eval_rewards/rejected": -16.356985092163086,
      "eval_runtime": 98.864,
      "eval_samples_per_second": 19.835,
      "eval_steps_per_second": 1.244,
      "step": 400
    },
    {
      "epoch": 0.8657314629258517,
      "grad_norm": 39.23606930955491,
      "learning_rate": 1.0562259233366333e-07,
      "logits/chosen": -1.1601266860961914,
      "logits/rejected": -1.1533467769622803,
      "logps/chosen": -6.3432416915893555,
      "logps/rejected": -8.685356140136719,
      "loss": 0.3527,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -12.686483383178711,
      "rewards/margins": 4.684228420257568,
      "rewards/rejected": -17.370712280273438,
      "step": 405
    },
    {
      "epoch": 0.8764195056780227,
      "grad_norm": 44.84060293631811,
      "learning_rate": 8.952245334118413e-08,
      "logits/chosen": -1.1762316226959229,
      "logits/rejected": -1.1400468349456787,
      "logps/chosen": -5.951014041900635,
      "logps/rejected": -8.487456321716309,
      "loss": 0.372,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -11.90202808380127,
      "rewards/margins": 5.072883605957031,
      "rewards/rejected": -16.974912643432617,
      "step": 410
    },
    {
      "epoch": 0.8871075484301937,
      "grad_norm": 31.58697079899106,
      "learning_rate": 7.46956995260033e-08,
      "logits/chosen": -1.1965105533599854,
      "logits/rejected": -1.0948525667190552,
      "logps/chosen": -5.939952373504639,
      "logps/rejected": -8.576761245727539,
      "loss": 0.3642,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -11.879904747009277,
      "rewards/margins": 5.273618698120117,
      "rewards/rejected": -17.153522491455078,
      "step": 415
    },
    {
      "epoch": 0.8977955911823647,
      "grad_norm": 56.62718923940337,
      "learning_rate": 6.11630674440139e-08,
      "logits/chosen": -1.2364650964736938,
      "logits/rejected": -1.1493674516677856,
      "logps/chosen": -5.8380866050720215,
      "logps/rejected": -8.528668403625488,
      "loss": 0.3543,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -11.676173210144043,
      "rewards/margins": 5.381163597106934,
      "rewards/rejected": -17.057336807250977,
      "step": 420
    },
    {
      "epoch": 0.9084836339345357,
      "grad_norm": 26.88857335924454,
      "learning_rate": 4.8943483704846465e-08,
      "logits/chosen": -1.2132270336151123,
      "logits/rejected": -1.1822996139526367,
      "logps/chosen": -6.329747200012207,
      "logps/rejected": -8.68973159790039,
      "loss": 0.378,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -12.659494400024414,
      "rewards/margins": 4.719969749450684,
      "rewards/rejected": -17.37946319580078,
      "step": 425
    },
    {
      "epoch": 0.9191716766867067,
      "grad_norm": 29.204672971590583,
      "learning_rate": 3.805403850129407e-08,
      "logits/chosen": -1.1887871026992798,
      "logits/rejected": -1.1395562887191772,
      "logps/chosen": -6.298637866973877,
      "logps/rejected": -8.703396797180176,
      "loss": 0.3701,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -12.597275733947754,
      "rewards/margins": 4.809514999389648,
      "rewards/rejected": -17.40679359436035,
      "step": 430
    },
    {
      "epoch": 0.9298597194388778,
      "grad_norm": 41.83119701192464,
      "learning_rate": 2.8509961707132492e-08,
      "logits/chosen": -1.1526520252227783,
      "logits/rejected": -1.087210774421692,
      "logps/chosen": -5.99376106262207,
      "logps/rejected": -8.27347183227539,
      "loss": 0.3539,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -11.98752212524414,
      "rewards/margins": 4.559422492980957,
      "rewards/rejected": -16.54694366455078,
      "step": 435
    },
    {
      "epoch": 0.9405477621910487,
      "grad_norm": 27.28448585229794,
      "learning_rate": 2.032460157676452e-08,
      "logits/chosen": -1.1298894882202148,
      "logits/rejected": -1.049036979675293,
      "logps/chosen": -6.4232072830200195,
      "logps/rejected": -8.850305557250977,
      "loss": 0.3414,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -12.846414566040039,
      "rewards/margins": 4.854195594787598,
      "rewards/rejected": -17.700611114501953,
      "step": 440
    },
    {
      "epoch": 0.9512358049432198,
      "grad_norm": 31.150711268639814,
      "learning_rate": 1.3509406076478659e-08,
      "logits/chosen": -1.1100740432739258,
      "logits/rejected": -1.0567227602005005,
      "logps/chosen": -6.3755292892456055,
      "logps/rejected": -9.159284591674805,
      "loss": 0.344,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -12.751058578491211,
      "rewards/margins": 5.567511081695557,
      "rewards/rejected": -18.31856918334961,
      "step": 445
    },
    {
      "epoch": 0.9619238476953907,
      "grad_norm": 33.19068830748795,
      "learning_rate": 8.07390687343379e-09,
      "logits/chosen": -1.250570297241211,
      "logits/rejected": -1.1990430355072021,
      "logps/chosen": -6.264920711517334,
      "logps/rejected": -8.49793815612793,
      "loss": 0.3294,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -12.529841423034668,
      "rewards/margins": 4.466032981872559,
      "rewards/rejected": -16.99587631225586,
      "step": 450
    },
    {
      "epoch": 0.9726118904475618,
      "grad_norm": 42.76771467797157,
      "learning_rate": 4.025706004760931e-09,
      "logits/chosen": -1.1908820867538452,
      "logits/rejected": -1.1271415948867798,
      "logps/chosen": -6.330782890319824,
      "logps/rejected": -9.00413703918457,
      "loss": 0.3373,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": -12.661565780639648,
      "rewards/margins": 5.346711158752441,
      "rewards/rejected": -18.00827407836914,
      "step": 455
    },
    {
      "epoch": 0.9832999331997327,
      "grad_norm": 28.947424332254975,
      "learning_rate": 1.3704652454261667e-09,
      "logits/chosen": -1.1968469619750977,
      "logits/rejected": -1.1597331762313843,
      "logps/chosen": -6.481853485107422,
      "logps/rejected": -9.090927124023438,
      "loss": 0.3515,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -12.963706970214844,
      "rewards/margins": 5.218146800994873,
      "rewards/rejected": -18.181854248046875,
      "step": 460
    },
    {
      "epoch": 0.9939879759519038,
      "grad_norm": 39.23731303488194,
      "learning_rate": 1.1189818972656696e-10,
      "logits/chosen": -1.163874864578247,
      "logits/rejected": -1.1393449306488037,
      "logps/chosen": -6.374614715576172,
      "logps/rejected": -9.016167640686035,
      "loss": 0.3467,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -12.749229431152344,
      "rewards/margins": 5.283105373382568,
      "rewards/rejected": -18.03233528137207,
      "step": 465
    },
    {
      "epoch": 0.9982631930527722,
      "step": 467,
      "total_flos": 0.0,
      "train_loss": 0.5656856803873622,
      "train_runtime": 11731.0657,
      "train_samples_per_second": 5.104,
      "train_steps_per_second": 0.04
    }
  ],
  "logging_steps": 5,
  "max_steps": 467,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}