|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 500, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 0.1786131818333053, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -1.4481778144836426, |
|
"logits/rejected": -1.4499433040618896, |
|
"logps/chosen": -7.982362270355225, |
|
"logps/rejected": -8.15577507019043, |
|
"loss": -0.0009, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.982362270355225, |
|
"rewards/margins": 0.17341338098049164, |
|
"rewards/rejected": -8.15577507019043, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 0.07282553733728953, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -1.4464797973632812, |
|
"logits/rejected": -1.4372261762619019, |
|
"logps/chosen": -8.047597885131836, |
|
"logps/rejected": -7.961185455322266, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -8.047597885131836, |
|
"rewards/margins": -0.08641364425420761, |
|
"rewards/rejected": -7.961185455322266, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 0.1915776567981181, |
|
"learning_rate": 1.875e-07, |
|
"logits/chosen": -1.4458509683609009, |
|
"logits/rejected": -1.4425008296966553, |
|
"logps/chosen": -7.852419853210449, |
|
"logps/rejected": -7.86798095703125, |
|
"loss": -0.0011, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.852419853210449, |
|
"rewards/margins": 0.01556050218641758, |
|
"rewards/rejected": -7.86798095703125, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 0.18043036746469734, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.433124303817749, |
|
"logits/rejected": -1.4371713399887085, |
|
"logps/chosen": -8.189096450805664, |
|
"logps/rejected": -8.211885452270508, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -8.189096450805664, |
|
"rewards/margins": 0.02278941310942173, |
|
"rewards/rejected": -8.211885452270508, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 0.0768554595715667, |
|
"learning_rate": 3.125e-07, |
|
"logits/chosen": -1.4702521562576294, |
|
"logits/rejected": -1.46065354347229, |
|
"logps/chosen": -8.118414878845215, |
|
"logps/rejected": -8.017342567443848, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -8.118414878845215, |
|
"rewards/margins": -0.10107225179672241, |
|
"rewards/rejected": -8.017342567443848, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 0.15370408069863134, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -1.4349619150161743, |
|
"logits/rejected": -1.4234455823898315, |
|
"logps/chosen": -7.866227626800537, |
|
"logps/rejected": -7.7843732833862305, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -7.866227626800537, |
|
"rewards/margins": -0.08185449987649918, |
|
"rewards/rejected": -7.7843732833862305, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 0.08486142616829283, |
|
"learning_rate": 4.3749999999999994e-07, |
|
"logits/chosen": -1.4395850896835327, |
|
"logits/rejected": -1.4178800582885742, |
|
"logps/chosen": -8.148027420043945, |
|
"logps/rejected": -8.014989852905273, |
|
"loss": -0.0007, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -8.148027420043945, |
|
"rewards/margins": -0.13303671777248383, |
|
"rewards/rejected": -8.014989852905273, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 0.1990381181235585, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.4183995723724365, |
|
"logits/rejected": -1.4341777563095093, |
|
"logps/chosen": -8.116990089416504, |
|
"logps/rejected": -8.271265983581543, |
|
"loss": -0.0007, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -8.116990089416504, |
|
"rewards/margins": 0.15427525341510773, |
|
"rewards/rejected": -8.271265983581543, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 0.05622179554254164, |
|
"learning_rate": 5.625e-07, |
|
"logits/chosen": -1.4420310258865356, |
|
"logits/rejected": -1.442714810371399, |
|
"logps/chosen": -8.133280754089355, |
|
"logps/rejected": -7.922200679779053, |
|
"loss": -0.0001, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -8.133280754089355, |
|
"rewards/margins": -0.21108034253120422, |
|
"rewards/rejected": -7.922200679779053, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 0.09631702855440245, |
|
"learning_rate": 5.999678242522831e-07, |
|
"logits/chosen": -1.4332040548324585, |
|
"logits/rejected": -1.4520256519317627, |
|
"logps/chosen": -8.272871017456055, |
|
"logps/rejected": -8.261492729187012, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -8.272871017456055, |
|
"rewards/margins": -0.011378437280654907, |
|
"rewards/rejected": -8.261492729187012, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 0.06712571499202777, |
|
"learning_rate": 5.996059263493219e-07, |
|
"logits/chosen": -1.4460947513580322, |
|
"logits/rejected": -1.4431495666503906, |
|
"logps/chosen": -8.187493324279785, |
|
"logps/rejected": -8.160319328308105, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -8.187493324279785, |
|
"rewards/margins": -0.02717405930161476, |
|
"rewards/rejected": -8.160319328308105, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 0.20114381829735603, |
|
"learning_rate": 5.988423976115163e-07, |
|
"logits/chosen": -1.4432101249694824, |
|
"logits/rejected": -1.455540657043457, |
|
"logps/chosen": -8.201032638549805, |
|
"logps/rejected": -8.437708854675293, |
|
"loss": -0.0006, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -8.201032638549805, |
|
"rewards/margins": 0.23667626082897186, |
|
"rewards/rejected": -8.437708854675293, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 0.17507947570299737, |
|
"learning_rate": 5.976782615723061e-07, |
|
"logits/chosen": -1.3914668560028076, |
|
"logits/rejected": -1.4124656915664673, |
|
"logps/chosen": -8.03328800201416, |
|
"logps/rejected": -8.427019119262695, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -8.03328800201416, |
|
"rewards/margins": 0.393731027841568, |
|
"rewards/rejected": -8.427019119262695, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 0.28383551734545676, |
|
"learning_rate": 5.961150787913738e-07, |
|
"logits/chosen": -1.399139404296875, |
|
"logits/rejected": -1.3929238319396973, |
|
"logps/chosen": -8.119722366333008, |
|
"logps/rejected": -8.134946823120117, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -8.119722366333008, |
|
"rewards/margins": 0.01522480882704258, |
|
"rewards/rejected": -8.134946823120117, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 0.05943092529446436, |
|
"learning_rate": 5.941549447626671e-07, |
|
"logits/chosen": -1.4151188135147095, |
|
"logits/rejected": -1.4230639934539795, |
|
"logps/chosen": -8.229839324951172, |
|
"logps/rejected": -8.183255195617676, |
|
"loss": -0.0001, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -8.229839324951172, |
|
"rewards/margins": -0.04658409580588341, |
|
"rewards/rejected": -8.183255195617676, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 0.11166728026103465, |
|
"learning_rate": 5.918004871053251e-07, |
|
"logits/chosen": -1.431121587753296, |
|
"logits/rejected": -1.4449329376220703, |
|
"logps/chosen": -8.453977584838867, |
|
"logps/rejected": -8.471723556518555, |
|
"loss": -0.0007, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -8.453977584838867, |
|
"rewards/margins": 0.017745357006788254, |
|
"rewards/rejected": -8.471723556518555, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 0.11291027423617692, |
|
"learning_rate": 5.890548620412763e-07, |
|
"logits/chosen": -1.4427986145019531, |
|
"logits/rejected": -1.4420478343963623, |
|
"logps/chosen": -8.645545959472656, |
|
"logps/rejected": -8.712440490722656, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -8.645545959472656, |
|
"rewards/margins": 0.06689504534006119, |
|
"rewards/rejected": -8.712440490722656, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 0.2075325531424713, |
|
"learning_rate": 5.859217501642258e-07, |
|
"logits/chosen": -1.431888222694397, |
|
"logits/rejected": -1.4438416957855225, |
|
"logps/chosen": -8.67556381225586, |
|
"logps/rejected": -8.743408203125, |
|
"loss": -0.0001, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -8.67556381225586, |
|
"rewards/margins": 0.0678454041481018, |
|
"rewards/rejected": -8.743408203125, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 0.1350380165212238, |
|
"learning_rate": 5.824053515057091e-07, |
|
"logits/chosen": -1.4262675046920776, |
|
"logits/rejected": -1.4206186532974243, |
|
"logps/chosen": -8.711984634399414, |
|
"logps/rejected": -8.466314315795898, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -8.711984634399414, |
|
"rewards/margins": -0.24566936492919922, |
|
"rewards/rejected": -8.466314315795898, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 0.056801258116621844, |
|
"learning_rate": 5.785103799048218e-07, |
|
"logits/chosen": -1.4670069217681885, |
|
"logits/rejected": -1.4761860370635986, |
|
"logps/chosen": -8.660918235778809, |
|
"logps/rejected": -8.600263595581055, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -8.660918235778809, |
|
"rewards/margins": -0.060654301196336746, |
|
"rewards/rejected": -8.600263595581055, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 0.02647029175400585, |
|
"learning_rate": 5.742420566891749e-07, |
|
"logits/chosen": -1.4849385023117065, |
|
"logits/rejected": -1.4768640995025635, |
|
"logps/chosen": -8.584749221801758, |
|
"logps/rejected": -8.679868698120117, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -8.584749221801758, |
|
"rewards/margins": 0.09512145817279816, |
|
"rewards/rejected": -8.679868698120117, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 0.14957708248219087, |
|
"learning_rate": 5.696061036755478e-07, |
|
"logits/chosen": -1.537284255027771, |
|
"logits/rejected": -1.524402141571045, |
|
"logps/chosen": -9.06226921081543, |
|
"logps/rejected": -9.15291976928711, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -9.06226921081543, |
|
"rewards/margins": 0.09065041691064835, |
|
"rewards/rejected": -9.15291976928711, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 0.04538978337584183, |
|
"learning_rate": 5.64608735499618e-07, |
|
"logits/chosen": -1.500104546546936, |
|
"logits/rejected": -1.4960343837738037, |
|
"logps/chosen": -9.003273010253906, |
|
"logps/rejected": -9.141524314880371, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -9.003273010253906, |
|
"rewards/margins": 0.1382521390914917, |
|
"rewards/rejected": -9.141524314880371, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 0.08148752548258974, |
|
"learning_rate": 5.592566512850545e-07, |
|
"logits/chosen": -1.5267969369888306, |
|
"logits/rejected": -1.522019624710083, |
|
"logps/chosen": -9.461637496948242, |
|
"logps/rejected": -9.392511367797852, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -9.461637496948242, |
|
"rewards/margins": -0.06912745535373688, |
|
"rewards/rejected": -9.392511367797852, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 0.011705138951069663, |
|
"learning_rate": 5.535570256631384e-07, |
|
"logits/chosen": -1.6196930408477783, |
|
"logits/rejected": -1.6130282878875732, |
|
"logps/chosen": -10.071660041809082, |
|
"logps/rejected": -9.955024719238281, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -10.071660041809082, |
|
"rewards/margins": -0.11663500964641571, |
|
"rewards/rejected": -9.955024719238281, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 0.03311359707714934, |
|
"learning_rate": 5.475174991549528e-07, |
|
"logits/chosen": -1.6836683750152588, |
|
"logits/rejected": -1.6870734691619873, |
|
"logps/chosen": -10.576017379760742, |
|
"logps/rejected": -10.587455749511719, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.576017379760742, |
|
"rewards/margins": 0.011437964625656605, |
|
"rewards/rejected": -10.587455749511719, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 0.0028807868975943276, |
|
"learning_rate": 5.411461679290317e-07, |
|
"logits/chosen": -1.716474175453186, |
|
"logits/rejected": -1.7054128646850586, |
|
"logps/chosen": -10.966715812683105, |
|
"logps/rejected": -11.17033576965332, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -10.966715812683105, |
|
"rewards/margins": 0.2036197930574417, |
|
"rewards/rejected": -11.17033576965332, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 0.0071240218588725325, |
|
"learning_rate": 5.34451572948201e-07, |
|
"logits/chosen": -1.7624616622924805, |
|
"logits/rejected": -1.750759482383728, |
|
"logps/chosen": -11.156391143798828, |
|
"logps/rejected": -11.09487533569336, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -11.156391143798828, |
|
"rewards/margins": -0.061515532433986664, |
|
"rewards/rejected": -11.09487533569336, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 0.007104778936656334, |
|
"learning_rate": 5.274426885201582e-07, |
|
"logits/chosen": -1.7734272480010986, |
|
"logits/rejected": -1.7652689218521118, |
|
"logps/chosen": -11.338696479797363, |
|
"logps/rejected": -11.272669792175293, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -11.338696479797363, |
|
"rewards/margins": -0.06602667272090912, |
|
"rewards/rejected": -11.272669792175293, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 0.0026785877800151164, |
|
"learning_rate": 5.201289102671411e-07, |
|
"logits/chosen": -1.7958892583847046, |
|
"logits/rejected": -1.7981617450714111, |
|
"logps/chosen": -11.584068298339844, |
|
"logps/rejected": -11.576414108276367, |
|
"loss": -0.0001, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -11.584068298339844, |
|
"rewards/margins": -0.007653522305190563, |
|
"rewards/rejected": -11.576414108276367, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 0.032494261377218156, |
|
"learning_rate": 5.12520042530811e-07, |
|
"logits/chosen": -1.7825043201446533, |
|
"logits/rejected": -1.7530311346054077, |
|
"logps/chosen": -11.603002548217773, |
|
"logps/rejected": -11.390130043029785, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -11.603002548217773, |
|
"rewards/margins": -0.21287231147289276, |
|
"rewards/rejected": -11.390130043029785, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 0.011685158472598062, |
|
"learning_rate": 5.046262852292346e-07, |
|
"logits/chosen": -1.770146369934082, |
|
"logits/rejected": -1.7625595331192017, |
|
"logps/chosen": -11.565991401672363, |
|
"logps/rejected": -11.481651306152344, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -11.565991401672363, |
|
"rewards/margins": -0.08434131741523743, |
|
"rewards/rejected": -11.481651306152344, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 0.01649802186957788, |
|
"learning_rate": 4.964582201835856e-07, |
|
"logits/chosen": -1.800264596939087, |
|
"logits/rejected": -1.78522527217865, |
|
"logps/chosen": -11.585084915161133, |
|
"logps/rejected": -11.45960521697998, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -11.585084915161133, |
|
"rewards/margins": -0.125479057431221, |
|
"rewards/rejected": -11.45960521697998, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 0.007019029699907713, |
|
"learning_rate": 4.880267969328908e-07, |
|
"logits/chosen": -1.7802222967147827, |
|
"logits/rejected": -1.7600643634796143, |
|
"logps/chosen": -11.943793296813965, |
|
"logps/rejected": -11.659793853759766, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -11.943793296813965, |
|
"rewards/margins": -0.2839995324611664, |
|
"rewards/rejected": -11.659793853759766, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 0.05226386515779408, |
|
"learning_rate": 4.793433180558423e-07, |
|
"logits/chosen": -1.803500771522522, |
|
"logits/rejected": -1.780199646949768, |
|
"logps/chosen": -11.922220230102539, |
|
"logps/rejected": -11.642582893371582, |
|
"loss": -0.0001, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -11.922220230102539, |
|
"rewards/margins": -0.2796374261379242, |
|
"rewards/rejected": -11.642582893371582, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 0.028525783518634065, |
|
"learning_rate": 4.704194240193467e-07, |
|
"logits/chosen": -1.8105300664901733, |
|
"logits/rejected": -1.8023895025253296, |
|
"logps/chosen": -11.983869552612305, |
|
"logps/rejected": -11.885191917419434, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -11.983869552612305, |
|
"rewards/margins": -0.09867729246616364, |
|
"rewards/rejected": -11.885191917419434, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 0.001398073561465244, |
|
"learning_rate": 4.6126707757412686e-07, |
|
"logits/chosen": -1.793454885482788, |
|
"logits/rejected": -1.7760779857635498, |
|
"logps/chosen": -11.986612319946289, |
|
"logps/rejected": -11.752717971801758, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -11.986612319946289, |
|
"rewards/margins": -0.2338949739933014, |
|
"rewards/rejected": -11.752717971801758, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 0.0014862685239923532, |
|
"learning_rate": 4.5189854771829086e-07, |
|
"logits/chosen": -1.7965761423110962, |
|
"logits/rejected": -1.7916818857192993, |
|
"logps/chosen": -11.609036445617676, |
|
"logps/rejected": -11.723310470581055, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -11.609036445617676, |
|
"rewards/margins": 0.11427430808544159, |
|
"rewards/rejected": -11.723310470581055, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 0.005759001469221081, |
|
"learning_rate": 4.4232639325036807e-07, |
|
"logits/chosen": -1.7666387557983398, |
|
"logits/rejected": -1.7522531747817993, |
|
"logps/chosen": -11.82097339630127, |
|
"logps/rejected": -11.537239074707031, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -11.82097339630127, |
|
"rewards/margins": -0.28373217582702637, |
|
"rewards/rejected": -11.537239074707031, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 0.008163598439494825, |
|
"learning_rate": 4.32563445933859e-07, |
|
"logits/chosen": -1.7980865240097046, |
|
"logits/rejected": -1.799093246459961, |
|
"logps/chosen": -11.458420753479004, |
|
"logps/rejected": -11.457086563110352, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -11.458420753479004, |
|
"rewards/margins": -0.001333725405856967, |
|
"rewards/rejected": -11.457086563110352, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 0.005297760091128626, |
|
"learning_rate": 4.226227932958664e-07, |
|
"logits/chosen": -1.7564678192138672, |
|
"logits/rejected": -1.7390811443328857, |
|
"logps/chosen": -11.681690216064453, |
|
"logps/rejected": -11.680364608764648, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -11.681690216064453, |
|
"rewards/margins": -0.0013252407079562545, |
|
"rewards/rejected": -11.680364608764648, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 0.006991157705444288, |
|
"learning_rate": 4.1251776108286854e-07, |
|
"logits/chosen": -1.7594162225723267, |
|
"logits/rejected": -1.7496426105499268, |
|
"logps/chosen": -11.553954124450684, |
|
"logps/rejected": -11.592232704162598, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -11.553954124450684, |
|
"rewards/margins": 0.03827826306223869, |
|
"rewards/rejected": -11.592232704162598, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 0.17801591448497012, |
|
"learning_rate": 4.022618953971514e-07, |
|
"logits/chosen": -1.7519290447235107, |
|
"logits/rejected": -1.7526050806045532, |
|
"logps/chosen": -11.367416381835938, |
|
"logps/rejected": -11.493242263793945, |
|
"loss": -0.0001, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -11.367416381835938, |
|
"rewards/margins": 0.12582536041736603, |
|
"rewards/rejected": -11.493242263793945, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 0.023941549745350215, |
|
"learning_rate": 3.918689445378477e-07, |
|
"logits/chosen": -1.7803691625595093, |
|
"logits/rejected": -1.7690092325210571, |
|
"logps/chosen": -11.14652156829834, |
|
"logps/rejected": -11.06640625, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -11.14652156829834, |
|
"rewards/margins": -0.08011455833911896, |
|
"rewards/rejected": -11.06640625, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 0.012228246875019251, |
|
"learning_rate": 3.813528405709251e-07, |
|
"logits/chosen": -1.763047456741333, |
|
"logits/rejected": -1.748477578163147, |
|
"logps/chosen": -10.784479141235352, |
|
"logps/rejected": -10.884133338928223, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -10.784479141235352, |
|
"rewards/margins": 0.09965618699789047, |
|
"rewards/rejected": -10.884133338928223, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 0.025471289629283092, |
|
"learning_rate": 3.707276806528282e-07, |
|
"logits/chosen": -1.7505123615264893, |
|
"logits/rejected": -1.726243257522583, |
|
"logps/chosen": -10.95793342590332, |
|
"logps/rejected": -10.815945625305176, |
|
"loss": -0.0004, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -10.95793342590332, |
|
"rewards/margins": -0.1419888436794281, |
|
"rewards/rejected": -10.815945625305176, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 0.041173650635296204, |
|
"learning_rate": 3.6000770813281334e-07, |
|
"logits/chosen": -1.7271692752838135, |
|
"logits/rejected": -1.7334954738616943, |
|
"logps/chosen": -10.472602844238281, |
|
"logps/rejected": -10.564123153686523, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -10.472602844238281, |
|
"rewards/margins": 0.09151904284954071, |
|
"rewards/rejected": -10.564123153686523, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 0.08997133666429326, |
|
"learning_rate": 3.4920729345930654e-07, |
|
"logits/chosen": -1.7182731628417969, |
|
"logits/rejected": -1.7184534072875977, |
|
"logps/chosen": -10.474161148071289, |
|
"logps/rejected": -10.386842727661133, |
|
"loss": -0.0002, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -10.474161148071289, |
|
"rewards/margins": -0.08731891214847565, |
|
"rewards/rejected": -10.386842727661133, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 0.1394130468386245, |
|
"learning_rate": 3.383409149158814e-07, |
|
"logits/chosen": -1.7007522583007812, |
|
"logits/rejected": -1.7036033868789673, |
|
"logps/chosen": -10.160537719726562, |
|
"logps/rejected": -10.112937927246094, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.160537719726562, |
|
"rewards/margins": -0.047600626945495605, |
|
"rewards/rejected": -10.112937927246094, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 0.020206583655585347, |
|
"learning_rate": 3.2742313921268035e-07, |
|
"logits/chosen": -1.6959460973739624, |
|
"logits/rejected": -1.6945714950561523, |
|
"logps/chosen": -10.04688549041748, |
|
"logps/rejected": -10.157293319702148, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -10.04688549041748, |
|
"rewards/margins": 0.11040810495615005, |
|
"rewards/rejected": -10.157293319702148, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 0.02211534123468387, |
|
"learning_rate": 3.1646860195929825e-07, |
|
"logits/chosen": -1.7011514902114868, |
|
"logits/rejected": -1.6924482583999634, |
|
"logps/chosen": -10.177471160888672, |
|
"logps/rejected": -10.2760648727417, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -10.177471160888672, |
|
"rewards/margins": 0.09859313070774078, |
|
"rewards/rejected": -10.2760648727417, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 0.01462304817094237, |
|
"learning_rate": 3.054919880453032e-07, |
|
"logits/chosen": -1.635061264038086, |
|
"logits/rejected": -1.6313447952270508, |
|
"logps/chosen": -10.064990043640137, |
|
"logps/rejected": -10.172506332397461, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.064990043640137, |
|
"rewards/margins": 0.10751698166131973, |
|
"rewards/rejected": -10.172506332397461, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 0.025471797170515387, |
|
"learning_rate": 2.9450801195469686e-07, |
|
"logits/chosen": -1.705881118774414, |
|
"logits/rejected": -1.7156155109405518, |
|
"logps/chosen": -10.338391304016113, |
|
"logps/rejected": -10.457733154296875, |
|
"loss": -0.0001, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -10.338391304016113, |
|
"rewards/margins": 0.11934256553649902, |
|
"rewards/rejected": -10.457733154296875, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 0.12522976970659572, |
|
"learning_rate": 2.835313980407017e-07, |
|
"logits/chosen": -1.7283920049667358, |
|
"logits/rejected": -1.693943977355957, |
|
"logps/chosen": -10.727476119995117, |
|
"logps/rejected": -10.667070388793945, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -10.727476119995117, |
|
"rewards/margins": -0.06040619686245918, |
|
"rewards/rejected": -10.667070388793945, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 0.054630239668369114, |
|
"learning_rate": 2.7257686078731973e-07, |
|
"logits/chosen": -1.7700306177139282, |
|
"logits/rejected": -1.7575089931488037, |
|
"logps/chosen": -10.57455825805664, |
|
"logps/rejected": -10.53483772277832, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -10.57455825805664, |
|
"rewards/margins": -0.039720237255096436, |
|
"rewards/rejected": -10.53483772277832, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 0.01001763169581052, |
|
"learning_rate": 2.6165908508411857e-07, |
|
"logits/chosen": -1.7749627828598022, |
|
"logits/rejected": -1.7755094766616821, |
|
"logps/chosen": -10.61900806427002, |
|
"logps/rejected": -10.749244689941406, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -10.61900806427002, |
|
"rewards/margins": 0.13023611903190613, |
|
"rewards/rejected": -10.749244689941406, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 0.005742953865366501, |
|
"learning_rate": 2.5079270654069354e-07, |
|
"logits/chosen": -1.7149406671524048, |
|
"logits/rejected": -1.7182254791259766, |
|
"logps/chosen": -10.701220512390137, |
|
"logps/rejected": -10.742330551147461, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -10.701220512390137, |
|
"rewards/margins": 0.04111091420054436, |
|
"rewards/rejected": -10.742330551147461, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 0.07672060396160495, |
|
"learning_rate": 2.399922918671867e-07, |
|
"logits/chosen": -1.7587283849716187, |
|
"logits/rejected": -1.765321969985962, |
|
"logps/chosen": -10.592453956604004, |
|
"logps/rejected": -10.729036331176758, |
|
"loss": -0.0002, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -10.592453956604004, |
|
"rewards/margins": 0.13658304512500763, |
|
"rewards/rejected": -10.729036331176758, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 0.02428213775826746, |
|
"learning_rate": 2.2927231934717176e-07, |
|
"logits/chosen": -1.7775872945785522, |
|
"logits/rejected": -1.7704694271087646, |
|
"logps/chosen": -10.622035026550293, |
|
"logps/rejected": -10.568597793579102, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.622035026550293, |
|
"rewards/margins": -0.053437769412994385, |
|
"rewards/rejected": -10.568597793579102, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 0.02860916331635737, |
|
"learning_rate": 2.1864715942907487e-07, |
|
"logits/chosen": -1.7398102283477783, |
|
"logits/rejected": -1.73589289188385, |
|
"logps/chosen": -10.513009071350098, |
|
"logps/rejected": -10.603796005249023, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -10.513009071350098, |
|
"rewards/margins": 0.09078951179981232, |
|
"rewards/rejected": -10.603796005249023, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 0.006648402581775306, |
|
"learning_rate": 2.081310554621522e-07, |
|
"logits/chosen": -1.7437083721160889, |
|
"logits/rejected": -1.7521770000457764, |
|
"logps/chosen": -10.70260238647461, |
|
"logps/rejected": -10.796627044677734, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -10.70260238647461, |
|
"rewards/margins": 0.09402483701705933, |
|
"rewards/rejected": -10.796627044677734, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 0.019271183476255463, |
|
"learning_rate": 1.9773810460284862e-07, |
|
"logits/chosen": -1.7898304462432861, |
|
"logits/rejected": -1.788010835647583, |
|
"logps/chosen": -10.732610702514648, |
|
"logps/rejected": -10.593297958374023, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -10.732610702514648, |
|
"rewards/margins": -0.1393119990825653, |
|
"rewards/rejected": -10.593297958374023, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 0.02906299326320322, |
|
"learning_rate": 1.874822389171314e-07, |
|
"logits/chosen": -1.7548414468765259, |
|
"logits/rejected": -1.7560005187988281, |
|
"logps/chosen": -10.635225296020508, |
|
"logps/rejected": -10.656919479370117, |
|
"loss": -0.0007, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -10.635225296020508, |
|
"rewards/margins": 0.02169397473335266, |
|
"rewards/rejected": -10.656919479370117, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 0.014440585705017344, |
|
"learning_rate": 1.7737720670413356e-07, |
|
"logits/chosen": -1.796992540359497, |
|
"logits/rejected": -1.7862850427627563, |
|
"logps/chosen": -10.90754508972168, |
|
"logps/rejected": -10.712991714477539, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.90754508972168, |
|
"rewards/margins": -0.19455406069755554, |
|
"rewards/rejected": -10.712991714477539, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 0.04236411566791533, |
|
"learning_rate": 1.6743655406614095e-07, |
|
"logits/chosen": -1.7757809162139893, |
|
"logits/rejected": -1.762711524963379, |
|
"logps/chosen": -10.787301063537598, |
|
"logps/rejected": -10.89518928527832, |
|
"loss": -0.0002, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -10.787301063537598, |
|
"rewards/margins": 0.10788729041814804, |
|
"rewards/rejected": -10.89518928527832, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 0.027523703126398503, |
|
"learning_rate": 1.5767360674963198e-07, |
|
"logits/chosen": -1.749119520187378, |
|
"logits/rejected": -1.7414264678955078, |
|
"logps/chosen": -10.739618301391602, |
|
"logps/rejected": -10.675132751464844, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -10.739618301391602, |
|
"rewards/margins": -0.06448470056056976, |
|
"rewards/rejected": -10.675132751464844, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 0.014789739092952364, |
|
"learning_rate": 1.4810145228170922e-07, |
|
"logits/chosen": -1.7625138759613037, |
|
"logits/rejected": -1.7610912322998047, |
|
"logps/chosen": -10.482194900512695, |
|
"logps/rejected": -10.562524795532227, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -10.482194900512695, |
|
"rewards/margins": 0.0803314819931984, |
|
"rewards/rejected": -10.562524795532227, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 0.01922207578951387, |
|
"learning_rate": 1.3873292242587306e-07, |
|
"logits/chosen": -1.7635924816131592, |
|
"logits/rejected": -1.7583682537078857, |
|
"logps/chosen": -10.654876708984375, |
|
"logps/rejected": -10.662198066711426, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -10.654876708984375, |
|
"rewards/margins": 0.0073205530643463135, |
|
"rewards/rejected": -10.662198066711426, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 0.03539872090887725, |
|
"learning_rate": 1.295805759806533e-07, |
|
"logits/chosen": -1.7667583227157593, |
|
"logits/rejected": -1.7786306142807007, |
|
"logps/chosen": -10.630470275878906, |
|
"logps/rejected": -10.790535926818848, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.630470275878906, |
|
"rewards/margins": 0.16006465256214142, |
|
"rewards/rejected": -10.790535926818848, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 0.022967566874532427, |
|
"learning_rate": 1.2065668194415777e-07, |
|
"logits/chosen": -1.7326412200927734, |
|
"logits/rejected": -1.729612946510315, |
|
"logps/chosen": -10.603561401367188, |
|
"logps/rejected": -10.510968208312988, |
|
"loss": -0.0001, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -10.603561401367188, |
|
"rewards/margins": -0.09259293973445892, |
|
"rewards/rejected": -10.510968208312988, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 0.012752562963674085, |
|
"learning_rate": 1.1197320306710923e-07, |
|
"logits/chosen": -1.762995958328247, |
|
"logits/rejected": -1.743704080581665, |
|
"logps/chosen": -10.666765213012695, |
|
"logps/rejected": -10.419971466064453, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -10.666765213012695, |
|
"rewards/margins": -0.24679343402385712, |
|
"rewards/rejected": -10.419971466064453, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 0.009738702402905076, |
|
"learning_rate": 1.035417798164145e-07, |
|
"logits/chosen": -1.747469186782837, |
|
"logits/rejected": -1.739599585533142, |
|
"logps/chosen": -10.463155746459961, |
|
"logps/rejected": -10.498678207397461, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -10.463155746459961, |
|
"rewards/margins": 0.03552195057272911, |
|
"rewards/rejected": -10.498678207397461, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 0.06431835444147056, |
|
"learning_rate": 9.537371477076535e-08, |
|
"logits/chosen": -1.7092878818511963, |
|
"logits/rejected": -1.6988884210586548, |
|
"logps/chosen": -10.540400505065918, |
|
"logps/rejected": -10.477300643920898, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -10.540400505065918, |
|
"rewards/margins": -0.06309934705495834, |
|
"rewards/rejected": -10.477300643920898, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 0.052909632347048964, |
|
"learning_rate": 8.747995746918898e-08, |
|
"logits/chosen": -1.7701542377471924, |
|
"logits/rejected": -1.758774757385254, |
|
"logps/chosen": -10.620153427124023, |
|
"logps/rejected": -10.608236312866211, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -10.620153427124023, |
|
"rewards/margins": -0.011916184797883034, |
|
"rewards/rejected": -10.608236312866211, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 0.012450945922480083, |
|
"learning_rate": 7.987108973285888e-08, |
|
"logits/chosen": -1.7810709476470947, |
|
"logits/rejected": -1.7817420959472656, |
|
"logps/chosen": -10.626587867736816, |
|
"logps/rejected": -10.649564743041992, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.626587867736816, |
|
"rewards/margins": 0.022976160049438477, |
|
"rewards/rejected": -10.649564743041992, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 0.004460244436581798, |
|
"learning_rate": 7.255731147984174e-08, |
|
"logits/chosen": -1.7550201416015625, |
|
"logits/rejected": -1.7136766910552979, |
|
"logps/chosen": -10.767892837524414, |
|
"logps/rejected": -10.837907791137695, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -10.767892837524414, |
|
"rewards/margins": 0.0700153112411499, |
|
"rewards/rejected": -10.837907791137695, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 0.019593651768819848, |
|
"learning_rate": 6.554842705179898e-08, |
|
"logits/chosen": -1.778969407081604, |
|
"logits/rejected": -1.75950026512146, |
|
"logps/chosen": -10.800111770629883, |
|
"logps/rejected": -10.750751495361328, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -10.800111770629883, |
|
"rewards/margins": -0.04935937374830246, |
|
"rewards/rejected": -10.750751495361328, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 0.03689590756531894, |
|
"learning_rate": 5.885383207096832e-08, |
|
"logits/chosen": -1.78683602809906, |
|
"logits/rejected": -1.7717739343643188, |
|
"logps/chosen": -10.598950386047363, |
|
"logps/rejected": -10.57533073425293, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -10.598950386047363, |
|
"rewards/margins": -0.02361871860921383, |
|
"rewards/rejected": -10.57533073425293, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 0.012548371625903876, |
|
"learning_rate": 5.2482500845047165e-08, |
|
"logits/chosen": -1.743941307067871, |
|
"logits/rejected": -1.7405914068222046, |
|
"logps/chosen": -10.441683769226074, |
|
"logps/rejected": -10.442001342773438, |
|
"loss": -0.0005, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -10.441683769226074, |
|
"rewards/margins": 0.00031833647517487407, |
|
"rewards/rejected": -10.442001342773438, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 0.013565529452432605, |
|
"learning_rate": 4.644297433686162e-08, |
|
"logits/chosen": -1.741358757019043, |
|
"logits/rejected": -1.7224204540252686, |
|
"logps/chosen": -10.534662246704102, |
|
"logps/rejected": -10.434551239013672, |
|
"loss": -0.0002, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.534662246704102, |
|
"rewards/margins": -0.10010989010334015, |
|
"rewards/rejected": -10.434551239013672, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 0.10515870132328753, |
|
"learning_rate": 4.074334871494558e-08, |
|
"logits/chosen": -1.7736488580703735, |
|
"logits/rejected": -1.7777044773101807, |
|
"logps/chosen": -10.679037094116211, |
|
"logps/rejected": -10.682011604309082, |
|
"loss": -0.0002, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -10.679037094116211, |
|
"rewards/margins": 0.002974617527797818, |
|
"rewards/rejected": -10.682011604309082, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 0.009978883732705937, |
|
"learning_rate": 3.5391264500382e-08, |
|
"logits/chosen": -1.774903655052185, |
|
"logits/rejected": -1.7690646648406982, |
|
"logps/chosen": -10.4728364944458, |
|
"logps/rejected": -10.374021530151367, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -10.4728364944458, |
|
"rewards/margins": -0.09881408512592316, |
|
"rewards/rejected": -10.374021530151367, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 0.006390665968550676, |
|
"learning_rate": 3.0393896324452226e-08, |
|
"logits/chosen": -1.7874574661254883, |
|
"logits/rejected": -1.7859035730361938, |
|
"logps/chosen": -10.748700141906738, |
|
"logps/rejected": -10.698121070861816, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -10.748700141906738, |
|
"rewards/margins": -0.050578661262989044, |
|
"rewards/rejected": -10.698121070861816, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 0.0968516068570453, |
|
"learning_rate": 2.5757943310825026e-08, |
|
"logits/chosen": -1.7406418323516846, |
|
"logits/rejected": -1.7307815551757812, |
|
"logps/chosen": -10.560035705566406, |
|
"logps/rejected": -10.44668960571289, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -10.560035705566406, |
|
"rewards/margins": -0.11334645748138428, |
|
"rewards/rejected": -10.44668960571289, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 0.00590008646483764, |
|
"learning_rate": 2.148962009517823e-08, |
|
"logits/chosen": -1.7577073574066162, |
|
"logits/rejected": -1.7527620792388916, |
|
"logps/chosen": -10.674304008483887, |
|
"logps/rejected": -10.642423629760742, |
|
"loss": 0.0, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.674304008483887, |
|
"rewards/margins": -0.03188156336545944, |
|
"rewards/rejected": -10.642423629760742, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 0.013120062560596776, |
|
"learning_rate": 1.759464849429082e-08, |
|
"logits/chosen": -1.7664194107055664, |
|
"logits/rejected": -1.760266900062561, |
|
"logps/chosen": -10.494427680969238, |
|
"logps/rejected": -10.483784675598145, |
|
"loss": -0.0002, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -10.494427680969238, |
|
"rewards/margins": -0.010644030757248402, |
|
"rewards/rejected": -10.483784675598145, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 0.1742770180554167, |
|
"learning_rate": 1.4078249835774169e-08, |
|
"logits/chosen": -1.778710961341858, |
|
"logits/rejected": -1.7815498113632202, |
|
"logps/chosen": -10.63943862915039, |
|
"logps/rejected": -10.601329803466797, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -10.63943862915039, |
|
"rewards/margins": -0.03810780122876167, |
|
"rewards/rejected": -10.601329803466797, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 0.0404689579696443, |
|
"learning_rate": 1.0945137958723705e-08, |
|
"logits/chosen": -1.7349185943603516, |
|
"logits/rejected": -1.730891227722168, |
|
"logps/chosen": -10.740056991577148, |
|
"logps/rejected": -10.685806274414062, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -10.740056991577148, |
|
"rewards/margins": -0.054250918328762054, |
|
"rewards/rejected": -10.685806274414062, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 0.09262486522903134, |
|
"learning_rate": 8.19951289467482e-09, |
|
"logits/chosen": -1.7872415781021118, |
|
"logits/rejected": -1.776834487915039, |
|
"logps/chosen": -10.619129180908203, |
|
"logps/rejected": -10.654691696166992, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -10.619129180908203, |
|
"rewards/margins": 0.03556279465556145, |
|
"rewards/rejected": -10.654691696166992, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 0.02488581461219639, |
|
"learning_rate": 5.84505523733293e-09, |
|
"logits/chosen": -1.7146650552749634, |
|
"logits/rejected": -1.6959879398345947, |
|
"logps/chosen": -10.7011137008667, |
|
"logps/rejected": -10.602450370788574, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -10.7011137008667, |
|
"rewards/margins": -0.09866499900817871, |
|
"rewards/rejected": -10.602450370788574, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 0.09420372269289778, |
|
"learning_rate": 3.8849212086261466e-09, |
|
"logits/chosen": -1.7629196643829346, |
|
"logits/rejected": -1.7624677419662476, |
|
"logps/chosen": -10.534662246704102, |
|
"logps/rejected": -10.774109840393066, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -10.534662246704102, |
|
"rewards/margins": 0.23944692313671112, |
|
"rewards/rejected": -10.774109840393066, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 0.012886454137460854, |
|
"learning_rate": 2.3217384276938756e-09, |
|
"logits/chosen": -1.7755534648895264, |
|
"logits/rejected": -1.7767679691314697, |
|
"logps/chosen": -10.819222450256348, |
|
"logps/rejected": -10.87352180480957, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -10.819222450256348, |
|
"rewards/margins": 0.05429871007800102, |
|
"rewards/rejected": -10.87352180480957, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 0.12756283914628896, |
|
"learning_rate": 1.1576023884836472e-09, |
|
"logits/chosen": -1.783143401145935, |
|
"logits/rejected": -1.7813498973846436, |
|
"logps/chosen": -10.936141967773438, |
|
"logps/rejected": -10.884490013122559, |
|
"loss": -0.0003, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -10.936141967773438, |
|
"rewards/margins": -0.051651883870363235, |
|
"rewards/rejected": -10.884490013122559, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 0.03220532320786913, |
|
"learning_rate": 3.940736506780395e-10, |
|
"logits/chosen": -1.7732326984405518, |
|
"logits/rejected": -1.7703964710235596, |
|
"logps/chosen": -10.523382186889648, |
|
"logps/rejected": -10.703128814697266, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -10.523382186889648, |
|
"rewards/margins": 0.17974743247032166, |
|
"rewards/rejected": -10.703128814697266, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 0.015956952957413534, |
|
"learning_rate": 3.2175747716822744e-11, |
|
"logits/chosen": -1.7491505146026611, |
|
"logits/rejected": -1.731268286705017, |
|
"logps/chosen": -10.880552291870117, |
|
"logps/rejected": -10.799489974975586, |
|
"loss": -0.0, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -10.880552291870117, |
|
"rewards/margins": -0.08106319606304169, |
|
"rewards/rejected": -10.799489974975586, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.998691442030882, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 4.822632607948269e-06, |
|
"train_runtime": 8193.4913, |
|
"train_samples_per_second": 7.461, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|