{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9956659924877203, |
|
"eval_steps": 500, |
|
"global_step": 1296, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.023114706732158336, |
|
"grad_norm": 66.88623809814453, |
|
"learning_rate": 7.692307692307692e-08, |
|
"logits/chosen": -0.3347979187965393, |
|
"logits/rejected": -0.31468525528907776, |
|
"logps/chosen": -269.48956298828125, |
|
"logps/rejected": -267.7704162597656, |
|
"loss": 2.6159, |
|
"nll_loss": 0.7417957186698914, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -26.94895362854004, |
|
"rewards/margins": -0.1719130575656891, |
|
"rewards/rejected": -26.777042388916016, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04622941346431667, |
|
"grad_norm": 57.376678466796875, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": -0.3397385776042938, |
|
"logits/rejected": -0.3220021724700928, |
|
"logps/chosen": -263.6553039550781, |
|
"logps/rejected": -270.1563415527344, |
|
"loss": 2.5433, |
|
"nll_loss": 0.7268518805503845, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -26.365528106689453, |
|
"rewards/margins": 0.650107741355896, |
|
"rewards/rejected": -27.015636444091797, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06934412019647501, |
|
"grad_norm": 61.90145492553711, |
|
"learning_rate": 2.3076923076923078e-07, |
|
"logits/chosen": -0.283037930727005, |
|
"logits/rejected": -0.2716462016105652, |
|
"logps/chosen": -260.65875244140625, |
|
"logps/rejected": -264.00567626953125, |
|
"loss": 2.4476, |
|
"nll_loss": 0.7402013540267944, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -26.065876007080078, |
|
"rewards/margins": 0.33469510078430176, |
|
"rewards/rejected": -26.40056800842285, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09245882692863334, |
|
"grad_norm": 50.3497428894043, |
|
"learning_rate": 3.076923076923077e-07, |
|
"logits/chosen": -0.3415708541870117, |
|
"logits/rejected": -0.33246317505836487, |
|
"logps/chosen": -246.55178833007812, |
|
"logps/rejected": -248.4236297607422, |
|
"loss": 2.377, |
|
"nll_loss": 0.748863697052002, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -24.655179977416992, |
|
"rewards/margins": 0.18718396127223969, |
|
"rewards/rejected": -24.842365264892578, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11557353366079168, |
|
"grad_norm": 49.670677185058594, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -0.408900648355484, |
|
"logits/rejected": -0.38666287064552307, |
|
"logps/chosen": -251.69113159179688, |
|
"logps/rejected": -250.1224822998047, |
|
"loss": 2.5031, |
|
"nll_loss": 0.6844735145568848, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -25.169116973876953, |
|
"rewards/margins": -0.15686817467212677, |
|
"rewards/rejected": -25.01224708557129, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13868824039295002, |
|
"grad_norm": 56.84200668334961, |
|
"learning_rate": 4.6153846153846156e-07, |
|
"logits/chosen": -0.5338214635848999, |
|
"logits/rejected": -0.5284141302108765, |
|
"logps/chosen": -228.7010498046875, |
|
"logps/rejected": -227.7324676513672, |
|
"loss": 2.3944, |
|
"nll_loss": 0.6640177965164185, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -22.870105743408203, |
|
"rewards/margins": -0.09685804694890976, |
|
"rewards/rejected": -22.77324867248535, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16180294712510834, |
|
"grad_norm": 54.86004638671875, |
|
"learning_rate": 5.384615384615384e-07, |
|
"logits/chosen": -0.7272528409957886, |
|
"logits/rejected": -0.7068361639976501, |
|
"logps/chosen": -207.84902954101562, |
|
"logps/rejected": -207.88827514648438, |
|
"loss": 2.3592, |
|
"nll_loss": 0.6152782440185547, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -20.78490447998047, |
|
"rewards/margins": 0.0039252908900380135, |
|
"rewards/rejected": -20.788829803466797, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1849176538572667, |
|
"grad_norm": 51.230716705322266, |
|
"learning_rate": 6.153846153846154e-07, |
|
"logits/chosen": -0.8364453315734863, |
|
"logits/rejected": -0.8120291829109192, |
|
"logps/chosen": -197.1513671875, |
|
"logps/rejected": -198.78500366210938, |
|
"loss": 2.1038, |
|
"nll_loss": 0.5486661195755005, |
|
"rewards/accuracies": 0.528124988079071, |
|
"rewards/chosen": -19.715137481689453, |
|
"rewards/margins": 0.16336408257484436, |
|
"rewards/rejected": -19.87849998474121, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.208032360589425, |
|
"grad_norm": 49.48822784423828, |
|
"learning_rate": 6.923076923076922e-07, |
|
"logits/chosen": -0.6982623338699341, |
|
"logits/rejected": -0.6783713698387146, |
|
"logps/chosen": -170.0928497314453, |
|
"logps/rejected": -176.90493774414062, |
|
"loss": 1.9408, |
|
"nll_loss": 0.47356802225112915, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -17.00928497314453, |
|
"rewards/margins": 0.6812085509300232, |
|
"rewards/rejected": -17.690494537353516, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23114706732158335, |
|
"grad_norm": 51.29805374145508, |
|
"learning_rate": 7.692307692307693e-07, |
|
"logits/chosen": -0.5311590433120728, |
|
"logits/rejected": -0.5080990195274353, |
|
"logps/chosen": -162.14718627929688, |
|
"logps/rejected": -163.9295196533203, |
|
"loss": 2.045, |
|
"nll_loss": 0.44545722007751465, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -16.214717864990234, |
|
"rewards/margins": 0.17823390662670135, |
|
"rewards/rejected": -16.3929500579834, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2542617740537417, |
|
"grad_norm": 48.0262565612793, |
|
"learning_rate": 8.461538461538461e-07, |
|
"logits/chosen": -0.40089306235313416, |
|
"logits/rejected": -0.37224632501602173, |
|
"logps/chosen": -149.81529235839844, |
|
"logps/rejected": -153.31297302246094, |
|
"loss": 1.8155, |
|
"nll_loss": 0.41241997480392456, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -14.981531143188477, |
|
"rewards/margins": 0.3497660458087921, |
|
"rewards/rejected": -15.331296920776367, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27737648078590005, |
|
"grad_norm": 47.4226188659668, |
|
"learning_rate": 9.230769230769231e-07, |
|
"logits/chosen": -0.3508472144603729, |
|
"logits/rejected": -0.3243268132209778, |
|
"logps/chosen": -155.70640563964844, |
|
"logps/rejected": -155.49754333496094, |
|
"loss": 1.8406, |
|
"nll_loss": 0.42402368783950806, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -15.570638656616211, |
|
"rewards/margins": -0.020885681733489037, |
|
"rewards/rejected": -15.549756050109863, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.30049118751805837, |
|
"grad_norm": 55.037635803222656, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.39025238156318665, |
|
"logits/rejected": -0.36617571115493774, |
|
"logps/chosen": -152.71519470214844, |
|
"logps/rejected": -160.30941772460938, |
|
"loss": 1.8955, |
|
"nll_loss": 0.42156749963760376, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -15.271520614624023, |
|
"rewards/margins": 0.7594239711761475, |
|
"rewards/rejected": -16.030942916870117, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3236058942502167, |
|
"grad_norm": 59.5285530090332, |
|
"learning_rate": 9.914236706689536e-07, |
|
"logits/chosen": -0.4246004521846771, |
|
"logits/rejected": -0.4167487621307373, |
|
"logps/chosen": -142.45510864257812, |
|
"logps/rejected": -147.0147247314453, |
|
"loss": 1.8864, |
|
"nll_loss": 0.40775737166404724, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -14.245511054992676, |
|
"rewards/margins": 0.4559602737426758, |
|
"rewards/rejected": -14.701472282409668, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34672060098237506, |
|
"grad_norm": 65.72272491455078, |
|
"learning_rate": 9.828473413379073e-07, |
|
"logits/chosen": -0.41359296441078186, |
|
"logits/rejected": -0.4051112234592438, |
|
"logps/chosen": -144.20643615722656, |
|
"logps/rejected": -148.5088653564453, |
|
"loss": 1.9229, |
|
"nll_loss": 0.41948890686035156, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -14.420641899108887, |
|
"rewards/margins": 0.43024420738220215, |
|
"rewards/rejected": -14.850886344909668, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3698353077145334, |
|
"grad_norm": 44.7535400390625, |
|
"learning_rate": 9.74271012006861e-07, |
|
"logits/chosen": -0.4151730537414551, |
|
"logits/rejected": -0.3891311287879944, |
|
"logps/chosen": -159.87423706054688, |
|
"logps/rejected": -162.4603729248047, |
|
"loss": 1.7633, |
|
"nll_loss": 0.4239375591278076, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -15.987421989440918, |
|
"rewards/margins": 0.25861597061157227, |
|
"rewards/rejected": -16.246036529541016, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3929500144466917, |
|
"grad_norm": 54.54914093017578, |
|
"learning_rate": 9.656946826758147e-07, |
|
"logits/chosen": -0.4074994921684265, |
|
"logits/rejected": -0.39586925506591797, |
|
"logps/chosen": -152.30300903320312, |
|
"logps/rejected": -158.6874237060547, |
|
"loss": 1.6418, |
|
"nll_loss": 0.4169434607028961, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -15.230302810668945, |
|
"rewards/margins": 0.6384423971176147, |
|
"rewards/rejected": -15.868745803833008, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41606472117885, |
|
"grad_norm": 50.04910659790039, |
|
"learning_rate": 9.571183533447683e-07, |
|
"logits/chosen": -0.35864660143852234, |
|
"logits/rejected": -0.3550480604171753, |
|
"logps/chosen": -148.34335327148438, |
|
"logps/rejected": -154.14390563964844, |
|
"loss": 1.6776, |
|
"nll_loss": 0.4253178536891937, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -14.83433723449707, |
|
"rewards/margins": 0.5800559520721436, |
|
"rewards/rejected": -15.414392471313477, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4391794279110084, |
|
"grad_norm": 50.81975555419922, |
|
"learning_rate": 9.485420240137221e-07, |
|
"logits/chosen": -0.369549036026001, |
|
"logits/rejected": -0.35568320751190186, |
|
"logps/chosen": -158.10638427734375, |
|
"logps/rejected": -163.52792358398438, |
|
"loss": 1.7469, |
|
"nll_loss": 0.43363142013549805, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -15.810638427734375, |
|
"rewards/margins": 0.5421568155288696, |
|
"rewards/rejected": -16.352794647216797, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4622941346431667, |
|
"grad_norm": 45.42930221557617, |
|
"learning_rate": 9.399656946826757e-07, |
|
"logits/chosen": -0.3367421329021454, |
|
"logits/rejected": -0.3279821276664734, |
|
"logps/chosen": -152.1122589111328, |
|
"logps/rejected": -158.53919982910156, |
|
"loss": 1.7332, |
|
"nll_loss": 0.4192579686641693, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -15.211225509643555, |
|
"rewards/margins": 0.6426929235458374, |
|
"rewards/rejected": -15.853918075561523, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48540884137532503, |
|
"grad_norm": 56.53390884399414, |
|
"learning_rate": 9.313893653516295e-07, |
|
"logits/chosen": -0.38974902033805847, |
|
"logits/rejected": -0.3610435128211975, |
|
"logps/chosen": -150.60289001464844, |
|
"logps/rejected": -152.52676391601562, |
|
"loss": 1.68, |
|
"nll_loss": 0.4199536442756653, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -15.06028938293457, |
|
"rewards/margins": 0.19238761067390442, |
|
"rewards/rejected": -15.252676010131836, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5085235481074833, |
|
"grad_norm": 51.94132995605469, |
|
"learning_rate": 9.228130360205832e-07, |
|
"logits/chosen": -0.4055999219417572, |
|
"logits/rejected": -0.3751467168331146, |
|
"logps/chosen": -166.01934814453125, |
|
"logps/rejected": -171.43978881835938, |
|
"loss": 1.7614, |
|
"nll_loss": 0.44684529304504395, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -16.6019344329834, |
|
"rewards/margins": 0.5420453548431396, |
|
"rewards/rejected": -17.143980026245117, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5316382548396418, |
|
"grad_norm": 47.48794174194336, |
|
"learning_rate": 9.142367066895368e-07, |
|
"logits/chosen": -0.4172639846801758, |
|
"logits/rejected": -0.39604124426841736, |
|
"logps/chosen": -161.71559143066406, |
|
"logps/rejected": -167.9268341064453, |
|
"loss": 1.7662, |
|
"nll_loss": 0.43918871879577637, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -16.171558380126953, |
|
"rewards/margins": 0.6211244463920593, |
|
"rewards/rejected": -16.792682647705078, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5547529615718001, |
|
"grad_norm": 47.16508865356445, |
|
"learning_rate": 9.056603773584905e-07, |
|
"logits/chosen": -0.4093747138977051, |
|
"logits/rejected": -0.39282265305519104, |
|
"logps/chosen": -160.66146850585938, |
|
"logps/rejected": -166.21859741210938, |
|
"loss": 1.5991, |
|
"nll_loss": 0.4364839196205139, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -16.06614875793457, |
|
"rewards/margins": 0.5557123422622681, |
|
"rewards/rejected": -16.62186050415039, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5778676683039584, |
|
"grad_norm": 55.471988677978516, |
|
"learning_rate": 8.970840480274442e-07, |
|
"logits/chosen": -0.3893429636955261, |
|
"logits/rejected": -0.371797651052475, |
|
"logps/chosen": -157.74957275390625, |
|
"logps/rejected": -160.80726623535156, |
|
"loss": 1.7402, |
|
"nll_loss": 0.41964635252952576, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -15.774957656860352, |
|
"rewards/margins": 0.3057698607444763, |
|
"rewards/rejected": -16.08072853088379, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6009823750361167, |
|
"grad_norm": 51.49521255493164, |
|
"learning_rate": 8.88507718696398e-07, |
|
"logits/chosen": -0.508994460105896, |
|
"logits/rejected": -0.49335479736328125, |
|
"logps/chosen": -152.62657165527344, |
|
"logps/rejected": -161.82626342773438, |
|
"loss": 1.5932, |
|
"nll_loss": 0.4252961277961731, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -15.262657165527344, |
|
"rewards/margins": 0.919969379901886, |
|
"rewards/rejected": -16.182626724243164, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.624097081768275, |
|
"grad_norm": 42.050086975097656, |
|
"learning_rate": 8.799313893653516e-07, |
|
"logits/chosen": -0.5146248936653137, |
|
"logits/rejected": -0.5052975416183472, |
|
"logps/chosen": -153.78634643554688, |
|
"logps/rejected": -161.01736450195312, |
|
"loss": 1.6238, |
|
"nll_loss": 0.4203058183193207, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -15.378637313842773, |
|
"rewards/margins": 0.723101019859314, |
|
"rewards/rejected": -16.10173797607422, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6472117885004334, |
|
"grad_norm": 47.4835205078125, |
|
"learning_rate": 8.713550600343052e-07, |
|
"logits/chosen": -0.45883655548095703, |
|
"logits/rejected": -0.4298950135707855, |
|
"logps/chosen": -156.53761291503906, |
|
"logps/rejected": -165.87417602539062, |
|
"loss": 1.611, |
|
"nll_loss": 0.4291974604129791, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -15.65376091003418, |
|
"rewards/margins": 0.9336563944816589, |
|
"rewards/rejected": -16.58741569519043, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6703264952325917, |
|
"grad_norm": 52.52881622314453, |
|
"learning_rate": 8.62778730703259e-07, |
|
"logits/chosen": -0.5712844133377075, |
|
"logits/rejected": -0.5348969101905823, |
|
"logps/chosen": -159.44664001464844, |
|
"logps/rejected": -164.33367919921875, |
|
"loss": 1.605, |
|
"nll_loss": 0.41903525590896606, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -15.944664001464844, |
|
"rewards/margins": 0.4887046813964844, |
|
"rewards/rejected": -16.433368682861328, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6934412019647501, |
|
"grad_norm": 40.79169464111328, |
|
"learning_rate": 8.542024013722127e-07, |
|
"logits/chosen": -0.4996193051338196, |
|
"logits/rejected": -0.4892319142818451, |
|
"logps/chosen": -161.40255737304688, |
|
"logps/rejected": -170.85952758789062, |
|
"loss": 1.6188, |
|
"nll_loss": 0.43107661604881287, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -16.140254974365234, |
|
"rewards/margins": 0.9456993341445923, |
|
"rewards/rejected": -17.085954666137695, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7165559086969084, |
|
"grad_norm": 46.80116653442383, |
|
"learning_rate": 8.456260720411664e-07, |
|
"logits/chosen": -0.5246766805648804, |
|
"logits/rejected": -0.5159187316894531, |
|
"logps/chosen": -166.08078002929688, |
|
"logps/rejected": -169.9657440185547, |
|
"loss": 1.6674, |
|
"nll_loss": 0.4305740296840668, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -16.60807991027832, |
|
"rewards/margins": 0.38849523663520813, |
|
"rewards/rejected": -16.99657440185547, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7396706154290668, |
|
"grad_norm": 44.07027053833008, |
|
"learning_rate": 8.3704974271012e-07, |
|
"logits/chosen": -0.5243274569511414, |
|
"logits/rejected": -0.5057517290115356, |
|
"logps/chosen": -159.46981811523438, |
|
"logps/rejected": -168.90744018554688, |
|
"loss": 1.6418, |
|
"nll_loss": 0.4369947910308838, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -15.946983337402344, |
|
"rewards/margins": 0.9437603950500488, |
|
"rewards/rejected": -16.890743255615234, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7627853221612251, |
|
"grad_norm": 47.953094482421875, |
|
"learning_rate": 8.284734133790737e-07, |
|
"logits/chosen": -0.5663309097290039, |
|
"logits/rejected": -0.5677110552787781, |
|
"logps/chosen": -157.61669921875, |
|
"logps/rejected": -165.10401916503906, |
|
"loss": 1.632, |
|
"nll_loss": 0.4376348853111267, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -15.761670112609863, |
|
"rewards/margins": 0.7487320303916931, |
|
"rewards/rejected": -16.51040267944336, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7859000288933834, |
|
"grad_norm": 49.6301383972168, |
|
"learning_rate": 8.198970840480274e-07, |
|
"logits/chosen": -0.6203452348709106, |
|
"logits/rejected": -0.6113607287406921, |
|
"logps/chosen": -163.147216796875, |
|
"logps/rejected": -169.24349975585938, |
|
"loss": 1.5495, |
|
"nll_loss": 0.44397497177124023, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -16.314722061157227, |
|
"rewards/margins": 0.6096271872520447, |
|
"rewards/rejected": -16.92435073852539, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8090147356255417, |
|
"grad_norm": 49.70254135131836, |
|
"learning_rate": 8.113207547169812e-07, |
|
"logits/chosen": -0.6143781542778015, |
|
"logits/rejected": -0.6132633090019226, |
|
"logps/chosen": -159.93780517578125, |
|
"logps/rejected": -169.04864501953125, |
|
"loss": 1.6389, |
|
"nll_loss": 0.41862422227859497, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.993780136108398, |
|
"rewards/margins": 0.9110851287841797, |
|
"rewards/rejected": -16.904865264892578, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8321294423577, |
|
"grad_norm": 53.936744689941406, |
|
"learning_rate": 8.027444253859347e-07, |
|
"logits/chosen": -0.5789452791213989, |
|
"logits/rejected": -0.5579988360404968, |
|
"logps/chosen": -163.99453735351562, |
|
"logps/rejected": -169.4527130126953, |
|
"loss": 1.6607, |
|
"nll_loss": 0.43801170587539673, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -16.39945411682129, |
|
"rewards/margins": 0.545818030834198, |
|
"rewards/rejected": -16.945270538330078, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8552441490898585, |
|
"grad_norm": 49.6528434753418, |
|
"learning_rate": 7.941680960548884e-07, |
|
"logits/chosen": -0.6104979515075684, |
|
"logits/rejected": -0.5787642002105713, |
|
"logps/chosen": -156.12814331054688, |
|
"logps/rejected": -166.38809204101562, |
|
"loss": 1.5211, |
|
"nll_loss": 0.4215327203273773, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -15.612813949584961, |
|
"rewards/margins": 1.0259974002838135, |
|
"rewards/rejected": -16.638811111450195, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8783588558220168, |
|
"grad_norm": 56.63078308105469, |
|
"learning_rate": 7.855917667238422e-07, |
|
"logits/chosen": -0.5714787840843201, |
|
"logits/rejected": -0.5493099093437195, |
|
"logps/chosen": -159.0956573486328, |
|
"logps/rejected": -167.24899291992188, |
|
"loss": 1.5505, |
|
"nll_loss": 0.44679298996925354, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -15.909564018249512, |
|
"rewards/margins": 0.8153331875801086, |
|
"rewards/rejected": -16.724897384643555, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9014735625541751, |
|
"grad_norm": 48.641788482666016, |
|
"learning_rate": 7.770154373927959e-07, |
|
"logits/chosen": -0.5352247357368469, |
|
"logits/rejected": -0.5140178799629211, |
|
"logps/chosen": -159.83766174316406, |
|
"logps/rejected": -170.78448486328125, |
|
"loss": 1.5767, |
|
"nll_loss": 0.4272715449333191, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -15.9837646484375, |
|
"rewards/margins": 1.0946825742721558, |
|
"rewards/rejected": -17.078449249267578, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9245882692863334, |
|
"grad_norm": 48.1425666809082, |
|
"learning_rate": 7.684391080617495e-07, |
|
"logits/chosen": -0.4901934564113617, |
|
"logits/rejected": -0.46460065245628357, |
|
"logps/chosen": -157.80355834960938, |
|
"logps/rejected": -165.04429626464844, |
|
"loss": 1.5406, |
|
"nll_loss": 0.4312973618507385, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": -15.780357360839844, |
|
"rewards/margins": 0.7240732908248901, |
|
"rewards/rejected": -16.504430770874023, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9477029760184917, |
|
"grad_norm": 49.3435173034668, |
|
"learning_rate": 7.598627787307032e-07, |
|
"logits/chosen": -0.46225637197494507, |
|
"logits/rejected": -0.45260077714920044, |
|
"logps/chosen": -149.41693115234375, |
|
"logps/rejected": -160.54165649414062, |
|
"loss": 1.5048, |
|
"nll_loss": 0.4266100823879242, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -14.941696166992188, |
|
"rewards/margins": 1.1124706268310547, |
|
"rewards/rejected": -16.05416488647461, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9708176827506501, |
|
"grad_norm": 44.590450286865234, |
|
"learning_rate": 7.512864493996569e-07, |
|
"logits/chosen": -0.557196855545044, |
|
"logits/rejected": -0.5299938321113586, |
|
"logps/chosen": -159.45950317382812, |
|
"logps/rejected": -168.58143615722656, |
|
"loss": 1.4956, |
|
"nll_loss": 0.4551734924316406, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -15.945948600769043, |
|
"rewards/margins": 0.9121924638748169, |
|
"rewards/rejected": -16.858142852783203, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9939323894828085, |
|
"grad_norm": 51.070491790771484, |
|
"learning_rate": 7.427101200686106e-07, |
|
"logits/chosen": -0.5334554314613342, |
|
"logits/rejected": -0.5415940284729004, |
|
"logps/chosen": -159.59457397460938, |
|
"logps/rejected": -169.33242797851562, |
|
"loss": 1.5867, |
|
"nll_loss": 0.4326634407043457, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -15.95945930480957, |
|
"rewards/margins": 0.973785400390625, |
|
"rewards/rejected": -16.933242797851562, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9985553308292401, |
|
"eval_logits/chosen": -0.5128109455108643, |
|
"eval_logits/rejected": -0.478252649307251, |
|
"eval_logps/chosen": -160.0940704345703, |
|
"eval_logps/rejected": -169.7456817626953, |
|
"eval_loss": 1.5247759819030762, |
|
"eval_nll_loss": 0.43732327222824097, |
|
"eval_rewards/accuracies": 0.658695638179779, |
|
"eval_rewards/chosen": -16.00940704345703, |
|
"eval_rewards/margins": 0.9651613831520081, |
|
"eval_rewards/rejected": -16.97456932067871, |
|
"eval_runtime": 74.3146, |
|
"eval_samples_per_second": 24.571, |
|
"eval_steps_per_second": 1.547, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.0170470962149667, |
|
"grad_norm": 31.19192886352539, |
|
"learning_rate": 7.341337907375643e-07, |
|
"logits/chosen": -0.5276386141777039, |
|
"logits/rejected": -0.53285151720047, |
|
"logps/chosen": -151.15855407714844, |
|
"logps/rejected": -171.3574676513672, |
|
"loss": 1.053, |
|
"nll_loss": 0.41920948028564453, |
|
"rewards/accuracies": 0.7906249761581421, |
|
"rewards/chosen": -15.115857124328613, |
|
"rewards/margins": 2.019890308380127, |
|
"rewards/rejected": -17.135746002197266, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0401618029471251, |
|
"grad_norm": 29.320512771606445, |
|
"learning_rate": 7.255574614065179e-07, |
|
"logits/chosen": -0.5006138682365417, |
|
"logits/rejected": -0.4941863417625427, |
|
"logps/chosen": -140.65113830566406, |
|
"logps/rejected": -162.8360137939453, |
|
"loss": 0.8732, |
|
"nll_loss": 0.39865946769714355, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -14.065114974975586, |
|
"rewards/margins": 2.2184863090515137, |
|
"rewards/rejected": -16.283601760864258, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.0632765096792833, |
|
"grad_norm": 35.25569152832031, |
|
"learning_rate": 7.169811320754716e-07, |
|
"logits/chosen": -0.5818579196929932, |
|
"logits/rejected": -0.560608983039856, |
|
"logps/chosen": -144.07252502441406, |
|
"logps/rejected": -166.63180541992188, |
|
"loss": 0.8386, |
|
"nll_loss": 0.3961290419101715, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -14.407251358032227, |
|
"rewards/margins": 2.2559285163879395, |
|
"rewards/rejected": -16.663179397583008, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.0863912164114418, |
|
"grad_norm": 43.183834075927734, |
|
"learning_rate": 7.084048027444254e-07, |
|
"logits/chosen": -0.5513390302658081, |
|
"logits/rejected": -0.5131951570510864, |
|
"logps/chosen": -143.40451049804688, |
|
"logps/rejected": -168.33999633789062, |
|
"loss": 0.8444, |
|
"nll_loss": 0.3932550549507141, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -14.34045124053955, |
|
"rewards/margins": 2.4935507774353027, |
|
"rewards/rejected": -16.834003448486328, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.1095059231436002, |
|
"grad_norm": 46.28910446166992, |
|
"learning_rate": 6.99828473413379e-07, |
|
"logits/chosen": -0.558716893196106, |
|
"logits/rejected": -0.5409826040267944, |
|
"logps/chosen": -149.60780334472656, |
|
"logps/rejected": -174.61407470703125, |
|
"loss": 0.8528, |
|
"nll_loss": 0.38355833292007446, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -14.960783004760742, |
|
"rewards/margins": 2.5006256103515625, |
|
"rewards/rejected": -17.461406707763672, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.1326206298757584, |
|
"grad_norm": 26.747591018676758, |
|
"learning_rate": 6.912521440823327e-07, |
|
"logits/chosen": -0.5055437684059143, |
|
"logits/rejected": -0.48391538858413696, |
|
"logps/chosen": -140.9654083251953, |
|
"logps/rejected": -165.58120727539062, |
|
"loss": 0.7887, |
|
"nll_loss": 0.39576801657676697, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -14.096539497375488, |
|
"rewards/margins": 2.4615824222564697, |
|
"rewards/rejected": -16.558120727539062, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.1557353366079168, |
|
"grad_norm": 35.81715774536133, |
|
"learning_rate": 6.826758147512864e-07, |
|
"logits/chosen": -0.600662887096405, |
|
"logits/rejected": -0.5748721361160278, |
|
"logps/chosen": -140.01742553710938, |
|
"logps/rejected": -164.94715881347656, |
|
"loss": 0.8574, |
|
"nll_loss": 0.38788530230522156, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -14.001742362976074, |
|
"rewards/margins": 2.492974042892456, |
|
"rewards/rejected": -16.49471664428711, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.178850043340075, |
|
"grad_norm": 32.66586685180664, |
|
"learning_rate": 6.740994854202401e-07, |
|
"logits/chosen": -0.6161425113677979, |
|
"logits/rejected": -0.6100048422813416, |
|
"logps/chosen": -145.22962951660156, |
|
"logps/rejected": -166.43936157226562, |
|
"loss": 0.8359, |
|
"nll_loss": 0.3928259015083313, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -14.52296257019043, |
|
"rewards/margins": 2.120974063873291, |
|
"rewards/rejected": -16.643936157226562, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.2019647500722335, |
|
"grad_norm": 33.49191665649414, |
|
"learning_rate": 6.655231560891939e-07, |
|
"logits/chosen": -0.5515680313110352, |
|
"logits/rejected": -0.5309056043624878, |
|
"logps/chosen": -149.71470642089844, |
|
"logps/rejected": -176.19627380371094, |
|
"loss": 0.8231, |
|
"nll_loss": 0.4030747413635254, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -14.971471786499023, |
|
"rewards/margins": 2.6481575965881348, |
|
"rewards/rejected": -17.619626998901367, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.2250794568043917, |
|
"grad_norm": 34.94106674194336, |
|
"learning_rate": 6.569468267581475e-07, |
|
"logits/chosen": -0.48874059319496155, |
|
"logits/rejected": -0.4897806644439697, |
|
"logps/chosen": -143.54364013671875, |
|
"logps/rejected": -170.92356872558594, |
|
"loss": 0.8061, |
|
"nll_loss": 0.3903985321521759, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -14.354365348815918, |
|
"rewards/margins": 2.7379937171936035, |
|
"rewards/rejected": -17.092357635498047, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.24819416353655, |
|
"grad_norm": 38.41946792602539, |
|
"learning_rate": 6.483704974271011e-07, |
|
"logits/chosen": -0.5544147491455078, |
|
"logits/rejected": -0.5470559000968933, |
|
"logps/chosen": -143.85928344726562, |
|
"logps/rejected": -165.72219848632812, |
|
"loss": 0.8594, |
|
"nll_loss": 0.4008564352989197, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -14.385930061340332, |
|
"rewards/margins": 2.1862895488739014, |
|
"rewards/rejected": -16.572219848632812, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.2713088702687085, |
|
"grad_norm": 34.09145736694336, |
|
"learning_rate": 6.397941680960549e-07, |
|
"logits/chosen": -0.6739064455032349, |
|
"logits/rejected": -0.6665552258491516, |
|
"logps/chosen": -153.54989624023438, |
|
"logps/rejected": -179.6954345703125, |
|
"loss": 0.7542, |
|
"nll_loss": 0.4033665060997009, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -15.354990005493164, |
|
"rewards/margins": 2.6145541667938232, |
|
"rewards/rejected": -17.96954345703125, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2944235770008667, |
|
"grad_norm": 33.82338333129883, |
|
"learning_rate": 6.312178387650086e-07, |
|
"logits/chosen": -0.6586141586303711, |
|
"logits/rejected": -0.6481257677078247, |
|
"logps/chosen": -140.09243774414062, |
|
"logps/rejected": -168.21730041503906, |
|
"loss": 0.8033, |
|
"nll_loss": 0.383733332157135, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -14.009244918823242, |
|
"rewards/margins": 2.812483310699463, |
|
"rewards/rejected": -16.821727752685547, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.3175382837330252, |
|
"grad_norm": 37.8900032043457, |
|
"learning_rate": 6.226415094339622e-07, |
|
"logits/chosen": -0.5938795804977417, |
|
"logits/rejected": -0.5684852600097656, |
|
"logps/chosen": -150.65396118164062, |
|
"logps/rejected": -176.8959503173828, |
|
"loss": 0.8343, |
|
"nll_loss": 0.40136751532554626, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -15.065396308898926, |
|
"rewards/margins": 2.624197244644165, |
|
"rewards/rejected": -17.689594268798828, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.3406529904651836, |
|
"grad_norm": 41.33928298950195, |
|
"learning_rate": 6.14065180102916e-07, |
|
"logits/chosen": -0.5615830421447754, |
|
"logits/rejected": -0.5374659299850464, |
|
"logps/chosen": -146.83155822753906, |
|
"logps/rejected": -174.39060974121094, |
|
"loss": 0.8152, |
|
"nll_loss": 0.3893912732601166, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -14.683156967163086, |
|
"rewards/margins": 2.7559053897857666, |
|
"rewards/rejected": -17.43906021118164, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.3637676971973418, |
|
"grad_norm": 40.57521438598633, |
|
"learning_rate": 6.054888507718696e-07, |
|
"logits/chosen": -0.5162426233291626, |
|
"logits/rejected": -0.5248137712478638, |
|
"logps/chosen": -140.13807678222656, |
|
"logps/rejected": -166.10623168945312, |
|
"loss": 0.7793, |
|
"nll_loss": 0.4015275537967682, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -14.013806343078613, |
|
"rewards/margins": 2.596815347671509, |
|
"rewards/rejected": -16.61062240600586, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.3868824039295, |
|
"grad_norm": 32.84572219848633, |
|
"learning_rate": 5.969125214408233e-07, |
|
"logits/chosen": -0.5050234198570251, |
|
"logits/rejected": -0.4766197204589844, |
|
"logps/chosen": -143.4470672607422, |
|
"logps/rejected": -169.41189575195312, |
|
"loss": 0.8246, |
|
"nll_loss": 0.377535343170166, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -14.344708442687988, |
|
"rewards/margins": 2.5964834690093994, |
|
"rewards/rejected": -16.941190719604492, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4099971106616584, |
|
"grad_norm": 35.82289123535156, |
|
"learning_rate": 5.88336192109777e-07, |
|
"logits/chosen": -0.477673202753067, |
|
"logits/rejected": -0.4712616801261902, |
|
"logps/chosen": -141.12518310546875, |
|
"logps/rejected": -165.6627655029297, |
|
"loss": 0.8663, |
|
"nll_loss": 0.40321165323257446, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -14.112518310546875, |
|
"rewards/margins": 2.453758716583252, |
|
"rewards/rejected": -16.5662784576416, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.4331118173938169, |
|
"grad_norm": 22.42660903930664, |
|
"learning_rate": 5.797598627787307e-07, |
|
"logits/chosen": -0.559418797492981, |
|
"logits/rejected": -0.5618263483047485, |
|
"logps/chosen": -147.1465301513672, |
|
"logps/rejected": -175.36915588378906, |
|
"loss": 0.718, |
|
"nll_loss": 0.3817431330680847, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -14.714654922485352, |
|
"rewards/margins": 2.8222622871398926, |
|
"rewards/rejected": -17.536914825439453, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.456226524125975, |
|
"grad_norm": 36.228431701660156, |
|
"learning_rate": 5.711835334476843e-07, |
|
"logits/chosen": -0.45862525701522827, |
|
"logits/rejected": -0.4514252245426178, |
|
"logps/chosen": -134.56240844726562, |
|
"logps/rejected": -161.8486328125, |
|
"loss": 0.757, |
|
"nll_loss": 0.37470743060112, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -13.4562406539917, |
|
"rewards/margins": 2.7286224365234375, |
|
"rewards/rejected": -16.184864044189453, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.4793412308581335, |
|
"grad_norm": 37.52621078491211, |
|
"learning_rate": 5.626072041166381e-07, |
|
"logits/chosen": -0.4572775363922119, |
|
"logits/rejected": -0.4576060175895691, |
|
"logps/chosen": -132.4957275390625, |
|
"logps/rejected": -160.1851348876953, |
|
"loss": 0.8095, |
|
"nll_loss": 0.38259443640708923, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -13.249574661254883, |
|
"rewards/margins": 2.768939733505249, |
|
"rewards/rejected": -16.018512725830078, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.502455937590292, |
|
"grad_norm": 31.980655670166016, |
|
"learning_rate": 5.540308747855917e-07, |
|
"logits/chosen": -0.514967143535614, |
|
"logits/rejected": -0.5276812314987183, |
|
"logps/chosen": -144.45018005371094, |
|
"logps/rejected": -174.40380859375, |
|
"loss": 0.7572, |
|
"nll_loss": 0.391319215297699, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -14.445019721984863, |
|
"rewards/margins": 2.995361328125, |
|
"rewards/rejected": -17.440380096435547, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5255706443224502, |
|
"grad_norm": 33.730308532714844, |
|
"learning_rate": 5.454545454545454e-07, |
|
"logits/chosen": -0.4829156994819641, |
|
"logits/rejected": -0.4813632071018219, |
|
"logps/chosen": -136.35617065429688, |
|
"logps/rejected": -163.93539428710938, |
|
"loss": 0.8365, |
|
"nll_loss": 0.3918777108192444, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -13.63561725616455, |
|
"rewards/margins": 2.757922887802124, |
|
"rewards/rejected": -16.393539428710938, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.5486853510546084, |
|
"grad_norm": 37.20032501220703, |
|
"learning_rate": 5.368782161234992e-07, |
|
"logits/chosen": -0.5203684568405151, |
|
"logits/rejected": -0.5274239778518677, |
|
"logps/chosen": -147.5542449951172, |
|
"logps/rejected": -175.27745056152344, |
|
"loss": 0.7471, |
|
"nll_loss": 0.3757604956626892, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -14.755424499511719, |
|
"rewards/margins": 2.772320032119751, |
|
"rewards/rejected": -17.527746200561523, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.5718000577867668, |
|
"grad_norm": 30.996845245361328, |
|
"learning_rate": 5.283018867924528e-07, |
|
"logits/chosen": -0.4579756259918213, |
|
"logits/rejected": -0.46921706199645996, |
|
"logps/chosen": -141.15423583984375, |
|
"logps/rejected": -167.1835479736328, |
|
"loss": 0.8266, |
|
"nll_loss": 0.3890347182750702, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -14.115423202514648, |
|
"rewards/margins": 2.6029300689697266, |
|
"rewards/rejected": -16.718355178833008, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.5949147645189252, |
|
"grad_norm": 24.3118896484375, |
|
"learning_rate": 5.197255574614064e-07, |
|
"logits/chosen": -0.4211729168891907, |
|
"logits/rejected": -0.42907238006591797, |
|
"logps/chosen": -149.1224365234375, |
|
"logps/rejected": -180.13198852539062, |
|
"loss": 0.7815, |
|
"nll_loss": 0.38901641964912415, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -14.91224479675293, |
|
"rewards/margins": 3.100955009460449, |
|
"rewards/rejected": -18.013200759887695, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.6180294712510834, |
|
"grad_norm": 31.886476516723633, |
|
"learning_rate": 5.111492281303602e-07, |
|
"logits/chosen": -0.36691445112228394, |
|
"logits/rejected": -0.33470767736434937, |
|
"logps/chosen": -139.63748168945312, |
|
"logps/rejected": -166.2130889892578, |
|
"loss": 0.7878, |
|
"nll_loss": 0.38116249442100525, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -13.96374797821045, |
|
"rewards/margins": 2.657560110092163, |
|
"rewards/rejected": -16.621309280395508, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.6411441779832419, |
|
"grad_norm": 26.3208065032959, |
|
"learning_rate": 5.025728987993139e-07, |
|
"logits/chosen": -0.3681151866912842, |
|
"logits/rejected": -0.3645099103450775, |
|
"logps/chosen": -127.74934387207031, |
|
"logps/rejected": -153.67477416992188, |
|
"loss": 0.7603, |
|
"nll_loss": 0.363952100276947, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -12.774932861328125, |
|
"rewards/margins": 2.5925447940826416, |
|
"rewards/rejected": -15.367477416992188, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.6642588847154003, |
|
"grad_norm": 34.93773651123047, |
|
"learning_rate": 4.939965694682676e-07, |
|
"logits/chosen": -0.4657221734523773, |
|
"logits/rejected": -0.47038349509239197, |
|
"logps/chosen": -130.09898376464844, |
|
"logps/rejected": -153.72354125976562, |
|
"loss": 0.7346, |
|
"nll_loss": 0.391609787940979, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -13.00989818572998, |
|
"rewards/margins": 2.362457513809204, |
|
"rewards/rejected": -15.372354507446289, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.6873735914475585, |
|
"grad_norm": 32.160396575927734, |
|
"learning_rate": 4.854202401372212e-07, |
|
"logits/chosen": -0.49930065870285034, |
|
"logits/rejected": -0.4945829510688782, |
|
"logps/chosen": -139.16799926757812, |
|
"logps/rejected": -165.69920349121094, |
|
"loss": 0.7889, |
|
"nll_loss": 0.3781120777130127, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -13.916799545288086, |
|
"rewards/margins": 2.6531200408935547, |
|
"rewards/rejected": -16.569921493530273, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.7104882981797167, |
|
"grad_norm": 32.3018798828125, |
|
"learning_rate": 4.768439108061749e-07, |
|
"logits/chosen": -0.43461236357688904, |
|
"logits/rejected": -0.43381839990615845, |
|
"logps/chosen": -149.93421936035156, |
|
"logps/rejected": -180.68209838867188, |
|
"loss": 0.7308, |
|
"nll_loss": 0.4099641740322113, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -14.99342155456543, |
|
"rewards/margins": 3.074786901473999, |
|
"rewards/rejected": -18.06821060180664, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.7336030049118751, |
|
"grad_norm": 36.8819694519043, |
|
"learning_rate": 4.6826758147512864e-07, |
|
"logits/chosen": -0.4040675759315491, |
|
"logits/rejected": -0.3790331482887268, |
|
"logps/chosen": -129.85499572753906, |
|
"logps/rejected": -156.1541290283203, |
|
"loss": 0.7651, |
|
"nll_loss": 0.3684754967689514, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -12.985501289367676, |
|
"rewards/margins": 2.629912853240967, |
|
"rewards/rejected": -15.615412712097168, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.7567177116440336, |
|
"grad_norm": 28.750247955322266, |
|
"learning_rate": 4.596912521440823e-07, |
|
"logits/chosen": -0.35753822326660156, |
|
"logits/rejected": -0.3664765954017639, |
|
"logps/chosen": -141.37014770507812, |
|
"logps/rejected": -173.26290893554688, |
|
"loss": 0.7928, |
|
"nll_loss": 0.37667417526245117, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -14.13701343536377, |
|
"rewards/margins": 3.1892781257629395, |
|
"rewards/rejected": -17.326290130615234, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.7798324183761918, |
|
"grad_norm": 31.120779037475586, |
|
"learning_rate": 4.51114922813036e-07, |
|
"logits/chosen": -0.3904396593570709, |
|
"logits/rejected": -0.37857958674430847, |
|
"logps/chosen": -134.3868408203125, |
|
"logps/rejected": -159.94497680664062, |
|
"loss": 0.7942, |
|
"nll_loss": 0.38718339800834656, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -13.438684463500977, |
|
"rewards/margins": 2.5558130741119385, |
|
"rewards/rejected": -15.994497299194336, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.8029471251083502, |
|
"grad_norm": 30.371490478515625, |
|
"learning_rate": 4.4253859348198967e-07, |
|
"logits/chosen": -0.3549109101295471, |
|
"logits/rejected": -0.3400852680206299, |
|
"logps/chosen": -137.1378631591797, |
|
"logps/rejected": -166.12417602539062, |
|
"loss": 0.7521, |
|
"nll_loss": 0.38493841886520386, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -13.713786125183105, |
|
"rewards/margins": 2.898630380630493, |
|
"rewards/rejected": -16.612417221069336, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.8260618318405086, |
|
"grad_norm": 35.5169677734375, |
|
"learning_rate": 4.339622641509434e-07, |
|
"logits/chosen": -0.3591529428958893, |
|
"logits/rejected": -0.3554760813713074, |
|
"logps/chosen": -138.97694396972656, |
|
"logps/rejected": -165.46890258789062, |
|
"loss": 0.7557, |
|
"nll_loss": 0.38407889008522034, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -13.89769458770752, |
|
"rewards/margins": 2.6491942405700684, |
|
"rewards/rejected": -16.546890258789062, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.8491765385726668, |
|
"grad_norm": 32.5655403137207, |
|
"learning_rate": 4.25385934819897e-07, |
|
"logits/chosen": -0.2939130961894989, |
|
"logits/rejected": -0.2781139016151428, |
|
"logps/chosen": -127.32011413574219, |
|
"logps/rejected": -156.74063110351562, |
|
"loss": 0.7463, |
|
"nll_loss": 0.36602723598480225, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -12.732012748718262, |
|
"rewards/margins": 2.942049741744995, |
|
"rewards/rejected": -15.67406177520752, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.872291245304825, |
|
"grad_norm": 31.026138305664062, |
|
"learning_rate": 4.1680960548885075e-07, |
|
"logits/chosen": -0.40986233949661255, |
|
"logits/rejected": -0.4055134356021881, |
|
"logps/chosen": -138.88014221191406, |
|
"logps/rejected": -168.54603576660156, |
|
"loss": 0.7996, |
|
"nll_loss": 0.38212329149246216, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -13.88801383972168, |
|
"rewards/margins": 2.9665894508361816, |
|
"rewards/rejected": -16.854602813720703, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.8954059520369835, |
|
"grad_norm": 28.253450393676758, |
|
"learning_rate": 4.0823327615780443e-07, |
|
"logits/chosen": -0.42939743399620056, |
|
"logits/rejected": -0.42729324102401733, |
|
"logps/chosen": -138.198486328125, |
|
"logps/rejected": -170.29718017578125, |
|
"loss": 0.7528, |
|
"nll_loss": 0.38865524530410767, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -13.819849967956543, |
|
"rewards/margins": 3.209869384765625, |
|
"rewards/rejected": -17.02971839904785, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.918520658769142, |
|
"grad_norm": 38.18342590332031, |
|
"learning_rate": 3.9965694682675816e-07, |
|
"logits/chosen": -0.4124279022216797, |
|
"logits/rejected": -0.3961451053619385, |
|
"logps/chosen": -133.0750274658203, |
|
"logps/rejected": -158.16928100585938, |
|
"loss": 0.7746, |
|
"nll_loss": 0.39231568574905396, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -13.307504653930664, |
|
"rewards/margins": 2.509425401687622, |
|
"rewards/rejected": -15.816927909851074, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.9416353655013001, |
|
"grad_norm": 33.664310455322266, |
|
"learning_rate": 3.910806174957118e-07, |
|
"logits/chosen": -0.5193787217140198, |
|
"logits/rejected": -0.5243502855300903, |
|
"logps/chosen": -139.03890991210938, |
|
"logps/rejected": -166.02572631835938, |
|
"loss": 0.7765, |
|
"nll_loss": 0.38118356466293335, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -13.903889656066895, |
|
"rewards/margins": 2.6986842155456543, |
|
"rewards/rejected": -16.60257339477539, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.9647500722334585, |
|
"grad_norm": 33.969181060791016, |
|
"learning_rate": 3.825042881646655e-07, |
|
"logits/chosen": -0.544953465461731, |
|
"logits/rejected": -0.5464820861816406, |
|
"logps/chosen": -138.60324096679688, |
|
"logps/rejected": -165.5134735107422, |
|
"loss": 0.755, |
|
"nll_loss": 0.37446194887161255, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -13.860323905944824, |
|
"rewards/margins": 2.6910228729248047, |
|
"rewards/rejected": -16.551347732543945, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.987864778965617, |
|
"grad_norm": 33.37995529174805, |
|
"learning_rate": 3.739279588336192e-07, |
|
"logits/chosen": -0.5172384977340698, |
|
"logits/rejected": -0.5084408521652222, |
|
"logps/chosen": -138.13735961914062, |
|
"logps/rejected": -167.2960968017578, |
|
"loss": 0.7108, |
|
"nll_loss": 0.37610048055648804, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -13.813735961914062, |
|
"rewards/margins": 2.915872097015381, |
|
"rewards/rejected": -16.729610443115234, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.999422132331696, |
|
"eval_logits/chosen": -0.46843183040618896, |
|
"eval_logits/rejected": -0.4402734041213989, |
|
"eval_logps/chosen": -148.37490844726562, |
|
"eval_logps/rejected": -159.45883178710938, |
|
"eval_loss": 1.5252443552017212, |
|
"eval_nll_loss": 0.4055543839931488, |
|
"eval_rewards/accuracies": 0.6499999761581421, |
|
"eval_rewards/chosen": -14.837491035461426, |
|
"eval_rewards/margins": 1.1083911657333374, |
|
"eval_rewards/rejected": -15.945883750915527, |
|
"eval_runtime": 74.3854, |
|
"eval_samples_per_second": 24.548, |
|
"eval_steps_per_second": 1.546, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.010979485697775, |
|
"grad_norm": 13.765230178833008, |
|
"learning_rate": 3.6535162950257287e-07, |
|
"logits/chosen": -0.5043296217918396, |
|
"logits/rejected": -0.46686577796936035, |
|
"logps/chosen": -133.58786010742188, |
|
"logps/rejected": -166.25955200195312, |
|
"loss": 0.6206, |
|
"nll_loss": 0.35893407464027405, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/chosen": -13.358787536621094, |
|
"rewards/margins": 3.26716947555542, |
|
"rewards/rejected": -16.62595558166504, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.0340941924299334, |
|
"grad_norm": 11.040721893310547, |
|
"learning_rate": 3.5677530017152655e-07, |
|
"logits/chosen": -0.44609642028808594, |
|
"logits/rejected": -0.45637327432632446, |
|
"logps/chosen": -124.71421813964844, |
|
"logps/rejected": -165.182373046875, |
|
"loss": 0.4779, |
|
"nll_loss": 0.35270074009895325, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -12.47142219543457, |
|
"rewards/margins": 4.046815395355225, |
|
"rewards/rejected": -16.51823616027832, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.057208899162092, |
|
"grad_norm": 15.88128662109375, |
|
"learning_rate": 3.481989708404803e-07, |
|
"logits/chosen": -0.3785150647163391, |
|
"logits/rejected": -0.3858146667480469, |
|
"logps/chosen": -123.47149658203125, |
|
"logps/rejected": -166.243408203125, |
|
"loss": 0.464, |
|
"nll_loss": 0.34830960631370544, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -12.347149848937988, |
|
"rewards/margins": 4.277192115783691, |
|
"rewards/rejected": -16.624340057373047, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.0803236058942502, |
|
"grad_norm": 13.753726959228516, |
|
"learning_rate": 3.396226415094339e-07, |
|
"logits/chosen": -0.5063196420669556, |
|
"logits/rejected": -0.4997187554836273, |
|
"logps/chosen": -127.51603698730469, |
|
"logps/rejected": -165.35275268554688, |
|
"loss": 0.4607, |
|
"nll_loss": 0.3605991005897522, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -12.751605033874512, |
|
"rewards/margins": 3.7836709022521973, |
|
"rewards/rejected": -16.535274505615234, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.1034383126264085, |
|
"grad_norm": 16.0849666595459, |
|
"learning_rate": 3.3104631217838763e-07, |
|
"logits/chosen": -0.4099333882331848, |
|
"logits/rejected": -0.4197494387626648, |
|
"logps/chosen": -124.4246597290039, |
|
"logps/rejected": -165.6961669921875, |
|
"loss": 0.4726, |
|
"nll_loss": 0.3642919957637787, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -12.442464828491211, |
|
"rewards/margins": 4.127151966094971, |
|
"rewards/rejected": -16.569618225097656, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.1265530193585667, |
|
"grad_norm": 8.666687965393066, |
|
"learning_rate": 3.2246998284734136e-07, |
|
"logits/chosen": -0.3802010118961334, |
|
"logits/rejected": -0.37199968099594116, |
|
"logps/chosen": -127.65777587890625, |
|
"logps/rejected": -168.89389038085938, |
|
"loss": 0.4658, |
|
"nll_loss": 0.35325732827186584, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -12.765777587890625, |
|
"rewards/margins": 4.123614311218262, |
|
"rewards/rejected": -16.889392852783203, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.1496677260907253, |
|
"grad_norm": 18.571739196777344, |
|
"learning_rate": 3.13893653516295e-07, |
|
"logits/chosen": -0.3906049430370331, |
|
"logits/rejected": -0.38780367374420166, |
|
"logps/chosen": -125.8351058959961, |
|
"logps/rejected": -167.4423828125, |
|
"loss": 0.4662, |
|
"nll_loss": 0.35373836755752563, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -12.583511352539062, |
|
"rewards/margins": 4.160727500915527, |
|
"rewards/rejected": -16.744239807128906, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.1727824328228835, |
|
"grad_norm": 13.318151473999023, |
|
"learning_rate": 3.053173241852487e-07, |
|
"logits/chosen": -0.46741265058517456, |
|
"logits/rejected": -0.46371087431907654, |
|
"logps/chosen": -136.69558715820312, |
|
"logps/rejected": -179.5621795654297, |
|
"loss": 0.4453, |
|
"nll_loss": 0.35544899106025696, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -13.66955852508545, |
|
"rewards/margins": 4.286660194396973, |
|
"rewards/rejected": -17.956218719482422, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.1958971395550417, |
|
"grad_norm": 23.31292152404785, |
|
"learning_rate": 2.967409948542024e-07, |
|
"logits/chosen": -0.5026014447212219, |
|
"logits/rejected": -0.4931977391242981, |
|
"logps/chosen": -137.78359985351562, |
|
"logps/rejected": -178.4962921142578, |
|
"loss": 0.458, |
|
"nll_loss": 0.3667753040790558, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -13.778360366821289, |
|
"rewards/margins": 4.07127046585083, |
|
"rewards/rejected": -17.84963035583496, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.2190118462872004, |
|
"grad_norm": 28.351606369018555, |
|
"learning_rate": 2.881646655231561e-07, |
|
"logits/chosen": -0.4884619116783142, |
|
"logits/rejected": -0.4745559096336365, |
|
"logps/chosen": -134.03724670410156, |
|
"logps/rejected": -174.16436767578125, |
|
"loss": 0.4698, |
|
"nll_loss": 0.3588988780975342, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -13.403724670410156, |
|
"rewards/margins": 4.012711524963379, |
|
"rewards/rejected": -17.41643714904785, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.2421265530193586, |
|
"grad_norm": 15.93598747253418, |
|
"learning_rate": 2.7958833619210975e-07, |
|
"logits/chosen": -0.5274711847305298, |
|
"logits/rejected": -0.5002039074897766, |
|
"logps/chosen": -132.9987335205078, |
|
"logps/rejected": -171.1938018798828, |
|
"loss": 0.4804, |
|
"nll_loss": 0.3535648286342621, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.299871444702148, |
|
"rewards/margins": 3.8195080757141113, |
|
"rewards/rejected": -17.119380950927734, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.265241259751517, |
|
"grad_norm": 13.009766578674316, |
|
"learning_rate": 2.710120068610635e-07, |
|
"logits/chosen": -0.40121275186538696, |
|
"logits/rejected": -0.4012083411216736, |
|
"logps/chosen": -134.23101806640625, |
|
"logps/rejected": -174.37557983398438, |
|
"loss": 0.445, |
|
"nll_loss": 0.3591146767139435, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -13.423103332519531, |
|
"rewards/margins": 4.014455795288086, |
|
"rewards/rejected": -17.437557220458984, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.2883559664836755, |
|
"grad_norm": 10.573274612426758, |
|
"learning_rate": 2.6243567753001716e-07, |
|
"logits/chosen": -0.3767433762550354, |
|
"logits/rejected": -0.3655189275741577, |
|
"logps/chosen": -126.58245849609375, |
|
"logps/rejected": -166.48191833496094, |
|
"loss": 0.4537, |
|
"nll_loss": 0.3470018208026886, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -12.658246994018555, |
|
"rewards/margins": 3.989945650100708, |
|
"rewards/rejected": -16.648191452026367, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.3114706732158337, |
|
"grad_norm": 13.547653198242188, |
|
"learning_rate": 2.5385934819897083e-07, |
|
"logits/chosen": -0.36574894189834595, |
|
"logits/rejected": -0.371765673160553, |
|
"logps/chosen": -132.08352661132812, |
|
"logps/rejected": -174.651611328125, |
|
"loss": 0.4627, |
|
"nll_loss": 0.35074400901794434, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -13.208353042602539, |
|
"rewards/margins": 4.256808280944824, |
|
"rewards/rejected": -17.465160369873047, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.334585379947992, |
|
"grad_norm": 13.845785140991211, |
|
"learning_rate": 2.452830188679245e-07, |
|
"logits/chosen": -0.3355964124202728, |
|
"logits/rejected": -0.3307664096355438, |
|
"logps/chosen": -128.27835083007812, |
|
"logps/rejected": -171.4265899658203, |
|
"loss": 0.4469, |
|
"nll_loss": 0.3528256416320801, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -12.827836990356445, |
|
"rewards/margins": 4.314822673797607, |
|
"rewards/rejected": -17.142658233642578, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.35770008668015, |
|
"grad_norm": 14.853008270263672, |
|
"learning_rate": 2.367066895368782e-07, |
|
"logits/chosen": -0.4388372302055359, |
|
"logits/rejected": -0.44896426796913147, |
|
"logps/chosen": -140.34243774414062, |
|
"logps/rejected": -182.12892150878906, |
|
"loss": 0.4646, |
|
"nll_loss": 0.3558862805366516, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -14.0342435836792, |
|
"rewards/margins": 4.178647518157959, |
|
"rewards/rejected": -18.212890625, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.3808147934123087, |
|
"grad_norm": 17.43394660949707, |
|
"learning_rate": 2.281303602058319e-07, |
|
"logits/chosen": -0.35451555252075195, |
|
"logits/rejected": -0.36915498971939087, |
|
"logps/chosen": -122.70450592041016, |
|
"logps/rejected": -161.46353149414062, |
|
"loss": 0.4646, |
|
"nll_loss": 0.3501938283443451, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -12.2704496383667, |
|
"rewards/margins": 3.875903606414795, |
|
"rewards/rejected": -16.14635467529297, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.403929500144467, |
|
"grad_norm": 12.251300811767578, |
|
"learning_rate": 2.1955403087478557e-07, |
|
"logits/chosen": -0.5434596538543701, |
|
"logits/rejected": -0.4985465407371521, |
|
"logps/chosen": -139.0737762451172, |
|
"logps/rejected": -181.25820922851562, |
|
"loss": 0.4465, |
|
"nll_loss": 0.34608450531959534, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -13.907376289367676, |
|
"rewards/margins": 4.218443393707275, |
|
"rewards/rejected": -18.12582015991211, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.427044206876625, |
|
"grad_norm": 16.99835205078125, |
|
"learning_rate": 2.1097770154373927e-07, |
|
"logits/chosen": -0.4769843518733978, |
|
"logits/rejected": -0.4647130072116852, |
|
"logps/chosen": -141.46798706054688, |
|
"logps/rejected": -184.92819213867188, |
|
"loss": 0.4619, |
|
"nll_loss": 0.34933143854141235, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -14.146799087524414, |
|
"rewards/margins": 4.346020698547363, |
|
"rewards/rejected": -18.49281883239746, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.4501589136087834, |
|
"grad_norm": 12.114070892333984, |
|
"learning_rate": 2.0240137221269295e-07, |
|
"logits/chosen": -0.546917736530304, |
|
"logits/rejected": -0.5437088012695312, |
|
"logps/chosen": -135.78121948242188, |
|
"logps/rejected": -176.3412628173828, |
|
"loss": 0.4464, |
|
"nll_loss": 0.3510003685951233, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -13.57812213897705, |
|
"rewards/margins": 4.056002616882324, |
|
"rewards/rejected": -17.634126663208008, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.473273620340942, |
|
"grad_norm": 10.714365005493164, |
|
"learning_rate": 1.9382504288164663e-07, |
|
"logits/chosen": -0.40261998772621155, |
|
"logits/rejected": -0.381422221660614, |
|
"logps/chosen": -126.05067443847656, |
|
"logps/rejected": -166.70254516601562, |
|
"loss": 0.5006, |
|
"nll_loss": 0.3639281392097473, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -12.605070114135742, |
|
"rewards/margins": 4.065185070037842, |
|
"rewards/rejected": -16.67025375366211, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.4963883270731, |
|
"grad_norm": 14.228996276855469, |
|
"learning_rate": 1.8524871355060033e-07, |
|
"logits/chosen": -0.3770430088043213, |
|
"logits/rejected": -0.3675852417945862, |
|
"logps/chosen": -127.53874206542969, |
|
"logps/rejected": -168.65322875976562, |
|
"loss": 0.4691, |
|
"nll_loss": 0.35038691759109497, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -12.753876686096191, |
|
"rewards/margins": 4.111447334289551, |
|
"rewards/rejected": -16.865324020385742, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.5195030338052584, |
|
"grad_norm": 14.870238304138184, |
|
"learning_rate": 1.76672384219554e-07, |
|
"logits/chosen": -0.4105190336704254, |
|
"logits/rejected": -0.4109324812889099, |
|
"logps/chosen": -122.85371398925781, |
|
"logps/rejected": -162.6988525390625, |
|
"loss": 0.4539, |
|
"nll_loss": 0.34932848811149597, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -12.285372734069824, |
|
"rewards/margins": 3.9845123291015625, |
|
"rewards/rejected": -16.269886016845703, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.542617740537417, |
|
"grad_norm": 18.935836791992188, |
|
"learning_rate": 1.6809605488850769e-07, |
|
"logits/chosen": -0.3432958424091339, |
|
"logits/rejected": -0.3376050591468811, |
|
"logps/chosen": -129.51318359375, |
|
"logps/rejected": -169.71923828125, |
|
"loss": 0.4619, |
|
"nll_loss": 0.36302056908607483, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -12.951319694519043, |
|
"rewards/margins": 4.020605564117432, |
|
"rewards/rejected": -16.971923828125, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.5657324472695753, |
|
"grad_norm": 11.919585227966309, |
|
"learning_rate": 1.5951972555746142e-07, |
|
"logits/chosen": -0.3838345408439636, |
|
"logits/rejected": -0.40160542726516724, |
|
"logps/chosen": -128.54510498046875, |
|
"logps/rejected": -172.9639129638672, |
|
"loss": 0.4529, |
|
"nll_loss": 0.3525315225124359, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -12.854510307312012, |
|
"rewards/margins": 4.441880226135254, |
|
"rewards/rejected": -17.296390533447266, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.5888471540017335, |
|
"grad_norm": 15.11146354675293, |
|
"learning_rate": 1.509433962264151e-07, |
|
"logits/chosen": -0.3595576286315918, |
|
"logits/rejected": -0.3595134913921356, |
|
"logps/chosen": -130.70518493652344, |
|
"logps/rejected": -174.2328643798828, |
|
"loss": 0.4664, |
|
"nll_loss": 0.3525366187095642, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -13.070520401000977, |
|
"rewards/margins": 4.352766990661621, |
|
"rewards/rejected": -17.42328453063965, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.611961860733892, |
|
"grad_norm": 20.21427345275879, |
|
"learning_rate": 1.423670668953688e-07, |
|
"logits/chosen": -0.3621642291545868, |
|
"logits/rejected": -0.3667486906051636, |
|
"logps/chosen": -124.94161224365234, |
|
"logps/rejected": -166.67266845703125, |
|
"loss": 0.449, |
|
"nll_loss": 0.34673169255256653, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -12.494161605834961, |
|
"rewards/margins": 4.173105239868164, |
|
"rewards/rejected": -16.667266845703125, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.6350765674660503, |
|
"grad_norm": 12.434587478637695, |
|
"learning_rate": 1.3379073756432248e-07, |
|
"logits/chosen": -0.39929765462875366, |
|
"logits/rejected": -0.38376063108444214, |
|
"logps/chosen": -134.3514404296875, |
|
"logps/rejected": -176.7789764404297, |
|
"loss": 0.4449, |
|
"nll_loss": 0.34507042169570923, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -13.435147285461426, |
|
"rewards/margins": 4.242752552032471, |
|
"rewards/rejected": -17.677898406982422, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.6581912741982086, |
|
"grad_norm": 19.153188705444336, |
|
"learning_rate": 1.2521440823327615e-07, |
|
"logits/chosen": -0.3830157220363617, |
|
"logits/rejected": -0.3753224015235901, |
|
"logps/chosen": -138.58120727539062, |
|
"logps/rejected": -179.58470153808594, |
|
"loss": 0.4505, |
|
"nll_loss": 0.3539791703224182, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -13.858120918273926, |
|
"rewards/margins": 4.100350379943848, |
|
"rewards/rejected": -17.958471298217773, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.681305980930367, |
|
"grad_norm": 21.320459365844727, |
|
"learning_rate": 1.1663807890222984e-07, |
|
"logits/chosen": -0.39191287755966187, |
|
"logits/rejected": -0.3838447630405426, |
|
"logps/chosen": -125.08642578125, |
|
"logps/rejected": -166.50216674804688, |
|
"loss": 0.4788, |
|
"nll_loss": 0.34421923756599426, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -12.508642196655273, |
|
"rewards/margins": 4.141573905944824, |
|
"rewards/rejected": -16.65021514892578, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.7044206876625254, |
|
"grad_norm": 14.173407554626465, |
|
"learning_rate": 1.0806174957118352e-07, |
|
"logits/chosen": -0.3383176028728485, |
|
"logits/rejected": -0.32876265048980713, |
|
"logps/chosen": -124.4461898803711, |
|
"logps/rejected": -165.80368041992188, |
|
"loss": 0.4486, |
|
"nll_loss": 0.34746435284614563, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -12.444619178771973, |
|
"rewards/margins": 4.135747909545898, |
|
"rewards/rejected": -16.580368041992188, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.7275353943946836, |
|
"grad_norm": 17.886056900024414, |
|
"learning_rate": 9.948542024013721e-08, |
|
"logits/chosen": -0.3328002393245697, |
|
"logits/rejected": -0.3495582342147827, |
|
"logps/chosen": -131.67807006835938, |
|
"logps/rejected": -175.42776489257812, |
|
"loss": 0.4633, |
|
"nll_loss": 0.3507309556007385, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -13.167805671691895, |
|
"rewards/margins": 4.374970436096191, |
|
"rewards/rejected": -17.542776107788086, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.750650101126842, |
|
"grad_norm": 15.006377220153809, |
|
"learning_rate": 9.09090909090909e-08, |
|
"logits/chosen": -0.4121033251285553, |
|
"logits/rejected": -0.4125909209251404, |
|
"logps/chosen": -135.8062286376953, |
|
"logps/rejected": -179.68875122070312, |
|
"loss": 0.4435, |
|
"nll_loss": 0.3616895079612732, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -13.580622673034668, |
|
"rewards/margins": 4.388251304626465, |
|
"rewards/rejected": -17.968875885009766, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.773764807859, |
|
"grad_norm": 15.148149490356445, |
|
"learning_rate": 8.23327615780446e-08, |
|
"logits/chosen": -0.41161981225013733, |
|
"logits/rejected": -0.4115080237388611, |
|
"logps/chosen": -131.64926147460938, |
|
"logps/rejected": -172.00704956054688, |
|
"loss": 0.4567, |
|
"nll_loss": 0.3558318614959717, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -13.164926528930664, |
|
"rewards/margins": 4.0357770919799805, |
|
"rewards/rejected": -17.20070457458496, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.7968795145911587, |
|
"grad_norm": 14.142349243164062, |
|
"learning_rate": 7.375643224699828e-08, |
|
"logits/chosen": -0.38675186038017273, |
|
"logits/rejected": -0.3759027123451233, |
|
"logps/chosen": -119.2894058227539, |
|
"logps/rejected": -160.90139770507812, |
|
"loss": 0.4422, |
|
"nll_loss": 0.3399142920970917, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -11.92894172668457, |
|
"rewards/margins": 4.161198616027832, |
|
"rewards/rejected": -16.090139389038086, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.819994221323317, |
|
"grad_norm": 12.57529354095459, |
|
"learning_rate": 6.518010291595197e-08, |
|
"logits/chosen": -0.3739423155784607, |
|
"logits/rejected": -0.3440755009651184, |
|
"logps/chosen": -126.98980712890625, |
|
"logps/rejected": -168.39813232421875, |
|
"loss": 0.4573, |
|
"nll_loss": 0.347236305475235, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -12.698979377746582, |
|
"rewards/margins": 4.140833377838135, |
|
"rewards/rejected": -16.839813232421875, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.843108928055475, |
|
"grad_norm": 14.81961727142334, |
|
"learning_rate": 5.660377358490566e-08, |
|
"logits/chosen": -0.3412761092185974, |
|
"logits/rejected": -0.3169342875480652, |
|
"logps/chosen": -121.49079895019531, |
|
"logps/rejected": -163.16494750976562, |
|
"loss": 0.4468, |
|
"nll_loss": 0.35947078466415405, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -12.149078369140625, |
|
"rewards/margins": 4.167415618896484, |
|
"rewards/rejected": -16.31649398803711, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.8662236347876338, |
|
"grad_norm": 17.703699111938477, |
|
"learning_rate": 4.802744425385934e-08, |
|
"logits/chosen": -0.3797430396080017, |
|
"logits/rejected": -0.3891495168209076, |
|
"logps/chosen": -130.4372100830078, |
|
"logps/rejected": -171.90744018554688, |
|
"loss": 0.4626, |
|
"nll_loss": 0.35077929496765137, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -13.043721199035645, |
|
"rewards/margins": 4.147024631500244, |
|
"rewards/rejected": -17.190744400024414, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.889338341519792, |
|
"grad_norm": 12.169320106506348, |
|
"learning_rate": 3.945111492281304e-08, |
|
"logits/chosen": -0.3306048512458801, |
|
"logits/rejected": -0.31452488899230957, |
|
"logps/chosen": -128.30259704589844, |
|
"logps/rejected": -170.43658447265625, |
|
"loss": 0.4428, |
|
"nll_loss": 0.35800302028656006, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -12.830259323120117, |
|
"rewards/margins": 4.213398456573486, |
|
"rewards/rejected": -17.043659210205078, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.91245304825195, |
|
"grad_norm": 11.698453903198242, |
|
"learning_rate": 3.087478559176672e-08, |
|
"logits/chosen": -0.38229602575302124, |
|
"logits/rejected": -0.3816959857940674, |
|
"logps/chosen": -131.13912963867188, |
|
"logps/rejected": -173.8229522705078, |
|
"loss": 0.4627, |
|
"nll_loss": 0.3486027419567108, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -13.113912582397461, |
|
"rewards/margins": 4.268382549285889, |
|
"rewards/rejected": -17.382295608520508, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.935567754984109, |
|
"grad_norm": 12.507227897644043, |
|
"learning_rate": 2.229845626072041e-08, |
|
"logits/chosen": -0.3767063617706299, |
|
"logits/rejected": -0.3743255138397217, |
|
"logps/chosen": -133.4464569091797, |
|
"logps/rejected": -176.45843505859375, |
|
"loss": 0.4712, |
|
"nll_loss": 0.3417053818702698, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -13.344644546508789, |
|
"rewards/margins": 4.301196098327637, |
|
"rewards/rejected": -17.64583969116211, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.958682461716267, |
|
"grad_norm": 17.885986328125, |
|
"learning_rate": 1.3722126929674098e-08, |
|
"logits/chosen": -0.40772175788879395, |
|
"logits/rejected": -0.41167011857032776, |
|
"logps/chosen": -131.9138946533203, |
|
"logps/rejected": -176.85263061523438, |
|
"loss": 0.4666, |
|
"nll_loss": 0.3591021001338959, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -13.191390991210938, |
|
"rewards/margins": 4.49387264251709, |
|
"rewards/rejected": -17.685260772705078, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.9817971684484252, |
|
"grad_norm": 12.335119247436523, |
|
"learning_rate": 5.145797598627788e-09, |
|
"logits/chosen": -0.3768162131309509, |
|
"logits/rejected": -0.37075626850128174, |
|
"logps/chosen": -136.2458038330078, |
|
"logps/rejected": -178.2454071044922, |
|
"loss": 0.4426, |
|
"nll_loss": 0.3536157011985779, |
|
"rewards/accuracies": 0.9906250238418579, |
|
"rewards/chosen": -13.624580383300781, |
|
"rewards/margins": 4.1999616622924805, |
|
"rewards/rejected": -17.824541091918945, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.9956659924877203, |
|
"eval_logits/chosen": -0.34079235792160034, |
|
"eval_logits/rejected": -0.31415677070617676, |
|
"eval_logps/chosen": -143.9447784423828, |
|
"eval_logps/rejected": -155.83648681640625, |
|
"eval_loss": 1.5984355211257935, |
|
"eval_nll_loss": 0.3937048017978668, |
|
"eval_rewards/accuracies": 0.6304348111152649, |
|
"eval_rewards/chosen": -14.394478797912598, |
|
"eval_rewards/margins": 1.1891697645187378, |
|
"eval_rewards/rejected": -15.583648681640625, |
|
"eval_runtime": 73.5734, |
|
"eval_samples_per_second": 24.819, |
|
"eval_steps_per_second": 1.563, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 2.9956659924877203, |
|
"step": 1296, |
|
"total_flos": 0.0, |
|
"train_loss": 1.025652496903031, |
|
"train_runtime": 27913.7357, |
|
"train_samples_per_second": 5.951, |
|
"train_steps_per_second": 0.046 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1296, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|