|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9985553308292401, |
|
"eval_steps": 100, |
|
"global_step": 432, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.023114706732158336, |
|
"grad_norm": 66.92803955078125, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -0.33564168214797974, |
|
"logits/rejected": -0.3153206706047058, |
|
"logps/chosen": -269.33428955078125, |
|
"logps/rejected": -267.60894775390625, |
|
"loss": 2.6157, |
|
"nll_loss": 0.741317629814148, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -26.933429718017578, |
|
"rewards/margins": -0.17253029346466064, |
|
"rewards/rejected": -26.76089859008789, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04622941346431667, |
|
"grad_norm": 54.904842376708984, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -0.3472834527492523, |
|
"logits/rejected": -0.3292314112186432, |
|
"logps/chosen": -260.78680419921875, |
|
"logps/rejected": -267.32977294921875, |
|
"loss": 2.5223, |
|
"nll_loss": 0.7186762094497681, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -26.078683853149414, |
|
"rewards/margins": 0.6542952060699463, |
|
"rewards/rejected": -26.732980728149414, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06934412019647501, |
|
"grad_norm": 57.42607498168945, |
|
"learning_rate": 6.818181818181817e-07, |
|
"logits/chosen": -0.3462437689304352, |
|
"logits/rejected": -0.334714412689209, |
|
"logps/chosen": -247.49801635742188, |
|
"logps/rejected": -250.79483032226562, |
|
"loss": 2.3549, |
|
"nll_loss": 0.7035976052284241, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -24.749801635742188, |
|
"rewards/margins": 0.3296825885772705, |
|
"rewards/rejected": -25.079483032226562, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09245882692863334, |
|
"grad_norm": 46.98875427246094, |
|
"learning_rate": 9.09090909090909e-07, |
|
"logits/chosen": -0.5729629993438721, |
|
"logits/rejected": -0.5595733523368835, |
|
"logps/chosen": -215.0082244873047, |
|
"logps/rejected": -216.9280548095703, |
|
"loss": 2.1725, |
|
"nll_loss": 0.6498099565505981, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -21.500822067260742, |
|
"rewards/margins": 0.19198258221149445, |
|
"rewards/rejected": -21.692806243896484, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11557353366079168, |
|
"grad_norm": 47.18745803833008, |
|
"learning_rate": 9.845360824742267e-07, |
|
"logits/chosen": -0.8356858491897583, |
|
"logits/rejected": -0.8102104067802429, |
|
"logps/chosen": -196.44061279296875, |
|
"logps/rejected": -195.4991455078125, |
|
"loss": 2.1857, |
|
"nll_loss": 0.5283800959587097, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -19.644062042236328, |
|
"rewards/margins": -0.09414808452129364, |
|
"rewards/rejected": -19.549915313720703, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13868824039295002, |
|
"grad_norm": 55.287715911865234, |
|
"learning_rate": 9.587628865979382e-07, |
|
"logits/chosen": -0.6823571920394897, |
|
"logits/rejected": -0.68729567527771, |
|
"logps/chosen": -164.70936584472656, |
|
"logps/rejected": -164.9478302001953, |
|
"loss": 2.0037, |
|
"nll_loss": 0.4647987484931946, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -16.470935821533203, |
|
"rewards/margins": 0.02384711429476738, |
|
"rewards/rejected": -16.494781494140625, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16180294712510834, |
|
"grad_norm": 51.4761962890625, |
|
"learning_rate": 9.329896907216495e-07, |
|
"logits/chosen": -0.5190773010253906, |
|
"logits/rejected": -0.49602770805358887, |
|
"logps/chosen": -155.39878845214844, |
|
"logps/rejected": -157.3554229736328, |
|
"loss": 1.9943, |
|
"nll_loss": 0.4453979432582855, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -15.539876937866211, |
|
"rewards/margins": 0.195662721991539, |
|
"rewards/rejected": -15.735540390014648, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1849176538572667, |
|
"grad_norm": 46.25474166870117, |
|
"learning_rate": 9.072164948453608e-07, |
|
"logits/chosen": -0.4949778914451599, |
|
"logits/rejected": -0.47060757875442505, |
|
"logps/chosen": -158.2879180908203, |
|
"logps/rejected": -161.47145080566406, |
|
"loss": 1.8594, |
|
"nll_loss": 0.4298928380012512, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": -15.828791618347168, |
|
"rewards/margins": 0.3183526396751404, |
|
"rewards/rejected": -16.147144317626953, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.208032360589425, |
|
"grad_norm": 49.99284744262695, |
|
"learning_rate": 8.814432989690721e-07, |
|
"logits/chosen": -0.37455958127975464, |
|
"logits/rejected": -0.3673579692840576, |
|
"logps/chosen": -153.99154663085938, |
|
"logps/rejected": -161.9925079345703, |
|
"loss": 1.7748, |
|
"nll_loss": 0.42108353972435, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.39915657043457, |
|
"rewards/margins": 0.8000966310501099, |
|
"rewards/rejected": -16.19925308227539, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23114706732158335, |
|
"grad_norm": 47.385650634765625, |
|
"learning_rate": 8.556701030927834e-07, |
|
"logits/chosen": -0.3636273443698883, |
|
"logits/rejected": -0.3537142872810364, |
|
"logps/chosen": -153.48483276367188, |
|
"logps/rejected": -156.58370971679688, |
|
"loss": 1.9304, |
|
"nll_loss": 0.4187200665473938, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -15.348485946655273, |
|
"rewards/margins": 0.3098832070827484, |
|
"rewards/rejected": -15.658369064331055, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23114706732158335, |
|
"eval_logits/chosen": -0.3853626549243927, |
|
"eval_logits/rejected": -0.36488524079322815, |
|
"eval_logps/chosen": -149.94454956054688, |
|
"eval_logps/rejected": -153.576171875, |
|
"eval_loss": 1.7872967720031738, |
|
"eval_nll_loss": 0.4085230827331543, |
|
"eval_rewards/accuracies": 0.5804347991943359, |
|
"eval_rewards/chosen": -14.994454383850098, |
|
"eval_rewards/margins": 0.36316320300102234, |
|
"eval_rewards/rejected": -15.357619285583496, |
|
"eval_runtime": 73.8265, |
|
"eval_samples_per_second": 24.734, |
|
"eval_steps_per_second": 1.558, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2542617740537417, |
|
"grad_norm": 47.490840911865234, |
|
"learning_rate": 8.298969072164948e-07, |
|
"logits/chosen": -0.3666607141494751, |
|
"logits/rejected": -0.3409901261329651, |
|
"logps/chosen": -147.7798309326172, |
|
"logps/rejected": -152.14883422851562, |
|
"loss": 1.7164, |
|
"nll_loss": 0.40307506918907166, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -14.777984619140625, |
|
"rewards/margins": 0.4368988573551178, |
|
"rewards/rejected": -15.214881896972656, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27737648078590005, |
|
"grad_norm": 45.41544723510742, |
|
"learning_rate": 8.041237113402062e-07, |
|
"logits/chosen": -0.36932340264320374, |
|
"logits/rejected": -0.3461097776889801, |
|
"logps/chosen": -155.2689208984375, |
|
"logps/rejected": -156.53543090820312, |
|
"loss": 1.7549, |
|
"nll_loss": 0.42143669724464417, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -15.526891708374023, |
|
"rewards/margins": 0.12664994597434998, |
|
"rewards/rejected": -15.653543472290039, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.30049118751805837, |
|
"grad_norm": 55.190162658691406, |
|
"learning_rate": 7.783505154639175e-07, |
|
"logits/chosen": -0.36812376976013184, |
|
"logits/rejected": -0.3466120660305023, |
|
"logps/chosen": -153.25393676757812, |
|
"logps/rejected": -161.2375030517578, |
|
"loss": 1.8234, |
|
"nll_loss": 0.4233035147190094, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -15.325393676757812, |
|
"rewards/margins": 0.7983576655387878, |
|
"rewards/rejected": -16.123750686645508, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3236058942502167, |
|
"grad_norm": 58.641231536865234, |
|
"learning_rate": 7.525773195876288e-07, |
|
"logits/chosen": -0.42011794447898865, |
|
"logits/rejected": -0.41268259286880493, |
|
"logps/chosen": -144.4340362548828, |
|
"logps/rejected": -149.6342010498047, |
|
"loss": 1.7976, |
|
"nll_loss": 0.4138007164001465, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -14.443403244018555, |
|
"rewards/margins": 0.520018458366394, |
|
"rewards/rejected": -14.963422775268555, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34672060098237506, |
|
"grad_norm": 59.50709533691406, |
|
"learning_rate": 7.268041237113402e-07, |
|
"logits/chosen": -0.45735687017440796, |
|
"logits/rejected": -0.44332581758499146, |
|
"logps/chosen": -141.5747528076172, |
|
"logps/rejected": -147.01950073242188, |
|
"loss": 1.8755, |
|
"nll_loss": 0.41054767370224, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -14.157475471496582, |
|
"rewards/margins": 0.5444743037223816, |
|
"rewards/rejected": -14.701950073242188, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3698353077145334, |
|
"grad_norm": 40.405235290527344, |
|
"learning_rate": 7.010309278350515e-07, |
|
"logits/chosen": -0.4439857602119446, |
|
"logits/rejected": -0.41624826192855835, |
|
"logps/chosen": -156.5723876953125, |
|
"logps/rejected": -160.45640563964844, |
|
"loss": 1.7024, |
|
"nll_loss": 0.4132766127586365, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -15.65723991394043, |
|
"rewards/margins": 0.38840025663375854, |
|
"rewards/rejected": -16.045639038085938, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3929500144466917, |
|
"grad_norm": 51.05006790161133, |
|
"learning_rate": 6.752577319587629e-07, |
|
"logits/chosen": -0.39237576723098755, |
|
"logits/rejected": -0.3862777650356293, |
|
"logps/chosen": -152.97549438476562, |
|
"logps/rejected": -160.07443237304688, |
|
"loss": 1.599, |
|
"nll_loss": 0.41846928000450134, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -15.2975492477417, |
|
"rewards/margins": 0.7098936438560486, |
|
"rewards/rejected": -16.007442474365234, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41606472117885, |
|
"grad_norm": 48.777130126953125, |
|
"learning_rate": 6.494845360824742e-07, |
|
"logits/chosen": -0.4046599864959717, |
|
"logits/rejected": -0.4020842909812927, |
|
"logps/chosen": -148.8914794921875, |
|
"logps/rejected": -155.31307983398438, |
|
"loss": 1.6521, |
|
"nll_loss": 0.4291691780090332, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -14.889147758483887, |
|
"rewards/margins": 0.6421611309051514, |
|
"rewards/rejected": -15.531309127807617, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4391794279110084, |
|
"grad_norm": 51.11661911010742, |
|
"learning_rate": 6.237113402061855e-07, |
|
"logits/chosen": -0.42425212264060974, |
|
"logits/rejected": -0.4136783480644226, |
|
"logps/chosen": -156.27357482910156, |
|
"logps/rejected": -162.0409393310547, |
|
"loss": 1.7189, |
|
"nll_loss": 0.42820248007774353, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -15.627357482910156, |
|
"rewards/margins": 0.5767360329627991, |
|
"rewards/rejected": -16.2040958404541, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4622941346431667, |
|
"grad_norm": 45.06605529785156, |
|
"learning_rate": 5.979381443298969e-07, |
|
"logits/chosen": -0.3681766986846924, |
|
"logits/rejected": -0.36281704902648926, |
|
"logps/chosen": -157.00155639648438, |
|
"logps/rejected": -163.3647003173828, |
|
"loss": 1.6908, |
|
"nll_loss": 0.4334492087364197, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -15.700152397155762, |
|
"rewards/margins": 0.6363152265548706, |
|
"rewards/rejected": -16.336469650268555, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4622941346431667, |
|
"eval_logits/chosen": -0.4014091491699219, |
|
"eval_logits/rejected": -0.3777381181716919, |
|
"eval_logps/chosen": -156.43685913085938, |
|
"eval_logps/rejected": -162.43849182128906, |
|
"eval_loss": 1.6701573133468628, |
|
"eval_nll_loss": 0.42524340748786926, |
|
"eval_rewards/accuracies": 0.5978260636329651, |
|
"eval_rewards/chosen": -15.643685340881348, |
|
"eval_rewards/margins": 0.6001652479171753, |
|
"eval_rewards/rejected": -16.243852615356445, |
|
"eval_runtime": 73.8669, |
|
"eval_samples_per_second": 24.72, |
|
"eval_steps_per_second": 1.557, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48540884137532503, |
|
"grad_norm": 52.39714813232422, |
|
"learning_rate": 5.721649484536082e-07, |
|
"logits/chosen": -0.4187684953212738, |
|
"logits/rejected": -0.39357370138168335, |
|
"logps/chosen": -151.34727478027344, |
|
"logps/rejected": -153.41477966308594, |
|
"loss": 1.6691, |
|
"nll_loss": 0.42135825753211975, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -15.134727478027344, |
|
"rewards/margins": 0.2067503184080124, |
|
"rewards/rejected": -15.34147834777832, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5085235481074833, |
|
"grad_norm": 48.871177673339844, |
|
"learning_rate": 5.463917525773195e-07, |
|
"logits/chosen": -0.46896496415138245, |
|
"logits/rejected": -0.4411422312259674, |
|
"logps/chosen": -160.63766479492188, |
|
"logps/rejected": -166.47012329101562, |
|
"loss": 1.7268, |
|
"nll_loss": 0.43222665786743164, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -16.063764572143555, |
|
"rewards/margins": 0.5832474231719971, |
|
"rewards/rejected": -16.64701271057129, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5316382548396418, |
|
"grad_norm": 46.8618278503418, |
|
"learning_rate": 5.20618556701031e-07, |
|
"logits/chosen": -0.4806763529777527, |
|
"logits/rejected": -0.4655001163482666, |
|
"logps/chosen": -164.1845245361328, |
|
"logps/rejected": -170.3590087890625, |
|
"loss": 1.753, |
|
"nll_loss": 0.4462898373603821, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -16.4184513092041, |
|
"rewards/margins": 0.6174517869949341, |
|
"rewards/rejected": -17.03590202331543, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5547529615718001, |
|
"grad_norm": 48.733802795410156, |
|
"learning_rate": 4.948453608247422e-07, |
|
"logits/chosen": -0.42387205362319946, |
|
"logits/rejected": -0.4110351502895355, |
|
"logps/chosen": -159.4880828857422, |
|
"logps/rejected": -164.54312133789062, |
|
"loss": 1.5882, |
|
"nll_loss": 0.4322621822357178, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -15.948808670043945, |
|
"rewards/margins": 0.5055034756660461, |
|
"rewards/rejected": -16.45431137084961, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5778676683039584, |
|
"grad_norm": 53.66923141479492, |
|
"learning_rate": 4.6907216494845357e-07, |
|
"logits/chosen": -0.45519933104515076, |
|
"logits/rejected": -0.44147634506225586, |
|
"logps/chosen": -156.13479614257812, |
|
"logps/rejected": -159.08212280273438, |
|
"loss": 1.7404, |
|
"nll_loss": 0.4152873158454895, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -15.613479614257812, |
|
"rewards/margins": 0.29473432898521423, |
|
"rewards/rejected": -15.908210754394531, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6009823750361167, |
|
"grad_norm": 50.42735290527344, |
|
"learning_rate": 4.432989690721649e-07, |
|
"logits/chosen": -0.5512745976448059, |
|
"logits/rejected": -0.5374751091003418, |
|
"logps/chosen": -152.5029296875, |
|
"logps/rejected": -160.7849884033203, |
|
"loss": 1.5895, |
|
"nll_loss": 0.4251280725002289, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -15.250292778015137, |
|
"rewards/margins": 0.8282074928283691, |
|
"rewards/rejected": -16.07849884033203, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.624097081768275, |
|
"grad_norm": 42.25716781616211, |
|
"learning_rate": 4.175257731958763e-07, |
|
"logits/chosen": -0.5728802680969238, |
|
"logits/rejected": -0.5652969479560852, |
|
"logps/chosen": -153.87515258789062, |
|
"logps/rejected": -160.92990112304688, |
|
"loss": 1.622, |
|
"nll_loss": 0.42097169160842896, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -15.3875150680542, |
|
"rewards/margins": 0.7054744362831116, |
|
"rewards/rejected": -16.09299087524414, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6472117885004334, |
|
"grad_norm": 52.24223327636719, |
|
"learning_rate": 3.917525773195876e-07, |
|
"logits/chosen": -0.5287462472915649, |
|
"logits/rejected": -0.5079108476638794, |
|
"logps/chosen": -155.820068359375, |
|
"logps/rejected": -165.27578735351562, |
|
"loss": 1.6145, |
|
"nll_loss": 0.42984142899513245, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -15.582005500793457, |
|
"rewards/margins": 0.9455726742744446, |
|
"rewards/rejected": -16.527578353881836, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6703264952325917, |
|
"grad_norm": 54.0615119934082, |
|
"learning_rate": 3.659793814432989e-07, |
|
"logits/chosen": -0.5829291343688965, |
|
"logits/rejected": -0.5588080883026123, |
|
"logps/chosen": -160.86459350585938, |
|
"logps/rejected": -166.34828186035156, |
|
"loss": 1.5808, |
|
"nll_loss": 0.4233360290527344, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -16.08646011352539, |
|
"rewards/margins": 0.5483680963516235, |
|
"rewards/rejected": -16.63482666015625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6934412019647501, |
|
"grad_norm": 43.99635696411133, |
|
"learning_rate": 3.402061855670103e-07, |
|
"logits/chosen": -0.5929441452026367, |
|
"logits/rejected": -0.585766613483429, |
|
"logps/chosen": -164.1534423828125, |
|
"logps/rejected": -173.26889038085938, |
|
"loss": 1.6317, |
|
"nll_loss": 0.43874359130859375, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -16.415346145629883, |
|
"rewards/margins": 0.911544144153595, |
|
"rewards/rejected": -17.326889038085938, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6934412019647501, |
|
"eval_logits/chosen": -0.49481379985809326, |
|
"eval_logits/rejected": -0.4753292500972748, |
|
"eval_logps/chosen": -154.68177795410156, |
|
"eval_logps/rejected": -161.5185089111328, |
|
"eval_loss": 1.6161738634109497, |
|
"eval_nll_loss": 0.42018982768058777, |
|
"eval_rewards/accuracies": 0.615217387676239, |
|
"eval_rewards/chosen": -15.468178749084473, |
|
"eval_rewards/margins": 0.6836734414100647, |
|
"eval_rewards/rejected": -16.151851654052734, |
|
"eval_runtime": 74.0087, |
|
"eval_samples_per_second": 24.673, |
|
"eval_steps_per_second": 1.554, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7165559086969084, |
|
"grad_norm": 47.26166534423828, |
|
"learning_rate": 3.1443298969072163e-07, |
|
"logits/chosen": -0.5505023002624512, |
|
"logits/rejected": -0.5488861799240112, |
|
"logps/chosen": -164.3349609375, |
|
"logps/rejected": -167.97909545898438, |
|
"loss": 1.6871, |
|
"nll_loss": 0.42464059591293335, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -16.433496475219727, |
|
"rewards/margins": 0.36441320180892944, |
|
"rewards/rejected": -16.797908782958984, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7396706154290668, |
|
"grad_norm": 44.56684875488281, |
|
"learning_rate": 2.8865979381443296e-07, |
|
"logits/chosen": -0.5739372372627258, |
|
"logits/rejected": -0.5621416568756104, |
|
"logps/chosen": -154.22021484375, |
|
"logps/rejected": -162.76797485351562, |
|
"loss": 1.6709, |
|
"nll_loss": 0.42187362909317017, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -15.422021865844727, |
|
"rewards/margins": 0.8547781109809875, |
|
"rewards/rejected": -16.276798248291016, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7627853221612251, |
|
"grad_norm": 48.82611083984375, |
|
"learning_rate": 2.6288659793814435e-07, |
|
"logits/chosen": -0.584633469581604, |
|
"logits/rejected": -0.5864993333816528, |
|
"logps/chosen": -154.85739135742188, |
|
"logps/rejected": -161.60858154296875, |
|
"loss": 1.6549, |
|
"nll_loss": 0.43096083402633667, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": -15.485738754272461, |
|
"rewards/margins": 0.6751174330711365, |
|
"rewards/rejected": -16.160858154296875, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7859000288933834, |
|
"grad_norm": 46.865325927734375, |
|
"learning_rate": 2.3711340206185566e-07, |
|
"logits/chosen": -0.5921510457992554, |
|
"logits/rejected": -0.5866528153419495, |
|
"logps/chosen": -157.51187133789062, |
|
"logps/rejected": -162.7019500732422, |
|
"loss": 1.5515, |
|
"nll_loss": 0.4305364489555359, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -15.751187324523926, |
|
"rewards/margins": 0.5190097093582153, |
|
"rewards/rejected": -16.27019500732422, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8090147356255417, |
|
"grad_norm": 45.32524871826172, |
|
"learning_rate": 2.11340206185567e-07, |
|
"logits/chosen": -0.574793815612793, |
|
"logits/rejected": -0.575833261013031, |
|
"logps/chosen": -158.30972290039062, |
|
"logps/rejected": -167.1160888671875, |
|
"loss": 1.6559, |
|
"nll_loss": 0.4133908152580261, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -15.830973625183105, |
|
"rewards/margins": 0.8806363940238953, |
|
"rewards/rejected": -16.71160888671875, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8321294423577, |
|
"grad_norm": 53.514156341552734, |
|
"learning_rate": 1.8556701030927835e-07, |
|
"logits/chosen": -0.5572192668914795, |
|
"logits/rejected": -0.5455694794654846, |
|
"logps/chosen": -161.35818481445312, |
|
"logps/rejected": -166.35061645507812, |
|
"loss": 1.7006, |
|
"nll_loss": 0.4319698214530945, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -16.135818481445312, |
|
"rewards/margins": 0.4992440342903137, |
|
"rewards/rejected": -16.635061264038086, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8552441490898585, |
|
"grad_norm": 47.664703369140625, |
|
"learning_rate": 1.5979381443298966e-07, |
|
"logits/chosen": -0.5517255067825317, |
|
"logits/rejected": -0.5260570049285889, |
|
"logps/chosen": -155.55886840820312, |
|
"logps/rejected": -165.2543487548828, |
|
"loss": 1.5564, |
|
"nll_loss": 0.42134684324264526, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -15.555887222290039, |
|
"rewards/margins": 0.9695472717285156, |
|
"rewards/rejected": -16.525434494018555, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8783588558220168, |
|
"grad_norm": 48.99809646606445, |
|
"learning_rate": 1.3402061855670102e-07, |
|
"logits/chosen": -0.5434561967849731, |
|
"logits/rejected": -0.5287705063819885, |
|
"logps/chosen": -154.3162841796875, |
|
"logps/rejected": -161.20472717285156, |
|
"loss": 1.5817, |
|
"nll_loss": 0.431587278842926, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -15.431628227233887, |
|
"rewards/margins": 0.6888439655303955, |
|
"rewards/rejected": -16.120471954345703, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9014735625541751, |
|
"grad_norm": 52.46923065185547, |
|
"learning_rate": 1.0824742268041237e-07, |
|
"logits/chosen": -0.5911422967910767, |
|
"logits/rejected": -0.5752480626106262, |
|
"logps/chosen": -158.2704620361328, |
|
"logps/rejected": -168.7931671142578, |
|
"loss": 1.6055, |
|
"nll_loss": 0.42453208565711975, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -15.827044486999512, |
|
"rewards/margins": 1.0522701740264893, |
|
"rewards/rejected": -16.879314422607422, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9245882692863334, |
|
"grad_norm": 50.29128646850586, |
|
"learning_rate": 8.24742268041237e-08, |
|
"logits/chosen": -0.5491029024124146, |
|
"logits/rejected": -0.5335959196090698, |
|
"logps/chosen": -162.01458740234375, |
|
"logps/rejected": -167.11669921875, |
|
"loss": 1.62, |
|
"nll_loss": 0.44402360916137695, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -16.201457977294922, |
|
"rewards/margins": 0.5102119445800781, |
|
"rewards/rejected": -16.711669921875, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9245882692863334, |
|
"eval_logits/chosen": -0.514378547668457, |
|
"eval_logits/rejected": -0.49102360010147095, |
|
"eval_logps/chosen": -155.9636688232422, |
|
"eval_logps/rejected": -163.15530395507812, |
|
"eval_loss": 1.5946580171585083, |
|
"eval_nll_loss": 0.4261849522590637, |
|
"eval_rewards/accuracies": 0.626086950302124, |
|
"eval_rewards/chosen": -15.596366882324219, |
|
"eval_rewards/margins": 0.7191624045372009, |
|
"eval_rewards/rejected": -16.315528869628906, |
|
"eval_runtime": 73.9588, |
|
"eval_samples_per_second": 24.689, |
|
"eval_steps_per_second": 1.555, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9477029760184917, |
|
"grad_norm": 49.76476287841797, |
|
"learning_rate": 5.670103092783505e-08, |
|
"logits/chosen": -0.47304850816726685, |
|
"logits/rejected": -0.46452435851097107, |
|
"logps/chosen": -148.42233276367188, |
|
"logps/rejected": -157.0984649658203, |
|
"loss": 1.5582, |
|
"nll_loss": 0.42344313859939575, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -14.842233657836914, |
|
"rewards/margins": 0.8676150441169739, |
|
"rewards/rejected": -15.709848403930664, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9708176827506501, |
|
"grad_norm": 45.05620574951172, |
|
"learning_rate": 3.092783505154639e-08, |
|
"logits/chosen": -0.5102072954177856, |
|
"logits/rejected": -0.4865845739841461, |
|
"logps/chosen": -150.34286499023438, |
|
"logps/rejected": -156.001708984375, |
|
"loss": 1.5674, |
|
"nll_loss": 0.4283737242221832, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -15.03428840637207, |
|
"rewards/margins": 0.5658840537071228, |
|
"rewards/rejected": -15.600171089172363, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9939323894828085, |
|
"grad_norm": 46.765380859375, |
|
"learning_rate": 5.154639175257731e-09, |
|
"logits/chosen": -0.5253250002861023, |
|
"logits/rejected": -0.534714937210083, |
|
"logps/chosen": -157.79779052734375, |
|
"logps/rejected": -164.88742065429688, |
|
"loss": 1.645, |
|
"nll_loss": 0.42819744348526, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -15.779779434204102, |
|
"rewards/margins": 0.7089639902114868, |
|
"rewards/rejected": -16.48874282836914, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9985553308292401, |
|
"step": 432, |
|
"total_flos": 0.0, |
|
"train_loss": 1.7731637126869626, |
|
"train_runtime": 10231.9294, |
|
"train_samples_per_second": 5.412, |
|
"train_steps_per_second": 0.042 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 432, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|